graphml.cpp 15.9 KB
Newer Older
Tiago Peixoto's avatar
Tiago Peixoto committed
1
// Copyright (C) 2006  Tiago de Paula Peixoto <tiago@skewed.de>
2
// Copyright (C) 2004  The Trustees of Indiana University.
Tiago Peixoto's avatar
Tiago Peixoto committed
3
//
4
5
6
// Use, modification and distribution is subject to the Boost Software
// License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
Tiago Peixoto's avatar
Tiago Peixoto committed
7
8
9
10
11

//  Authors: Douglas Gregor
//           Andrew Lumsdaine
//           Tiago de Paula Peixoto

12
#include <boost/python.hpp>
13
#include <boost/variant.hpp>
14
15
#include <expat.h>
#include <boost/graph/graphml.hpp>
16
#include <boost/algorithm/string/replace.hpp>
17
18
#include <boost/archive/iterators/xml_escape.hpp>
#include <boost/archive/iterators/ostream_iterator.hpp>
19
20
21
22
23
24
#include <boost/archive/iterators/base64_from_binary.hpp>
#include <boost/archive/iterators/binary_from_base64.hpp>
#include <boost/archive/iterators/insert_linebreaks.hpp>
#include <boost/archive/iterators/transform_width.hpp>
#include <boost/archive/iterators/ostream_iterator.hpp>
#include <sstream>
Tiago Peixoto's avatar
Tiago Peixoto committed
25

26
using namespace boost;
27
28
namespace boost
{
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68

std::string base64_encode(const std::string& s)
{
    static const std::string base64_padding[] = {"", "==","="};
    namespace bai = boost::archive::iterators;
    std::stringstream os;
    typedef bai::base64_from_binary<bai::transform_width<const char *, 6, 8> >
        base64_enc;
    std::copy(base64_enc(s.c_str()), base64_enc(s.c_str() + s.size()),
              std::ostream_iterator<char>(os));
    os << base64_padding[s.size() % 3];
    return os.str();
}

std::string base64_decode(const std::string& s)
{
    namespace bai = boost::archive::iterators;
    std::stringstream os;

    typedef bai::transform_width<bai::binary_from_base64<const char *>, 8, 6> base64_dec;

    unsigned int size = s.size();

    // Remove the padding characters, cf. https://svn.boost.org/trac/boost/ticket/5629
    if (size && s[size - 1] == '=')
    {
        --size;
        if (size && s[size - 1] == '=')
            --size;
    }
    if (size == 0)
        return std::string();

    std::copy(base64_dec(s.data()), base64_dec(s.data() + size),
              std::ostream_iterator<char>(os));

    return os.str();
}


69
70
std::string protect_xml_string(const std::string& os)
{
71
72
73
74
75
76
    using namespace boost::archive::iterators;
    std::stringstream s;
    std::copy(xml_escape<const char*>(os.c_str()),
              xml_escape<const char*>(os.c_str()+os.size()),
              ostream_iterator<char>(s));
    return s.str();
77
78
}
}
Tiago Peixoto's avatar
Tiago Peixoto committed
79
80
81
82

class graphml_reader
{
public:
83
    graphml_reader(mutate_graph& g, bool integer_vertices, bool store_ids)
84
85
86
87
88
        : m_g(g),
          m_canonical_vertices(false),
          m_canonical_edges(false),
          m_integer_vertices(integer_vertices),
          m_store_ids(store_ids) { }
89

Tiago Peixoto's avatar
Tiago Peixoto committed
90
91
    void run(std::istream& in)
    {
92
        const int buffer_size = 4096;
93
94
95
96
        m_parser = XML_ParserCreateNS(0,'|');
        XML_SetElementHandler(m_parser, &on_start_element, &on_end_element);
        XML_SetCharacterDataHandler(m_parser, &on_character_data);
        XML_SetUserData(m_parser, this);
97
        char buffer[buffer_size];
98
99

        bool okay = true;
100
        do
101
102
        {
            in.read(buffer, buffer_size);
103
            okay = XML_Parse(m_parser, buffer, in.gcount(), in.gcount() == 0);
104
        }
105
        while (okay && in.good());
106

107
        if (!okay)
108
109
        {
            std::stringstream s;
110
            s << "on line " << XML_GetCurrentLineNumber(m_parser)
111
112
              <<", column " << XML_GetCurrentColumnNumber(m_parser)
              << ": " << XML_ErrorString(XML_GetErrorCode(m_parser));
113
114
            throw parse_error(s.str());
        }
115
        XML_ParserFree(m_parser);
Tiago Peixoto's avatar
Tiago Peixoto committed
116
117
118
119
    }

private:
    /// The kinds of keys. Not all of these are supported
120
121
122
    enum key_kind {
        graph_key,
        node_key,
123
124
125
        edge_key,
        hyperedge_key,
        port_key,
126
        endpoint_key,
127
        all_key
Tiago Peixoto's avatar
Tiago Peixoto committed
128
129
    };

130
131
132
133
134
135
136
137
    enum desc_kind
    {
        M_VERTEX_DESCRIPTOR,
        M_EDGE_DESCRIPTOR,
        M_GRAPH_DESCRIPTOR
    };


138
    static void
Tiago Peixoto's avatar
Tiago Peixoto committed
139
    on_start_element(void* user_data, const XML_Char *c_name,
140
                     const XML_Char **atts)
Tiago Peixoto's avatar
Tiago Peixoto committed
141
    {
142
143
144
        graphml_reader* self = static_cast<graphml_reader*>(user_data);

        std::string name(c_name);
145
        replace_first(name, "http://graphml.graphdrawing.org/xmlns|", "");
146
147

        if (name == "edge")
148
149
        {
            std::string id;
150
            std::string source, target;
151
            while (*atts)
152
            {
153
154
155
156
                std::string name = *atts++;
                std::string value = *atts++;

                if (name == "id") id = value;
157
158
                else if (name == "source") source = value;
                else if (name == "target") target = value;
159
                else if (name == "directed")
160
                {
161
                    bool edge_is_directed = (value == "directed");
162
163
                    if (self->m_g.is_directed() != 2 &&
                        edge_is_directed != self->m_g.is_directed())
164
                    {
165
                        if (edge_is_directed)
166
167
168
                            throw directed_graph_error();
                        else
                            throw undirected_graph_error();
169
                    }
170
                    self->m_g.flip_directed(edge_is_directed);
171
                }
172
173
            }

174
175
            self->m_active_descriptor = self->handle_edge(id, source, target);
            self->m_descriptor_kind = M_EDGE_DESCRIPTOR;
176
        }
177
        else if (name == "node")
178
179
180
        {
            std::string id;

181
            while (*atts)
182
            {
183
184
                std::string name = *atts++;
                std::string value = *atts++;
185

186
                if (name == "id") id = value;
187
188
            }

189
190
            self->m_active_descriptor = self->handle_vertex(id);
            self->m_descriptor_kind = M_VERTEX_DESCRIPTOR;
191
192
        }
        else if (name == "data")
193
        {
194
            while (*atts)
195
196
197
198
199
200
201
            {
                std::string name = *atts++;
                std::string value = *atts++;

                if (name == "key") self->m_active_key = value;
            }
        }
202
        else if (name == "key")
203
204
        {
            std::string id;
205
206
207
208
            std::string key_name;
            std::string key_type;
            key_kind kind = all_key;

209
            while (*atts)
210
            {
211
212
213
214
                std::string name = *atts++;
                std::string value = *atts++;

                if (name == "id") id = value;
215
216
                else if (name == "attr.name") key_name = value;
                else if (name == "attr.type") key_type = value;
217
                else if (name == "for")
218
                {
219
220
221
222
223
224
225
                    if (value == "graph") kind = graph_key;
                    else if (value == "node") kind = node_key;
                    else if (value == "edge") kind = edge_key;
                    else if (value == "hyperedge") kind = hyperedge_key;
                    else if (value == "port") kind = port_key;
                    else if (value == "endpoint") kind = endpoint_key;
                    else if (value == "all") kind = all_key;
226
                    else
227
                    {
228
                        std::stringstream s;
229
230
231
                        s << "on line "
                          << XML_GetCurrentLineNumber(self->m_parser)
                          << ", column "
232
                          << XML_GetCurrentColumnNumber(self->m_parser)
233
234
                          << ": unrecognized key kind '" << value << "'";
                        throw parse_error(s.str());
235
236
                    }
                }
237
238
            }

239
240
241
242
            self->m_keys[id] = kind;
            self->m_key_name[id] = key_name;
            self->m_key_type[id] = key_type;
            self->m_active_key = id;
243
244
        }
        else if (name == "graph")
245
        {
246
            while (*atts)
247
            {
248
249
                std::string name = *atts++;
                std::string value = *atts++;
250
251

                if (name == "edgedefault")
252
253
                {
                    bool edge_is_directed = (value == "directed");
254
255
                    if ( self->m_g.is_directed() != 2 &&
                         edge_is_directed != self->m_g.is_directed())
256
                    {
257
                        if (edge_is_directed)
258
259
260
261
                            throw directed_graph_error();
                        else
                            throw undirected_graph_error();
                    }
262
                    self->m_g.flip_directed(edge_is_directed);
263
264
265
266
267
                }
                else if (name == "parse.nodeids")
                {
                    self->m_canonical_vertices = (value == "canonical");
                }
268
269
270
271
                else if (name == "parse.edgeids")
                {
                    self->m_canonical_edges = (value == "canonical");
                }
272
            }
273
274
            self->m_active_descriptor = any();
            self->m_descriptor_kind = M_GRAPH_DESCRIPTOR;
275
        }
276
277

        self->m_character_data.clear();
Tiago Peixoto's avatar
Tiago Peixoto committed
278
    }
279

Tiago Peixoto's avatar
Tiago Peixoto committed
280
281
282
    static void
    on_end_element(void* user_data, const XML_Char *c_name)
    {
283
        graphml_reader* self = static_cast<graphml_reader*>(user_data);
284

285
        std::string name(c_name);
286
        replace_first(name, "http://graphml.graphdrawing.org/xmlns|", "");
287

288
289
        if (name == "data")
        {
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
            switch (self->m_descriptor_kind)
            {
            case M_VERTEX_DESCRIPTOR:
                self->handle_vertex_property(self->m_active_key, self->m_active_descriptor,
                                             self->m_character_data);
                break;
            case M_EDGE_DESCRIPTOR:
                self->handle_edge_property(self->m_active_key, self->m_active_descriptor,
                                           self->m_character_data);
                break;
            case M_GRAPH_DESCRIPTOR:
                self->handle_graph_property(self->m_active_key, self->m_active_descriptor,
                                             self->m_character_data);
                break;
            }
305
        }
306
307
308
309
        else if (name == "default")
        {
            self->m_key_default[self->m_active_key] = self->m_character_data;
        }
Tiago Peixoto's avatar
Tiago Peixoto committed
310
311
312
313
314
    }

    static void
    on_character_data(void* user_data, const XML_Char* s, int len)
    {
315
316
        graphml_reader* self = static_cast<graphml_reader*>(user_data);
        self->m_character_data.append(s, len);
Tiago Peixoto's avatar
Tiago Peixoto committed
317
318
    }

319
    any
Tiago Peixoto's avatar
Tiago Peixoto committed
320
321
    handle_vertex(const std::string& v)
    {
322
323
324
325
326
327
328
        bool is_new = false;

        if (m_canonical_vertices)
        {
            size_t id;

            //strip leading "n" from name
329
            try
330
            {
331
                id = lexical_cast<size_t>(std::string(v,1));
332
333
334
            }
            catch (bad_lexical_cast)
            {
335
                std::stringstream s;
336
                s << "on line " << XML_GetCurrentLineNumber(m_parser)
337
338
339
                  << ", column " << XML_GetCurrentColumnNumber(m_parser)
                  << ": invalid vertex: " << v;
                throw parse_error(s.str());
340
            }
341

342
            if (m_integer_vertices)
343
            {
344
345
346
347
348
349
350
351
352
353
354
                is_new = (m_g.n_vertices() <= id);
                for (size_t i = m_g.n_vertices(); i <= id; ++i)
                    m_g.do_add_vertex();
            }
            else
            {
                while(id >= m_canonical_vertex.size())
                {
                    m_canonical_vertex.push_back(m_g.do_add_vertex());
                    is_new = true;
                }
355
356
357
358
359
360
            }
        }
        else
        {
            if (m_vertex.find(v) == m_vertex.end())
            {
361
362
                m_vertex[v] = m_g.do_add_vertex();
                is_new = true;
363
364
365
            }
        }

366
        any vd = get_vertex_descriptor(v);
367
368
369
        if (is_new)
        {
            std::map<std::string, std::string>::iterator iter;
370
            for (iter = m_key_default.begin(); iter != m_key_default.end();
371
                 ++iter)
372
            {
373
                if (m_keys[iter->first] == node_key)
374
                    handle_vertex_property(iter->first, vd, iter->second);
375
            }
376
377
            if (m_store_ids && !m_canonical_vertices)
                m_g.set_vertex_property("_graphml_vertex_id",
378
                                        vd, v, "string");
379
        }
380
        return vd;
Tiago Peixoto's avatar
Tiago Peixoto committed
381
382
    }

383
    any
Tiago Peixoto's avatar
Tiago Peixoto committed
384
385
    get_vertex_descriptor(const std::string& v)
    {
386
387
388
389
        if (m_canonical_vertices)
        {
            //strip leading "n" from name
            size_t id = lexical_cast<size_t>(std::string(v,1));
390
391
            if (m_integer_vertices)
                return id;
392
393
394
            return m_canonical_vertex[id];
        }
        else
395
        {
396
397
            return m_vertex[v];
        }
Tiago Peixoto's avatar
Tiago Peixoto committed
398
399
    }

400
    any
401
402
    handle_edge(const std::string& id, const std::string& u,
                const std::string& v)
Tiago Peixoto's avatar
Tiago Peixoto committed
403
    {
404
405
        handle_vertex(u);
        handle_vertex(v);
Tiago Peixoto's avatar
Tiago Peixoto committed
406

407
408
409
        any source, target;
        source = get_vertex_descriptor(u);
        target = get_vertex_descriptor(v);
Tiago Peixoto's avatar
Tiago Peixoto committed
410

411
412
413
414
415
        any edge;
        bool added;
        tie(edge, added) = m_g.do_add_edge(source, target);
        if (!added)
            throw bad_parallel_edge(u, v);
Tiago Peixoto's avatar
Tiago Peixoto committed
416

417
418
419
420
        std::map<std::string, std::string>::iterator iter;
        for (iter = m_key_default.begin(); iter != m_key_default.end(); ++iter)
        {
            if (m_keys[iter->first] == edge_key)
421
                handle_edge_property(iter->first, edge, iter->second);
422
        }
423
424

        if (m_store_ids && !m_canonical_edges)
425
            m_g.set_edge_property("_graphml_edge_id", edge, id, "string");
426

427
        return edge;
428
    }
Tiago Peixoto's avatar
Tiago Peixoto committed
429

430
431
432
    void handle_edge_property(const std::string& key_id,
                              const any& descriptor,
                              const std::string& value)
Tiago Peixoto's avatar
Tiago Peixoto committed
433
    {
434
        try
435
        {
436
437
            m_g.set_edge_property(m_key_name[key_id], descriptor,
                              value, m_key_type[key_id]);
438
        }
439
        catch (parse_error &e)
440
        {
441
            std::stringstream s;
442
            s << "on line " << XML_GetCurrentLineNumber(m_parser)
443
444
445
              << ", column " << XML_GetCurrentColumnNumber(m_parser)
              << ": " << e.error;
            throw parse_error(s.str());
446
        }
Tiago Peixoto's avatar
Tiago Peixoto committed
447
448
    }

449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
    void handle_vertex_property(const std::string& key_id,
                                const any& descriptor,
                                const std::string& value)
    {
        try
        {
            m_g.set_vertex_property(m_key_name[key_id], descriptor,
                                    value, m_key_type[key_id]);
        }
        catch (parse_error &e)
        {
            std::stringstream s;
            s << "on line " << XML_GetCurrentLineNumber(m_parser)
              << ", column " << XML_GetCurrentColumnNumber(m_parser)
              << ": " << e.error;
            throw parse_error(s.str());
        }
    }

    void handle_graph_property(const std::string& key_id,
                               const any&,
                               const std::string& value)
471
    {
472
473
474
475
476
477
478
479
480
481
482
483
484
        try
        {
            m_g.set_graph_property(m_key_name[key_id], value,
                                   m_key_type[key_id]);
        }
        catch (parse_error &e)
        {
            std::stringstream s;
            s << "on line " << XML_GetCurrentLineNumber(m_parser)
              << ", column " << XML_GetCurrentColumnNumber(m_parser)
              << ": " << e.error;
            throw parse_error(s.str());
        }
485
486
    }

487
    mutate_graph& m_g;
Tiago Peixoto's avatar
Tiago Peixoto committed
488
489
490
    std::map<std::string, key_kind> m_keys;
    std::map<std::string, std::string> m_key_name;
    std::map<std::string, std::string> m_key_type;
491
492
493
    std::map<std::string, std::string> m_key_default;
    std::map<std::string, any> m_vertex;
    std::vector<any> m_canonical_vertex;
494
495
496
497

    any m_active_descriptor;
    desc_kind m_descriptor_kind;

Tiago Peixoto's avatar
Tiago Peixoto committed
498
499
500
501
    std::string m_active_key;
    std::string m_character_data;
    bool m_canonical_vertices;
    bool m_canonical_edges;
502
    bool m_integer_vertices;
503
    bool m_store_ids;
504
    bool m_ignore_directedness;
505
    XML_Parser m_parser;
Tiago Peixoto's avatar
Tiago Peixoto committed
506
507
};

508
namespace boost
Tiago Peixoto's avatar
Tiago Peixoto committed
509
510
{
void
511
read_graphml(std::istream& in, mutate_graph& g, bool integer_vertices, bool store_ids)
512
{
513
    graphml_reader reader(g, integer_vertices, store_ids);
514
    reader.run(in);
Tiago Peixoto's avatar
Tiago Peixoto committed
515
516
}
}