source: trunk/src/core/RpEntityRef.cc @ 1617

Last change on this file since 1617 was 1582, checked in by dkearney, 15 years ago

adjustments to entity ref encoding, allowing for repeated entity refs like &&

File size: 4.8 KB
Line 
1/*
2 * ======================================================================
3 *  AUTHOR:  Derrick Kearney, Purdue University
4 *  Copyright (c) 2004-2006  Purdue Research Foundation
5 *
6 *  See the file "license.terms" for information on usage and
7 *  redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
8 *
9 *  Also see below text for additional information on usage and redistribution
10 *
11 * ======================================================================
12 * ----------------------------------------------------------------------
13 *  Rappture Library Entity Reference Translation Header
14 *
15 *  Begin Character Entity Translator
16 *
17 *  The next section of code implements routines used to translate
18 *  character entity references into their corresponding strings.
19 *
20 *  Examples:
21 *
22 *        &amp;          "&"
23 *        &lt;           "<"
24 *        &gt;           ">"
25 *        &quot;         "\""
26 *        &apos;         "'"
27 *
28 */
29
30#include "RpEntityRef.h"
31#include <cctype>
32#include <cstring>
33
34using namespace Rappture;
35
/*
 * One row of the predefined-entity translation table.
 *
 * Note the field naming: "replacement" is the escaped "&...;" form and
 * "entity" is the literal character that the escaped form stands for.
 */
struct PredefEntityRef {
    /* Escaped text, e.g. "&lt;" (starts with '&', ends with ';'). */
    const char *replacement;

    /* Number of bytes in `replacement`, counting the leading ampersand
     * but not the terminating NUL. */
    size_t length;

    /* One-character string holding the literal character, e.g. "<". */
    const char *entity;
};
42
43static PredefEntityRef predef[] = {
44    { "&quot;",  6,  "\"" },
45    { "&amp;",   5,  "&"  },
46    { "&lt;",    4,  "<"  },
47    { "&gt;",    4,  ">"  },
48    { "&apos;",  6,  "'"  }
49};
50static int nPredefs = sizeof(predef) / sizeof (PredefEntityRef);
51
52/*
53 * EntityRef::decode --
54 *
55 *        Convert XML character data into the original text.  The trick
56 *        here is to determine the runs of characters that do not require
57 *        replacements and to append each such run in one shot.
58 */
59const char*
60EntityRef::decode (const char* string, unsigned int len)
61{
62    if (string == NULL) {
63        // Don't do anything with NULL strings.
64        return NULL;
65    }
66    _bout.clear();
67    if (len == 0) {
68        len = strlen(string);
69    }
70    const char *p, *start, *pend;
71    start = string;                     /* Mark the start of a run of
72                                         * characters that contain no
73                                         * replacements. */
74    for (p = string, pend = p + len; p < pend; /*empty*/) {
75        if (*p == '&') {
76            PredefEntityRef *ep, *epend;
77            for (ep = predef, epend = ep + nPredefs; ep < epend; ep++) {
78                size_t length;
79                length = pend - p;        /* Get the # bytes left. */
80                if ((length >= ep->length) && (ep->replacement[1] == *(p+1)) &&
81                    (strncmp(ep->replacement, p, ep->length) == 0)) {
82                    /* Found entity replacement. Append any preceding
83                     * characters into the buffer before the entity itself. */
84                    if (p > start) {
85                        _bout.append(start, p - start);
86                    }
87                    start = p + ep->length;
88                    _bout.append(ep->entity, 1);
89                    p += ep->length;
90                    goto next;
91                }
92            }
93        }
94        p++;
95    next:
96        ;
97    }
98    if (p > start) {
99        /* Append any left over characters into the buffer. */
100        _bout.append(start, p - start);
101    }
102    _bout.append("\0", 1);
103    return _bout.bytes();
104}
105
106const char*
107EntityRef::encode (const char* string, unsigned int len)
108{
109    if (string == NULL) {
110        return NULL;                   /* Don't do anything with NULL
111                                        * strings. */
112    }
113    _bout.clear();
114    if (len == 0) {
115        len = strlen(string);
116    }
117    const char *p, *start, *pend;
118    start = string;                     /* Mark the start of a run of
119                                         * characters that contain no
120                                         * replacements. */
121    for (p = string, pend = p + len; p < pend; p++) {
122        PredefEntityRef *ep, *epend;
123        for (ep = predef, epend = ep + nPredefs; ep < epend; ep++) {
124            if (ep->entity[0] == *p) {
125                /* Found entity requiring replacement. Append any preceding
126                 * characters into the buffer before the entity itself. */
127                if (p > start) {
128                    _bout.append(start, p - start);
129                }
130                start = p + 1;
131                _bout.append(ep->replacement, ep->length);
132                break;
133            }
134        }
135    }
136    if (p > start) {
137        /* Append any left over characters into the buffer. */
138        _bout.append(start, p - start);
139    }
140    _bout.append("\0", 1);
141    return _bout.bytes();
142}
143
144int
145EntityRef::size () {
146    return _bout.size();
147}
148
149
Note: See TracBrowser for help on using the repository browser.