source: branches/blt4/src/core/RpEncode.cc @ 2327

Last change on this file since 2327 was 1617, checked in by gah, 15 years ago
File size: 12.3 KB
Line 
1
2/*
3 * ----------------------------------------------------------------------
4 *  Rappture::encoding
5 *
6 *  The encoding module for rappture used to zip and b64 encode data.
7 * ======================================================================
8 *  AUTHOR:  Derrick Kearney, Purdue University
9 *  Copyright (c) 2004-2007  Purdue Research Foundation
10 *
11 *  See the file "license.terms" for information on usage and
12 *  redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES.
13 * ======================================================================
14 */
15#include "RpEncode.h"
16#include <cstring>
17
18
/**********************************************************************/
// FUNCTION: Rappture::encoding::isBinary()
/// isBinary checks to see if the given string contains binary data.

/**
 * Checks whether any of the first size characters in buf are binary,
 * i.e. are not characters that may appear verbatim in ASCII XML text.
 * Full function call:
 * Rappture::encoding::isBinary(buf,size);
 *
 */
29
/*
 * Lookup table indexed by unsigned character value.  An entry of 1 marks
 * a character accepted in base64 text; 0 marks everything else.
 *
 * Accepted characters:
 *      0x09-0x0D (\t \n \v \f \r) and 0x20 (space),
 *      '+' (0x2B), '/' (0x2F), '=' (0x3D, the pad character),
 *      '0'-'9', 'A'-'Z', 'a'-'z'
 */
static char _base64chars[256] = {
    /*     -  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F */
    /*00*/    0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
    /*10*/    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*20*/    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,
    /*30*/    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0,
    /*40*/    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /*50*/    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
    /*60*/    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /*70*/    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
    /*80*/    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*90*/    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*A0*/    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*B0*/    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*C0*/    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*D0*/    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*E0*/    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*F0*/    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
142
/*
 * Valid XML (ASCII encoded) characters 0-127:
 *
 *      0x9 (\t) 0xA (\n) 0xD (\r) and
 *      0x20 (space) through 0x7F (del)
 *
 * This isn't for UTF-8, only ASCII.  We don't allow high-order bit
 * characters for ASCII editors.
 *
 * The table is indexed by unsigned character value; 1 means the character
 * may appear verbatim, 0 means it may not.  Note that the control
 * characters 0x10-0x1F are NOT valid XML characters and are therefore 0.
 */
static const unsigned char _xmlchars[256] = {
    /*     -  0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F */
    /*00*/    0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0,
    /*10*/    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*20*/    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /*30*/    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /*40*/    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /*50*/    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /*60*/    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /*70*/    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /*80*/    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*90*/    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*A0*/    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*B0*/    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*C0*/    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*D0*/    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*E0*/    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*F0*/    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
170
171/*
172 * This routine is misnamed "isBinary". By definition, all strings are binary,
173 * even the ones with just letters or digits.  It's really a test if the
174 * string can be used by XML verbatim and if it can be read by the usual ASCII
175 * editors (the reason high-order bit characters are disallowed).  [Note we
176 * aren't checking if entity replacements are necessary.]
177 *
178 * The "is*" routines should be moved somewhere else since they really don't
179 * have anything to do with encoding/decoding.
180 */
181bool
182Rappture::encoding::isBinary(const char* buf, int size)
183{
184    if (buf == NULL) {
185        return false;                   /* Really should let this segfault. */
186    }
187    if (size < 0) {
188        size = strlen(buf);
189    }
190    unsigned const char *p, *pend;
191    for (p = (unsigned const char *)buf, pend = p + size; p < pend; p++) {
192        if (!_xmlchars[*p]) {
193            return true;               
194        }
195    }
196    return false;
197}
198
199bool
200Rappture::encoding::isBase64(const char* buf, int size)
201{
202    if (buf == NULL) {
203        return false;                   /* Really should let this segfault. */
204    }
205    if (size < 0) {
206        size = strlen(buf);
207    }
208    unsigned const char *p, *pend;
209    for (p = (unsigned const char *)buf, pend = p + size; p < pend; p++) {
210        if (!_base64chars[*p]) {
211            fprintf(stderr, "%c %u is not base64\n", *p, *p);
212            return false;
213        }
214    }
215    return true;
216}
217
218bool
219Rappture::encoding::isGzipped(const char* buf, int size)
220{
221    unsigned int first, second;
222    if (buf == NULL) {
223        return false;                   /* Really should let this segfault. */
224    }
225    first = buf[0];
226    second = buf[1];
227    return ((first == 0x1f)  && (second == 0x8b));
228}
229
230/**********************************************************************/
231// FUNCTION: Rappture::encoding::headerFlags()
232/// checks header of given string to determine if it was encoded by rappture.
233/**
234 * Checks to see if the string buf was encoded by rappture
235 * and contains the proper "@@RP-ENC:" header.
236 * rappture encoded strings start with the string "@@RP-ENC:X\n"
237 * where X is one of z, b64, zb64
238 * This function will not work for strings that do not have the header.
239 * Full function call:
240 * Rappture::encoding::headerFlags(buf,size);
241 *
242 */
243
244unsigned int
245Rappture::encoding::headerFlags(const char* buf, int size)
246{
247    size_t flags = 0;
248    size_t len = 0;
249
250    if (buf == NULL) {
251        return flags;
252    }
253
254    if (size < 0) {
255        len = strlen(buf);
256    } else {
257        len = size;
258    }
259
260    // check the following for valid rappture encoded string:
261    // all strings encoded by rappture are at least 11 characters
262    // rappture encoded strings start with the '@' character
263    // rappture encoded strings start with the string "@@RP-ENC:X\n"
264    // where X is one of z, b64, zb64
265    if ((len >= 11) &&  ('@' == *buf) &&  (strncmp("@@RP-ENC:",buf,9) == 0) ) {
266
267        size_t idx = 9;
268
269        // check the string length and if the z flag was specified
270        // add 1 for \n
271        if (    (len >= (idx + 1))
272            &&  (buf[idx] == 'z') ) {
273            flags |= RPENC_Z;
274            ++idx;
275        }
276        // check the string length and if the b64 flag was specified
277        // add 1 for \n
278        if (    (len >= (idx + 2 + 1))
279            &&  (buf[idx]   == 'b')
280            &&  (buf[idx+1] == '6')
281            &&  (buf[idx+2] == '4') ) {
282            flags |= RPENC_B64;
283            idx += 3;
284        }
285        // check for the '\n' at the end of the header
286        if (buf[idx] != '\n') {
287            flags = 0;
288        }
289    }
290    return flags;
291}
292
293/**********************************************************************/
294// FUNCTION: Rappture::encoding::encode()
295/// Rappture::encoding::encode function encodes provided string
296/**
297 * Encode a string by compressing it with zlib and then base64 encoding it.
298 *
299 * Full function call:
300 * Rappture::encoding::encode(buf,flags)
301 */
302
303bool
304Rappture::encoding::encode(Rappture::Outcome &status, Rappture::Buffer& buf,
305                           unsigned int flags)
306{
307    Rappture::Buffer outData;
308
309    if (buf.size() <= 0) {
310        return true;                // Nothing to encode.
311    }
312    if ((flags & (RPENC_Z | RPENC_B64)) == 0) {
313        // By default compress and encode the string.
314        flags |= RPENC_Z | RPENC_B64;
315    }
316    if (outData.append(buf.bytes(), buf.size()) != (int)buf.size()) {
317        status.addError("can't append %lu bytes", buf.size());
318        return false;
319    }
320    if (!outData.encode(status, flags)) {
321        return false;
322    }
323    buf.clear();
324    if ((flags & RPENC_RAW) == 0) {
325        switch (flags & (RPENC_Z | RPENC_B64)) {
326        case RPENC_Z:
327            buf.append("@@RP-ENC:z\n", 11);
328            break;
329        case RPENC_B64:
330            buf.append("@@RP-ENC:b64\n", 13);
331            break;
332        case (RPENC_B64 | RPENC_Z):
333            buf.append("@@RP-ENC:zb64\n", 14);
334            break;
335        default:
336            break;
337        }
338    }
339    if (buf.append(outData.bytes(),outData.size()) != (int)outData.size()) {
340        status.addError("can't append %d bytes", outData.size());
341        return false;
342    }
343    return true;
344}
345
346/**********************************************************************/
347// FUNCTION: Rappture::encoding::decode()
348/// Rappture::encoding::decode function decodes provided string
349/**
350 * Decode a string by uncompressing it with zlib and base64 decoding it.
351 * If binary data is provided, the data is base64 decoded and uncompressed.
352 * Rappture::encoding::isbinary is used to qualify binary data.
353 *
354 * Full function call:
355 * Rappture::encoding::decode(context, buf,flags)
356 *
357 * The check header flag is confusing here.
358 */
359
360bool
361Rappture::encoding::decode(Rappture::Outcome &status, Rappture::Buffer& buf,
362                           unsigned int flags)
363{
364    Rappture::Buffer outData;
365
366    const char *bytes;
367
368    size_t size;
369    size = buf.size();
370    if (size == 0) {
371        return true;                // Nothing to decode.
372    }
373    bytes = buf.bytes();
374    if ((flags & RPENC_RAW) == 0) {
375        unsigned int headerFlags = 0;
376        if ((size > 11) && (strncmp(bytes, "@@RP-ENC:z\n", 11) == 0)) {
377            bytes += 11;
378            size -= 11;
379            headerFlags = RPENC_Z;
380        } else if ((size > 13) && (strncmp(bytes, "@@RP-ENC:b64\n", 13) == 0)){
381            bytes += 13;
382            size -= 13;
383            headerFlags = RPENC_B64;
384        } else if ((size > 14) && (strncmp(bytes, "@@RP-ENC:zb64\n", 14) == 0)){
385            bytes += 14;
386            size -= 14;
387            headerFlags = (RPENC_B64 | RPENC_Z);
388        } else if ((size > 13) && (strncmp(bytes, "@@RP-ENC:raw\n", 13) == 0)){
389            bytes += 13;
390            size -= 13;
391        }
392         if (headerFlags != 0) {
393            unsigned int reqFlags;
394
395            reqFlags = flags & (RPENC_B64 | RPENC_Z);
396            /*
397             * If there's a header and the programmer also requested decoding
398             * flags, verify that the two are the same.  We don't want to
399             * penalize the programmer for over-specifying.  But we need to
400             * catch cases when they don't match.  If you really want to
401             * override the header, you should also specify the RPENC_RAW flag
402             * (-noheader).
403             */
404            if ((reqFlags != 0) && (reqFlags != headerFlags)) {
405                status.addError("decode flags don't match the header");
406                return false;
407            }
408            flags |= headerFlags;
409        }
410    }
411    if (outData.append(bytes, size) != (int)size) {
412        status.addError("can't append %d bytes to buffer", size);
413        return false;
414    }
415    if (flags & (RPENC_B64 | RPENC_Z)) {
416        if (!outData.decode(status, flags)) {
417            return false;
418        }
419    }
420    buf.move(outData);
421    return true;
422}
423
Note: See TracBrowser for help on using the repository browser.