1 | |
---|
2 | /* |
---|
3 | * ---------------------------------------------------------------------- |
---|
4 | * Rappture::encoding |
---|
5 | * |
---|
6 | * The encoding module for rappture used to zip and b64 encode data. |
---|
7 | * ====================================================================== |
---|
8 | * AUTHOR: Derrick Kearney, Purdue University |
---|
9 | * Copyright (c) 2004-2007 Purdue Research Foundation |
---|
10 | * |
---|
11 | * See the file "license.terms" for information on usage and |
---|
12 | * redistribution of this file, and for a DISCLAIMER OF ALL WARRANTIES. |
---|
13 | * ====================================================================== |
---|
14 | */ |
---|
15 | #include "RpEncode.h" |
---|
16 | #include <cstring> |
---|
17 | |
---|
18 | |
---|
/**********************************************************************/
// FUNCTION: Rappture::encoding::isBinary()
/// isBinary checks to see if the given string is binary.

/**
 * Checks whether any of the first size characters in *buf are binary.
 * Full function call:
 * Rappture::encoding::isBinary(buf, size);
 *
 */
---|
29 | |
---|
/*
 * Byte classification table for isBase64(), indexed by unsigned byte value.
 * An entry of 1 marks an accepted byte:
 *
 *     0x09-0x0D and 0x20      whitespace
 *     '+' '/' '='             base64 symbols and padding
 *     '0'-'9' 'A'-'Z' 'a'-'z' base64 alphanumerics
 *
 * Every other byte, including all high-order bit bytes, is 0.
 */
static char _base64chars[256] = {
    /*     0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F */
    /*00*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
    /*10*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*20*/ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1,
    /*30*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0,
    /*40*/ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /*50*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
    /*60*/ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /*70*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
    /*80*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*90*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*A0*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*B0*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*C0*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*D0*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*E0*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*F0*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
---|
142 | |
---|
/*
 * Valid XML (ASCII encoded) characters 0-127:
 *
 *     0x9 (\t)  0xA (\n)  0xD (\r)  and
 *     0x20 (space) through 0x7F (del)
 *
 * This isn't for UTF-8, only ASCII.  We don't allow high-order bit
 * characters for ASCII editors.
 *
 * The table is indexed by unsigned byte value; 1 marks a byte that may
 * appear verbatim, 0 marks a byte that makes the data "binary".  All other
 * control characters (0x00-0x08, 0x0B, 0x0C, 0x0E-0x1F) are 0: the XML 1.0
 * Char production excludes them, so they cannot be stored verbatim.
 */
static unsigned char _xmlchars[256] = {
    /*     0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F */
    /*00*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0,
    /*10*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*20*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /*30*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /*40*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /*50*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /*60*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /*70*/ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /*80*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*90*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*A0*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*B0*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*C0*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*D0*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*E0*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    /*F0*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
---|
170 | |
---|
171 | /* |
---|
172 | * This routine is misnamed "isBinary". By definition, all strings are binary, |
---|
173 | * even the ones with just letters or digits. It's really a test if the |
---|
174 | * string can be used by XML verbatim and if it can be read by the usual ASCII |
---|
175 | * editors (the reason high-order bit characters are disallowed). [Note we |
---|
176 | * aren't checking if entity replacements are necessary.] |
---|
177 | * |
---|
178 | * The "is*" routines should be moved somewhere else since they really don't |
---|
179 | * have anything to do with encoding/decoding. |
---|
180 | */ |
---|
181 | bool |
---|
182 | Rappture::encoding::isBinary(const char* buf, int size) |
---|
183 | { |
---|
184 | if (buf == NULL) { |
---|
185 | return false; /* Really should let this segfault. */ |
---|
186 | } |
---|
187 | if (size < 0) { |
---|
188 | size = strlen(buf); |
---|
189 | } |
---|
190 | unsigned const char *p, *pend; |
---|
191 | for (p = (unsigned const char *)buf, pend = p + size; p < pend; p++) { |
---|
192 | if (!_xmlchars[*p]) { |
---|
193 | return true; |
---|
194 | } |
---|
195 | } |
---|
196 | return false; |
---|
197 | } |
---|
198 | |
---|
199 | bool |
---|
200 | Rappture::encoding::isBase64(const char* buf, int size) |
---|
201 | { |
---|
202 | if (buf == NULL) { |
---|
203 | return false; /* Really should let this segfault. */ |
---|
204 | } |
---|
205 | if (size < 0) { |
---|
206 | size = strlen(buf); |
---|
207 | } |
---|
208 | unsigned const char *p, *pend; |
---|
209 | for (p = (unsigned const char *)buf, pend = p + size; p < pend; p++) { |
---|
210 | if (!_base64chars[*p]) { |
---|
211 | fprintf(stderr, "%c %u is not base64\n", *p, *p); |
---|
212 | return false; |
---|
213 | } |
---|
214 | } |
---|
215 | return true; |
---|
216 | } |
---|
217 | |
---|
218 | bool |
---|
219 | Rappture::encoding::isGzipped(const char* buf, int size) |
---|
220 | { |
---|
221 | unsigned int first, second; |
---|
222 | if (buf == NULL) { |
---|
223 | return false; /* Really should let this segfault. */ |
---|
224 | } |
---|
225 | first = buf[0]; |
---|
226 | second = buf[1]; |
---|
227 | return ((first == 0x1f) && (second == 0x8b)); |
---|
228 | } |
---|
229 | |
---|
230 | /**********************************************************************/ |
---|
231 | // FUNCTION: Rappture::encoding::headerFlags() |
---|
232 | /// checks header of given string to determine if it was encoded by rappture. |
---|
233 | /** |
---|
234 | * Checks to see if the string buf was encoded by rappture |
---|
235 | * and contains the proper "@@RP-ENC:" header. |
---|
236 | * rappture encoded strings start with the string "@@RP-ENC:X\n" |
---|
237 | * where X is one of z, b64, zb64 |
---|
238 | * This function will not work for strings that do not have the header. |
---|
239 | * Full function call: |
---|
240 | * Rappture::encoding::headerFlags(buf,size); |
---|
241 | * |
---|
242 | */ |
---|
243 | |
---|
244 | unsigned int |
---|
245 | Rappture::encoding::headerFlags(const char* buf, int size) |
---|
246 | { |
---|
247 | size_t flags = 0; |
---|
248 | size_t len = 0; |
---|
249 | |
---|
250 | if (buf == NULL) { |
---|
251 | return flags; |
---|
252 | } |
---|
253 | |
---|
254 | if (size < 0) { |
---|
255 | len = strlen(buf); |
---|
256 | } else { |
---|
257 | len = size; |
---|
258 | } |
---|
259 | |
---|
260 | // check the following for valid rappture encoded string: |
---|
261 | // all strings encoded by rappture are at least 11 characters |
---|
262 | // rappture encoded strings start with the '@' character |
---|
263 | // rappture encoded strings start with the string "@@RP-ENC:X\n" |
---|
264 | // where X is one of z, b64, zb64 |
---|
265 | if ((len >= 11) && ('@' == *buf) && (strncmp("@@RP-ENC:",buf,9) == 0) ) { |
---|
266 | |
---|
267 | size_t idx = 9; |
---|
268 | |
---|
269 | // check the string length and if the z flag was specified |
---|
270 | // add 1 for \n |
---|
271 | if ( (len >= (idx + 1)) |
---|
272 | && (buf[idx] == 'z') ) { |
---|
273 | flags |= RPENC_Z; |
---|
274 | ++idx; |
---|
275 | } |
---|
276 | // check the string length and if the b64 flag was specified |
---|
277 | // add 1 for \n |
---|
278 | if ( (len >= (idx + 2 + 1)) |
---|
279 | && (buf[idx] == 'b') |
---|
280 | && (buf[idx+1] == '6') |
---|
281 | && (buf[idx+2] == '4') ) { |
---|
282 | flags |= RPENC_B64; |
---|
283 | idx += 3; |
---|
284 | } |
---|
285 | // check for the '\n' at the end of the header |
---|
286 | if (buf[idx] != '\n') { |
---|
287 | flags = 0; |
---|
288 | } |
---|
289 | } |
---|
290 | return flags; |
---|
291 | } |
---|
292 | |
---|
293 | /**********************************************************************/ |
---|
294 | // FUNCTION: Rappture::encoding::encode() |
---|
295 | /// Rappture::encoding::encode function encodes provided string |
---|
296 | /** |
---|
297 | * Encode a string by compressing it with zlib and then base64 encoding it. |
---|
298 | * |
---|
299 | * Full function call: |
---|
300 | * Rappture::encoding::encode(buf,flags) |
---|
301 | */ |
---|
302 | |
---|
303 | bool |
---|
304 | Rappture::encoding::encode(Rappture::Outcome &status, Rappture::Buffer& buf, |
---|
305 | unsigned int flags) |
---|
306 | { |
---|
307 | Rappture::Buffer outData; |
---|
308 | |
---|
309 | if (buf.size() <= 0) { |
---|
310 | return true; // Nothing to encode. |
---|
311 | } |
---|
312 | if ((flags & (RPENC_Z | RPENC_B64)) == 0) { |
---|
313 | // By default compress and encode the string. |
---|
314 | flags |= RPENC_Z | RPENC_B64; |
---|
315 | } |
---|
316 | if (outData.append(buf.bytes(), buf.size()) != (int)buf.size()) { |
---|
317 | status.addError("can't append %lu bytes", buf.size()); |
---|
318 | return false; |
---|
319 | } |
---|
320 | if (!outData.encode(status, flags)) { |
---|
321 | return false; |
---|
322 | } |
---|
323 | buf.clear(); |
---|
324 | if ((flags & RPENC_RAW) == 0) { |
---|
325 | switch (flags & (RPENC_Z | RPENC_B64)) { |
---|
326 | case RPENC_Z: |
---|
327 | buf.append("@@RP-ENC:z\n", 11); |
---|
328 | break; |
---|
329 | case RPENC_B64: |
---|
330 | buf.append("@@RP-ENC:b64\n", 13); |
---|
331 | break; |
---|
332 | case (RPENC_B64 | RPENC_Z): |
---|
333 | buf.append("@@RP-ENC:zb64\n", 14); |
---|
334 | break; |
---|
335 | default: |
---|
336 | break; |
---|
337 | } |
---|
338 | } |
---|
339 | if (buf.append(outData.bytes(),outData.size()) != (int)outData.size()) { |
---|
340 | status.addError("can't append %d bytes", outData.size()); |
---|
341 | return false; |
---|
342 | } |
---|
343 | return true; |
---|
344 | } |
---|
345 | |
---|
346 | /**********************************************************************/ |
---|
347 | // FUNCTION: Rappture::encoding::decode() |
---|
348 | /// Rappture::encoding::decode function decodes provided string |
---|
349 | /** |
---|
350 | * Decode a string by uncompressing it with zlib and base64 decoding it. |
---|
351 | * If binary data is provided, the data is base64 decoded and uncompressed. |
---|
352 | * Rappture::encoding::isbinary is used to qualify binary data. |
---|
353 | * |
---|
354 | * Full function call: |
---|
355 | * Rappture::encoding::decode(context, buf,flags) |
---|
356 | * |
---|
357 | * The check header flag is confusing here. |
---|
358 | */ |
---|
359 | |
---|
360 | bool |
---|
361 | Rappture::encoding::decode(Rappture::Outcome &status, Rappture::Buffer& buf, |
---|
362 | unsigned int flags) |
---|
363 | { |
---|
364 | Rappture::Buffer outData; |
---|
365 | |
---|
366 | const char *bytes; |
---|
367 | |
---|
368 | size_t size; |
---|
369 | size = buf.size(); |
---|
370 | if (size == 0) { |
---|
371 | return true; // Nothing to decode. |
---|
372 | } |
---|
373 | bytes = buf.bytes(); |
---|
374 | if ((flags & RPENC_RAW) == 0) { |
---|
375 | unsigned int headerFlags = 0; |
---|
376 | if ((size > 11) && (strncmp(bytes, "@@RP-ENC:z\n", 11) == 0)) { |
---|
377 | bytes += 11; |
---|
378 | size -= 11; |
---|
379 | headerFlags = RPENC_Z; |
---|
380 | } else if ((size > 13) && (strncmp(bytes, "@@RP-ENC:b64\n", 13) == 0)){ |
---|
381 | bytes += 13; |
---|
382 | size -= 13; |
---|
383 | headerFlags = RPENC_B64; |
---|
384 | } else if ((size > 14) && (strncmp(bytes, "@@RP-ENC:zb64\n", 14) == 0)){ |
---|
385 | bytes += 14; |
---|
386 | size -= 14; |
---|
387 | headerFlags = (RPENC_B64 | RPENC_Z); |
---|
388 | } else if ((size > 13) && (strncmp(bytes, "@@RP-ENC:raw\n", 13) == 0)){ |
---|
389 | bytes += 13; |
---|
390 | size -= 13; |
---|
391 | } |
---|
392 | if (headerFlags != 0) { |
---|
393 | unsigned int reqFlags; |
---|
394 | |
---|
395 | reqFlags = flags & (RPENC_B64 | RPENC_Z); |
---|
396 | /* |
---|
397 | * If there's a header and the programmer also requested decoding |
---|
398 | * flags, verify that the two are the same. We don't want to |
---|
399 | * penalize the programmer for over-specifying. But we need to |
---|
400 | * catch cases when they don't match. If you really want to |
---|
401 | * override the header, you should also specify the RPENC_RAW flag |
---|
402 | * (-noheader). |
---|
403 | */ |
---|
404 | if ((reqFlags != 0) && (reqFlags != headerFlags)) { |
---|
405 | status.addError("decode flags don't match the header"); |
---|
406 | return false; |
---|
407 | } |
---|
408 | flags |= headerFlags; |
---|
409 | } |
---|
410 | } |
---|
411 | if (outData.append(bytes, size) != (int)size) { |
---|
412 | status.addError("can't append %d bytes to buffer", size); |
---|
413 | return false; |
---|
414 | } |
---|
415 | if (flags & (RPENC_B64 | RPENC_Z)) { |
---|
416 | if (!outData.decode(status, flags)) { |
---|
417 | return false; |
---|
418 | } |
---|
419 | } |
---|
420 | buf.move(outData); |
---|
421 | return true; |
---|
422 | } |
---|
423 | |
---|