=== modified file 'include/rfc1738.h' --- include/rfc1738.h 2010-11-02 00:12:43 +0000 +++ include/rfc1738.h 2011-03-04 12:51:18 +0000 @@ -6,33 +6,49 @@ #endif /* Encoder rfc1738_do_escape flag values. */ -#define RFC1738_ESCAPE_UNSAFE 0 -#define RFC1738_ESCAPE_RESERVED 1 -#define RFC1738_ESCAPE_UNESCAPED -1 - +#define RFC1738_ESCAPE_CTRLS 1 +#define RFC1738_ESCAPE_UNSAFE 2 +#define RFC1738_ESCAPE_RESERVED 4 +#define RFC1738_ESCAPE_ALL (RFC1738_ESCAPE_UNSAFE|RFC1738_ESCAPE_RESERVED|RFC1738_ESCAPE_CTRLS) + // exclusions +#define RFC1738_ESCAPE_NOSPACE 128 +#define RFC1738_ESCAPE_NOPERCENT 256 + // Backward compatibility +#define RFC1738_ESCAPE_UNESCAPED (RFC1738_ESCAPE_UNSAFE|RFC1738_ESCAPE_CTRLS|RFC1738_ESCAPE_NOPERCENT) /** * \group rfc1738 RFC 1738 URL-escaping library * * Public API is formed of a triplet of encode functions mapping to the rfc1738_do_encode() engine. * - * ASCII characters are split into three groups: + * ASCII characters are split into four groups: * \item SAFE Characters which are safe to occur in any URL. For example A,B,C - * \item UNSAFE Characters which are completely usafe to occur in any URL. For example; backspace, tab, space, newline + * \item CTRLS Binary control codes. Dangerous to include in URLs. + * \item UNSAFE Characters which are completely unsafe to occur in any URL. For example; backspace, tab, space, newline. * \item RESERVED Characters which are reserved for special meaning and may only occur in certain parts of a URL. * * Returns a static buffer containing the RFC 1738 compliant, escaped version of the given url. * - * \param flags RFC1738_ESCAPE_UNSAFE Only encode unsafe characters. Ignore reserved. - * \param flags RFC1738_ESCAPE_RESERVED Encode all unsafe and reserved characters. - * \param flags RFC1738_ESCAPE_UNESCAPED Encode all unsafe characters which have not already been encoded. + * \param flags RFC1738_ESCAPE_CTRLS Encode the blatantly dangerous binary codes. 
+ * \param flags RFC1738_ESCAPE_UNSAFE Encode printable unsafe characters (excluding CTRLs). + * \param flags RFC1738_ESCAPE_RESERVED Encode reserved characters. + * \param flags RFC1738_ESCAPE_ALL Encode all binary CTRL, unsafe and reserved characters. + * \param flags RFC1738_ESCAPE_NOSPACE Ignore the space whitespace character. + * \param flags RFC1738_ESCAPE_NOPERCENT Ignore the escaping delimiter '%'. */ extern char *rfc1738_do_escape(const char *url, int flags); /* Old API functions */ -#define rfc1738_escape(x) rfc1738_do_escape(x, RFC1738_ESCAPE_UNSAFE) -#define rfc1738_escape_part(x) rfc1738_do_escape(x, RFC1738_ESCAPE_RESERVED) -#define rfc1738_escape_unescaped(x) rfc1738_do_escape(x, RFC1738_ESCAPE_UNESCAPED) + + /* Default RFC 1738 escaping. Escape all UNSAFE characters and binary CTRL codes */ +#define rfc1738_escape(x) rfc1738_do_escape(x, RFC1738_ESCAPE_UNSAFE|RFC1738_ESCAPE_CTRLS) + + /* Escape a partial URL. Encoding every binary code, unsafe or reserved character. */ +#define rfc1738_escape_part(x) rfc1738_do_escape(x, RFC1738_ESCAPE_ALL) + + /* Escape a URL. Encoding every unsafe character but skipping reserved and already-encoded bytes. + * Suitable for safely encoding an absolute URL which may be encoded but is not trusted. 
*/ +#define rfc1738_escape_unescaped(x) rfc1738_do_escape(x, RFC1738_ESCAPE_UNSAFE|RFC1738_ESCAPE_CTRLS|RFC1738_ESCAPE_NOPERCENT) /** === modified file 'lib/rfc1738.c' --- lib/rfc1738.c 2010-11-01 05:44:28 +0000 +++ lib/rfc1738.c 2011-03-04 13:11:40 +0000 @@ -34,7 +34,6 @@ #include "config.h" #include "rfc1738.h" -//#include "util.h" #if HAVE_STDIO_H #include @@ -53,6 +52,7 @@ (char) 0x22, /* " */ (char) 0x23, /* # */ #if 0 /* done in code */ + (char) 0x20, /* space */ (char) 0x25, /* % */ #endif (char) 0x7B, /* { */ @@ -64,8 +64,7 @@ (char) 0x5B, /* [ */ (char) 0x5D, /* ] */ (char) 0x60, /* ` */ - (char) 0x27, /* ' */ - (char) 0x20 /* space */ + (char) 0x27 /* ' */ }; static char rfc1738_reserved_chars[] = { @@ -97,36 +96,49 @@ buf = (char*)xcalloc(bufsize, 1); } for (p = url, q = buf; *p != '\0' && q < (buf + bufsize - 1); p++, q++) { + + /* a-z, A-Z and 0-9 are SAFE. */ + if ((*p >= 'a' && *p <= 'z') || (*p >= 'A' && *p <= 'Z') || (*p >= '0' && *p <= '9')) { + *q = *p; + continue; + } + do_escape = 0; /* RFC 1738 defines these chars as unsafe */ - for (i = 0; i < sizeof(rfc1738_unsafe_chars); i++) { - if (*p == rfc1738_unsafe_chars[i]) { - do_escape = 1; - break; + if ((flags & RFC1738_ESCAPE_UNSAFE)) { + for (i = 0;i < sizeof(rfc1738_unsafe_chars); i++) { + if (*p == rfc1738_unsafe_chars[i]) { + do_escape = 1; + break; + } } + /* Handle % separately */ + if (!(flags & RFC1738_ESCAPE_NOPERCENT) && *p == '%') + do_escape = 1; + /* Handle space separately */ + else if (!(flags & RFC1738_ESCAPE_NOSPACE) && *p <= ' ') + do_escape = 1; } - /* Handle % separately */ - if (flags != RFC1738_ESCAPE_UNESCAPED && *p == '%') - do_escape = 1; /* RFC 1738 defines these chars as reserved */ - for (i = 0; i < sizeof(rfc1738_reserved_chars) && flags == RFC1738_ESCAPE_RESERVED; i++) { - if (*p == rfc1738_reserved_chars[i]) { - do_escape = 1; - break; + if ((flags & RFC1738_ESCAPE_RESERVED) && do_escape == 0) { + for (i = 0; i < sizeof(rfc1738_reserved_chars); i++) { + if (*p == 
rfc1738_reserved_chars[i]) { + do_escape = 1; + break; + } } } - /* RFC 1738 says any control chars (0x00-0x1F) are encoded */ - if ((unsigned char) *p <= (unsigned char) 0x1F) { - do_escape = 1; - } - /* RFC 1738 says 0x7f is encoded */ - if (*p == (char) 0x7F) { - do_escape = 1; - } - /* RFC 1738 says any non-US-ASCII are encoded */ - if (((unsigned char) *p >= (unsigned char) 0x80)) { - do_escape = 1; + if ((flags & RFC1738_ESCAPE_CTRLS) && do_escape == 0) { + /* RFC 1738 says any control chars (0x00-0x1F) are encoded */ + if ((unsigned char) *p <= (unsigned char) 0x1F) + do_escape = 1; + /* RFC 1738 says 0x7f is encoded */ + else if (*p == (char) 0x7F) + do_escape = 1; + /* RFC 1738 says any non-US-ASCII are encoded */ + else if (((unsigned char) *p >= (unsigned char) 0x80)) + do_escape = 1; } /* Do the triplet encoding, or just copy the char */ /* note: we do not need snprintf here as q is appropriately === modified file 'lib/tests/testRFC1738.cc' --- lib/tests/testRFC1738.cc 2010-03-30 17:32:03 +0000 +++ lib/tests/testRFC1738.cc 2011-03-04 12:16:08 +0000 @@ -87,10 +87,6 @@ { char *result; -#define RFC1738_ESCAPE_UNSAFE 0 -#define RFC1738_ESCAPE_RESERVED 1 -#define RFC1738_ESCAPE_UNESCAPED -1 - /* TEST: Escaping only unsafe characters */ /* regular URL (no encoding needed) */