------------------------------------------------------------ revno: 12349 committer: Amos Jeffries branch nick: helpers timestamp: Sat 2012-12-01 00:08:47 +1300 message: Add kv-pir support to url_rewrite_helper interface This stage of the helper reply protocol adds kv-pair support to the url_rewrite_helper interfacefor URL redirect and rewrite operations. It uses the new Notes objects and kv-pair field added by the stage 2 helper protocol instead of parsing the 'other' field. Although, the 'other' field is still parsed when *no* result field is received for backward compatibility with older helpers. The response syntax for URL helpers becomes: [channel-ID SP] result [SP kv-pair ...] [SP other] EOL NP: 'other' field is now deprecated and will be ignored/discarded on any response containing a result code field. When result code "OK" is presented by the helper several kv-pairs are reserved to control Squid actions: * "rewrite-url=" is added to return a re-written URL. - When this key is presented the URL is re-written to the new one by Squid without client interaction. - The 'url' keys presence will override this key. * "url=" is added to return the redirected-to URL. - When this key name is presented an HTTP redirect response is generated for the client. - This keys presence overrides the 'rewrite-url' key actions. * "status=" is added to hold the HTTP status code for use in redirect operations. - This field is optional and status is no longer required for marking redirect actions. - If no redirect status is provided Squid will assign one (currently the default is 302, that may change in the future). - This key is only relevant when 'url' key is also presented. In all other uses it is currently ignored. When result codes BH or ERR are presented by the helper no redirect or rewrite action is performed and no kv-pair key names are reserved for use at this time. Any other keys MAY be sent on any response. The URL helper interface makes no other use of them, but this patch does pass them on to the ALE object for logging as transaction Notes. All kv-pairs returned by the helper (including the url, stauts rewrite-url keys) are available for logging via the %{...}note log format option. As with changes to other helpers interfaces in earlier updates, only the first value presented for any of the reserved kv-pairs is used. Multiple values are accepted as notes, but otherwise ignored by Squid and do not affect the transaction outcome. Additionally, when the BH result code is received from the helper a simple recovery is attempted. The lookup request will be re-scheduled (up to once) in an attempt to find a better responding helper. NOTE: Helper notes are *not* passe to adaptation interfaces. - REQMOD adaptation happens before URL helpers are used. - REPMOD adaptation may at some point receive them, but that change is not done by this update. This update was sponsored by Edgewave. === modified file 'src/AccessLogEntry.h' --- src/AccessLogEntry.h 2012-10-29 04:59:58 +0000 +++ src/AccessLogEntry.h 2012-11-30 11:08:47 +0000 @@ -231,7 +231,9 @@ HttpReply *reply; HttpRequest *request; //< virgin HTTP request HttpRequest *adapted_request; //< HTTP request after adaptation and redirection - /// key:value pairs set by note and adaptation_meta + + /// key:value pairs set by note and adaptation_meta directives + /// plus key=value pairs returned from URL rewrite/redirect helper NotePairs notes; #if ICAP_CLIENT === modified file 'src/ClientRequestContext.h' --- src/ClientRequestContext.h 2012-11-04 12:27:49 +0000 +++ src/ClientRequestContext.h 2012-11-30 11:08:47 +0000 @@ -56,6 +56,14 @@ ACLChecklist *acl_checklist; /* need ptr back so we can unreg if needed */ int redirect_state; + /** + * URL-rewrite/redirect helper may return BH for internal errors. + * We attempt to recover by trying the lookup again, but limit the + * number of retries to prevent lag and lockups. + * This tracks the number of previous failures for the current context. + */ + uint8_t redirect_fail_count; + bool host_header_verify_done; bool http_access_done; bool adapted_http_access_done; === modified file 'src/HttpRequest.cc' --- src/HttpRequest.cc 2012-10-27 00:13:19 +0000 +++ src/HttpRequest.cc 2012-11-30 11:08:47 +0000 @@ -62,7 +62,9 @@ init(); } -HttpRequest::HttpRequest(const HttpRequestMethod& aMethod, AnyP::ProtocolType aProtocol, const char *aUrlpath) : HttpMsg(hoRequest) +HttpRequest::HttpRequest(const HttpRequestMethod& aMethod, AnyP::ProtocolType aProtocol, const char *aUrlpath) : + HttpMsg(hoRequest), + helperNotes(NULL) { static unsigned int id = 1; debugs(93,7, HERE << "constructed, this=" << this << " id=" << ++id); @@ -163,6 +165,11 @@ myportname.clean(); + if (!helperNotes) { + delete helperNotes; + helperNotes = NULL; + } + tag.clean(); #if USE_AUTH extacl_user.clean(); @@ -221,6 +228,10 @@ // XXX: what to do with copy->peer_domain? copy->myportname = myportname; + if (helperNotes) { + copy->helperNotes = new Notes; + copy->helperNotes->notes = helperNotes->notes; + } copy->tag = tag; #if USE_AUTH copy->extacl_user = extacl_user; === modified file 'src/HttpRequest.h' --- src/HttpRequest.h 2012-10-26 11:36:45 +0000 +++ src/HttpRequest.h 2012-11-30 11:08:47 +0000 @@ -37,6 +37,7 @@ #include "HierarchyLogEntry.h" #include "HttpMsg.h" #include "HttpRequestMethod.h" +#include "Notes.h" #include "RequestFlags.h" #if USE_AUTH @@ -199,6 +200,8 @@ String myportname; // Internal tag name= value from port this requests arrived in. + Notes *helperNotes; // collection of meta notes associated with this request. + String tag; /* Internal tag for this request */ String extacl_user; /* User name returned by extacl lookup */ === modified file 'src/Notes.cc' --- src/Notes.cc 2012-11-27 21:19:46 +0000 +++ src/Notes.cc 2012-11-30 11:08:47 +0000 @@ -102,6 +102,30 @@ return note; } +void +Notes::add(const Notes &src) +{ + typedef Notes::NotesList::const_iterator AMLI; + typedef Note::Values::iterator VLI; + + for (AMLI i = src.notes.begin(); i != src.notes.end(); ++i) { + + // ensure we have a key by that name to fill out values for... + // NP: not sharing pointers at the key level since merging other helpers + // details later would affect this src objects keys, which is a bad idea. + Note::Pointer ourKey = add((*i)->key); + + // known key names, merge the values lists... + for (VLI v = (*i)->values.begin(); v != (*i)->values.end(); ++v ) { + // 2012-11-29: values are read-only and Pointer can safely be shared + // for now we share pointers to save memory and gain speed. + // If that ever ceases to be true, convert this to a full copy. + ourKey->values.push_back(*v); + // TODO: prune/skip duplicates ? + } + } +} + Note::Pointer Notes::parse(ConfigParser &parser) { === modified file 'src/Notes.h' --- src/Notes.h 2012-11-27 21:19:46 +0000 +++ src/Notes.h 2012-11-30 11:08:47 +0000 @@ -97,6 +97,18 @@ void add(const String ¬eKey, const String ¬eValue); /** + * Adds a set of notes from another notes list to this set. + * Creating entries for any new keys needed. + * If the key name already exists in list, add the given value to its set of values. + * + * WARNING: + * The list entries are all of shared Pointer type. Altering the src object(s) after + * using this function will update both Notes lists. Likewise, altering this + * destination NotesList will affect any relevant copies of src still in use. + */ + void add(const Notes &src); + + /** * Returns a pointer to an existing Note with given key name or nil. */ Note::Pointer find(const String ¬eKey) const; @@ -112,13 +124,24 @@ * returns a pointer to the existing object. */ Note::Pointer add(const String ¬eKey); - }; class NotePairs : public HttpHeader { public: NotePairs() : HttpHeader(hoNote) {} + + /// convert a NotesList into a NotesPairs + /// appending to any existing entries already present + void append(const Notes::NotesList &src) { + for (Notes::NotesList::const_iterator m = src.begin(); m != src.end(); ++m) + for (Note::Values::iterator v =(*m)->values.begin(); v != (*m)->values.end(); ++v) + putExt((*m)->key.termedBuf(), (*v)->value.termedBuf()); + } + + void append(const NotePairs *src) { + HttpHeader::append(dynamic_cast(src)); + } }; #endif === modified file 'src/client_side.cc' --- src/client_side.cc 2012-11-08 10:49:58 +0000 +++ src/client_side.cc 2012-11-30 11:08:47 +0000 @@ -615,6 +615,8 @@ aLogEntry->http.method = request->method; aLogEntry->http.version = request->http_ver; aLogEntry->hier = request->hier; + if (request->helperNotes) + aLogEntry->notes.append(request->helperNotes->notes); if (request->content_length > 0) // negative when no body or unknown length aLogEntry->cache.requestSize += request->content_length; aLogEntry->cache.extuser = request->extacl_user.termedBuf(); === modified file 'src/client_side_request.cc' --- src/client_side_request.cc 2012-11-08 08:49:33 +0000 +++ src/client_side_request.cc 2012-11-30 11:08:47 +0000 @@ -156,6 +156,7 @@ { http_access_done = false; redirect_done = false; + redirect_fail_count = 0; no_cache_done = false; interpreted_req_hdrs = false; #if USE_SSL @@ -1203,57 +1204,104 @@ assert(redirect_state == REDIRECT_PENDING); redirect_state = REDIRECT_DONE; - if (reply.other().hasContent()) { - /* 2012-06-28: This cast is due to urlParse() truncating too-long URLs itself. - * At this point altering the helper buffer in that way is not harmful, but annoying. - * When Bug 1961 is resolved and urlParse has a const API, this needs to die. - */ - char * result = const_cast(reply.other().content()); - http_status status = (http_status) atoi(result); - - if (status == HTTP_MOVED_PERMANENTLY - || status == HTTP_MOVED_TEMPORARILY - || status == HTTP_SEE_OTHER - || status == HTTP_PERMANENT_REDIRECT - || status == HTTP_TEMPORARY_REDIRECT) { - char *t = NULL; - - if ((t = strchr(result, ':')) != NULL) { + // copy the URL rewriter response Notes to the HTTP request for logging + // do it early to ensure that no matter what the outcome the notes are present. + // TODO put them straight into the transaction state record (ALE?) eventually + if (!old_request->helperNotes) + old_request->helperNotes = new Notes; + old_request->helperNotes->add(reply.notes); + + switch(reply.result) { + case HelperReply::Unknown: + case HelperReply::TT: + // Handler in redirect.cc should have already mapped Unknown + // IF it contained valid entry for the old URL-rewrite helper protocol + debugs(85, DBG_IMPORTANT, "ERROR: URL rewrite helper returned invalid result code. Wrong helper? " << reply); + break; + + case HelperReply::BrokenHelper: + debugs(85, DBG_IMPORTANT, "ERROR: URL rewrite helper: " << reply << ", attempt #" << (redirect_fail_count+1) << " of 2"); + if (redirect_fail_count < 2) { // XXX: make this configurable ? + ++redirect_fail_count; + // reset state flag to try redirector again from scratch. + redirect_done = false; + } + break; + + case HelperReply::Error: + // no change to be done. + break; + + case HelperReply::Okay: { + // #1: redirect with a specific status code OK status=NNN url="..." + // #2: redirect with a default status code OK url="..." + // #3: re-write the URL OK rewrite-url="..." + + Note::Pointer statusNote = reply.notes.find("status"); + Note::Pointer urlNote = reply.notes.find("url"); + + if (urlNote != NULL) { + // HTTP protocol redirect to be done. + + // TODO: change default redirect status for appropriate requests + // Squid defaults to 302 status for now for better compatibility with old clients. + // HTTP/1.0 client should get 302 (HTTP_MOVED_TEMPORARILY) + // HTTP/1.1 client contacting reverse-proxy should get 307 (HTTP_TEMPORARY_REDIRECT) + // HTTP/1.1 client being diverted by forward-proxy should get 303 (HTTP_SEE_OTHER) + http_status status = HTTP_MOVED_TEMPORARILY; + if (statusNote != NULL) { + const char * result = statusNote->firstValue(); + status = (http_status) atoi(result); + } + + if (status == HTTP_MOVED_PERMANENTLY + || status == HTTP_MOVED_TEMPORARILY + || status == HTTP_SEE_OTHER + || status == HTTP_PERMANENT_REDIRECT + || status == HTTP_TEMPORARY_REDIRECT) { http->redirect.status = status; - http->redirect.location = xstrdup(t + 1); + http->redirect.location = xstrdup(urlNote->firstValue()); // TODO: validate the URL produced here is RFC 2616 compliant absolute URI } else { - debugs(85, DBG_CRITICAL, "ERROR: URL-rewrite produces invalid " << status << " redirect Location: " << result); + debugs(85, DBG_CRITICAL, "ERROR: URL-rewrite produces invalid " << status << " redirect Location: " << urlNote->firstValue()); } - } else if (strcmp(result, http->uri)) { - // XXX: validate the URL properly *without* generating a whole new request object right here. - // XXX: the clone() should be done only AFTER we know the new URL is valid. - HttpRequest *new_request = old_request->clone(); - if (urlParse(old_request->method, result, new_request)) { - debugs(61,2, HERE << "URL-rewriter diverts URL from " << urlCanonical(old_request) << " to " << urlCanonical(new_request)); - - // update the new request to flag the re-writing was done on it - new_request->flags.redirected = 1; - - // unlink bodypipe from the old request. Not needed there any longer. - if (old_request->body_pipe != NULL) { - old_request->body_pipe = NULL; - debugs(61,2, HERE << "URL-rewriter diverts body_pipe " << new_request->body_pipe << - " from request " << old_request << " to " << new_request); + } else { + // URL-rewrite wanted. Ew. + urlNote = reply.notes.find("rewrite-url"); + + // prevent broken helpers causing too much damage. If old URL == new URL skip the re-write. + if (urlNote != NULL && strcmp(urlNote->firstValue(), http->uri)) { + // XXX: validate the URL properly *without* generating a whole new request object right here. + // XXX: the clone() should be done only AFTER we know the new URL is valid. + HttpRequest *new_request = old_request->clone(); + if (urlParse(old_request->method, const_cast(urlNote->firstValue()), new_request)) { + debugs(61,2, HERE << "URL-rewriter diverts URL from " << urlCanonical(old_request) << " to " << urlCanonical(new_request)); + + // update the new request to flag the re-writing was done on it + new_request->flags.redirected = 1; + + // unlink bodypipe from the old request. Not needed there any longer. + if (old_request->body_pipe != NULL) { + old_request->body_pipe = NULL; + debugs(61,2, HERE << "URL-rewriter diverts body_pipe " << new_request->body_pipe << + " from request " << old_request << " to " << new_request); + } + + // update the current working ClientHttpRequest fields + safe_free(http->uri); + http->uri = xstrdup(urlCanonical(new_request)); + HTTPMSGUNLOCK(old_request); + http->request = HTTPMSGLOCK(new_request); + } else { + debugs(85, DBG_CRITICAL, "ERROR: URL-rewrite produces invalid request: " << + old_request->method << " " << urlNote->firstValue() << " " << old_request->http_ver); + delete new_request; } - - // update the current working ClientHttpRequest fields - safe_free(http->uri); - http->uri = xstrdup(urlCanonical(new_request)); - HTTPMSGUNLOCK(old_request); - http->request = HTTPMSGLOCK(new_request); - } else { - debugs(85, DBG_CRITICAL, "ERROR: URL-rewrite produces invalid request: " << - old_request->method << " " << result << " " << old_request->http_ver); - delete new_request; } } } + break; + } /* FIXME PIPELINE: This is innacurate during pipelining */ === modified file 'src/redirect.cc' --- src/redirect.cc 2012-11-27 21:19:46 +0000 +++ src/redirect.cc 2012-11-30 11:08:47 +0000 @@ -78,7 +78,8 @@ redirectStateData *r = static_cast(data); debugs(61, 5, HERE << "reply=" << reply); - // XXX: This funtion is now kept only to check for and display this garbage use-case + // XXX: This function is now kept only to check for and display the garbage use-case + // and to map the old helper response format(s) into new format result code and key=value pairs // it can be removed when the helpers are all updated to the normalized "OK/ERR kv-pairs" format if (reply.result == HelperReply::Unknown) { @@ -99,6 +100,51 @@ } if (reply.other().hasContent() && *res == '\0') reply.modifiableOther().clean(); // drop the whole buffer of garbage. + + // if we still have anything in other() after all that + // parse it into status=, url= and rewrite-url= keys + if (reply.other().hasContent()) { + /* 2012-06-28: This cast is due to urlParse() truncating too-long URLs itself. + * At this point altering the helper buffer in that way is not harmful, but annoying. + * When Bug 1961 is resolved and urlParse has a const API, this needs to die. + */ + const char * result = reply.other().content(); + const http_status status = (http_status) atoi(result); + + HelperReply newReply; + newReply.result = reply.result; + newReply.notes = reply.notes; + + if (status == HTTP_MOVED_PERMANENTLY + || status == HTTP_MOVED_TEMPORARILY + || status == HTTP_SEE_OTHER + || status == HTTP_PERMANENT_REDIRECT + || status == HTTP_TEMPORARY_REDIRECT) { + + if (const char *t = strchr(result, ':')) { + char statusBuf[4]; + snprintf(statusBuf, sizeof(statusBuf),"%3u",status); + newReply.notes.add("status", statusBuf); + ++t; + // TODO: validate the URL produced here is RFC 2616 compliant URI + newReply.notes.add("url", t); + } else { + debugs(85, DBG_CRITICAL, "ERROR: URL-rewrite produces invalid " << status << " redirect Location: " << result); + } + } else { + // status code is not a redirect code (or does not exist) + // treat as a re-write URL request + // TODO: validate the URL produced here is RFC 2616 compliant URI + newReply.notes.add("rewrite-url", reply.other().content()); + } + + void *cbdata; + if (cbdataReferenceValidDone(r->data, &cbdata)) + r->handler(cbdata, newReply); + + redirectStateFree(r); + return; + } } }