Re: Custom log format

From: Jeffrey Mogul <mogul@dont-contact.us>
Date: Tue, 20 Nov 2001 11:54:31 -0800

Henrik wrote:

    Sure. Send in the diff to squid-dev@squid-cache.org, and we will try to
    archive it at a suitable place such as the web page discussed...

Here are diffs for squid 1.1.20 to support logging of MD5 digests of
FTP and HTTP response bodies.

Caution:
(1) There is no documentation for these changes, besides what
you get here. Sorry. I no longer remember exactly why I did
things the way I did them.

(2) I suspect there might still be a few lurking bugs. In particular,
certain cases might exist where a transfer is aborted in such a
way that the code computes and logs an MD5 value for only part of
the response body, without flagging the transfer as "aborted".

(3) This code was designed to add some fields to squid.store.log.
You might want to use it in a different way.

(4) I didn't bother to even try to think about logging MD5 digests
for:
        (a) protocols other than HTTP and FTP; we didn't get enough
                of these to make it worthwhile.
        (b) responses retrieved from cache; we run Squid with caching
                disabled, since we cannot accept the performance hit
                caused by caching.
and we never have (and probably never will) run ICP, so I have no
idea if that would change things.

(5) The code is meant to be used with the original RSA reference
code for MD5:

    /* MD5.H - header file for MD5C.C
     */
    
    /* Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
    rights reserved.
    
and

    /* MD5C.C - RSA Data Security, Inc., MD5 message-digest algorithm
     */
    
    /* Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
    rights reserved.

which I have not included. You can find these on the net.

-Jeff

*** 1.1 1998/08/17 23:23:17
--- ftp.c 1998/08/17 23:34:24
***************
*** 358,364 ****
--- 358,368 ----
              /* oh, this is so gross -- we found the marker at the
               * end of the previous read, but theres more data!
               * So put the marker back in. */
+ #ifdef MD5LOG
+ storeAppendMD5(entry, MAGIC_MARKER, MAGIC_MARKER_SZ);
+ #else
              storeAppend(entry, MAGIC_MARKER, MAGIC_MARKER_SZ);
+ #endif MD5LOG
          }
          /* check for a magic marker at the end of the read */
          data->got_marker = 0;
***************
*** 368,376 ****
--- 372,384 ----
                  len -= MAGIC_MARKER_SZ;
              }
          }
+ #ifdef MD5LOG
+ proc_ftphdrs_and_append(data, buf, len);
+ #else
          storeAppend(entry, buf, len);
          if (data->reply_hdr_state < 2 && len > 0)
              ftpProcessReplyHeader(data, buf, len);
+ #endif
          commSetSelect(fd,
              COMM_SELECT_READ,
              (PF) ftpReadReply,
***************
*** 384,389 ****
--- 392,450 ----
      return 0;
  }
  
+ #ifdef MD5LOG
+ /*
+ * If we are computing an MD5 digest over the response body,
+ * we have to handle response headers separately from the body.
+ * This is a minor pain, because Squid otherwise doesn't make
+ * the response body visible as unit.
+ */
+ proc_ftphdrs_and_append(FtpStateData *data, char *buf, int len)
+ {
+ int hdrs_in_buf = 0;
+ int hdr_len;
+ int other_len;
+ char *hdr_end;
+ StoreEntry *entry = data->entry;
+
+ if (data->reply_hdr_state < 2) {
+ hdrs_in_buf = 1; /* this buffer contains headers */
+ ftpProcessReplyHeader(data, buf, len);
+ }
+ if (data->reply_hdr_state < 2) {
+ /* no end-of-header seen yet, nothing is included in digest */
+ storeAppend(entry, buf, len); /* invoke handlers! */
+ }
+ else {
+ if (hdrs_in_buf) {
+ /* only digest the part after the headers */
+ hdr_end = mime_headers_end(buf);
+ if (hdr_end)
+ hdr_len = hdr_end - buf;
+ else
+ hdr_len = 0;
+
+ /* Paranoia */
+ if ((hdr_len > len) || (hdr_len < 0))
+ hdr_len = 0;
+
+ if (hdr_len > 0)
+ storeAppend(entry, buf, hdr_len);
+
+ other_len = len - hdr_len;
+
+ if (other_len > 0)
+ storeAppendMD5(entry, &(buf[hdr_len]), other_len);
+ /* this sequence invokes the handlers twice */
+ }
+ else {
+ /* buffer contains no headers, digest the whole thing */
+ storeAppendMD5(entry, buf, len); /* invoke handlers! */
+ }
+ }
+ }
+ #endif
+
  static void
  ftpSendComplete(int fd, char *buf, int size, int errflag, void *data)
  {
*** 1.1 1998/07/29 23:31:20
--- http.c 1998/10/16 22:08:31
***************
*** 1,5 ****
  /*
! * $Id: http.c,v 1.1 1998/07/29 23:31:20 mogul Exp $
   *
   * DEBUG: section 11 Hypertext Transfer Protocol (HTTP)
   * AUTHOR: Harvest Derived
--- 1,5 ----
  /*
! * $Id: http.c,v 1.3 1998/10/16 21:46:19 mogul Exp mogul $
   *
   * DEBUG: section 11 Hypertext Transfer Protocol (HTTP)
   * AUTHOR: Harvest Derived
***************
*** 631,644 ****
--- 631,654 ----
      } else if (len == 0) {
          /* Connection closed; retrieval done. */
          httpState->eof = 1;
+ #ifdef MD5LOG
+ proc_hdrs_and_append(httpState, buf, len);
+ #else
          if (httpState->reply_hdr_state < 2)
              httpProcessReplyHeader(httpState, buf, len);
          storeAppend(entry, buf, len); /* invoke handlers! */
+ #endif
          storeComplete(entry); /* deallocates mem_obj->request */
          comm_close(fd);
      } else if (entry->flag & CLIENT_ABORT_REQUEST) {
          /* append the last bit of info we get */
+ #ifdef MD5LOG
+ proc_hdrs_and_append(httpState, buf, len);
+ debug(11, 0, "httpReadReply ABORT %d\n", httpState->reply_hdr_state);
+ entry->aborted = 1;
+ #else
          storeAppend(entry, buf, len);
+ #endif
          squid_error_entry(entry, ERR_CLIENT_ABORT, NULL);
          comm_close(fd);
      } else if (entry->flag & DELETE_BEHIND && !storeClientWaiting(entry)) {
***************
*** 646,654 ****
--- 656,668 ----
          squid_error_entry(entry, ERR_NO_CLIENTS_BIG_OBJ, NULL);
          comm_close(fd);
      } else {
+ #ifdef MD5LOG
+ proc_hdrs_and_append(httpState, buf, len);
+ #else
          if (httpState->reply_hdr_state < 2)
              httpProcessReplyHeader(httpState, buf, len);
          storeAppend(entry, buf, len);
+ #endif
          commSetSelect(fd,
              COMM_SELECT_READ,
              httpReadReply,
***************
*** 661,666 ****
--- 675,733 ----
      }
  }
  
+ #ifdef MD5LOG
+ /*
+ * If we are computing an MD5 digest over the response body,
+ * we have to handle response headers separately from the body.
+ * This is a minor pain, because Squid otherwise doesn't make
+ * the response body visible as unit.
+ */
+ proc_hdrs_and_append(HttpStateData *httpState, char *buf, int len)
+ {
+ int hdrs_in_buf = 0;
+ int hdr_len;
+ int other_len;
+ char *hdr_end;
+ StoreEntry *entry = httpState->entry;
+
+ if (httpState->reply_hdr_state < 2) {
+ hdrs_in_buf = 1; /* this buffer contains headers */
+ httpProcessReplyHeader(httpState, buf, len);
+ }
+ if (httpState->reply_hdr_state < 2) {
+ /* no end-of-header seen yet, nothing is included in digest */
+ storeAppend(entry, buf, len); /* invoke handlers! */
+ }
+ else {
+ if (hdrs_in_buf) {
+ /* only digest the part after the headers */
+ hdr_end = mime_headers_end(buf);
+ if (hdr_end)
+ hdr_len = hdr_end - buf;
+ else
+ hdr_len = 0;
+
+ /* Paranoia */
+ if ((hdr_len > len) || (hdr_len < 0))
+ hdr_len = 0;
+
+ if (hdr_len > 0)
+ storeAppend(entry, buf, hdr_len);
+
+ other_len = len - hdr_len;
+
+ if (other_len > 0)
+ storeAppendMD5(entry, &(buf[hdr_len]), other_len);
+ /* this sequence invokes the handlers twice */
+ }
+ else {
+ /* buffer contains no headers, digest the whole thing */
+ storeAppendMD5(entry, buf, len); /* invoke handlers! */
+ }
+ }
+ }
+ #endif
+
  /* This will be called when request write is complete. Schedule read of
   * reply. */
  static void
*** 1.1 1998/07/30 18:06:21
--- icp.c 1998/07/30 18:11:39
***************
*** 310,315 ****
--- 310,318 ----
      safe_free(icpState->reply_hdr);
  #endif /* LOG_FULL_HEADERS */
      if ((entry = icpState->entry)) {
+ #ifdef MD5LOG
+ entry->elapsed_msec = elapsed_msec;
+ #endif
          storeUnregister(entry, fd);
          storeUnlockObject(entry);
          icpState->entry = NULL;
*** 1.1 1998/07/29 22:01:41
--- store.c 1998/10/16 22:08:05
***************
*** 455,460 ****
--- 455,465 ----
  storeLog(int tag, const StoreEntry * e)
  {
      LOCAL_ARRAY(char, logmsg, MAX_URL << 1);
+ #ifdef MD5LOG
+ LOCAL_ARRAY(unsigned char, md5digest, MD5_DIGESTLEN);
+ LOCAL_ARRAY(char, md5string, ((MD5_DIGESTLEN << 1)+1));
+ int i;
+ #endif
      MemObject *mem = e->mem_obj;
      struct _http_reply *reply;
      if (storelog_fd < 0)
***************
*** 467,473 ****
--- 472,487 ----
          mem->log_url = xstrdup(e->url);
      }
      reply = mem->reply;
+ #ifdef MD5LOG
+ MD5Final(md5digest, &(e->md5ctx));
+ for (i = 0; i < MD5_DIGESTLEN; i++) {
+ sprintf(&(md5string[i*2]), "%02x", md5digest[i]);
+ }
+
+ sprintf(logmsg, "%9d.%03d %-7s %4d %9d %9d %9d %s %d/%d %s %s %d %d %s\n",
+ #else
      sprintf(logmsg, "%9d.%03d %-7s %4d %9d %9d %9d %s %d/%d %s %s\n",
+ #endif
          (int) current_time.tv_sec,
          (int) current_time.tv_usec / 1000,
          storeLogTags[tag],
***************
*** 479,485 ****
--- 493,506 ----
          reply->content_length,
          mem->e_current_len - mem->reply->hdr_sz,
          RequestMethodStr[e->method],
+ #ifdef MD5LOG
+ mem->log_url,
+ e->elapsed_msec,
+ e->aborted,
+ md5string);
+ #else
          mem->log_url);
+ #endif
      file_write(storelog_fd,
          xstrdup(logmsg),
          strlen(logmsg),
***************
*** 759,764 ****
--- 780,790 ----
      debug(20, 3, "storeCreateEntry: '%s' icp flags=%x\n", url, flags);
  
      e = new_StoreEntry(WITH_MEMOBJ, log_url);
+ #ifdef MD5LOG
+ MD5Init(&(e->md5ctx));
+ e->elapsed_msec = 0;
+ e->aborted = 0;
+ #endif
      e->lock_count = 1; /* Note lock here w/o calling storeLock() */
      mem = e->mem_obj;
      e->url = xstrdup(url);
***************
*** 994,999 ****
--- 1020,1061 ----
          InvokeHandlers(e);
  }
  
+ #ifdef MD5LOG
+ /* Append incoming data from a primary server to an entry.
+ Include this data in the MD5 digest */
+ void
+ storeAppendMD5(StoreEntry * e, const char *data, int len)
+ {
+ MemObject *mem;
+ /* sanity check */
+ if (e == NULL) {
+ debug_trap("storeAppendMD5: NULL entry.");
+ return;
+ } else if ((mem = e->mem_obj) == NULL) {
+ debug_trap("storeAppendMD5: NULL entry->mem_obj");
+ return;
+ } else if (mem->data == NULL) {
+ debug_trap("storeAppendMD5: NULL entry->mem_obj->data");
+ return;
+ }
+ if (len) {
+ debug(20, 5, "storeAppendMD5: appending %d bytes for '%s'\n",
+ len, e->key);
+ storeGetMemSpace(len);
+ if (sm_stats.n_pages_in_use > store_pages_low) {
+ if (mem->e_current_len > Config.Store.maxObjectSize)
+ storeStartDeleteBehind(e);
+ }
+ store_mem_size += len;
+ mem->data->mem_append(mem->data, data, len);
+ mem->e_current_len += len;
+ MD5Update(&(e->md5ctx), data, len);
+ }
+ if (e->store_status != STORE_ABORTED && !BIT_TEST(e->flag, DELAY_SENDING))
+ InvokeHandlers(e);
+ }
+ #endif
+
  #ifdef __STDC__
  void
  storeAppendPrintf(StoreEntry * e, const char *fmt,...)
*** 1.1 1998/07/29 22:00:28
--- store.h 1998/10/16 22:08:07
***************
*** 109,114 ****
--- 109,116 ----
  #ifndef _STORE_H_
  #define _STORE_H_
  
+ #include "md5.h"
+
  #define MIN_PENDING 1
  #define MIN_CLIENT 1
  
***************
*** 255,260 ****
--- 257,268 ----
      int object_len;
      int swap_file_number;
  
+ #ifdef MD5LOG
+ MD5_CTX md5ctx;
+ int elapsed_msec;
+ int aborted;
+ #endif
+
      mem_status_t mem_status:3;
      ping_status_t ping_status:3;
      store_status_t store_status:3;
Received on Tue Nov 20 2001 - 17:37:18 MST

This archive was generated by hypermail pre-2.1.9 : Tue Dec 09 2003 - 16:14:38 MST