Index: src/cf.data.pre =================================================================== RCS file: /cvsroot/squid/squid/src/cf.data.pre,v retrieving revision 1.146 diff -u -r1.146 cf.data.pre --- src/cf.data.pre 31 Jul 2006 01:52:23 -0000 1.146 +++ src/cf.data.pre 1 Aug 2006 10:22:29 -0000 @@ -1133,6 +1133,35 @@ you should not change the COSS block size after Squid has written some objects to the cache_dir. + overwrite-percent=n defines the percentage of disk that COSS + must write to before a given object will be moved to the + current stripe. A value of "n" closer to 100 will cause COSS + to waste less disk space on duplicate copies of an object + on disk, but will increase the chances of overwriting a popular + object as COSS overwrites stripes. A value of "n" close to 0 + will cause COSS to keep all current objects in the current COSS + stripe at the expense of the hit rate. The default value of 50 + will allow any given object to be stored on disk a maximum of + 2 times. + + max-stripe-waste=n defines the maximum amount of space that COSS + will waste in a given stripe (in bytes). When COSS writes data + to disk, it will potentially waste up to "max-size" worth of disk + space for each 1MB of data written. If "max-size" is set to a + large value (e.g. >256k), this could potentially result in large + amounts of wasted disk space. Setting this value to a lower value + (e.g. 64k or 32k) will result in a COSS disk refusing to cache + larger objects until the COSS stripe has been filled to within + "max-stripe-waste" of the maximum size (1MB). + + membufs=n defines the number of "memory-only" stripes that COSS + will use. When a cache hit is performed on a COSS stripe before + COSS has reached the overwrite-percent value for that object, + COSS will use a series of memory buffers to hold the object in + while the data is sent to the client. This will define the maximum + number of memory-only buffers that COSS will use. 
The default value + is 10, which will use a maximum of 10MB of memory for buffers. + Common options: read-only, this cache_dir is read only. Index: src/defines.h =================================================================== RCS file: /cvsroot/squid/squid/src/defines.h,v retrieving revision 1.36 diff -u -r1.36 defines.h --- src/defines.h 31 Jul 2006 10:53:41 -0000 1.36 +++ src/defines.h 1 Aug 2006 10:22:29 -0000 @@ -41,6 +41,25 @@ #define FALSE 0 #endif +/* Define load weights for cache_dir types */ +#define MAX_LOAD_VALUE 1000 + +#define COSS_LOAD_BASE 0 +#define AUFS_LOAD_BASE 100 +#define DISKD_LOAD_BASE 100 +#define UFS_LOAD_BASE 500 + +#define COSS_LOAD_STRIPE_WEIGHT (900 - COSS_LOAD_BASE) +#define COSS_LOAD_QUEUE_WEIGHT (100 - COSS_LOAD_BASE) +#if COSS_LOAD_QUEUE_WEIGHT < 0 +#undef COSS_LOAD_QUEUE_WEIGHT +#define COSS_LOAD_QUEUE_WEIGHT 0 +#endif + +#define AUFS_LOAD_QUEUE_WEIGHT (MAX_LOAD_VALUE - AUFS_LOAD_BASE) + +#define DISKD_LOAD_QUEUE_WEIGHT (MAX_LOAD_VALUE - DISKD_LOAD_BASE) + #define ACL_NAME_SZ 32 #define BROWSERNAMELEN 128 Index: src/fs/aufs/store_dir_aufs.c =================================================================== RCS file: /cvsroot/squid/squid/src/fs/aufs/store_dir_aufs.c,v retrieving revision 1.43 diff -u -r1.43 store_dir_aufs.c --- src/fs/aufs/store_dir_aufs.c 31 Jul 2006 10:53:42 -0000 1.43 +++ src/fs/aufs/store_dir_aufs.c 1 Aug 2006 10:22:33 -0000 @@ -1578,9 +1578,9 @@ ql = aioQueueSize(); if (ql == 0) { - return 1; + return AUFS_LOAD_BASE; } - loadav = ql * 1000 / MAGIC1; + loadav = AUFS_LOAD_BASE + (ql * AUFS_LOAD_QUEUE_WEIGHT / MAGIC1); return loadav; } @@ -1681,6 +1681,7 @@ storeAppendPrintf(sentry, "Current Size: %d KB\n", SD->cur_size); storeAppendPrintf(sentry, "Percent Used: %0.2f%%\n", 100.0 * SD->cur_size / SD->max_size); + storeAppendPrintf(sentry, "Current load metric: %d / %d\n", storeAufsDirCheckLoadAv(SD,ST_OP_CREATE),MAX_LOAD_VALUE); storeAppendPrintf(sentry, "Filemap bits in use: %d of %d (%d%%)\n", 
aioinfo->map->n_files_in_map, aioinfo->map->max_n_files, percent(aioinfo->map->n_files_in_map, aioinfo->map->max_n_files)); Index: src/fs/coss/store_coss.h =================================================================== RCS file: /cvsroot/squid/squid/src/fs/coss/store_coss.h,v retrieving revision 1.8 diff -u -r1.8 store_coss.h --- src/fs/coss/store_coss.h 5 Jul 2006 07:55:05 -0000 1.8 +++ src/fs/coss/store_coss.h 1 Aug 2006 10:22:34 -0000 @@ -46,6 +46,7 @@ struct { int alloc; int realloc; + int memalloc; int collisions; } alloc; int disk_overflows; @@ -73,6 +74,7 @@ unsigned int writing:1; unsigned int written:1; unsigned int dead:1; + unsigned int memonly:1; } flags; int numobjs; }; @@ -114,6 +116,7 @@ sfileno original_filen, new_filen; /* in blocks, not in bytes */ dlink_list ops; char *p; + struct _cossmembuf *locked_membuf; }; @@ -126,6 +129,8 @@ int fd; int swaplog_fd; int numcollisions; + int loadcalc[2]; + int load_interval; dlink_list pending_relocs; dlink_list pending_ops; int pending_reloc_count; @@ -137,7 +142,11 @@ unsigned int blksz_bits; unsigned int blksz_mask; /* just 1<fsdata; + int objsize = objectLen(e) + e->mem_obj->swap_hdr_sz; /* Check if the object is a special object, we can't cache these */ if (EBIT_TEST(e->flags, ENTRY_SPECIAL)) return 0; if (cs->rebuild.rebuilding == 1) return 0; + /* Check to see if the object is going to waste too much disk space */ + if(objsize > cs->sizerange_max) + return 0; + return 1; } int storeCossDirCheckLoadAv(SwapDir * SD, store_op_t op) { -#if !USE_AUFSOPS CossInfo *cs = (CossInfo *) SD->fsdata; -#else +#if USE_AUFSOPS + float disk_size_weight,current_write_weight; + int cur_load_interval = (squid_curtime / cs->load_interval)%2; int ql = 0; #endif int loadav; @@ -592,13 +608,31 @@ #if USE_AUFSOPS ql = aioQueueSize(); if (ql == 0) - loadav = 0; + loadav = COSS_LOAD_BASE; else - loadav = ql * 1000 / MAGIC1; + loadav = COSS_LOAD_BASE + (ql * COSS_LOAD_QUEUE_WEIGHT / MAGIC1); + + /* We want to try an keep the 
disks at a similar write rate + * otherwise the LRU algorithm breaks + * + * The queue length has a 10% weight on the load + * The number of stripes written has a 90% weight + */ + disk_size_weight = (float) max_coss_dir_size / SD->max_size; + current_write_weight = (float) cs->loadcalc[cur_load_interval] * COSS_LOAD_STRIPE_WEIGHT / MAX_LOAD_VALUE; + + loadav += disk_size_weight * current_write_weight; + + /* Remove the folowing check if we want to allow COSS partitions to get + * "too busy" + */ + if(loadav > MAX_LOAD_VALUE) + loadav=MAX_LOAD_VALUE; + debug(47, 9) ("storeAufsDirCheckObj: load=%d\n", loadav); return loadav; #else - loadav = cs->aq.aq_numpending * 1000 / MAX_ASYNCOP; + loadav = cs->aq.aq_numpending * MAX_LOAD_VALUE / MAX_ASYNCOP; return loadav; #endif } @@ -632,6 +666,7 @@ storeAppendPrintf(sentry, "Current Size: %d KB\n", SD->cur_size); storeAppendPrintf(sentry, "Percent Used: %0.2f%%\n", 100.0 * SD->cur_size / SD->max_size); + storeAppendPrintf(sentry, "Current load metric: %d / %d\n", storeCossDirCheckLoadAv(SD,ST_OP_CREATE),MAX_LOAD_VALUE); storeAppendPrintf(sentry, "Number of object collisions: %d\n", (int) cs->numcollisions); #if 0 /* is this applicable? I Hope not .. */ @@ -716,7 +751,21 @@ cs->blksz_bits = 9; /* default block size = 512 */ cs->blksz_mask = (1 << cs->blksz_bits) - 1; + /* By default, only overwrite objects that were written mor ethan 50% of the disk ago + * and use a maximum of 10 in-memory stripes + */ + cs->minumum_overwrite_pct = 0.5; + cs->nummemstripes = 10; + + /* Calculate load in 60 second incremenets */ + /* This could be made configurable */ + cs->load_interval = 60; + parse_cachedir_options(sd, options, 0); + + cs->sizerange_max = sd->max_objsize; + cs->sizerange_min = sd->max_objsize; + /* Enforce maxobjsize being set to something */ if (sd->max_objsize == -1) fatal("COSS requires max-size to be set to something other than -1!\n"); @@ -728,12 +777,15 @@ * signed integer, as defined in structs.h. 
*/ max_offset = (off_t) 0xFFFFFF << cs->blksz_bits; - if (sd->max_size > (unsigned long) (max_offset >> 10)) { + if ((sd->max_size + cs->nummemstripes) > (unsigned long) (max_offset >> 10)) { debug(47, 0) ("COSS block-size = %d bytes\n", 1 << cs->blksz_bits); debug(47, 0) ("COSS largest file offset = %lu KB\n", (unsigned long) max_offset >> 10); debug(47, 0) ("COSS cache_dir size = %d KB\n", sd->max_size); fatal("COSS cache_dir size exceeds largest offset\n"); } + cs->max_disk_nf = ((off_t)sd->max_size << 10) >> cs->blksz_bits; + debug(47, 0) ("COSS: max disk fileno is %d\n",cs->max_disk_nf); + /* XXX todo checks */ /* Ensure that off_t range can cover the max_size */ @@ -748,6 +800,19 @@ cs->stripes[i].membuf = NULL; cs->stripes[i].numdiskobjs = -1; } + cs->minimum_stripe_distance = cs->numstripes * cs->minumum_overwrite_pct; + + debug(47, 0) ("COSS: number of memory-only stripes %d of %d bytes each\n",cs->nummemstripes, COSS_MEMBUF_SZ); + cs->memstripes = xcalloc(cs->nummemstripes, sizeof(struct _cossstripe)); + for (i = 0; i < cs->nummemstripes; i++) { + cs->memstripes[i].id = i; + cs->memstripes[i].membuf = NULL; + cs->memstripes[i].numdiskobjs = -1; + } + + /* Update the max size (used for load calculations */ + if(sd->max_size > max_coss_dir_size) + max_coss_dir_size=sd->max_size; } static void @@ -782,6 +847,49 @@ } static void +storeCossDirParseMemOnlyBufs(SwapDir * sd, const char *name, const char *value, int reconfiguring) +{ + CossInfo *cs = sd->fsdata; + int membufs = atoi(value); + if (reconfiguring) { + debug(47, 0) ("WARNING: cannot change COSS memory bufs Squid is running\n"); + return; + } + if (membufs < 2) + fatal("COSS ERROR: There must be at least 2 membufs\n"); + if (membufs > 500) + fatal("COSS ERROR: Squid will likely use too much memory if it ever used 500MB worth of buffers\n"); + cs->nummemstripes = membufs; +} + +static void +storeCossDirParseMaxWaste(SwapDir * sd, const char *name, const char *value, int reconfiguring) +{ + CossInfo *cs 
= sd->fsdata; + int waste = atoi(value); + + if (waste < 8192) + fatal("COSS max-stripe-waste must be > 8192\n"); + if (waste > sd->max_objsize) + debug(47,1) ("storeCossDirParseMaxWaste: COSS max-stripe-waste can not be bigger than the max object size (%" PRINTF_OFF_T ")\n",sd->max_objsize); + cs->sizerange_min = waste; +} + +static void +storeCossDirParseOverwritePct(SwapDir * sd, const char *name, const char *value, int reconfiguring) +{ + CossInfo *cs = sd->fsdata; + int pct = atoi(value); + + if (pct < 0) + fatal("COSS overwrite percent must be > 0\n"); + if (pct > 100) + fatal("COSS overwrite percent must be < 100\n"); + cs->minumum_overwrite_pct = (float)pct / 100; + cs->minimum_stripe_distance = cs->numstripes * cs->minumum_overwrite_pct; +} + +static void storeCossDirParseBlkSize(SwapDir * sd, const char *name, const char *value, int reconfiguring) { CossInfo *cs = sd->fsdata; @@ -811,6 +919,27 @@ } static void +storeCossDirDumpMemOnlyBufs(StoreEntry * e, const char *option, SwapDir * sd) +{ + CossInfo *cs = sd->fsdata; + storeAppendPrintf(e, " membufs=%d MB", cs->nummemstripes); +} + +static void +storeCossDirDumpMaxWaste(StoreEntry * e, const char *option, SwapDir * sd) +{ + CossInfo *cs = sd->fsdata; + storeAppendPrintf(e, " max-stripe-waste=%d", cs->sizerange_min); +} + +static void +storeCossDirDumpOverwritePct(StoreEntry * e, const char *option, SwapDir * sd) +{ + CossInfo *cs = sd->fsdata; + storeAppendPrintf(e, " overwrite-percent=%d%%", (int)(cs->minumum_overwrite_pct * 100 + 0.5)); /* scale the stored fraction to a percentage (rounded) BEFORE truncating; a cast binds tighter than '*', so casting first always printed 0 for fractions below 1.0 */ +} + +static void storeCossDirDumpBlkSize(StoreEntry * e, const char *option, SwapDir * sd) { CossInfo *cs = sd->fsdata; @@ -892,6 +1021,7 @@ storeAppendPrintf(sentry, "dead_stripes: %d\n", coss_stats.dead_stripes); storeAppendPrintf(sentry, "alloc.alloc: %d\n", coss_stats.alloc.alloc); storeAppendPrintf(sentry, "alloc.realloc: %d\n", coss_stats.alloc.realloc); + storeAppendPrintf(sentry, "alloc.memalloc: %d\n", coss_stats.alloc.memalloc); storeAppendPrintf(sentry, 
"alloc.collisions: %d\n", coss_stats.alloc.collisions); storeAppendPrintf(sentry, "disk_overflows: %d\n", coss_stats.disk_overflows); storeAppendPrintf(sentry, "stripe_overflows: %d\n", coss_stats.stripe_overflows); @@ -1126,9 +1256,9 @@ tmpe.hash.key = key; /* Check sizes */ if (tmpe.swap_file_sz == 0) { - tmpe.swap_file_sz = len; + tmpe.swap_file_sz = len + bl; } - if (tmpe.swap_file_sz != len) { + if (tmpe.swap_file_sz != (len + bl)) { debug(47, 3) ("COSS: %s: stripe %d: file size mismatch (%" PRINTF_OFF_T " != %" PRINTF_OFF_T ")\n", SD->path, cs->rebuild.curstripe, tmpe.swap_file_sz, len); goto nextobject; } Index: src/fs/coss/store_io_coss.c =================================================================== RCS file: /cvsroot/squid/squid/src/fs/coss/store_io_coss.c,v retrieving revision 1.21 diff -u -r1.21 store_io_coss.c --- src/fs/coss/store_io_coss.c 17 Jul 2006 01:51:10 -0000 1.21 +++ src/fs/coss/store_io_coss.c 1 Aug 2006 10:22:37 -0000 @@ -52,6 +52,7 @@ static void storeCossMemBufUnlock(SwapDir * SD, storeIOState * e); static void storeCossWriteMemBuf(SwapDir * SD, CossMemBuf * t); static CossMemBuf *storeCossCreateMemBuf(SwapDir * SD, int stripe, sfileno curfn, int *collision); +static CossMemBuf *storeCossCreateMemOnlyBuf(SwapDir * SD); static CBDUNL storeCossIOFreeEntry; static off_t storeCossFilenoToDiskOffset(sfileno f, CossInfo *); static sfileno storeCossDiskOffsetToFileno(off_t o, CossInfo *); @@ -81,6 +82,52 @@ /* === PUBLIC =========================================================== */ +static sfileno +storeCossMemOnlyAllocate(SwapDir * SD, const StoreEntry * e) +{ + CossInfo *cs = (CossInfo *) SD->fsdata; + CossMemBuf *newmb; + off_t retofs; + size_t allocsize; + sfileno f; + + coss_stats.alloc.memalloc++; + allocsize = e->swap_file_sz; + + if(cs->current_memonly_membuf == NULL) { + newmb=storeCossCreateMemOnlyBuf(SD); + cs->current_memonly_membuf=newmb; + + if(newmb == NULL) { + return -1; + } + 
cs->current_memonly_offset=cs->current_memonly_membuf->diskstart; + } + else if ((cs->current_memonly_offset + allocsize) >= cs->current_memonly_membuf->diskend) { + debug(79, 3) ("storeCossMemOnlyAllocate: overflow for buffer %d (%p)\n",cs->curmemstripe,cs->current_memonly_membuf); + cs->current_memonly_membuf->flags.full = 1; + storeCossMaybeWriteMemBuf(SD, cs->current_memonly_membuf); + /* cs->current_memonly_membuf may be invalid at this point */ + + newmb=storeCossCreateMemOnlyBuf(SD); + cs->current_memonly_membuf=newmb; + + if(newmb == NULL) { + return -1; + } + cs->current_memonly_offset=cs->current_memonly_membuf->diskstart; + } + retofs = cs->current_memonly_offset; + cs->current_memonly_offset = retofs + allocsize; + cs->current_memonly_membuf->numobjs++; + cs->current_memonly_offset = ((cs->current_memonly_offset + cs->blksz_mask) >> cs->blksz_bits) << cs->blksz_bits; + f = storeCossDiskOffsetToFileno(retofs, cs); + assert(f >= 0 && f <= 0xffffff); + debug(79, 3) ("storeCossMemOnlyAllocate: offset %lld, filen: %d\n", (long long int) retofs, f); + return f; + +} + /* * This routine sucks. 
I want to rewrite it when possible, and I also think * that we should check after creatmembuf() to see if the object has a @@ -159,8 +206,20 @@ f = storeCossDiskOffsetToFileno(retofs, cs); assert(f >= 0 && f <= 0xffffff); debug(79, 3) ("storeCossAllocate: offset %lld, filen: %d\n", (long long int) retofs, f); + + /* + * Keep track of the largest object we can accept based on the + * max-wasted-space value + */ + cs->sizerange_max = cs->current_membuf->diskend - cs->current_offset; + if(cs->sizerange_max < cs->sizerange_min) + cs->sizerange_max=cs->sizerange_min; + return f; } else { + /* Reset this to a safe value */ + cs->sizerange_max = SD->max_objsize; + coss_stats.alloc.collisions++; debug(79, 3) ("storeCossAllocate: %s: Collision\n", SD->path); return -1; @@ -183,7 +242,20 @@ storeCossUnlink(SD, e); } +int +storeCossRelocateRequired(CossInfo *cs, sfileno f) +{ + int stripes_written; + int original_stripe=storeCossFilenoToStripe(cs, f); + + if(cs->curstripe > original_stripe) + stripes_written=cs->curstripe-original_stripe; + else + stripes_written=cs->numstripes + cs->curstripe - original_stripe; + /* Relocate if stripes_written > minimum_stripe_distance */ + return (stripes_written > cs->minimum_stripe_distance); +} storeIOState * storeCossCreate(SwapDir * SD, StoreEntry * e, STFNCB * file_callback, STIOCB * callback, void *callback_data) @@ -280,7 +352,6 @@ storeCossMemBufLock(SD, sio); debug(79, 3) ("storeCossOpen: %s: memory hit!\n", SD->path); } else { - debug(79, 3) ("storeCossOpen: %s: memory miss - doing reallocation\n", SD->path); /* Do the allocation */ /* this is the first time we've been called on a new sio * read the whole object into memory, then return the @@ -294,7 +365,17 @@ */ cstate->reqdiskoffset = storeCossFilenoToDiskOffset(sio->swap_filen, cs); assert(cstate->reqdiskoffset >= 0); - nf = storeCossAllocate(SD, e, COSS_ALLOC_REALLOC); + + /* If the object is allocated too recently, make a memory-only copy */ + if( 
storeCossRelocateRequired(cs, sio->swap_filen) ) { + debug(79, 3) ("storeCossOpen: %s: memory miss - doing reallocation (Current stripe : %d Object in stripe : %d)\n", SD->path,cs->curstripe,storeCossFilenoToStripe(cs, sio->swap_filen)); + nf = storeCossAllocate(SD, e, COSS_ALLOC_REALLOC); + } + else + { + debug(79, 3) ("storeCossOpen: %s memory miss - not reallocating (Current stripe : %d Object in stripe : %d)\n",SD->path,cs->curstripe,storeCossFilenoToStripe(cs, sio->swap_filen)); + nf = storeCossMemOnlyAllocate(SD, e); + } if (nf == -1) { /* We have to clean up neatly .. */ coss_stats.open.fail++; @@ -304,23 +385,38 @@ /* XXX XXX XXX Will squid call storeUnlink for this object? */ return NULL; } - /* Remove the object from its currently-allocated stripe */ - storeCossRemove(SD, e); - storeCossNewPendingRelocate(cs, sio, sio->swap_filen, nf); - sio->swap_filen = nf; - cstate->flags.reloc = 1; - /* Notify the upper levels that we've changed file number */ - sio->file_callback(sio->callback_data, 0, sio); - /* - * lock the new buffer so it doesn't get swapped out on us - * this will get unlocked in storeCossClose - */ - storeCossMemBufLock(SD, sio); - /* - * Do the index magic to keep the disk and memory LRUs identical - * by adding the object into the link list on the current stripe - */ - storeCossAdd(SD, e, cs->curstripe); + if(nf < cs->max_disk_nf) { + /* Remove the object from its currently-allocated stripe */ + storeCossRemove(SD, e); + storeCossNewPendingRelocate(cs, sio, sio->swap_filen, nf); + sio->swap_filen = nf; + cstate->flags.reloc = 1; + /* Notify the upper levels that we've changed file number */ + sio->file_callback(sio->callback_data, 0, sio); + /* + * lock the new buffer so it doesn't get swapped out on us + * this will get unlocked in storeCossClose + */ + storeCossMemBufLock(SD, sio); + /* + * Do the index magic to keep the disk and memory LRUs identical + * by adding the object into the link list on the current stripe + */ + storeCossAdd(SD, 
e, cs->curstripe); + } + else + { + /* Relocate the object in COSS, but not in other layers */ + storeCossNewPendingRelocate(cs, sio, sio->swap_filen, nf); + sio->swap_filen = nf; + cstate->flags.reloc = 1; + + /* + * lock the new buffer so it doesn't get swapped out on us + * this will get unlocked in storeCossClose + */ + storeCossMemBufLock(SD, sio); + } } coss_stats.open.success++; return sio; @@ -357,7 +453,7 @@ assert(sio->read.callback_data == NULL); sio->read.callback = callback; sio->read.callback_data = callback_data; - debug(79, 3) ("storeCossRead: %s: offset %ld\n", SD->path, (long int) offset); + debug(79, 3) ("storeCossRead: %s: file number %d offset %ld\n", SD->path, sio->swap_filen, (long int) offset); sio->offset = offset; cstate->flags.reading = 1; if ((offset + size) > sio->st_size) @@ -450,6 +546,37 @@ } static void +storeCossMemBufLockPending(CossPendingReloc * pr, CossMemBuf *t) +{ + assert(t->flags.dead == 0); + assert(pr->locked_membuf == NULL); + debug(79, 3) ("storeCossMemBufLockPending: locking %p, lockcount %d\n", + t, t->lockcount); + pr->locked_membuf = t; + t->lockcount++; +} + +static void +storeCossMemBufUnlockPending(CossPendingReloc * pr, CossInfo * cs) +{ + CossMemBuf *t = pr->locked_membuf; + if (NULL == t) + return; + assert(t->flags.dead == 0); + debug(79, 3) ("storeCossMemBufLockPending: unlocking %p, lockcount %d\n", + t, t->lockcount); + t->lockcount--; + pr->locked_membuf=NULL; + + if(!t->flags.written) { + storeCossMaybeWriteMemBuf(t->SD, t); + } else { + /* cs->current_membuf may be invalid at this point */ + storeCossMaybeFreeBuf(cs, t); + } +} + +static void storeCossMemBufLock(SwapDir * SD, storeIOState * sio) { CossMemBuf *t = storeCossFilenoToMembuf(SD, sio->swap_filen); @@ -475,9 +602,12 @@ t, t->lockcount); t->lockcount--; cstate->locked_membuf = NULL; - storeCossMaybeWriteMemBuf(SD, t); - /* cs->current_membuf may be invalid at this point */ - storeCossMaybeFreeBuf(cs, t); + if(!t->flags.written) { + 
storeCossMaybeWriteMemBuf(SD, t); + } else { + /* cs->current_membuf may be invalid at this point */ + storeCossMaybeFreeBuf(cs, t); + } } static void @@ -532,7 +662,8 @@ storeCossWriteMemBuf(SwapDir * SD, CossMemBuf * t) { CossInfo *cs = (CossInfo *) SD->fsdata; - coss_stats.stripe_write.ops++; + int cur_load_interval=(squid_curtime / cs->load_interval)%2; + int prev_load_interval=((squid_curtime + cs->load_interval)/cs->load_interval)%2; assert(t->flags.dead == 0); debug(79, 3) ("storeCossWriteMemBuf: %p: offset %ld, len %ld\n", t, (long int) t->diskstart, (long int) (t->diskend - t->diskstart)); @@ -545,26 +676,41 @@ * before the objects underneath the membufs stripe were purged and there * is still a pending relocate for it. Its a slim chance but it might happen. */ - assert(t->stripe < cs->numstripes); - if (cs->stripes[t->stripe].pending_relocs > 0) { - debug(79, 1) ("WARNING: %s: One or more pending relocate (reads) from stripe %d are queued - and I'm now writing over that part of the disk. This may result in object data corruption!\n", SD->path, t->stripe); - } - /* - * normally nothing should have this node locked here - but between the time - * we call a_file_write and the IO completes someone might have snuck in and - * attached itself somehow. This is why there's a distinction between "written" - * and "writing". Read the rest of the code for more details. - */ + if(!(t->flags.memonly)) { + coss_stats.stripe_write.ops++; + assert(t->stripe < cs->numstripes); + if (cs->stripes[t->stripe].pending_relocs > 0) { + debug(79, 1) ("WARNING: %s: One or more pending relocate (reads) from stripe %d are queued - and I'm now writing over that part of the disk. 
This may result in object data corruption!\n", SD->path, t->stripe); + } + + /* Update load stats */ + cs->loadcalc[cur_load_interval] += 1; + cs->loadcalc[prev_load_interval] = 0; + + /* + * normally nothing should have this node locked here - but between the time + * we call a_file_write and the IO completes someone might have snuck in and + * attached itself somehow. This is why there's a distinction between "written" + * and "writing". Read the rest of the code for more details. + */ #if USE_AUFSOPS - /* XXX The last stripe, for now, ain't the coss stripe size for some reason */ - /* XXX This may cause problems later on; worry about figuring it out later on */ - //assert(t->diskend - t->diskstart == COSS_MEMBUF_SZ); - debug(79, 3) ("aioWrite: FD %d: disk start: %llu, size %llu\n", cs->fd, (long long int) t->diskstart, (long long int) t->diskend - t->diskstart); - aioWrite(cs->fd, t->diskstart, &(t->buffer[0]), t->diskend - t->diskstart, storeCossWriteMemBufDone, t, NULL); + /* XXX The last stripe, for now, ain't the coss stripe size for some reason */ + /* XXX This may cause problems later on; worry about figuring it out later on */ + //assert(t->diskend - t->diskstart == COSS_MEMBUF_SZ); + debug(79, 3) ("aioWrite: FD %d: disk start: %llu, size %llu\n", cs->fd, (long long int) t->diskstart, (long long int) t->diskend - t->diskstart); + aioWrite(cs->fd, t->diskstart, &(t->buffer[0]), t->diskend - t->diskstart, storeCossWriteMemBufDone, t, NULL); #else - a_file_write(&cs->aq, cs->fd, t->diskstart, &t->buffer, - t->diskend - t->diskstart, storeCossWriteMemBufDone, t, NULL); + a_file_write(&cs->aq, cs->fd, t->diskstart, &t->buffer, + t->diskend - t->diskstart, storeCossWriteMemBufDone, t, NULL); #endif + } + else + { + /* No need to write, just mark as written and free */ + t->flags.written = 1; + t->flags.writing = 0; + storeCossMaybeFreeBuf(cs, t); + } } /* @@ -587,6 +733,13 @@ * call the asyncio disk completion handler.) 
*/ if (mb->lockcount == 0 && mb->flags.written == 1) { + /* We need to wait until here to mark the membuf as + * free so we can re-alocate it + */ + if(mb->flags.memonly) { + assert(cs->memstripes[mb->stripe].membuf == mb); + cs->memstripes[mb->stripe].membuf = NULL; + } debug(79, 3) ("storeCossMaybeFreeBuf: %p: lockcount = 0, written = 1: marking dead\n", mb); mb->flags.dead = 1; dlinkDelete(&mb->node, &cs->membufs); @@ -655,6 +808,45 @@ storeCossMaybeFreeBuf(cs, t); } +static CossMemBuf * +storeCossCreateMemOnlyBuf(SwapDir * SD) +{ + CossMemBuf *newmb; + CossInfo *cs = (CossInfo *) SD->fsdata; + off_t start; + int stripe; + + /* TODO: Maybe make this a simple search for a free membuf */ + for(stripe=0;stripe < cs->nummemstripes;stripe++) { + if(cs->memstripes[stripe].membuf == NULL) + break; + } + if(stripe >= cs->nummemstripes) { + debug(79, 1)("storeCossCreateMemOnlyBuf: no free membufs. You may beed to increase the value of membufs on the %s cache_dir\n",SD->path); + return NULL; + } + cs->curmemstripe = stripe; + + start = (off_t) stripe * COSS_MEMBUF_SZ; + newmb=cbdataAlloc(CossMemBuf); + + cs->memstripes[stripe].membuf = newmb; + newmb->diskstart = ((off_t) SD->max_size << 10) + start; + newmb->stripe = stripe; + newmb->diskend = newmb->diskstart + COSS_MEMBUF_SZ; + newmb->flags.full = 0; + newmb->flags.writing = 0; + newmb->flags.memonly = 1; + newmb->lockcount = 0; + newmb->numobjs = 0; + newmb->SD = SD; + + dlinkAdd(newmb, &newmb->node, &cs->membufs); + + coss_stats.stripes++; + return newmb; +} + /* * This creates a memory buffer but assumes its going to be at the end * of the "LRU" and thusly will delete expire objects which appear under @@ -742,6 +934,12 @@ newmb = storeCossCreateMemBuf(sd, 0, -1, NULL); assert(!cs->current_membuf); cs->current_membuf = newmb; + + newmb = storeCossCreateMemOnlyBuf(sd); + assert(!cs->current_memonly_membuf); + cs->current_memonly_membuf = newmb; + + cs->current_memonly_offset=cs->current_memonly_membuf->diskstart; } /* 
@@ -802,6 +1000,7 @@ storeCossNewPendingRelocate(CossInfo * cs, storeIOState * sio, sfileno original_filen, sfileno new_filen) { CossPendingReloc *pr; + CossMemBuf *membuf; char *p; off_t disk_offset; int stripe; @@ -824,8 +1023,12 @@ cs->stripes[stripe].pending_relocs++; /* And now; we begin the IO */ - p = storeCossMemPointerFromDiskOffset(cs, storeCossFilenoToDiskOffset(new_filen, cs), NULL); + p = storeCossMemPointerFromDiskOffset(cs, storeCossFilenoToDiskOffset(new_filen, cs), &membuf); pr->p = p; + + /* Lock the destination membuf */ + storeCossMemBufLockPending(pr, membuf); + disk_offset = storeCossFilenoToDiskOffset(original_filen, cs); debug(79, 3) ("COSS Pending Relocate: size %" PRINTF_OFF_T ", disk_offset %llu\n", (squid_off_t) sio->e->swap_file_sz, (long long int) disk_offset); #if USE_AUFSOPS @@ -928,6 +1131,8 @@ /* XXX again, this shouldn't be here (find the dlinkAddTail() in storeCossKickReadOp); these should * be abstracted out. */ } + /* Unlock (and possibly write/free) the destination membuf */ + storeCossMemBufUnlockPending(pr,cs); /* Good, now we can delete it */ cbdataUnlock(pr); cbdataFree(pr); @@ -1038,11 +1243,12 @@ static void membufsPrint(StoreEntry * e, CossMemBuf * t, const char *prefix) { - storeAppendPrintf(e, "%s: %d, lockcount: %d, numobjects %d, flags: %s,%s,%s\n", + storeAppendPrintf(e, "%s: %d, lockcount: %d, numobjects %d, flags: %s,%s,%s,%s\n", prefix, t->stripe, t->lockcount, t->numobjs, t->flags.full ? "FULL" : "NOTFULL", t->flags.writing ? "WRITING" : "NOTWRITING", - t->flags.written ? "WRITTEN" : "NOTWRITTEN"); + t->flags.written ? "WRITTEN" : "NOTWRITTEN", + t->flags.memonly ? 
"MEMONLY" : "DISK"); } void Index: src/fs/diskd/store_dir_diskd.c =================================================================== RCS file: /cvsroot/squid/squid/src/fs/diskd/store_dir_diskd.c,v retrieving revision 1.53 diff -u -r1.53 store_dir_diskd.c --- src/fs/diskd/store_dir_diskd.c 31 Jul 2006 10:53:42 -0000 1.53 +++ src/fs/diskd/store_dir_diskd.c 1 Aug 2006 10:22:41 -0000 @@ -1813,7 +1813,7 @@ /* the parse function guarantees magic2 is positivie */ if (diskdinfo->away >= diskdinfo->magic1) return -1; - return diskdinfo->away * 1000 / diskdinfo->magic2; + return DISKD_LOAD_BASE + (diskdinfo->away * DISKD_LOAD_QUEUE_WEIGHT / diskdinfo->magic2); } /* @@ -1962,6 +1962,7 @@ storeAppendPrintf(sentry, "Current Size: %d KB\n", SD->cur_size); storeAppendPrintf(sentry, "Percent Used: %0.2f%%\n", 100.0 * SD->cur_size / SD->max_size); + storeAppendPrintf(sentry, "Current load metric: %d / %d\n", storeDiskdDirCheckLoadAv(SD,ST_OP_CREATE),MAX_LOAD_VALUE); storeAppendPrintf(sentry, "Filemap bits in use: %d of %d (%d%%)\n", diskdinfo->map->n_files_in_map, diskdinfo->map->max_n_files, percent(diskdinfo->map->n_files_in_map, diskdinfo->map->max_n_files)); Index: src/fs/ufs/store_dir_ufs.c =================================================================== RCS file: /cvsroot/squid/squid/src/fs/ufs/store_dir_ufs.c,v retrieving revision 1.42 diff -u -r1.42 store_dir_ufs.c --- src/fs/ufs/store_dir_ufs.c 31 Jul 2006 10:53:43 -0000 1.42 +++ src/fs/ufs/store_dir_ufs.c 1 Aug 2006 10:22:42 -0000 @@ -1613,7 +1613,7 @@ storeUfsDirCheckLoadAv(SwapDir * SD, store_op_t op) { ufsinfo_t *ufsinfo = SD->fsdata; - return 500 + ufsinfo->open_files / 2; + return UFS_LOAD_BASE + ufsinfo->open_files / 2; } /* @@ -1715,6 +1715,7 @@ storeAppendPrintf(sentry, "Current Size: %d KB\n", SD->cur_size); storeAppendPrintf(sentry, "Percent Used: %0.2f%%\n", 100.0 * SD->cur_size / SD->max_size); + storeAppendPrintf(sentry, "Current load metric: %d / %d\n", 
storeUfsDirCheckLoadAv(SD,ST_OP_CREATE),MAX_LOAD_VALUE); storeAppendPrintf(sentry, "Filemap bits in use: %d of %d (%d%%)\n", ufsinfo->map->n_files_in_map, ufsinfo->map->max_n_files, percent(ufsinfo->map->n_files_in_map, ufsinfo->map->max_n_files));