[squid-users] Squid 2.7 working with reference to storeurl/caching?

From: GIGO . <gigoz_at_msn.com>
Date: Sun, 23 May 2010 09:53:02 +0000

Hi all,

Could you please review my squid.conf file and advise me on the order of the directives and on any other issues? I am unable to cache a single object. Does the order in which the following are defined matter?
 
1. storeurl program
2. refresh patterns
3. storeurl rewrite lists...
 
I assume :
 
1. Whenever a user opens a page in his user agent, Squid first of all checks the refresh patterns to decide whether to look in the cache or go to the web. Am I right?

2. If the request then matches the storeurl rewrite lists, it is forwarded to the storeurl program, which checks whether the object is available in the cache; if it is, the object is returned. Otherwise the object is fetched from the web and stored under its store_url for future reference. Please advise, as I am quite unclear about this.

3. With the following squid.conf, not a single object is being cached, and I am not sure what is happening.
 
----------------------------------------------------------------------------------------------------
# Configuration file for Squid instance 2, which does all the caching.
# Squid 2.7.STABLE9 was chosen for its store_url (storeurl_rewrite) feature.

visible_hostname squidlhr1
unique_hostname squidlhr1cache
cache_effective_user proxy

# Directives to enhance security
allow_underscore off
httpd_suppress_version_string on
forwarded_for off
log_mime_hdrs on

# Per-instance PID file, log files and listening ports
pid_filename /var/run/inst2squid.pid
access_log /var/logs/inst2access.log squid
cache_log /var/logs/inst2cache.log
cache_store_log /var/logs/inst2store.log
http_port 1975
icp_port 0
# snmp_port only works if SNMP support was enabled when Squid was compiled
snmp_port 7172
# An explicit definition of the "all" ACL is required in Squid 2.7
acl all src all
acl manager proto cache_object
acl localhost src 127.0.0.1/32
# NOTE(review): to_localhost and CONNECT are defined here but not referenced
# by any access rule in the configuration shown.
acl to_localhost dst 127.0.0.0/8
acl CONNECT method CONNECT
# Only allow cachemgr access from localhost
http_access allow manager localhost
http_access deny manager
# If peering with ISA, the following two directives are required; otherwise not
#cache_peer 10.1.82.205 parent 8080 0 default no-digest no-query no-delay
#never_direct allow all
# On-disk cache: aufs store of 50000 MB with 128 first-level and
# 256 second-level directories
cache_dir aufs /cachedisk1/var/spool/squid 50000 128 256
coredump_dir /cachedisk1/var/spool/squid
cache_swap_low 75
# Author's rule of thumb: should be 1/4 of the physical memory installed in the system
cache_mem 1000 MB
# -1 = no limit: fetch the whole object even when the client sends a Range request
range_offset_limit -1 KB
# 4194304 KB = 4 GB upper bound on the size of a cached object
maximum_object_size 4194304 KB
# NOTE(review): replies smaller than 10 KB are not saved to disk with this
# setting, which excludes most small web objects (icons, CSS, small images)
# from the disk cache -- confirm this is intended.
minimum_object_size 10 KB
# -1 = never abort a download in progress when the client disconnects
quick_abort_min -1 KB
cache_replacement_policy heap LFUDA

# YouTube caching rule (taken from the Squid wiki example).
# Let the clients' favorite video site through with full caching:
# - requests can come from any of a number of youtube.com subdomains.
# - this is NOT ideal; the 'merging' of identical content is really needed
#   here (that is the job of the storeurl rewriter configured further below).
acl youtube dstdomain .youtube.com
cache allow youtube
# When no "cache" line matches a request, Squid applies the OPPOSITE of the
# last line in the list. With "cache allow youtube" as the only rule that
# means an implicit "cache deny all": nothing outside .youtube.com would ever
# be stored. End the list with an explicit allow so other sites stay cacheable.
cache allow all

#---------Refresh Pattern Portion--------------------------
# refresh_pattern [-i] <regex> <min minutes> <percent> <max minutes> [options]
# The lines are checked in order and the first regex that matches the URL is
# used, so the position of every line below matters.
#
# Custom refresh patterns come first.
# Updates for Windows / Debian etc.
# NOTE(review): the dots in the three host patterns below are unescaped, so
# each "." matches any character (e.g. ".*.(cab|exe)" does not require a
# literal dot before the extension).
refresh_pattern windowsupdate.com/.*.(cab|exe)(\?|$) 518400 100% 518400 reload-into-ims
refresh_pattern update.microsoft.com/.*.(cab|exe)(\?|$) 518400 100% 518400 reload-into-ims
refresh_pattern download.microsoft.com/.*.(cab|exe)(\?|$) 518400 100% 518400 reload-into-ims
refresh_pattern download.windowsupdate.com/.*\.(cab|exe|dll|msi) 1440 100% 43200 reload-into-ims
refresh_pattern (Release|Package(.gz)*)$ 0 20% 2880
# NOTE(review): ".deb$" matches any URL ending in <any char>deb; "\.deb$" was
# probably intended -- confirm.
refresh_pattern .deb$ 518400 100% 518400 override-expire
# Custom refresh patterns specific to YouTube video requests
refresh_pattern -i (get_video\?|videoplayback\?|videodownload\?) 5259487 99999999% 5259487 override-expire ignore-reload
# Break HTTP standard for flash videos. Keep them in cache even if asked not to.
refresh_pattern -i \.flv$ 10080 90% 999999 ignore-no-cache override-expire ignore-private
# Other long-lived items (images, and .flv followed by a query string)
refresh_pattern -i .(jp(e?g|e|2)|gif|png|tiff?|bmp|ico|flv)(\?|$) 161280 3000% 525948 override-expire reload-into-ims

# Trial/Test
# NOTE(review): several of these are shadowed by earlier lines because the
# first match wins: e.g. URLs ending in .gif/.png/.jpg/.jpeg/.ico already
# match the "Other long-lived items" pattern above, and URLs ending in .flv
# already match the "\.flv$" pattern, so the settings given here for those
# extensions are never applied.
refresh_pattern -i \.(iso|avi|wav|mp3|mp4|mpeg|mpg|swf|flv|x-flv)$ 43200 90% 432000 override-expire ignore-no-cache ignore-private
refresh_pattern -i \.(deb|rpm|exe|ram|bin|pdf|ppt|doc|tiff)$ 10080 90% 43200 override-expire ignore-no-cache ignore-private
refresh_pattern -i \.(gif|png|jpg|jpeg|ico)$ 10080 90% 43200 override-expire ignore-no-cache ignore-private
refresh_pattern -i \.(zip|gz|arj|lha|lzh|tar|tgz|cab|rar)$ 10080 95% 43200 override-expire ignore-no-cache ignore-private
refresh_pattern -i \.(php|asp|aspx|cgi|html|htm|css|js) 1440 40% 40320
# Per-site patterns.
# NOTE(review): these are written like shell globs, but refresh_pattern takes
# a regular expression. As a regex, "/*" means "zero or more slashes" and "."
# means "exactly one character", so e.g. "^http://*.facebook.com/.*" matches
# http://facebook.com/ but NOT http://www.facebook.com/. Something like
# "^http://([^/]+\.)?facebook\.com/" was probably intended -- confirm before
# changing, since a working pattern here changes what gets served from cache.
refresh_pattern ^http://*.gmail.*/.* 720 100% 4320
refresh_pattern ^http://*.twitter.*/.* 720 100% 4320
refresh_pattern ^http://*.yimg.*/.* 720 100% 4320
refresh_pattern ^http://*.ymail.*/.* 720 100% 4320
refresh_pattern ^http://*.hotmail.*/.* 720 100% 4320
refresh_pattern ^http://*.live.*/.* 720 100% 4320
refresh_pattern ^http://*.wikipedia.*/.* 720 100% 4320
refresh_pattern ^http://wiki.*.*/.* 720 100% 4320
refresh_pattern ^http://*.profile/.* 720 100% 4320
refresh_pattern ^http://*.yahoo.*/.* 720 100% 4320
refresh_pattern ^http://*.microsoft.*/.* 720 100% 4320
refresh_pattern ^http://*.facebook.com/.* 720 100% 4320
refresh_pattern ^http://*.farmville.com/.* 720 100% 4320
refresh_pattern ^http://*.youtube.com/.* 720 100% 4320
refresh_pattern ^http://*.metacafe.com/.* 720 100% 4320
# Must be defined (suggested defaults); "." is the catch-all and stays last
refresh_pattern ^ftp: 1440 20% 10080
refresh_pattern ^gopher: 1440 0% 1440
refresh_pattern -i (/cgi-bin/|\?) 0 0% 0
refresh_pattern . 0 20% 4320
# Store-URL rewrite helper: the external program that maps a request URL to
# the URL under which the object is stored in the cache.
storeurl_rewrite_program /etc/squid/scripts/storeurl.pl
storeurl_rewrite_children 1
# A non-zero concurrency means every request line sent to the helper starts
# with a channel ID, which the helper must echo back at the start of its reply.
storeurl_rewrite_concurrency 10
#----------------------------- URLs which will be sent to the store-URL rewriter.
# NOTE(review): this pattern requires a literal "." plus one of the listed
# extensions immediately after get_video / videoplayback?id / videoplayback...id,
# so query-style URLs such as /get_video?video_id=... do not match. It looks
# like two separate patterns were fused together -- confirm against the wiki
# example.
acl store_rewrite_list urlpath_regex \/(get_video|videoplayback\?id|videoplayback.*id)\.(jp(e?g|e|2)|gif|png|tiff?|bmp|ico|flv|wmv|3gp|mp(4|3)|exe|msi|zip|on2|mar|swf)
# Several whitespace-separated regexes; the ACL matches if any one of them matches.
# NOTE(review): the second regex runs from "^http://(www.ziddu.com...)" straight
# into "\.doubleclick\.net.*" with no space in between, so it only matches ziddu
# URLs that also contain ".doubleclick.net" -- presumably a missing space; confirm.
acl store_rewrite_list_domain_CDN url_regex \.rapidshare\.com.*\/[0-9]*\/.*\/[^\/]* ^http:\/\/(www\.ziddu\.com.*\.[^\/]{3,4})\/(.*)\.doubleclick\.net.* yieldmanager cpxinteractive ^http:\/\/[.a-z0-9]*\.photobucket\.com.*\.[a-z]{3}$ quantserve\.com
# The same ACL name on two lines: both regexes belong to store_rewrite_list_domain.
acl store_rewrite_list_domain url_regex ^http:\/\/([a-zA-Z-]+[0-9-]+)\.[A-Za-z]*\.[A-Za-z]*
acl store_rewrite_list_domain url_regex (([a-z]{1,2}[0-9]{1,3})|([0-9]{1,3}[a-z]{1,2}))\.[a-z]*[0-9]?\.[a-z]{3}
# URL paths ending in one of these file extensions
acl store_rewrite_list_path urlpath_regex \.(jp(e?g|e|2)|gif|png|tiff?|bmp|ico|flv|avc|zip|mp3|3gp|rar|on2|mar|exe)$
# NOTE(review): rapidurl, video and html are defined but not referenced by any
# access rule in the configuration shown.
acl rapidurl url_regex \.rapidshare\.com.*\/[0-9]*\/[0-9]*\/[^\/]*
acl video urlpath_regex \.((mpeg|ra?m|avi|mp(g|e|4)|mov|divx|asf|qt|wmv|m\dv|rv|vob|asx|ogm|flv|3gp)(\?.*)?)$ (get_video\?|videoplayback\?|videodownload\?|\.flv(\?.*)?)
acl html url_regex \.((html|htm|php|js|css|aspx)(\?.*)?)$ \.com\/$ \.com$
#acl images urlpath_regex \.((jp(e?g|e|2)|gif|png|tiff?|bmp|ico)(\?.*)?)$
# Must be defined: URLs that are never sent to the rewriter (redbot.org, and
# video requests carrying a "begin=" parameter).
acl dontrewrite url_regex redbot\.org (get_video|videoplayback\?id|videoplayback.*id).*begin\=
# storeurl_access rules are checked in order; the first matching line decides.
storeurl_access deny dontrewrite
storeurl_access allow store_rewrite_list_domain_CDN
storeurl_access allow store_rewrite_list
# Both ACLs on this line must match (host pattern AND file extension).
storeurl_access allow store_rewrite_list_domain store_rewrite_list_path
storeurl_access deny all
# Allow proxy access from localhost only
http_access allow localhost
http_access deny all
--------------------------------------------------------------------------------------
I am using the script by chudy_fernandez with a few small modifications:
 
#!/usr/bin/perl
# $Rev$
# by chudy_fernandez_at_yahoo.com
# Updates at http://wiki.squid-cache.org/ConfigExamples/DynamicContent/YouTube/Discussion
#
# Squid storeurl_rewrite_program helper.
#
# Input  (STDIN, one request per line):  "<id> <url> [further fields ignored]"
# Output (STDOUT, one reply per line):   "<id> <store-url>"
#
# The first whitespace-separated field is echoed back unchanged in front of
# the reply. The second field is the request URL; it is tested against the
# patterns below in order and the FIRST matching branch produces the store
# URL. URLs that match no pattern are returned unchanged.
use strict;
use warnings;

# Flush STDOUT after every print so replies are not held back in a buffer.
$| = 1;

while (<>) {
    my @fields = split;
    # Missing fields become empty strings, so a blank input line still yields
    # a reply line consisting of a single space.
    my $id  = defined $fields[0] ? $fields[0] : '';
    my $url = defined $fields[1] ? $fields[1] : '';
    # $url =~ s/&sig=.*//;
    my $reply = $id . " ";

    # The bare m// and s/// operators below all work on $_.
    $_ = $url;

    if (m/^http:\/\/([0-9.]{4}|.*\.youtube\.com|.*\.googlevideo\.com|.*\.video\.google\.com).*?(videoplayback\?id=.*?|video_id=.*?)\&(.*?)/) {
        # compatibility for old cached get_video?video_id
        my $video = $2;
        $video =~ s/video_id=/get_video?video_id=/;
        print $reply . "http://video-srv.youtube.com.SQUIDINTERNAL/" . $video . "\n";
    } elsif (m/^http:\/\/([0-9.]{4}|.*\.youtube\.com|.*\.googlevideo\.com|.*\.video\.google\.com).*?\&(itag=22).*?\&(id=[a-zA-Z0-9]*)/) {
        # youtube HD itag=22: keep the itag in the key so HD is stored separately
        print $reply . "http://video-srv.youtube.com.SQUIDINTERNAL/" . $2 . "&" . $3 . "\n";
    } elsif (m/^http:\/\/([0-9.]{4}|.*\.youtube\.com|.*\.googlevideo\.com|.*\.video\.google\.com).*?\&(itag=[0-9]*).*?\&(id=[a-zA-Z0-9]*)/) {
        # youtube Normal screen always HD itag 35, Normal screen never HD itag 34, itag=18 <--normal?
        # Every other itag is keyed on the video id alone.
        print $reply . "http://video-srv.youtube.com.SQUIDINTERNAL/" . $3 . "\n";
    } elsif (m/^http:\/\/www\.google-analytics\.com\/__utm\.gif\?.*/) {
        print $reply . "http://www.google-analytics.com/__utm.gif\n";
    } elsif (m/^http:\/\/([a-z0-9.]*)(\.doubleclick\.net|\.quantserve\.com|\.googlesyndication\.com|yieldmanager|cpxinteractive)(.*)/) {
        # Cache high-latency ads.
        # Copy the captures first: the substitutions below run their own
        # matches, and the reply must be built from THIS match.
        my ($host, $network, $rest) = ($1, $2, $3);
        # Strip per-request noise (timestamps, cookies, random ids) from the
        # remainder of the URL.
        for ($rest) {
            s/pixel;.*/pixel/;
            s/activity;.*/activity/;
            s/(imgad[^&]*).*/$1/;
            s/;ord=[?0-9]*//;
            s/;&timestamp=[0-9]*//;
            s/[&?]correlator=[0-9]*//;
            s/&cookie=[^&]*//;
            s/&ga_hid=[^&]*//;
            s/&ga_vid=[^&]*//;
            s/&ga_sid=[^&]*//;
            # s/&prev_slotnames=[^&]*//
            # s/&u_his=[^&]*//;
            s/&dt=[^&]*//;
            s/&dtd=[^&]*//;
            s/&lmt=[^&]*//;
            # NOTE(review): "[^(%2F)]" is a character class (any character
            # except "(", "%", "2", "F", ")"), not "anything up to %2F"; the
            # kept host part therefore stops at the first such character.
            # Left unchanged so existing store URLs stay the same.
            s/(&alternate_ad_url=http%3A%2F%2F[^(%2F)]*)[^&]*/$1/;
            s/(&url=http%3A%2F%2F[^(%2F)]*)[^&]*/$1/;
            s/(&ref=http%3A%2F%2F[^(%2F)]*)[^&]*/$1/;
            s/(&cookie=http%3A%2F%2F[^(%2F)]*)[^&]*/$1/;
            s/[;&?]ord=[?0-9]*//;
            s/[;&]mpvid=[^&;]*//;
            s/&xpc=[^&]*//;
            # yieldmanager
            s/\?clickTag=[^&]*//;
            s/&u=[^&]*//;
            s/&slotname=[^&]*//;
            s/&page_slots=[^&]*//;
        }
        print $reply . "http://" . $host . $network . $rest . "\n";
    } elsif (m/^http:\/\/(.*?)\/(ads)\?(.*?)/) {
        # cache high latency ads: drop the whole query string
        print $reply . "http://" . $1 . "/" . $2 . "\n";
    } elsif (m/^http:\/\/(www\.ziddu\.com.*\.[^\/]{3,4})\/(.*?)/) {
        # specific servers start here....
        print $reply . "http://" . $1 . "\n";
    } elsif (($url =~ /filehippo/) && (m/^http:\/\/(.*?)\.(.*?)\/(.*?)\/(.*)\.([a-z0-9]{3,4})(\?.*)?/)) {
        # cdn, variable 1st path component (capture 3 is dropped on purpose)
        my @part = ($1, $2, $4, $5);
        $part[0] =~ s/[a-z0-9]{2,5}/cdn./;
        print $reply . "http://" . $part[0] . $part[1] . "/" . $part[2] . "." . $part[3] . "\n";
    } elsif (($url =~ /rapidshare/) && (m/^http:\/\/(([A-Za-z]+[0-9-.]+)*?)([a-z]*\.[^\/]{3}\/[a-z]*\/[0-9]*)\/(.*?)\/([^\/\?\&]{4,})$/)) {
        # rapidshare
        print $reply . "http://cdn." . $3 . "/SQUIDINTERNAL/" . $5 . "\n";
    } elsif (m/^http:\/\/photos-[a-z]\.ak\.fbcdn\.net\/(.*)/) {
        # photos-X.ak.fbcdn.net where X is a-z (dots escaped so that only the
        # real host name matches)
        print $reply . "http://photos.ak.fbcdn.net/" . $1 . "\n";
    } elsif (m/^http:\/\/([0-9.]*?)\/\/(.*?)\.(.*)\?(.*?)/) {
        # general purpose for cdn servers. add your specific servers above.
        print $reply . "http://squid-cdn-url//" . $2 . "." . $3 . "\n";
    } elsif (m/^http:\/\/(.*?)\.yimg\.com\/(.*?)\.yimg\.com\/(.*?)\?(.*)/) {
        # for yimg.com doubled
        print $reply . "http://cdn.yimg.com/" . $3 . "\n";
    } elsif (m/^http:\/\/(.*?)\.yimg\.com\/(.*)/) {
        # for yimg.com with &sig=
        my @part = ($1, $2);
        $part[0] =~ s/[a-z]+[0-9]+/cdn/;
        $part[1] =~ s/&sig=.*//;
        print $reply . "http://" . $part[0] . ".yimg.com/" . $part[1] . "\n";
    } elsif (m/^http:\/\/(.*)([^\.\-]*?\..*?)\/(.*)\.([^\/\?\&]{3,4})(\?.*)?$/) {
        # generic http://variable.domain.com/path/filename."ext" or "exte" with or without "?"
        my @part = ($1, $2, $3, $4);
        # Character class fixed from the original "a-zA-A-" typo to "a-zA-Z-"
        # so upper-case letters B-Z are recognised as well.
        $part[0] =~ s/(([a-zA-Z-]+[0-9-]+)|(.*cdn.*)|(.*cache.*))/cdn/;
        print $reply . "http://" . $part[0] . $part[1] . "/" . $part[2] . "." . $part[3] . "\n";
    } elsif (m/^http:\/\/(([A-Za-z]+[0-9-]+)*?|.*cdn.*|.*cache.*)\.(.*?)\.(.*?)\/(.*)$/) {
        # generic http://variable.domain.com/...
        print $reply . "http://cdn." . $3 . "." . $4 . "/" . $5 . "\n";
    } elsif (m/^http:\/\/(.*?)\/(.*?)\.(jp(e?g|e|2)|gif|png|tiff?|bmp|ico|flv|on2)\?(.*)/) {
        # specific extension followed by "?"
        print $reply . "http://" . $1 . "/" . $2 . "." . $3 . "\n";
    } elsif (m/^http:\/\/(.*?)\/(.*?)\;(.*)/) {
        # everything after the first ";" is dropped
        print $reply . "http://" . $1 . "/" . $2 . "\n";
    } else {
        # no rule matched: store under the original URL
        print $reply . $_ . "\n";
    }
}
_________________________________________________________________
Hotmail: Trusted email with Microsoft’s powerful SPAM protection.
https://signup.live.com/signup.aspx?id=60969
Received on Sun May 23 2010 - 09:53:11 MDT

This archive was generated by hypermail 2.2.0 : Sun May 23 2010 - 12:00:32 MDT