[squid-users] Unable to get HULU ads to cache (not partial content)

From: Ian Nofziger <ian.nofziger_at_gmail.com>
Date: Wed, 9 Jul 2014 13:16:43 -0400

Hi,

I've been trying to set up squid for a variety of reasons, but one is
to cache commercials from HULU. HULU takes up a huge portion of my
bandwidth. It'd be pointless to try to cache the TV shows since we
typically only watch them once (I think they're encrypted anyway), but
I'd like to cache the unencrypted commercials since those get played
over and over again.

The commercials come in as partial content (206 code), so I wrote a
little script that grabs the list of videos with a 206 code each night
and does a "wget -O /dev/null [full address for video]" to cache the
commercials for the next day. This seems to be working just fine. The
problem is, it still will not cache the videos (even though it is no
longer a partial content request). Can someone help sort me out?

(P.S. I'm very much a beginner, so my config file is a result of just
copying a lot of stuff off the internet... I'll gladly take
suggestions even if its not directly related to the HULU ads.)

Thanks in advance for your help!

store.log:

1404923465.907 RELEASE -1 FFFFFFFF B493FE4264AFC396D382596A22280AA5
200 1404923474 1403637739 1436459474 video/mp4 12018601/12018601 GET
http://ads-v-darwin.hulu.com/published/2014/6/24/7/826d2864_2b05_4dd7_9ced_c9992a1dd04d_12d382_AdsTranscode_16x9_1403420_24623827_H264_3200.mp4

access.log:

1404923465.907 4383 192.168.1.80 TCP_MISS/200 12019050 GET
http://ads-v-darwin.hulu.com/published/2014/6/24/7/826d2864_2b05_4dd7_9ced_c9992a1dd04d_12d382_AdsTranscode_16x9_1403420_24623827_H264_3200.mp4
- HIER_DIRECT/8.27.246.254 video/mp4

# ACCESS CONTROLS
# -----------------------------------------------------------------------------

## local network
#acl localnet src 10.0.0.0/8 # RFC1918 possible internal network
#acl localnet src 172.16.0.0/12 # RFC1918 possible internal network
acl localnet src 192.168.0.0/16 # RFC1918 possible internal network
acl localnet src fc00::/7 # RFC 4193 local private network range
acl localnet src fe80::/10 # RFC 4291 link-local (directly
plugged) machines

## windows update
acl windowsupdate dstdomain windowsupdate.microsoft.com
acl windowsupdate dstdomain .update.microsoft.com
acl windowsupdate dstdomain download.windowsupdate.com
acl windowsupdate dstdomain redir.metaservices.microsoft.com
acl windowsupdate dstdomain images.metaservices.microsoft.com
acl windowsupdate dstdomain c.microsoft.com
acl windowsupdate dstdomain www.download.windowsupdate.com
acl windowsupdate dstdomain wustat.windows.com
acl windowsupdate dstdomain crl.microsoft.com
acl windowsupdate dstdomain sls.microsoft.com
acl windowsupdate dstdomain productactivation.one.microsoft.com
acl windowsupdate dstdomain ntservicepack.microsoft.com

## hulu videos
acl huluvids dstdomain ads-v-darwin.hulu.com
acl huluvids dstdomain assets.hulu.com
acl huluvids dstdomain assets.huluim.com
acl huluvids dstdomain p.hulu.com
acl huluvids dstdomain m.hulu.com
acl huluvids dstdomain ib.huluim.com
acl huluvids dstdomain s.hulu.com

## do not cache sites
acl directsites dstdomain "/etc/squid3/go_direct.txt"

## malware sites
acl malwareblockedsites dstdomain "/etc/squid3/malware_blocked_sites"

## fix for netflix on tv
acl VIERAWMDRM url_regex ^http://us.vieraconnect.tv/wmdrm$

## basic ports
acl SSL_ports port 443
acl SSL_ports port 563
acl SSL_ports port 81
acl SSL_ports port 2087
acl SSL_ports port 10000
acl Safe_ports port 563
acl Safe_ports port 631
acl Safe_ports port 901
acl Safe_ports port 81
acl Safe_ports port 3128
acl Safe_ports port 1025-65535
acl Safe_ports port 210
acl Safe_ports port 80 # http
acl Safe_ports port 21 # ftp
acl Safe_ports port 443 # https
acl Safe_ports port 70 # gopher
acl Safe_ports port 210 # wais
acl Safe_ports port 1025-65535 # unregistered ports
acl Safe_ports port 280 # http-mgmt
acl Safe_ports port 488 # gss-http
acl Safe_ports port 591 # filemaker
acl Safe_ports port 777 # multiling http
acl purge method PURGE
acl CONNECT method CONNECT

## windows update
acl wuCONNECT dstdomain www.update.microsoft.com
acl wuCONNECT dstdomain sls.microsoft.com

http_access allow CONNECT wuCONNECT localnet
http_access allow windowsupdate localnet
http_access allow huluvids
http_access deny malwareblockedsites
http_access allow localnet
cache allow huluvids
cache deny VIERAWMDRM
cache deny directsites

http_access deny !Safe_ports
http_access deny CONNECT !SSL_ports

# Only allow cachemgr access from localhost
http_access allow localhost manager
http_access deny manager
http_access allow localhost
http_access deny all

# LISTENING PORTS
# -----------------------------------------------------------------------------

http_port 3128
http_port 192.168.1.3:3129 transparent

# MEMORY CACHE OPTIONS
# -----------------------------------------------------------------------------

cache_mem 256 MB
maximum_object_size_in_memory 512 KB
memory_replacement_policy heap GDSF

# DISK CACHE OPTIONS
# -----------------------------------------------------------------------------

cache_replacement_policy heap LFUDA
cache_dir aufs /var/cache/squid 50176 14 256
maximum_object_size 2048 MB
cache_swap_low 96
cache_swap_high 98

# LOGFILE OPTIONS
# -----------------------------------------------------------------------------

access_log daemon:/var/log/squid3/access.log squid
cache_store_log daemon:/var/log/squid3/store.log
logfile_rotate 2

# OPTIONS FOR TROUBLESHOOTING
# -----------------------------------------------------------------------------

cache_log /var/log/squid3/cache.log
coredump_dir /var/spool/squid3

# OPTIONS FOR TUNING THE CACHE
# -----------------------------------------------------------------------------

refresh_pattern -i (p|m|s).hulu.com/.*\.(mp?) 4320 80% 43200
reload-into-ims override-expire ignore-no-cache ignore-no-store
ignore-private
refresh_pattern -i ib.huluim.com/.*\.(mp?) 4320 80% 43200
reload-into-ims override-expire ignore-no-cache ignore-no-store
ignore-private
refresh_pattern -i assets.hulu*.com/.*\.(mp?) 4320 80% 43200
reload-into-ims override-expire ignore-no-cache ignore-no-store
ignore-private
refresh_pattern -i .*ads-v-darwin.hulu.com.* 4320 80% 43200
reload-into-ims override-expire ignore-no-cache ignore-no-store
ignore-private
refresh_pattern -i
microsoft.com/.*\.(cab|exe|ms[i|u|f]|[ap]sf|wm[v|a]|dat|zip) 4320 80%
43200 reload-into-ims
refresh_pattern -i
windowsupdate.com/.*\.(cab|exe|ms[i|u|f]|[ap]sf|wm[v|a]|dat|zip) 4320
80% 43200 reload-into-ims
refresh_pattern -i
windows.com/.*\.(cab|exe|ms[i|u|f]|[ap]sf|wm[v|a]|dat|zip) 4320 80%
43200 reload-into-ims
refresh_pattern -i (get_video\?|videoplayback\?|videodownload\?) 4320
80% 43200 override-expire ignore-reload reload-into-ims
ignore-no-cache ignore-private
refresh_pattern -i ([^.]+.|)symantecliveupdate.com/.*\.(zip|exe) 4320
100% 43200 reload-into-ims
refresh_pattern -i http://*.googlesyndication.*/.* 720 100% 4320
refresh_pattern -i imdb\.com 10080 90% 43200 ignore-reload
override-expire ignore-no-cache ignore-no-store ignore-private
refresh_pattern -i thetvdb\.com 10080 90% 43200 ignore-reload
override-expire ignore-no-cache ignore-no-store ignore-private
refresh_pattern -i themoviedb\.org 10080 90% 43200 ignore-reload
override-expire ignore-no-cache ignore-no-store ignore-private
refresh_pattern -i impawards\.com 10080 90% 43200 ignore-reload
override-expire ignore-no-cache ignore-no-store ignore-private
refresh_pattern -i movieposterdb\.com 10080 90% 43200 ignore-reload
override-expire ignore-no-cache ignore-no-store ignore-private
refresh_pattern ^ftp: 1440 20% 10080
refresh_pattern ^gopher: 1440 0% 1440
refresh_pattern -i \.(gif|png|jp?g|ico|bmp|tiff?)$ 10080 95% 43200
override-expire override-lastmod reload-into-ims ignore-no-cache
ignore-private
refresh_pattern -i
\.(rpm|cab|deb|exe|msi|msu|zip|tar|xz|bz|bz2|lzma|gz|tgz|rar|bin|7z|doc?|xls?|ppt?|pdf|nth|psd|sis)$
10080 90% 43200 override-expire override-lastmod reload-into-ims
ignore-no-cache ignore-private
refresh_pattern -i
\.(avi|iso|wav|mid|mp3|mp4|mpeg|mov|3gp|wm?|swf|flv|x-flv|axd)$ 43200
95% 86400 override-expire override-lastmod reload-into-ims
ignore-no-cache ignore-private
refresh_pattern -i \.(html|htm|css|js)$ 1440 75% 40320
refresh_pattern -i \.index.(html|htm)$ 0 75% 10080
refresh_pattern -i (/cgi-bin/|\?) 0 0% 0
refresh_pattern (Release|Packages(.gz)*)$ 0 20% 2880
refresh_pattern . 0 40% 40320

## always download the whole file even if client aborts
quick_abort_min -1

## TAG: range_offset_limit size [acl acl...]
range_offset_limit 2048 MB windowsupdate
range_offset_limit 2048 MB huluvids

## Used for estimating number of objects
store_avg_object_size 30 KB

# HTTP OPTIONS
# -----------------------------------------------------------------------------

# TAG: request_header_access
request_header_access From deny all
request_header_access Server deny all
request_header_access Link deny all
request_header_access Via deny all
request_header_access X-Forwarded-For deny all

# TIMEOUTS
# -----------------------------------------------------------------------------

forward_timeout 240 seconds
connect_timeout 30 seconds
peer_connect_timeout 5 seconds
read_timeout 600 seconds
request_timeout 60 seconds
shutdown_lifetime 60 seconds

# ADMINISTRATIVE PARAMETERS
# -----------------------------------------------------------------------------

cache_effective_user proxy
cache_effective_group proxy

# TAG: httpd_suppress_version_string on|off
# Suppress Squid version string info in HTTP headers and HTML error pages.
#Default:
httpd_suppress_version_string on

# ICP OPTIONS
# -----------------------------------------------------------------------------

log_icp_queries off

# MISCELLANEOUS
# -----------------------------------------------------------------------------

memory_pools on
client_db on
reload_into_ims on
Received on Wed Jul 09 2014 - 17:17:22 MDT

This archive was generated by hypermail 2.2.0 : Thu Jul 10 2014 - 12:00:06 MDT