#!/usr/bin/perl -w
#
# Split a squid access log into multiple log files according to a
# domain mapping config file.
#
# Each non-blank, non-comment line of the config file names an output
# file followed by the domains that should be written to it, e.g.
#
#   a.log example1.com example2.com
#   b.log example3.com
#
# Log lines whose request is "GET|POST|HEAD scheme://host[:port]/..."
# have the scheme://host[:port] prefix stripped and are written to the
# file mapped to the first configured domain contained in the host.
# Every other line is written unchanged to unknown.log.
#
# $Id: split_squid_log.pl,v 1.1 2003/05/31 16:40:57 chedong Exp $
# Author Che, Dong

use strict;
use Getopt::Std;
use IO::File;

# parse command line options
my %opts = ();
getopts( 'hi:c:', \%opts );

# print help
if ( !%opts || $opts{'h'} ) {
    print "usage: $0 [options] -h: print this help -c: [config_file] "
      . "-i: [input squid log file] example: $0 -c sync.conf -i access.log\n";
    exit;
}

# get input file and output config
my $input_file;
my $conf_file_name      = "";
my %domain_file_mapping = ();    # domain      => output file name
my @domains             = ();    # domains in config order (first match wins)
my %files               = ();    # output name => open IO::File handle

if ( !$opts{'c'} || $opts{'c'} eq "" ) {
    print "please specifiey sync site configure file\n";
    exit;
}
else {
    $conf_file_name = $opts{'c'};
}

if ( !$opts{'i'} || $opts{'i'} eq "" ) {
    print "please specifiey input file\n";
    exit;
}
else {
    $input_file = IO::File->new( $opts{'i'}, "r" )
      or die "Unable to open logfile: $!\n";
}

# parse domain list from configure file
my $conf_file = IO::File->new( $conf_file_name, "r" )
  or die "Can't open $conf_file_name: $!";

while ( defined( my $conf_line = $conf_file->getline() ) ) {
    chomp $conf_line;

    # trim
    $conf_line =~ s/^\s+//;
    $conf_line =~ s/\s+$//;

    # skip comments and blank lines
    next if $conf_line =~ m/^#/ or $conf_line eq '';

    my ( $filename, @domain_list ) = split( ' ', $conf_line );
    push( @domains, @domain_list );

    # init file handler; a file named on several config lines is opened once
    if ( !exists $files{$filename} ) {
        $files{$filename} = IO::File->new( $filename, "w" )
          or die "Couldn't open $filename for writing: $!\n";
    }

    foreach my $domain (@domain_list) {
        $domain_file_mapping{$domain} = $filename;
    }
}
$conf_file->close();

# make an unknown.log for unknown mapping host
if ( !exists $files{"unknown.log"} ) {
    $files{"unknown.log"} = IO::File->new( "unknown.log", "w" )
      or die "Couldn't open unknown.log for writing: $!\n";
}

my $domain_number = scalar(@domains);
my @file_list     = keys %files;
my $output_number = scalar(@file_list);

print "I have $domain_number domain(s) to split: @domains\n";
print "will output into $output_number file(s): @file_list\n";
print "according to following mapping:\n";
print map { "$_ => $domain_file_mapping{$_}\n" } keys %domain_file_mapping;

# Precompile one pattern per configured domain.  \Q...\E makes the
# domain a literal string: the "." in "example.com" must not match any
# character, and a stray metacharacter in the config must not abort the
# run with a regex compile error.  The match stays unanchored, so a
# configured "example.com" still matches the host "www.example.com".
my @matchers =
  map { [ qr/\Q$_\E/, $domain_file_mapping{$_} ] } @domains;

# split log according to the domain => output_file mapping
while ( defined( my $line = $input_file->getline() ) ) {
    my $orig_line = $line;
    my $target;

    # convert "GET http://localhost:8000/index.html => "GET /index.html
    # The host class includes "-" so hyphenated host names are consumed
    # whole.  The host capture is only used when the substitution
    # actually matched; otherwise it would be undefined or stale.
    if ( $line =~ s{"(GET|POST|HEAD) \w+://([\w.-]+)(?::\d+)?}{"$1 } ) {
        my $host = $2;
        foreach my $matcher (@matchers) {
            if ( $host =~ $matcher->[0] ) {
                $target = $matcher->[1];
                last;
            }
        }
    }

    if ( defined $target ) {
        $files{$target}->print($line);
    }
    else {
        # unknown host or unrecognised request: keep the line untouched
        $files{"unknown.log"}->print($orig_line);
    }
}
$input_file->close();

# close file_list; buffered write errors only surface at close time,
# so report them and exit non-zero instead of ignoring them
my $close_failures = 0;
foreach my $name ( keys %files ) {
    if ( !$files{$name}->close ) {
        warn "Couldn't close $name: $!\n";
        $close_failures++;
    }
}
exit 1 if $close_failures;