#!/usr/bin/perl

# With some other Unix Os, first line may be
#!/usr/local/bin/perl
# With Apache for Windows and ActivePerl, first line may be
#!C:/Program Files/ActiveState/ActivePerl/bin/perl

# access_referer_agent2combined.pl by PieterB, pieterb@gewis.nl

#This program is free software; you can redistribute it and/or
#modify it under the terms of the GNU General Public License
#version 2 as published by the Free Software Foundation.

#This program is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#GNU General Public License for more details, in COPYING.TXT file.

#If you have not received a copy of this file along with this program,
#write to
#the Free Software Foundation, Inc.,
#59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.


#-Description-------------------------------------------
# access_referer_agent2combined.pl
# v.0.6, free to use, e-mail pieterb+web@gewis.nl
# --------------------------------
# This Perl script converts a directory of common log format (CLF)
# access files, referer files and agent files to a single combined.log.
# This is useful if you have separate access/referer/agent logfiles
# and want to use a log analyzer.

# access_referer_agent2combined.pl comes with ABSOLUTELY NO WARRANTY.
# It's a free software distributed with a GNU General Public License
# (See COPYING.txt file).  access_referer_agent2combined is part
# of AWStats but can be used alone for any need.
#
# The script has been tested with Lotus Notes/Lotus Domino 
# The admin had selected "Common" logging. It's better to select
# "Extended Common" logging in Lotus Domino
# (See Server, Current Server Document, tab Internet Protocols,
# Log File Settings)
# It should work with the logfiles of other webservers (e.g. Apache)
# as well (untested)

# precondition: edit the constants below to set the range of MMDDYYYY
# input: a directory full of accessMMDDYYYY.log, refererMMDDYYYY.log,
# agentMMDDYYYY.log
# output: combined.log 
# PS please mail any patches you make to this script

# set the range of the logfiles: e.g. Jan/2002 to Dec/2002
# non existent logfiles are skipped
use strict;
use warnings;

my $startmonth=1;
my $startyear=2002;
my $endmonth=12;
my $endyear=2002;

# File names are built as <prefix>MMDDYYYY<postfix>, looked up in the
# current working directory.
my $accessprefix="access";
my $refererprefix="referer";
my $agentprefix="agent";
my $postfix=".log";

######################### Start of program
print "common_referer_agent2combined.pl by pieterb\@gewis.nl\n";
print "creating combined.log\n" ;
open(my $out, '>', 'combined.log')
	or die "cannot create combined.log: $!\n";

my $month=$startmonth;
my $year=$startyear;

# Walk month by month from (startyear, startmonth) to (endyear, endmonth)
# inclusive.  Comparing the (year, month) pair as a whole keeps ranges that
# cross a year boundary (e.g. Jun/2001 .. Mar/2002) working.
while ($year < $endyear || ($year == $endyear && $month <= $endmonth)) {

	# Every month is probed for days 1..31; days that do not exist simply
	# have no access log and are skipped by the -f test below.
	for my $day (1 .. 31) {

		# create mmddyyyy
		my $mmddyyyy=sprintf("%02d%02d%s", $month, $day, $year);

		my $accesslog="$accessprefix$mmddyyyy$postfix";
		my $refererlog="$refererprefix$mmddyyyy$postfix";
		my $agentlog="$agentprefix$mmddyyyy$postfix";

		next unless -f $accesslog;

		print "converting $accesslog, $refererlog, $agentlog\n";

		my $access_fh;
		unless (open($access_fh, '<', $accesslog)) {
			warn "cannot open $accesslog: $!, skipped\n";
			next;
		}

		# The referer and agent logs are optional: when one is missing
		# its field is left empty rather than aborting the conversion.
		my $referer_fh=open_optional_log($refererlog);
		my $agent_fh=open_optional_log($agentlog);

		# The three logs are read in lock step: line N of the referer
		# and agent logs is taken to belong to line N of the access log.
		while (my $commonlogformat=<$access_fh>) {

			# get clf and remove the line ending
			$commonlogformat=strip_eol($commonlogformat);

			# get referer: drop the leading "[timestamp] " prefix.
			# Non-greedy match with a limit of 2 so that a referer
			# which itself contains "] " is not truncated.
			my @refererfield=split /\[.*?\] \s*/, next_log_line($referer_fh), 2;
			my $referer=defined $refererfield[1] ? $refererfield[1] : '';

			# get agent: drop the leading "[timestamp] " prefix
			my $agent=next_log_line($agent_fh);
			$agent=~ s/^\[[^\]]*\] //;

			print {$out} "$commonlogformat $referer $agent\n";
		}

		close($access_fh);
		close($referer_fh) if defined $referer_fh;
		close($agent_fh) if defined $agent_fh;
	}

	# advance to the next month, rolling over into the next year
	$month++;
	if ($month > 12) {
		$month=1;
		$year++;
	}
}

# Buffered write errors only surface at close, so check it.
close($out) or die "error while writing combined.log: $!\n";
print "finished\n";

# Remove the line terminator from $line, whether it is CRLF or a bare LF
# (the latter is what Perl hands back on Windows text-mode reads and for
# logs written on Unix).
sub strip_eol {
	my ($line)=@_;
	$line =~ s/\r?\n//g;
	return $line;
}

# Open $path for reading.  Returns the filehandle, or undef after printing
# a warning when the file cannot be opened.
sub open_optional_log {
	my ($path)=@_;
	my $fh;
	return $fh if open($fh, '<', $path);
	warn "cannot open $path: $!, field left empty\n";
	return;
}

# Return the next line of $fh without its line terminator, or the empty
# string when $fh is undef or exhausted.
sub next_log_line {
	my ($fh)=@_;
	return '' unless defined $fh;
	my $line=<$fh>;
	return '' unless defined $line;
	return strip_eol($line);
}
