#!/usr/local/bin/perl -w
#
# Rotate Apache access and error logs into cumulative monthly logs
#
# USAGE:
#   rotate-logs <Apache httpd.conf configuration file path>
#
# NOTE: Must run as root to do a 'kill -HUP' on the Apache httpd 
#
# DOCUMENTATION:
#   perldoc rotate-logs
#
# (C) Copyright 1998 Dave Beckett <D.J.Beckett@ukc.ac.uk>
# University of Kent at Canterbury
#
# This program is free software; you can redistribute it and/or modify
# it under the same terms as Perl itself.
#

require 5.004;

use strict;
use Carp;

use IO::File;
use File::Basename;


# Global settings shared by every routine in this script.

# When true, main() lists the resolved log file paths on STDERR.
$::debug = 0;

# Name this script was invoked as, minus any directory part; it is the
# prefix of every diagnostic message.
$::prog_name = basename($0);

# Month abbreviations as they appear in Apache log timestamps, mapped to
# their month number (Jan => 1 ... Dec => 12).
{
  my $month_number = 0;
  %::month_name_to_month_number =
    map { ($_ => ++$month_number) }
      qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
}

# Version number: the third space-separated field of the RCS Id keyword.
{
  my @id_fields = split(/ /, q$Id: rotate-logs,v 1.1 1998/04/07 10:21:29 djb1 Exp $);
  $::VERSION = $id_fields[2];
}

# Exactly one argument must be given: the Apache configuration file.
if (@ARGV != 1) {
  die "USAGE: $::prog_name V$::VERSION [apache configuration file]\n";
}

main(@ARGV);
exit 0;




# Read the top-level directives of an Apache configuration file.
#
#   $cfg  - reference to a hash; each "Directive value" line found
#           outside any <Section> ... </Section> is stored as
#           $cfg->{Directive} = value, overwriting an existing entry.
#           A directive with no arguments is stored with an undef value.
#   $file - path of the configuration file to read.
#
# Blank lines and '#' comment lines are skipped.  Leading and trailing
# whitespace (including a trailing CR) is removed from every line, and
# a value that is one complete double-quoted string has its quotes
# removed, so that  ServerRoot "/usr/local/apache"  yields a usable path.
#
# Dies with a message naming $file if the file cannot be opened.
# Returns 1.
sub read_apache_config ($$) {
  my($cfg, $file)=@_;

  # IO::File with an explicit mode never interprets characters in the
  # file name as an open mode or a pipe, unlike two-argument open().
  my $cfg_fh=IO::File->new($file, "r")
    or die "$::prog_name: Could not read config file $file - $!\n";

  # Section nesting depth.  A counter rather than a flag, so that the
  # close of an inner section (<Directory> inside <VirtualHost>) does
  # not make the rest of the outer section look like global config.
  my $depth=0;

  while(defined(my $line=<$cfg_fh>)) {
    # Strip surrounding whitespace; this also removes the line ending
    # without eating a character when the last line has no newline.
    $line =~ s/^\s+//;
    $line =~ s/\s+$//;
    next if $line eq '' || $line =~ /^#/;

    # Ignore all stuff inside <BLAH> ... </BLAH>
    if ($line =~ m%^<[^/]%) { $depth++; next; }
    if ($line =~ m%^</%)    { $depth-- if $depth > 0; next; }
    next if $depth;

    my($var,$value)=split(/\s+/, $line, 2);

    # Unquote a value that is exactly one quoted string.  Values with
    # escaped or several quotes (e.g. LogFormat) are left untouched.
    $value=$1 if defined $value && $value =~ /^"([^"\\]*)"$/;

    $cfg->{$var}=$value;
  }
  $cfg_fh->close;

  return 1;
}


# Turn a possibly relative path into one anchored at $root.
#
#   $file - path to resolve
#   $root - directory prefix; it is prepended exactly as given, no
#           separator is inserted between it and $file
#
# Returns $file unchanged when it starts with '/', otherwise the
# concatenation of $root and $file.
sub absolute ($$) {
  my($file,$root)=@_;

  # Already anchored at the file system root: nothing to do.
  return $file if $file =~ m%^/%;

  return $root . $file;
}  


# Rotate and split the logs of one Apache instance.
#
#   $http_conf - path of the main Apache configuration file
#
# Steps:
#   1. Read ServerRoot, ErrorLog, TransferLog and PidFile from
#      $http_conf (the defaults below apply when a directive is absent)
#      and chdir to ServerRoot.
#   2. Rename the live access and error logs to "<name>.old" and send
#      HUP to the process named in the PID file so that fresh logs are
#      started.  If anything in this step fails the renames are undone,
#      so the next run does not delete logs httpd is still writing.
#   3. Append every entry of the ".old" files to access-YYYY-MM.log and
#      error-YYYY-MM.log in the directory of the access log.
#
# Dies with a message on any failure.
#
# Declared without a prototype: the script calls main(@ARGV) before
# this definition is compiled, which made "perl -w" warn that main()
# was "called too early to check prototype".
sub main {
  my($http_conf)=@_;


  # Default configuration from apache documentation
  my %cfg=(
    AccessConfig   => 'conf/access.conf',
    ResourceConfig => 'conf/srm.conf',
    TypesConfig    => 'conf/mime.types',
    ErrorLog       => 'logs/error_log',
    TransferLog    => 'logs/access_log',
    PidFile        => 'logs/httpd.pid',
  );

  # Read in main configuration file
  read_apache_config(\%cfg, $http_conf);

  # Without a ServerRoot the relative log paths cannot be resolved;
  # refuse to continue rather than silently operate on "/".
  my $root_dir=$cfg{ServerRoot};
  die "$::prog_name: No ServerRoot found in $http_conf\n"
    unless defined $root_dir && $root_dir ne '';
  $root_dir.="/" if $root_dir !~ m%/$%;

  chdir($root_dir) or die "$::prog_name: Could not enter directory $root_dir - $!\n";

  warn "$::prog_name: Working in directory $root_dir\n";


  # Could read in other configuration files but 'aint going to ...
  # read_apache_config(\%cfg, $cfg{AccessConfig});
  # read_apache_config(\%cfg, $cfg{ResourceConfig});


  # ... since all that was wanted were the log file names
  my %log_files=(
    error  => absolute($cfg{ErrorLog},    $root_dir),
    access => absolute($cfg{TransferLog}, $root_dir),
    pid    => absolute($cfg{PidFile},     $root_dir),
  );

  if ($::debug) {
    warn "$::prog_name: Log files\n";
    foreach my $file (sort keys %log_files) {
      warn "  $file: $log_files{$file}\n";
    }
  }


  my $pid=_read_pid($log_files{pid});

  warn "$::prog_name: Apache is running as PID $pid\n";

  my $access_log=$log_files{access};
  my $error_log=$log_files{error};

  my $old_access_log=$access_log . ".old";
  my $old_error_log=$error_log . ".old";

  # Split logs by date (yyyy-mm here)
  my $output_log_dir= dirname($old_access_log);
  $output_log_dir.="/" if $output_log_dir !~ m%/$%;

  die "$::prog_name: Output directory $output_log_dir does not exist\n"
    unless -d $output_log_dir;


  # Rotate logs
  warn "$::prog_name: Rotating $access_log to $old_access_log\n";
  unlink $old_access_log if -e $old_access_log;
  rename($access_log, $old_access_log) or die "$::prog_name: Cannot rename $access_log to $old_access_log - $!\n";

  warn "$::prog_name: Rotating $error_log to $old_error_log\n";
  unlink $old_error_log if -e $old_error_log;
  unless (rename($error_log, $old_error_log)) {
    my $reason="$!";
    # Put the access log back so both logs stay in step.
    _undo_rename($old_access_log, $access_log);
    die "$::prog_name: Cannot rename $error_log to $old_error_log - $reason\n";
  }


  # Tell httpd to refresh, close logs and start new ones
  # Must be 'root' to do this.  Hmm.

  # [Or the user who started the daemons, but usually that is root]

  # kill returns the number of processes signalled; 0 means httpd was
  # not told to reopen its logs and is still writing to the renamed
  # files, so undo the rotation instead of splitting live logs.
  unless (kill('HUP', $pid)) {
    my $reason="$!";
    _undo_rename($old_access_log, $access_log);
    _undo_rename($old_error_log,  $error_log);
    die "$::prog_name: Could not send HUP to process $pid - $reason\n";
  }

  # NOTE(review): httpd may still have the renamed files open for a
  # moment after the signal; anything it writes after this script has
  # read past that point would be missed - confirm whether a short
  # delay is wanted here.


  # ACCESS log
  my $count=_split_access_log($old_access_log, $output_log_dir);
  warn "$::prog_name: Wrote $count lines in total\n";


  # ERROR log
  $count=_split_error_log($old_error_log, $output_log_dir);
  warn "$::prog_name: Wrote $count lines in total\n";
}


# Read the process ID stored on the first line of $pid_file.
# Dies if the file cannot be read or does not hold a positive integer.
sub _read_pid {
  my($pid_file)=@_;

  my $pid_fh=IO::File->new($pid_file, "r")
    or die "$::prog_name: Could not read pid file $pid_file - $!\n";
  my $pid=<$pid_fh>;
  $pid_fh->close;

  # Strip the line ending only: chop() would remove the last digit of
  # a PID that is not followed by a newline.
  $pid='' unless defined $pid;
  $pid =~ s/\s+$//;

  # PID 0 is refused as well, since kill treats it as "the whole
  # process group" rather than as one process.
  die "$::prog_name:$pid_file: '$pid' does not look like a process ID\n"
    unless $pid =~ /^[0-9]+$/ && $pid > 0;

  $pid;
}


# Best-effort rollback of one rotation step: move $from back to $to,
# warning (not dying) if that fails so the caller can report the
# original error.
sub _undo_rename {
  my($from, $to)=@_;
  rename($from, $to)
    or warn "$::prog_name: Could not restore $from to $to - $!\n";
}


# Open $path for appending and announce it on STDERR.  $kind ('output'
# or 'error') only selects the wording of the failure message.
# Returns the IO::File handle; dies if the file cannot be opened.
sub _open_append {
  my($path, $kind)=@_;

  my $fh=IO::File->new($path, "a")
    or die "$::prog_name: Could not append to $kind log file $path - $!\n";
  warn "$::prog_name: Appending to new file $path\n";

  $fh;
}


# Close every handle in the hash referenced by $logs, which maps
# YYYY-MM to an IO::File handle.  Buffered write errors surface at
# close time, so a failed close is fatal.
sub _close_logs {
  my($logs)=@_;

  foreach my $year_month (sort keys %$logs) {
    $logs->{$year_month}->close
      or die "$::prog_name: Error closing log file for $year_month - $!\n";
  }
}


# Append each line of $old_access_log to access-YYYY-MM.log in
# $output_log_dir (which ends in '/'), choosing the file from the
# line's own timestamp.  Lines whose timestamp cannot be parsed go to
# access-0000-00.log.  Returns the number of lines written.
sub _split_access_log {
  my($old_access_log, $output_log_dir)=@_;

  my $access_fh=IO::File->new($old_access_log, "r")
    or die "$::prog_name: Cannot read access log file $old_access_log - $!\n";

  warn "$::prog_name: Reading $old_access_log\n";

  my %output_logs;
  my $count=0;
  while(defined(my $in=<$access_fh>)) {
    #host ident authuser [15/Aug/1997:19:31:08 +0100] ...

    my($mname,$year)=$in =~ m%^\S+ \S+ \S+ \[\d+/([^/]+)/(\d+):%;

    # Unparsable line or unknown month name: use 0 rather than undef.
    my $month=0;
    $month=$::month_name_to_month_number{$mname} || 0 if defined $mname;
    $year=0 unless defined $year;

    # HERE -- This is the line to change to alter filename date for ACCESSES
    my $year_month=sprintf("%04d-%02d",$year,$month);

    my $out_fh=$output_logs{$year_month};
    if (!$out_fh) {
      # HERE: Change the format of the access file name
      my $out_path=$output_log_dir . "access-$year_month.log";

      $out_fh=$output_logs{$year_month}=_open_append($out_path, 'output');
    }

    print $out_fh $in
      or die "$::prog_name: Could not write to access log for $year_month - $!\n";
    $count++;
  }
  $access_fh->close;

  # Explicitly close any opened output access log files
  _close_logs(\%output_logs);

  $count;
}


# Append each record of $old_error_log to error-YYYY-MM.log in
# $output_log_dir (which ends in '/').
#
# A record starts at a line carrying a timestamp different from that
# of the record being collected; lines without a timestamp, or with
# the same timestamp, are continuation lines of the current record.  A
# record is filed under the month of its OWN timestamp.  Lines that
# precede the first timestamp are kept and filed under 0000-00.
# Returns the number of records written.
sub _split_error_log {
  my($old_error_log, $output_log_dir)=@_;

  my $error_in_fh=IO::File->new($old_error_log, "r")
    or die "$::prog_name: Cannot read error log file $old_error_log - $!\n";

  warn "$::prog_name: Reading $old_error_log\n";

  my %error_logs;
  my $count=0;

  # Write one finished record to the file for its month.
  my $write_record=sub {
    my($record_year_month, $text)=@_;

    my $error_fh=$error_logs{$record_year_month};
    if (!$error_fh) {
      # HERE: Change the format of the error file name
      my $error_path=$output_log_dir . "error-$record_year_month.log";

      $error_fh=$error_logs{$record_year_month}=_open_append($error_path, 'error');
    }

    print $error_fh "$text\n"
      or die "$::prog_name: Could not write to error log for $record_year_month - $!\n";
    $count++;
  };

  my $line;                   # record being collected; undef = none yet
  my $date_time='';           # timestamp of that record
  my $year_month='0000-00';   # month of that record

  while(defined(my $in=<$error_in_fh>)) {
    chomp $in;

    # [Fri Sep  5 03:24:52 1997] <error message>
    # <more error message>
    # [... next date ...       ]

    # A line without a timestamp inherits the current record's.
    my $line_date_time=$date_time;
    my $line_year_month=$year_month;

    if ($in =~ /^\[\w+ (\w+)\s+(\d+) (\S+) (\d\d\d\d)\]/) {
      my($mname,$dom,$time,$year)=($1,$2,$3,$4);
      my $month=$::month_name_to_month_number{$mname} || 0;
      # HERE -- This is the line to change to alter filename date for ERRORS
      $line_year_month=sprintf("%04d-%02d",$year,$month);
      $line_date_time=$line_year_month."-".sprintf("%02d",$dom)."-$time";
    }

    if (defined $line && $line_date_time eq $date_time) {
      # Continuation of the current record
      $line.="\n$in";
    } else {
      # New record - flush the previous one under ITS month first
      $write_record->($year_month, $line) if defined $line;
      $line=$in;
    }

    $date_time=$line_date_time;
    $year_month=$line_year_month;
  }
  $error_in_fh->close;

  # Output last record
  $write_record->($year_month, $line) if defined $line;

  # Explicitly close any opened output error log files
  _close_logs(\%error_logs);

  $count;
}

__END__

=head1 NAME

rotate-logs - rotate Apache access and error logs into cumulative monthly ones

=head1 SYNOPSIS

  rotate-logs <apache main configuration file>

=head1 DESCRIPTION

Uses the apache configuration to work out the location of the access
and error logs and the PID file.  Then, rotates the logs, HUPs the
apache daemon and then starts work.  For the access and error logs,
it parses the lines and extracts or interprets the dates for each
entry.  It then stores the elements in date-specific files in the
form access-YYYY-MM.log or error-YYYY-MM.log

=head1 NOTES

This program must run as root to do a 'kill -HUP' on the Apache
daemon unless you are starting it as another user.

This program has mostly been used with the 'Extended' access log format:

  LogFormat "%h %l %u %t \"%r\" %s %b \"%{Referer}i\" \"%{User-Agent}i\""

but it should work with the default LogFormat since the date
information is early in the output line (%t).

=head1 CUSTOMISING

To change the format of the log file names, look for 'HERE' in the
source.  There you can change the date format from YYYY-MM to, say,
YY-MM to be anti-Y2K compliant :-( 

The output file names are similarly fixed at access-THING.log and
error-THING.log, but you can change that too.

=head1 AUTHOR

Dave Beckett E<lt>I<D.J.Beckett@ukc.ac.uk>E<gt>.

http://www.cs.ukc.ac.uk/people/staff/djb1/

=head1 COPYRIGHT

Copyright 1998 Dave Beckett, University of Kent at Canterbury

This program is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

=head1 SEE ALSO

apache(1) and http://www.apache.org/

=cut
