#!/usr/bin/ruby

# Skim through Apache access log files and count the number of hits to URLs
# matching a given pattern.
#
# Copyright (c) 2008 Jochen Kupperschmidt <http://homework.nwsnet.de/>
# Version: 07-Aug-2008
# Released under the terms of the MIT License.

require 'zlib'

# Adjust this. The capture group selects the part of the request line that
# hits are grouped by.
regex = %r{GET (/media/videos/\d{4}-\d{2}-\d{2})}
# TODO: Might be nice to be able to pass this expression as command line
#       argument.

# Counts, per captured URL, how many lines of one or more log files match a
# regular expression.
class HitCounter
  # @param regex [Regexp] pattern applied to every line; the text of its last
  #   participating capture group is used as the counting key
  def initialize(regex)
    @regex = regex
    # Unknown keys start at zero so they can be incremented directly.
    @hits = Hash.new(0)
  end

  # Read a file line by line and count each matched URL separately. A file
  # whose name ends in '.gz' is treated as gzip-compressed and decompressed
  # on the fly.
  #
  # @param fname [String] path of the log file to read
  def parse_file(fname)
    puts "Parsing #{fname} ..."
    opener = fname.end_with?('.gz') ? Zlib::GzipReader : File
    # The block form closes the handle even if reading raises.
    opener.open(fname) do |io|
      io.each_line do |line|
        # Matching a regex against a string with invalid bytes raises, and
        # one malformed request should not abort the whole run.
        line = line.scrub unless line.valid_encoding?
        match = line.match(@regex)
        # Equivalent of Perl-style `$+`: the last group that took part in
        # the match (nil when the pattern has no groups).
        @hits[match.captures.compact.last] += 1 if match
      end
    end
  end

  # Print every matched URL together with the number of times it was hit, in
  # the order the URLs were first seen.
  def show_results
    puts "\nResults:"
    @hits.each { |url, count| puts format('%s %8d hits', url, count) }
  end

  # Shortcut to parse multiple files and display the result.
  #
  # Falls back to every 'access.log*' file in the current directory when
  # +fnames+ is empty, and exits with status 2 when that finds nothing either.
  # An interrupt (Ctrl-C) while parsing ends the run with a short notice.
  #
  # @param regex [Regexp] pattern handed to the counter
  # @param fnames [Array<String>] log files to analyze; may be empty
  def self.batch(regex, fnames)
    fnames = Dir['access.log*'] if fnames.empty?
    if fnames.empty?
      $stderr.puts 'No files given.'
      exit 2
    end

    puts "Going to analyze #{fnames.length} files ..."
    counter = new(regex)
    begin
      fnames.each { |fname| counter.parse_file(fname) }
      counter.show_results
    rescue Interrupt
      puts "\nAborted."
    end
  end
end

HitCounter.batch(regex, ARGV)