Find files with UTF-8 BOM (Byte Order Mark)

#!/usr/bin/ruby

# Find files with a Unicode BOM (Byte Order Mark) for UTF-8 encoding (which
# doesn't actually say anything about byte order, as that is a non-issue with
# UTF-8).
#
# See http://en.wikipedia.org/wiki/Byte_Order_Mark
#
# Copyright (c) 2008 Jochen Kupperschmidt <http://homework.nwsnet.de/>
# Version: 14-Jul-2008
# Released under the terms of the MIT License.

require 'find'
require 'optparse'


# Configure the command line parser attached to ARGV. Switches that were
# given end up in the `options` hash; parse! removes them from ARGV so that
# only positional arguments remain.
options = {}
parser = ARGV.options do |opts|
  opts.banner << " <path>"
  opts.on("-v", "--[no-]verbose", "run verbosely") { |flag| options[:verbose] = flag }
end
parser.parse!

# The first positional argument is the path to scan. It is mandatory: when it
# is missing, print the usage text to standard error and exit with status 2.
path = ARGV[0]
unless path
  STDERR.puts parser
  exit 2
end

# Recursively scan `path` and report every regular file whose first three
# bytes are the UTF-8 BOM (EF BB BF).
#
# The signature is compared as byte values rather than as strings: on Ruby 1.9
# and later a length-limited read returns a binary (ASCII-8BIT) string, which
# never compares equal to the UTF-8 encoded literal "\xEF\xBB\xBF", so a plain
# string comparison would miss every BOM.
UTF8_BOM_BYTES = [0xEF, 0xBB, 0xBF].freeze

Find.find(path) do |fn|
  next unless File.file?(fn)

  begin
    # Binary mode keeps the leading bytes free of newline/encoding translation.
    head = File.open(fn, "rb") { |f| f.read(3) }
  rescue SystemCallError => e
    # A single unreadable file must not abort the whole scan; report and skip.
    STDERR.puts "Cannot read file: #{e.message}"
    next
  end

  # read(3) returns nil for an empty file; to_s turns that into "no bytes".
  has_bom = head.to_s.unpack("C*") == UTF8_BOM_BYTES

  if options[:verbose]
    # List every scanned file and the corresponding result. Interpolation is
    # used so the string yielded by Find.find is not modified in place.
    status = has_bom ? "BOM found." : "no BOM."
    puts "#{fn} ... #{status}"
  elsif has_bom
    # Only list names of files with BOMs found.
    puts fn
  end
end