# Find files with UTF-8 BOM (Byte Order Mark)


# Find files with a Unicode BOM (Byte Order Mark) for UTF-8 encoding (which
# doesn't actually say anything about byte order, as that is a non-issue with
# UTF-8).
# See
# Copyright (c) 2008 Jochen Kupperschmidt <>
# Version: 14-Jul-2008
# Released under the terms of the MIT License.

require 'find'
require 'optparse'

# Byte sequence of the UTF-8 encoded BOM (U+FEFF). It is built with Array#pack
# so that the string is binary: IO#read with a length returns binary data, and
# on Ruby >= 1.9 a binary string never compares equal to a UTF-8 literal that
# contains non-ASCII bytes.
UTF8_BOM = [0xEF, 0xBB, 0xBF].pack("C3").freeze

# Tell whether the file named +fn+ starts with the UTF-8 BOM.
#
# Only the leading bytes are read. IO#read returns nil for an empty file and a
# shorter string for files with fewer than three bytes; neither equals the BOM.
# Errors raised while opening or reading the file propagate to the caller.
def utf8_bom?(fn)
  File.open(fn, "rb") { |f| f.read(UTF8_BOM.size) } == UTF8_BOM
end

# Create option parser.
options = {}
ARGV.options do |opts|
  opts.banner << " <path>"
  opts.on("-v", "--[no-]verbose", "run verbosely") do |v|
    options[:verbose] = v
  end
end

# Parse (and remove) the switches from ARGV so that only the positional
# arguments remain. An unknown or malformed switch prints the reason plus the
# usage text and exits with status 2, like a missing path does.
begin
  ARGV.parse!
rescue OptionParser::ParseError => e
  STDERR.puts e.message
  STDERR.puts ARGV.options
  exit 2
end

# Check for required path argument.
path = ARGV.first
unless path
  STDERR.puts ARGV.options
  exit 2
end

# Recursively scan files for the first bytes.
Find.find(path) do |fn|
  next unless File.file?(fn)

  begin
    has_bom = utf8_bom?(fn)
  rescue SystemCallError => e
    # An unreadable file should not abort the whole scan; report and move on.
    STDERR.puts "#{fn}: #{e.message}"
    next
  end

  if options[:verbose]
    # List every scanned file and the corresponding result.
    puts "#{fn} ... #{has_bom ? 'BOM found.' : 'no BOM.'}"
  elsif has_bom
    # Only list names of files with BOMs found.
    puts fn
  end
end