
Command-line output. Files are sorted from oldest to most recent change.
#!/usr/bin/env python # -*- coding: utf-8 -*- """ Repository Sorter ================= Recursively search files in a directory for ``$Id ... $`` identifier strings and show a list of matching files, ordered by the date of their latest change (according to the identifier). Supports Subversion (SVN), CVS, and RCS. :Copyright: 2006 Jochen Kupperschmidt :Date: 12-May-2006 :License: MIT """ from __future__ import with_statement from itertools import islice from optparse import OptionParser import os import re # Configuration BINARY_SUFFIXES = set(( 'gif', 'jpg', 'jpeg', 'png', 'tga', 'tif', 'class', 'pyc', 'bz2', 'gz', 'rar', 'tar', 'zip')) # ... RE_SVN_ID = re.compile(''' .* \$Id: # Id string prefix \ (?P<filename>.+) # filename \ (?P<version>\d+) # file revision \ (?P<date>\d{4}-\d{2}-\d{2}) # last change's date \ (?P<time>\d{2}:\d{2}:\d{2})Z # last change's time \ (?P<author>.+) # last change's author's name \ \$ # Id string suffix .* ''', re.VERBOSE) RE_RCS_ID = re.compile(''' .* \$Id: # Id string prefix \ (?P<filename>.+),v # filename \ (?P<version>\d+.\d+) # file version \ (?P<date>\d{4}/\d{2}/\d{2}) # last change's date \ (?P<time>\d{2}:\d{2}:\d{2}) # last change's time \ (?P<author>.+) # last change's author's name \ Exp\ \$ # Id string suffix .* ''', re.VERBOSE) TYPES = { 'svn': ('.svn', RE_SVN_ID), 'cvs': ('CVS', RE_RCS_ID), 'rcs': ('RCS', RE_RCS_ID), } def autodetect_type(dir): """Try to autodetect the repository type by folder names.""" entries = os.listdir(dir) for type in TYPES: if TYPES[type][0] in entries: return type def find_id_line(f, max_lines): """Try to find a line containing the id keyword ``$Id$``.""" for line in islice(f, max_lines): if '$Id' in line: return line class Id(object): """An identifier.""" def __init__(self, **kwargs): for key, value in kwargs.iteritems(): setattr(self, key, value) def __cmp__(self, other): """Specify the sort order of ``Id`` objects.""" for attr in ('date', 'time', 'filename', 'version'): result = cmp(getattr(self, attr), getattr(other, attr)) if result != 0: return result return 0 @classmethod def parse_id(cls, line, type): """Parse id string with compiled pattern.""" m = TYPES[type][1].match(line) if m is not None: return cls(**m.groupdict()) def scan_files(path, opts): """Scan through files, trying to find an id signature.""" for root, dirs, files in os.walk(path): # Skip version control specific directories. for type in TYPES: vc_dir = TYPES[type][0] if vc_dir in dirs: dirs.remove(vc_dir) for fname in files: # Skip defined binary file suffixes. if fname.split('.')[-1] in BINARY_SUFFIXES: continue # Open file and look for ident line. with open(os.path.join(root, fname), 'rb') as f: id_line = find_id_line(f, opts.num_lines) if id_line: id = Id.parse_id(id_line, opts.type) if id is not None: yield id def main(): # Create option parser and define options. parser = OptionParser( usage='%prog [options] <directory>', version='Repository Sorter', description=('Search files in a repository for ident strings' ' and order the files by the date of their last change.') ) parser.add_option('-t', '--type', choices=TYPES.keys() + ['auto'], dest='type', default='auto', help='repository type: %s or auto (default)' % ', '.join(TYPES.keys()) ) parser.add_option('-a', '--authors', action='store_true', dest='authors', default=False, help='display last authors') parser.add_option('-n', '--num-lines', dest='num_lines', type='int', default=10, help='maximum number of lines to scan') # Process options and arguments. opts, args = parser.parse_args() if len(args) != 1: parser.print_help() parser.exit() # Autodetect repository type. if opts.type == 'auto': opts.type = autodetect_type(args[0]) if opts.type is None: parser.exit(msg='Auto-detection of repository type failed.' ' Please specify a type.\n') print('Auto-detection assumes this is a %s repository.\n' % opts.type.upper()) # Scan. ids = list(scan_files(args[0], opts)) # Print sorted results. ids.sort() format = '%(date)s %(time)s %(filename)s' if opts.authors: format += ' [%(author)s]' for id in ids: print format % id.__dict__ if __name__ == '__main__': main()