#!/usr/bin/env python # -*- coding: utf-8 -*- """ Leechr ====== A (not yet) multi-thread mass downloader (using FTP, so far). Python 2.5 or later is required. Future ideas ------------ - Implement recursive directory downloads. (Should be easy, but has to be configurable.) - Implement resuming of downloads. - Use threads for multiple downloads in parallel. (This is already prepared and might even work, but would probably wreck the visualization; a new GUI interface is required.) - Expand to other methods beneath FTP; most important is HTTP. (Compiling the list of files to be downloaded requires some rethinking, probably scraping a website for all URLs [of a certain pattern] in the case of HTTP.) Unresolved issues ----------------- - ``KeyboardInterrupt`` stuff doesn't really work. - Sometimes unexplored connection hang when trying to retrieve the first file. - What about symlinks? :Copyright: 2007-2008 Jochen Kupperschmidt :Date: 11-Apr-2007 :License: GNU General Public License, Version 2 """ from __future__ import with_statement import ftplib from itertools import cycle import os import Queue import socket import sys from sys import argv, exit import threading import urlparse class FileList(list): """A list of (filename, size) tuples.""" def parse_line(self, line): """Parse directory listing line and save a tuple (filename, size).""" # Skip directories. if line[0] == 'd': return columns = line.split(None, 8) self.append((columns[8], int(columns[4]))) class FileDownload(file): """A file that is to be, currently is or has been downloaded.""" def __init__(self, name, target_size, *args, **kwargs): file.__init__(self, name, *args, **kwargs) self.target_size = int(target_size) self.size = 0 self.percent = 0 self.last_status_len = 0 self.spin = cycle(r'-\|/') self.display('\n * %-52s ' % os.path.basename(self.name)) def retr_block(self, block): self.size += float(len(block)) self.percent = (self.size / self.target_size) * 100 self.update_display() self.write(block) def update_display(self, spinner=True): """Update the display to show the current status.""" self.display('\b' * self.last_status_len) spinner = self.spin.next() if spinner else ' ' status = '%7.2f MB (%5.1f%%) %c' % ( self.size / 1024 / 1024, self.percent, spinner) self.last_status_len = len(status) self.display(status) def close(self): self.update_display(spinner=False) self.display('\n') file.close(self) def display(self, s): sys.stdout.write(s) class DownloadSession(ftplib.FTP): """A FTP mass download session.""" def __init__(self, *args, **kwargs): ftplib.FTP.__init__(self, *args, **kwargs) self.queue = Queue.Queue() def start(self, path, target_dir, offset=0, num_threads=1): """Start the actual download.""" self.cwd(path) self.target_dir = target_dir print 'Starting download from ftp://%s:%d%s' % ( self.host, self.port, path) files = self.get_file_list()[offset:] # Start downloader threads. for i in range(num_threads): t = threading.Thread(target=self.process_queue) t.setDaemon(True) t.start() # Fill queue. map(self.queue.put, files) # Block until all tasks are done. self.queue.join() print 'Done.' def get_file_list(self): """Retrieve a list of available files with meta data.""" files = FileList() self.retrlines('LIST', files.parse_line) return files def process_queue(self): """Retrieve all files waiting in the queue.""" while True: try: filename, size = self.queue.get() except Queue.Empty: break self.retr_file(filename, size) self.queue.task_done() def retr_file(self, filename, size): """Retrieve a file block-wise and save it to a local file.""" target = os.path.join(self.target_dir, filename) with FileDownload(target, size, 'wb') as f: self.retrbinary('RETR ' + filename, f.retr_block) def main(): if len(argv) not in (2, 3, 4): print 'usage: %s <ftp:// URL> <target directory> [offset]' % argv[0] exit(2) scheme, host, path = urlparse.urlsplit(argv[1])[:3] if scheme != 'ftp': print 'URL has to start with ftp://' exit(2) target_dir = argv[2] if not os.path.exists(target_dir): print 'Target directory "%s" does not exist.' % target_dir exit(1) offset = int(argv[3]) if (len(argv) == 4) else 0 try: ds = DownloadSession(host) ds.login() # anonymous ds.start(path, target_dir, offset) ds.quit() except socket.error, e: print '\nSocket error:', e[1] except KeyboardInterrupt: print '\nCtrl-C pressed, aborting.' if __name__ == '__main__': main()