
Console output of a download in progress.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Leechr
======
A (not yet) multi-threaded mass downloader (using FTP, so far).
Python 2.5 or later is required.
Future ideas
------------
- Implement recursive directory downloads. (Should be easy, but has to be
configurable.)
- Implement resuming of downloads.
- Use threads for multiple downloads in parallel. (This is already prepared
and might even work, but would probably wreck the visualization; a new GUI
interface is required.)
- Expand to other protocols besides FTP; most important is HTTP. (Compiling the
list of files to be downloaded requires some rethinking, probably scraping a
website for all URLs [of a certain pattern] in the case of HTTP.)
Unresolved issues
-----------------
- ``KeyboardInterrupt`` stuff doesn't really work.
- Unexplained connection hangs sometimes occur when retrieving the first file.
- What about symlinks?
:Copyright: 2007-2008 Jochen Kupperschmidt
:Date: 11-Apr-2007
:License: GNU General Public License, Version 2
"""
from __future__ import with_statement
import ftplib
from itertools import cycle
import os
import Queue
import socket
import sys
from sys import argv, exit
import threading
import urlparse
class FileList(list):
    """A list of (filename, size) tuples built from directory listing lines."""

    def parse_line(self, line):
        """Parse a directory listing line and save a (filename, size) tuple.

        The line is split on whitespace into at most nine columns; the
        fifth column is taken as the size and the ninth (which keeps any
        embedded spaces) as the filename.

        Lines that do not describe a retrievable file are skipped instead
        of raising: directories (first character ``d``), blank lines,
        lines with fewer than nine columns (for example a ``total 42``
        summary line) and lines whose size column is not an integer.

        :param line: a single listing line without its line terminator.
        """
        # Skip directories.
        if line.startswith('d'):
            return

        columns = line.split(None, 8)
        # Blank lines and summary lines do not have enough columns.
        if len(columns) < 9:
            return

        try:
            size = int(columns[4])
        except ValueError:
            # Not a plain byte count, so not an entry we can download.
            return

        # NOTE(review): symlink entries presumably still arrive here with
        # "name -> target" as the filename; they are not handled specially.
        self.append((columns[8], size))
class FileDownload(file):
    """A file that is to be, currently is or has been downloaded.

    Behaves like a regular ``file`` opened on the local target path, and
    additionally keeps track of the amount of data received so far and
    renders a one-line status (size, percentage, spinner) on standard
    output.
    """

    def __init__(self, name, target_size, *args, **kwargs):
        """Open the local file and print the start of its status line.

        :param name: path of the local file; passed on to ``file``.
        :param target_size: expected final size in bytes (anything
            ``int()`` accepts).

        Any further arguments (such as the mode) are passed on to ``file``.
        """
        file.__init__(self, name, *args, **kwargs)
        self.target_size = int(target_size)
        # Number of bytes received so far.
        self.size = 0
        # Received share of ``target_size``, in percent.
        self.percent = 0
        # Length of the status text printed last, i.e. the number of
        # backspaces needed to overwrite it.
        self.last_status_len = 0
        self.spin = cycle(r'-\|/')
        self.display('\n * %-52s ' % os.path.basename(self.name))

    def retr_block(self, block):
        """Account for a received block, refresh the status and store it.

        Takes a single argument so that it can serve as the callback
        passed to ``retrbinary``.

        :param block: the chunk of data that was just received.
        """
        self.size += float(len(block))
        if self.target_size > 0:
            self.percent = (self.size / self.target_size) * 100
        else:
            # A file announced with size 0 that still delivers data has no
            # meaningful ratio; avoid dividing by zero.
            self.percent = 100.0
        self.update_display()
        self.write(block)

    def update_display(self, spinner=True):
        """Update the display to show the current status.

        :param spinner: if true, advance and show the spinner character;
            otherwise show a blank in its place.
        """
        spinner = self.spin.next() if spinner else ' '
        status = '%7.2f MB (%5.1f%%) %c' % (
            self.size / 1024 / 1024, self.percent, spinner)
        # Backspace over the previous status and print the new one in a
        # single write so that only one flush per update is needed.
        self.display('\b' * self.last_status_len + status)
        self.last_status_len = len(status)

    def close(self):
        """Print the final status (without spinner) and close the file."""
        # Only finish the status line once, even if closed repeatedly.
        if not self.closed:
            self.update_display(spinner=False)
            self.display('\n')
        file.close(self)

    def display(self, s):
        """Write ``s`` to standard output and flush it right away.

        The status text contains no newline, so without the explicit flush
        it could stay in the output buffer instead of appearing live.
        """
        sys.stdout.write(s)
        sys.stdout.flush()
class DownloadSession(ftplib.FTP):
    """An FTP mass download session.

    Extends ``ftplib.FTP`` with a queue of ``(filename, size)`` tuples
    that is worked off by one or more worker threads, each retrieving the
    queued files from the current remote directory into a local target
    directory.
    """

    def __init__(self, *args, **kwargs):
        """Set up the FTP session (arguments as for ``ftplib.FTP``)."""
        ftplib.FTP.__init__(self, *args, **kwargs)
        self.queue = Queue.Queue()

    def start(self, path, target_dir, offset=0, num_threads=1):
        """Start the actual download and block until it has finished.

        :param path: remote directory to change into and download from.
        :param target_dir: local directory the files are saved to.
        :param offset: number of leading entries of the file list to skip.
        :param num_threads: number of worker threads to start.
        """
        # NOTE(review): all workers issue their commands on this single FTP
        # connection; whether that is safe for num_threads > 1 is unverified.
        self.cwd(path)
        self.target_dir = target_dir
        print('Starting download from ftp://%s:%d%s' % (
            self.host, self.port, path))
        files = self.get_file_list()[offset:]

        # Start downloader threads.  They are daemon threads because they
        # wait on the queue forever and must not keep the process alive.
        for _ in range(num_threads):
            worker = threading.Thread(target=self.process_queue)
            worker.setDaemon(True)
            worker.start()

        # Fill queue.
        for entry in files:
            self.queue.put(entry)

        # Block until all tasks are done.
        self.queue.join()
        print('Done.')

    def get_file_list(self):
        """Retrieve a list of available files with meta data.

        :return: a ``FileList`` filled from the ``LIST`` output of the
            current remote directory.
        """
        files = FileList()
        self.retrlines('LIST', files.parse_line)
        return files

    def process_queue(self):
        """Retrieve all files waiting in the queue.

        Runs forever, blocking while the queue is empty.  A failed
        retrieval is reported and the next file is tried; every item taken
        from the queue is marked as done in any case, so that
        ``queue.join()`` cannot wait forever for a file that failed.
        """
        while True:
            filename, size = self.queue.get()
            try:
                self.retr_file(filename, size)
            except ftplib.all_errors:
                # Fetched via exc_info() so the handler needs no
                # version-specific syntax for binding the exception.
                error = sys.exc_info()[1]
                print('\nError retrieving %s: %s' % (filename, error))
            finally:
                self.queue.task_done()

    def retr_file(self, filename, size):
        """Retrieve a file block-wise and save it to a local file.

        :param filename: name of the remote file in the current directory.
        :param size: expected size in bytes, used for the progress display.
        """
        # The name comes from the server's listing; use only its final
        # path component so the file cannot land outside the target
        # directory.
        local_name = os.path.basename(filename)
        target = os.path.join(self.target_dir, local_name)
        with FileDownload(target, size, 'wb') as f:
            self.retrbinary('RETR ' + filename, f.retr_block)
def main():
    """Parse the command line and run an anonymous FTP mass download.

    Usage: ``<script> <ftp:// URL> <target directory> [offset]``

    Exits with status 2 on wrong usage (wrong number of arguments, a URL
    scheme other than ``ftp``, a non-integer offset) and with status 1 if
    the target directory does not exist.  Socket errors, FTP errors and
    ``KeyboardInterrupt`` raised during the session are reported on
    standard output instead of being propagated.
    """
    # URL and target directory are mandatory, the offset is optional.
    if len(argv) not in (3, 4):
        print('usage: %s <ftp:// URL> <target directory> [offset]' % argv[0])
        exit(2)

    scheme, host, path = urlparse.urlsplit(argv[1])[:3]
    if scheme != 'ftp':
        print('URL has to start with ftp://')
        exit(2)

    target_dir = argv[2]
    # A regular file of that name is no usable target either.
    if not os.path.isdir(target_dir):
        print('Target directory "%s" does not exist.' % target_dir)
        exit(1)

    offset = 0
    if len(argv) == 4:
        try:
            offset = int(argv[3])
        except ValueError:
            print('Offset has to be an integer.')
            exit(2)

    try:
        ds = DownloadSession(host)
        ds.login()  # anonymous
        ds.start(path, target_dir, offset)
        ds.quit()
    except socket.error:
        e = sys.exc_info()[1]
        # Socket errors carry either (errno, message) or just a message;
        # the last argument is the readable part in both cases.
        print('\nSocket error: %s' % (e.args[-1] if e.args else e,))
    except ftplib.Error:
        print('\nFTP error: %s' % (sys.exc_info()[1],))
    except KeyboardInterrupt:
        print('\nCtrl-C pressed, aborting.')


# Run the downloader only when executed as a script, not when imported.
if __name__ == '__main__':
    main()