# Leechr - a (not yet) multi-threaded mass downloader (using FTP, so far)
#!/usr/bin/env python# -*- coding: utf-8 -*-"""
Leechr
======
A (not yet) multi-thread mass downloader (using FTP, so far).
Python 2.5 or later is required.
Future ideas
------------
- Implement recursive directory downloads. (Should be easy, but has to be
configurable.)
- Implement resuming of downloads.
- Use threads for multiple downloads in parallel. (This is already prepared
and might even work, but would probably wreck the visualization; a new GUI
interface is required.)
- Expand to other methods beneath FTP; most important is HTTP. (Compiling the
list of files to be downloaded requires some rethinking, probably scraping a
website for all URLs [of a certain pattern] in the case of HTTP.)
Unresolved issues
-----------------
- ``KeyboardInterrupt`` stuff doesn't really work.
- Sometimes unexplored connection hang when trying to retrieve the first file.
- What about symlinks?
:Copyright: 2007-2008 Jochen Kupperschmidt
:Date: 11-Apr-2007
:License: GNU General Public License, Version 2
"""from__future__importwith_statementimportftplibfromitertoolsimportcycleimportosimportQueueimportsocketimportsysfromsysimportargv,exitimportthreadingimporturlparseclassFileList(list):"""A list of (filename, size) tuples."""defparse_line(self,line):"""Parse directory listing line and save a tuple (filename, size)."""# Skip directories.ifline[0]=='d':returncolumns=line.split(None,8)self.append((columns[8],int(columns[4])))classFileDownload(file):"""A file that is to be, currently is or has been downloaded."""def__init__(self,name,target_size,*args,**kwargs):file.__init__(self,name,*args,**kwargs)self.target_size=int(target_size)self.size=0self.percent=0self.last_status_len=0self.spin=cycle(r'-\|/')self.display('\n * %-52s '%os.path.basename(self.name))defretr_block(self,block):self.size+=float(len(block))self.percent=(self.size/self.target_size)*100self.update_display()self.write(block)defupdate_display(self,spinner=True):"""Update the display to show the current status."""self.display('\b'*self.last_status_len)spinner=self.spin.next()ifspinnerelse' 'status='%7.2f MB (%5.1f%%) %c'%(self.size/1024/1024,self.percent,spinner)self.last_status_len=len(status)self.display(status)defclose(self):self.update_display(spinner=False)self.display('\n')file.close(self)defdisplay(self,s):sys.stdout.write(s)classDownloadSession(ftplib.FTP):"""A FTP mass download session."""def__init__(self,*args,**kwargs):ftplib.FTP.__init__(self,*args,**kwargs)self.queue=Queue.Queue()defstart(self,path,target_dir,offset=0,num_threads=1):"""Start the actual download."""self.cwd(path)self.target_dir=target_dirprint'Starting download from ftp://%s:%d%s'%(self.host,self.port,path)files=self.get_file_list()[offset:]# Start downloader threads.foriinrange(num_threads):t=threading.Thread(target=self.process_queue)t.setDaemon(True)t.start()# Fill queue.map(self.queue.put,files)# Block until all tasks are done.self.queue.join()print'Done.'defget_file_list(self):"""Retrieve a list of available files with 
meta data."""files=FileList()self.retrlines('LIST',files.parse_line)returnfilesdefprocess_queue(self):"""Retrieve all files waiting in the queue."""whileTrue:try:filename,size=self.queue.get()exceptQueue.Empty:breakself.retr_file(filename,size)self.queue.task_done()defretr_file(self,filename,size):"""Retrieve a file block-wise and save it to a local file."""target=os.path.join(self.target_dir,filename)withFileDownload(target,size,'wb')asf:self.retrbinary('RETR '+filename,f.retr_block)defmain():iflen(argv)notin(2,3,4):print'usage: %s <ftp:// URL> <target directory> [offset]'%argv[0]exit(2)scheme,host,path=urlparse.urlsplit(argv[1])[:3]ifscheme!='ftp':print'URL has to start with ftp://'exit(2)target_dir=argv[2]ifnotos.path.exists(target_dir):print'Target directory "%s" does not exist.'%target_direxit(1)offset=int(argv[3])if(len(argv)==4)else0try:ds=DownloadSession(host)ds.login()# anonymousds.start(path,target_dir,offset)ds.quit()exceptsocket.error,e:print'\nSocket error:',e[1]exceptKeyboardInterrupt:print'\nCtrl-C pressed, aborting.'if__name__=='__main__':main()