Following a ticket on the Adopy bug tracker, I decided to rewrite the method that handles downloading. The goal is simple: be able to resume a download where it stopped when something goes wrong (and, in the future, be able to resume it after deliberately pausing it).
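In practice, resuming comes down to sending an HTTP Range header and appending the 206 Partial Content answer to the bytes already on disk. A minimal sketch of the idea (the URL and the offset are of course made up):

import urllib2

already_read = 4096  # bytes we already have on disk
req = urllib2.Request('http://example.com/big.iso')
req.add_header('Range', 'bytes=%d-' % already_read)
resp = urllib2.urlopen(req)  # a server that supports it answers 206 Partial Content
out = open('big.iso', 'ab')  # append the missing tail to the existing file
out.write(resp.read())
out.close()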
Since writing clean, fully functional and efficient code is no easy task in this case, I went for an alternative solution: overriding the retrieve() method of the URLopener class (from urllib).
And in order not to modify the whole library, I proceed as follows:
import os
from urllib import (quote_plus, FancyURLopener, URLopener, unwrap, toBytes,
                    splittype, splithost, splitquery, splitattr,
                    url2pathname, ContentTooShortError)


def retrieve(self, url, filename=None, reporthook=None, data=None,
             maxtries=5, r_range=None):
    """retrieve(url) returns (filename, headers) for a local object
    or (tempfilename, headers) for a remote object.
    If it fails, it relaunches itself until the download is complete or
    maxtries == 0 (maxtries == -1 means unlimited tries).
    r_range is a tuple (start, end) giving the byte range of the remote
    object to retrieve (ignored for local files)."""
    if maxtries < -1:
        raise ValueError('maxtries must be at least equal to -1')
    url = unwrap(toBytes(url))
    if self.tempcache and url in self.tempcache:
        return self.tempcache[url]
    type, url1 = splittype(url)
    if filename is None and (not type or type == 'file'):
        try:
            fp = self.open_local_file(url1)
            hdrs = fp.info()
            fp.close()
            return url2pathname(splithost(url1)[1]), hdrs
        except IOError:
            pass
    if r_range is not None:
        # drop any Range header left over from a previous attempt, then
        # ask the server for the missing part only
        self.addheaders = [h for h in self.addheaders if h[0] != 'Range']
        try:
            self.addheader('Range', 'bytes=%d-%d' % r_range)
        except TypeError:
            raise ValueError('r_range must be a tuple of two ints: (start, end)')
    fp = self.open(url, data)
    try:
        headers = fp.info()
        if filename:
            # append mode, so a resumed attempt adds to what is already there
            tfp = open(filename, 'ab')
        else:
            import tempfile
            garbage, path = splittype(url)
            garbage, path = splithost(path or "")
            path, garbage = splitquery(path or "")
            path, garbage = splitattr(path or "")
            suffix = os.path.splitext(path)[1]
            (fd, filename) = tempfile.mkstemp(suffix)
            # the attribute is name-mangled inside URLopener
            self._URLopener__tempfiles.append(filename)
            tfp = os.fdopen(fd, 'ab')
        try:
            result = filename, headers
            if self.tempcache is not None:
                self.tempcache[url] = result
            bs = 1024 * 8
            size = -1
            read = 0
            blocknum = 0
            if "content-length" in headers:
                size = int(headers["Content-Length"])
            elif r_range is not None:
                # fall back on the number of bytes we asked for
                size = r_range[1] - r_range[0]
            if reporthook:
                reporthook(blocknum, bs, size)
            while 1:
                block = fp.read(bs)
                if block == "":
                    break
                read += len(block)
                tfp.write(block)
                blocknum += 1
                if reporthook:
                    reporthook(blocknum, bs, size)
        finally:
            tfp.close()
    finally:
        fp.close()
    # if we got fewer bytes than expected, retry from where we stopped,
    # or raise once the tries are exhausted
    if size >= 0 and read < size:
        if maxtries > 0 or maxtries == -1:
            # absolute offsets: if this call was itself ranged, shift the
            # resume point by its start offset
            start = read if r_range is None else r_range[0] + read
            end = size if r_range is None else r_range[1]
            return self.retrieve(url, filename, reporthook, data,
                                 maxtries if maxtries == -1 else maxtries - 1,
                                 r_range=(start, end))
        else:
            raise ContentTooShortError("retrieval incomplete: got only %i out "
                                       "of %i bytes" % (read, size), result)
    return result

# use our retrieve() in place of the original one
URLopener.retrieve = retrieve
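For reference, here is what using the patched method could look like (the URL and file name are made up); since FancyURLopener inherits from URLopener, it picks up the patched retrieve() as well:

opener = FancyURLopener()

def progress(blocknum, bs, size):
    # standard reporthook signature: block number, block size, total size
    if size > 0:
        print '%d%%' % min(100, blocknum * bs * 100 / size)

filename, headers = opener.retrieve('http://example.com/big.iso', 'big.iso',
                                    reporthook=progress, maxtries=3)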
Feel free to give me your feedback =)
