我正在尝试用 5 个线程分块下载同一个文件的缓冲区,但拼接后的内容似乎是乱码。
from numpy import arange import requests from threading import Thread import urllib2 url = 'http://pymotw.com/2/urllib/index.html' sizeInBytes = r = requests.head(url, headers={'Accept-Encoding': 'identity'}).headers['content-length'] splitBy = 5 splits = arange(splitBy + 1) * (float(sizeInBytes)/splitBy) dataLst = [] def bufferSplit(url, idx, splits): req = urllib2.Request(url, headers={'Range': 'bytes=%d-%d' % (splits[idx], splits[idx+1])}) print {'bytes=%d-%d' % (splits[idx], splits[idx+1])} dataLst.append(urllib2.urlopen(req).read()) for idx in range(splitBy): dlth = Thread(target=bufferSplit, args=(url, idx, splits)) dlth.start() print dataLst with open('page.html', 'w') as fh: fh.write(''.join(dataLst))
更新:我又继续尝试了一下,但进展甚微;而且如果改为下载 jpg 文件,下载结果似乎已损坏;
from numpy import arange import os import requests import threading import urllib2 # url ='http://s1.fans.ge/mp3/201109/08/John_Legend_So_High_Remix(fans_ge).mp3' url = "http://www.nasa.gov/images/content/607800main_kepler1200_1600-1200.jpg" # url = 'http://pymotw.com/2/urllib/index.html' sizeInBytes = requests.head(url, headers={'Accept-Encoding': 'identity'}).headers.get('content-length', None) splitBy = 5 dataLst = [] class ThreadedFetch(threading.Thread): """ docstring for ThreadedFetch """ def __init__(self, url, fileName, splitBy=5): super(ThreadedFetch, self).__init__() self.__url = url self.__spl = splitBy self.__dataLst = [] self.__fileName = fileName def run(self): if not sizeInBytes: print "Size cannot be determined." return splits = arange(self.__spl + 1) * (float(sizeInBytes)/self.__spl) for idx in range(self.__spl): req = urllib2.Request(self.__url, headers={'Range': 'bytes=%d-%d' % (splits[idx], splits[idx+1])}) self.__dataLst.append(urllib2.urlopen(req).read()) def getFileData(self): return ''.join(self.__dataLst) fileName = url.split('/')[-1] dl = ThreadedFetch(url, fileName) dl.start() dl.join() content = dl.getFileData() if content: with open(fileName, 'w') as fh: fh.write(content) print "Finished Writing file %s" % fileName
以下是下载后的图像。