#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Python 2: download every URL listed in a text file, four at a time,
# using a gevent pool, removing each URL from the file as it is queued.
from gevent import monkey
monkey.patch_all()
from gevent.pool import Pool
import requests
import sys
import os

def download(url):
    # Send a desktop Chrome User-Agent so servers treat us as a browser
    chrome = ('Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 ' +
              '(KHTML, like Gecko) Chrome/41.0.2272.101 Safari/537.36')
    headers = {'User-Agent': chrome}
    filename = url.split('/')[-1].strip()
    r = requests.get(url.strip(), headers=headers, stream=True)
    with open(filename, 'wb') as f:
        # Stream the body in 1 KB chunks so large files never sit in memory
        for chunk in r.iter_content(chunk_size=1024):
            if chunk:
                f.write(chunk)
                f.flush()
    print filename, "is ok"

def removeLine(key, filename):
    # Delete the line containing key from the URL list (GNU sed, in place)
    os.system('sed -i /%s/d %s' % (key, filename))

if __name__ == "__main__":
    if len(sys.argv) == 2:
        filename = sys.argv[1]
        f = open(filename, "r")
        p = Pool(4)
        for line in f.readlines():
            if line:
                p.spawn(download, line.strip())
                key = line.split('/')[-1].strip()
                removeLine(key, filename)
        f.close()
        p.join()
    else:
        print 'Usage: python %s urls.txt' % sys.argv[0]
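Note that the script above is Python 2 (print statements), and that it removes each URL from the list file as soon as the job is spawned, not when the download finishes. For readers on Python 3, here is a minimal sketch of the same idea using the standard library's ThreadPoolExecutor in place of gevent; the pool size of 4 mirrors the original, while everything else is an illustrative assumption rather than the author's code:

#!/usr/bin/env python3
# Minimal Python 3 sketch of the same approach (not the original script):
# a thread pool of 4 workers streams each URL from urls.txt to disk.
import sys
from concurrent.futures import ThreadPoolExecutor

import requests

def download(url):
    # Derive the local file name from the last path component of the URL
    filename = url.rsplit('/', 1)[-1]
    r = requests.get(url, stream=True, timeout=30)
    r.raise_for_status()
    with open(filename, 'wb') as f:
        for chunk in r.iter_content(chunk_size=1024):
            if chunk:
                f.write(chunk)
    print(filename, 'is ok')

if __name__ == '__main__':
    if len(sys.argv) == 2:
        with open(sys.argv[1]) as f:
            urls = [line.strip() for line in f if line.strip()]
        with ThreadPoolExecutor(max_workers=4) as pool:
            # list() forces iteration, so any download error is re-raised here
            list(pool.map(download, urls))
    else:
        print('Usage: python %s urls.txt' % sys.argv[0])

It is invoked the same way as the original, with the URL list file as the only argument.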
Another method, shared by other users:
from os.path import basename
from urlparse import urlsplit
import urllib2  # missing from the original listing; needed below

def url2name(url):
    # Take the last path component of the URL as the local file name
    return basename(urlsplit(url)[2])

def download(url, localFileName=None):
    localName = url2name(url)
    req = urllib2.Request(url)
    r = urllib2.urlopen(req)
    if r.info().has_key('Content-Disposition'):
        # If the response has Content-Disposition, take the file name from it
        localName = r.info()['Content-Disposition'].split('filename=')[1]
        if localName[0] == '"' or localName[0] == "'":
            localName = localName[1:-1]
    elif r.url != url:
        # If we were redirected, take the real file name from the final URL
        localName = url2name(r.url)
    if localFileName:
        # A caller-supplied name overrides everything else
        localName = localFileName
    f = open(localName, 'wb')
    f.write(r.read())
    f.close()

download('URL of the Python file you want to download')
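This listing is also Python 2: urllib2 and has_key() no longer exist in Python 3. A rough Python 3 equivalent of the same idea, assuming the same Content-Disposition and redirect handling (urllib.request follows redirects automatically and exposes the final URL as r.url), would look like this:

# Rough Python 3 port of the idea above (an assumption, not the author's code):
# urllib2 became urllib.request, and headers are read with get() instead of has_key().
from os.path import basename
from urllib.parse import urlsplit
from urllib.request import urlopen

def url2name(url):
    # Take the last path component of the URL as the local file name
    return basename(urlsplit(url).path)

def download(url, local_file_name=None):
    local_name = url2name(url)
    r = urlopen(url)
    disposition = r.headers.get('Content-Disposition')
    if disposition and 'filename=' in disposition:
        # Prefer the server-suggested file name when one is sent
        local_name = disposition.split('filename=')[1].strip('"\'')
    elif r.url != url:
        # If we were redirected, derive the name from the final URL
        local_name = url2name(r.url)
    if local_file_name:
        # A caller-supplied name overrides everything else
        local_name = local_file_name
    with open(local_name, 'wb') as f:
        f.write(r.read())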
That is everything this article has to share; readers can test for themselves which method is more efficient.