本文实例讲述了python实现下载指定网址所有图片的方法。分享给大家供大家参考。具体实现方法如下:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
|
# coding=utf-8
# Download every picture referenced by the <img> tags of a web page.
# usage: python downpicture.py www.baidu.com [target_dir]
import os
import sys
from html.parser import HTMLParser
from urllib.parse import urljoin
from urllib.parse import urlparse
from urllib.request import urlopen

# Directory used when no target directory is given on the command line.
DEFAULT_SAVE_DIR = r'D:\python\song'


def getpicname(path):
    """Return the file name that the URL ``path`` refers to.

    Only the path component of the URL is inspected, so the host name,
    the query string and the fragment never leak into the result.

    Args:
        path: URL (or URL path) of a picture.

    Returns:
        The last path segment when it carries a file extension,
        otherwise ``None``.
    """
    url_path = urlparse(path).path
    # Treat backslashes as separators too, so the returned name can never
    # contain a directory part on any platform.
    name = url_path.replace('\\', '/').rsplit('/', 1)[-1]
    if os.path.splitext(name)[1] == '':
        return None
    return name


def saveimgto(path, urls, timeout=30):
    """Download every URL in ``urls`` into the local directory ``path``.

    URLs for which ``getpicname`` yields no file name are skipped. A URL
    whose download fails with ``OSError`` (which includes ``URLError``) or
    ``ValueError`` is reported on stderr and skipped, so one broken link
    does not abort the remaining downloads.

    Args:
        path: Existing directory that receives the files.
        urls: Iterable of absolute picture URLs.
        timeout: Timeout in seconds passed to ``urlopen`` for each download.

    Returns:
        The number of files written.

    Raises:
        SystemExit: If ``path`` is not a directory (after printing
            ``'path is invalid'``).
    """
    if not os.path.isdir(path):
        print('path is invalid')
        # Non-zero status so that calling scripts can detect the failure.
        sys.exit(1)

    saved = 0
    for url in urls:
        name = getpicname(url)
        if name is None:
            continue
        # Fetch first and open the output file afterwards, so a failed
        # download does not leave an empty file behind.
        try:
            with urlopen(url, timeout=timeout) as response:
                data = response.read()
        except (OSError, ValueError) as err:
            print('skip {}: {}'.format(url, err), file=sys.stderr)
            continue
        with open(os.path.join(path, name), 'wb') as out:
            out.write(data)
        saved += 1
    return saved


class myhtmlparser(HTMLParser):
    """Collect the ``src`` attribute of every ``<img>`` tag fed to the parser.

    Attributes are filled while ``feed`` runs:
        urls -- list of the non-empty ``src`` values, in document order
        num  -- number of ``<img>`` start tags seen (with or without ``src``)
    """

    def __init__(self):
        super().__init__()
        self.urls = []
        self.num = 0

    def handle_starttag(self, tag, attr):
        """Record the ``src`` values of an ``<img>`` start tag."""
        if tag.lower() != 'img':
            return
        self.num += 1
        # A valueless attribute (<img src>) is reported with value None;
        # drop those and empty strings because they are not URLs.
        self.urls.extend(
            value for name, value in attr
            if name.lower() == 'src' and value
        )


def _absolute_urls(base_url, srcs):
    """Resolve each entry of ``srcs`` against ``base_url``, dropping repeats."""
    # urljoin covers protocol-relative ('//host/x'), root-relative ('/x') and
    # document-relative ('img/x') references; dict.fromkeys removes
    # duplicates while keeping the first-seen order.
    return list(dict.fromkeys(urljoin(base_url, src) for src in srcs))


def main(argv=None):
    """Download all pictures of the page named on the command line.

    Args:
        argv: Argument list without the program name; ``sys.argv[1:]`` is
            used when it is ``None``. The first item is the page URL, the
            optional second item the target directory.

    Returns:
        Process exit status: 0 on success, 2 when the URL argument is missing.
    """
    args = sys.argv[1:] if argv is None else list(argv)
    if not args:
        print('usage: python downpicture.py <url> [target_dir]',
              file=sys.stderr)
        return 2

    url = args[0]
    if not url.startswith(('http://', 'https://')):
        url = 'http://' + url
    target_dir = args[1] if len(args) > 1 else DEFAULT_SAVE_DIR

    with urlopen(url) as page:
        # Use the final URL as the base so references still resolve
        # correctly when the request was redirected.
        base_url = page.geturl()
        content = page.read().decode('utf-8', 'ignore')

    myparser = myhtmlparser()
    myparser.feed(content)
    myparser.close()
    myparser.urls = _absolute_urls(base_url, myparser.urls)

    saved = saveimgto(target_dir, myparser.urls)
    print('num of download pictures is {}'.format(saved))
    return 0


if __name__ == '__main__':
    sys.exit(main())
运行结果如下:
num of download pictures is 19
希望本文所述对大家的Python程序设计有所帮助。