代码如下
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
|
# encoding: utf-8
"""Scrape the bilibili ranking page and export the entries to an .xls workbook."""
import os

import requests
import xlwt
from lxml import etree

_RANKING_URL = "https://www.bilibili.com/ranking?spm_id_from=333.851.b_7072696d61727950616765546162.3"
_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36"
# Seconds to wait for the ranking page before giving up instead of hanging.
_REQUEST_TIMEOUT = 10
# Upper bound applied to computed column widths (largest 16-bit value).
_MAX_COL_WIDTH = 65535
_OUTPUT_FILE = "b站热门视频信息.xls"


def _joined_text(node, path):
    """Evaluate XPath *path* under *node* and concatenate the stripped results."""
    return "".join(piece.strip() for piece in node.xpath(path))


def _split_play_and_comment(stats_text):
    """Split the concatenated play/comment text at the first "万" marker.

    Returns a ``(play, comment)`` tuple of strings.  When the text holds no
    "万" at all there is no reliable split point, so the whole text is
    returned as ``play`` and ``comment`` is empty (the previous code raised
    ``IndexError`` here).  This is a heuristic: a play count without "万"
    followed by a comment count with one cannot be told apart.
    """
    parts = stats_text.split("万")
    if len(parts) == 1:
        return stats_text, ""
    play = parts[0] + "万"
    comment = parts[1]
    # A non-numeric remainder (e.g. "1.2") lost its own "万" in the split.
    if comment and not comment.isdigit():
        comment += "万"
    return play, comment


def _hyperlink_formula(url, label):
    """Build the text of a HYPERLINK formula pointing at *url*, shown as *label*."""
    # Double embedded quotes so they cannot terminate the formula string early.
    # NOTE(review): relies on xlwt's formula parser accepting "" as an escaped
    # quote -- confirm against the xlwt version in use.
    safe_url = url.replace('"', '""')
    safe_label = label.replace('"', '""')
    return 'HYPERLINK("{}";"{}")'.format(safe_url, safe_label)


def spider():
    """Fetch the bilibili ranking page and extract one record per ranked item.

    Returns:
        list[dict]: one dict per ``<li class="rank-item">`` element with the
        string keys ``rank``, ``videolink``, ``title``, ``play``, ``comment``,
        ``upname``, ``uplink`` and ``hot``.  Fields whose XPath matches
        nothing come back as empty strings.
    """
    response = requests.get(
        _RANKING_URL,
        headers={"User-Agent": _USER_AGENT},
        timeout=_REQUEST_TIMEOUT,
    )
    tree = etree.HTML(response.text)
    if tree is None:
        # Nothing parseable came back, so there is nothing to query.
        return []

    video_list = []
    for item in tree.xpath("//li[@class='rank-item']"):
        play, comment = _split_play_and_comment(
            _joined_text(item, ".//div[@class='detail']/span/text()")
        )
        up_href = _joined_text(item, ".//div[@class='detail']/a/@href")
        video_list.append({
            'rank': _joined_text(item, "./div[@class='num']/text()"),
            'videolink': _joined_text(item, ".//div[@class='info']/a/@href"),
            'title': _joined_text(item, ".//div[@class='info']/a/text()"),
            'play': play,
            'comment': comment,
            'upname': _joined_text(item, ".//div[@class='detail']/a/span/text()"),
            # Drop leading slashes so a protocol-relative href ("//host/...")
            # does not turn into "http:////host/...".
            'uplink': "http://" + up_href.lstrip("/"),
            'hot': _joined_text(item, ".//div[@class='pts']/div/text()"),
        })
    return video_list


def write_Excel():
    """Scrape the ranking via ``spider()`` and save it as an .xls workbook.

    Writes a header row followed by one row per video.  The title and
    uploader cells are HYPERLINK formulas; rank, heat, play count and comment
    count are written as plain strings.  Any existing output file in the
    current working directory is replaced.
    """
    video_list = spider()

    workbook = xlwt.Workbook()
    sheet = workbook.add_sheet("b站热门视频")
    xstyle = xlwt.XFStyle()
    xstyle.alignment.horz = 0x02  # centred horizontally
    xstyle.alignment.vert = 0x01  # centred vertically

    head = ['视频名', 'up主', '排名', '热度', '播放量', '评论数']
    for col, caption in enumerate(head):
        sheet.write(0, col, caption, xstyle)

    # Track the widest formula per link column so the final width fits every
    # row, not just the last one written.
    title_width = 0
    name_width = 0
    for row, item in enumerate(video_list, start=1):
        title = item["title"]
        # Kept from the original: a title containing quotes is reduced to the
        # text after its first quote (up to the next one, if any).
        if '"' in title:
            title = title.split('"')[1]

        title_data = _hyperlink_formula(item["videolink"], title)
        title_width = max(title_width, int(256 * len(title_data) * 3 / 5))
        sheet.write(row, 0, xlwt.Formula(title_data), xstyle)

        name_data = _hyperlink_formula(item["uplink"], item["upname"])
        name_width = max(name_width, int(256 * len(name_data) * 3 / 5))
        sheet.write(row, 1, xlwt.Formula(name_data), xstyle)

        sheet.write(row, 2, item["rank"], xstyle)
        sheet.write(row, 3, item["hot"], xstyle)
        sheet.write(row, 4, item["play"], xstyle)
        sheet.write(row, 5, item["comment"], xstyle)

    # Leave the default widths alone when no rows were written.
    if title_width:
        sheet.col(0).width = min(title_width, _MAX_COL_WIDTH)
    if name_width:
        sheet.col(1).width = min(name_width, _MAX_COL_WIDTH)

    # Remove a stale export first so the save always starts from a clean file.
    if os.path.exists(_OUTPUT_FILE):
        os.remove(_OUTPUT_FILE)
    workbook.save(_OUTPUT_FILE)


if __name__ == '__main__':
    write_Excel()
结果展示:
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持服务器之家。
原文链接:https://www.cnblogs.com/zhouzetian/p/12613930.html