这是一个用 Python 爬取淘宝商品销量的程序。运行程序后,输入想要爬取的商品关键词即可;代码中的 '###' 处可以进一步约束商品的属性,例如想统计某位作者的书籍,可以把 '###' 替换为作者名字、出版时期等。程序最后会输出符合条件的商品的总销量。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
|
# Taobao sales counter: searches Taobao for a keyword (sorted by sales),
# walks the first few result pages and adds up the sales figures of every
# item whose title contains a filter string.
import json
import re

try:
    # The original script imported bs4 without using it; the import is kept
    # (optionally) so nothing that relied on the name breaks.
    import bs4  # noqa: F401
except ImportError:  # bs4 is not needed by any function below
    bs4 = None

SEARCH_URL = "https://s.taobao.com/search"
ITEMS_PER_PAGE = 44      # offset step used for the 's' query parameter
REQUEST_TIMEOUT = 10     # seconds; prevents a stalled connection from hanging
TITLE_FILTER = "###"     # placeholder: replace with an author name, edition, ...

# The search page embeds its data as a JavaScript assignment on one line.
_PAGE_CONFIG_RE = re.compile(r"g_page_config = (.*?);\n")
# Sales text looks like "120人付款" or "1.5万+人付款" (万 = 10,000).
_SALES_RE = re.compile(r"(\d+(?:\.\d+)?)\s*(万)?")

# (output key, key in Taobao's auction record)
_ITEM_FIELDS = (
    ("id", "nid"),
    ("title", "title"),
    ("link", "detail_url"),
    ("price", "view_price"),
    ("sale", "view_sales"),
    ("shoper", "nick"),
)


def open(keywords, page):  # NOTE: shadows the builtin; name kept for callers
    """Fetch one page of Taobao search results, sorted by sales (descending).

    Args:
        keywords: Search keywords.
        page: 1-based result page number.

    Returns:
        The ``requests.Response`` for the search request.
    """
    # Imported here so the parsing helpers stay usable without requests.
    import requests

    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36"
    }
    payload = {
        "q": keywords,
        "sort": "sale-desc",
        "s": (page - 1) * ITEMS_PER_PAGE,
    }
    # The original built `headers` but never sent them; pass them along and
    # bound the wait with a timeout.
    return requests.get(
        SEARCH_URL, params=payload, headers=headers, timeout=REQUEST_TIMEOUT
    )


def get_item(res):
    """Extract the fields of interest from a search response.

    Args:
        res: Any object with a ``text`` attribute holding the page HTML.

    Returns:
        A list of dicts with the keys ``id``, ``title``, ``link``, ``price``,
        ``sale`` and ``shoper``. A field missing from the page data is
        ``None``. Returns an empty list when the page carries no parsable
        ``g_page_config`` item list (for example a login/verification page).
    """
    match = _PAGE_CONFIG_RE.search(res.text)
    if match is None:
        return []
    try:
        page_config = json.loads(match.group(1))
        auctions = page_config["mods"]["itemlist"]["data"]["auctions"]
    except (ValueError, KeyError, TypeError):
        # Malformed JSON or an unexpected layout: treat as "no items".
        return []
    return [
        {key: auction.get(source) for key, source in _ITEM_FIELDS}
        for auction in auctions
    ]


def _parse_sales(text):
    """Convert a sales label such as "1.5万+人付款" to an int (0 if absent)."""
    if not isinstance(text, str):
        return 0
    match = _SALES_RE.search(text)
    if match is None:
        return 0
    number, ten_thousand = match.groups()
    if ten_thousand:
        # round() guards against float artefacts such as 2.3 * 10000.
        return round(float(number) * 10000)
    return int(float(number))


def count_sales(items, title_filter=TITLE_FILTER):
    """Sum the sales of the items whose title contains ``title_filter``.

    Args:
        items: Item dicts as produced by ``get_item``.
        title_filter: Substring a title must contain to be counted.
            Defaults to the ``'###'`` placeholder.

    Returns:
        Total sales as an int; items without a readable sales figure
        contribute 0.
    """
    return sum(
        _parse_sales(item.get("sale"))
        for item in items
        if title_filter in (item.get("title") or "")
    )


def main():
    """Prompt for keywords, scan the result pages and print total sales."""
    keywords = input("请输入搜索关键词:")  # any product name
    length = 10  # number of Taobao result pages to scan
    total = 0
    for index in range(length):
        res = open(keywords, index + 1)
        total += count_sales(get_item(res))
    print(total)  # overall sales total


if __name__ == "__main__":
    main()
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持服务器之家。
原文链接:https://blog.csdn.net/qq_25774883/article/details/81292383