测试代码1:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
|
def test(self):
    """Post one hand-written document to Solr's JSON update handler.

    Sends an ``add`` command to the ``mycore`` core on 127.0.0.1:8983 via
    ``requests`` and prints the raw response body.

    :return: None
    """
    # NOTE(review): "*字段名*" looks like a placeholder for a real schema
    # field name - replace it before running against an actual core.
    data = {"add": {"doc": {"id": "100001", "*字段名*": u"我是一个大好人"}}}
    # Solr request parameter names are case-sensitive: "commitWithin".
    params = {"boost": 1.0, "overwrite": "true", "commitWithin": 1000}
    url = 'http://127.0.0.1:8983/solr/mycore/update?wt=json'
    headers = {"content-type": "application/json"}
    r = requests.post(url, json=data, params=params, headers=headers)
    print(r.text)


def index_data(self):
    """Index two sample documents through the pysolr client.

    Prints whatever ``Solr.add`` returns.

    :return: None
    """
    solr = pysolr.Solr('http://127.0.0.1:8983/solr/mycore/', timeout=10)
    docs = [
        {
            "id": "doc_1",
            "title": "a test document",
        },
        {
            "id": "doc_2",
            "title": "the banana: tasty or dangerous?",
        },
    ]
    # Commit explicitly: newer pysolr releases no longer commit by default,
    # so without it the documents would not become searchable.
    result = solr.add(docs, commit=True)
    print(result)
测试代码2:
实际数据:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
|
def _decode_gb2312(value):
    """Return *value* as text, decoding it from GB2312 when it is bytes."""
    # Python 2's csv module yields byte strings (``bytes`` is ``str`` there);
    # Python 3's yields text, which must not be decoded again.
    if isinstance(value, bytes):
        return value.decode('gb2312')
    return value


def _row_to_doc(item):
    """Build a fresh Solr document dict from one CSV row.

    Columns 0, 1, 3 and 4 become title, link, source and keyword.
    Column 2 (the date) is deliberately not indexed.

    :raises IndexError: when the row has fewer than five columns
    :raises UnicodeError: when a text column is not valid GB2312
    """
    return {
        'title': _decode_gb2312(item[0]),
        'link': item[1],
        'source': _decode_gb2312(item[3]),
        'keyword': _decode_gb2312(item[4]),
    }


def index_data_fromcsv(self, csvfile):
    """Read rows from a CSV file and index them into Solr, one POST per row.

    Each data row is sent to the JSON update handler with ``requests`` and
    the response body is printed. A row that cannot be converted or posted
    is reported and skipped; the row index is printed after every row.

    :param csvfile: CSV file name, including the full path
    :return: None
    """
    # readcsv may return None when the file does not exist.
    rows = csvop.readcsv(csvfile) or []
    # Solr request parameter names are case-sensitive: "commitWithin".
    params = {"boost": 1.0, "overwrite": "true", "commitWithin": 1000}
    url = 'http://127.0.0.1:8983/solr/mycore/update?wt=json'
    headers = {"content-type": "application/json"}
    for index, item in enumerate(rows):
        if index == 0:
            continue  # the first row is the header
        try:
            # A new dict per row, so a failed row never leaks stale fields
            # into the next document.
            data = {"add": {"doc": _row_to_doc(item)}}
            r = requests.post(url, json=data, params=params, headers=headers)
            print(r.text)
        except (IndexError, UnicodeError, requests.RequestException) as e:
            print(e)
        print(index)


# pysolr client version
def pysolr_index_data_fromcsv(self, csvfile, url='http://127.0.0.1:8983/solr/mycore/'):
    """Read rows from a CSV file and index them into Solr in one pysolr call.

    Rows that cannot be converted are reported and skipped; all remaining
    documents are sent with a single ``Solr.add`` and its result is printed.

    :param csvfile: CSV file name, including the full path
    :param url: base URL of the Solr core
    :return: None
    """
    # readcsv may return None when the file does not exist.
    rows = csvop.readcsv(csvfile) or []
    listdocs = []
    for item in rows[1:]:  # the first row is the header
        try:
            listdocs.append(_row_to_doc(item))
        except (IndexError, UnicodeError) as e:
            print(e)
    solr = pysolr.Solr(url, timeout=10)
    # Commit explicitly: newer pysolr releases no longer commit by default.
    result = solr.add(listdocs, commit=True)
    print(result)
查询代码:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
|
def search_data(self, message='视频'):
    """Run a phrase query on the ``title`` field through the select handler.

    Prints the raw JSON response, then ``<message>:<number of matches>``.

    :param message: phrase to look for in ``title``
    :return: None
    """
    url = 'http://127.0.0.1:8983/solr/mycore/select'
    # Let requests URL-encode the query instead of formatting it into the
    # URL by hand (the hand-built URL also sent a stray backslash).
    params = {'q': 'title:"%s"' % message, 'wt': 'json', 'indent': 'true'}
    r = requests.get(url, params=params, verify=False)
    print(r.text)
    # The response key is case-sensitive: "numFound".
    num_found = r.json()['response']['numFound']
    print("%s:%s" % (message, num_found))


# pysolr client version. It has the same name as the function above, so keep
# only one of the two when pasting them into a class.
def search_data(self, where='视频', url='http://127.0.0.1:8983/solr/mycore/'):
    """Search the ``title`` field with pysolr and print each hit's fields.

    :param where: term to look for in ``title``
    :param url: base URL of the Solr core
    :return: None
    """
    solr = pysolr.Solr(url, timeout=10)
    # Start at offset 10 and return up to 30 hits, restricted to four fields.
    options = {'start': 10, 'rows': 30, 'fl': 'title,keyword,source,link'}
    # Use the caller's term; it used to be ignored in favour of a
    # hard-coded 'title:视频'.
    result = solr.search('title:%s' % where, **options)
    # The total match count is available as
    # result.raw_response['response']['numFound'].
    for item in result:
        # Solr omits fields a document does not have, hence .get().
        for field in ('keyword', 'title', 'source', 'link'):
            print('%s: %s' % (field, item.get(field, '')))
        print('\n')
输出结果:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
|
{ "responseheader" :{ "status" : 0 , "qtime" : 0 , "params" :{ "q" : "title:\"\\视频\"" , "indent" : "true" , "wt" : "json" }}, "response" :{ "numfound" : 123 , "start" : 0 , "docs" :[ { "source" : "中彩网" , "link" : "http://www.zhcw.com/video/kaijiangshipin-3d/11981126.shtml" , "keyword" : "视频" , "title" : "福彩3d开奖 视频 -中彩 视频" , "id" : "2f0a9d21-3771-4efa-a0cc-e0484cc97993" , "_version_" : 1584214368617234432 }, { "source" : "新浪视频" , "link" : "http://video.sina.com.cn/news/spj/topvideoes20170707/?opsubject_id=top1" , "keyword" : "视频" , "title" : "今日热门 视频 汇总20170707" , "id" : "c8aae0af-01e9-491f-b999-24b97004a4ba" , "_version_" : 1584214367507841024 }, { "source" : "网易新闻" , "link" : "http://news.163.com/17/0707/13/coocnuie00018aor.html" , "keyword" : "视频" , "title" : "网传"兰桂坊附近不雅 视频 " 警方:传播 视频 将追责" , "id" : "353de48d-ede7-481b-89d3-bc20ab4b3884" , "_version_" : 1584214367821365248 }, { "source" : "凤凰视频" , "link" : "http://v.ifeng.com/video_7480871.shtml" , "keyword" : "视频" , "title" : "创想动画片:花粉过敏症的痛谁懂-凤凰 视频 -最具媒体品质的综合 视频 ..." , "id" : "dc5f19c4-180f-4004-a0db-4499d875a60f" , "_version_" : 1584214366819975168 }, { "source" : "凤凰视频" , "link" : "http://v.ifeng.com/video_7805858.shtml" , "keyword" : "视频" , "title" : "节气说:小暑时节就该这样养生-凤凰 视频 -最具媒体品质的综合 视频 门..." , "id" : "5e9eb7a7-48b8-4e41-9514-7712ae619d9a" , "_version_" : 1584214367516229632 }, { "source" : "凤凰视频" , "link" : "http://v.ifeng.com/video_7483506.shtml" , "keyword" : "视频" , "title" : "听导演讲《神奇女侠》的故事 -凤凰 视频 -最具媒体品质的综合 视频 门户-..." 
, "id" : "6b1482f1-c0c9-479f-bef7-7de324fb9372" , "_version_" : 1584214367647301632 }, { "source" : "汽车杂志" , "link" : "http://www.jiemian.com/article/1445267.html" , "keyword" : "视频" , "title" : "【视频】欧宝最近找了一堆穿睡衣的辣妈拍了一段超牛的视频" , "id" : "1d327555-a6f3-4513-9a21-43d59418ab82" , "_version_" : 1584214368157958144 }, { "source" : "味觉大师" , "link" : "http://www.jiemian.com/article/1453545.html" , "keyword" : "视频" , "title" : "【视频】大董没有肉的肉味烧茄子" , "id" : "7d777870-93cb-4c18-a32b-734af8f133f1" , "_version_" : 1584213891451191296 }, { "source" : "新浪汽车" , "link" : "http://auto.sina.com.cn/video/zz/2017-07-07/detail-ifyhwehx5311889.shtml" , "keyword" : "视频" , "title" : "视频 :两大神车pk!高尔夫思域怎么选?" , "id" : "3a50b303-6b54-4da3-aee1-a61c678c752d" , "_version_" : 1584213892090822656 }, { "source" : "味觉大师" , "link" : "http://www.jiemian.com/article/1453545.html" , "keyword" : "视频" , "title" : "【视频】大董没有肉的肉味烧茄子" , "id" : "01da8e11-77bc-4c31-ba3a-ba668e846d9d" , "_version_" : 1584214366191878144 }] }} |
完整代码:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
|
#-*- coding: utf-8 -*-
# ---- csvop.py: CSV helper module imported by the Solr client below ----
import csv
import os
import codecs


def readcsv(filename):
    """Read a CSV file and return its rows as a list of lists.

    :param filename: path of the CSV file
    :return: list of rows (header included); an empty list when the file
             does not exist, so callers can always iterate the result
    """
    if not os.path.exists(filename):
        return []
    # NOTE(review): under Python 3 this decodes with the platform default
    # encoding - pass an explicit encoding if the CSV is GB2312.
    with open(filename, 'r') as f:
        return [row for row in csv.reader(f)]


#################################################
#coding=utf-8
# ---- Solr client script (a separate file that imports csvop) ----
import json
import math
import os
import time
from datetime import datetime
from os import walk

import pysolr
import requests

import csvop


def _decode_gb2312(value):
    """Return *value* as text, decoding it from GB2312 when it is bytes."""
    # Python 2's csv module yields byte strings (``bytes`` is ``str`` there);
    # Python 3's yields text, which must not be decoded again.
    if isinstance(value, bytes):
        return value.decode('gb2312')
    return value


def _row_to_doc(item):
    """Build a fresh Solr document dict from one CSV row.

    Columns 0, 1, 3 and 4 become title, link, source and keyword.
    Column 2 (the date) is deliberately not indexed.

    :raises IndexError: when the row has fewer than five columns
    :raises UnicodeError: when a text column is not valid GB2312
    """
    return {
        'title': _decode_gb2312(item[0]),
        'link': item[1],
        'source': _decode_gb2312(item[3]),
        'keyword': _decode_gb2312(item[4]),
    }


class solrclientobj:
    """Small demo client for a Solr core on 127.0.0.1:8983 (core ``mycore``)."""

    def test(self):
        """Post one hand-written document to Solr's JSON update handler.

        Prints the raw response body.
        """
        # NOTE(review): "*字段名*" looks like a placeholder for a real schema
        # field name - replace it before running against an actual core.
        data = {"add": {"doc": {"id": "100001", "*字段名*": u"我是一个大好人"}}}
        # Solr request parameter names are case-sensitive: "commitWithin".
        params = {"boost": 1.0, "overwrite": "true", "commitWithin": 1000}
        url = 'http://127.0.0.1:8983/solr/mycore/update?wt=json'
        headers = {"content-type": "application/json"}
        r = requests.post(url, json=data, params=params, headers=headers)
        print(r.text)

    def pysolr_index_data_fromcsv(self, csvfile, url='http://127.0.0.1:8983/solr/mycore/'):
        """Read rows from a CSV file and index them in one pysolr call.

        Rows that cannot be converted are reported and skipped; the
        remaining documents are sent with a single ``Solr.add`` and its
        result is printed.

        :param csvfile: CSV file name, including the full path
        :param url: base URL of the Solr core
        :return: None
        """
        rows = csvop.readcsv(csvfile) or []
        listdocs = []
        for item in rows[1:]:  # the first row is the header
            try:
                listdocs.append(_row_to_doc(item))
            except (IndexError, UnicodeError) as e:
                print(e)
        solr = pysolr.Solr(url, timeout=10)
        # Commit explicitly: newer pysolr releases no longer commit by default.
        result = solr.add(listdocs, commit=True)
        print(result)

    def index_data_fromcsv(self, csvfile):
        """Read rows from a CSV file and index them, one POST per row.

        Each data row is sent to the JSON update handler with ``requests``
        and the response body is printed. A row that cannot be converted or
        posted is reported and skipped; the row index is printed after every
        row.

        :param csvfile: CSV file name, including the full path
        :return: None
        """
        rows = csvop.readcsv(csvfile) or []
        # Solr request parameter names are case-sensitive: "commitWithin".
        params = {"boost": 1.0, "overwrite": "true", "commitWithin": 1000}
        url = 'http://127.0.0.1:8983/solr/mycore/update?wt=json'
        headers = {"content-type": "application/json"}
        for index, item in enumerate(rows):
            if index == 0:
                continue  # the first row is the header
            try:
                # A new dict per row, so a failed row never leaks stale
                # fields into the next document.
                data = {"add": {"doc": _row_to_doc(item)}}
                r = requests.post(url, json=data, params=params, headers=headers)
                print(r.text)
            except (IndexError, UnicodeError, requests.RequestException) as e:
                print(e)
            print(index)

    def index_data(self):
        """Index two sample documents through pysolr and print the result."""
        solr = pysolr.Solr('http://127.0.0.1:8983/solr/mycore/', timeout=10)
        docs = [
            {
                "id": "doc_1",
                "title": "a test document",
            },
            {
                "id": "doc_2",
                "title": "the banana: tasty or dangerous?",
            },
        ]
        # Commit explicitly: newer pysolr releases no longer commit by default.
        result = solr.add(docs, commit=True)
        print(result)

    def search_data(self, where='视频', url='http://127.0.0.1:8983/solr/mycore/'):
        """Search the ``title`` field with pysolr and print each hit's fields.

        :param where: term to look for in ``title``
        :param url: base URL of the Solr core
        :return: None
        """
        solr = pysolr.Solr(url, timeout=10)
        # Start at offset 10 and return up to 30 hits, restricted to four
        # fields.
        options = {'start': 10, 'rows': 30, 'fl': 'title,keyword,source,link'}
        # Use the caller's term; it used to be ignored in favour of a
        # hard-coded 'title:视频'.
        result = solr.search('title:%s' % where, **options)
        # The total match count is available as
        # result.raw_response['response']['numFound'].
        for item in result:
            # Solr omits fields a document does not have, hence .get().
            for field in ('keyword', 'title', 'source', 'link'):
                print('%s: %s' % (field, item.get(field, '')))
            print(' ')

    def delete_index_data(self, where, url='http://127.0.0.1:8983/solr/mycore/'):
        """Delete every indexed document matching a query and print the result.

        :param where: Solr query selecting the documents to delete;
                      ``'*:*'`` deletes everything
        :param url: base URL of the Solr core
        :return: None
        """
        solr = pysolr.Solr(url, timeout=10)
        # To delete a single document by id instead: solr.delete(id='id1')
        # Commit explicitly: newer pysolr releases no longer commit by default.
        result = solr.delete(q=where, commit=True)
        print(result)


obj = solrclientobj()
# obj.delete_index_data('*:*')  # delete every indexed document
# obj.index_data()
# obj.search_data()
# obj.delete_index_data('doc_1')
obj.search_data('视频')
# csvfile = 'd:/work/solr/other/exportexcels/2017-07-07_info.csv'
# obj.pysolr_index_data_fromcsv(csvfile)
以上这篇对 Python 操作 Solr 索引数据的实例详解就是小编分享给大家的全部内容了,希望能给大家一个参考,也希望大家多多支持服务器之家。
原文链接:https://www.cnblogs.com/shaosks/p/7845576.html