服务器之家

服务器之家 > 正文

对python 操作solr索引数据的实例详解

时间:2021-04-26 00:30     来源/作者:shaomine

测试代码1:

?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
def test(self):
  data = {"add": {"doc": {"id": "100001", "*字段名*": u"我是一个大好人"}}}
  params = {"boost": 1.0, "overwrite": "true", "commitwithin": 1000}
  url = 'http://127.0.0.1:8983/solr/mycore/update?wt=json'
  headers = {"content-type": "application/json"}
  r = requests.post(url, json=data, params=params, headers=headers)
  print r.text
 
 
 def index_data(self):
  solr = pysolr.solr('http://127.0.0.1:8983/solr/mycore/', timeout=10)
 
  # how you'd index data.
  result = solr.add([
   {
    "id": "doc_1",
    "title": "a test document",
   },
   {
    "id": "doc_2",
    "title": "the banana: tasty or dangerous?",
   },
  ])
  print result

测试代码2:

实际数据:

对python 操作solr索引数据的实例详解

?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
def index_data_fromcsv(self, csvfile):
  '''
   从csv文件中读取数据,并索引到solr中
   :param csvfile: csv文件,包括完整路径
   :return:
   '''
  list = csvop.readcsv(csvfile)
  index = 0
  doc = {}
  params = {"boost": 1.0, "overwrite": "true", "commitwithin": 1000}
  url = 'http://127.0.0.1:8983/solr/mycore/update?wt=json'
  headers = {"content-type": "application/json"}
  for item in list:
   if index > 0: # 第一行是标题
    try:
     doc['title'] = item[0].decode('gb2312')
     doc['link'] = item[1]
     # doc['date'] = item[2]
     doc['source'] = item[3].decode('gb2312')
     doc['keyword'] = item[4].decode('gb2312')
     data = {"add": {"doc": doc}}
     r = requests.post(url, json=data, params=params, headers=headers)
     print r.text
    except exception,e:
     print e.message
 
   print index
   index += 1
 
#pysolr客户端代码
 def pysolr_index_data_fromcsv(self, csvfile,url='http://127.0.0.1:8983/solr/mycore/'):
  '''
   从csv文件中读取数据,并索引到solr中
   :param csvfile: csv文件,包括完整路径
   :return:
   '''
  list = csvop.readcsv(csvfile)
  index = 0
  listdocs = []
  for item in list:
   if index > 0: # 第一行是标题
    doc = {}
    try:
     doc['title'] = item[0].decode('gb2312')
     doc['link'] = item[1]
     # doc['date'] = item[2]
     doc['source'] = item[3].decode('gb2312')
     doc['keyword'] = item[4].decode('gb2312')
     listdocs.append(doc)
    except exception,e:
     print e.message
   index += 1
  solr = pysolr.solr(url, timeout=10)
  result = solr.add(listdocs)
  print result

查询代码:

?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
def search_data(self,message='视频'):
  url = 'http://127.0.0.1:8983/solr/mycore/select?q=title:"\%s"&wt=json&indent=true' % message
  r = requests.get(url, verify=false)
  print r.text
  r = r.json()['response']['numfound']
  print message + ":" + str(r)
  
  #pysolr客户端
  def search_data(self,where='视频',url='http://127.0.0.1:8983/solr/mycore/'):
  solr = pysolr.solr(url, timeout=10)
  dict = {'start':10,'rows': 30,'fl':'title,keyword,source,link'}
  result = solr.search('title:视频',**dict)
  # result = solr.search('title:视频')
  # print result.raw_response['response']['numfound']
 
  for item in result:
   print 'keyword: %s'% item['keyword']
   print 'title: %s'% item['title']
   print 'source: %s'% item['source']
   print 'link: %s'% item['link']
   print '

'

输出结果:

?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
{
 "responseheader":{
 "status":0,
 "qtime":0,
 "params":{
  "q":"title:\"\\视频\"",
  "indent":"true",
  "wt":"json"}},
 "response":{"numfound":123,"start":0,"docs":[
  {
  "source":"中彩网",
  "link":"http://www.zhcw.com/video/kaijiangshipin-3d/11981126.shtml",
  "keyword":"视频",
  "title":"福彩3d开奖 视频 -中彩 视频",
  "id":"2f0a9d21-3771-4efa-a0cc-e0484cc97993",
  "_version_":1584214368617234432},
  {
  "source":"新浪视频",
  "link":"http://video.sina.com.cn/news/spj/topvideoes20170707/?opsubject_id=top1",
  "keyword":"视频",
  "title":"今日热门 视频 汇总20170707",
  "id":"c8aae0af-01e9-491f-b999-24b97004a4ba",
  "_version_":1584214367507841024},
  {
  "source":"网易新闻",
  "link":"http://news.163.com/17/0707/13/coocnuie00018aor.html",
  "keyword":"视频",
  "title":"网传"兰桂坊附近不雅 视频 " 警方:传播 视频 将追责",
  "id":"353de48d-ede7-481b-89d3-bc20ab4b3884",
  "_version_":1584214367821365248},
  {
  "source":"凤凰视频",
  "link":"http://v.ifeng.com/video_7480871.shtml",
  "keyword":"视频",
  "title":"创想动画片:花粉过敏症的痛谁懂-凤凰 视频 -最具媒体品质的综合 视频 ...",
  "id":"dc5f19c4-180f-4004-a0db-4499d875a60f",
  "_version_":1584214366819975168},
  {
  "source":"凤凰视频",
  "link":"http://v.ifeng.com/video_7805858.shtml",
  "keyword":"视频",
  "title":"节气说:小暑时节就该这样养生-凤凰 视频 -最具媒体品质的综合 视频 门...",
  "id":"5e9eb7a7-48b8-4e41-9514-7712ae619d9a",
  "_version_":1584214367516229632},
  {
  "source":"凤凰视频",
  "link":"http://v.ifeng.com/video_7483506.shtml",
  "keyword":"视频",
  "title":"听导演讲《神奇女侠》的故事 -凤凰 视频 -最具媒体品质的综合 视频 门户-...",
  "id":"6b1482f1-c0c9-479f-bef7-7de324fb9372",
  "_version_":1584214367647301632},
  {
  "source":"汽车杂志",
  "link":"http://www.jiemian.com/article/1445267.html",
  "keyword":"视频",
  "title":"【视频】欧宝最近找了一堆穿睡衣的辣妈拍了一段超牛的视频",
  "id":"1d327555-a6f3-4513-9a21-43d59418ab82",
  "_version_":1584214368157958144},
  {
  "source":"味觉大师",
  "link":"http://www.jiemian.com/article/1453545.html",
  "keyword":"视频",
  "title":"【视频】大董没有肉的肉味烧茄子",
  "id":"7d777870-93cb-4c18-a32b-734af8f133f1",
  "_version_":1584213891451191296},
  {
  "source":"新浪汽车",
  "link":"http://auto.sina.com.cn/video/zz/2017-07-07/detail-ifyhwehx5311889.shtml",
  "keyword":"视频",
  "title":"视频 :两大神车pk!高尔夫思域怎么选?",
  "id":"3a50b303-6b54-4da3-aee1-a61c678c752d",
  "_version_":1584213892090822656},
  {
  "source":"味觉大师",
  "link":"http://www.jiemian.com/article/1453545.html",
  "keyword":"视频",
  "title":"【视频】大董没有肉的肉味烧茄子",
  "id":"01da8e11-77bc-4c31-ba3a-ba668e846d9d",
  "_version_":1584214366191878144}]
 }}

完整代码:

?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
#-*- coding: utf-8 -*-
import csv
import os
import codecs
 
 
def readcsv(filename):
 if os.path.exists(filename):
  with open(filename, 'r') as f:
   reader = csv.reader(f)
   list = []
   for item in reader:
    list.append(item)
   return list
 
#################################################
#coding=utf-8
import json
import requests
 
import os
import time
from os import walk
import csvop
from datetime import datetime
import pysolr
import math
 
class solrclientobj:
 
 def test(self):
  data = {"add": {"doc": {"id": "100001", "*字段名*": u"我是一个大好人"}}}
  params = {"boost": 1.0, "overwrite": "true", "commitwithin": 1000}
  url = 'http://127.0.0.1:8983/solr/mycore/update?wt=json'
  headers = {"content-type": "application/json"}
  r = requests.post(url, json=data, params=params, headers=headers)
  print r.text
 
 def pysolr_index_data_fromcsv(self, csvfile,url='http://127.0.0.1:8983/solr/mycore/'):
  '''
   从csv文件中读取数据,并索引到solr中
   :param csvfile: csv文件,包括完整路径
   :return:
   '''
  list = csvop.readcsv(csvfile)
  index = 0
  listdocs = []
  for item in list:
   if index > 0: # 第一行是标题
    doc = {}
    try:
     doc['title'] = item[0].decode('gb2312')
     doc['link'] = item[1]
     # doc['date'] = item[2]
     doc['source'] = item[3].decode('gb2312')
     doc['keyword'] = item[4].decode('gb2312')
     listdocs.append(doc)
    except exception,e:
     print e.message
   index += 1
  solr = pysolr.solr(url, timeout=10)
  result = solr.add(listdocs)
  print result
 
 def index_data_fromcsv(self, csvfile):
  '''
   从csv文件中读取数据,并索引到solr中
   :param csvfile: csv文件,包括完整路径
   :return:
   '''
  list = csvop.readcsv(csvfile)
  index = 0
  doc = {}
  params = {"boost": 1.0, "overwrite": "true", "commitwithin": 1000}
  url = 'http://127.0.0.1:8983/solr/mycore/update?wt=json'
  headers = {"content-type": "application/json"}
  for item in list:
   if index > 0: # 第一行是标题
    try:
     doc['title'] = item[0].decode('gb2312')
     doc['link'] = item[1]
     # doc['date'] = item[2]
     doc['source'] = item[3].decode('gb2312')
     doc['keyword'] = item[4].decode('gb2312')
     data = {"add": {"doc": doc}}
     r = requests.post(url, json=data, params=params, headers=headers)
     print r.text
    except exception,e:
     print e.message
 
   print index
   index += 1
 
 def index_data(self):
  solr = pysolr.solr('http://127.0.0.1:8983/solr/mycore/', timeout=10)
 
  # how you'd index data.
  result = solr.add([
   {
    "id": "doc_1",
    "title": "a test document",
   },
   {
    "id": "doc_2",
    "title": "the banana: tasty or dangerous?",
   },
  ])
  print result
 
 def search_data(self,where='视频',url='http://127.0.0.1:8983/solr/mycore/'):
  solr = pysolr.solr(url, timeout=10)
  dict = {'start':10,'rows': 30,'fl':'title,keyword,source,link'}
  result = solr.search('title:视频',**dict)
  # result = solr.search('title:视频')
  # print result.raw_response['response']['numfound']
 
  for item in result:
   print 'keyword: %s'% item['keyword']
   print 'title: %s'% item['title']
   print 'source: %s'% item['source']
   print 'link: %s'% item['link']
   print '    '
 
 def delete_index_data(self,where,url='http://127.0.0.1:8983/solr/mycore/'):
  '''
  删除索引
  :param where: 删除的条件
  :param url: url
  :return:
  '''
  solr = pysolr.solr(url, timeout=10)
  # solr.delete(id=where) #id='id1':删除id为“id1”的索引
  result = solr.delete(q=where) #q='*:*'删除所有索引
  print result
 
 
obj = solrclientobj()
# obj.delete_index_data('*:*') #删除所有索引
# obj.index_data()
# obj.search_data()
# obj.delete_index_data('doc_1')
obj.search_data('视频')
# csvfile = 'd:/work/solr/other/exportexcels/2017-07-07_info.csv'
# obj.pysolr_index_data_fromcsv(csvfile)

以上这篇对python 操作solr索引数据的实例详解就是小编分享给大家的全部内容了,希望能给大家一个参考,也希望大家多多支持服务器之家。

原文链接:https://www.cnblogs.com/shaosks/p/7845576.html

标签:

相关文章

热门资讯

2020微信伤感网名听哭了 让对方看到心疼的伤感网名大全
2020微信伤感网名听哭了 让对方看到心疼的伤感网名大全 2019-12-26
yue是什么意思 网络流行语yue了是什么梗
yue是什么意思 网络流行语yue了是什么梗 2020-10-11
背刺什么意思 网络词语背刺是什么梗
背刺什么意思 网络词语背刺是什么梗 2020-05-22
苹果12mini价格表官网报价 iPhone12mini全版本价格汇总
苹果12mini价格表官网报价 iPhone12mini全版本价格汇总 2020-11-13
Intellij idea2020永久破解,亲测可用!!!
Intellij idea2020永久破解,亲测可用!!! 2020-07-29
返回顶部