首页 > 其他分享 >使用Solrj管理Solr索引


时间:2023-04-28 11:05:22浏览次数:38  
标签:String Solrj void private 索引 new import Solr public





  • 一种是直接在客户端进行计算,亦即,进行索引时计算添加的文档数,满足设置的值则进行手动commit,这种方式比较灵活,你可以根据搜索服务器的运行状况选择合理的commit文档数量;
  • 另一种是,直接在Solr搜索服务器上进行配置。一般来说,对索引进行大批量更新,不会选择在搜索服务器业务繁忙的时候进行,所以能够自动进行commit也便利了对索引的管理,更新文档完全可以实现自动化处理。


1. <requestHandler name="/update" class="solr.XmlUpdateRequestHandler">  
2. <maxPendingDeletes>10000</maxPendingDeletes>  
3. <autoCommit>  
4. <maxDocs>20</maxDocs>  
5. <maxTime>86000</maxTime>  
6. </autoCommit>  
7. </requestHandler>

在Solr 4.0中,将会实现一种基于“软自动提交”(soft auto commit)的功能,它会根据当前的系统上下文决定是否提交(简单的情况就是,确保每次commit完成,也就是最近的索引数据更新已经同步到磁盘上之后,再自动执行下一次commit)。




1. package org.shirdrn.solr.solrj;  
3. import java.io.IOException;  
4. import java.io.Serializable;  
5. import java.net.MalformedURLException;  
6. import java.util.ArrayList;  
7. import java.util.Collection;  
8. import java.util.HashMap;  
9. import java.util.Iterator;  
10. import java.util.List;  
11. import java.util.Map;  
13. import org.apache.commons.httpclient.HttpClient;  
14. import org.apache.log4j.Logger;  
15. import org.apache.lucene.document.Document;  
16. import org.apache.solr.client.solrj.ResponseParser;  
17. import org.apache.solr.client.solrj.SolrServerException;  
18. import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;  
19. import org.apache.solr.client.solrj.impl.XMLResponseParser;  
20. import org.apache.solr.common.SolrDocument;  
21. import org.apache.solr.common.SolrDocumentList;  
22. import org.apache.solr.common.SolrInputDocument;  
23. import org.apache.solr.common.params.SolrParams;  
25. import com.mongodb.BasicDBObject;  
26. import com.mongodb.DBCollection;  
27. import com.mongodb.DBCursor;  
28. import com.mongodb.DBObject;  
29. import com.mongodb.Mongo;  
30. import com.mongodb.MongoException;  
32. /**
33.  * Solr server for indexes operations.
34.  * 
35.  * @author shirdrn
36.  * @date   2011-12-20
37.  */  
38. public class SolrPostServer {  
40. private static final Logger LOG = Logger.getLogger(SolrPostServer.class);  
41. private CommonsHttpSolrServer server;   
42. private ResponseParser responseParser;  
44. private MongoConfig mongoConfig;  
45. private String[] collectionNames;  
46. private  int maxCommitCount = 100;  
47. private boolean manualOptimize = true;  
49. private boolean manualCommit = false;  
50. private Collection<SolrInputDocument> docContainer = new ArrayList<SolrInputDocument>();  
51. private static int totalCount = 0;  
53. public SolrPostServer(String url, HttpClient httpClient, MongoConfig mongoConfig) {  
54. try {  
55. if(httpClient==null) {  
56. new CommonsHttpSolrServer(url);  
57. 500000);  // socket read timeout  
58. 5000);    
59. 10);    
60. 100);  
61. true);    
62. 1); // defaults to 0.  > 1 not recommended.   
63. else {  
64. new CommonsHttpSolrServer(url, httpClient);  
65.             }  
66. catch (MalformedURLException e) {  
67.             e.printStackTrace();  
68.         }  
69. this.mongoConfig = mongoConfig;  
70.         initialize();  
71.     }  
73. /**
74.      * Initialize the {@link CommonsHttpSolrServer}'s basic parameters.
75.      */  
76. private void initialize() {  
77. if(responseParser!=null) {  
78.             server.setParser(responseParser);  
79. else {  
80. new XMLResponseParser());  
81.         }  
82.     }  
84. @SuppressWarnings("unchecked")  
85. public void postUpdate() {  
86. null;  
87. try {  
88. for (String c : collectionNames) {  
89. "MongoDB collection name: " + c);  
90.                 DBCollection collection = MongoHelper.newHelper(mongoConfig).getCollection(c);  
91. new BasicDBObject();  
92.                 cursor = collection.find(q);  
93. while(cursor.hasNext()) {  
94. try {  
95.                         Map<Object, Object> m = cursor.next().toMap();  
96. if(manualCommit) {  
97. true);  
98. else {  
99. false);  
100.                         }  
101.                         ++totalCount;  
102. "Add fragment: _id = " + m.get("_id").toString());  
103. catch (IOException e) {  
104.                         e.printStackTrace();  
105.                     }  
106.                 }  
107.                 cursor.close();  
108.             }  
109. "Add totalCount: " + totalCount);  
110.             finallyCommit();  
111.             optimize(manualOptimize);  
112. catch (MongoException e) {  
113.             e.printStackTrace();  
114. catch (SolrServerException e) {  
115.             e.printStackTrace();  
116. catch (IOException e) {  
117.             e.printStackTrace();  
118.         }  
119.     }  
121. /**
122.      * Detele lucene {@link Document} by IDs.
123.      * @param strings
124.      */  
125. public void deleteById(List<String> strings) {  
126. try {  
127.             server.deleteById(strings);  
128. catch (SolrServerException e) {  
129.             e.printStackTrace();  
130. catch (IOException e) {  
131.             e.printStackTrace();  
132.         }  
133.     }  
135. /**
136.      * Detele lucene {@link Document} by query.
137.      * @param query
138.      */  
139. public void deleteByQuery(String query) {  
140. try {  
141.             server.deleteByQuery(query);  
142. catch (SolrServerException e) {  
143.             e.printStackTrace();  
144. catch (IOException e) {  
145.             e.printStackTrace();  
146.         }  
147.     }  
149. /**
150.      * Query.
151.      * @param params
152.      * @param fields
153.      * @return
154.      */  
155. public List<Map<String, Object>> query(SolrParams params, String[] fields) {  
156. new ArrayList<Map<String, Object>>();  
157. try {  
158.             SolrDocumentList documents = server.query(params).getResults();  
159.             Iterator<SolrDocument> iter = documents.iterator();  
160. while(iter.hasNext()) {  
161.                 SolrDocument doc = iter.next();  
162. new HashMap<String, Object>();  
163. for(String field : fields) {  
164.                     map.put(field, doc.getFieldValue(field));  
165.                 }  
166.                 results.add(map);  
167.             }  
168. catch (SolrServerException e) {  
169.             e.printStackTrace();  
170.         }  
171. return results;  
172.     }  
174. /**
175.      * When controlling the committing action at client side, finally execute committing.
176.      * @throws SolrServerException
177.      * @throws IOException
178.      */  
179. private void finallyCommit() throws SolrServerException, IOException {  
180. if(!docContainer.isEmpty()) {  
181.             server.add(docContainer);  
182. false, false);  
183.         }  
184.     }  
186. /**
187.      * Commit.
188.      * @param waitFlush
189.      * @param waitSearcher
190.      * @throws SolrServerException
191.      * @throws IOException
192.      */  
193. public void commit(boolean waitFlush, boolean waitSearcher) {  
194. try {  
195.             server.commit(waitFlush, waitSearcher);  
196. catch (SolrServerException e) {  
197.             e.printStackTrace();  
198. catch (IOException e) {  
199.             e.printStackTrace();  
200.         }  
201.     }  
203. /**
204.      * When controlling the optimizing action at client side, finally execute optimizing.
205.      * @param waitFlush
206.      * @param waitSearcher
207.      * @throws SolrServerException
208.      * @throws IOException
209.      */  
210. public void optimize(boolean waitFlush, boolean waitSearcher) {  
211. try {  
212.             server.optimize(waitFlush, waitSearcher);  
213.             commit(waitFlush, waitSearcher);  
214. catch (Exception e) {  
215. "Encounter error when optimizing.",  e);  
216. try {  
217.                 server.rollback();  
218. catch (SolrServerException e1) {  
219.                 e1.printStackTrace();  
220. catch (IOException e1) {  
221.                 e1.printStackTrace();  
222.             }  
223.         }  
224.     }  
226. /**
227.      * Optimize.
228.      * @param optimize
229.      * @throws SolrServerException
230.      * @throws IOException
231.      */  
232. private void optimize(boolean optimize) {  
233. if(optimize) {  
234. true, true);  
235.         }  
236.     }  
238. /**
239.      * Add a {@link SolrInputDocument} or collect object and add to the a collection for batch updating
240.      * from a mongodb's recored, a Map object.
241.      * @param m
242.      * @param oneByOne
243.      * @throws SolrServerException
244.      * @throws IOException
245.      */  
246. private void add(Map<Object, Object> m, boolean oneByOne) throws SolrServerException, IOException {  
247.         SolrInputDocument doc = createDocument(m);  
248. if(oneByOne) {  
249.             server.add(doc);  
250. else {  
251.             docContainer.add(doc);  
252. if(docContainer.size()>maxCommitCount) {  
253.                 server.add(docContainer);  
254. false, false);  
255. new ArrayList<SolrInputDocument>();  
256.             }  
257.         }  
258.     }  
260. /**
261.      * Create a {@link SolrInputDocument} object.
262.      * @param record
263.      * @return
264.      */  
265. private SolrInputDocument createDocument(Map<Object, Object> record) {  
266. "_id").toString();  
267. "articleId");  
268. "title");  
269. "url");  
270. "spiderName");  
271. "fragment"));  
272. "word");  
273. int pictureCount = (Integer) record.get("pictureCount");  
274. int selectedCount = (Integer) record.get("selectedCount");  
275. int fragmentSize = (Integer) record.get("fragmentSize");  
277. new SolrInputDocument();  
278. "_id", id, 1.0f);  
279. "articleId", articleId, 1.0f);  
280. "title", title, 1.0f);  
281. "url", url, 1.0f);  
282. "spiderName", spiderName, 1.0f);  
283. "fragment", fragment, 1.0f);  
284. "word", word, 1.0f);  
285. // Additional processing for lucene payload metadata.  
286. "pictureCount", word + "|" + pictureCount);  
287. "coverage", word + "|" + (float)selectedCount/fragmentSize);  
288. return doc;  
289.     }  
291. @SuppressWarnings("unchecked")  
292. private String makeFragment(BasicDBObject fragment) {  
293. new StringBuilder();  
294.         Iterator<Map.Entry<Integer, String>> iter = fragment.toMap().entrySet().iterator();  
295. while(iter.hasNext()) {  
296.             Map.Entry<Integer, String> entry = iter.next();  
297. "<br>");  
298.         }  
299. return builder.toString();  
300.     }  
302. /**
303.      * Set {@link ResponseParser}, default value is {@link XMLResponseParser}.
304.      * @param responseParser
305.      */  
306. public void setResponseParser(ResponseParser responseParser) {  
307. this.responseParser = responseParser;  
308.     }  
310. /**
311.      * Pulling document resource from multiple collections of MongoDB.
312.      * @param collectionNames
313.      */  
314. public void setCollectionNames(String[] collectionNames) {  
315. this.collectionNames = collectionNames;  
316.     }  
318. public void setMaxCommitCount(int maxCommitCount) {  
319. this.maxCommitCount = maxCommitCount;  
320.     }  
322. public void setManualCommit(boolean manualCommit) {  
323. this.manualCommit = manualCommit;  
324.     }  
326. public void setManualOptimize(boolean manualOptimize) {  
327. this.manualOptimize = manualOptimize;  
328.     }  
330. /**
331.      * Mongo database configuration.
332.      * 
333.      * @author shirdrn
334.      * @date   2011-12-20
335.      */  
336. public static class MongoConfig implements Serializable {  
337. private static final long serialVersionUID = -3028092758346115702L;  
338. private String host;  
339. private int port;  
340. private String dbname;  
341. private String collectionName;  
342. public MongoConfig(String host, int port, String dbname, String collectionName) {  
343. super();  
344. this.host = host;  
345. this.port = port;  
346. this.dbname = dbname;  
347. this.collectionName = collectionName;  
348.         }  
349. @Override  
350. public boolean equals(Object obj) {  
351.             MongoConfig other = (MongoConfig) obj;  
352. return host.equals(other.host) && port==other.port  
353.                 && dbname.equals(other.dbname) && collectionName.equals(other.collectionName);  
354.         }  
355.     }  
357. /**
358.      * Mongo database utility.
359.      * 
360.      * @author shirdrn
361.      * @date   2011-12-20
362.      */  
363. static class MongoHelper {  
364. private static Mongo mongo;  
365. private static MongoHelper helper;  
366. private MongoConfig mongoConfig;  
367. private MongoHelper(MongoConfig mongoConfig) {  
368. super();  
369. this.mongoConfig = mongoConfig;  
370.         }  
371. public synchronized static MongoHelper newHelper(MongoConfig mongoConfig) {  
372. try {  
373. if(helper==null) {  
374. new MongoHelper(mongoConfig);  
375. new Mongo(mongoConfig.host, mongoConfig.port);  
376. new Thread() {  
377. @Override  
378. public void run() {  
379. if(mongo!=null) {  
380.                                 mongo.close();  
381.                             }  
382.                         }  
383.                     });  
384.                 }  
385. catch (Exception e) {  
386.                 e.printStackTrace();  
387.             }  
388. return helper;  
389.         }             
390. public DBCollection getCollection(String collectionName) {  
391. null;  
392. try {  
393.                 c = mongo.getDB(mongoConfig.dbname).getCollection(collectionName);  
394. catch (Exception e) {  
395.                 e.printStackTrace();  
396.             }  
397. return c;  
398.         }     
399.     }  
400. }







1. package org.shirdrn.solr.solrj;  
3. import java.util.ArrayList;  
4. import java.util.HashMap;  
5. import java.util.List;  
6. import java.util.Map;  
8. import junit.framework.TestCase;  
10. import org.apache.solr.common.params.CommonParams;  
11. import org.apache.solr.common.params.SolrParams;  
12. import org.apache.solr.request.MapSolrParams;  
13. import org.shirdrn.solr.solrj.SolrPostServer.MongoConfig;  
15. @SuppressWarnings("deprecation")  
16. public class TestSolrPostServer extends TestCase {  
18.     SolrPostServer myServer;  
19.     MongoConfig config;  
20.     String url;  
21.     String[] collectionNames;  
23. @Override  
24. protected void setUp() throws Exception {  
25. "";  
26. new MongoConfig("", 27017, "fragment", "");  
27. new SolrPostServer(url, null, config);  
28. 100);  
29.     }  
31. @Override  
32. protected void tearDown() throws Exception {  
33. super.tearDown();  
34.     }  
36. public void testPostUpdate() {  
37. new String[] {  
38. "sina",  
39. "lvping",  
40. "daodao",  
41. "go2eu",  
42. "mafengwo",  
43. "lotour",  
44. "17u",  
45. "sohu",  
46. "baseSe",  
47. "bytravel"  
48.         };  
49.         myServer.setCollectionNames(collectionNames);  
50. true);  
51. false);  
52.         myServer.postUpdate();  
53.     }  
55. public void testPostDelete() {  
56. new ArrayList<String>();  
57. "4ef051342c4117a38f63ee97");  
58. "4ef051322c4117a38f63ee36");  
59. "4ef051a42c4117a38f63fb51");  
60. "4ef050d92c4117a38f63dda4");  
61. "4ef051fe2c4117a38f640bc8");  
62. "4ef048ef2c4117a38f6207ce");  
63. "4ef049062c4117a38f620e13");  
64. "4ef046f12c4117a38f6185c0");  
65.         myServer.deleteById(strings);  
66. false, false);  
67. true, false);  
68.     }  
70. @SuppressWarnings({ "rawtypes", "unchecked" })  
71. public void testQuery() {  
72. new HashMap();  
73. "法国");  
74. "0");  
75. "10");  
76. "word:卢浮宫");  
77. new MapSolrParams(map);  
78. new String[] {"_id", "title", "url"});  
79. 10, results.size());  
80.     }  
81. }


在实际开发的过程中,使用Solrj客户端可以非常容易地为测试做一些基本操作,如创建索引、测试Solr基本参数,以及开发定制Solr相关接口(Retrieval、Highlighting、Faceted Search、Clustering等等)。

From: https://blog.51cto.com/u_16087105/6233574


  • solr高亮显示和多索引字段查询
    You can directly test the highlighting by passing the highlight parameters, e.g. hl=true&hl.fl=name,features. Also, you can configure the highlight defaults within your request handler in solrconfig.xml, e.g. <requestHandler name="/browse" clas......
  • Vim编辑器命令索引
  • 搜索引擎如何判断锚文本质量
  • uniapp脚手架项目抖音小程序中使用了uView框架中的IndexList 索引列表组件报错uni.req
  • 【linux】循序渐进学运维-MySQL-索引
  • 使用泛型和索引器实现集合类MyLis
  • Three.js教程:顶点索引复用顶点数据
  • 索引列表的制作,中文拼音排序
  • MySQL索引详解
  • PGSQL 查询哪些表要索引,查表行数