elasticsearch
1. Installation
mkdir -p /opt/elasticsearch/config
mkdir -p /opt/elasticsearch/data
mkdir -p /opt/elasticsearch/plugins
echo "http.host: 0.0.0.0" >> /opt/elasticsearch/config/elasticsearch.yml
docker run --name elasticsearch -p 9200:9200 -p 9300:9300 \
-e "discovery.type=single-node" \
-e ES_JAVA_OPTS="-Xms84m -Xmx512m" \
-v /opt/elasticsearch/config/elasticsearch.yml:/usr/share/elasticsearch/config/elasticsearch.yml \
-v /opt/elasticsearch/data:/usr/share/elasticsearch/data \
-v /opt/elasticsearch/plugins:/usr/share/elasticsearch/plugins \
-d elasticsearch:7.12.0
docker run --name kibana -e ELASTICSEARCH_HOSTS=http://192.168.111.145:9200 -p 5601:5601 \
-d kibana:7.12.0
Upload any plugins to /opt/elasticsearch/plugins on the host, then restart the es container.
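A quick sanity check from the Docker host (ports as mapped above; the restart covers the plugin reload just mentioned):
docker restart elasticsearch
curl http://localhost:9200        # should return the cluster name and version JSON
curl -I http://localhost:5601     # Kibana responds once it has connected to Elasticsearch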
2. Elasticsearch
Lucene is a full-text search library (toolkit): the architecture of a full-text search engine, providing a complete query engine and indexing engine, plus partial text-analysis engines (for English and German, two Western languages). It covers things such as index structures, tools for reading and writing indexes, sorting, and filtering. ------ Doug Cutting
Nutch is a web search application built on top of the Lucene core.
Google's papers and their Hadoop counterparts:
GFS (Google File System)  ->  HDFS (Hadoop Distributed File System)
MapReduce                 ->  MapReduce
BigTable                  ->  HBase
es (which grew out of Compass) wraps and enhances Lucene; every operation goes through a RESTful API, hiding Lucene's complexity. ------- Shay Banon
Core concepts: index, type (deprecated), document, field. Elasticsearch is document-oriented and stores documents as JSON objects.
Inverted index: given a value (a term), find the ids of the documents that contain it; forward index: given an id (key), find its data (value). Each shard keeps an inverted index (terms as keys, document ids as values).
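For example, with doc 1 = "quick brown fox" and doc 2 = "quick dog", the forward index stores 1 -> "quick brown fox" and 2 -> "quick dog", while the inverted index stores quick -> [1, 2], brown -> [1], fox -> [1], dog -> [2].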
Solr, by comparison, speaks XML and JSON over a web service API.
3. Practice
# Create an index
PUT test_prod
# View the index (its mappings and settings)
GET test_prod
# Delete the index
DELETE test_prod
# Create an index with an explicit mapping (note: the sample documents below are still indexed into test_prod, which gets its mapping dynamically)
PUT test_prodno
{
"settings": {
"number_of_shards": 1,
"number_of_replicas": 0
},
"mappings": {
"properties": {
"branchid":{
"type": "keyword"
},
"prodno": {
"type": "text"
},
"memorycode": {
"type": "text"
},
"manufacture": {
"type": "text"
}
}
}
}
PUT test_prod/_doc/1
{"branchid":"FDG","prodno":"ABS000189AD","memorycode":"bysamxlkl001","ioid":"1100","manufacture":"澳美卡卡的顺丰快递电商分类的道路上舒服多了登录时"}
PUT test_prod/_doc/2
{"branchid":"FDG","prodno":"ABS000189AC","memorycode":"amxlkl002","ioid":"1101","manufacture":"的说法都是方法的是范德萨范德萨范德萨的方式范德萨范德萨发"}
PUT test_prod/_doc/5
{"branchid":"FDG","prodno":"ABS000189AE","memorycode":"amxlkl003","ioid":"1102","manufacture":"范德萨范德萨范德萨反倒是个额外热热我发到后台会还没喝过你换个给胡芳"}
PUT test_prod/_doc/3
{"branchid":"FDJ","prodno":"ABS000120AF","memorycode":"bjamxlkl001","ioid":"1102","manufacture":"几公分给返点发的广泛地要投入看看计划"}
PUT test_prod/_doc/4
{"branchid":"FDJ","prodno":"ABS000121AG","memorycode":"amxlkl003","ioid":"2201","manufacture":"范德萨发顺丰撒范德萨范德萨范德萨的说法的萨芬的萨芬的我认为额外热舞"}
# Delete a document
# DELETE test_prod/_doc/1
GET test_prod/_search
# keyword fields are stored as a single, indivisible term and are not analyzed; text fields are analyzed (split into tokens)
#GET test_prod/_mapping
#GET test_prod/_settings
GET test_prod/_mapping
# Queries
# match_all            matches all documents
# match                analyzed match (the query text is tokenized)
# match_phrase         analyzed phrase match
# match_phrase_prefix  analyzed phrase match with a prefix on the last term
# multi_match          match against several fields
# range                range query
# term                 exact value, like SQL =
# terms                set of exact values, like SQL IN
# query_string         terms need not be adjacent and may be reordered; a fuzzy-style match, not recommended
# wildcard             LIKE-style query using the * wildcard and ? placeholder; fuzzy-style match, not recommended
# fuzzy                approximate match that tolerates typos in the keyword; imprecise and unstable, not recommended
# regexp               regular-expression query; fuzzy-style match, not recommended
# bool                 compound query combining the above (a term/terms/bool sketch follows this list)
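# A minimal term / terms / bool sketch against the same test_prod data; it assumes the
# default dynamic mapping of the sample documents above, so branchid.keyword and
# ioid.keyword exist as un-analyzed subfields.
GET test_prod/_search
{
  "query": {
    "bool": {
      "must": [
        { "term": { "branchid.keyword": "FDG" } }
      ],
      "filter": [
        { "terms": { "ioid.keyword": [ "1100", "1101" ] } }
      ]
    }
  }
}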
GET test_prod/_search
{
"from": 0,
"size": 2,
"_source": "branchid",
"aggs": {
"branch_agg": {
"terms": {
"field": "branchid.keyword",
"size": 10
}
}
},
"sort": [
{
"branchid.keyword": {
"order": "desc"
}
}
],
"query": {
"match_all": {}
}
}
GET test_prod/_search
{
"query": {
"match": {
"manufacture": "卡卡的"
}
}
}
GET test_prod/_search
{
"query": {
"match_phrase": {
"manufacture": "澳美"
}
}
}
GET test_prod/_search
{
"query": {
"match_phrase_prefix": {
"manufacture": "后台"
}
}
}
GET test_prod/_analyze
{
"explain": false,
"text": ["范德萨范德萨范德萨反倒是个额外热热我发到后台会还没喝过你换个给胡芳"],
"analyzer": "standard"
}
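# Note: the standard analyzer splits Chinese text into single-character tokens (as the
# _analyze call above shows), which is why the match query on "卡卡的" earlier hits any
# document containing those individual characters.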
# multi_match across manufacture and memorycode (example values taken from the sample documents above)
GET test_prod/_search
{
  "query": {
    "multi_match": {
      "query": "范德萨",
      "fields": ["manufacture", "memorycode"]
    }
  }
}
# Analyzer components (a combined _analyze sketch follows this list)
# character filter  transforms the raw character stream (e.g. strips HTML)
# tokenizer         splits the stream into tokens
# token filter      adds, removes or changes tokens (e.g. lowercase, ngram)
# ngram and edge-ngram (the token filters used in the indexes below)
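# A minimal sketch combining all three components in one ad-hoc _analyze call
# (html_strip, standard and lowercase are built-in; the text is only an illustration):
GET _analyze
{
  "char_filter": ["html_strip"],
  "tokenizer": "standard",
  "filter": ["lowercase"],
  "text": ["<b>Hello</b> Elasticsearch"]
}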
DELETE my_ngram_index
PUT my_ngram_index
{
"settings": {
"max_ngram_diff": 50,
"analysis": {
"filter": {
"my_fil_ngram": {
"type": "ngram",
"min_gram": 2,
"max_gram": 13
}
},
"analyzer": {
"my_ana_ngram": {
"type": "custom",
"tokenizer": "standard",
"filter": [
"my_fil_ngram"
]
}
}
}
},
"mappings": {
"properties": {
"branchid": {
"type": "keyword"
},
"prodno": {
"type": "text",
"analyzer": "my_ana_ngram",
"search_analyzer": "standard"
},
"memorycode": {
"type": "text",
"analyzer": "my_ana_ngram",
"search_analyzer": "standard"
},
"manufacture": {
"type": "text",
"analyzer": "my_ana_ngram",
"search_analyzer": "standard"
}
}
}
}
GET my_ngram_index/_analyze
{
"analyzer": "my_ana_ngram",
"text": ["bysamxlkl001"]
}
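# Why the ngram filter helps: every 2-13 character substring of each token is indexed, so a
# partial code can be found with a plain match query (the document reuses the test_prod sample data):
PUT my_ngram_index/_doc/1
{"branchid":"FDG","prodno":"ABS000189AD","memorycode":"bysamxlkl001","manufacture":"澳美卡卡的顺丰快递电商分类的道路上舒服多了登录时"}
GET my_ngram_index/_search
{
  "query": {
    "match": {
      "memorycode": "samxlkl"
    }
  }
}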
PUT my_edge_ngram_index
{
"settings": {
"analysis": {
"filter": {
"my_fil_edge_ngram":{
"type": "edge_ngram",
"min_gram": 2,
"max_gram": 3
}
},
"analyzer": {
"my_ana_edge_ngram":{
"type": "custom",
"tokenizer": "standard",
"filter": ["my_fil_edge_ngram"]
}
}
}
},
"mappings": {
"properties": {
"name": {
"type": "text",
"analyzer": "my_ana_edge_ngram",
"search_analyzer": "standard"
}
}
}
}
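# Edge n-grams only grow from the start of each token; with min_gram 2 and max_gram 3 the
# text below should produce just "by" and "bys":
GET my_edge_ngram_index/_analyze
{
  "analyzer": "my_ana_edge_ngram",
  "text": ["bysamxlkl001"]
}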