Elasticsearch Core Knowledge, Part 61
Index Management: Hands-On Practice with Creating, Modifying, and Deleting Indices
Creating an index
Syntax for creating an index
PUT /my_index
{
  "settings": { ... any settings ... },
  "mappings": {
    "type_one": { ... any mappings ... },
    "type_two": { ... any mappings ... },
    ...
  }
}
Example of creating an index
PUT /my_index
{
  "settings": {
    "number_of_shards": 1,
    "number_of_replicas": 0
  },
  "mappings": {
    "my_type": {
      "properties": {
        "my_field": {
          "type": "text"
        }
      }
    }
  }
}
Response:

{
  "acknowledged": true,
  "shards_acknowledged": true
}
Modifying an index
Dynamic settings such as number_of_replicas can be updated on a live index:
PUT /my_index/_settings
{
  "number_of_replicas": 1
}
Verify the change:

GET /my_index
Response:

{
  "my_index": {
    "aliases": {},
    "mappings": {
      "my_type": {
        "properties": {
          "my_field": {
            "type": "text"
          }
        }
      }
    },
    "settings": {
      "index": {
        "creation_date": "1629427405603",
        "number_of_shards": "1",
        "number_of_replicas": "1",
        "uuid": "yayCFwRaTyK3bTWrzf41xw",
        "version": {
          "created": "5020099"
        },
        "provided_name": "my_index"
      }
    }
  }
}
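Note that number_of_replicas is a dynamic setting and can be changed at any time. Static settings such as number_of_shards are fixed at index creation; a request like the sketch below should be rejected with an illegal-argument error, and changing the shard count requires creating a new index and reindexing into it:

PUT /my_index/_settings
{
  "number_of_shards": 2
}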
Deleting an index
An index can be deleted by name, as a comma-separated list, by wildcard, or all at once:
DELETE /my_index
DELETE /index_one,index_two
DELETE /index_*
DELETE /_all
Restricting deletion via the configuration file
Configuration file: elasticsearch.yml
action.destructive_requires_name: true  # deletion requires explicit index names; wildcard and _all deletes are rejected
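In recent Elasticsearch versions the same safeguard can also be applied at runtime through the cluster settings API, since action.destructive_requires_name is a dynamic cluster-wide setting. A minimal sketch, no node restart required:

PUT /_cluster/settings
{
  "persistent": {
    "action.destructive_requires_name": true
  }
}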
Elasticsearch Core Knowledge, Part 62
Index Management: Hands-On Practice with Modifying Analyzers and Building a Custom Analyzer
The default analyzer (standard)
The standard analyzer is built from the following components (see the sketch after this list, which rebuilds it from these parts):
- standard tokenizer: splits text on word boundaries
- standard token filter: does nothing; it passes tokens through unchanged
- lowercase token filter: converts all letters to lowercase
- stop token filter (disabled by default): removes stopwords such as a, the, it
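As a sanity check, the standard analyzer can be recreated from exactly these parts as a custom analyzer. A minimal sketch (the index name rebuilt_std_demo is made up for illustration; the stop filter is omitted because it is disabled by default, and note that newer Elasticsearch versions drop the no-op standard token filter entirely):

PUT /rebuilt_std_demo
{
  "settings": {
    "analysis": {
      "analyzer": {
        "rebuilt_standard": {
          "type": "custom",
          "tokenizer": "standard",
          "filter": ["standard", "lowercase"]
        }
      }
    }
  }
}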
Modifying analyzer settings
Enable the English stopword token filter (if my_index still exists from the previous section, DELETE it first, since creating an existing index fails):
PUT /my_index
{
  "settings": {
    "analysis": {
      "analyzer": {
        "es_std": {
          "type": "standard",
          "stopwords": "_english_"
        }
      }
    }
  }
}
Response:

{
  "acknowledged": true,
  "shards_acknowledged": true
}
- Analysis example with the built-in standard analyzer (stopwords are kept):
GET /my_index/_analyze
{
  "analyzer": "standard",
  "text": "a dog is in the house"
}
Response:

{
  "tokens": [
    { "token": "a",     "start_offset": 0,  "end_offset": 1,  "type": "<ALPHANUM>", "position": 0 },
    { "token": "dog",   "start_offset": 2,  "end_offset": 5,  "type": "<ALPHANUM>", "position": 1 },
    { "token": "is",    "start_offset": 6,  "end_offset": 8,  "type": "<ALPHANUM>", "position": 2 },
    { "token": "in",    "start_offset": 9,  "end_offset": 11, "type": "<ALPHANUM>", "position": 3 },
    { "token": "the",   "start_offset": 12, "end_offset": 15, "type": "<ALPHANUM>", "position": 4 },
    { "token": "house", "start_offset": 16, "end_offset": 21, "type": "<ALPHANUM>", "position": 5 }
  ]
}
- Now analyze the same sentence with the custom es_std analyzer defined above; the English stopwords (a, is, in, the) are removed:
GET /my_index/_analyze
{
  "analyzer": "es_std",
  "text": "a dog is in the house"
}
Response:

{
  "tokens": [
    { "token": "dog",   "start_offset": 2,  "end_offset": 5,  "type": "<ALPHANUM>", "position": 1 },
    { "token": "house", "start_offset": 16, "end_offset": 21, "type": "<ALPHANUM>", "position": 5 }
  ]
}
Building a custom analyzer
The analyzer below chains two char filters (html_strip removes HTML tags; the mapping filter rewrites & to " and "), the standard tokenizer, and two token filters (lowercase plus a custom stopword list):
PUT /my_index_2
{
  "settings": {
    "analysis": {
      "char_filter": {
        "&_to_and": {
          "type": "mapping",
          "mappings": ["&=> and"]
        }
      },
      "filter": {
        "my_stopwords": {
          "type": "stop",
          "stopwords": ["the", "a"]
        }
      },
      "analyzer": {
        "my_analyzer": {
          "type": "custom",
          "char_filter": ["html_strip", "&_to_and"],
          "tokenizer": "standard",
          "filter": ["lowercase", "my_stopwords"]
        }
      }
    }
  }
}
- Test the analyzer on a string that exercises each component:
GET /my_index_2/_analyze
{
  "text": "tom&jerry are a friend in the house, <a>, HAHA!!",
  "analyzer": "my_analyzer"
}
Response:

{
  "tokens": [
    { "token": "tomandjerry", "start_offset": 0,  "end_offset": 9,  "type": "<ALPHANUM>", "position": 0 },
    { "token": "are",         "start_offset": 10, "end_offset": 13, "type": "<ALPHANUM>", "position": 1 },
    { "token": "friend",      "start_offset": 16, "end_offset": 22, "type": "<ALPHANUM>", "position": 3 },
    { "token": "in",          "start_offset": 23, "end_offset": 25, "type": "<ALPHANUM>", "position": 4 },
    { "token": "house",       "start_offset": 30, "end_offset": 35, "type": "<ALPHANUM>", "position": 6 },
    { "token": "haha",        "start_offset": 42, "end_offset": 46, "type": "<ALPHANUM>", "position": 7 }
  ]
}
- Assigning the custom analyzer to a field in our type's mapping:
PUT /my_index_2/_mapping/my_type
{
  "properties": {
    "content": {
      "type": "text",
      "analyzer": "my_analyzer"
    }
  }
}
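To confirm that the field now resolves to my_analyzer, a field-level _analyze request looks the analyzer up from the mapping (a quick sketch):

GET /my_index_2/_analyze
{
  "field": "content",
  "text": "tom&jerry in the house"
}

Documents indexed afterwards will have content analyzed with my_analyzer at index time, and match queries against content will use the same analyzer at search time by default.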