Elasticsearch笔记第二十六篇-一一网

Elasticsearch核心知识篇(61)

索引管理_快速上机动手实战创建、修改以及删除索引

创建索引

创建索引的语法

 PUT /my_index
 {
     "settings": { ... any settings ... },
     "mappings": {
         "type_one": { ... any mappings ... },
         "type_two": { ... any mappings ... },
         ...
     }
 }
复制代码

创建索引的示例

 PUT /my_index
 {
   "settings": {
     "number_of_shards": 1,
     "number_of_replicas": 0
   },
   "mappings": {
     "my_type": {
       "properties": {
         "my_field": {
           "type": "text"
         }
       }
     }
   }
 }
 
 {
   "acknowledged": true,
   "shards_acknowledged": true
 }
复制代码

修改索引

 PUT /my_index/_settings
 {
     "number_of_replicas": 1
 }
 
 GET /my_index
 {
   "my_index": {
     "aliases": {},
     "mappings": {
       "my_type": {
         "properties": {
           "my_field": {
             "type": "text"
           }
         }
       }
     },
     "settings": {
       "index": {
         "creation_date": "1629427405603",
         "number_of_shards": "1",
         "number_of_replicas": "1",
         "uuid": "yayCFwRaTyK3bTWrzf41xw",
         "version": {
           "created": "5020099"
         },
         "provided_name": "my_index"
       }
     }
   }
 }
复制代码

删除索引

 DELETE /my_index
 DELETE /index_one,index_two
 DELETE /index_*
 DELETE /_all
复制代码

为防止误删全部索引，可以在配置文件中进行限制

 配置文件：elasticsearch.yml
 action.destructive_requires_name: true   # 删除时必须显式指定索引名，不能使用_all或通配符（如index_*）的方式进行批量删除
复制代码

Elasticsearch核心知识篇(62)

索引管理_快速上机动手实战修改分词器以及定制自己的分词器

默认的分词器

standard tokenizer：以单词边界进行切分，对英文来说基本就是按照空格和标点进行切分
standard token filter：什么都不做
lowercase token filter：将所有字母转换为小写
stop token filter（默认被禁用）：移除停用词，比如a the it等等

修改分词器的设置

启用english停用词token filter

 PUT /my_index
 {
   "settings": {
     "analysis": {
       "analyzer": {
         "es_std": {
           "type": "standard",
           "stopwords": "_english_"
         }
       }
     }
   }
 }
 
 {
   "acknowledged": true,
   "shards_acknowledged": true
 }
复制代码

进行分词示例

 GET /my_index/_analyze
 {
   "analyzer": "standard", 
   "text": "a dog is in the house"
 }
 
 {
   "tokens": [
     {
       "token": "a",
       "start_offset": 0,
       "end_offset": 1,
       "type": "<ALPHANUM>",
       "position": 0
     },
     {
       "token": "dog",
       "start_offset": 2,
       "end_offset": 5,
       "type": "<ALPHANUM>",
       "position": 1
     },
     {
       "token": "is",
       "start_offset": 6,
       "end_offset": 8,
       "type": "<ALPHANUM>",
       "position": 2
     },
     {
       "token": "in",
       "start_offset": 9,
       "end_offset": 11,
       "type": "<ALPHANUM>",
       "position": 3
     },
     {
       "token": "the",
       "start_offset": 12,
       "end_offset": 15,
       "type": "<ALPHANUM>",
       "position": 4
     },
     {
       "token": "house",
       "start_offset": 16,
       "end_offset": 21,
       "type": "<ALPHANUM>",
       "position": 5
     }
   ]
 }
复制代码

使用上面自定义的es_std进行分词

 GET /my_index/_analyze
 {
   "analyzer": "es_std",
   "text":"a dog is in the house"
 }
 
 {
   "tokens": [
     {
       "token": "dog",
       "start_offset": 2,
       "end_offset": 5,
       "type": "<ALPHANUM>",
       "position": 1
     },
     {
       "token": "house",
       "start_offset": 16,
       "end_offset": 21,
       "type": "<ALPHANUM>",
       "position": 5
     }
   ]
 }
复制代码

定制化自己的分词器

（其中内置的html_strip字符过滤器用于将html标签去掉）

 PUT /my_index_2
 {
   "settings": {
     "analysis": {
       "char_filter": {
         "&_to_and": {
           "type": "mapping",
           "mappings": ["&=> and"]
         }
       },
       "filter": {
         "my_stopwords": {
           "type": "stop",
           "stopwords": ["the", "a"]
         }
       },
       "analyzer": {
         "my_analyzer": {
           "type": "custom",
           "char_filter": ["html_strip", "&_to_and"],
           "tokenizer": "standard",
           "filter": ["lowercase", "my_stopwords"]
         }
       }
     }
   }
 }
复制代码

示例展示

 GET /my_index_2/_analyze
 {
   "text": "tom&jerry are a friend in the house, <a>, HAHA!!",
   "analyzer": "my_analyzer"
 }
 
 {
   "tokens": [
     {
       "token": "tomandjerry",
       "start_offset": 0,
       "end_offset": 9,
       "type": "<ALPHANUM>",
       "position": 0
     },
     {
       "token": "are",
       "start_offset": 10,
       "end_offset": 13,
       "type": "<ALPHANUM>",
       "position": 1
     },
     {
       "token": "friend",
       "start_offset": 16,
       "end_offset": 22,
       "type": "<ALPHANUM>",
       "position": 3
     },
     {
       "token": "in",
       "start_offset": 23,
       "end_offset": 25,
       "type": "<ALPHANUM>",
       "position": 4
     },
     {
       "token": "house",
       "start_offset": 30,
       "end_offset": 35,
       "type": "<ALPHANUM>",
       "position": 6
     },
     {
       "token": "haha",
       "start_offset": 42,
       "end_offset": 46,
       "type": "<ALPHANUM>",
       "position": 7
     }
   ]
 }
复制代码

为特定type中的字段指定我们自定义的分词器

 PUT /my_index_2/_mapping/my_type
 {
   "properties": {
     "content": {
       "type": "text",
       "analyzer": "my_analyzer"
     }
   }
 }
复制代码

文章版权归作者所有，未经允许请勿转载。

THE END

后端

Yii2:在多个表之间定义关系的正确方法

PHP Fileinfo组件‘mconvert’函数缓冲区溢出漏洞

321soft PhP-Gallery index.php 跨站脚本攻击漏洞

一种针对文字识别的多模态半监督方法

java前后端分离项目中使用shiro权限框架遇到的那些坑

【现金红包】大兴应急宪法日答题抽红包

Elasticsearch笔记第二十六篇

Elasticsearch核心知识篇(61)

索引管理_快速上机动手实战创建、修改以及删除索引

创建索引

修改索引

删除索引

Elasticsearch核心知识篇(62)

索引管理_快速上机动手实战修改分词器以及定制自己的分词器

默认的分词器

修改分词器的设置

定制化自己的分词器