Skip to content

Elasticsearch实战指南:从入门到精通

引言

Elasticsearch是一个分布式、RESTful风格的搜索和分析引擎。本文将详细介绍Elasticsearch的使用方法和最佳实践。

基础概念

核心概念

  1. 索引(Index)
  2. 类型(Type)- ES 7.0 起废弃,8.0 中已移除
  3. 文档(Document)
  4. 分片(Shard)
  5. 副本(Replica)

基本操作

索引管理

json
// 创建索引
PUT /products
{
  "settings": {
    "number_of_shards": 3,
    "number_of_replicas": 1
  },
  "mappings": {
    "properties": {
      "name": {
        "type": "text",
        "analyzer": "ik_max_word",
        "fields": {
          "keyword": {
            "type": "keyword"
          }
        }
      },
      "description": {
        "type": "text",
        "analyzer": "ik_smart"
      },
      "price": {
        "type": "double"
      },
      "created_at": {
        "type": "date"
      },
      "tags": {
        "type": "keyword"
      }
    }
  }
}

文档操作

json
// 添加文档
POST /products/_doc
{
  "name": "iPhone 15 Pro",
  "description": "Apple最新旗舰手机",
  "price": 7999.00,
  "created_at": "2024-03-15",
  "tags": ["手机", "苹果", "5G"]
}

// Bulk-index two products in a single request.
// The body is newline-delimited JSON, not one JSON document: each action line
// ({"index": ...}) is immediately followed by that document's source on the
// next line, and the last line of the body must end with a newline.
POST /_bulk
{"index":{"_index":"products","_id":"1"}}
{"name":"iPhone 15 Pro","price":7999.00}
{"index":{"_index":"products","_id":"2"}}
{"name":"Samsung S24 Ultra","price":8999.00}

搜索查询

基本查询

json
// 全文搜索
GET /products/_search
{
  "query": {
    "match": {
      "name": "iPhone 手机"
    }
  }
}

// 精确查询
GET /products/_search
{
  "query": {
    "term": {
      "tags": "5G"
    }
  }
}

复合查询

json
// bool查询
GET /products/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "match": {
            "name": "iPhone"
          }
        }
      ],
      "filter": [
        {
          "range": {
            "price": {
              "gte": 5000,
              "lte": 10000
            }
          }
        }
      ],
      "should": [
        {
          "term": {
            "tags": "5G"
          }
        }
      ]
    }
  }
}

聚合分析

基础聚合

json
// 价格统计
GET /products/_search
{
  "size": 0,
  "aggs": {
    "price_stats": {
      "stats": {
        "field": "price"
      }
    }
  }
}

// 分组统计
GET /products/_search
{
  "size": 0,
  "aggs": {
    "tag_count": {
      "terms": {
        "field": "tags",
        "size": 10
      }
    }
  }
}

高级聚合

json
// 嵌套聚合
GET /products/_search
{
  "size": 0,
  "aggs": {
    "tags": {
      "terms": {
        "field": "tags",
        "size": 10
      },
      "aggs": {
        "avg_price": {
          "avg": {
            "field": "price"
          }
        }
      }
    }
  }
}

分词器配置

IK分词器

json
// 创建自定义分词器
PUT /my_index
{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_analyzer": {
          "type": "custom",
          "tokenizer": "ik_max_word",
          "filter": ["lowercase", "asciifolding"]
        }
      }
    }
  }
}

// 测试分词
POST /my_index/_analyze
{
  "analyzer": "my_analyzer",
  "text": "Elasticsearch是一个分布式搜索引擎"
}

性能优化

索引优化

json
// Tune the `products` index for write throughput.
// These settings are trade-offs, not free wins:
//   - refresh_interval "30s": newly indexed documents may take up to 30 seconds
//     to become visible to searches.
//   - translog.durability "async" with sync_interval "5s": the translog is
//     fsynced in the background every 5 seconds instead of before each request
//     is acknowledged, so a node crash can lose writes that were already
//     acknowledged within that window.
// NOTE(review): only use the async translog where losing a few seconds of
// writes is acceptable (e.g. re-playable bulk loads) - confirm per use case.
PUT /products/_settings
{
  "index": {
    "refresh_interval": "30s",
    "number_of_replicas": 1,
    "translog": {
      "durability": "async",
      "sync_interval": "5s"
    }
  }
}

查询优化

json
// 使用Filter Context
GET /products/_search
{
  "query": {
    "bool": {
      "filter": [
        {
          "term": {
            "tags": "5G"
          }
        },
        {
          "range": {
            "price": {
              "gte": 5000
            }
          }
        }
      ]
    }
  }
}

集群管理

集群健康检查

bash
# NOTE: these are Kibana Dev Tools Console requests, not shell commands.
# From a shell, send them over HTTP instead, e.g.
#   curl -X GET "http://localhost:9200/_cluster/health"
# (host/port assumed - adjust to your cluster).

# Show the overall cluster health status
GET /_cluster/health

# List the nodes in the cluster (?v adds column headers)
GET /_cat/nodes?v

# Show how shards are allocated (?v adds column headers)
GET /_cat/shards?v

索引管理

bash
# Create an index template named logs_template that applies to every new index
# whose name matches logs-*: 3 primary shards, 1 replica, and mappings for
# @timestamp (date), message (text) and level (keyword).
# NOTE(review): `_template` is the legacy index template API. It is deprecated
# since Elasticsearch 7.8 in favour of composable templates
# (PUT /_index_template/<name>, with settings/mappings nested under "template").
# Confirm the target cluster version before copying this request.
PUT /_template/logs_template
{
  "index_patterns": ["logs-*"],
  "settings": {
    "number_of_shards": 3,
    "number_of_replicas": 1
  },
  "mappings": {
    "properties": {
      "@timestamp": {
        "type": "date"
      },
      "message": {
        "type": "text"
      },
      "level": {
        "type": "keyword"
      }
    }
  }
}

数据备份

快照备份

bash
# Register a shared-filesystem ("fs") snapshot repository named my_backup.
# Prerequisite: the location must be inside a directory listed under path.repo
# in elasticsearch.yml on every master and data node, and that directory must
# be a shared mount visible to all of them; otherwise registration fails.
PUT /_snapshot/my_backup
{
  "type": "fs",
  "settings": {
    "location": "/mount/backups/my_backup"
  }
}

# Create a snapshot named snapshot_1 in the my_backup repository.
#   - indices: only the products index is included.
#   - ignore_unavailable true: a missing index is skipped instead of failing
#     the snapshot.
#   - include_global_state false: the cluster-wide state is not stored in the
#     snapshot.
PUT /_snapshot/my_backup/snapshot_1
{
  "indices": "products",
  "ignore_unavailable": true,
  "include_global_state": false
}

监控告警

Elasticsearch Exporter

yaml
# docker-compose.yml
# Runs the Prometheus exporter for Elasticsearch as a single service.
version: '3'
services:
  elasticsearch_exporter:
    # Pinned exporter image version.
    image: justwatch/elasticsearch_exporter:1.1.0
    command:
      # Address of the cluster to scrape. The hostname `elasticsearch` is not
      # defined in this snippet - it must resolve on the same Compose network
      # (e.g. a service with that name) or be replaced with a reachable host.
      - '--es.uri=http://elasticsearch:9200'
    ports:
      # Publish container port 9114 on the host; presumably the exporter's
      # metrics port that Prometheus scrapes - verify against your scrape config.
      - "9114:9114"

Grafana Dashboard

json
{
  "dashboard": {
    "panels": [
      {
        "title": "ES Cluster Status",
        "type": "stat",
        "datasource": "Prometheus",
        "targets": [
          {
            "expr": "elasticsearch_cluster_health_status"
          }
        ]
      }
    ]
  }
}

Java客户端使用

Java High Level REST Client

java
// Build the search request: a match query for "iPhone" on the `name` field of
// the `products` index. This needs no live connection, so it is prepared
// before the client is opened.
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
searchSourceBuilder.query(QueryBuilders.matchQuery("name", "iPhone"));
SearchRequest searchRequest = new SearchRequest("products");
searchRequest.source(searchSourceBuilder);

// Create the client inside try-with-resources. RestHighLevelClient is
// Closeable and owns the underlying low-level REST client (connection pool and
// I/O threads); without close() those resources leak.
// In a real application, create ONE long-lived client, share it across
// requests, and close it at shutdown instead of opening one per search.
// NOTE(review): the High Level REST Client is deprecated since Elasticsearch
// 7.15 in favour of the Java API Client - confirm which client the target
// version should use.
try (RestHighLevelClient client = new RestHighLevelClient(
        RestClient.builder(
            new HttpHost("localhost", 9200, "http")
        )
)) {
    // Execute the search synchronously with default request options.
    SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
    // Consume searchResponse (e.g. searchResponse.getHits()) here, while the
    // client is still open.
}

实战案例

商品搜索系统

json
// 创建商品索引
PUT /products
{
  "settings": {
    "analysis": {
      "analyzer": {
        "product_analyzer": {
          "type": "custom",
          "tokenizer": "ik_max_word",
          "filter": ["lowercase", "asciifolding"]
        }
      }
    }
  },
  "mappings": {
    "properties": {
      "name": {
        "type": "text",
        "analyzer": "product_analyzer",
        "fields": {
          "keyword": {
            "type": "keyword"
          }
        }
      },
      "category": {
        "type": "keyword"
      },
      "price": {
        "type": "double"
      },
      "description": {
        "type": "text",
        "analyzer": "product_analyzer"
      },
      "tags": {
        "type": "keyword"
      },
      "created_at": {
        "type": "date"
      }
    }
  }
}

// 搜索接口
GET /products/_search
{
  "query": {
    "bool": {
      "must": [
        {
          "multi_match": {
            "query": "iPhone 手机",
            "fields": ["name^3", "description"]
          }
        }
      ],
      "filter": [
        {
          "term": {
            "category": "手机"
          }
        },
        {
          "range": {
            "price": {
              "gte": 5000,
              "lte": 10000
            }
          }
        }
      ]
    }
  },
  "sort": [
    {
      "_score": "desc"
    },
    {
      "created_at": "desc"
    }
  ],
  "from": 0,
  "size": 20,
  "highlight": {
    "fields": {
      "name": {},
      "description": {}
    }
  }
}

最佳实践

  1. 合理设计索引结构
  2. 使用合适的分片数
  3. 优化查询性能
  4. 定期维护和监控
  5. 实施备份策略

总结

Elasticsearch是一个功能强大的搜索和分析引擎,通过合理的配置和使用,可以构建高性能的搜索系统。

参考资料

  1. Elasticsearch官方文档
  2. Elasticsearch权威指南
  3. Elasticsearch实战(第2版)

幸运的人用童年治愈一生,不幸的人用一生治愈童年 —— 强爸