ElasticSearch2基本操作(06关于查询条件及过滤)

过滤

match_all 全部匹配,不做过滤,默认
term 精确匹配
terms 精确匹配多个词
range 范围匹配
exists 文档包含某属性
missing 文档不包含某属性
bool 多个过滤条件的组合

其中,对于bool过滤,可以有下面的组合条件:

must 多个查询条件的完全匹配,相当于 and。
must_not 多个查询条件的相反匹配,相当于 not。
should 至少有一个查询条件匹配, 相当于 or。

查询

match_all 全部匹配,默认
match 首先对查询条件进行分词,然后用TF/IDF评分
multi_match 与match类似,但可以用多个条件
bool 多个条件的组合查询

其中,对于bool查询,可以有下面的组合条件:

must 多个查询条件的完全匹配,相当于 and。
must_not 多个查询条件的相反匹配,相当于 not。
should 至少有一个查询条件匹配, 相当于 or。
#查询性别为男,年龄不是25,家庭住址最好有魔都两个字的记录
curl -XPOST http://127.0.0.1:9200/myindex/user/_search -d'
{
    "query": {
        "bool": {
            "must": {
                "term": {
                    "性别": "男"
                }
            },
            "must_not": {
                "match": {
                    "年龄": "25"
                }
            },
            "should": {
                "match": {
                    "家庭住址": "魔都"
                }
            }
        }
    }
}'

#查询注册时间从2015-04-01到2016-04-01的用户
curl -XPOST http://127.0.0.1:9200/myindex/user/_search -d'
{
    "query": {
        "bool": {
            "must": {
                "range": {
                    "注册时间": {
                        "gte": "2015-04-01 00:00:00",
                        "lt": "2016-04-01 00:00:00"
                    }
                }
            }
        }
    }
}'

#查询没有年龄字段的记录
curl -XPOST http://127.0.0.1:9200/myindex/user/_search -d'
{
    "query": {
        "bool": {
            "must": {
                "missing": {
                    "field": "年龄"
                }
            }
        }
    }
}'

#查询家庭地址或工作地址中包含北京的用户
curl -XPOST http://127.0.0.1:9200/myindex/user/_search -d'
{
    "query": {
        "multi_match": {
            "query": "北京",
            "type": "most_fields",
            "fields": [
                "家庭住址",
                "工作地址"
            ]
        }
    }
}'

#查询性别为男的用户
curl -XPOST http://127.0.0.1:9200/myindex/user/_search -d'
{
    "query": {
        "filtered": {
            "query": {
                "match_all": {}
            },
            "filter": {
                "term": {
                    "性别": "男"
                }
            }
        }
    }
}'

#查询注册时间为两年内的用户
curl -XPOST http://127.0.0.1:9200/myindex/user/_search -d'
{
    "query": {
        "filtered": {
            "query": {
                "match_all": {}
            },
            "filter": {
                "range": {
                    "注册时间": {"gt" : "now-2y"}
                }
            }
        }
    }
}'

排序

#查询所有用户,按注册时间进行排序
curl -XPOST http://127.0.0.1:9200/myindex/user/_search -d'
{
    "query": {
        "match_all": {}
    },
    "sort": {
        "注册时间": {
            "order": "desc"
        }
    }
}'

分页

#查询前三条记录
curl -XPOST http://127.0.0.1:9200/myindex/user/_search -d'
{
    "query": {
        "match_all": {}
    },
    "from": 0,
    "size": 3
}'

带缓存的分页

#进行分页
curl -XPOST 'http://127.0.0.1:9200/myindex/user/_search?search_type=scan&scroll=5m' -d'
{
    "query": { "match_all": {}},
    "size":  10
}'

#返回_scroll_id
{"_scroll_id":"c2Nhbjs1OzE1MzE6NVR2MmE1WWFRRHFtelVGYlRwNGlhdzsxNTMzOjVUdjJhNVlhUURxbXpVRmJUcDRpYXc7MTUzNDo1VHYyYTVZYVFEcW16VUZiVHA0aWF3OzE1MzU6NVR2MmE1WWFRRHFtelVGYlRwNGlhdzsxNTMyOjVUdjJhNVlhUURxbXpVRmJUcDRpYXc7MTt0b3RhbF9oaXRzOjc7","took":2,"timed_out":false,"_shards":{"total":5,"successful":5,"failed":0},"hits":{"total":7,"max_score":0.0,"hits":[]}}

#发送_scroll_id开始查询
curl -XPOST 'http://127.0.0.1:9200/_search/scroll?scroll=5m' -d'c2Nhbjs1OzE1MzE6NVR2MmE1WWFRRHFtelVGYlRwNGlhdzsxNTMzOjVUdjJhNVlhUURxbXpVRmJUcDRpYXc7MTUzNDo1VHYyYTVZYVFEcW16VUZiVHA0aWF3OzE1MzU6NVR2MmE1WWFRRHFtelVGYlRwNGlhdzsxNTMyOjVUdjJhNVlhUURxbXpVRmJUcDRpYXc7MTt0b3RhbF9oaXRzOjc7'

ElasticSearch2基本操作(05关于搜索)

ES的搜索,不是关系数据库中的LIKE,而是通过搜索条件及文档之间的相关性来进行的。

对于一次搜索,对于每一个文档,都有一个浮点数字段_score 来表示文档与搜索主题的相关性, _score 的评分越高,相关性越高。

评分的计算方式取决于不同的查询类型:
fuzzy查询会计算与关键词的拼写相似程度
terms查询会计算找到的内容与关键词组成部分匹配的百分比
而全文本搜索是指计算内容与关键词的类似程度。

ES通过计算TF/IDF(即检索词频率/反向文档频率, Term Frequency/Inverse Document Frequency)作为相关性指标,具体与下面三个指标相关:
检索词频率TF: 对于一条记录,检索词在查询字段中出现的频率越高,相关性也越高。比如,一共有5个检索词,有4个出现在第一条记录,3个出现在第二条记录,则第一条记录TF会比第二条高一些。

反向文档频率IDF: 每个检索词在所有文档的该字段中出现的频率越高,则该词相关性越低。比如有5个检索词,如果一个词在所有文档中都出现,而另一个词只出现了一次,则所有文档中都包含的词几乎可以被忽略,只出现了一次的这个词权重会很高。

字段长度: 对于一条记录,查询字段的长度越长,相关性越低。比如有一条记录长度为10个词,另一条记录长度为100个词,而一个关键词,在两条记录里都出现了一次。则长度为10个词的记录,比长度为100个词的记录,相关性会高很多。

通过对TF/IDF的了解,可以让你解释一些看似不应该出现的结果。同时,你应该清楚,这不是一种精确匹配算法,而是一种评分算法,根据相关性进行了排序。

如果认为评分结果不合理,可以用下面的语句,查看评分过程:

#解释查询是如何进行评分的
curl -XPOST 'http://127.0.0.1:9200/myindex/user/_search?explain' -d'
{
   "query"   : { "match" : { "家庭住址" : "魔都大街" }}
}'

#结果如下:
{
    "took": 7,
    "timed_out": false,
    "_shards": {
        "total": 5,
        "successful": 5,
        "failed": 0
    },
    "hits": {
        "total": 4,
        "max_score": 4,
        "hits": [
            {
                "_shard": 4,
                "_node": "5Tv2a5YaQDqmzUFbTp4iaw",
                "_index": "myindex",
                "_type": "user",
                "_id": "u002",
                "_score": 4,
                "_source": {
                    "用户ID": "u002",
                    "姓名": "李四",
                    "性别": "男",
                    "年龄": "25",
                    "家庭住址": "上海市闸北区魔都大街007号",
                    "注册时间": "2015-02-01 08:30:00"
                },
                "_explanation": {
                    "value": 4,
                    "description": "sum of:",
                    "details": [
                        {
                            "value": 4,
                            "description": "sum of:",
                            "details": [
                                {
                                    "value": 1,
                                    "description": "weight(家庭住址:魔 in 0) [PerFieldSimilarity], result of:",
                                    "details": [
                                        {
                                            "value": 1,
                                            "description": "score(doc=0,freq=1.0), product of:",
                                            "details": [
                                                {
                                                    "value": 0.5,
                                                    "description": "queryWeight, product of:",
                                                    "details": [
                                                        {
                                                            "value": 1,
                                                            "description": "idf(docFreq=1, maxDocs=2)",
                                                            "details": []
                                                        },
                                                        {
                                                            "value": 0.5,
                                                            "description": "queryNorm",
                                                            "details": []
                                                        }
                                                    ]
                                                },
                                                {
                                                    "value": 2,
                                                    "description": "fieldWeight in 0, product of:",
                                                    "details": [
                                                        {
                                                            "value": 1,
                                                            "description": "tf(freq=1.0), with freq of:",
                                                            "details": [
                                                                {
                                                                    "value": 1,
                                                                    "description": "termFreq=1.0",
                                                                    "details": []
                                                                }
                                                            ]
                                                        },
                                                        {
                                                            "value": 1,
                                                            "description": "idf(docFreq=1, maxDocs=2)",
                                                            "details": []
                                                        },
                                                        {
                                                            "value": 2,
                                                            "description": "fieldNorm(doc=0)",
                                                            "details": []
                                                        }
                                                    ]
                                                }
                                            ]
                                        }
                                    ]
                                },
                                {
                                    "value": 1,
                                    "description": "weight(家庭住址:都 in 0) [PerFieldSimilarity], result of:",
                                    "details": [
                                        {
                                            "value": 1,
                                            "description": "score(doc=0,freq=1.0), product of:",
                                            "details": [
                                                {
                                                    "value": 0.5,
                                                    "description": "queryWeight, product of:",
                                                    "details": [
                                                        {
                                                            "value": 1,
                                                            "description": "idf(docFreq=1, maxDocs=2)",
                                                            "details": []
                                                        },
                                                        {
                                                            "value": 0.5,
                                                            "description": "queryNorm",
                                                            "details": []
                                                        }
                                                    ]
                                                },
                                                {
                                                    "value": 2,
                                                    "description": "fieldWeight in 0, product of:",
                                                    "details": [
                                                        {
                                                            "value": 1,
                                                            "description": "tf(freq=1.0), with freq of:",
                                                            "details": [
                                                                {
                                                                    "value": 1,
                                                                    "description": "termFreq=1.0",
                                                                    "details": []
                                                                }
                                                            ]
                                                        },
                                                        {
                                                            "value": 1,
                                                            "description": "idf(docFreq=1, maxDocs=2)",
                                                            "details": []
                                                        },
                                                        {
                                                            "value": 2,
                                                            "description": "fieldNorm(doc=0)",
                                                            "details": []
                                                        }
                                                    ]
                                                }
                                            ]
                                        }
                                    ]
                                },
                                {
                                    "value": 1,
                                    "description": "weight(家庭住址:大街 in 0) [PerFieldSimilarity], result of:",
                                    "details": [
                                        {
                                            "value": 1,
                                            "description": "score(doc=0,freq=1.0), product of:",
                                            "details": [
                                                {
                                                    "value": 0.5,
                                                    "description": "queryWeight, product of:",
                                                    "details": [
                                                        {
                                                            "value": 1,
                                                            "description": "idf(docFreq=1, maxDocs=2)",
                                                            "details": []
                                                        },
                                                        {
                                                            "value": 0.5,
                                                            "description": "queryNorm",
                                                            "details": []
                                                        }
                                                    ]
                                                },
                                                {
                                                    "value": 2,
                                                    "description": "fieldWeight in 0, product of:",
                                                    "details": [
                                                        {
                                                            "value": 1,
                                                            "description": "tf(freq=1.0), with freq of:",
                                                            "details": [
                                                                {
                                                                    "value": 1,
                                                                    "description": "termFreq=1.0",
                                                                    "details": []
                                                                }
                                                            ]
                                                        },
                                                        {
                                                            "value": 1,
                                                            "description": "idf(docFreq=1, maxDocs=2)",
                                                            "details": []
                                                        },
                                                        {
                                                            "value": 2,
                                                            "description": "fieldNorm(doc=0)",
                                                            "details": []
                                                        }
                                                    ]
                                                }
                                            ]
                                        }
                                    ]
                                },
                                {
                                    "value": 1,
                                    "description": "weight(家庭住址:街 in 0) [PerFieldSimilarity], result of:",
                                    "details": [
                                        {
                                            "value": 1,
                                            "description": "score(doc=0,freq=1.0), product of:",
                                            "details": [
                                                {
                                                    "value": 0.5,
                                                    "description": "queryWeight, product of:",
                                                    "details": [
                                                        {
                                                            "value": 1,
                                                            "description": "idf(docFreq=1, maxDocs=2)",
                                                            "details": []
                                                        },
                                                        {
                                                            "value": 0.5,
                                                            "description": "queryNorm",
                                                            "details": []
                                                        }
                                                    ]
                                                },
                                                {
                                                    "value": 2,
                                                    "description": "fieldWeight in 0, product of:",
                                                    "details": [
                                                        {
                                                            "value": 1,
                                                            "description": "tf(freq=1.0), with freq of:",
                                                            "details": [
                                                                {
                                                                    "value": 1,
                                                                    "description": "termFreq=1.0",
                                                                    "details": []
                                                                }
                                                            ]
                                                        },
                                                        {
                                                            "value": 1,
                                                            "description": "idf(docFreq=1, maxDocs=2)",
                                                            "details": []
                                                        },
                                                        {
                                                            "value": 2,
                                                            "description": "fieldNorm(doc=0)",
                                                            "details": []
                                                        }
                                                    ]
                                                }
                                            ]
                                        }
                                    ]
                                }
                            ]
                        },
                        {
                            "value": 0,
                            "description": "match on required clause, product of:",
                            "details": [
                                {
                                    "value": 0,
                                    "description": "# clause",
                                    "details": []
                                },
                                {
                                    "value": 0.5,
                                    "description": "_type:user, product of:",
                                    "details": [
                                        {
                                            "value": 1,
                                            "description": "boost",
                                            "details": []
                                        },
                                        {
                                            "value": 0.5,
                                            "description": "queryNorm",
                                            "details": []
                                        }
                                    ]
                                }
                            ]
                        }
                    ]
                }
            },
            {
                "_shard": 0,
                "_node": "5Tv2a5YaQDqmzUFbTp4iaw",
                "_index": "myindex",
                "_type": "user",
                "_id": "u003",
                "_score": 0.71918744,
                "_source": {
                    "用户ID": "u003",
                    "姓名": "王五",
                    "性别": "男",
                    "年龄": "26",
                    "家庭住址": "广州市花都区花城大街010号",
                    "注册时间": "2015-03-01 08:30:00"
                },
                "_explanation": {
                    "value": 0.71918744,
                    "description": "sum of:",
                    "details": [
                        {
                            "value": 0.71918744,
                            "description": "product of:",
                            "details": [
                                {
                                    "value": 1.4383749,
                                    "description": "sum of:",
                                    "details": [
                                        {
                                            "value": 0.71918744,
                                            "description": "weight(家庭住址:大街 in 0) [PerFieldSimilarity], result of:",
                                            "details": [
                                                {
                                                    "value": 0.71918744,
                                                    "description": "score(doc=0,freq=1.0), product of:",
                                                    "details": [
                                                        {
                                                            "value": 0.35959372,
                                                            "description": "queryWeight, product of:",
                                                            "details": [
                                                                {
                                                                    "value": 1,
                                                                    "description": "idf(docFreq=1, maxDocs=2)",
                                                                    "details": []
                                                                },
                                                                {
                                                                    "value": 0.35959372,
                                                                    "description": "queryNorm",
                                                                    "details": []
                                                                }
                                                            ]
                                                        },
                                                        {
                                                            "value": 2,
                                                            "description": "fieldWeight in 0, product of:",
                                                            "details": [
                                                                {
                                                                    "value": 1,
                                                                    "description": "tf(freq=1.0), with freq of:",
                                                                    "details": [
                                                                        {
                                                                            "value": 1,
                                                                            "description": "termFreq=1.0",
                                                                            "details": []
                                                                        }
                                                                    ]
                                                                },
                                                                {
                                                                    "value": 1,
                                                                    "description": "idf(docFreq=1, maxDocs=2)",
                                                                    "details": []
                                                                },
                                                                {
                                                                    "value": 2,
                                                                    "description": "fieldNorm(doc=0)",
                                                                    "details": []
                                                                }
                                                            ]
                                                        }
                                                    ]
                                                }
                                            ]
                                        },
                                        {
                                            "value": 0.71918744,
                                            "description": "weight(家庭住址:街 in 0) [PerFieldSimilarity], result of:",
                                            "details": [
                                                {
                                                    "value": 0.71918744,
                                                    "description": "score(doc=0,freq=1.0), product of:",
                                                    "details": [
                                                        {
                                                            "value": 0.35959372,
                                                            "description": "queryWeight, product of:",
                                                            "details": [
                                                                {
                                                                    "value": 1,
                                                                    "description": "idf(docFreq=1, maxDocs=2)",
                                                                    "details": []
                                                                },
                                                                {
                                                                    "value": 0.35959372,
                                                                    "description": "queryNorm",
                                                                    "details": []
                                                                }
                                                            ]
                                                        },
                                                        {
                                                            "value": 2,
                                                            "description": "fieldWeight in 0, product of:",
                                                            "details": [
                                                                {
                                                                    "value": 1,
                                                                    "description": "tf(freq=1.0), with freq of:",
                                                                    "details": [
                                                                        {
                                                                            "value": 1,
                                                                            "description": "termFreq=1.0",
                                                                            "details": []
                                                                        }
                                                                    ]
                                                                },
                                                                {
                                                                    "value": 1,
                                                                    "description": "idf(docFreq=1, maxDocs=2)",
                                                                    "details": []
                                                                },
                                                                {
                                                                    "value": 2,
                                                                    "description": "fieldNorm(doc=0)",
                                                                    "details": []
                                                                }
                                                            ]
                                                        }
                                                    ]
                                                }
                                            ]
                                        }
                                    ]
                                },
                                {
                                    "value": 0.5,
                                    "description": "coord(2/4)",
                                    "details": []
                                }
                            ]
                        },
                        {
                            "value": 0,
                            "description": "match on required clause, product of:",
                            "details": [
                                {
                                    "value": 0,
                                    "description": "# clause",
                                    "details": []
                                },
                                {
                                    "value": 0.35959372,
                                    "description": "_type:user, product of:",
                                    "details": [
                                        {
                                            "value": 1,
                                            "description": "boost",
                                            "details": []
                                        },
                                        {
                                            "value": 0.35959372,
                                            "description": "queryNorm",
                                            "details": []
                                        }
                                    ]
                                }
                            ]
                        }
                    ]
                }
            },
            ......
        ]
    }
}

你可以看到,不仅是“魔都大街”的记录被查询出来了,只要有“大街”的记录也被查出来了哦。同时,也告诉了你,为什么”u002″是最靠前的。

还有一种用法,就是让ES告诉你,查询语句哪里错了:

curl -XPOST http://127.0.0.1:9200/myindex/user/_validate/query?explain -d'
{
   "query"   : { "matchA" : { "家庭住址" : "魔都大街" }}
}'

{
    "valid": false,
    "_shards": {
        "total": 1,
        "successful": 1,
        "failed": 0
    },
    "explanations": [
        {
            "index": "myindex",
            "valid": false,
            "error": "org.elasticsearch.index.query.QueryParsingException: No query registered for [matchA]"
        }
    ]
}

ES会告诉你matchA这里错了哦。

ElasticSearch2基本操作(04关于分词)

恩,有些初步的感觉了没?那回过头来我们看下最基础的东西:

ES中,常见数据类型如下:

类型名称 数据类型
字符串 string
整数 byte, short, integer, long
浮点数 float, double
布尔 boolean
日期 date
对象 object
嵌套结构 nested
地理位置(经纬度) geo_point

常用字段分析类型如下:

分析类型 含义
analyzed 首先分析这个字符串,然后索引。换言之,以全文形式索引此字段。
not_analyzed 索引这个字段,使之可以被搜索,但是索引内容和指定值一样。不分析此字段。
no 不索引这个字段。这个字段不能被搜索到。

然后,我们测试一下分词器

1、首先测试一下用标准分词进行分词

curl -XPOST "http://localhost:9200/_analyze?analyzer=standard&text=小明同学大吃一惊"

{
    "tokens": [
        {
            "token": "小",
            "start_offset": 0,
            "end_offset": 1,
            "type": "<IDEOGRAPHIC>",
            "position": 0
        },
        {
            "token": "明",
            "start_offset": 1,
            "end_offset": 2,
            "type": "<IDEOGRAPHIC>",
            "position": 1
        },
        {
            "token": "同",
            "start_offset": 2,
            "end_offset": 3,
            "type": "<IDEOGRAPHIC>",
            "position": 2
        },
        {
            "token": "学",
            "start_offset": 3,
            "end_offset": 4,
            "type": "<IDEOGRAPHIC>",
            "position": 3
        },
        {
            "token": "大",
            "start_offset": 4,
            "end_offset": 5,
            "type": "<IDEOGRAPHIC>",
            "position": 4
        },
        {
            "token": "吃",
            "start_offset": 5,
            "end_offset": 6,
            "type": "<IDEOGRAPHIC>",
            "position": 5
        },
        {
            "token": "一",
            "start_offset": 6,
            "end_offset": 7,
            "type": "<IDEOGRAPHIC>",
            "position": 6
        },
        {
            "token": "惊",
            "start_offset": 7,
            "end_offset": 8,
            "type": "<IDEOGRAPHIC>",
            "position": 7
        }
    ]
}

2、然后对比一下用IK分词进行分词

curl -XGET "http://localhost:9200/_analyze?analyzer=ik&text=小明同学大吃一惊"

{
    "tokens": [
        {
            "token": "小明",
            "start_offset": 0,
            "end_offset": 2,
            "type": "CN_WORD",
            "position": 0
        },
        {
            "token": "同学",
            "start_offset": 2,
            "end_offset": 4,
            "type": "CN_WORD",
            "position": 1
        },
        {
            "token": "大吃一惊",
            "start_offset": 4,
            "end_offset": 8,
            "type": "CN_WORD",
            "position": 2
        },
        {
            "token": "大吃",
            "start_offset": 4,
            "end_offset": 6,
            "type": "CN_WORD",
            "position": 3
        },
        {
            "token": "吃",
            "start_offset": 5,
            "end_offset": 6,
            "type": "CN_WORD",
            "position": 4
        },
        {
            "token": "一惊",
            "start_offset": 6,
            "end_offset": 8,
            "type": "CN_WORD",
            "position": 5
        },
        {
            "token": "一",
            "start_offset": 6,
            "end_offset": 7,
            "type": "TYPE_CNUM",
            "position": 6
        },
        {
            "token": "惊",
            "start_offset": 7,
            "end_offset": 8,
            "type": "CN_CHAR",
            "position": 7
        }
    ]
}

3、测试一下按”家庭住址”字段进行分词

curl -XGET "http://localhost:9200/myindex/_analyze?field=家庭住址&text=我爱北京天安门"

{
    "tokens": [
        {
            "token": "我",
            "start_offset": 0,
            "end_offset": 1,
            "type": "CN_CHAR",
            "position": 0
        },
        {
            "token": "爱",
            "start_offset": 1,
            "end_offset": 2,
            "type": "CN_CHAR",
            "position": 1
        },
        {
            "token": "北京",
            "start_offset": 2,
            "end_offset": 4,
            "type": "CN_WORD",
            "position": 2
        },
        {
            "token": "京",
            "start_offset": 3,
            "end_offset": 4,
            "type": "CN_WORD",
            "position": 3
        },
        {
            "token": "天安门",
            "start_offset": 4,
            "end_offset": 7,
            "type": "CN_WORD",
            "position": 4
        },
        {
            "token": "天安",
            "start_offset": 4,
            "end_offset": 6,
            "type": "CN_WORD",
            "position": 5
        },
        {
            "token": "门",
            "start_offset": 6,
            "end_offset": 7,
            "type": "CN_CHAR",
            "position": 6
        }
    ]
}

4、测试一下按”性别”字段进行分词

curl -XGET "http://localhost:9200/myindex/_analyze?field=性别&text=我爱北京天安门"

{
    "tokens": [
        {
            "token": "我爱北京天安门",
            "start_offset": 0,
            "end_offset": 7,
            "type": "word",
            "position": 0
        }
    ]
}

大家可以看到,不同的分词器,使用场景、针对语言是不一样的,所以要选择合适的分词器。
此外,对于不同的字段,要选择不同的分析方式及适用的分词器,会让你事半功倍。

ElasticSearch2基本操作(03增删改查REST)

接上一篇:

11、更新文档

curl -XPOST http://localhost:9200/_bulk -d'
{ action: { metadata }}\n
{ request body        }\n
{ action: { metadata }}\n
{ request body        }\n
'
操作类型 说明
create 当文档不存在时创建之。
index 创建新文档或替换已有文档。
update 局部更新文档。
delete 删除一个文档。

比如下面的操作:
首先删除一个文件
再新建一个文件
然后全局更新一个文件
最后局部更新一个文件

curl -XPOST http://localhost:9200/_bulk -d'
{ "delete": { "_index": "myindex", "_type": "user", "_id": "u004" }}
{ "create": { "_index": "myindex", "_type": "user", "_id": "u004" }}
{"用户ID": "u004","姓名":"赵六","性别":"男","年龄":"27","家庭住址":"深圳市龙岗区特区大街011号","注册时间":"2015-04-01 08:30:00"}
{ "index": { "_index": "myindex", "_type": "user", "_id": "u004" }}
{"用户ID": "u004","姓名":"赵六","性别":"男","年龄":"28","家庭住址":"深圳市龙岗区特区大街012号","注册时间":"2015-04-01 08:30:00"}
{ "update": { "_index": "myindex", "_type": "user", "_id": "u004"} }
{ "doc" : {"年龄" : "28"}}

结果如下:(局部更新没有执行,没查到原因)

{
    "took": 406,
    "errors": false,
    "items": [
        {
            "delete": {
                "_index": "myindex",
                "_type": "user",
                "_id": "u004",
                "_version": 10,
                "_shards": {
                    "total": 2,
                    "successful": 1,
                    "failed": 0
                },
                "status": 200,
                "found": true
            }
        },
        {
            "create": {
                "_index": "myindex",
                "_type": "user",
                "_id": "u004",
                "_version": 11,
                "_shards": {
                    "total": 2,
                    "successful": 1,
                    "failed": 0
                },
                "status": 201
            }
        },
        {
            "index": {
                "_index": "myindex",
                "_type": "user",
                "_id": "u004",
                "_version": 12,
                "_shards": {
                    "total": 2,
                    "successful": 1,
                    "failed": 0
                },
                "status": 200
            }
        }
    ]
}

ElasticSearch2基本操作(02增删改查REST)

接上一篇:

7、更新文档

#新增u004
curl -XPUT http://localhost:9200/myindex/user/u004 -d'
{
"用户ID": "u004",
"姓名":"赵六",
"性别":"男",
"年龄":"27",
"家庭住址":"深圳市龙岗区特区大街011号",
"注册时间":"2015-04-01 08:30:00"
}'

#更新u004
curl -XPUT http://localhost:9200/myindex/user/u004 -d'
{
"用户ID": "u004",
"姓名":"赵六",
"性别":"男",
"年龄":"27",
"家庭住址":"深圳市龙岗区特区大街011号",
"注册时间":"2015-04-01 08:30:00"
}'

#强制新增u004,如果已存在,则会报错
curl -XPUT http://localhost:9200/myindex/user/u004/_create -d'
{
"用户ID": "u004",
"姓名":"赵六",
"性别":"男",
"年龄":"27",
"家庭住址":"深圳市龙岗区特区大街012号",
"注册时间":"2015-04-01 08:30:00"
}'

返回结果如下:

#新增成功,版本为1
{
    "_index": "myindex",
    "_type": "user",
    "_id": "u004",
    "_version": 1,
    "_shards": {
        "total": 2,
        "successful": 1,
        "failed": 0
    },
    "created": true
}

#更新成功,版本为2
{
    "_index": "myindex",
    "_type": "user",
    "_id": "u004",
    "_version": 2,
    "_shards": {
        "total": 2,
        "successful": 1,
        "failed": 0
    },
    "created": false
}

#强制新增失败
Http Error: Conflict

8、删除文档,注意版本号变化

#删除文档
curl -XDELETE http://localhost:9200/myindex/user/u004

9、然后新增,再做局部更新,注意版本号变化

#新增
curl -XPUT http://localhost:9200/myindex/user/u004 -d'
{
"用户ID": "u004",
"姓名":"赵六",
"性别":"男",
"年龄":"27",
"家庭住址":"深圳市龙岗区特区大街011号",
"注册时间":"2015-04-01 08:30:00"
}'

#局部更新
curl -XPOST http://localhost:9999/myindex/user/u004/_update -d'
{
    "doc": {
        "家庭住址": "深圳市龙岗区特区大街013号"
    }
}'

#取回
curl -XGET http://localhost:9999/myindex/user/u004

10、批量取回

#从index开始指定
curl -XGET http://localhost:9999/_mget -d'
{
   "docs" : [
      {
         "_index" : "myindex",
         "_type" :  "user",
         "_id" :    "u001"
      },
      {
         "_index" : "myindex",
         "_type" :  "user",
         "_id" :    "u002",
         "_source": "家庭住址"
      }
   ]
}'

#index相同
curl -XGET http://localhost:9999/myindex/_mget -d'
{
   "docs" : [
      {  "_type" : "user", "_id" :   "u002"},
      { "_type" : "user", "_id" :   "u002" }
   ]
}'

#type相同
curl -XGET http://localhost:9999/myindex/user/_mget -d'
{
   "ids" : [ "u001", "u002" ]
}'

ElasticSearch2基本操作(01增删改查REST)

首先,大家要调整一下概念,对应于普通的关系型数据库,你可以暂时这样考虑

Relational DB Elasticsearch
Databases Indexes
Tables Types
Rows Documents
Columns Fields

1、创建索引myindex

curl -XPUT http://localhost:9200/myindex

2、创建类型user

curl -XPOST http://localhost:9200/myindex/user/_mapping -d'
{
    "user": {
        "_all": {
            "analyzer": "ik_max_word",
            "search_analyzer": "ik_max_word",
            "term_vector": "no",
            "store": "false"
        },
        "properties": {
            "用户ID": {
                "type": "string",
                "store": "no",
                "analyzer": "keyword",
                "search_analyzer": "keyword",
                "include_in_all": "true",
                "boost": 8
            },
            "姓名": {
                "type": "string",
                "store": "no",
                "term_vector": "with_positions_offsets",
                "analyzer": "ik_max_word",
                "search_analyzer": "ik_max_word",
                "include_in_all": "true",
                "boost": 8
            },
            "性别": {
                "type": "string",
                "store": "no",
                "analyzer": "keyword",
                "search_analyzer": "keyword",
                "include_in_all": "true",
                "boost": 8
            },
            "年龄": {
                "type": "integer",
                "store": "no",
                "index": "not_analyzed",
                "include_in_all": "true",
                "boost": 8
            },
            "家庭住址": {
                "type": "string",
                "store": "no",
                "term_vector": "with_positions_offsets",
                "analyzer": "ik_max_word",
                "search_analyzer": "ik_max_word",
                "include_in_all": "true",
                "boost": 8
            },
            "注册时间": {
                "type": "date",
                "format": "yyyy-MM-dd HH:mm:ss",
                "store": "no",
                "index": "not_analyzed",
                "include_in_all": "true",
                "boost": 8
            }
        }
    }
}'

在这里类型user中,有几种索引类型,

key 类型 分词方式
用户ID string keyword
姓名 string ik_max_word
性别 string keyword
年龄 integer not_analyzed
家庭住址 string ik_max_word
注册时间 date not_analyzed

其中,
ik_max_word,指的是用ik分词,然后将分词结果作为term,需要分词检索的字段,需要这样处理
keyword,指的是,不要分词,而是把整个词作为term,ID及字典很适合这样做
not_analyzed,是不做分词处理,如数字、时间,没有必要

3、上传文档

curl -XPUT http://localhost:9200/myindex/user/u001 -d'
{
"用户ID": "u001",
"姓名":"张三",
"性别":"男",
"年龄":"25",
"家庭住址":"北京市崇文区天朝大街001号",
"注册时间":"2015-01-01 08:30:00"
}'

curl -XPUT http://localhost:9200/myindex/user/u002 -d'
{
"用户ID": "u002",
"姓名":"李四",
"性别":"男",
"年龄":"25",
"家庭住址":"上海市闸北区魔都大街007号",
"注册时间":"2015-02-01 08:30:00"
}'

curl -XPUT http://localhost:9200/myindex/user/u003 -d'
{
"用户ID": "u003",
"姓名":"王五",
"性别":"男",
"年龄":"26",
"家庭住址":"广州市花都区花城大街010号",
"注册时间":"2015-03-01 08:30:00"
}'

4、文档是否存在

#判断id为u003的文档是否存在
curl -XHEAD http://localhost:9200/myindex/user/u003

5、获取文档

#获取id为u003的文档
curl -XGET http://localhost:9200/myindex/user/u003

#获取id为u003的文档的姓名及性别字段
http://localhost:9200/myindex/user/u003?_source=姓名,性别

6、查询文档

#查询文档,默认返回前10个
curl -XGET http://localhost:9200/myindex/user/_search

#用参数进行查询
#年龄等于25的记录
curl -XGET http://localhost:9200/myindex/user/_search?q=年龄:25
#姓名等于王五的记录
curl -XGET http://localhost:9200/myindex/user/_search?q=姓名:王五
#姓名等于王五及年龄等于26的记录
curl -XGET http://localhost:9200/myindex/user/_search?q=+姓名:王五+年龄:26

#查询年龄等于25的用户
curl -XGET http://localhost:9200/myindex/user/_search -d'
{
    "query" : {
        "match" : {
            "年龄" : "25"
        }
    }
}'

#查询年龄大于25,男性用户
curl -XGET http://localhost:9200/myindex/user/_search -d'
{
    "query": {
        "filtered": {
            "filter": {
                "range": {
                    "年龄": {
                        "gt": 25
                    }
                }
            },
            "query": {
                "match": {
                    "性别": "男"
                }
            }
        }
    }
}'

#查询家庭住址中,包含北京或上海的用户
curl -XGET http://localhost:9200/myindex/user/_search -d'
{
    "query" : {
        "match" : {
            "家庭住址" : "北京 上海"
        }
    }
}'

#查询词组
curl -XGET http://localhost:9200/myindex/user/_search -d'
{
    "query" : {
        "match_phrase" : {
            "家庭住址" : "北京 崇文"
        }
    }
}'

#按年龄分组聚合,并count
curl -XGET http://localhost:9200/myindex/user/_search -d'
{
  "aggs": {
    "all_interests": {
      "terms": { "field": "年龄" }
    }
  }
}'

#男性用户,按年龄分组聚合,并count
curl -XGET http://localhost:9200/myindex/user/_search -d'
{
  "query": {
    "match": {
      "性别": "男"
    }
  },
  "aggs": {
    "all_interests": {
      "terms": {
        "field": "年龄"
      }
    }
  }
}'

ElasticSearch2常用插件

1、在线安装常用插件

#head
bin\plugin install mobz/elasticsearch-head

#gui
bin\plugin install jettro/elasticsearch-gui

#bigdesk
#bin\plugin install lukas-vlcek/bigdesk
bin\plugin install hlstudio/bigdesk

#kopf
bin\plugin install lmenezes/elasticsearch-kopf

#carrot2
bin\plugin install org.carrot2/elasticsearch-carrot2/2.2.1

#inquisitor
bin\plugin install polyfractal/elasticsearch-inquisitor

2、离线安装常用插件

#上面的插件,都可手工下载后,通过命令行进行离线安装
bin\plugin install file:///PATH_TO_PLUGIN/PLUGIN.zip

3、手工安装分词插件

#到下面的地址下载release版本,解压,放到ES的plugins目录下,然后重启即可
https://github.com/medcl/elasticsearch-analysis-ik
https://github.com/medcl/elasticsearch-analysis-pinyin
https://github.com/medcl/elasticsearch-analysis-mmseg

eXistDB简单Trigger示例03

  • XCONF文件中指定XQuery文件路径
  • XCONF文件中包含XQuery文件
  • XCONF文件中指定Java类

第三种方式,是用XCONF文件通知eXistDB要对哪个collection中的哪些操作做触发,然后触发器指向一个JAVA类。

1、首先,编写触发器的java类,打成jar包,放到%existdb_home%\lib\user路径下
TriggerTest.java

package com.neohope.existdb.test;


import org.exist.collections.Collection;
import org.exist.collections.IndexInfo;
import org.exist.collections.triggers.DocumentTrigger;
import org.exist.collections.triggers.SAXTrigger;
import org.exist.collections.triggers.TriggerException;
import org.exist.dom.DocumentImpl;
import org.exist.dom.NodeSet;
import org.exist.security.PermissionDeniedException;
import org.exist.security.xacml.AccessContext;
import org.exist.storage.DBBroker;
import org.exist.storage.txn.Txn;
import org.exist.xmldb.XmldbURI;
import org.exist.xquery.CompiledXQuery;
import org.exist.xquery.XPathException;
import org.exist.xquery.XQueryContext;

import java.util.ArrayList;
import java.util.Map;

/**
 * eXist-db document trigger that appends one {@code <trigger/>} entry to an
 * XML log document for every document-level event it observes.
 * <p>
 * The log document lives in the {@code /db/Triggers} collection; its file name
 * defaults to {@code logj.xml} and can be overridden through the trigger
 * parameter {@code LogFileName} in the collection's XCONF configuration.
 * The log document is created lazily on first use.
 */
public class TriggerTest extends SAXTrigger implements DocumentTrigger {

    // Collection that holds the log document.
    private String logCollection = "xmldb:exist:///db/Triggers";
    // Log file name; may be overridden by the "LogFileName" trigger parameter.
    private String logFileName = "logj.xml";
    // Full URI of the log document (logCollection + "/" + logFileName).
    private String logUri;

    /**
     * Reads the optional {@code LogFileName} parameter from the trigger
     * configuration and derives the log document URI.
     *
     * @param broker     broker used to access the database
     * @param parent     collection the trigger is configured on
     * @param parameters parameters from the XCONF file; eXist supplies each
     *                   parameter's values as a list, so only the first
     *                   "LogFileName" value is used
     * @throws TriggerException propagated from the superclass configuration
     */
    @Override
    public void configure(DBBroker broker, Collection parent, Map parameters)
            throws TriggerException {
        super.configure(broker, parent, parameters);

        ArrayList<String> objList = (ArrayList<String>) parameters.get("LogFileName");
        if (objList != null && objList.size() > 0) {
            logFileName = objList.get(0);
        }

        logUri = logCollection + "/" + logFileName;
    }

    @Override
    public void beforeCreateDocument(DBBroker broker, Txn transaction, XmldbURI uri) throws TriggerException {
        logEvent(broker, uri.toString(), "beforeCreateDocument");
    }

    @Override
    public void afterCreateDocument(DBBroker broker, Txn transaction, DocumentImpl document) throws TriggerException {
        logEvent(broker, document.getDocumentURI(), "afterCreateDocument");
    }

    @Override
    public void beforeUpdateDocument(DBBroker broker, Txn transaction, DocumentImpl document) throws TriggerException {
        logEvent(broker, document.getDocumentURI(), "beforeUpdateDocument");
    }

    @Override
    public void afterUpdateDocument(DBBroker broker, Txn transaction, DocumentImpl document) throws TriggerException {
        logEvent(broker, document.getDocumentURI(), "afterUpdateDocument");
    }

    @Override
    public void beforeMoveDocument(DBBroker broker, Txn transaction, DocumentImpl document, XmldbURI newUri) throws TriggerException {
        logEvent(broker, document.getDocumentURI(), "beforeMoveDocument");
    }

    @Override
    public void afterMoveDocument(DBBroker broker, Txn transaction, DocumentImpl document, XmldbURI newUri) throws TriggerException {
        logEvent(broker, document.getDocumentURI(), "afterMoveDocument");
    }

    @Override
    public void beforeCopyDocument(DBBroker broker, Txn transaction, DocumentImpl document, XmldbURI newUri) throws TriggerException {
        logEvent(broker, document.getDocumentURI(), "beforeCopyDocument");
    }

    @Override
    public void afterCopyDocument(DBBroker broker, Txn transaction, DocumentImpl document, XmldbURI newUri) throws TriggerException {
        logEvent(broker, document.getDocumentURI(), "afterCopyDocument");
    }

    @Override
    public void beforeDeleteDocument(DBBroker broker, Txn transaction, DocumentImpl document) throws TriggerException {
        logEvent(broker, document.getDocumentURI(), "beforeDeleteDocument");
    }

    @Override
    public void afterDeleteDocument(DBBroker broker, Txn transaction, XmldbURI uri) throws TriggerException {
        // Only the URI is available here: the document no longer exists.
        logEvent(broker, uri.toString(), "afterDeleteDocument");
    }

    @Override
    public void beforeUpdateDocumentMetadata(DBBroker broker, Txn txn, DocumentImpl document) throws TriggerException {
        logEvent(broker, document.getDocumentURI(), "beforeUpdateDocumentMetadata");
    }

    @Override
    public void afterUpdateDocumentMetadata(DBBroker broker, Txn txn, DocumentImpl document) throws TriggerException {
        logEvent(broker, document.getDocumentURI(), "afterUpdateDocumentMetadata");
    }

    /**
     * Appends a {@code <trigger/>} element describing one event to the log
     * document, creating the log document first if it does not exist yet.
     * Failures are printed to stderr and deliberately not propagated, so a
     * broken log never blocks the triggering operation.
     *
     * @param broker     broker used to run the logging XQuery
     * @param uriFile    URI of the document the event relates to
     * @param logContent event name recorded in the log entry
     */
    private void logEvent(DBBroker broker, String uriFile, String logContent) throws TriggerException {
        String xQuery = "update insert <trigger event=\"" + logContent + "\" uri=\"" + uriFile + "\" timestamp=\"{current-dateTime()}\"/> into doc(\"" + logUri + "\")/TriggerLogs";

        try {
            XQueryContext context = broker.getXQueryService().newContext(AccessContext.TRIGGER);
            createLogFile(broker, context);
            executeQuery(broker, context, xQuery);
        } catch (XPathException e) {
            e.printStackTrace();
        } catch (PermissionDeniedException e) {
            e.printStackTrace();
        }
    }

    /**
     * Creates the log document (an empty {@code <TriggerLogs/>} root) if it is
     * not available yet; otherwise does nothing. Errors are printed to stderr
     * and swallowed, matching the best-effort logging policy of this trigger.
     */
    private void createLogFile(DBBroker broker, XQueryContext context) {
        String xQuery = "if (not(doc-available(\"" + logUri + "\"))) then xmldb:store(\"" + logCollection + "\", \"" + logFileName + "\", <TriggerLogs/>) else ()";

        try {
            executeQuery(broker, context, xQuery);
        } catch (XPathException e) {
            e.printStackTrace();
        } catch (PermissionDeniedException e) {
            e.printStackTrace();
        }
    }

    /**
     * Compiles and executes one XQuery string. Shared by {@link #logEvent}
     * and {@link #createLogFile}, which previously duplicated this logic.
     */
    private void executeQuery(DBBroker broker, XQueryContext context, String xQuery)
            throws XPathException, PermissionDeniedException {
        CompiledXQuery compiled = broker.getXQueryService().compile(context, xQuery);
        broker.getXQueryService().execute(compiled, NodeSet.EMPTY_SET);
    }
}

Continue reading eXistDB简单Trigger示例03

eXistDB简单Trigger示例02

  • XCONF文件中指定XQuery文件路径
  • XCONF文件中包含XQuery文件
  • XCONF文件中指定Java类

第二种方式,是用XCONF文件通知eXistDB要对哪个collection中的哪些操作做触发,然后将XQuery语句包含在XCONF文件中。

1、在你需要触发的collection的对应配置collection中,增加一个xconf文件,文件名任意,官方推荐collection.xconf。配置collection与原collection的对应关系为,在/db/system/config/db下,建立/db下相同的collection。
比如,如果你希望监控/db/cda02路径,就需要在/db/system/config/db/cda02路径下,新增一个collection.xconf。
collection.xconf

<!-- eXist-db collection configuration: registers an XQueryTrigger whose
     XQuery module is embedded inline in the "query" parameter.
     NOTE(review): only event="create" is declared here, although the embedded
     module also defines before/after-delete-document handlers - confirm
     whether delete events are expected to fire with this configuration. -->
<collection xmlns="http://exist-db.org/collection-config/1.0">
    <triggers>
        <trigger event="create" class="org.exist.collections.triggers.XQueryTrigger">
            <!-- The embedded module logs each event as a <trigger/> element
                 appended to /db/Triggers/log02.xml, creating that file on
                 first use. XML special characters inside the query must stay
                 entity-encoded (&lt; &gt;) because this is attribute content. -->
            <parameter name="query" value="
             xquery version '3.0';

             module namespace trigger='http://exist-db.org/xquery/trigger';
             declare namespace xmldb='http://exist-db.org/xquery/xmldb';

             declare function trigger:before-create-document($uri as xs:anyURI)
             {
                 local:log-event('before', 'create', 'document', $uri)
             };

             declare function trigger:after-create-document($uri as xs:anyURI)
             {
                 local:log-event('after', 'create', 'document', $uri)
             };
             
             declare function trigger:before-delete-document($uri as xs:anyURI)
             {
                 local:log-event('before', 'delete', 'document', $uri)
             };
             
             declare function trigger:after-delete-document($uri as xs:anyURI)
             {
                 local:log-event('after', 'delete', 'document', $uri)
             };
             
             declare function local:log-event($type as xs:string, $event as xs:string, $object-type as xs:string, $uri as xs:string)
             {
                 let $log-collection := '/db/Triggers'
                 let $log := 'log02.xml'
                 let $log-uri := concat($log-collection, '/', $log)
                 return
                 (
                     (: util:log does not work at all
                         util:log('warn', 'trigger fired'),
                     :)
                     
                     (: create the log file if it does not exist :)
                     if (not(doc-available($log-uri))) then
                         xmldb:store($log-collection, $log, &lt;triggers/&gt;)
                     else ()
                     ,
                     (: log the trigger details to the log file :)
                     update insert &lt;trigger event='{string-join(($type, $event, $object-type), '-')}' uri='{$uri}' timestamp='{current-dateTime()}'/&gt; into doc($log-uri)/triggers                 )             };"/>
        </trigger>
    </triggers>
</collection>

Continue reading eXistDB简单Trigger示例02

eXistDB简单Trigger示例01

  • XCONF文件中指定XQuery文件路径
  • XCONF文件中包含XQuery文件
  • XCONF文件中指定Java类

第一种方式,是用XCONF文件通知eXistDB要对哪个collection中的哪些操作做触发,然后触发器指向一个XQM的文件。

1、首先,编写触发器的xqm文件,比如我的保存路径为/db/Triggers/TriggerTest01.xqm
TriggerTest01.xqm

xquery version "3.0";

(: eXist-db trigger module: logs every collection- and document-level event
   it handles as a <trigger/> element appended to /db/Triggers/log01.xml.
   eXist binds these handlers by their exact names in the trigger namespace,
   so the function names below must not be changed. :)

module namespace trigger="http://exist-db.org/xquery/trigger";

declare namespace xmldb="http://exist-db.org/xquery/xmldb";

(: ===== Collection events: create / copy / move / delete ===== :)

declare function trigger:before-create-collection($uri as xs:anyURI)
{
    local:log-event("before", "create", "collection", $uri)
};

declare function trigger:after-create-collection($uri as xs:anyURI)
{
    local:log-event("after", "create", "collection", $uri)
};

declare function trigger:before-copy-collection($uri as xs:anyURI, $new-uri as xs:anyURI)
{
    local:log-event("before", "copy", "collection", concat("from: ", $uri, " to:", $new-uri))
};

(: NOTE(review): in the "after" copy/move handlers the parameter order is
   ($new-uri, $uri) - the reverse of the "before" handlers - as required by
   the eXist trigger API; confirm against the installed eXist version. :)
declare function trigger:after-copy-collection($new-uri as xs:anyURI, $uri as xs:anyURI)
{
    local:log-event("after", "copy", "collection", concat("from: ", $uri, " to:", $new-uri))
};

declare function trigger:before-move-collection($uri as xs:anyURI, $new-uri as xs:anyURI)
{
    local:log-event("before", "move", "collection", concat("from: ", $uri, " to:", $new-uri))
};

declare function trigger:after-move-collection($new-uri as xs:anyURI, $uri as xs:anyURI)
{
    local:log-event("after", "move", "collection", concat("from: ", $uri, " to:", $new-uri))
};

declare function trigger:before-delete-collection($uri as xs:anyURI)
{
    local:log-event("before", "delete", "collection", $uri)
};

declare function trigger:after-delete-collection($uri as xs:anyURI)
{
    local:log-event("after", "delete", "collection", $uri)
};

(: ===== Document events: create / update / copy / move / delete ===== :)

declare function trigger:before-create-document($uri as xs:anyURI)
{
    local:log-event("before", "create", "document", $uri)
};

declare function trigger:after-create-document($uri as xs:anyURI)
{
    local:log-event("after", "create", "document", $uri)
};

declare function trigger:before-update-document($uri as xs:anyURI)
{
    local:log-event("before", "update", "document", $uri)
};

declare function trigger:after-update-document($uri as xs:anyURI)
{
    local:log-event("after", "update", "document", $uri)
};

declare function trigger:before-copy-document($uri as xs:anyURI, $new-uri as xs:anyURI)
{
    local:log-event("before", "copy", "document", concat("from: ", $uri, " to: ", $new-uri))
};

declare function trigger:after-copy-document($new-uri as xs:anyURI, $uri as xs:anyURI)
{
    local:log-event("after", "copy", "document", concat("from: ", $uri, " to: ", $new-uri))
};

declare function trigger:before-move-document($uri as xs:anyURI, $new-uri as xs:anyURI)
{
    local:log-event("before", "move", "document", concat("from: ", $uri, " to: ", $new-uri))
};

declare function trigger:after-move-document($new-uri as xs:anyURI, $uri as xs:anyURI)
{
    local:log-event("after", "move", "document", concat("from: ", $uri, " to: ", $new-uri))
};

declare function trigger:before-delete-document($uri as xs:anyURI)
{
    local:log-event("before", "delete", "document", $uri)
};

declare function trigger:after-delete-document($uri as xs:anyURI)
{
    local:log-event("after", "delete", "document", $uri)
};

(: Shared helper: appends one <trigger/> entry (event name, target URI,
   timestamp) to the log document, creating the log document on first use. :)
declare function local:log-event($type as xs:string, $event as xs:string, $object-type as xs:string, $uri as xs:string)
{
    let $log-collection := "/db/Triggers"
    let $log := "log01.xml"
    let $log-uri := concat($log-collection, "/", $log)
    return
    (
        (: create the log file if it does not exist :)
        if (not(doc-available($log-uri))) then
            xmldb:store($log-collection, $log, <triggers/>)
        else ()
        ,
        (: log the trigger details to the log file :)
        update insert <trigger event="{string-join(($type, $event, $object-type), '-')}" uri="{$uri}" timestamp="{current-dateTime()}"/> into doc($log-uri)/triggers
    )
};

Continue reading eXistDB简单Trigger示例01