1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556 |
# -*- coding: utf-8 -*-
"""
Elasticsearch cosine-similarity search.
"""
from bert_serving.client import BertClient
from elasticsearch import Elasticsearch

# Connection settings.
server_ip = "192.168.20.68"      # passed to BertClient below
host = '192.168.20.69:30920'     # passed to Elasticsearch below

# Shared clients used by findRelevantHits().
bc = BertClient(server_ip)
es = Elasticsearch([host])
def findRelevantHits(inQuiry):
    """Search the ``fault_meter`` index for phenomena related to *inQuiry*.

    Two independent queries are sent to Elasticsearch:

    * ``'es'``  -- a ``script_score`` query over all documents whose score is
      the cosine similarity between the BERT embedding of *inQuiry* and the
      document's ``phenomenon_vector`` field. Hits scoring below 0.9 are
      dropped via ``min_score``.
    * ``'mlt'`` -- a ``more_like_this`` text query on the ``phenomenon``
      field, using *inQuiry* as the ``like`` text.

    Args:
        inQuiry: Query text. It is embedded with the module-level BERT
            client ``bc`` and searched with the module-level client ``es``.

    Returns:
        A dict with the keys ``'es'`` and ``'mlt'``; each value is the list
        of ``phenomenon`` strings of the hits (at most 10) for that query.
    """
    # Embed the single query text and convert it to a plain list so it can be
    # serialised into the script parameters.
    inQuiry_vector = bc.encode([inQuiry])[0].tolist()

    queries = {
        'es': {
            "script_score": {
                "query": {
                    "match_all": {}
                },
                "script": {
                    # NOTE(review): Elasticsearch rejects negative script
                    # scores and its examples add 1.0 to cosineSimilarity.
                    # Left as-is because min_score below is calibrated to the
                    # raw similarity -- confirm negatives cannot occur here.
                    "source": "cosineSimilarity(params.inQuiry_vector, doc['phenomenon_vector'])",
                    "params": {
                        "inQuiry_vector": inQuiry_vector
                    }
                }
            }
        },
        'mlt': {
            "more_like_this": {
                "fields": ["phenomenon"],
                "like": inQuiry,
                "min_term_freq": 1,
                "max_query_terms": 50,
                "min_doc_freq": 1
            }
        }
    }

    result = {'es': [], 'mlt': []}
    for metric, query in queries.items():
        # Every metric gets its own request body so that each query is really
        # the one being executed.
        body = {
            "query": query,
            "size": 10,
            "_source": {"includes": ["phenomenon"]},
        }
        if metric == 'es':
            # The 0.9 threshold is only applied to the cosine-similarity
            # query, as in the original request body.
            body["min_score"] = 0.9

        # Keep the response local; nothing outside this function needs it.
        response = es.search(index='fault_meter', body=body)
        result[metric] = [
            hit['_source']['phenomenon'] for hit in response['hits']['hits']
        ]
    return result
if __name__ == "__main__":
    # Demo query. Guarded so that importing this module does not run a
    # search and print its result as a side effect.
    inQuery = "后备式UPS当负载接近满载时,"
    # Trim surrounding whitespace and lower-case the text before searching.
    result = findRelevantHits(inQuery.strip().lower())
    print(result)
|