# -*- coding: utf-8 -*-
"""Cosine-similarity search in Elasticsearch (es) using BERT sentence vectors."""
from bert_serving.client import BertClient
from elasticsearch import Elasticsearch

# bert-as-service endpoint used to turn text into a sentence vector.
server_ip = "192.168.20.68"
bc = BertClient(server_ip)

# Elasticsearch node holding the 'fault_meter' index.
host = '192.168.20.69:30920'
es = Elasticsearch([host])

# Elasticsearch rejects negative script_score results, and cosine similarity
# can be negative, so the script shifts every score up by this offset.
_SCORE_OFFSET = 1.0
# Minimum raw cosine similarity a document must reach to be returned.
_MIN_COSINE = 0.9


def findRelevantHits(inQuiry):
    """Return the 'phenomenon' texts most similar to *inQuiry*.

    The inquiry is encoded with the module-level BertClient and compared,
    via a ``script_score`` query, against the ``phenomenon_vector`` field of
    every document in the ``fault_meter`` index.

    Args:
        inQuiry: The query text to encode and search for.

    Returns:
        A dict with two keys:
        ``'es'``  - list of ``phenomenon`` strings (at most 10) whose cosine
        similarity to the inquiry is at least 0.9;
        ``'mlt'`` - always an empty list; the more_like_this query is built
        below but is not executed.
    """
    inQuiry_vector = bc.encode([inQuiry])[0].tolist()

    queries = {
        'es': {
            "script_score": {
                "query": {"match_all": {}},
                "script": {
                    # "+ 1.0" keeps the score non-negative as Elasticsearch
                    # requires; min_score below is shifted by the same amount.
                    "source": "cosineSimilarity(params.inQuiry_vector, "
                              "doc['phenomenon_vector']) + 1.0",
                    "params": {"inQuiry_vector": inQuiry_vector},
                },
            },
        },
        'mlt': {
            "more_like_this": {
                "fields": ["phenomenon"],
                "like": inQuiry,
                "min_term_freq": 1,
                "max_query_terms": 50,
                "min_doc_freq": 1,
            },
        },
    }

    result = {'es': [], 'mlt': []}
    for metric, query in queries.items():
        # Only the vector-similarity query is run; 'mlt' keeps its empty list.
        if metric != 'es':
            continue
        body = {
            # Equivalent to "raw cosine similarity >= 0.9" after the offset.
            "min_score": _SCORE_OFFSET + _MIN_COSINE,
            "query": query,
            "size": 10,
            "_source": {"includes": ["phenomenon"]},
        }
        response = es.search(index='fault_meter', body=body)
        result[metric] = [
            hit['_source']['phenomenon'] for hit in response['hits']['hits']
        ]
    return result


if __name__ == "__main__":
    # Demo query; guarded so importing this module does not hit the services.
    inQuery = "后备式UPS当负载接近满载时,"
    result = findRelevantHits(inQuery.strip().lower())
    print(result)