# find.py
  1. # -*- coding: utf-8 -*-
  2. """
  3. es余弦相似度搜索
  4. """
  5. from bert_serving.client import BertClient
  6. server_ip = "192.168.20.68"
  7. bc = BertClient(server_ip)
  8. from elasticsearch import Elasticsearch
  9. host = '192.168.20.69:30920'
  10. es = Elasticsearch([host])
  11. def findRelevantHits(inQuiry):
  12. global response
  13. inQuiry_vector = bc.encode([inQuiry])[0].tolist()
  14. queries = {
  15. 'es': {
  16. "script_score": {
  17. "query": {
  18. "match_all": {}
  19. },
  20. "script": {
  21. "source": "cosineSimilarity(params.inQuiry_vector, doc['phenomenon_vector'])",
  22. "params": {
  23. "inQuiry_vector": inQuiry_vector
  24. }
  25. }
  26. }
  27. },
  28. 'mlt': {
  29. "more_like_this": {
  30. "fields": ["phenomenon"],
  31. "like": inQuiry,
  32. "min_term_freq": 1,
  33. "max_query_terms": 50,
  34. "min_doc_freq": 1
  35. }
  36. }
  37. }
  38. result = {'es': [], 'mlt': []}
  39. for metric, query in queries.items():
  40. if metric == 'es':
  41. body = {"min_score": 0.9, "query": query, "size": 10, "_source": {"includes": ["phenomenon"]}}
  42. response = es.search(index='fault_meter', body=body)
  43. result[metric] = [a['_source']['phenomenon'] for a in response['hits']['hits']]
  44. return result
  45. inQuery = "后备式UPS当负载接近满载时,"
  46. result = findRelevantHits(inQuery.strip().lower())
  47. print(result)