-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathranker.py
56 lines (46 loc) · 2.12 KB
/
ranker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# you can change whatever you want in this module, just make sure it doesn't
# break the searcher module
from cosimularity import Cosimularity
from inner_product import Inner_product
from sort_docs import Sorts
class Ranker:
    """Score, sort and truncate the candidate results of a query.

    Base scores come from ``Inner_product.rank()``; when ``activate_pop`` is
    true, each entry that also appears in ``indexer.pop_dict`` receives an
    additional popularity boost before sorting.
    """

    # The popularity boost is mapped linearly onto
    # [POP_BASE, POP_BASE + POP_SCALE] (i.e. 1..16 with the values below).
    POP_SCALE = 15
    POP_BASE = 1

    def __init__(self, indexer):
        """
        :param indexer: object exposing a ``pop_dict`` mapping whose values
            are numeric popularity figures; it is also handed to ``Sorts``
            and ``Inner_product``.
        """
        self.docs_limit = 2000  # hard upper bound on results returned by retrieve_top_k
        self.indexer = indexer
        self.activate_pop = True  # toggles the popularity boost in rank_relevant_docs
        # ``default=0`` keeps construction from raising ValueError when the
        # indexer holds no popularity data yet.
        self.max_pop = max(self.indexer.pop_dict.values(), default=0)
        self.sort = Sorts(self.indexer)

    def rank_relevant_docs(self, relevant_doc, relevant_terms, query_terms):
        """
        Score every relevant document and sort the results by score.
        The score is the value produced by ``Inner_product.rank()``, plus a
        normalized popularity boost when ``self.activate_pop`` is true.
        :param relevant_doc: dictionary of documents that contains at least one term from the query.
        :param relevant_terms: forwarded unchanged to ``Inner_product``.
        :param query_terms: currently unused by this method (see note below).
        :return: list of ``(key, score)`` tuples sorted by score, highest first
        """
        # NOTE: query_terms is only needed by the alternative scorer
        # ``Cosimularity(relevant_doc, relevant_terms, self.indexer).rank(query_terms)``,
        # which is not active here; the parameter stays for caller compatibility.
        ranked = Inner_product(relevant_doc, relevant_terms, self.indexer).rank()
        if self.activate_pop:
            self.pop_handler(ranked)
        return sorted(ranked.items(), key=lambda item: item[1], reverse=True)

    def pop_handler(self, inner_prod):
        """
        Add the normalized popularity boost to the scores, in place.
        :param inner_prod: dictionary of scores; only keys that also exist in
            ``self.indexer.pop_dict`` are modified.
        :return: None
        """
        pop_dict = self.indexer.pop_dict
        # Only values of existing keys are reassigned, so the dictionary size
        # never changes while it is being iterated.
        for key in inner_prod:
            popularity = pop_dict.get(key)
            if popularity is not None:
                inner_prod[key] += self.normelize_popularity(popularity)

    def normelize_popularity(self, val):
        """
        Map a raw popularity value linearly so that 0 becomes ``POP_BASE`` and
        ``self.max_pop`` becomes ``POP_BASE + POP_SCALE``.
        :param val: raw popularity value
        :return: normalized popularity boost
        """
        min_val = 0
        span = self.max_pop - min_val
        if span == 0:
            # Degenerate range (e.g. every popularity is 0): fall back to the
            # minimum boost instead of dividing by zero.
            return self.POP_BASE
        return self.POP_SCALE * ((val - min_val) / span) + self.POP_BASE

    def retrieve_top_k(self, sorted_relevant_doc, k=1):
        """
        return a list of top K tweets based on their ranking from highest to lowest
        :param sorted_relevant_doc: list of all candidates docs.
        :param k: Number of top document to return; values above
            ``self.docs_limit`` are capped and negative values are treated as 0.
        :return: list of relevant document
        """
        # Clamp into [0, docs_limit]; a negative k would otherwise slice from
        # the end of the list and bypass the limit.
        k = max(0, min(k, self.docs_limit))
        return sorted_relevant_doc[:k]