debeir.rankers.reranking.nir
NIR Reranker
[Insert paper link here]
"""
NIR Reranker

[Insert paper link here]
"""

import math
from typing import Dict, List

from debeir.core.document import Document
from debeir.rankers.reranking.reranker import DocumentReRanker
from debeir.rankers.transformer_sent_encoder import Encoder
from debeir.utils import scaler
from scipy import spatial
from tqdm import tqdm


class NIReRanker(DocumentReRanker):
    """
    Re-ranker which uses the NIR scoring method:

        score = log(bm25)/log(z) + cosine_sum

    ``cosine_sum`` is the weighted sum, over a document's facets, of
    ``distance_fn(query_vec, facet_vec)``. ``z`` is the logarithm base
    returned by ``scaler.get_z_value(top_cosine_score, top_score)``.
    """

    def __init__(self, query, ranked_list: List[Document], encoder: Encoder,
                 distance_fn=spatial.distance.cosine, facets_weights: Dict = None,
                 presort=False, fields_to_encode=None,
                 *args, **kwargs):
        """
        :param query: Query forwarded to the parent constructor; ``self.query``
            is then encoded once with ``encoder``.
            NOTE(review): assumes the parent class stores ``query`` and
            ``ranked_list`` on ``self`` -- confirm against DocumentReRanker.
        :param ranked_list: Documents to re-rank. The first element's score is
            taken as the top score, so the list must be non-empty and ordered
            best-first (or ``presort`` must be set).
        :param encoder: Callable applied to the query and to each facet value
            to produce the vectors handed to ``distance_fn``.
        :param distance_fn: Function of (query vector, facet vector) returning
            a number; defaults to ``scipy.spatial.distance.cosine``.
        :param facets_weights: Optional mapping of facet name to a
            multiplicative weight. Missing facets use 1.0; a weight of 0
            skips the facet entirely.
        :param presort: If true, sort ``ranked_list`` in place by descending
            score before use.
        :param fields_to_encode: Optional facet names to score. When falsy,
            every facet of each document is considered.
        """
        if presort:
            # Highest score first: _get_top_score() reads index 0 as the top score.
            ranked_list.sort(key=lambda doc: doc.score, reverse=True)

        super().__init__(query, ranked_list, *args, **kwargs)
        self.encoder = encoder
        self.top_score = self._get_top_score()
        self.top_cosine_score = -1

        self.query_vec = self.encoder(self.query)
        self.distance_fn = distance_fn
        self.fields_to_encode = fields_to_encode
        self.facets_weights = facets_weights or {}

        # Per-document facet scores are computed lazily on the first
        # _compute_scores() call and cached here, keyed by doc_id.
        self.pre_calc = {}
        self.pre_calc_finished = False
        self.log_norm = None

    def _get_top_score(self):
        """Return the score of the first document in the ranked list."""
        return self.ranked_list[0].score

    def _compute_scores_helper(self):
        """
        Fill ``self.pre_calc`` with the weighted facet scores of every document
        and track the largest per-document sum in ``self.top_cosine_score``.
        """
        for document in tqdm(self.ranked_list, desc="Calculating cosine scores"):
            facet_scores = {}
            facet_names = self.fields_to_encode or document.facets

            for facet in facet_names:
                # Facets whose name mentions "embedding" are not scored.
                if "embedding" in facet.lower():
                    continue

                # Weights are keyed by facet *name*, not by the facet's content.
                facet_weight = self.facets_weights.get(facet, 1.0)

                # Early exit: a zero weight contributes nothing, skip the encoding.
                if facet_weight == 0:
                    continue

                document_vec = self.encoder(document.facets[facet])
                facet_scores[facet] = self.distance_fn(self.query_vec, document_vec) * facet_weight

            sum_score = sum(facet_scores.values())
            facet_scores["cosine_sum"] = sum_score

            self.top_cosine_score = max(self.top_cosine_score, sum_score)
            self.pre_calc[document.doc_id] = facet_scores

        self.pre_calc_finished = True

    def _compute_scores(self, document):
        """
        Return the NIR score of ``document``: its score taken to the log base
        ``self.log_norm`` plus its cached ``cosine_sum``.

        The first call computes the facet scores for the whole ranked list and
        derives the log base from the top cosine sum and the top score.
        """
        if not self.pre_calc_finished:
            self._compute_scores_helper()
            self.log_norm = scaler.get_z_value(self.top_cosine_score, self.top_score)

        return math.log(document.score, self.log_norm) + self.pre_calc[document.doc_id]["cosine_sum"]
class NIReRanker(DocumentReRanker):
    """
    Re-ranker which uses the NIR scoring method:

        score = log(bm25)/log(z) + cosine_sum

    ``cosine_sum`` is the weighted sum, over a document's facets, of
    ``distance_fn(query_vec, facet_vec)``. ``z`` is the logarithm base
    returned by ``scaler.get_z_value(top_cosine_score, top_score)``.
    """

    def __init__(self, query, ranked_list: List[Document], encoder: Encoder,
                 distance_fn=spatial.distance.cosine, facets_weights: Dict = None,
                 presort=False, fields_to_encode=None,
                 *args, **kwargs):
        """
        :param query: Query forwarded to the parent constructor; ``self.query``
            is then encoded once with ``encoder``.
            NOTE(review): assumes the parent class stores ``query`` and
            ``ranked_list`` on ``self`` -- confirm against DocumentReRanker.
        :param ranked_list: Documents to re-rank. The first element's score is
            taken as the top score, so the list must be non-empty and ordered
            best-first (or ``presort`` must be set).
        :param encoder: Callable applied to the query and to each facet value
            to produce the vectors handed to ``distance_fn``.
        :param distance_fn: Function of (query vector, facet vector) returning
            a number; defaults to ``scipy.spatial.distance.cosine``.
        :param facets_weights: Optional mapping of facet name to a
            multiplicative weight. Missing facets use 1.0; a weight of 0
            skips the facet entirely.
        :param presort: If true, sort ``ranked_list`` in place by descending
            score before use.
        :param fields_to_encode: Optional facet names to score. When falsy,
            every facet of each document is considered.
        """
        if presort:
            # Highest score first: _get_top_score() reads index 0 as the top score.
            ranked_list.sort(key=lambda doc: doc.score, reverse=True)

        super().__init__(query, ranked_list, *args, **kwargs)
        self.encoder = encoder
        self.top_score = self._get_top_score()
        self.top_cosine_score = -1

        self.query_vec = self.encoder(self.query)
        self.distance_fn = distance_fn
        self.fields_to_encode = fields_to_encode
        self.facets_weights = facets_weights or {}

        # Per-document facet scores are computed lazily on the first
        # _compute_scores() call and cached here, keyed by doc_id.
        self.pre_calc = {}
        self.pre_calc_finished = False
        self.log_norm = None

    def _get_top_score(self):
        """Return the score of the first document in the ranked list."""
        return self.ranked_list[0].score

    def _compute_scores_helper(self):
        """
        Fill ``self.pre_calc`` with the weighted facet scores of every document
        and track the largest per-document sum in ``self.top_cosine_score``.
        """
        for document in tqdm(self.ranked_list, desc="Calculating cosine scores"):
            facet_scores = {}
            facet_names = self.fields_to_encode or document.facets

            for facet in facet_names:
                # Facets whose name mentions "embedding" are not scored.
                if "embedding" in facet.lower():
                    continue

                # Weights are keyed by facet *name*, not by the facet's content.
                facet_weight = self.facets_weights.get(facet, 1.0)

                # Early exit: a zero weight contributes nothing, skip the encoding.
                if facet_weight == 0:
                    continue

                document_vec = self.encoder(document.facets[facet])
                facet_scores[facet] = self.distance_fn(self.query_vec, document_vec) * facet_weight

            sum_score = sum(facet_scores.values())
            facet_scores["cosine_sum"] = sum_score

            self.top_cosine_score = max(self.top_cosine_score, sum_score)
            self.pre_calc[document.doc_id] = facet_scores

        self.pre_calc_finished = True

    def _compute_scores(self, document):
        """
        Return the NIR score of ``document``: its score taken to the log base
        ``self.log_norm`` plus its cached ``cosine_sum``.

        The first call computes the facet scores for the whole ranked list and
        derives the log base from the top cosine sum and the top score.
        """
        if not self.pre_calc_finished:
            self._compute_scores_helper()
            self.log_norm = scaler.get_z_value(self.top_cosine_score, self.top_score)

        return math.log(document.score, self.log_norm) + self.pre_calc[document.doc_id]["cosine_sum"]
Re-ranker which uses the NIR scoring method: score = log(bm25)/log(z) + cosine_sum
NIReRanker( query, ranked_list: List[debeir.core.document.Document], encoder: debeir.rankers.transformer_sent_encoder.Encoder, distance_fn=<function cosine>, facets_weights: Dict = None, presort=False, fields_to_encode=None, *args, **kwargs)
def __init__(self, query, ranked_list: List[Document], encoder: Encoder,
             distance_fn=spatial.distance.cosine, facets_weights: Dict = None,
             presort=False, fields_to_encode=None,
             *args, **kwargs):
    """
    :param query: Query forwarded to the parent constructor; ``self.query``
        is then encoded once with ``encoder``.
        NOTE(review): assumes the parent class stores ``query`` and
        ``ranked_list`` on ``self`` -- confirm against DocumentReRanker.
    :param ranked_list: Documents to re-rank. The first element's score is
        taken as the top score, so the list must be non-empty and ordered
        best-first (or ``presort`` must be set).
    :param encoder: Callable applied to the query to produce the query vector.
    :param distance_fn: Distance function stored for later scoring; defaults
        to ``scipy.spatial.distance.cosine``.
    :param facets_weights: Optional mapping of facet weights; a falsy value
        is replaced by an empty dict.
    :param presort: If true, sort ``ranked_list`` in place by descending
        score before use.
    :param fields_to_encode: Optional facet names, stored for later scoring.
    """
    if presort:
        # Highest score first: _get_top_score() is expected to yield the top
        # score, which an ascending sort would turn into the minimum.
        ranked_list.sort(key=lambda doc: doc.score, reverse=True)

    super().__init__(query, ranked_list, *args, **kwargs)
    self.encoder = encoder
    self.top_score = self._get_top_score()
    self.top_cosine_score = -1

    self.query_vec = self.encoder(self.query)
    self.distance_fn = distance_fn
    self.fields_to_encode = fields_to_encode
    self.facets_weights = facets_weights or {}

    # Cache of per-document scores, filled in later; log_norm is unset
    # until those scores have been computed.
    self.pre_calc = {}
    self.pre_calc_finished = False
    self.log_norm = None