debeir.evaluation.residual_scoring
```python
import os
import subprocess
import tempfile
import uuid
from typing import Dict, List, Union

from debeir.evaluation.evaluator import Evaluator


# Remove all documents that exist in the training set
# Evaluate on remaining
# Normalize for result set length, cut off at ????


class ResidualEvaluator(Evaluator):
    """Residual scoring is the scoring of a subset of documents, or the residual.
    The residual is created by removing documents from the collection and qrels.
    """

    def __init__(self, qrels: str, metrics: List[str], filter_ids: Dict[str, List[str]]):
        """
        Args:
            qrels (str): Path to the qrels file.
            metrics (List[str]): A list of metrics with depth, e.g. NDCG@1000.
            filter_ids (Dict[str, List[str]]): Document IDs to remove from the run,
                given as a mapping of topic number to document IDs.
        """
        super().__init__(qrels, metrics)
        self.qrels_fp = qrels
        self.filter_ids = filter_ids

    def _filter_run(self, res: str):
        """Write a copy of the run file with all filtered documents removed."""
        if self.filter_ids is None:
            return res

        tmpdir = tempfile.mkdtemp()
        tmpfp = os.path.join(tmpdir, str(uuid.uuid4()))

        with open(res) as run_file, open(tmpfp, 'w') as writer:
            for line in run_file:
                # TREC run format: topic_num Q0 doc_id rank score run_name
                topic_num, _, doc_id, _, _, _ = line.split()
                # Topics without an entry in filter_ids keep all their documents
                if doc_id in self.filter_ids.get(topic_num, []):
                    continue

                writer.write(line)

        return tmpfp

    def evaluate_runs(self, res: Union[str, List[str]], with_trec_binary=False, **kwargs):
        """ Run the residual evaluation for the runs

        :param res: The run results to evaluate
        :param with_trec_binary: Use the trec_eval C binary instead of the default
            Python library, defaults to False
        :return: A dictionary of the supplied metrics scored against the qrels
        """
        if with_trec_binary:
            return self._evaluate_with_binary(res, **kwargs)

        fp = self._filter_run(res)

        return super().evaluate_runs(fp, **kwargs)

    def _evaluate_with_binary(self, res, **kwargs):
        """Evaluate the filtered run by shelling out to the trec_eval binary."""
        fp = self._filter_run(res)

        output = subprocess.check_output(["trec_eval", self.qrels_fp, fp]).decode()

        metrics = {}

        for line in output.split("\n"):
            try:
                metric, _, value = line.split()
            except ValueError:
                # Skip lines that do not have exactly three whitespace-separated fields
                continue
            metrics[metric] = value

        return metrics
```
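To make the filtering step concrete, here is a small self-contained sketch (the topic numbers, document IDs, and run name are invented for illustration) of how entries listed in `filter_ids` are dropped from a TREC-format run before scoring:

```python
# Hypothetical data: doc_a was seen in training for topic 1, so it is
# removed; topics with no entry in filter_ids keep all their documents.
filter_ids = {"1": ["doc_a"]}

run_lines = [
    # TREC run format: topic_num Q0 doc_id rank score run_name
    "1 Q0 doc_a 1 12.3 my_run",
    "1 Q0 doc_b 2 11.9 my_run",
    "2 Q0 doc_c 1 10.1 my_run",
]

residual = [
    line for line in run_lines
    if line.split()[2] not in filter_ids.get(line.split()[0], [])
]

assert residual == run_lines[1:]  # only doc_a for topic 1 is dropped
```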
class ResidualEvaluator(Evaluator):
Residual scoring is the scoring of a subset of documents, or the residual. The residual is created by removing documents from the collection and qrels.
ResidualEvaluator(qrels: str, metrics: List[str], filter_ids: Dict[str, List[str]])
Args:
- qrels (str): Path to the qrels file.
- metrics (List[str]): A list of metrics with depth, e.g. NDCG@1000.
- filter_ids (Dict[str, List[str]]): Document IDs to remove from the run, given as a mapping of topic number to document IDs.
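A minimal construction sketch; the qrels path, metric strings, and IDs below are placeholders, and the exact metric-string format accepted is determined by the parent `Evaluator`:

```python
# Hypothetical values for illustration only.
evaluator = ResidualEvaluator(
    qrels="path/to/qrels.txt",             # placeholder qrels path
    metrics=["NDCG@1000", "P@10"],         # metrics with depth
    filter_ids={"1": ["doc_a", "doc_b"]},  # docs to drop from topic 1's results
)
```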
def evaluate_runs(self, res: Union[str, List[str]], with_trec_binary=False, **kwargs):
Run the residual evaluation for the runs
Parameters
- res: The run results to evaluate; a path to a run file (or a list of paths)
- with_trec_binary: Use the trec_eval C binary instead of the default Python library; defaults to False
Returns
A dictionary mapping each of the supplied metrics to its score for the results against the qrels
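For example, assuming the `evaluator` instance sketched above and a run file at a placeholder path:

```python
# Score the residual run with the default Python evaluator...
scores = evaluator.evaluate_runs("path/to/my_run.txt")

# ...or shell out to the trec_eval C binary instead (trec_eval must be on PATH).
binary_scores = evaluator.evaluate_runs("path/to/my_run.txt", with_trec_binary=True)
```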