debeir.datasets.factory

  1from pathlib import Path
  2from typing import Dict, Type, Union
  3
  4import toml
  5from debeir.datasets.bioreddit import BioRedditCommentParser, BioRedditSubmissionParser
  6from debeir.datasets.clinical_trials import ClinicalTrialParser, ClinicalTrialsElasticsearchExecutor, \
  7    TrialsElasticsearchQuery, TrialsQueryConfig
  8from debeir.datasets.marco import MarcoElasticsearchExecutor, MarcoQueryConfig
  9from debeir.datasets.trec_clinical_trials import TrecClincialElasticsearchQuery, TrecClinicalTrialsParser
 10from debeir.datasets.trec_covid import TrecCovidParser, TrecElasticsearchQuery
 11from debeir.evaluation.evaluator import Evaluator
 12from debeir.evaluation.residual_scoring import ResidualEvaluator
 13from debeir.core.config import Config, ElasticsearchConfig, GenericConfig, MetricsConfig, NIRConfig, SolrConfig, \
 14    _NIRMasterConfig
 15from debeir.core.executor import GenericElasticsearchExecutor
 16from debeir.core.parser import (
 17    CSVParser, Parser, TSVParser,
 18)
 19from debeir.core.query import GenericElasticsearchQuery, Query
 20
# Lookup tables that map the string names used in config files to the classes
# implementing them. The values are classes, not instances.

# Keyed by the `config_fn` entry of a config; read by config_factory when no
# explicit config class is passed in.
# NOTE(review): "generic" maps to MarcoQueryConfig although GenericConfig is
# imported in this module — confirm this is intended.
str_to_config_cls = {
    "clinical_trials": TrialsQueryConfig,
    "test_trials": TrialsQueryConfig,
    "med-marco": MarcoQueryConfig,
    "generic": MarcoQueryConfig,
}

# Keyed by `config.query_fn`; read by factory_fn to pick the query class.
query_factory = {
    "clinical_trials": TrialsElasticsearchQuery,
    "test_trials": TrialsElasticsearchQuery,
    "generic": GenericElasticsearchQuery,
    "trec_covid": TrecElasticsearchQuery,
    "trec_clinical": TrecClincialElasticsearchQuery,
}

# Keyed by `config.parser_fn`; read by factory_fn to pick the parser class.
parser_factory = {
    "trec_covid": TrecCovidParser,
    "bioreddit-comment": BioRedditCommentParser,
    "bioreddit-submission": BioRedditSubmissionParser,
    "test_trials": ClinicalTrialParser,
    "med-marco": CSVParser,
    "tsv": TSVParser,
    "trec_clinical": TrecClinicalTrialsParser
}

# Keyed by `config.executor_fn`; read by factory_fn to pick the executor class.
# NOTE(review): the key here is "clinical" while the other tables use
# "clinical_trials" — confirm configs really set executor_fn = "clinical".
executor_factory = {
    "clinical": ClinicalTrialsElasticsearchExecutor,
    "med-marco": MarcoElasticsearchExecutor,
    "generic": GenericElasticsearchExecutor,
}

# Evaluator classes by name. Not referenced by any function visible in this
# module; presumably looked up by callers elsewhere — verify.
evaluator_factory = {
    "residual": ResidualEvaluator,
    "trec": Evaluator,
}
 56
 57
 58def get_index_name(config_fp):
 59    """
 60    Get the index name from the config without parsing as a TOML
 61
 62    :param config_fp:
 63    :return:
 64    """
 65    with open(config_fp, "r") as reader:
 66        for line in reader:
 67            if line.startswith("index"):
 68                line = line.replace('"', "")
 69                return line.split("=")[-1].strip()
 70    return None
 71
 72
 73def factory_fn(config_fp, index=None) -> (Query, GenericConfig,
 74                                          Parser, GenericElasticsearchExecutor, Evaluator):
 75    """
 76    Factory method for creating the parsed topics, config object, query object and query executor object
 77
 78    :param config_fp: Config file path
 79    :param index: Index to search
 80    :return:
 81        Query, Config, Parser, Executor, Evaluator
 82    """
 83    config = config_factory(config_fp)
 84    assert config.index is not None
 85    query_cls = query_factory[config.query_fn]
 86    parser = parser_factory[config.parser_fn]
 87    executor = executor_factory[config.executor_fn]
 88
 89    return query_cls, config, parser, executor
 90
 91
 92def config_factory(path: Union[str, Path] = None, config_cls: Type[Config] = None, args_dict: Dict = None):
 93    """
 94    Factory method for creating configs
 95
 96    :param path: Config path
 97    :param config_cls: Config class to instantiate
 98    :param args_dict: Arguments to consider
 99    :return:
100        A config object
101    """
102    if path:
103        args_dict = toml.load(path)
104
105    if not config_cls:
106        if "config_fn" in args_dict:
107            config_cls = str_to_config_cls[args_dict["config_fn"]]
108        else:
109            raise NotImplementedError()
110
111    return config_cls.from_args(args_dict, config_cls)
112
113
def get_nir_config(nir_config, *args, ignore_errors=False, **kwargs):
    """
    Load the master NIR config and build the NIR, search engine and metrics configs from it

    :param nir_config: Passed to config_factory as the path of the master config
    :param args: Not read by this function
    :param ignore_errors: When false, a missing search engine config raises RuntimeError
    :param kwargs: Only the 'engine' entry is read here; it selects the search engine config
    :return:
        nir_config, search_engine_config, metrics_config
        (search_engine_config is None when no supported engine was requested)
    """
    engine_config_classes = {"solr": SolrConfig,
                             "elasticsearch": ElasticsearchConfig}

    master = config_factory(nir_config, config_cls=_NIRMasterConfig)

    requested_engine = kwargs.get('engine')
    if requested_engine in engine_config_classes:
        engine_settings = master.get_search_engine_settings(requested_engine)
        search_engine_config = config_factory(args_dict=engine_settings,
                                              config_cls=engine_config_classes[requested_engine])
    else:
        search_engine_config = None

    if not ignore_errors and search_engine_config is None:
        raise RuntimeError("Unable to get a search engine configuration.")

    metrics_config = config_factory(args_dict=master.get_metrics(), config_cls=MetricsConfig)
    nir_settings = config_factory(args_dict=master.get_nir_settings(), config_cls=NIRConfig)

    return nir_settings, search_engine_config, metrics_config
140
141
def apply_nir_config(func):
    """
    Decorator that applies the NIR config settings to the current function

    The keyword arguments of each call are passed through the ``__update__`` method of
    the metrics, search engine and NIR configs (in that order) and the result is used
    as the keyword arguments of the wrapped function.

    :param func: Function to wrap
    :return:
        The wrapping function
    """
    from functools import wraps

    @wraps(func)
    def parse_nir_config(*args, ignore_errors=False, **kwargs):
        """
        Parses the NIR config for the different setting groups: Search Engine, Metrics and NIR settings
        Applies these settings to the current function

        :param ignore_errors: Forwarded to get_nir_config
        :param args: Forwarded to get_nir_config and to the wrapped function
        :param kwargs: Forwarded to get_nir_config, then updated from the configs
        :return:
            Whatever the wrapped function returns
        """
        # ignore_errors is keyword-only in get_nir_config; passed positionally it
        # would land in *args there and the caller's value would be dropped
        nir_config, search_engine_config, metrics_config = get_nir_config(*args,
                                                                          ignore_errors=ignore_errors,
                                                                          **kwargs)

        kwargs = metrics_config.__update__(**kwargs)
        # get_nir_config returns None here when ignore_errors is set and no
        # supported engine was requested
        if search_engine_config is not None:
            kwargs = search_engine_config.__update__(**kwargs)
        kwargs = nir_config.__update__(**kwargs)

        return func(*args, **kwargs)

    return parse_nir_config
def get_index_name(config_fp):
def get_index_name(config_fp):
    """
    Get the index name from the config without parsing the file as TOML

    Scans the file line by line and returns the value of the first
    ``index = ...`` assignment, with surrounding whitespace and quotes removed.

    :param config_fp: Path of the config file to scan
    :return:
        The index name as a string, or None if the file has no ``index`` key
    """
    with open(config_fp, "r", encoding="utf-8") as reader:
        for line in reader:
            # Split on the first "=" only, so a value containing "=" stays intact
            key, separator, value = line.partition("=")
            # Require an exact key match: a prefix test would also pick up
            # unrelated keys such as "index_prefix"
            if separator and key.strip() == "index":
                return value.strip().strip("\"'")
    return None

Get the index name from the config without parsing the file as TOML.

Parameters
  • config_fp:
Returns
def factory_fn( config_fp, index=None) -> (<class 'debeir.core.query.Query'>, <class 'debeir.core.config.GenericConfig'>, <class 'debeir.core.parser.Parser'>, <class 'debeir.core.executor.GenericElasticsearchExecutor'>, <class 'debeir.evaluation.evaluator.Evaluator'>):
def factory_fn(config_fp, index=None) -> (Type[Query], GenericConfig,
                                          Type[Parser], Type[GenericElasticsearchExecutor]):
    """
    Factory method that loads the config and looks up the query, parser and executor classes it names

    :param config_fp: Config file path
    :param index: Index to search. Currently unused: the index is always taken from the loaded config
    :return:
        Query class, Config object, Parser class, Executor class
    :raises ValueError: If the loaded config has no index set
    :raises KeyError: If the config names a query, parser or executor that is not registered
    """
    config = config_factory(config_fp)

    # Explicit raise rather than assert, so the check is not stripped under `python -O`
    if config.index is None:
        raise ValueError(f"Config {config_fp!r} does not define an index")

    query_cls = query_factory[config.query_fn]
    parser_cls = parser_factory[config.parser_fn]
    executor_cls = executor_factory[config.executor_fn]

    return query_cls, config, parser_cls, executor_cls

Factory method that loads the config object and looks up the query, parser and executor classes named in it.

Parameters
  • config_fp: Config file path
  • index: Index to search
Returns
Query, Config, Parser, Executor
def config_factory( path: Union[str, pathlib.Path] = None, config_cls: Type[debeir.core.config.Config] = None, args_dict: Dict = None):
 93def config_factory(path: Union[str, Path] = None, config_cls: Type[Config] = None, args_dict: Dict = None):
 94    """
 95    Factory method for creating configs
 96
 97    :param path: Config path
 98    :param config_cls: Config class to instantiate
 99    :param args_dict: Arguments to consider
100    :return:
101        A config object
102    """
103    if path:
104        args_dict = toml.load(path)
105
106    if not config_cls:
107        if "config_fn" in args_dict:
108            config_cls = str_to_config_cls[args_dict["config_fn"]]
109        else:
110            raise NotImplementedError()
111
112    return config_cls.from_args(args_dict, config_cls)

Factory method for creating configs

Parameters
  • path: Config path
  • config_cls: Config class to instantiate
  • args_dict: Arguments to consider
Returns
A config object
def get_nir_config(nir_config, *args, ignore_errors=False, **kwargs):
def get_nir_config(nir_config, *args, ignore_errors=False, **kwargs):
    """
    Load the master NIR config and build the NIR, search engine and metrics configs from it

    :param nir_config: Passed to config_factory as the path of the master config
    :param args: Not read by this function
    :param ignore_errors: When false, a missing search engine config raises RuntimeError
    :param kwargs: Only the 'engine' entry is read here; it selects the search engine config
    :return:
        nir_config, search_engine_config, metrics_config
        (search_engine_config is None when no supported engine was requested)
    """
    engine_config_classes = {"solr": SolrConfig,
                             "elasticsearch": ElasticsearchConfig}

    master = config_factory(nir_config, config_cls=_NIRMasterConfig)

    requested_engine = kwargs.get('engine')
    if requested_engine in engine_config_classes:
        engine_settings = master.get_search_engine_settings(requested_engine)
        search_engine_config = config_factory(args_dict=engine_settings,
                                              config_cls=engine_config_classes[requested_engine])
    else:
        search_engine_config = None

    if not ignore_errors and search_engine_config is None:
        raise RuntimeError("Unable to get a search engine configuration.")

    metrics_config = config_factory(args_dict=master.get_metrics(), config_cls=MetricsConfig)
    nir_settings = config_factory(args_dict=master.get_nir_settings(), config_cls=NIRConfig)

    return nir_settings, search_engine_config, metrics_config
def apply_nir_config(func):
def apply_nir_config(func):
    """
    Decorator that applies the NIR config settings to the current function

    The keyword arguments of each call are passed through the ``__update__`` method of
    the metrics, search engine and NIR configs (in that order) and the result is used
    as the keyword arguments of the wrapped function.

    :param func: Function to wrap
    :return:
        The wrapping function
    """
    from functools import wraps

    @wraps(func)
    def parse_nir_config(*args, ignore_errors=False, **kwargs):
        """
        Parses the NIR config for the different setting groups: Search Engine, Metrics and NIR settings
        Applies these settings to the current function

        :param ignore_errors: Forwarded to get_nir_config
        :param args: Forwarded to get_nir_config and to the wrapped function
        :param kwargs: Forwarded to get_nir_config, then updated from the configs
        :return:
            Whatever the wrapped function returns
        """
        # ignore_errors is keyword-only in get_nir_config; passed positionally it
        # would land in *args there and the caller's value would be dropped
        nir_config, search_engine_config, metrics_config = get_nir_config(*args,
                                                                          ignore_errors=ignore_errors,
                                                                          **kwargs)

        kwargs = metrics_config.__update__(**kwargs)
        # get_nir_config returns None here when ignore_errors is set and no
        # supported engine was requested
        if search_engine_config is not None:
            kwargs = search_engine_config.__update__(**kwargs)
        kwargs = nir_config.__update__(**kwargs)

        return func(*args, **kwargs)

    return parse_nir_config

Decorator that applies the NIR config settings to the current function. Replaces arguments and keyword arguments with those found in the config.

Parameters
  • func:
Returns