debeir.datasets.bioreddit
from typing import Dict

from debeir.core.parser import CSVParser
from debeir.core.query import GenericElasticsearchQuery


class BioRedditSubmissionParser(CSVParser):
    """
    CSV topic parser for the BioReddit Submission dataset.

    Adds no parsing logic of its own; ``get_topics`` defers entirely to
    ``CSVParser``.
    """
    # NOTE(review): presumably the CSV columns CSVParser reads -- confirm there.
    parse_fields = ["id", "body"]

    @classmethod
    def get_topics(cls, csvfile) -> Dict[int, Dict[str, str]]:
        """
        Return the topics the parent parser produces for ``csvfile``.

        :param csvfile: passed through untouched to ``CSVParser.get_topics``.
        :return: whatever the parent ``get_topics`` returns.
        """
        parsed = super().get_topics(csvfile)
        return parsed


class BioRedditCommentParser(CSVParser):
    """
    CSV topic parser for the BioReddit Comment dataset.
    """
    # NOTE(review): presumably the CSV columns CSVParser reads -- confirm there.
    parse_fields = ["id", "parent_id", "selftext", "title"]

    @classmethod
    def get_topics(cls, csvfile) -> Dict[str, Dict[str, str]]:
        """
        Parse ``csvfile`` and return its topics keyed by their ``"id"`` value.

        Every topic from the parent parser is modified in place:
        ``"selftext"`` is renamed to ``"text"`` and ``"title"`` to
        ``"text2"``. The keys used by the parent mapping are discarded.

        :param csvfile: passed through untouched to ``CSVParser.get_topics``.
        :return: mapping of topic id to the renamed topic.
        """
        by_id = {}

        for record in super().get_topics(csvfile).values():
            record["text"] = record.pop("selftext")
            record["text2"] = record.pop("title")
            by_id[record["id"]] = record

        return by_id


class BioRedditElasticsearchQuery(GenericElasticsearchQuery):
    """
    Elasticsearch query object for the BioReddit dataset.
    """

    def __init__(self, topics, config, *args, **kwargs):
        """
        Initialise the generic query, then apply the BioReddit settings.

        :param topics: stored on ``self.topics``.
        :param config: stored on ``self.config``; must expose ``query_type``.
        :param args: extra positional arguments forwarded to the parent.
        :param kwargs: extra keyword arguments forwarded to the parent.
        """
        super().__init__(topics, config, *args, **kwargs)
        # NOTE(review): presumably the index field queried as text -- confirm.
        self.mappings = ["Text"]

        self.topics = topics
        self.config = config
        self.query_type = self.config.query_type

        # NOTE(review): presumably the index field holding embeddings -- confirm.
        self.embed_mappings = ["Text_Embedding"]

        # Maps a name to the corresponding bound generate_* method.
        self.query_funcs = dict(
            query=self.generate_query,
            embedding=self.generate_query_embedding,
        )
class BioRedditSubmissionParser(CSVParser):
    """
    CSV topic parser for the BioReddit Submission dataset.

    Adds no parsing logic of its own; ``get_topics`` defers entirely to
    ``CSVParser``.
    """
    # NOTE(review): presumably the CSV columns CSVParser reads -- confirm there.
    parse_fields = ["id", "body"]

    @classmethod
    def get_topics(cls, csvfile) -> Dict[int, Dict[str, str]]:
        """
        Return the topics the parent parser produces for ``csvfile``.

        :param csvfile: passed through untouched to ``CSVParser.get_topics``.
        :return: whatever the parent ``get_topics`` returns.
        """
        parsed = super().get_topics(csvfile)
        return parsed
Parser for the BioReddit Submission Dataset
@classmethod
def
get_topics(cls, csvfile) -> Dict[int, Dict[str, str]]:
@classmethod
def get_topics(cls, csvfile) -> Dict[int, Dict[str, str]]:
    """
    Return the topics the parent parser produces for ``csvfile``.

    :param csvfile: passed through untouched to the parent ``get_topics``.
    :return: whatever the parent ``get_topics`` returns.
    """
    parsed = super().get_topics(csvfile)
    return parsed
Class method for getting topics; passes csvfile straight to the parent parser's get_topics and returns its result unchanged.
Inherited Members
class BioRedditCommentParser(CSVParser):
    """
    CSV topic parser for the BioReddit Comment dataset.
    """
    # NOTE(review): presumably the CSV columns CSVParser reads -- confirm there.
    parse_fields = ["id", "parent_id", "selftext", "title"]

    @classmethod
    def get_topics(cls, csvfile) -> Dict[str, Dict[str, str]]:
        """
        Parse ``csvfile`` and return its topics keyed by their ``"id"`` value.

        Every topic from the parent parser is modified in place:
        ``"selftext"`` is renamed to ``"text"`` and ``"title"`` to
        ``"text2"``. The keys used by the parent mapping are discarded.

        :param csvfile: passed through untouched to ``CSVParser.get_topics``.
        :return: mapping of topic id to the renamed topic.
        """
        by_id = {}

        for record in super().get_topics(csvfile).values():
            record["text"] = record.pop("selftext")
            record["text2"] = record.pop("title")
            by_id[record["id"]] = record

        return by_id
Parser for the BioReddit Comment Dataset
@classmethod
def
get_topics(cls, csvfile) -> Dict[str, Dict[str, str]]:
@classmethod
def get_topics(cls, csvfile) -> Dict[str, Dict[str, str]]:
    """
    Parse ``csvfile`` and return its topics keyed by their ``"id"`` value.

    Every topic from the parent parser is modified in place:
    ``"selftext"`` is renamed to ``"text"`` and ``"title"`` to ``"text2"``.
    The keys used by the parent mapping are discarded.

    :param csvfile: passed through untouched to the parent ``get_topics``.
    :return: mapping of topic id to the renamed topic.
    """
    by_id = {}

    for record in super().get_topics(csvfile).values():
        record["text"] = record.pop("selftext")
        record["text2"] = record.pop("title")
        by_id[record["id"]] = record

    return by_id
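Class method for getting topics; calls the parent parser's get_topics, renames each topic's "selftext" field to "text" and its "title" field to "text2", and returns the topics keyed by their "id" field.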
Inherited Members
class BioRedditElasticsearchQuery(GenericElasticsearchQuery):
    """
    Elasticsearch query object for the BioReddit dataset.
    """

    def __init__(self, topics, config, *args, **kwargs):
        """
        Initialise the generic query, then apply the BioReddit settings.

        :param topics: stored on ``self.topics``.
        :param config: stored on ``self.config``; must expose ``query_type``.
        :param args: extra positional arguments forwarded to the parent.
        :param kwargs: extra keyword arguments forwarded to the parent.
        """
        super().__init__(topics, config, *args, **kwargs)
        # NOTE(review): presumably the index field queried as text -- confirm.
        self.mappings = ["Text"]

        self.topics = topics
        self.config = config
        self.query_type = self.config.query_type

        # NOTE(review): presumably the index field holding embeddings -- confirm.
        self.embed_mappings = ["Text_Embedding"]

        # Maps a name to the corresponding bound generate_* method.
        self.query_funcs = dict(
            query=self.generate_query,
            embedding=self.generate_query_embedding,
        )
Elasticsearch query object for the BioReddit dataset.
BioRedditElasticsearchQuery(topics, config, *args, **kwargs)
def __init__(self, topics, config, *args, **kwargs):
    """
    Initialise the generic query, then apply the BioReddit settings.

    :param topics: stored on ``self.topics``.
    :param config: stored on ``self.config``; must expose ``query_type``.
    :param args: extra positional arguments forwarded to the parent.
    :param kwargs: extra keyword arguments forwarded to the parent.
    """
    super().__init__(topics, config, *args, **kwargs)
    # NOTE(review): presumably the index field queried as text -- confirm.
    self.mappings = ["Text"]

    self.topics = topics
    self.config = config
    self.query_type = self.config.query_type

    # NOTE(review): presumably the index field holding embeddings -- confirm.
    self.embed_mappings = ["Text_Embedding"]

    # Maps a name to the corresponding bound generate_* method.
    self.query_funcs = dict(
        query=self.generate_query,
        embedding=self.generate_query_embedding,
    )