# Source code for gaiaxpy.input_reader.input_reader
from os.path import isfile
from pathlib import Path
import pandas as pd
from .dataframe_reader import DataFrameReader
from .file_reader import FileParserSelector
from .hdfs_reader import HDFSReader
from .list_reader import ListReader
from .local_file_reader import LocalFileReader
from .query_reader import QueryReader
# Extension reported by InputReader.read when the selected reader returns None as its extension.
default_extension = 'csv'
# [docs]
class InputReader:
    """Pick the reader that matches the type of the input and run it.

    The input is dispatched on its type and, for strings, on its content: a
    pandas DataFrame, a path to an existing local file, a list, a string
    starting with 'select' (case-insensitive) or a string starting with
    'hdfs://' (case-insensitive).
    """

    def __init__(self, content, function, truncation, additional_columns=None, selector=None, disable_info=False,
                 user=None, password=None):
        """Store the input and the options forwarded to the selected reader.

        Args:
            content: Input to read. See the class docstring for the accepted kinds.
            function: Forwarded to the reader, or to FileParserSelector for
                file-based inputs (local files and HDFS).
            truncation: Forwarded unchanged to the selected reader.
            additional_columns (dict): Forwarded unchanged to the selected
                reader. Defaults to a new empty dictionary.
            selector: Forwarded unchanged to the selected reader.
            disable_info (bool): Forwarded unchanged to the selected reader.
            user: Forwarded to the list and query readers only.
            password: Forwarded to the list and query readers only.

        Raises:
            ValueError: If additional_columns is neither None nor a dictionary.
        """
        if additional_columns is None:
            # A fresh dictionary per instance avoids sharing a mutable default.
            additional_columns = {}
        if not isinstance(additional_columns, dict):
            raise ValueError(f'Additional columns is {type(additional_columns)}.')
        self.additional_columns = additional_columns
        self.selector = selector
        self.content = content
        self.function = function
        self.truncation = truncation
        self.disable_info = disable_info
        self.user = user
        self.password = password

    def read(self):
        """Read the stored content with the reader that matches its type.

        Returns:
            tuple: The parsed data returned by the reader and the extension it
            reported, replaced by the module-level default_extension when the
            reader reports None.

        Raises:
            ValueError: If the content matches none of the supported input kinds.
        """
        content = self.content
        function = self.function
        truncation = self.truncation
        # Keyword arguments that every reader receives.
        common = {'additional_columns': self.additional_columns, 'selector': self.selector,
                  'disable_info': self.disable_info}
        # Input data directly provided by the user
        if isinstance(content, pd.DataFrame):
            reader = DataFrameReader(content, function, truncation, **common)
        elif isinstance(content, (Path, str)) and isfile(content):
            # The existing-file check comes before the string-prefix checks below,
            # so a string naming an existing file is always read as a local file.
            parser = FileParserSelector(function)
            reader = LocalFileReader(parser, content, truncation, **common)
        # Actual input data got from the Archive
        elif isinstance(content, list):
            reader = ListReader(content, function, truncation, user=self.user, password=self.password, **common)
        elif isinstance(content, str) and content.lower().startswith('select'):
            reader = QueryReader(content, function, truncation, user=self.user, password=self.password, **common)
        elif isinstance(content, str) and content.lower().startswith('hdfs://'):
            parser = FileParserSelector(function)
            reader = HDFSReader(parser, content, truncation, **common)
        else:
            raise ValueError('The input provided does not match any of the expected input types.')
        parsed_data, extension = reader.read()
        if extension is None:
            extension = default_extension
        return parsed_data, extension