Source code for gaiaxpy.converter.converter

"""
converter.py
====================================
Module for the converter functionality.
"""

import numbers
import numpy as np
import pandas as pd
from configparser import ConfigParser
from os import path
from .config import get_config, load_config
from gaiaxpy.config import config_path
from gaiaxpy.core import _get_spectra_type, _progress_tracker, \
                         _validate_arguments, _validate_pwl_sampling
from gaiaxpy.core.satellite import BANDS
from gaiaxpy.input_reader import InputReader
from gaiaxpy.output import SampledSpectraData
from gaiaxpy.spectrum import SampledBasisFunctions, XpContinuousSpectrum, \
                             XpSampledSpectrum, _get_covariance_matrix

config_parser = ConfigParser()
config_parser.read(path.join(config_path, 'config.ini'))
config_file = path.join(config_path, config_parser.get('converter', 'optimised_bases'))


[docs]def convert( input_object, sampling=np.linspace( 0, 60, 600), truncation=False, output_path='.', output_file='output_spectra', output_format=None, save_file=True, username=None, password=None): """ Conversion utility: converts the input internally calibrated mean spectra from the continuous representation to a sampled form. The sampling grid can be defined by the user, alternatively a default will be adopted. Optionally, the continuous representation can be truncated dropping the bases functions (and corresponding coefficients) that were considered not to be significant considering the errors on the reconstructed mean spectra. Args: input_object (object): Path to the file containing the mean spectra as downloaded from the archive in their continuous representation, a list of sources ids (string or long), or a pandas DataFrame. sampling (ndarray): 1D array containing the desired sampling in pseudo-wavelengths. truncation (bool): Toggle truncation of the set of bases. The level of truncation to be applied is defined by the recommended value in the input files. output_path (str): Path where to save the output data. output_file (str): Name of the output file. output_format (str): Format to be used for the output file. If no format is given, then the output file will be in the same format as the input file. save_file (bool): Whether to save the output in a file. If false, output_format and output_file are ignored. username (str): Cosmos username, only suggested when input_object is a list or ADQL query. password (str): Cosmos password, only suggested when input_object is a list or ADQL query. Returns: (tuple): tuple containing: DataFrame: The values for all sampled spectra. ndarray: The sampling used to convert the input spectra (user-provided or default). Raises: ValueError: If the sampling is out of the expected boundaries. """ # Check sampling _validate_pwl_sampling(sampling) _validate_arguments(convert.__defaults__[3], output_file, save_file) parsed_input_data, extension = InputReader(input_object, convert, username, password)._read() config_df = load_config(config_file) # Union of unique ids as sets unique_bases_ids = get_unique_basis_ids(parsed_input_data) # Get design matrices design_matrices = get_design_matrices(unique_bases_ids, sampling, config_df) spectra_list = _create_spectra(parsed_input_data, truncation, design_matrices) # Generate output spectra_df = pd.DataFrame.from_records([spectrum._spectrum_to_dict() for spectrum in spectra_list]) spectra_type = _get_spectra_type(spectra_list) spectra_df.attrs['data_type'] = spectra_type positions = spectra_list[0]._get_positions() # Save output output_data = SampledSpectraData(spectra_df, positions) output_data.save(save_file, output_path, output_file, output_format, extension) return spectra_df, positions
def _create_continuous_spectrum(row, band): covariance_matrix = _get_covariance_matrix(row, band) if covariance_matrix is not None: continuous_spectrum = XpContinuousSpectrum( row['source_id'], band.upper(), row[f'{band}_coefficients'], covariance_matrix, row[f'{band}_standard_deviation']) return continuous_spectrum def _create_spectrum(row, truncation, design_matrices, band): """ Create a single sampled spectrum from the input continuously-represented mean spectrum and design matrix. Args: row (DataFrame): Single row in a DataFrame containing the entry for one source in the mean spectra file. This will include columns for both bands (although one could be missing). truncation (bool): Toggle truncation of the set of bases. design_matrix (ndarray): 2D array containing the basis functions sampled on the pseudo-wavelength grid (either user-defined or default). band (str): BP/RP band. Returns: obj: The sampled spectrum. """ covariance_matrix = _get_covariance_matrix(row, band) if covariance_matrix is not None: continuous_spectrum = XpContinuousSpectrum( row['source_id'], band, row[f'{band}_coefficients'], covariance_matrix, row[f'{band}_standard_deviation']) if truncation: recommended_truncation = row[f'{band}_n_relevant_bases'] else: recommended_truncation = -1 spectrum = XpSampledSpectrum.from_continuous( continuous_spectrum, design_matrices.get( row.loc[f'{band}_basis_function_id']), truncation=recommended_truncation) return spectrum def _create_spectra(parsed_input_data, truncation, design_matrices): """ Internal wrapper function. Allows _create_spectrum to use the generic progress tracker. """ spectra_list = [] nrows = len(parsed_input_data) @_progress_tracker def create_spectrum(row, truncation, *args): design_matrices = args[0] for band in BANDS: try: spectrum_xp = _create_spectrum(row, truncation, design_matrices, band) spectra_list.append(spectrum_xp) except BaseException: # Band not available continue for index, row in parsed_input_data.iterrows(): create_spectrum(row, truncation, design_matrices, index, nrows) return spectra_list
[docs]def get_unique_basis_ids(parsed_input_data): """ Get the IDs of the unique basis required to sample all spectra in the input files. Args: parsed_input_data (DataFrame): Pandas DataFrame populated with the content of the file containing the mean spectra in continuous representation. Returns: set: A set containing all the required unique basis function IDs. """ # Keep only non NaN values (in Python, nan != nan) def remove_nans(_set): return {int(element) for element in _set if element == element} set_bp = set([basis for basis in parsed_input_data[f'{BANDS.bp}_basis_function_id'] if isinstance(basis, numbers.Number)]) set_rp = set([basis for basis in parsed_input_data[f'{BANDS.rp}_basis_function_id'] if isinstance(basis, numbers.Number)]) return remove_nans(set_bp).union(remove_nans(set_rp))
[docs]def get_design_matrices(unique_bases_ids, sampling, config_df): """ Get the design matrices corresponding to the input bases. Args: unique_bases_ids (set): A set containing the basis function IDs for which the design matrix is required. sampling (ndarray): 1D array containing the sampling grid. config_df (DataFrame): A DataFrame containing the configuration for all sets of basis functions. Returns: list: a list of the design matrices for the input list of bases. """ design_matrices = {} for id in unique_bases_ids: design_matrices.update({id: SampledBasisFunctions.from_config( sampling, get_config(config_df, id))}) return design_matrices