Source code for GUIBRUSHR.General_Constants.FunctionsAndConstants.PTR_Modules

import copy
import re
import warnings
from molmass import Formula
from petitRADTRANS.utils import LockedDict


def _split_species_cloud_info(species):
    cloud_info = ''
    name_split = species.split('(', 1)

    if len(name_split) == 1:
        name = name_split[0]
    else:
        name, cloud_info = name_split
        cloud_info = '(' + cloud_info  # re-add parenthesis lost during split

    return name, cloud_info


def _split_species_charge(species, final_charge_format='+-'):
    # Check for repeated charge symbol, function can work without, but it eases the user's understanding of future error
    if len(re.findall(r'^.*([+\-pm])([+\-pm])$', species)) > 0:
        raise ValueError(f"invalid species formula '{species}', "
                         f"multiple consecutive charge symbols found (+, -, p, m)")

    # Extract charge symbol
    charge_pattern_match = re.findall(r'^.+(_(\d{0,3})[+\-pm])$', species)
    len_charge = 0
    charge = ''
    name = copy.deepcopy(species)

    if species[-2:] in ['Tm', 'Am', 'Cm', 'Fm']:  # prevent matches with atomic Thulium, Americium, Curium and Fermium
        return name, ''

    if len(charge_pattern_match) == 1:
        charge = charge_pattern_match[0][0]
        len_charge = len(charge)
    else:
        charge_pattern_match = re.findall(r'^.+([+\-pm])$', species)

        if len(charge_pattern_match) == 1:  # positive or negative charge
            charge = charge_pattern_match[0][0]
            len_charge = len(charge)

            charge = '_' + charge
        elif species[-1] in ['-', '+']:
            raise ValueError(f"invalid species formula '{species}', "
                             f"either a symbol used is unknown, or the charge formula "
                             f"does not respects the pattern '<element_symbol><charge_number><+|-|p|m>' "
                             f"(e.g., 'Ca2+', 'H-', '7Li-1H_p', 'SO4_2m')")

    # Rewrite charge symbol with +/-
    if final_charge_format == '+-':
        charge = charge.replace('p', '+')
        charge = charge.replace('m', '-')
    elif final_charge_format == 'pm':
        charge = charge.replace('+', 'p')
        charge = charge.replace('-', 'm')
    else:
        raise ValueError(f"Final charge format must be '+-'|'pm', but was '{final_charge_format}'")

    # Temporarily remove charge symbol to remove isotopic numbers
    if len_charge > 0:
        name = species[:-len_charge]

    return name, charge


def _split_species_source(species):
    split = species.split('__', 1)

    if len(split) == 1:
        name = split[0]
        source = ''
    else:
        name, source = split

    return name, source


def _split_species_spectral_info(species):
    split = species.split('.', 1)

    if len(split) == 1:
        name = split[0]
        spectral_info = ''
    else:
        name, spectral_info = split

    return name, spectral_info


[docs]
def split_species_all_info(species, final_charge_format='+-'):
    """
    Split a species string into all its components.

    Args:
        species: Full species string to parse.
        final_charge_format: Format for charge symbols ('+-' or 'pm').

    Returns:
        Tuple of (name, natural_abundance, charge, cloud_info, source, spectral_info).
    """
    name, spectral_info = _split_species_spectral_info(species)
    name, source = _split_species_source(name)  # remove resolving power or opacity source information

    # Remove cloud info
    name, cloud_info = _split_species_cloud_info(name)

    natural_abundance_string = __get_natural_abundance_string()

    if f"-{natural_abundance_string}" in name:
        name = name.replace(f'-{natural_abundance_string}', '')
        natural_abundance = natural_abundance_string
    else:
        natural_abundance = ''

    # Check for repeated ion symbol, function can work without, but it eases the user's understanding of future error
    if len(re.findall(r'^.*([+\-pm])([+\-pm])$', name)) > 0:
        raise ValueError(f"invalid species formula '{name}', "
                         f"multiple consecutive charge symbols found (+, -, p, m)")

    # Extract ion symbol
    name, charge = _split_species_charge(name, final_charge_format=final_charge_format)

    return name, natural_abundance, charge, cloud_info, source, spectral_info



[docs]
def get_species_scientific_name(species: str) -> str:
    """Return the species name formatted with LaTeX-style isotope notation."""
    name, natural_abundance, charge, cloud_info, _, _ = split_species_all_info(species, final_charge_format='+-')

    # Remove isotopic numbers
    return rf"{_rebuild_isotope_numbers(name, mode='scientific')}"


def _get_base_cia_names():
    return LockedDict.build_and_lock({
        'H2--H2': 'H2--H2-NatAbund__BoRi.R831_0.6-250mu',
        'H2--He': 'H2--He-NatAbund__BoRi.DeltaWavenumber2_0.5-500mu',
        'H2O--H2O': 'H2O--H2O-NatAbund',
        'H2O--N2': 'H2O--N2-NatAbund',
        'N2--H2': 'N2--H2-NatAbund.DeltaWavenumber1_5.3-909mu',
        'N2--He': 'N2--He-NatAbund.DeltaWavenumber1_10-909mu',
        'N2--N2': 'N2--N2-NatAbund.DeltaWavelength1e-6_2-100mu',
        'O2--O2': 'O2--O2-NatAbund.DeltaWavelength1e-6_0.34-8.7mu',
        'N2--O2': 'N2--O2-NatAbund.DeltaWavelength1e-6_0.72-5.4mu',
        'CO2--CO2': 'CO2--CO2-NatAbund.DeltaWavelength1e-6_3-100mu',
    })


[docs]
def get_cia_aliases(name: str) -> str:
    """Resolve a CIA species name to its full petitRADTRANS directory name."""
    # Try to match name with full directory and directory shortcut
    cia_filenames = _get_base_cia_names()

    for cia, cia_filename in cia_filenames.items():
        if name in [cia_filename, cia_filename.rsplit('_', 1)[0]]:
            return cia_filename

    # Try to match name with aliases
    cia_aliases = copy.deepcopy(cia_filenames)

    cia_aliases.update({
        'H2--H2': ['H2H2', 'H2-H2', 'H2--H2'],
        'H2--He': ['H2He', 'H2-He', 'H2--He', 'HeH2', 'He-H2', 'He--H2'],
        'H2O--H2O': ['H2OH2O', 'H2O-H2O', 'H2O--H2O'],
        'H2O--N2': ['H2ON2', 'H2O-N2', 'H2O--N2', 'N2H2O', 'N2-H2O', 'N2--H2O'],
        'N2--H2': ['N2H2', 'N2-H2', 'N2--H2', 'H2N2', 'H2-N2', 'H2--N2'],
        'N2--He': ['N2He', 'N2-He', 'N2--He', 'HeN2', 'He-N2', 'He--N2'],
        'N2--N2': ['N2N2', 'N2-N2', 'N2--N2'],
        'O2--O2': ['O2O2', 'O2-O2', 'O2--O2'],
        'N2--O2': ['N2O2', 'N2-O2', 'N2--O2', 'O2N2', 'O2-N2', 'O2--N2'],
        'CO2--CO2': ['CO2CO2', 'CO2-CO2', 'CO2--CO2']
    })

    for cia, aliases in cia_aliases.items():
        if name in aliases:
            if name != cia:
                warnings.warn(
                    "usage of CIA aliases is deprecated and will be removed in a future update.\n"
                    "Use the CIA actual name, with pRT's CIA separator ('--') between colliding species, instead",
                    FutureWarning
                )

            return cia_filenames[cia]

    # Name does not match a directory, a directory shortcut, or an alias
    return name


def __get_natural_abundance_string(separator=''):
    """Naming convention for natural abundance. Access names here, change this to change the convention."""
    return f'{separator}NatAbund'

def _rebuild_isotope_numbers(species, mode='add'):
    """Add or remove isotope numbers from a species.
    Note that using improper isotope separation can lead to incorrect results (e.g. H218O -> 1H218-16O).

    Args:
        species:
            Species name. Can also be a species collision (e.g. H2--He).
        mode:
            Can be 'add', 'remove', or 'scientific'.
            In 'add' mode, the isotope number of each species is added to the name, and each
            isotope is separated with a '-'. By default, the main isotope number is used. If partial isotope
            information is provided (e.g. 13C2H2, H2-18O, ...), the information is used (e.g. 13C2-1H2, 1H2-18O, ...).
            In 'remove' mode, remove all isotope numbers (e.g. 13C2-1H2 -> C2H2).
            In 'scientific' mode, format isotope and stoichiometric numbers as LaTeX superscripts/subscripts.

    Returns:
        The species name with added or removed isotope information.
    """
    # Set isotope explicit separator
    if mode == 'add':
        isotope_separator = '-'
    elif mode == 'remove' or mode == 'scientific':
        isotope_separator = ''  # no separator in remove mode
    else:
        raise ValueError(f"iter isotopes mode must be 'add'|'remove'|'scientific', but was '{mode}'")

    species_pattern = r'(\d{1,3})?([A-Z][a-z]?|e)(\d{1,3})?'  # isotope number, element symbol, stoichiometric number
    natural_abundance_str = __get_natural_abundance_string(separator='-')

    # Handle natural abundance case
    if natural_abundance_str in species:
        if mode == 'add':
            species = species.rsplit(natural_abundance_str, 1)[0]
            _species = species.split('--')  # CIA case

            for i, __species in enumerate(_species):  # for each colliding species
                matches = re.findall(species_pattern, __species)

                __species = [group[1] + group[2] for group in matches]
                _species[i] = isotope_separator.join(__species)

            species = '--'.join(_species)

            return species + natural_abundance_str
        elif mode == 'remove' or mode == 'scientific':
            return species.replace(natural_abundance_str, '')
        else:
            raise ValueError(f"iter isotopes mode must be 'add'|'remove', but was '{mode}'")

    # Handle regular case
    _species = species.split('--')  # CIA case

    for i, __species in enumerate(_species):  # for each colliding species
        if isotope_separator in __species:
            isotopes = __species.split('-')
        else:
            isotopes = [__species]

        for j, isotope in enumerate(isotopes):  # for each separated isotope in the species
            # Match isotope pattern in order to handle the case in which not all isotopes are separated (e.g. "13C2H2")
            matches = re.findall(species_pattern, isotope)

            if len(matches) == 0:
                raise ValueError(f"invalid isotope name '{isotope}' in species '{__species}'")

            for k, groups in enumerate(matches):  # for each non-separated isotope in the "separated isotope"
                groups = list(groups)  # contains isotope number, element symbol and element count (e.g. 13, C, 2)

                if groups[1] == 'D':  # handle deuterium
                    if groups[0] == '':
                        groups[0] = '2'

                    groups[1] = 'H'
                elif groups[1] == 'T':  # handle tritium
                    if groups[0] == '':
                        groups[0] = '3'

                    groups[1] = 'H'

                # Update isotope number
                if mode == 'add':
                    if groups[0] == '' and groups[1] != 'e':
                        groups[0] = f"{Formula(groups[1]).isotope.massnumber}"
                elif mode == 'remove':
                    if groups[0] != '':
                        groups[0] = ''  # remove isotope number
                elif mode == 'scientific':
                    if groups[0] != '':  # isotope number
                        groups[0] = '$^{' + groups[0] + '}$'

                    if groups[2] != '':  # stoichiometric number
                        groups[2] = '$_{' + groups[2] + '}$'
                else:
                    raise ValueError(f"iter isotopes mode must be 'add'|'remove', but was '{mode}'")

                matches[k] = ''.join(groups)  # rebuild isotope string (e.g. "13C2")

            isotopes[j] = isotope_separator.join(matches)  # join non-separated isotopes with the explicit separator

        _species_tmp = isotope_separator.join(isotopes)  # join isotopes to rebuild species

        # Merge contiguous matches containing the same element (but presumably different isotopes)
        if mode == 'remove':
            _species_tmp = _merge_contiguous_isotopes(
                species=_species_tmp,
                isotope_separator=isotope_separator
            )

        _species[i] = _species_tmp

    if mode == 'scientific':
        cia_separator = '$-$'
    else:
        cia_separator = '--'

    __species = cia_separator.join(_species)  # join species to rebuild collision

    return __species


[docs]
def get_species_basename(species: str, join: bool = False) -> str:
    """Return the base name of a species with isotope numbers removed."""
    name, natural_abundance, charge, cloud_info, _, _ = split_species_all_info(species, final_charge_format='+-')

    # Remove isotopic numbers
    name = _rebuild_isotope_numbers(name, mode='remove')

    if join:
        return join_species_all_info(name, charge=charge, cloud_info=cloud_info)
    else:
        return name



[docs]
def join_species_all_info(name, natural_abundance='', charge='', cloud_info='', source='', spectral_info='',
                          resolution_filename=None, range_filename=None):
    """Reconstruct a full species string from its individual components."""
    if natural_abundance != '':
        name += '-' + natural_abundance

    name += charge + cloud_info

    if source != '':
        name += '__' + source

    if spectral_info != '':
        if resolution_filename is not None or range_filename is not None:
            raise ValueError(
                f"cannot give both complete spectral info ('{spectral_info}'), "
                f"and resolution ('{resolution_filename}') + range ('{range_filename}')\n"
                f"Set resolution_filename and range_filename to None, or set spectral_info to an empty string"
            )
        name += '.' + spectral_info
    elif resolution_filename is not None or range_filename is not None:
        if resolution_filename is None:
            raise ValueError("both resolution_filename and range_filename must be not None, "
                             "but resolution_filename is None")

        if range_filename is None:
            raise ValueError("both resolution_filename and range_filename must be not None, "
                             "but range_filename is None")

        name += '.' + _join_spectral_information(
            resolution_filename=resolution_filename,
            range_filename=range_filename
        )

    return name


def _join_spectral_information(resolution_filename, range_filename):
    return f"{resolution_filename}_{range_filename}"

def _merge_contiguous_isotopes(species, isotope_separator):
    isotope_groups = re.findall(r'([A-Z][a-z]?|e)(\d{1,3})?', species)
    isotope_groups = [list(isotope_group) for isotope_group in isotope_groups]

    # Merge the isotopes, numbers of contiguous isotopes are converted to int, merged groups numbers are set to 0
    for i in range(len(isotope_groups)):
        isotope_groups = __recursive_merge_contiguous_isotopes(isotope_groups, i)

    # Convert back numbers from int to str
    for i in range(len(isotope_groups)):
        isotope_groups[i][1] = str(isotope_groups[i][1])

    # Rebuild the species with merged isotopes
    return isotope_separator.join([
        isotope_separator.join(isotope_group)
        for isotope_group in isotope_groups
        if isotope_group[1] != '0'  # ignore groups that has been merged
    ])


def __recursive_merge_contiguous_isotopes(isotope_groups, i, index_merge=None):
    if index_merge is None:
        index_merge = i - 1

    if i > 0:
        element, number = isotope_groups[i]
        element_merge, number_merge = isotope_groups[index_merge]

        if element == element_merge:
            if number_merge == '':
                number_merge = 1
            elif number_merge == 0:
                if index_merge > 0:
                    isotope_groups = __recursive_merge_contiguous_isotopes(isotope_groups, i, index_merge - 1)
                    return isotope_groups
                else:
                    return isotope_groups
            else:
                number_merge = int(number_merge)

            if number == '':
                number = 1
            else:
                number = int(number)

            isotope_groups[index_merge][1] = number_merge + number
            isotope_groups[i][1] = 0

    return isotope_groups