import copy
import re
import warnings
from molmass import Formula
from petitRADTRANS.utils import LockedDict
def _split_species_cloud_info(species):
cloud_info = ''
name_split = species.split('(', 1)
if len(name_split) == 1:
name = name_split[0]
else:
name, cloud_info = name_split
cloud_info = '(' + cloud_info # re-add parenthesis lost during split
return name, cloud_info
def _split_species_charge(species, final_charge_format='+-'):
# Check for repeated charge symbol, function can work without, but it eases the user's understanding of future error
if len(re.findall(r'^.*([+\-pm])([+\-pm])$', species)) > 0:
raise ValueError(f"invalid species formula '{species}', "
f"multiple consecutive charge symbols found (+, -, p, m)")
# Extract charge symbol
charge_pattern_match = re.findall(r'^.+(_(\d{0,3})[+\-pm])$', species)
len_charge = 0
charge = ''
name = copy.deepcopy(species)
if species[-2:] in ['Tm', 'Am', 'Cm', 'Fm']: # prevent matches with atomic Thulium, Americium, Curium and Fermium
return name, ''
if len(charge_pattern_match) == 1:
charge = charge_pattern_match[0][0]
len_charge = len(charge)
else:
charge_pattern_match = re.findall(r'^.+([+\-pm])$', species)
if len(charge_pattern_match) == 1: # positive or negative charge
charge = charge_pattern_match[0][0]
len_charge = len(charge)
charge = '_' + charge
elif species[-1] in ['-', '+']:
raise ValueError(f"invalid species formula '{species}', "
f"either a symbol used is unknown, or the charge formula "
f"does not respects the pattern '<element_symbol><charge_number><+|-|p|m>' "
f"(e.g., 'Ca2+', 'H-', '7Li-1H_p', 'SO4_2m')")
# Rewrite charge symbol with +/-
if final_charge_format == '+-':
charge = charge.replace('p', '+')
charge = charge.replace('m', '-')
elif final_charge_format == 'pm':
charge = charge.replace('+', 'p')
charge = charge.replace('-', 'm')
else:
raise ValueError(f"Final charge format must be '+-'|'pm', but was '{final_charge_format}'")
# Temporarily remove charge symbol to remove isotopic numbers
if len_charge > 0:
name = species[:-len_charge]
return name, charge
def _split_species_source(species):
split = species.split('__', 1)
if len(split) == 1:
name = split[0]
source = ''
else:
name, source = split
return name, source
def _split_species_spectral_info(species):
split = species.split('.', 1)
if len(split) == 1:
name = split[0]
spectral_info = ''
else:
name, spectral_info = split
return name, spectral_info
[docs]
def split_species_all_info(species, final_charge_format='+-'):
"""
Split a species string into all its components.
Args:
species: Full species string to parse.
final_charge_format: Format for charge symbols ('+-' or 'pm').
Returns:
Tuple of (name, natural_abundance, charge, cloud_info, source, spectral_info).
"""
name, spectral_info = _split_species_spectral_info(species)
name, source = _split_species_source(name) # remove resolving power or opacity source information
# Remove cloud info
name, cloud_info = _split_species_cloud_info(name)
natural_abundance_string = __get_natural_abundance_string()
if f"-{natural_abundance_string}" in name:
name = name.replace(f'-{natural_abundance_string}', '')
natural_abundance = natural_abundance_string
else:
natural_abundance = ''
# Check for repeated ion symbol, function can work without, but it eases the user's understanding of future error
if len(re.findall(r'^.*([+\-pm])([+\-pm])$', name)) > 0:
raise ValueError(f"invalid species formula '{name}', "
f"multiple consecutive charge symbols found (+, -, p, m)")
# Extract ion symbol
name, charge = _split_species_charge(name, final_charge_format=final_charge_format)
return name, natural_abundance, charge, cloud_info, source, spectral_info
[docs]
def get_species_scientific_name(species: str) -> str:
"""Return the species name formatted with LaTeX-style isotope notation."""
name, natural_abundance, charge, cloud_info, _, _ = split_species_all_info(species, final_charge_format='+-')
# Remove isotopic numbers
return rf"{_rebuild_isotope_numbers(name, mode='scientific')}"
def _get_base_cia_names():
return LockedDict.build_and_lock({
'H2--H2': 'H2--H2-NatAbund__BoRi.R831_0.6-250mu',
'H2--He': 'H2--He-NatAbund__BoRi.DeltaWavenumber2_0.5-500mu',
'H2O--H2O': 'H2O--H2O-NatAbund',
'H2O--N2': 'H2O--N2-NatAbund',
'N2--H2': 'N2--H2-NatAbund.DeltaWavenumber1_5.3-909mu',
'N2--He': 'N2--He-NatAbund.DeltaWavenumber1_10-909mu',
'N2--N2': 'N2--N2-NatAbund.DeltaWavelength1e-6_2-100mu',
'O2--O2': 'O2--O2-NatAbund.DeltaWavelength1e-6_0.34-8.7mu',
'N2--O2': 'N2--O2-NatAbund.DeltaWavelength1e-6_0.72-5.4mu',
'CO2--CO2': 'CO2--CO2-NatAbund.DeltaWavelength1e-6_3-100mu',
})
[docs]
def get_cia_aliases(name: str) -> str:
"""Resolve a CIA species name to its full petitRADTRANS directory name."""
# Try to match name with full directory and directory shortcut
cia_filenames = _get_base_cia_names()
for cia, cia_filename in cia_filenames.items():
if name in [cia_filename, cia_filename.rsplit('_', 1)[0]]:
return cia_filename
# Try to match name with aliases
cia_aliases = copy.deepcopy(cia_filenames)
cia_aliases.update({
'H2--H2': ['H2H2', 'H2-H2', 'H2--H2'],
'H2--He': ['H2He', 'H2-He', 'H2--He', 'HeH2', 'He-H2', 'He--H2'],
'H2O--H2O': ['H2OH2O', 'H2O-H2O', 'H2O--H2O'],
'H2O--N2': ['H2ON2', 'H2O-N2', 'H2O--N2', 'N2H2O', 'N2-H2O', 'N2--H2O'],
'N2--H2': ['N2H2', 'N2-H2', 'N2--H2', 'H2N2', 'H2-N2', 'H2--N2'],
'N2--He': ['N2He', 'N2-He', 'N2--He', 'HeN2', 'He-N2', 'He--N2'],
'N2--N2': ['N2N2', 'N2-N2', 'N2--N2'],
'O2--O2': ['O2O2', 'O2-O2', 'O2--O2'],
'N2--O2': ['N2O2', 'N2-O2', 'N2--O2', 'O2N2', 'O2-N2', 'O2--N2'],
'CO2--CO2': ['CO2CO2', 'CO2-CO2', 'CO2--CO2']
})
for cia, aliases in cia_aliases.items():
if name in aliases:
if name != cia:
warnings.warn(
"usage of CIA aliases is deprecated and will be removed in a future update.\n"
"Use the CIA actual name, with pRT's CIA separator ('--') between colliding species, instead",
FutureWarning
)
return cia_filenames[cia]
# Name does not match a directory, a directory shortcut, or an alias
return name
def __get_natural_abundance_string(separator=''):
"""Naming convention for natural abundance. Access names here, change this to change the convention."""
return f'{separator}NatAbund'
def _rebuild_isotope_numbers(species, mode='add'):
"""Add or remove isotope numbers from a species.
Note that using improper isotope separation can lead to incorrect results (e.g. H218O -> 1H218-16O).
Args:
species:
Species name. Can also be a species collision (e.g. H2--He).
mode:
Can be 'add', 'remove', or 'scientific'.
In 'add' mode, the isotope number of each species is added to the name, and each
isotope is separated with a '-'. By default, the main isotope number is used. If partial isotope
information is provided (e.g. 13C2H2, H2-18O, ...), the information is used (e.g. 13C2-1H2, 1H2-18O, ...).
In 'remove' mode, remove all isotope numbers (e.g. 13C2-1H2 -> C2H2).
In 'scientific' mode, format isotope and stoichiometric numbers as LaTeX superscripts/subscripts.
Returns:
The species name with added or removed isotope information.
"""
# Set isotope explicit separator
if mode == 'add':
isotope_separator = '-'
elif mode == 'remove' or mode == 'scientific':
isotope_separator = '' # no separator in remove mode
else:
raise ValueError(f"iter isotopes mode must be 'add'|'remove'|'scientific', but was '{mode}'")
species_pattern = r'(\d{1,3})?([A-Z][a-z]?|e)(\d{1,3})?' # isotope number, element symbol, stoichiometric number
natural_abundance_str = __get_natural_abundance_string(separator='-')
# Handle natural abundance case
if natural_abundance_str in species:
if mode == 'add':
species = species.rsplit(natural_abundance_str, 1)[0]
_species = species.split('--') # CIA case
for i, __species in enumerate(_species): # for each colliding species
matches = re.findall(species_pattern, __species)
__species = [group[1] + group[2] for group in matches]
_species[i] = isotope_separator.join(__species)
species = '--'.join(_species)
return species + natural_abundance_str
elif mode == 'remove' or mode == 'scientific':
return species.replace(natural_abundance_str, '')
else:
raise ValueError(f"iter isotopes mode must be 'add'|'remove', but was '{mode}'")
# Handle regular case
_species = species.split('--') # CIA case
for i, __species in enumerate(_species): # for each colliding species
if isotope_separator in __species:
isotopes = __species.split('-')
else:
isotopes = [__species]
for j, isotope in enumerate(isotopes): # for each separated isotope in the species
# Match isotope pattern in order to handle the case in which not all isotopes are separated (e.g. "13C2H2")
matches = re.findall(species_pattern, isotope)
if len(matches) == 0:
raise ValueError(f"invalid isotope name '{isotope}' in species '{__species}'")
for k, groups in enumerate(matches): # for each non-separated isotope in the "separated isotope"
groups = list(groups) # contains isotope number, element symbol and element count (e.g. 13, C, 2)
if groups[1] == 'D': # handle deuterium
if groups[0] == '':
groups[0] = '2'
groups[1] = 'H'
elif groups[1] == 'T': # handle tritium
if groups[0] == '':
groups[0] = '3'
groups[1] = 'H'
# Update isotope number
if mode == 'add':
if groups[0] == '' and groups[1] != 'e':
groups[0] = f"{Formula(groups[1]).isotope.massnumber}"
elif mode == 'remove':
if groups[0] != '':
groups[0] = '' # remove isotope number
elif mode == 'scientific':
if groups[0] != '': # isotope number
groups[0] = '$^{' + groups[0] + '}$'
if groups[2] != '': # stoichiometric number
groups[2] = '$_{' + groups[2] + '}$'
else:
raise ValueError(f"iter isotopes mode must be 'add'|'remove', but was '{mode}'")
matches[k] = ''.join(groups) # rebuild isotope string (e.g. "13C2")
isotopes[j] = isotope_separator.join(matches) # join non-separated isotopes with the explicit separator
_species_tmp = isotope_separator.join(isotopes) # join isotopes to rebuild species
# Merge contiguous matches containing the same element (but presumably different isotopes)
if mode == 'remove':
_species_tmp = _merge_contiguous_isotopes(
species=_species_tmp,
isotope_separator=isotope_separator
)
_species[i] = _species_tmp
if mode == 'scientific':
cia_separator = '$-$'
else:
cia_separator = '--'
__species = cia_separator.join(_species) # join species to rebuild collision
return __species
[docs]
def get_species_basename(species: str, join: bool = False) -> str:
"""Return the base name of a species with isotope numbers removed."""
name, natural_abundance, charge, cloud_info, _, _ = split_species_all_info(species, final_charge_format='+-')
# Remove isotopic numbers
name = _rebuild_isotope_numbers(name, mode='remove')
if join:
return join_species_all_info(name, charge=charge, cloud_info=cloud_info)
else:
return name
[docs]
def join_species_all_info(name, natural_abundance='', charge='', cloud_info='', source='', spectral_info='',
resolution_filename=None, range_filename=None):
"""Reconstruct a full species string from its individual components."""
if natural_abundance != '':
name += '-' + natural_abundance
name += charge + cloud_info
if source != '':
name += '__' + source
if spectral_info != '':
if resolution_filename is not None or range_filename is not None:
raise ValueError(
f"cannot give both complete spectral info ('{spectral_info}'), "
f"and resolution ('{resolution_filename}') + range ('{range_filename}')\n"
f"Set resolution_filename and range_filename to None, or set spectral_info to an empty string"
)
name += '.' + spectral_info
elif resolution_filename is not None or range_filename is not None:
if resolution_filename is None:
raise ValueError("both resolution_filename and range_filename must be not None, "
"but resolution_filename is None")
if range_filename is None:
raise ValueError("both resolution_filename and range_filename must be not None, "
"but range_filename is None")
name += '.' + _join_spectral_information(
resolution_filename=resolution_filename,
range_filename=range_filename
)
return name
def _join_spectral_information(resolution_filename, range_filename):
return f"{resolution_filename}_{range_filename}"
def _merge_contiguous_isotopes(species, isotope_separator):
isotope_groups = re.findall(r'([A-Z][a-z]?|e)(\d{1,3})?', species)
isotope_groups = [list(isotope_group) for isotope_group in isotope_groups]
# Merge the isotopes, numbers of contiguous isotopes are converted to int, merged groups numbers are set to 0
for i in range(len(isotope_groups)):
isotope_groups = __recursive_merge_contiguous_isotopes(isotope_groups, i)
# Convert back numbers from int to str
for i in range(len(isotope_groups)):
isotope_groups[i][1] = str(isotope_groups[i][1])
# Rebuild the species with merged isotopes
return isotope_separator.join([
isotope_separator.join(isotope_group)
for isotope_group in isotope_groups
if isotope_group[1] != '0' # ignore groups that has been merged
])
def __recursive_merge_contiguous_isotopes(isotope_groups, i, index_merge=None):
if index_merge is None:
index_merge = i - 1
if i > 0:
element, number = isotope_groups[i]
element_merge, number_merge = isotope_groups[index_merge]
if element == element_merge:
if number_merge == '':
number_merge = 1
elif number_merge == 0:
if index_merge > 0:
isotope_groups = __recursive_merge_contiguous_isotopes(isotope_groups, i, index_merge - 1)
return isotope_groups
else:
return isotope_groups
else:
number_merge = int(number_merge)
if number == '':
number = 1
else:
number = int(number)
isotope_groups[index_merge][1] = number_merge + number
isotope_groups[i][1] = 0
return isotope_groups