Source code for GUIBRUSHR.Retrieval.ModelCalculation.ModelData

"""
ModelData Module

This module contains the core ModelData class for atmospheric retrieval calculations.
It handles parameter management, model calculations, and likelihood evaluations for
both high-resolution and low-resolution observations.

This is the CORE of the code - all logic and sequence of operations are preserved.
"""

import os
import time
import traceback
from collections import Counter
from multiprocessing.managers import SyncManager
import numpy as np
from numpy.random import default_rng
from petitRADTRANS import physical_constants as phys_const

from GUIBRUSHR.General_Constants.Classes.UserTemperatureProfile import UserTemperatureProfile
from GUIBRUSHR.General_Constants.Classes.ValueErrorTP import ValueErrorTP
from GUIBRUSHR.General_Constants.FunctionsAndConstants.Constant_Variables import ConstantVariables, to_linear
from GUIBRUSHR.Retrieval.ModelCalculation.Classes.NotRetrieval import NotRetrieval
from GUIBRUSHR.Retrieval.ModelCalculation.ParameterHandler import ParameterHandler
from GUIBRUSHR.Retrieval.ModelCalculation.ModelSetup import ModelSetup
from GUIBRUSHR.Retrieval.ModelCalculation.LikelihoodHR import LikelihoodHR
from GUIBRUSHR.Retrieval.ModelCalculation.LikelihoodLR import LikelihoodLR
from GUIBRUSHR.Retrieval.ExofastMCMC.StructReturnExofast import StructReturnExofast
from GUIBRUSHR.Retrieval.ExofastMCMC import snooker as snooker_module
from GUIBRUSHR.Retrieval.ExofastMCMC.process_safety import (
    install_parent_death_signal as _install_parent_death_signal,
    describe_exitcode as _describe_exitcode,
    mp_context as _mp_context,
)
from GUIBRUSHR.Retrieval.debug_log import emit_event as _emit_debug_event
from GUIBRUSHR.Retrieval.debug_log import get_log_path as _debug_log_path
from GUIBRUSHR.Retrieval.debug_log import BufferedLogger as _BufferedLogger
from GUIBRUSHR.core.types import slice_section



[docs]
class ModelData:
    """
    Core class for atmospheric retrieval model data and calculations.

    This class manages all aspects of atmospheric modeling including:
    - Parameter initialization and management
    - Model calculations for both high and low resolution
    - Likelihood evaluations
    - MCMC chain operations

    Attributes:
        path_default: Default path for operations
        params_list: List of all possible parameters
        list_multiple_param: Parameters that can have multiple values
        clight: Speed of light constant
        model_type: Type of model (Retrieval, Model, etc.)
        atmosphere: Atmosphere object for calculations
        retrieval_data: Retrieval configuration data
        bestpars_data: Best parameters data
        random_obj: Random number generator object
    """


[docs]
    def __init__(
            self,
            path_params=None,
            path_df=None,
            id_process=None,
            table_output_file=None,
            minwlen_lr=None,
            maxwlen_lr=None,
            minwlen_hr=None,
            maxwlen_hr=None,
            model_type="Retrieval",
            lbl_sampling_hr=None,
            lbl_sampling_lr=None,
            range_min=None,
            range_max=None,
            nlayers=None,
            manual_model_obj=None,
            load_new_opacities=True,
            path_default=None,
            plot_convolved_LR=False
    ):
        """
        Build a ModelData instance and wire up its helper objects.

        The constructor changes the working directory to ``path_default``,
        creates the ParameterHandler and ModelSetup helpers, lets ModelSetup
        populate itself (from files when ``path_params`` is given, otherwise
        from ``manual_model_obj``), mirrors the resulting state onto this
        instance and finally creates the HR/LR likelihood handlers.

        Args:
            path_params: Path to parameters file. When not None the setup is
                read from files; when None the manual-model path is used.
            path_df: Path to dataframe file
            id_process: Process ID for parallel operations
            table_output_file: Output file for table data
            minwlen_lr: Minimum wavelength for low resolution
            maxwlen_lr: Maximum wavelength for low resolution
            minwlen_hr: Minimum wavelength for high resolution
            maxwlen_hr: Maximum wavelength for high resolution
            model_type: Type of model calculation
            lbl_sampling_hr: Line-by-line sampling parameter hr
            lbl_sampling_lr: Line-by-line sampling parameter lr
            range_min: Minimum pressure range
            range_max: Maximum pressure range
            nlayers: Number of atmospheric layers
            manual_model_obj: Manual model object for direct initialization
            load_new_opacities: Whether to load new opacity data
            path_default: Default working directory path. It is passed
                straight to ``os.chdir``, so a valid directory must be
                supplied even though the signature default is None.
            plot_convolved_LR: Flag stored on the instance and later handed
                to NotRetrieval by lh_function_gib.
        """
        # --- plain configuration -------------------------------------------
        self.lbl_sampling_hr = lbl_sampling_hr
        self.lbl_sampling_lr = lbl_sampling_lr
        os.chdir(path_default)
        self.path_default = path_default
        self.model_type = model_type
        self.plot_convolved_LR = plot_convolved_LR

        # --- constants shared by the whole package -------------------------
        self.params_list = ConstantVariables.params_list
        self.list_multiple_param = ConstantVariables.LIST_MULTIPLE_PARAM
        self.clight = ConstantVariables.CLIGHT

        # --- placeholders, filled from the setup handler further down ------
        # (a jitter_list placeholder used to live here; it is disabled)
        for placeholder in (
                "smooth_data",
                "bestpars_data",
                "retrieval_data",
                "atmosphere",
                "random_obj",
                "wlen_list_overplot",
                "beta_list",
                "table_output_file",
        ):
            setattr(self, placeholder, None)

        # --- parameter handler: owns the param array and its indexing ------
        handler = ParameterHandler(self.params_list, self.list_multiple_param)
        self.param_handler = handler
        self.initial_param_array = handler.initial_param_array
        self.start_general_1 = handler.get_index("kp")
        self.start_elements = handler.get_index(ConstantVariables.LIST_ELEMENT_FOR_HYBRID[0])
        self.start_molec = handler.get_index("H2")
        self.start_condensed = ConstantVariables.start_condensed_molecs

        # --- setup handler -------------------------------------------------
        self.setup = ModelSetup(
            param_handler=handler,
            lbl_sampling_hr=self.lbl_sampling_hr,
            lbl_sampling_lr=self.lbl_sampling_lr,
            path_default=self.path_default,
            model_type=self.model_type,
            initial_param_array=self.initial_param_array,
            start_molec=self.start_molec,
        )

        # Trailing positional arguments common to both population routes.
        wavelength_and_grid = (
            minwlen_lr, maxwlen_lr, minwlen_hr, maxwlen_hr,
            range_min, range_max, nlayers,
        )
        if path_params is None:
            # No parameter file: populate from the manual model object.
            self.setup.populate_from_manual_model(
                manual_model_obj, load_new_opacities, *wavelength_and_grid
            )
        else:
            # Parameter file given: read it, then read the dataframe info.
            parameters_frame = self.read_df_parameters(path_params)
            self.setup.read_df_information(
                path_df, parameters_frame, id_process, table_output_file,
                *wavelength_and_grid
            )

        # --- mirror the state produced by the setup handler ----------------
        for mirrored in (
                "atmosphere",
                "retrieval_data",
                "bestpars_data",
                "random_obj",
                "wlen_list_overplot",
                "table_output_file",
        ):
            setattr(self, mirrored, getattr(self.setup, mirrored))

        # --- likelihood handlers -------------------------------------------
        common_kwargs = {
            "atmosphere": self.atmosphere,
            "bestpars_data": self.bestpars_data,
            "model_type": self.model_type,
            "clight": self.clight,
        }
        # jitter_list is intentionally not forwarded (disabled upstream).
        self.likelihood_hr = LikelihoodHR(
            retrieval_data=self.retrieval_data,
            param_handler=self.param_handler,
            beta_list=self.beta_list,
            start_molec=self.start_molec,
            start_elements=self.start_elements,
            start_condensed=self.start_condensed,
            **common_kwargs,
        )
        self.likelihood_lr = LikelihoodLR(**common_kwargs)



[docs]
    def populate_from_manual_model(self, *args, **kwargs):
        """Delegate to ModelSetup. See ModelSetup.populate_from_manual_model."""
        result = self.setup.populate_from_manual_model(*args, **kwargs)
        # When load_new_opacities=True, ModelSetup replaces self.atmosphere
        # with a fresh Atmosphere(); propagate the new reference so the
        # likelihood objects don't keep reading the stale (old-composition)
        # atmosphere.
        self.atmosphere = self.setup.atmosphere
        self.likelihood_hr.atmosphere = self.atmosphere
        self.likelihood_lr.atmosphere = self.atmosphere
        return result



[docs]
    def add_param_manual_model(self, *args, **kwargs):
        """Delegate to ParameterHandler. See ParameterHandler.add_param_manual_model."""
        return self.param_handler.add_param_manual_model(*args, **kwargs)



[docs]
    def read_df_parameters(self, path_df_parameters):
        """Delegate to ParameterHandler. See ParameterHandler.read_df_parameters."""
        return self.param_handler.read_df_parameters(path_df_parameters)



[docs]
    def parameter_management(self, *args, **kwargs):
        """Delegate to ParameterHandler. See ParameterHandler.parameter_management."""
        return self.param_handler.parameter_management(*args, **kwargs)



[docs]
    def read_df_information(self, *args, **kwargs):
        """Delegate to ModelSetup. See ModelSetup.read_df_information."""
        return self.setup.read_df_information(*args, **kwargs)



[docs]
    def night_data_extraction(self):
        """Delegate to ModelSetup. See ModelSetup.night_data_extraction."""
        return self.setup.night_data_extraction()



[docs]
    def get_value(self, params, name, single_value=True):
        """Delegate to ParameterHandler. See ParameterHandler.get_value."""
        return self.param_handler.get_value(params, name, single_value)



[docs]
    def get_index(self, param_name):
        """Delegate to ParameterHandler. See ParameterHandler.get_index."""
        return self.param_handler.get_index(param_name)


    def _extract_params(self, params):
        """
        Collect every non-chemistry retrieval parameter into a name -> value dict.

        Iterates over ConstantVariables.PARAMS_DICT. Entries flagged with
        ``molec == 1`` (chemistry species) are left out; they are handled in
        the chemistry path. For every other entry the value is read with
        get_value (as a single value when the entry's ``multi`` field is
        below 1), optionally replaced by the YAML ``value`` default when the
        parameter is absent and the entry carries a truthy
        ``default_if_absent`` marker, and finally passed through to_linear.
        The YAML type is the single source of truth for which parameters get
        the log10 -> linear conversion; no hardcoded list is kept here.

        Args:
            params: Array of parameter objects (positional, as used by get_value).

        Returns:
            dict: Mapping parameter name -> result of to_linear for that
            parameter (linear for Log10ToLinear parameters, raw otherwise,
            None for parameters not fitted in this retrieval).
        """
        extracted = {}
        for param_name, spec in ConstantVariables.PARAMS_DICT.items():
            is_chemistry_species = spec.get("molec", 0) == 1
            if not is_chemistry_species:
                wants_scalar = spec["multi"] < 1
                value = self.get_value(params, param_name, single_value=wants_scalar)
                if value is None and spec.get("default_if_absent", 0):
                    # Not fitted in this retrieval: use the YAML "value" field,
                    # the feature-off neutral default. It goes through
                    # to_linear exactly like a fitted value would
                    # (e.g. Pref -1 -> 0.1 bar).
                    value = spec["value"]
                extracted[param_name] = to_linear(param_name, value)
        return extracted


[docs]
    def calculate_mass_fraction(self, *args, **kwargs):
        """Delegate to LikelihoodHR. See LikelihoodHR.calculate_mass_fraction."""
        return self.likelihood_hr.calculate_mass_fraction(*args, **kwargs)


    def _save_trpca_error_debug_info(self, *args, **kwargs):
        """Delegate to LikelihoodHR. See LikelihoodHR._save_trpca_error_debug_info."""
        return self.likelihood_hr._save_trpca_error_debug_info(*args, **kwargs)


[docs]
    def calculate_log_prior(self, params):
        """Delegate to LikelihoodHR. See LikelihoodHR.calculate_log_prior."""
        return self.likelihood_hr.calculate_log_prior(params)



[docs]
    def high_resolution_lhood(self, temperature, mass_fraction, vmr, MMW, dict_calc_model,
                              chain=None, step=None):
        """Delegate to LikelihoodHR. See LikelihoodHR.high_resolution_lhood."""
        return self.likelihood_hr.high_resolution_lhood(
            temperature, mass_fraction, vmr, MMW, dict_calc_model,
            chain=chain, step=step,
        )


    def _bin_emission_lr(self, *args, **kwargs):
        """Delegate to LikelihoodLR. See LikelihoodLR._bin_emission_lr."""
        return self.likelihood_lr._bin_emission_lr(*args, **kwargs)

    def _bin_transmission_lr(self, *args, **kwargs):
        """Delegate to LikelihoodLR. See LikelihoodLR._bin_transmission_lr."""
        return self.likelihood_lr._bin_transmission_lr(*args, **kwargs)

    def _preprocess_model_lr_transmission(self, *args, **kwargs):
        """Delegate to LikelihoodLR. See LikelihoodLR._preprocess_model_lr_transmission."""
        return self.likelihood_lr._preprocess_model_lr_transmission(*args, **kwargs)

    def _bin_spectrum_to_instrument_resolution(self, *args, **kwargs):
        """Delegate to LikelihoodLR. See LikelihoodLR._bin_spectrum_to_instrument_resolution."""
        return self.likelihood_lr._bin_spectrum_to_instrument_resolution(*args, **kwargs)


[docs]
    def low_resolution_lhood(self, temperature, mass_fraction, vmr, MMW, dict_calc_model, rp):
        """Delegate to LikelihoodLR. See LikelihoodLR.low_resolution_lhood."""
        return self.likelihood_lr.low_resolution_lhood(temperature, mass_fraction, vmr, MMW, dict_calc_model, rp)



[docs]
    def lh_function_gib(self, params, chain=None, step=None):
        """
        Evaluate the core likelihood function for atmospheric retrieval.

        This is the HEART of the atmospheric retrieval system. It takes input
        parameters, performs boundary checks, extracts all parameter values,
        calculates temperature profiles, computes mass fractions, and evaluates
        both high-resolution and low-resolution likelihoods.

        The function preserves the exact sequence of operations critical for
        atmospheric modeling and MCMC retrieval.

        Args:
            params: List of ParamForModel objects containing all retrieval parameters
            chain: MCMC chain index. Forwarded to downstream prints
                (negative mass fractions, negative spectrum, TRPCA error) so
                operators can identify which chain is misbehaving. None
                outside an MCMC context.
            step: MCMC outer step index. Used together with ``chain`` to
                annotate diagnostic prints. None outside an MCMC context.

        Returns:
            Tuple containing:
            - lhood: Log-likelihood value (float)
            - det: Determinant value for Bayesian priors (float)
            - not_retrieval_obj: NotRetrieval object with model results (or None)
        """
        # BOUNDARY CHECKING: Reject parameters outside valid ranges
        # Explicit loop (not np.all on a list-comp) so we can report which
        # parameter failed first - that name is the main diagnostic for the
        # debug log when acceptance collapses.
        if "Retrieval" in self.model_type:
            for param in params:
                if param is not None and not param.boundaries_check():
                    # Extract the actual offending value. ParamForModel stores
                    # a scalar in ``value_in_retrieval`` when starting_value_variable
                    # is None, otherwise an array in ``value_arr_in_retrieval``.
                    # For array-valued params, return the element that is actually
                    # outside the bounds (first one found), not the average.
                    if getattr(param, "starting_value_variable", None) is None:
                        arr = getattr(param, "value_arr_in_retrieval", None)
                        if arr is not None and len(arr) > 0:
                            rng_min = float(param.range_min)
                            rng_max = float(param.range_max)
                            bad = next(
                                (float(e) for e in arr
                                 if not (rng_min <= float(e) <= rng_max)),
                                float(arr[0]),
                            )
                            bad_value = bad
                        else:
                            bad_value = float("nan")
                    else:
                        bad_value = float(getattr(param, "value_in_retrieval",
                                                  float("nan")))
                    return (
                        -np.inf, 0.0, None,
                        {
                            "reason": "boundary",
                            "param": getattr(param, "name", "?"),
                            "value": bad_value,
                        },
                    )

        # EXTRACT ALL RETRIEVAL PARAMETERS
        # Every parameter flows through _extract_params, which applies to_linear for
        # parameters whose YAML type is Log10ToLinear. The YAML "type" field is the
        # single source of truth for log10->linear conversion. Derived quantities
        # (eccentricity, radius/gravity, C/O fallback, eddy grid) and physical-constraint
        # early returns stay explicit below, in the original order.
        # Feature-off neutral defaults for parameters not fitted in this retrieval are
        # handled inside _extract_params via the "default_if_absent" YAML marker, so no
        # hardcoded parameter names are needed here.
        v = self._extract_params(params)

        # Orbital eccentricity (Thiele-Innes elements h = e*sin(omega), k = e*cos(omega))
        if self.retrieval_data.eccentricity:
            ecc = np.power(v["h_ecc"], 2) + np.power(v["k_ecc"], 2)  # e² = h² + k²
            opi = np.arctan2(v["h_ecc"], v["k_ecc"])  # ω = arctan2(h, k)

            # Physical constraint: eccentricity must be between 0 and 1
            if not 0 <= ecc <= 1:
                return -np.inf, 0.0, None, {"reason": "eccentricity"}
        else:
            ecc = None
            opi = None

        # Planet radius (Jupiter radii -> meters) and surface gravity
        rp_jup = v["rp"]
        rp = rp_jup * phys_const.r_jup_mean  # Convert to meters
        gravity = (  # Surface gravity calculation
                phys_const.G * (self.atmosphere.target.mass * phys_const.m_jup)
                / np.power(rp, 2)
        )

        # Physical constraint for Madhusudhan profile: p1 must be < p3
        if self.retrieval_data.format_temperature in ("Madhusudhan", "madhu") and v["p1"] > v["p3"]:
            return -np.inf, 0.0, None, {"reason": "madhu_p1_gt_p3"}

        # Physical constraint: high pressure must be greater than low pressure
        if v["P_high"] is not None and v["P_high"] >= v["P_low"]:
            return -np.inf, 0.0, None, {"reason": "P_high_ge_P_low"}

        # Atmospheric chemistry: C/O arrives already in linear units from to_linear
        # (co_ratio is Log10ToLinear); fall back to co_ratio_linear if not fitted.
        # Si/O is Linear and arrives unchanged.
        c_o_ratio_final = v["co_ratio"] if v["co_ratio"] is not None else v["co_ratio_linear"]
        si_o_ratio_final = v["sio_ratio_linear"]

        # Eddy diffusion coefficient broadcast over the pressure grid.
        # eddy_diff_coeff is Log10ToLinear, so to_linear already applied 10**.
        eddy_diff_coeff_e = None if v["eddy_diff_coeff"] is None else np.ones_like(
            self.atmosphere.pressure_data.pressures) * v["eddy_diff_coeff"]

        # TEMPERATURE PROFILE SETUP
        # Create parameter dictionary for temperature profile calculation
        parameters_tp_profile = {
            "T0": ValueErrorTP(v["T0"]),
            "kappa_IR": ValueErrorTP(v["kappa_IR"]),
            "gamma_g": ValueErrorTP(v["gamma_g"]),
            "T_int": ValueErrorTP(v["T_int"]),
            "T_low": ValueErrorTP(v["T_low"]),
            "T_high": ValueErrorTP(v["T_high"]),
            "P_low": ValueErrorTP(v["P_low"]),
            "P_high": ValueErrorTP(v["P_high"]),
            "p1": ValueErrorTP(v["p1"]),
            "p2": ValueErrorTP(v["p2"]),
            "p3": ValueErrorTP(v["p3"]),
            "alpha1": ValueErrorTP(v["alpha1"]),
            "alpha2": ValueErrorTP(v["alpha2"]),
            "T0_node": ValueErrorTP(v["T0_node"]),
            "T1_node": ValueErrorTP(v["T1_node"]),
            "T2_node": ValueErrorTP(v["T2_node"]),
            "T3_node": ValueErrorTP(v["T3_node"]),
            "P1_node": ValueErrorTP(v["P1_node"]),
            "P2_node": ValueErrorTP(v["P2_node"]),
        }
        # TEMPERATURE PROFILE CALCULATION
        # Create temperature profile object with all parameters
        tp_profile_obj = UserTemperatureProfile(
            self.atmosphere.pressure_data.pressures,
            parameters_tp_profile,
            gravity,
            False,
            None
        )

        # Get the appropriate temperature profile function and calculate temperature
        function_tp_profile = getattr(tp_profile_obj, self.retrieval_data.format_temperature)
        temperature, _ = function_tp_profile()

        # TEMPERATURE VALIDATION: reject NaN, inf, or non-positive temperatures
        if not np.all(np.isfinite(temperature)) or np.any(temperature <= 0):
            return -np.inf, 0.0, None, {"reason": "temperature_invalid"}

        # ATMOSPHERIC COMPOSITION CALCULATION
        # Calculate mass fractions, mean molecular weight, and VMR profiles
        mass_fraction_names_hr, MMW, vmr, mean_VMR_and_MF_string, mean_VMR_and_MF_dict, negative_mass_fractions = self.calculate_mass_fraction(
            temperature, v["met"], c_o_ratio_final, si_o_ratio_final,
            v["vmr_peak"], v["pressure_peak"], v["width_peak"],
            chain=chain, step=step,
        )

        if negative_mass_fractions:
            return -np.inf, 0.0, None, {"reason": "negative_mass_fractions"}

        # INITIALIZE OUTPUT VARIABLES
        lhood = 0  # Total log-likelihood
        wl_full_resolution_HR = 0
        depth_full_resolution_HR = 0
        wl_full_resolution_LR = 0
        depth_full_resolution_LR = 0
        wl_binned_LR = 0
        final_spectrum_LR = 0
        wlen_mu_contribution_HR = 0
        contribution_HR = 0
        wlen_mu_contribution_LR = 0
        contribution_LR = 0
        opacity_contribution_HR = None
        opacity_contribution_LR = None

        # CREATE MODEL CALCULATION DICTIONARY
        # Package all parameters needed for atmospheric model calculations
        dict_calc_model = {
            "offsetLR_arr": v["offsetLR"],
            "Pc": v["Pc"],
            "rp": rp,
            "k0": v["k0"],
            "gamma": v["gamma"],
            "gravity": gravity,
            "omegad": v["omega"],
            "rv": v["rv"],
            "sf": v["sf"],
            "sf_arr": v["sf_multi"],
            "f_rot_arr": v["f_rot"],
            "T0": v["T0"],
            "T_low": v["T_low"],
            "T3_node": v["T3_node"],
            "ecc": ecc,
            "opi": opi,
            "kp": v["kp"],
            # "jitter_arr": jitter_arr,
            "beta_arr": v["beta_HR"],
            "beta_LR_arr": v["beta_LR"],
            "dVsys_arr": v["dVsys"],
            "P_ref": v["Pref"],
            "haze_factor": v["haze_factor"],
            "cloud_fraction": v["cloud_fraction"],
            "k_cond": v["k_cond"],
            "k_opac": v["k_opac"],
            "lambda0_micron": v["lambda0_micron"],
            "xi": v["xi"],
            "omega_scale_micron": v["omega_scale_micron"],
            "std_radius_distribution": v["std_radius_distribution"],
            "cloud_fsed": v["cloud_fsed"],
            "eddy_diff_coeff": eddy_diff_coeff_e,
        }
        lh_high_resolution = None
        lh_low_resolution = None
        # HIGH-RESOLUTION LIKELIHOOD CALCULATION
        if self.atmosphere.resolution_obj.high_resolution():
            if "Contribution" in self.model_type:
                # Calculate opacity contributions for analysis
                os.system(f"mkdir -p {self.retrieval_data.path_results}/contribution/")
                wl_full_resolution_HR, contribution_HR = self.atmosphere.calc_model_contribution(
                    temperature, mass_fraction_names_hr, MMW, dict_calc_model
                )
                wlen_mu_contribution_HR = wl_full_resolution_HR
            else:
                # Calculate high-resolution likelihood
                hr_likelihood = self.high_resolution_lhood(
                    temperature, mass_fraction_names_hr, vmr, MMW, dict_calc_model,
                    chain=chain, step=step,
                )
                all_ok = hr_likelihood[0]
                lhood += hr_likelihood[1]
                wl_full_resolution_HR = hr_likelihood[2]
                depth_full_resolution_HR = hr_likelihood[3]
                opacity_contribution_HR = hr_likelihood[4]
                lh_high_resolution = hr_likelihood[5]

                # Exit if high-resolution calculation failed. Use the distinct
                # reason set by LikelihoodHR (hr_model_nan / hr_negative_spectrum /
                # hr_trpca_failed) so the debug log makes clear whether the model
                # itself failed (NaN) rather than the PCA step.
                if not all_ok:
                    return -np.inf, 0.0, None, {
                        "reason": getattr(self.likelihood_hr, "failure_reason", None) or "hr_failed"
                    }

        mass_fraction_names_lr = None
        # LOW-RESOLUTION LIKELIHOOD CALCULATION
        if self.atmosphere.resolution_obj.low_resolution():
            # Prepare mass fraction names for low-resolution (different naming convention)
            mass_fraction_names_lr = mass_fraction_names_hr.copy()
            for counter_species, elem in enumerate(self.atmosphere.species_obj.line_species_complete_name_hr):
                mass_fraction_names_lr[self.atmosphere.species_obj.line_species_complete_name_lr[counter_species]] = mass_fraction_names_lr.pop(elem)

            if "Contribution" in self.model_type:
                # Calculate opacity contributions for low-resolution analysis
                os.system(f"mkdir -p {self.retrieval_data.path_results}/contribution/")
                wl_full_resolution_LR, contribution_LR = self.atmosphere.calc_model_contribution(
                    temperature, mass_fraction_names_lr, MMW, dict_calc_model, False
                )
                wlen_mu_contribution_LR = wl_full_resolution_LR

            # Calculate low-resolution likelihood
            lr_likelihood = self.low_resolution_lhood(
                temperature, mass_fraction_names_lr, vmr, MMW, dict_calc_model, rp
            )
            # Exit if the low-resolution model failed (NaN/Inf in the LR spectrum),
            # with a distinct reject reason separate from the HR one.
            if getattr(self.likelihood_lr, "failure_reason", None):
                return -np.inf, 0.0, None, {"reason": self.likelihood_lr.failure_reason}
            lhood += lr_likelihood[0]
            wl_full_resolution_LR = lr_likelihood[1]
            depth_full_resolution_LR = lr_likelihood[2]
            wl_binned_LR = lr_likelihood[3]
            final_spectrum_LR = lr_likelihood[4]
            opacity_contribution_LR = lr_likelihood[5]
            lh_low_resolution = lr_likelihood[6]

        # IDL: determinant=det - log of Gaussian prior product (renamed from det; see calculate_log_prior)
        log_prior = self.calculate_log_prior(params)

        # DETERMINE SPECIES LIST BASED ON CHEMISTRY MODEL
        if self.atmosphere.chemistry == ConstantVariables.LIST_CHEMISTRY_TABLE[1]:
            species = mass_fraction_names_hr.keys()
        else:
            species = list(self.atmosphere.species_compatible_with_prt)

        # CREATE OUTPUT OBJECT FOR NON-RETRIEVAL MODES
        not_retrieval_obj = None
        if "Manual" in self.model_type or "Model" in self.model_type:
            # print("Construction of Not Retrieval OBJ")
            not_retrieval_obj = NotRetrieval(
                mass_fraction_names_hr, mass_fraction_names_lr, MMW, vmr, mean_VMR_and_MF_string, mean_VMR_and_MF_dict, wl_full_resolution_HR, depth_full_resolution_HR,
                wlen_mu_contribution_HR, contribution_HR, wl_full_resolution_LR,
                depth_full_resolution_LR, wl_binned_LR, final_spectrum_LR,
                v["offsetLR"], wlen_mu_contribution_LR, contribution_LR,
                self.atmosphere.chemistry, slice_section(
                    params, ConstantVariables.params_list,
                    ConstantVariables.FIRST_MOLEC_NAME,
                    ConstantVariables.FIRST_ELEMENT_NAME,
                ),
                species, opacity_contribution_HR, opacity_contribution_LR, self.atmosphere.resolution_obj.instruments_LR, rp_jup, rad_mode=self.atmosphere.rad_mode,
                stellar_radius=self.atmosphere.target.stellar_radius, stellar_spectrum=self.atmosphere.stellar_spectrum,
                lh_HR=lh_high_resolution, lh_LR=lh_low_resolution, HR_res_present=self.atmosphere.resolution_obj.high_resolution(),
                LR_res_present=self.atmosphere.resolution_obj.low_resolution(), use_hr_linelists_for_lr=self.atmosphere.use_hr_linelists_for_lr,
                plot_convolved_LR=self.plot_convolved_LR
            )

        # reject_info=None signals the 4th slot is unused (success path).
        return lhood, log_prior, not_retrieval_obj, None



[docs]
    def parallel_chain(
            self,
            index_core,
            j,
            return_dict,
            oldpars,
            old_lhood,  # IDL: oldchi2 (was misnamed; this is log-likelihood, higher = better)
            old_log_prior,
            all_pars_snapshot,
            nthin=1,
            worker_seed=None,
            debug_log_path=None,
            outer_step=None,
            sampler="DE-MC",
            archive=None,
    ):
        """
        Execute parallel MCMC chain step for differential evolution.

        This method implements one step of the differential evolution MCMC algorithm
        in parallel. It calculates new parameter values using the DE formula and
        evaluates the likelihood to decide whether to accept or reject the step.

        When nthin > 1, each chain performs nthin proposals internally and saves
        only the final state, matching the IDL inner loop::

            IDL: for k=0, nthin-1 do begin ... endfor

        Args:
            index_core: Index of the core.
            j: Chain index array (chains assigned to this core)
            return_dict: Shared dictionary for returning results
            oldpars: Current parameter values for this core's chains, shape (nfit, chain_per_core)
            old_lhood: Current log-likelihood values for this core's chains  # IDL: oldchi2
            old_log_prior: Log of Gaussian prior product for this core's chains
                           (IDL: 'olddet' - see calculate_log_prior for rename rationale)
            all_pars_snapshot: Frozen snapshot of ALL chains' parameters at the
                start of this outer step, shape (nfit, nchains). Used to draw
                fresh DE reference chains r1, r2 inside each sub-step.
            nthin: Number of internal proposals per saved step (default 1)
            worker_seed: SeedSequence used to build an independent rng stream
                for this worker. Required for proper multi-core mixing.
            debug_log_path: Path to the shared JSONL log file. None disables
                logging; safe for concurrent append on POSIX.
            outer_step: Parent-side step index used to correlate all events
                from the same MCMC iteration across workers.
            sampler: "DE-MC" (default, legacy byte-identical path) or "Snooker"
                (DE-MCzs: per sub-step a 10% snooker move + 90% parallel DE move,
                both drawing from the archive). See Retrieval/ExofastMCMC/snooker.py.
            archive: Full pooled history Z of past+present states, shape
                (M, nfit), required when sampler == "Snooker"; ignored for DE-MC.

        Returns:
            None. The per-chain results (an array of StructReturnExofast, one
            entry per chain in ``j``) are stored in ``return_dict[index_core]``.

        Raises:
            SystemExit: with status 1 when the result cannot be stored in
                ``return_dict``. Any exception raised while stepping the
                chains is NOT propagated: it is logged as an "error" event
                and the chains not yet finished keep their pre-filled
                "rejected step" entry.
        """
        # SAFETY (requirement 1): this is a forked worker. If the retrieval
        # parent dies abnormally (segfault in pRT Fortran, OOM kill, ...) the
        # kernel must kill this worker too instead of leaving it as a
        # CPU/RAM-burning orphan reparented to init. Linux-only; no-op
        # elsewhere. Must run before any heavy work.
        _install_parent_death_signal()

        # Per-worker buffer: accumulates every event in memory for the
        # duration of this outer_step and performs ONE write at the end
        # (see the flush call just before return_dict is assigned). This
        # turns ~6 disk writes per chain (enter / chain_start / proposal /
        # accept / chain_end / core_done) into a single batched write per
        # worker per step, without changing the log schema. POSIX atomic-
        # append semantics still hold line-by-line so workers writing
        # concurrently with the parent don't interleave within a line.
        _wbuf = _BufferedLogger()

        def _wemit(record):
            _wbuf.emit(record)

        _wemit({
            "event": "enter",
            "outer_step": outer_step,
            "core": int(index_core),
            "chains_assigned": [int(c) for c in j],
            "nthin": int(nthin),
            "worker_seed_set": worker_seed is not None,
        })
        # Aggregate reject reasons per core. Emitted once inside core_done so
        # the per-proposal log stays lean (~million rows/run).
        reject_reasons: Counter = Counter()
        reject_params: Counter = Counter()
        # Per-param list of proposed (rejected) values for this core's slice
        # of the outer_step. Size is tiny (<= chains_assigned entries per step)
        # so no capping is needed at this layer - the analyser handles cross-run
        # aggregation and top-K selection.
        reject_values_by_param: dict[str, list[float]] = {}
        # Per-chain reject counters - emitted on chain_end so the analyser can
        # build a chains × params heatmap without joining accept events.
        chain_reject_params: dict[int, Counter] = {}
        chain_reject_values: dict[int, dict[str, list[float]]] = {}
        # CRITICAL: replace the RNG that was fork-inherited verbatim from the
        # parent with an independent stream derived from a SeedSequence child.
        # Without this, every worker draws identical epsilon perturbations and
        # identical acceptance uniforms, silently correlating all chains across
        # cores and producing chains that drift in lockstep instead of mixing.
        if worker_seed is not None:
            self.random_obj.rng = default_rng(worker_seed)

        # Pre-fill arr_chains with rejected steps (naccept=0, keep old state).
        # This ensures a partial crash mid-loop still produces valid entries for all
        # chains in this batch - the parent will get safe "step rejected" results
        # instead of None/uninitialized objects that would cause AttributeError.
        # IDL has no equivalent (no parallelisation); this is a Python-specific safety net.
        arr_chains = np.array([
            StructReturnExofast(0, oldpars[:, i], old_lhood[i], old_log_prior[i])
            for i in range(len(j))
        ])
        try:
            for i in range(len(j)):
                # Global chain index, as a plain int (used for DE exclusion,
                # likelihood diagnostics and every log record of this chain).
                chain_id = int(j[i])

                # Start each chain from its current state
                cur_pars = oldpars[:, i].copy()
                cur_lhood = old_lhood[i]  # IDL: cur_chi2
                # IDL: cur_det = olddet[i]  (linear prior product)
                # log-space equivalent - same value, never underflows
                cur_log_prior = old_log_prior[i]
                cur_naccept = 0

                _wemit({
                    "event": "chain_start",
                    "outer_step": outer_step,
                    "core": int(index_core),
                    "chain": chain_id,
                    "cur_lhood": float(cur_lhood),
                    "cur_log_prior": float(cur_log_prior),
                    "pars_min": float(np.min(cur_pars)),
                    "pars_max": float(np.max(cur_pars)),
                })

                # IDL: for k=0L, nthin-1L do begin ... endfor
                # Only the final state is saved; the intermediate sub-steps
                # only contribute to the accept count (cur_naccept).
                for _k in range(nthin):
                    # Sampler dispatch. The "DE-MC" branch (else) is the legacy
                    # path; its RNG draw order and arithmetic are kept so a
                    # fixed-seed DE-MC run stays byte-identical. The "Snooker"
                    # branch implements DE-MCzs (ter Braak and Vrugt 2008): per
                    # sub-step a 10% snooker move or a 90% parallel DE move,
                    # BOTH drawing their states from the full pooled history
                    # archive Z. _snooker_anchor is set only for an actual
                    # snooker move, gating the radial Jacobian added to the
                    # acceptance ratio below.
                    _snooker_anchor = None
                    if sampler == "Snooker":
                        # Use a snooker move with probability PSNOOKER_DEFAULT,
                        # provided the archive holds enough distinct states
                        # (anchor + 2). Otherwise fall back to a parallel move.
                        # NOTE: the uniform is only drawn when the archive is
                        # usable (short-circuit) - keep this order.
                        use_snooker_move = (
                            archive is not None
                            and archive.shape[0] >= 3
                            and self.random_obj.rng.uniform()
                            < snooker_module.PSNOOKER_DEFAULT
                        )
                        if use_snooker_move:
                            # 10%: line-constrained move, gamma_s ~ U(1.2, 2.2),
                            # no additive noise term.
                            newpars_chain, _snooker_anchor = (
                                snooker_module.snooker_propose(
                                    cur_pars, archive, self.random_obj.rng,
                                )
                            )
                            r1_k = r2_k = -1
                            gamma_eff = float("nan")
                            de_term = newpars_chain - cur_pars
                            epsilon = np.zeros(self.bestpars_data.nfit)
                        else:
                            # The parallel move needs two distinct archive
                            # rows. Fail with an explicit message instead of an
                            # opaque AttributeError on a missing archive; the
                            # surrounding try turns this into an "error" event
                            # plus rejected steps, exactly like any other
                            # failure in this loop.
                            if archive is None or archive.shape[0] < 2:
                                raise ValueError(
                                    "sampler='Snooker' requires an archive with "
                                    "at least 2 states"
                                )
                            # 90%: parallel DE move (DE-MCz) with the difference
                            # drawn from the archive instead of the current
                            # population; keeps the existing epsilon noise term.
                            r1_k, r2_k = snooker_module.sample_distinct_indices(
                                self.random_obj.rng, archive.shape[0], 2,
                            )
                            ref_r1 = archive[r1_k]
                            ref_r2 = archive[r2_k]
                            gamma_eff = self.bestpars_data.gamma_coeff
                            epsilon = (
                                (self.random_obj.rng.uniform(size=self.bestpars_data.nfit) - 0.5)
                                * 2.0
                                * self.retrieval_data.scale_vector_params
                                / self.retrieval_data.epsilon_scale_divisor
                            )
                            de_term = gamma_eff * np.reshape(
                                ref_r1 - ref_r2, (self.bestpars_data.nfit,)
                            )
                            newpars_chain = cur_pars + de_term + epsilon
                    else:
                        # Fresh DE pair for every sub-step: ter Braak (2006) prescribes
                        # r1, r2 to be redrawn at every proposal, not held fixed across
                        # nthin sub-steps. Holding them fixed turns the inner loop into
                        # a 1-D ray search along a single differential, defeating the
                        # purpose of nthin-thinning.
                        while True:
                            r1_k = int(self.random_obj.rng.integers(0, self.bestpars_data.nchains))
                            if r1_k != chain_id:
                                break
                        while True:
                            r2_k = int(self.random_obj.rng.integers(0, self.bestpars_data.nchains))
                            if r2_k != chain_id and r2_k != r1_k:
                                break
                        ref_r1 = all_pars_snapshot[:, r1_k]
                        ref_r2 = all_pars_snapshot[:, r2_k]

                        # ter Braak (2006) §2.3: occasionally use gamma≈1 to enable
                        # mode-jumping in multimodal posteriors. Recommended frequency
                        # ~10%. Without this, chains stuck in a local mode never escape.
                        # We use 0.98 instead of exactly 1.0 to break the mirror-point
                        # degeneracy when r1+r2 ≈ 2·cur_pars (γ=1 would place the proposal
                        # at a fixed reflection of the current point, hurting ergodicity).
                        if self.random_obj.rng.uniform() < self.bestpars_data.mode_jump_threshold:
                            gamma_eff = 0.98
                        else:
                            gamma_eff = self.bestpars_data.gamma_coeff

                        # IDL: newpars[tofit] = oldpars[tofit]
                        #        + gamma*(pars[*,i-1,r1]-pars[*,i-1,r2])
                        #        + (randomu-0.5d0)*2d0*scale/100d0
                        # NOTE: IDL epsilon is Uniform[-scale/d, +scale/d] with d=100 hardcoded.
                        # Here the divisor is runtime-configurable via retrieval_data.epsilon_scale_divisor
                        # (default 100.0 matches IDL). Previously Python used standard_normal - corrected to Uniform.
                        epsilon = (
                            (self.random_obj.rng.uniform(size=self.bestpars_data.nfit) - 0.5)
                            * 2.0
                            * self.retrieval_data.scale_vector_params
                            / self.retrieval_data.epsilon_scale_divisor
                        )
                        # Differential term computed once and reused for both
                        # the proposal and the logged norm. The sum below keeps
                        # the original left-to-right evaluation order
                        # ((cur_pars + de_term) + epsilon), so the proposal is
                        # bit-identical to the legacy expression.
                        de_term = gamma_eff * np.reshape(
                            ref_r1 - ref_r2, (self.bestpars_data.nfit,)
                        )
                        newpars_chain = cur_pars + de_term + epsilon

                    de_norm = float(np.linalg.norm(de_term))
                    eps_norm = float(np.linalg.norm(epsilon))
                    dpars_norm = float(np.linalg.norm(newpars_chain - cur_pars))
                    # Move type for offline analysis. "demc" = legacy DE-MC;
                    # "snooker"/"parallel_archive" = the two DE-MCzs moves. For a
                    # snooker move gamma_eff is not the meaningful scale (gamma_s
                    # is drawn inside snooker_propose), so it is logged as null
                    # rather than NaN to keep the JSONL valid and aggregable.
                    if sampler == "Snooker":
                        move_label = (
                            "snooker" if _snooker_anchor is not None
                            else "parallel_archive"
                        )
                    else:
                        move_label = "demc"
                    _wemit({
                        "event": "proposal",
                        "outer_step": outer_step,
                        "core": int(index_core),
                        "chain": chain_id,
                        "sub": int(_k + 1),
                        "nthin": int(nthin),
                        "move": move_label,
                        "r1": int(r1_k),
                        "r2": int(r2_k),
                        "gamma_eff": (
                            float(gamma_eff) if np.isfinite(gamma_eff) else None
                        ),
                        "de_norm": de_norm,
                        "eps_norm": eps_norm,
                        "dpars_norm": dpars_norm,
                    })

                    # Create full parameter object and evaluate likelihood.
                    # chain/step propagate down to per-step diagnostic prints in
                    # LikelihoodHR (negative mass fractions, negative spectrum,
                    # TRPCA error) so log triage can point at the right chain.
                    param_full = self.create_param_full(newpars_chain)
                    new_lhood, new_log_prior, _, reject_info = self.lh_function_gib(
                        param_full, chain=chain_id, step=outer_step,
                    )  # IDL: new_chi2
                    if reject_info is not None:
                        reject_reasons[reject_info.get("reason", "unknown")] += 1
                        pname = reject_info.get("param")
                        if pname:
                            reject_params[pname] += 1
                            val = reject_info.get("value")
                            if val is not None:
                                reject_values_by_param.setdefault(pname, []).append(float(val))
                            # Mirror into per-chain buckets for the heatmap.
                            chain_reject_params.setdefault(chain_id, Counter())[pname] += 1
                            if val is not None:
                                chain_reject_values.setdefault(chain_id, {}) \
                                    .setdefault(pname, []).append(float(val))

                    # IDL: C = (newdet/olddet)*exp(newchi2-oldchi2)
                    #      if randomu lt C then accept
                    # In log-space (mathematically identical, no underflow risk):
                    #   log_C = log(newdet/olddet) + (new_lhood - cur_lhood)
                    #         = (new_log_prior - cur_log_prior) + (new_lhood - cur_lhood)
                    #   accept if log(U(0,1)) < log_C
                    log_C = (new_log_prior - cur_log_prior) + (new_lhood - cur_lhood)
                    # Snooker moves are non-symmetric on R^d: add the radial
                    # Jacobian (d-1)*(log||x'-z|| - log||x-z||) in log-space
                    # (ter Braak and Vrugt 2008, Eq. 3). DE-MC and the parallel
                    # DE move are symmetric, so _snooker_anchor is None there and
                    # log_C is unchanged.
                    if _snooker_anchor is not None:
                        log_C = log_C + snooker_module.snooker_log_jacobian(
                            cur_pars,
                            newpars_chain,
                            _snooker_anchor,
                            self.bestpars_data.nfit,
                        )
                    rng_value = np.log(self.random_obj.rng.uniform())
                    accepted = rng_value < log_C
                    _accept_rec = {
                        "event": "accept",
                        "outer_step": outer_step,
                        "core": int(index_core),
                        "chain": chain_id,
                        "sub": int(_k + 1),
                        "nthin": int(nthin),
                        "accepted": bool(accepted),
                        "new_lhood": float(new_lhood),
                        "cur_lhood": float(cur_lhood),
                        "d_lhood": float(new_lhood - cur_lhood),
                        "new_log_prior": float(new_log_prior),
                        "cur_log_prior": float(cur_log_prior),
                        "d_log_prior": float(new_log_prior - cur_log_prior),
                        "log_C": float(log_C),
                        "log_U": float(rng_value),
                    }
                    # When lh_function_gib short-circuited due to a boundary
                    # violation, record the offending param / value so the
                    # analyser can do per-sub-step reject vs norms joins.
                    if reject_info is not None:
                        _accept_rec["reject_reason"] = str(reject_info.get("reason", "unknown"))
                        _rp = reject_info.get("param")
                        if _rp is not None:
                            _accept_rec["reject_param"] = str(_rp)
                        _rv = reject_info.get("value")
                        if _rv is not None:
                            _accept_rec["reject_value"] = float(_rv)
                    _wemit(_accept_rec)
                    if accepted:
                        cur_naccept += 1
                        cur_pars = newpars_chain
                        cur_lhood = new_lhood
                        cur_log_prior = new_log_prior
                    # else: keep cur_pars, cur_lhood, cur_log_prior unchanged

                _wemit({
                    "event": "chain_end",
                    "outer_step": outer_step,
                    "core": int(index_core),
                    "chain": chain_id,
                    "naccept": int(cur_naccept),
                    "nthin": int(nthin),
                    "final_lhood": float(cur_lhood),
                    "final_log_prior": float(cur_log_prior),
                    "pars_min": float(np.min(cur_pars)),
                    "pars_max": float(np.max(cur_pars)),
                    "reject_params": dict(chain_reject_params.get(chain_id, Counter())),
                    "reject_values": chain_reject_values.get(chain_id, {}),
                })
                arr_chains[i] = StructReturnExofast(cur_naccept, cur_pars, cur_lhood, cur_log_prior)

        except Exception as e:
            # Likelihood evaluation (or proposal construction) failed for this chain.
            # arr_chains[i:] keep their pre-filled rejected-step values (safe fallback).
            _wemit({
                "event": "error",
                "outer_step": outer_step,
                "core": int(index_core),
                "id_process": getattr(self.retrieval_data, "id_process", None),
                "exception_type": type(e).__name__,
                "exception": str(e),
                "traceback": traceback.format_exc(),
            })

        # Per-core summary over all chains handled by this worker.
        total_naccept = int(sum(int(s.naccept) for s in arr_chains))
        total_steps = len(arr_chains) * nthin
        acc_rate = (total_naccept / total_steps) if total_steps > 0 else 0.0
        _wemit({
            "event": "core_done",
            "outer_step": outer_step,
            "core": int(index_core),
            "chains_count": int(len(arr_chains)),
            "total_naccept": total_naccept,
            "total_steps": int(total_steps),
            "accept_rate": float(acc_rate),
            "reject_reasons": dict(reject_reasons),
            "reject_params": dict(reject_params),
            "reject_values": reject_values_by_param,
        })
        # Single batched write for this worker's entire outer_step
        # (enter / chain_start / proposal / accept / chain_end / core_done).
        _wbuf.flush(debug_log_path)
        # Store result in shared dictionary
        try:
            return_dict[index_core] = arr_chains
        except Exception as e:
            # Crash path: flush is already done, emit the error with a
            # direct atomic append so it lands on disk before the process ends.
            _emit_debug_event(debug_log_path, {
                "event": "error",
                "outer_step": outer_step,
                "core": int(index_core),
                "id_process": getattr(self.retrieval_data, "id_process", None),
                "exception_type": type(e).__name__,
                "exception": str(e),
                "traceback": traceback.format_exc(),
                "where": "return_dict_assignment",
            })
            # Terminate with a NON-ZERO status: the result was not delivered,
            # so the process exit code must not look like a clean exit.
            # (The site-provided exit() helper is intended for interactive
            # use, may be absent, and would exit with status 0.)
            raise SystemExit(1) from e


    # def parallel_chain_pool_version(self, j, oldpars, oldchi2, olddet, pars_r1, pars_r2):
    #     """
    #     Pool version of parallel_chain - returns result instead of writing to shared dict.
    #
    #     Args:
    #         j: Chain index
    #         oldpars: Current parameter values for this chain
    #         oldchi2: Current chi-square value for this chain
    #         olddet: Current determinant value for this chain
    #         pars_r1: Parameter values from random chain 1
    #         pars_r2: Parameter values from random chain 2
    #
    #     Returns:
    #         tuple: (j, StructReturnExofast_result)
    #     """
    #     # Calculate new parameter values using differential evolution formula
    #     newpars_chain = (
    #             oldpars
    #             + self.bestpars_data.gamma_coeff
    #             * np.reshape(pars_r1 - pars_r2, (self.bestpars_data.nfit,))
    #             + self.random_obj.rng.standard_normal(self.bestpars_data.nfit)
    #             * self.retrieval_data.scale_vector_params
    #             / 100
    #     )
    #
    #     # Create full parameter object and evaluate likelihood
    #     param_full = self.create_param_full(newpars_chain)
    #     newchi2, newdet, _ = self.lh_function_gib(param_full)
    #
    #     # Calculate acceptance probability
    #     C = (newdet / olddet) * np.exp(newchi2 - oldchi2)
    #
    #     # Accept or reject the step based on Metropolis criterion
    #     if self.random_obj.rng.uniform() < C:
    #         temp = StructReturnExofast(1, newpars_chain, newchi2, newdet)
    #     else:
    #         temp = StructReturnExofast(0, oldpars, oldchi2, olddet)
    #
    #     # Return result instead of writing to shared dict
    #     return j, temp

    # def run_multiple_processes_pool_version(self, old_pars, old_chi, old_det):
    #     """
    #     Pool version that works EXACTLY like the original Manager version.
    #     One process per chain, one core per chain.
    #
    #     Args:
    #         old_pars: Current parameter values for all chains
    #         old_chi: Current chi-square values for all chains
    #         old_det: Current determinant values for all chains
    #
    #     Returns:
    #         return_dict: Dictionary containing results from all processes
    #     """
    #     nchains = self.bestpars_data.nchains
    #     ncores = self.bestpars_data.ncores
    #
    #     # Prepare arguments for each chain
    #     chain_args = []
    #     for j in range(nchains):
    #         # Select two random chains different from current chain for DE
    #         while True:
    #             r1 = self.random_obj.rng.integers(0, nchains, 1)[0]
    #             if r1 != j:
    #                 break
    #
    #         while True:
    #             r2 = self.random_obj.rng.integers(0, nchains, 1)[0]
    #             if r2 != j and r2 != r1:
    #                 break
    #
    #         # Add arguments for this chain
    #         chain_args.append((
    #             j,
    #             old_pars[:, j],
    #             old_chi[j],
    #             old_det[j],
    #             old_pars[:, r1],
    #             old_pars[:, r2]
    #         ))
    #
    #     # Execute in parallel - ONE PROCESS PER CHAIN, exactly like Manager
    #     try:
    #         with multiprocessing.Pool(processes=ncores) as pool:
    #             results = pool.starmap(self.parallel_chain_pool_version, chain_args)
    #
    #         # Convert results back to dictionary format
    #         return_dict = {}
    #         for j, result in results:
    #             return_dict[j] = result
    #
    #         return return_dict
    #
    #     except Exception as e:
    #         print(f"Pool execution failed: {e}")
    #         # Fallback to original Manager version
    #         return self.run_multiple_processes_manager_version(old_pars, old_chi, old_det)


[docs]
    def run_multiple_processes_manager_version(
            self,
            old_pars,
            old_lhood,
            old_log_prior,
            nthin=1,
            outer_step=None,
            debug_log_path=None,
            sampler="DE-MC",
            archive=None,
    ):
        """
        Run MCMC chain steps in parallel using multiprocessing Manager.

        This method distributes chain steps across multiple processes using
        differential evolution MCMC, with a shared Manager dictionary for
        collecting results.

        Args:
            old_pars: Current parameter values for all chains, shape (nfit, nchains)
            old_lhood: Current log-likelihood values for all chains, shape (nchains,)  # IDL: old_chi
            old_log_prior: Log of Gaussian prior product for all chains, shape (nchains,)
                           (IDL: 'old_det' - renamed; see calculate_log_prior)
            nthin: Thinning factor passed through to each worker process
            outer_step: MCMC iteration index (owned by exofast parent loop).
                Passed into every worker event so multi-core records can be
                grouped by step during offline analysis.
            debug_log_path: Shared JSONL log path. If None, falls back to the
                project default derived from retrieval_data.path_results.

        Returns:
            return_dict: Manager dictionary mapping core index to array of
                StructReturnExofast results
        """
        chain_per_core = self.bestpars_data.nchains // self.bestpars_data.ncores
        # Fallback: when the parent did not pass an explicit path (e.g. unit
        # tests calling this directly), reconstruct it - but honour the
        # retrieval_data.save_chain_debug flag so a disabled run still emits
        # nothing instead of silently re-enabling itself.
        if debug_log_path is None and getattr(
            self.retrieval_data, "save_chain_debug", True
        ):
            debug_log_path = _debug_log_path(self)

        _emit_debug_event(debug_log_path, {
            "event": "outer_step_begin",
            "outer_step": outer_step,
            "nthin": int(nthin),
            "lhood_min": float(np.min(old_lhood)),
            "lhood_max": float(np.max(old_lhood)),
            "lhood_mean": float(np.mean(old_lhood)),
        })

        # One independent RNG seed per worker per outer step. spawn() advances
        # the parent SeedSequence internal counter, so consecutive outer steps
        # also receive fresh, non-colliding child seeds. Without this, fork()
        # would copy the parent rng verbatim into every worker, causing all
        # cores to draw identical epsilon perturbations and identical
        # acceptance uniforms - silently correlating chains across cores.
        child_seeds = self.random_obj.seed_seq.spawn(self.bestpars_data.ncores)

        # Force the fork context (via _mp_context) so the Manager server AND
        # the worker Processes below share the same start method. On macOS the
        # default is spawn, which would re-pickle the bound parallel_chain (and
        # the full pRT atmosphere) per worker - slow to broken. map_optimizer
        # uses the same helper.
        ctx = _mp_context()

        # Manager started with a parent-death initializer (requirement 1): the
        # Manager server is itself a forked child, so it installs the same
        # PR_SET_PDEATHSIG and dies with the parent instead of lingering as a
        # zombie. Shut it down explicitly in `finally` so a worker raising
        # mid-loop can't leak per-outer-step Manager zombies across the
        # maxsteps×ncores spawn cascade. We snapshot return_dict into a plain
        # dict before teardown so the caller can iterate after the proxy dies.
        manager = SyncManager(ctx=ctx)
        manager.start(_install_parent_death_signal)
        try:
            return_dict = manager.dict()
            proc = []
            for i in range(self.bestpars_data.ncores):
                # Chain indices owned by this core (DE r1/r2 are now redrawn
                # fresh inside the worker for every sub-step - see
                # parallel_chain).
                j = np.array(
                    [int(i * chain_per_core + k) for k in range(chain_per_core)],
                    dtype=int,
                )

                # Create and start process for this chain (fork context, so it
                # matches the Manager server above and never re-pickles model_obj)
                proc.append(
                    ctx.Process(
                        target=self.parallel_chain,
                        args=(
                            i,
                            j,
                            return_dict,
                            old_pars[:, j],
                            old_lhood[j],  # IDL: old_chi
                            old_log_prior[j],
                            old_pars.copy(),  # frozen snapshot
                            nthin,
                            child_seeds[i],
                        ),
                        kwargs={
                            "debug_log_path": debug_log_path,
                            "outer_step": outer_step,
                            "sampler": sampler,
                            "archive": archive,
                        },
                    )
                )
                proc[i].start()

            # Wait for all processes to complete and clean up. The join is
            # bounded by a whole-step wall-clock budget: the parent-death signal
            # only protects children when the PARENT dies, so without a timeout a
            # worker that HANGS (e.g. pRT stuck in Fortran) would block the join
            # forever. A full model + complete loop has measured ~1 min so far,
            # so 300 s is a ~5x ceiling. A timed-out worker is killed and, having
            # never written return_dict[i], is handled by the fill loop below as
            # a rejected step (same path as a crashed worker).
            step_timeout_s = getattr(
                self.retrieval_data, "worker_step_timeout_s", 300
            )
            deadline = time.monotonic() + step_timeout_s
            timed_out_cores = set()
            for i in range(self.bestpars_data.ncores):
                remaining = max(0.0, deadline - time.monotonic())
                proc[i].join(timeout=remaining)
                if proc[i].is_alive():
                    # Hung worker: SIGTERM, brief grace, then SIGKILL.
                    timed_out_cores.add(i)
                    proc[i].terminate()
                    proc[i].join(timeout=5)
                    if proc[i].is_alive():
                        proc[i].kill()
                        proc[i].join()
                else:
                    proc[i].terminate()

            # SAFETY (requirements 2 & 3): a worker that died abnormally
            # (segfault/OOM) never wrote its slice into return_dict. Without
            # this the consumer (exofast_demc.likelihood) hits KeyError,
            # retries the whole step 5× and then kills the entire retrieval.
            # Instead: keep the parent alive, tell the user *which* core died
            # and *why*, and substitute a clean "rejected step" result so the
            # affected chains stay at their valid previous state (naccept=0,
            # pars/lhood/log_prior unchanged - same convention as the worker's
            # own pre-filled fallback in parallel_chain).
            for i in range(self.bestpars_data.ncores):
                if i in return_dict:
                    continue
                j = np.array(
                    [int(i * chain_per_core + k) for k in range(chain_per_core)],
                    dtype=int,
                )
                if i in timed_out_cores:
                    cause = (
                        f"timed out (no result after {step_timeout_s:.0f}s; "
                        f"worker killed)"
                    )
                else:
                    cause = _describe_exitcode(proc[i].exitcode)
                print(
                    f"[RETRIEVAL] WARNING: a subprocess died during "
                    f"step {outer_step} (core {i}, chains {[int(c) for c in j]}): "
                    f"{cause}. The retrieval continues: the {len(j)} chains of this "
                    f"core are marked as a rejected step (naccept=0, state "
                    f"unchanged).",
                    flush=True,
                )
                return_dict[i] = np.array([
                    StructReturnExofast(
                        0,
                        old_pars[:, j][:, k],
                        old_lhood[j][k],  # IDL: old_chi
                        old_log_prior[j][k],
                    )
                    for k in range(len(j))
                ])
                _emit_debug_event(debug_log_path, {
                    "event": "worker_died",
                    "outer_step": outer_step,
                    "core": int(i),
                    "exitcode": proc[i].exitcode,
                    "cause": cause,
                    "timed_out": i in timed_out_cores,
                    "chains": [int(c) for c in j],
                    "id_process": getattr(
                        self.retrieval_data, "id_process", None
                    ),
                })

            # Snapshot proxy → plain dict before Manager teardown so the
            # caller (exofast_demc.likelihood loop) can keep iterating.
            results = dict(return_dict)
        finally:
            manager.shutdown()

        _emit_debug_event(debug_log_path, {
            "event": "outer_step_end",
            "outer_step": outer_step,
        })

        return results



[docs]
    def run_multiple_processes(
            self,
            old_pars,
            old_lhood,
            old_log_prior,
            nthin=1,
            outer_step=None,
            debug_log_path=None,
            sampler="DE-MC",
            archive=None,
    ):
        """
        Public entry point for one parallel MCMC step across all chains.

        This method performs no work of its own: every argument is handed,
        unchanged, to ``run_multiple_processes_manager_version`` and that
        method's return value is passed straight back to the caller.

        Args:
            old_pars: Current parameter values for all chains, shape (nfit, nchains)
            old_lhood: Current log-likelihood values for all chains, shape (nchains,)  # IDL: old_chi
            old_log_prior: Log of Gaussian prior product for all chains, shape (nchains,)
                           (IDL: 'old_det' - renamed; see calculate_log_prior)
            nthin: Thinning factor - each worker performs nthin proposals, saves last
            outer_step: MCMC iteration index owned by the exofast parent loop.
            debug_log_path: Shared JSONL log path (None → project default).
            sampler: "DE-MC" (default) or "Snooker" (DE-MCzs). Forwarded to each worker.
            archive: Pooled history Z (M, nfit) for the snooker path; None for DE-MC.

        Returns:
            The result of ``run_multiple_processes_manager_version``
            (dictionary containing results from all processes).
        """
        # Resolve the delegate first, then bundle the optional settings so
        # the forwarding call stays on a single readable line.
        manager_step = self.run_multiple_processes_manager_version
        step_options = {
            "nthin": nthin,
            "outer_step": outer_step,
            "debug_log_path": debug_log_path,
            "sampler": sampler,
            "archive": archive,
        }
        return manager_step(old_pars, old_lhood, old_log_prior, **step_options)


    # noinspection PyUnresolvedReferences

[docs]
    def create_param_full(self, newpars_chain):
        """
        Delegate to ParameterHandler.create_param_full.

        Args:
            newpars_chain: Passed through unchanged as the first argument of
                ``self.param_handler.create_param_full``.

        Returns:
            Whatever ``self.param_handler.create_param_full`` returns when
            called with ``newpars_chain`` and ``self.bestpars_data``.
        """
        build_full = self.param_handler.create_param_full
        best_pars = self.bestpars_data
        return build_full(newpars_chain, best_pars)