diff --git a/.idea/dictionaries/Titus.xml b/.idea/dictionaries/Titus.xml index 7ca660c..b634bf6 100644 --- a/.idea/dictionaries/Titus.xml +++ b/.idea/dictionaries/Titus.xml @@ -2,16 +2,21 @@ coeffs + conc diluant + disp dodecane equilibrate extractant + ftol kmol lmse + maxiter ndarray pred reeps scipy + slsqp thermo diff --git a/.idea/workspace.xml b/.idea/workspace.xml index ee250e0..18a656d 100644 --- a/.idea/workspace.xml +++ b/.idea/workspace.xml @@ -1,12 +1,12 @@ - + + - - - + + - + + + - - - + @@ -215,7 +215,14 @@ @@ -254,10 +261,10 @@ - + - + @@ -266,30 +273,30 @@ - - + + - + - - - + + + - + - - - + + + - + - - - + + + - + - + @@ -298,12 +305,12 @@ - - + + - + @@ -317,4 +324,15 @@ + + + + + file://$PROJECT_DIR$/../../../Powell Research/CSTR-RL/ann-pso_ppo_compare_stochastic10/tester_pso_ann_v3.py + 9 + + + + \ No newline at end of file diff --git a/README.md b/README.md index 2aadc67..d3a7bb2 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,26 @@ -# parameter-estimation +# REEPS +REEPS is a toolkit for estimating standard thermodynamic parameters for Gibbs minimization. +Extend a methodology for estimating standard thermodynamic parameters for Gibbs minimization in multiphase, multicomponent separations systems -Extend a methodology for estimating standard thermodynamic parameters for Gibbs minimization in multiphase, multicomponent separations systems \ No newline at end of file + +## Installation + +To install REEPS, clone the repository with the command + +``` +$ git clone https://xgitlab.cels.anl.gov/summer-2020/parameter-estimation.git +``` + +Navigate into the parameter-estimation folder with +``` +cd parameter-estimation +``` +and run the command below in your terminal +``` +$ pip install -e. +``` +### Dependencies +REEPS uses packages: cantera (https://cantera.org/), pandas, numpy, scipy, xml, seaborn, and matplotlib + +## Usage +Do random stuff and pray. diff --git a/data/xmls/twophase.xml b/data/xmls/twophase.xml index 28f9edb..ed936a8 100644 --- a/data/xmls/twophase.xml +++ b/data/xmls/twophase.xml @@ -50,7 +50,7 @@ 298.14999999999998 - -4704703.645715787 + -4704703.645715787 1117.965 0.0 diff --git a/reeps.py b/reeps.py index 727fd23..8c97159 100644 --- a/reeps.py +++ b/reeps.py @@ -9,6 +9,8 @@ import seaborn as sns import matplotlib.pyplot as plt import shutil import copy +from inspect import signature +import os sns.set() @@ -16,6 +18,7 @@ class REEPS: """REEPS (Rare earth extraction parameter searcher) Takes in experimental data Returns parameters for GEM + Only good for 1 rare earth and 1 extractant :param exp_csv_filename: (str) csv file name with experimental data :param phases_xml_filename: (str) xml file with parameters for equilibrium calc :param opt_dict: (dict) optimize info {species:{thermo_prop:guess} @@ -27,8 +30,34 @@ class REEPS: :param rare_earth_ion_name: (str) name of rare earth ion in xml file :param aq_solvent_rho: (float) density of solvent (g/L) :param extractant_rho: (float) density of extractant (g/L) - :param diluant_rho: (float) density of extractant (g/L) + :param diluant_rho: (float) density of diluant (g/L) If no density is given, molar volume/molecular weight is used from xml + :param objective_function: (function) function to compute objective + By default, the objective function is log mean squared error + of distribution ratio np.log10(re_org/re_aq) + Function needs to take inputs: + objective_function(predicted_dict, measured_df, **kwargs) + **kwargs is optional + Below is the guide for referencing predicted values + | To access | Use | + |------------------------------------- |--------------------------| + | predicted rare earth eq conc in aq | predicted_dict['re_aq'] | + | predicted rare earth eq conc in org | predicted_dict['re_org'] | + | predicted hydrogen ion conc in aq | predicted_dict['h'] | + | predicted extractant conc in org | predicted_dict['z'] | + | predicted rare earth distribution ratio | predicted_dict['re_d'] | + For measured values, use the column names in the experimental data file + :param optimizer: (function) function to perform optimization + By default, the optimizer is scipy's optimize function with + default_kwargs= {"method": 'SLSQP', + "bounds": [(1e-1, 1e1)*len(x_guess)], + "constraints": (), + "options": {'disp': True, 'maxiter': 1000, 'ftol': 1e-6}} + Function needs to take inputs: + optimizer(objective_function, x_guess, **kwargs) + **kwargs is optional + :param temp_xml_file_path: (str) path to temporary xml file + default is local temp folder """ def __init__(self, @@ -44,7 +73,12 @@ class REEPS: aq_solvent_rho=None, extractant_rho=None, diluant_rho=None, + objective_function='Log-MSE', + optimizer='SLSQP', + temp_xml_file_path=None ): + self._built_in_obj_list = ['Log-MSE'] + self._built_in_opt_list = ['SLSQP'] self._exp_csv_filename = exp_csv_filename self._phases_xml_filename = phases_xml_filename self._opt_dict = opt_dict @@ -57,9 +91,14 @@ class REEPS: self._aq_solvent_rho = aq_solvent_rho self._extractant_rho = extractant_rho self._diluant_rho = diluant_rho - - self._temp_xml_filename = "temp.xml" - shutil.copyfile(phases_xml_filename, self._temp_xml_filename) + self._objective_function = None + self.set_objective_function(objective_function) + self._optimizer = None + self.set_optimizer(optimizer) + if temp_xml_file_path is None: + temp_xml_file_path = '{0}\\temp.xml'.format(os.getenv('TEMP')) + self._temp_xml_file_path = temp_xml_file_path + shutil.copyfile(phases_xml_filename, self._temp_xml_file_path) self._phases = ct.import_phases(phases_xml_filename, phase_names) self._exp_df = pd.read_csv(self._exp_csv_filename) @@ -69,6 +108,27 @@ class REEPS: self._org_ind = None self.set_in_moles(feed_vol=1) + self._predicted_dict = None + self.update_predicted_dict() + + @staticmethod + def log_mean_squared_error(predicted_dict, meas_df): + meas = meas_df['D(m)'].values + pred = predicted_dict['re_org'] / predicted_dict['re_aq'] + log_pred = np.log10(pred) + log_meas = np.log10(meas) + obj = np.sum((log_pred - log_meas) ** 2) + return obj + + @staticmethod + def slsqp_optimizer(objective, x_guess): + optimizer_kwargs = {"method": 'SLSQP', + "bounds": [(1e-1, 1e1) * len(x_guess)], + "constraints": (), + "options": {'disp': True, 'maxiter': 1000, 'ftol': 1e-6}} + res = minimize(objective, x_guess, **optimizer_kwargs) + est_parameters = res.x + return est_parameters def get_exp_csv_filename(self) -> str: return self._exp_csv_filename @@ -76,6 +136,7 @@ class REEPS: def set_exp_csv_filename(self, exp_csv_filename): self._exp_csv_filename = exp_csv_filename self._exp_df = pd.read_csv(self._exp_csv_filename) + self.update_predicted_dict() return None def get_phases(self) -> list: @@ -86,8 +147,10 @@ class REEPS: Also runs set_in_mole to set initial moles to 1 g/L""" self._phases_xml_filename = phases_xml_filename self._phase_names = phase_names + shutil.copyfile(phases_xml_filename, self._temp_xml_file_path) self._phases = ct.import_phases(phases_xml_filename, phase_names) self.set_in_moles(feed_vol=1) + self.update_predicted_dict() return None def get_opt_dict(self) -> dict: @@ -232,35 +295,72 @@ class REEPS: ] in_moles_data.append(species_moles) self._in_moles = pd.DataFrame(in_moles_data, columns=mixed.species_names) + self.update_predicted_dict() return None def get_in_moles(self) -> pd.DataFrame: return self._in_moles - def objective(self, x): - """Log mean squared error between measured and predicted data - :param x: (list) thermo properties varied to minimize LMSE""" - temp_xml_filename = self._temp_xml_filename + def set_objective_function(self, objective_function): + """Set objective function. see class docstring for instructions""" + if not callable(objective_function) \ + and objective_function not in self._built_in_obj_list: + raise Exception( + "objective_function must be a function " + "or in this strings list: {0}".format(self._built_in_obj_list)) + if callable(objective_function): + if len(signature(objective_function).parameters) < 2: + raise Exception( + "objective_function must be a function " + "with at least 3 arguments:" + " f(predicted_dict, experimental_df,**kwargs)") + if objective_function == 'Log-MSE': + objective_function = self.log_mean_squared_error + self._objective_function = objective_function + return None + + def get_objective_function(self): + return self._objective_function + + def set_optimizer(self, optimizer): + if not callable(optimizer) \ + and optimizer not in self._built_in_opt_list: + raise Exception( + "optimizer must be a function " + "or in this strings list: {0}".format(self._built_in_opt_list)) + if callable(optimizer): + if len(signature(optimizer).parameters) < 2: + raise Exception( + "optimizer must be a function " + "with at least 2 arguments: " + "f(objective_func,x_guess,**kwargs)") + if optimizer == 'SLSQP': + optimizer = self.slsqp_optimizer + self._optimizer = optimizer + return None + + def get_optimizer(self): + return self._optimizer + + def update_predicted_dict(self, phases_xml_filename=None): + if phases_xml_filename is None: + phases_xml_filename = self._phases_xml_filename phase_names = self._phase_names aq_ind = self._aq_ind org_ind = self._org_ind complex_name = self._complex_name + extractant_name = self._extractant_name rare_earth_ion_name = self._rare_earth_ion_name in_moles = self._in_moles - exp_df = self._exp_df - - x = np.array(x) - opt_dict = copy.deepcopy(self._opt_dict) - i = 0 - for species_name in opt_dict.keys(): - for thermo_prop in opt_dict[species_name].keys(): - opt_dict[species_name][thermo_prop] *= x[i] - i += 1 - self.update_xml(opt_dict, temp_xml_filename) - phases_copy = ct.import_phases(temp_xml_filename, phase_names) + phases_copy = ct.import_phases(phases_xml_filename, phase_names) mix = ct.Mixture(phases_copy) - pred = [] + predicted_dict = {"re_aq": [], + "re_org": [], + "h": [], + "z": [] + } + for row in in_moles.values: mix.species_moles = row mix.equilibrate('TP', log_level=0) @@ -268,17 +368,86 @@ class REEPS: org_ind, complex_name)] re_aq = mix.species_moles[mix.species_index( aq_ind, rare_earth_ion_name)] - pred.append(np.log10(re_org / re_aq)) - pred = np.array(pred) - meas = np.log10(exp_df['D(m)'].values) - obj = np.sum((pred - meas) ** 2) - return obj + hydrogen_ions = mix.species_moles[mix.species_index(aq_ind, 'H+')] + extractant = mix.species_moles[mix.species_index( + org_ind, extractant_name)] + predicted_dict['re_aq'].append(re_aq) + predicted_dict['re_org'].append(re_org) + predicted_dict['h'].append(hydrogen_ions) + predicted_dict['z'].append(extractant) + predicted_dict['re_aq'] = np.array(predicted_dict['re_aq']) + predicted_dict['re_org'] = np.array(predicted_dict['re_org']) + predicted_dict['h'] = np.array(predicted_dict['h']) + predicted_dict['z'] = np.array(predicted_dict['z']) + + self._predicted_dict = predicted_dict + return None + + def get_predicted_dict(self): + return self._predicted_dict + + def _internal_objective(self, x, kwargs=None): + """default Log mean squared error between measured and predicted data + :param x: (list) thermo properties varied to minimize LMSE + :param kwargs: (list) arguments for objective_function + """ + temp_xml_file_path = self._temp_xml_file_path + exp_df = self._exp_df + objective_function = self._objective_function + opt_dict = copy.deepcopy(self._opt_dict) + i = 0 + for species_name in opt_dict.keys(): + for _ in opt_dict[species_name].keys(): + i += 1 + x = np.array(x) + if i == len(x.shape): + xs = np.array([x]) + vectorized_x = False + else: + vectorized_x = True + xs = x + objective_values = [] + for x in xs: + i = 0 + for species_name in opt_dict.keys(): + for thermo_prop in opt_dict[species_name].keys(): + opt_dict[species_name][thermo_prop] *= x[i] + i += 1 + self.update_xml(opt_dict, temp_xml_file_path) + + self.update_predicted_dict(temp_xml_file_path) + predicted_dict = self.get_predicted_dict() + + if kwargs is None: + # noinspection PyCallingNonCallable + obj = objective_function(predicted_dict, exp_df) + else: + # noinspection PyCallingNonCallable + obj = objective_function(predicted_dict, exp_df, **kwargs) + objective_values.append(obj) + if vectorized_x: + objective_values = np.array(objective_values) + else: + objective_values = objective_values[0] + return objective_values - def fit(self, kwargs) -> float: - """Fits experimental to modeled data by estimating complex reference enthalpy + def fit(self, objective_function=None, optimizer=None, objective_kwargs=None, optimizer_kwargs=None) -> dict: + """Fits experimental to modeled data by minimizing objective function Returns estimated complex enthalpy in J/mol - :param kwargs: (dict) parameters and options for scipy.minimize + :param objective_function: (function) function to compute objective + :param optimizer: (function) function to perform optimization + :param optimizer_kwargs: (dict) arguments for optimizer + :param objective_kwargs: (dict) arguments for objective function """ + if objective_function is not None: + self.set_objective_function(objective_function) + if optimizer is not None: + self.set_optimizer(optimizer) + + def objective(x): + return self._internal_objective(x, objective_kwargs) + + optimizer = self._optimizer opt_dict = copy.deepcopy(self._opt_dict) # x_guess = [] i = 0 @@ -287,12 +456,20 @@ class REEPS: # x_guess.append(opt_dict[species_name][thermo_prop]) i += 1 x_guess = np.ones(i) - res = minimize(self.objective, x_guess, **kwargs) + + if optimizer_kwargs is None: + # noinspection PyCallingNonCallable + est_parameters = optimizer(objective, x_guess) + else: + # noinspection PyCallingNonCallable + est_parameters = optimizer(objective, x_guess, **optimizer_kwargs) + i = 0 for species_name in opt_dict.keys(): for thermo_prop in opt_dict[species_name].keys(): - opt_dict[species_name][thermo_prop] *= res.x[i] + opt_dict[species_name][thermo_prop] *= est_parameters[i] i += 1 + self.update_predicted_dict() return opt_dict def update_xml(self, @@ -323,8 +500,11 @@ class REEPS: now.year)) tree.write(phases_xml_filename) + if phases_xml_filename == self._phases_xml_filename: + self.set_phases(self._phases_xml_filename, self._phase_names) + return None - def parity_plot(self): + def parity_plot(self, species='re_aq'): """Parity plot between measured and predicted rare earth composition""" phases_copy = self._phases.copy() mix = ct.Mixture(phases_copy) @@ -345,8 +525,31 @@ class REEPS: max_data = np.max([pred, meas]) min_max_data = np.array([min_data, max_data]) fig, ax = plt.subplots() - sns.scatterplot(meas, pred, color="r") - sns.lineplot(min_max_data, min_max_data, color="b") - ax.set(xlabel='Measured X equilibrium', ylabel='Predicted X equilibrium') + sns.scatterplot(meas, pred, color="r", + label="Rare earth equilibrium concentration (mol/L)") + sns.lineplot(min_max_data, min_max_data, color="b", label="") + ax.set(xlabel='Measured', ylabel='Predicted') plt.show() return None + + def r_squared(self): + """r-squared value comparing measured and predicted rare earth composition""" + phases_copy = self._phases.copy() + mix = ct.Mixture(phases_copy) + aq_ind = self._aq_ind + exp_df = self._exp_df + in_moles = self._in_moles + rare_earth_ion_name = self._rare_earth_ion_name + pred = [] + for row in in_moles.values: + mix.species_moles = row + mix.equilibrate('TP', log_level=0) + re_aq = mix.species_moles[mix.species_index( + aq_ind, rare_earth_ion_name)] + pred.append(re_aq) + predicted_y = np.array(pred) + actual_y = exp_df['REeq(m)'].values + num = sum((actual_y - predicted_y) ** 2) + den = sum((actual_y - np.mean(actual_y)) ** 2) + r_2 = (1 - num / den) + return r_2 diff --git a/tests/test_reeps.py b/tests/test_reeps.py index 911f1e6..da361cd 100644 --- a/tests/test_reeps.py +++ b/tests/test_reeps.py @@ -1,22 +1,53 @@ import json +import numpy as np +import pyswarms as ps import sys - sys.path.append('../') from reeps import REEPS - with open('one_ree_settings.txt') as file: testing_params = json.load(file) beaker = REEPS(**testing_params) + +# def new_obj(predicted_dict, meas_df, epsilon): +# meas_cols = list(meas_df) +# pred_keys = list(predicted_dict.keys()) +# meas = meas_df[meas_cols[2]] +# pred = (predicted_dict['re_org'] + epsilon) / (predicted_dict['re_aq'] + epsilon) +# log_pred = np.log10(pred) +# log_meas = np.log10(meas) +# obj = np.sum((log_pred - log_meas) ** 2) +# return obj +# # +# # +# # def new_obj(ping): +# # print(ping) +# beaker.set_objective_function(new_obj) +# objective_kwargs = {"epsilon": 1e-14} +# beaker.set +# noinspection PyUnusedLocal +def optimizer(func, x_guess): + lb = np.array([1e-1]) + ub = np.array([1e1]) + bounds = (lb, ub) + options = {'c1': 1e-3, 'c2': 1e-3, 'w': 0.9} + mini_optimizer = ps.single.global_best.GlobalBestPSO(n_particles=100, dimensions=1, + options=options, bounds=bounds) + f_opt, x_opt = mini_optimizer.optimize(func, iters=100) + + return x_opt + + minimizer_kwargs = {"method": 'SLSQP', "bounds": [(1e-1, 1e1)], "constraints": (), "options": {'disp': True, 'maxiter': 1000, 'ftol': 1e-6}} -est_enthalpy = beaker.fit(minimizer_kwargs) +# est_enthalpy = beaker.fit(optimizer=optimizer) +est_enthalpy = beaker.fit() print(est_enthalpy) -# info_dict = {"Nd(H(A)2)3(org)": {"h0": est_enthalpy}} -# -beaker.update_xml(est_enthalpy) -beaker.parity_plot() + +# beaker.update_xml(est_enthalpy) +# beaker.parity_plot() +# print(beaker.r_squared())