From a8fd7169378769d21f43dbe8719d3599207f5442 Mon Sep 17 00:00:00 2001
From: titusquah <46580668+titusquah@users.noreply.github.com>
Date: Fri, 5 Jun 2020 14:20:59 -0600
Subject: [PATCH] Allow user to change optimizer and objective function. Also
added r-squared evaluator. Changed temp file location to user temp folder.
Added a prediction dictionary to access the values predicted by model.
---
.idea/dictionaries/Titus.xml | 5 +
.idea/workspace.xml | 90 +++++++-----
README.md | 27 +++-
data/xmls/twophase.xml | 2 +-
reeps.py | 271 ++++++++++++++++++++++++++++++-----
tests/test_reeps.py | 45 +++++-
6 files changed, 360 insertions(+), 80 deletions(-)
diff --git a/.idea/dictionaries/Titus.xml b/.idea/dictionaries/Titus.xml
index 7ca660c..b634bf6 100644
--- a/.idea/dictionaries/Titus.xml
+++ b/.idea/dictionaries/Titus.xml
@@ -2,16 +2,21 @@
coeffs
+ conc
diluant
+ disp
dodecane
equilibrate
extractant
+ ftol
kmol
lmse
+ maxiter
ndarray
pred
reeps
scipy
+ slsqp
thermo
diff --git a/.idea/workspace.xml b/.idea/workspace.xml
index ee250e0..18a656d 100644
--- a/.idea/workspace.xml
+++ b/.idea/workspace.xml
@@ -1,12 +1,12 @@
-
+
+
-
@@ -37,17 +37,17 @@
-
-
+
+
-
+
-
+
@@ -56,7 +56,7 @@
-
+
@@ -64,13 +64,13 @@
-
-
+
+
-
+
-
+
@@ -132,10 +132,10 @@
+
+
-
-
-
+
@@ -215,7 +215,14 @@
1591130305884
-
+
+ 1591209082778
+
+
+
+ 1591209082779
+
+
@@ -254,10 +261,10 @@
-
+
-
+
@@ -266,30 +273,30 @@
-
-
+
+
-
+
-
-
-
+
+
+
-
+
-
-
-
+
+
+
-
+
-
-
-
+
+
+
-
+
-
+
@@ -298,12 +305,12 @@
-
-
+
+
-
+
@@ -317,4 +324,15 @@
+
+
+
+
+ file://$PROJECT_DIR$/../../../Powell Research/CSTR-RL/ann-pso_ppo_compare_stochastic10/tester_pso_ann_v3.py
+ 9
+
+
+
+
+
\ No newline at end of file
diff --git a/README.md b/README.md
index 2aadc67..d3a7bb2 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,26 @@
-# parameter-estimation
+# REEPS
+REEPS is a toolkit for estimating standard thermodynamic parameters for Gibbs minimization.
+Extend a methodology for estimating standard thermodynamic parameters for Gibbs minimization in multiphase, multicomponent separations systems
-Extend a methodology for estimating standard thermodynamic parameters for Gibbs minimization in multiphase, multicomponent separations systems
\ No newline at end of file
+
+## Installation
+
+To install REEPS, clone the repository with the command
+
+```
+$ git clone https://xgitlab.cels.anl.gov/summer-2020/parameter-estimation.git
+```
+
+Navigate into the parameter-estimation folder with
+```
+cd parameter-estimation
+```
+and run the command below in your terminal
+```
+$ pip install -e.
+```
+### Dependencies
+REEPS uses packages: cantera (https://cantera.org/), pandas, numpy, scipy, xml, seaborn, and matplotlib
+
+## Usage
+Do random stuff and pray.
diff --git a/data/xmls/twophase.xml b/data/xmls/twophase.xml
index 28f9edb..ed936a8 100644
--- a/data/xmls/twophase.xml
+++ b/data/xmls/twophase.xml
@@ -50,7 +50,7 @@
298.14999999999998
- -4704703.645715787
+ -4704703.645715787
1117.965
0.0
diff --git a/reeps.py b/reeps.py
index 727fd23..8c97159 100644
--- a/reeps.py
+++ b/reeps.py
@@ -9,6 +9,8 @@ import seaborn as sns
import matplotlib.pyplot as plt
import shutil
import copy
+from inspect import signature
+import os
sns.set()
@@ -16,6 +18,7 @@ class REEPS:
"""REEPS (Rare earth extraction parameter searcher)
Takes in experimental data
Returns parameters for GEM
+ Only good for 1 rare earth and 1 extractant
:param exp_csv_filename: (str) csv file name with experimental data
:param phases_xml_filename: (str) xml file with parameters for equilibrium calc
:param opt_dict: (dict) optimize info {species:{thermo_prop:guess}
@@ -27,8 +30,34 @@ class REEPS:
:param rare_earth_ion_name: (str) name of rare earth ion in xml file
:param aq_solvent_rho: (float) density of solvent (g/L)
:param extractant_rho: (float) density of extractant (g/L)
- :param diluant_rho: (float) density of extractant (g/L)
+ :param diluant_rho: (float) density of diluant (g/L)
If no density is given, molar volume/molecular weight is used from xml
+ :param objective_function: (function) function to compute objective
+ By default, the objective function is log mean squared error
+ of distribution ratio np.log10(re_org/re_aq)
+ Function needs to take inputs:
+ objective_function(predicted_dict, measured_df, **kwargs)
+ **kwargs is optional
+ Below is the guide for referencing predicted values
+ | To access | Use |
+ |------------------------------------- |--------------------------|
+ | predicted rare earth eq conc in aq | predicted_dict['re_aq'] |
+ | predicted rare earth eq conc in org | predicted_dict['re_org'] |
+ | predicted hydrogen ion conc in aq | predicted_dict['h'] |
+ | predicted extractant conc in org | predicted_dict['z'] |
+ | predicted rare earth distribution ratio | predicted_dict['re_d'] |
+ For measured values, use the column names in the experimental data file
+ :param optimizer: (function) function to perform optimization
+ By default, the optimizer is scipy's optimize function with
+ default_kwargs= {"method": 'SLSQP',
+ "bounds": [(1e-1, 1e1)*len(x_guess)],
+ "constraints": (),
+ "options": {'disp': True, 'maxiter': 1000, 'ftol': 1e-6}}
+ Function needs to take inputs:
+ optimizer(objective_function, x_guess, **kwargs)
+ **kwargs is optional
+ :param temp_xml_file_path: (str) path to temporary xml file
+ default is local temp folder
"""
def __init__(self,
@@ -44,7 +73,12 @@ class REEPS:
aq_solvent_rho=None,
extractant_rho=None,
diluant_rho=None,
+ objective_function='Log-MSE',
+ optimizer='SLSQP',
+ temp_xml_file_path=None
):
+ self._built_in_obj_list = ['Log-MSE']
+ self._built_in_opt_list = ['SLSQP']
self._exp_csv_filename = exp_csv_filename
self._phases_xml_filename = phases_xml_filename
self._opt_dict = opt_dict
@@ -57,9 +91,14 @@ class REEPS:
self._aq_solvent_rho = aq_solvent_rho
self._extractant_rho = extractant_rho
self._diluant_rho = diluant_rho
-
- self._temp_xml_filename = "temp.xml"
- shutil.copyfile(phases_xml_filename, self._temp_xml_filename)
+ self._objective_function = None
+ self.set_objective_function(objective_function)
+ self._optimizer = None
+ self.set_optimizer(optimizer)
+ if temp_xml_file_path is None:
+ temp_xml_file_path = '{0}\\temp.xml'.format(os.getenv('TEMP'))
+ self._temp_xml_file_path = temp_xml_file_path
+ shutil.copyfile(phases_xml_filename, self._temp_xml_file_path)
self._phases = ct.import_phases(phases_xml_filename, phase_names)
self._exp_df = pd.read_csv(self._exp_csv_filename)
@@ -69,6 +108,27 @@ class REEPS:
self._org_ind = None
self.set_in_moles(feed_vol=1)
+ self._predicted_dict = None
+ self.update_predicted_dict()
+
+ @staticmethod
+ def log_mean_squared_error(predicted_dict, meas_df):
+ meas = meas_df['D(m)'].values
+ pred = predicted_dict['re_org'] / predicted_dict['re_aq']
+ log_pred = np.log10(pred)
+ log_meas = np.log10(meas)
+ obj = np.sum((log_pred - log_meas) ** 2)
+ return obj
+
+ @staticmethod
+ def slsqp_optimizer(objective, x_guess):
+ optimizer_kwargs = {"method": 'SLSQP',
+ "bounds": [(1e-1, 1e1) * len(x_guess)],
+ "constraints": (),
+ "options": {'disp': True, 'maxiter': 1000, 'ftol': 1e-6}}
+ res = minimize(objective, x_guess, **optimizer_kwargs)
+ est_parameters = res.x
+ return est_parameters
def get_exp_csv_filename(self) -> str:
return self._exp_csv_filename
@@ -76,6 +136,7 @@ class REEPS:
def set_exp_csv_filename(self, exp_csv_filename):
self._exp_csv_filename = exp_csv_filename
self._exp_df = pd.read_csv(self._exp_csv_filename)
+ self.update_predicted_dict()
return None
def get_phases(self) -> list:
@@ -86,8 +147,10 @@ class REEPS:
Also runs set_in_mole to set initial moles to 1 g/L"""
self._phases_xml_filename = phases_xml_filename
self._phase_names = phase_names
+ shutil.copyfile(phases_xml_filename, self._temp_xml_file_path)
self._phases = ct.import_phases(phases_xml_filename, phase_names)
self.set_in_moles(feed_vol=1)
+ self.update_predicted_dict()
return None
def get_opt_dict(self) -> dict:
@@ -232,35 +295,72 @@ class REEPS:
]
in_moles_data.append(species_moles)
self._in_moles = pd.DataFrame(in_moles_data, columns=mixed.species_names)
+ self.update_predicted_dict()
return None
def get_in_moles(self) -> pd.DataFrame:
return self._in_moles
- def objective(self, x):
- """Log mean squared error between measured and predicted data
- :param x: (list) thermo properties varied to minimize LMSE"""
- temp_xml_filename = self._temp_xml_filename
+ def set_objective_function(self, objective_function):
+ """Set objective function. see class docstring for instructions"""
+ if not callable(objective_function) \
+ and objective_function not in self._built_in_obj_list:
+ raise Exception(
+ "objective_function must be a function "
+ "or in this strings list: {0}".format(self._built_in_obj_list))
+ if callable(objective_function):
+ if len(signature(objective_function).parameters) < 2:
+ raise Exception(
+ "objective_function must be a function "
+ "with at least 3 arguments:"
+ " f(predicted_dict, experimental_df,**kwargs)")
+ if objective_function == 'Log-MSE':
+ objective_function = self.log_mean_squared_error
+ self._objective_function = objective_function
+ return None
+
+ def get_objective_function(self):
+ return self._objective_function
+
+ def set_optimizer(self, optimizer):
+ if not callable(optimizer) \
+ and optimizer not in self._built_in_opt_list:
+ raise Exception(
+ "optimizer must be a function "
+ "or in this strings list: {0}".format(self._built_in_opt_list))
+ if callable(optimizer):
+ if len(signature(optimizer).parameters) < 2:
+ raise Exception(
+ "optimizer must be a function "
+ "with at least 2 arguments: "
+ "f(objective_func,x_guess,**kwargs)")
+ if optimizer == 'SLSQP':
+ optimizer = self.slsqp_optimizer
+ self._optimizer = optimizer
+ return None
+
+ def get_optimizer(self):
+ return self._optimizer
+
+ def update_predicted_dict(self, phases_xml_filename=None):
+ if phases_xml_filename is None:
+ phases_xml_filename = self._phases_xml_filename
phase_names = self._phase_names
aq_ind = self._aq_ind
org_ind = self._org_ind
complex_name = self._complex_name
+ extractant_name = self._extractant_name
rare_earth_ion_name = self._rare_earth_ion_name
in_moles = self._in_moles
- exp_df = self._exp_df
-
- x = np.array(x)
- opt_dict = copy.deepcopy(self._opt_dict)
- i = 0
- for species_name in opt_dict.keys():
- for thermo_prop in opt_dict[species_name].keys():
- opt_dict[species_name][thermo_prop] *= x[i]
- i += 1
- self.update_xml(opt_dict, temp_xml_filename)
- phases_copy = ct.import_phases(temp_xml_filename, phase_names)
+ phases_copy = ct.import_phases(phases_xml_filename, phase_names)
mix = ct.Mixture(phases_copy)
- pred = []
+ predicted_dict = {"re_aq": [],
+ "re_org": [],
+ "h": [],
+ "z": []
+ }
+
for row in in_moles.values:
mix.species_moles = row
mix.equilibrate('TP', log_level=0)
@@ -268,17 +368,86 @@ class REEPS:
org_ind, complex_name)]
re_aq = mix.species_moles[mix.species_index(
aq_ind, rare_earth_ion_name)]
- pred.append(np.log10(re_org / re_aq))
- pred = np.array(pred)
- meas = np.log10(exp_df['D(m)'].values)
- obj = np.sum((pred - meas) ** 2)
- return obj
+ hydrogen_ions = mix.species_moles[mix.species_index(aq_ind, 'H+')]
+ extractant = mix.species_moles[mix.species_index(
+ org_ind, extractant_name)]
+ predicted_dict['re_aq'].append(re_aq)
+ predicted_dict['re_org'].append(re_org)
+ predicted_dict['h'].append(hydrogen_ions)
+ predicted_dict['z'].append(extractant)
+ predicted_dict['re_aq'] = np.array(predicted_dict['re_aq'])
+ predicted_dict['re_org'] = np.array(predicted_dict['re_org'])
+ predicted_dict['h'] = np.array(predicted_dict['h'])
+ predicted_dict['z'] = np.array(predicted_dict['z'])
+
+ self._predicted_dict = predicted_dict
+ return None
+
+ def get_predicted_dict(self):
+ return self._predicted_dict
+
+ def _internal_objective(self, x, kwargs=None):
+ """default Log mean squared error between measured and predicted data
+ :param x: (list) thermo properties varied to minimize LMSE
+ :param kwargs: (list) arguments for objective_function
+ """
+ temp_xml_file_path = self._temp_xml_file_path
+ exp_df = self._exp_df
+ objective_function = self._objective_function
+ opt_dict = copy.deepcopy(self._opt_dict)
+ i = 0
+ for species_name in opt_dict.keys():
+ for _ in opt_dict[species_name].keys():
+ i += 1
+ x = np.array(x)
+ if i == len(x.shape):
+ xs = np.array([x])
+ vectorized_x = False
+ else:
+ vectorized_x = True
+ xs = x
+ objective_values = []
+ for x in xs:
+ i = 0
+ for species_name in opt_dict.keys():
+ for thermo_prop in opt_dict[species_name].keys():
+ opt_dict[species_name][thermo_prop] *= x[i]
+ i += 1
+ self.update_xml(opt_dict, temp_xml_file_path)
+
+ self.update_predicted_dict(temp_xml_file_path)
+ predicted_dict = self.get_predicted_dict()
+
+ if kwargs is None:
+ # noinspection PyCallingNonCallable
+ obj = objective_function(predicted_dict, exp_df)
+ else:
+ # noinspection PyCallingNonCallable
+ obj = objective_function(predicted_dict, exp_df, **kwargs)
+ objective_values.append(obj)
+ if vectorized_x:
+ objective_values = np.array(objective_values)
+ else:
+ objective_values = objective_values[0]
+ return objective_values
- def fit(self, kwargs) -> float:
- """Fits experimental to modeled data by estimating complex reference enthalpy
+ def fit(self, objective_function=None, optimizer=None, objective_kwargs=None, optimizer_kwargs=None) -> dict:
+ """Fits experimental to modeled data by minimizing objective function
Returns estimated complex enthalpy in J/mol
- :param kwargs: (dict) parameters and options for scipy.minimize
+ :param objective_function: (function) function to compute objective
+ :param optimizer: (function) function to perform optimization
+ :param optimizer_kwargs: (dict) arguments for optimizer
+ :param objective_kwargs: (dict) arguments for objective function
"""
+ if objective_function is not None:
+ self.set_objective_function(objective_function)
+ if optimizer is not None:
+ self.set_optimizer(optimizer)
+
+ def objective(x):
+ return self._internal_objective(x, objective_kwargs)
+
+ optimizer = self._optimizer
opt_dict = copy.deepcopy(self._opt_dict)
# x_guess = []
i = 0
@@ -287,12 +456,20 @@ class REEPS:
# x_guess.append(opt_dict[species_name][thermo_prop])
i += 1
x_guess = np.ones(i)
- res = minimize(self.objective, x_guess, **kwargs)
+
+ if optimizer_kwargs is None:
+ # noinspection PyCallingNonCallable
+ est_parameters = optimizer(objective, x_guess)
+ else:
+ # noinspection PyCallingNonCallable
+ est_parameters = optimizer(objective, x_guess, **optimizer_kwargs)
+
i = 0
for species_name in opt_dict.keys():
for thermo_prop in opt_dict[species_name].keys():
- opt_dict[species_name][thermo_prop] *= res.x[i]
+ opt_dict[species_name][thermo_prop] *= est_parameters[i]
i += 1
+ self.update_predicted_dict()
return opt_dict
def update_xml(self,
@@ -323,8 +500,11 @@ class REEPS:
now.year))
tree.write(phases_xml_filename)
+ if phases_xml_filename == self._phases_xml_filename:
+ self.set_phases(self._phases_xml_filename, self._phase_names)
+ return None
- def parity_plot(self):
+ def parity_plot(self, species='re_aq'):
"""Parity plot between measured and predicted rare earth composition"""
phases_copy = self._phases.copy()
mix = ct.Mixture(phases_copy)
@@ -345,8 +525,31 @@ class REEPS:
max_data = np.max([pred, meas])
min_max_data = np.array([min_data, max_data])
fig, ax = plt.subplots()
- sns.scatterplot(meas, pred, color="r")
- sns.lineplot(min_max_data, min_max_data, color="b")
- ax.set(xlabel='Measured X equilibrium', ylabel='Predicted X equilibrium')
+ sns.scatterplot(meas, pred, color="r",
+ label="Rare earth equilibrium concentration (mol/L)")
+ sns.lineplot(min_max_data, min_max_data, color="b", label="")
+ ax.set(xlabel='Measured', ylabel='Predicted')
plt.show()
return None
+
+ def r_squared(self):
+ """r-squared value comparing measured and predicted rare earth composition"""
+ phases_copy = self._phases.copy()
+ mix = ct.Mixture(phases_copy)
+ aq_ind = self._aq_ind
+ exp_df = self._exp_df
+ in_moles = self._in_moles
+ rare_earth_ion_name = self._rare_earth_ion_name
+ pred = []
+ for row in in_moles.values:
+ mix.species_moles = row
+ mix.equilibrate('TP', log_level=0)
+ re_aq = mix.species_moles[mix.species_index(
+ aq_ind, rare_earth_ion_name)]
+ pred.append(re_aq)
+ predicted_y = np.array(pred)
+ actual_y = exp_df['REeq(m)'].values
+ num = sum((actual_y - predicted_y) ** 2)
+ den = sum((actual_y - np.mean(actual_y)) ** 2)
+ r_2 = (1 - num / den)
+ return r_2
diff --git a/tests/test_reeps.py b/tests/test_reeps.py
index 911f1e6..da361cd 100644
--- a/tests/test_reeps.py
+++ b/tests/test_reeps.py
@@ -1,22 +1,53 @@
import json
+import numpy as np
+import pyswarms as ps
import sys
-
sys.path.append('../')
from reeps import REEPS
-
with open('one_ree_settings.txt') as file:
testing_params = json.load(file)
beaker = REEPS(**testing_params)
+
+# def new_obj(predicted_dict, meas_df, epsilon):
+# meas_cols = list(meas_df)
+# pred_keys = list(predicted_dict.keys())
+# meas = meas_df[meas_cols[2]]
+# pred = (predicted_dict['re_org'] + epsilon) / (predicted_dict['re_aq'] + epsilon)
+# log_pred = np.log10(pred)
+# log_meas = np.log10(meas)
+# obj = np.sum((log_pred - log_meas) ** 2)
+# return obj
+# #
+# #
+# # def new_obj(ping):
+# # print(ping)
+# beaker.set_objective_function(new_obj)
+# objective_kwargs = {"epsilon": 1e-14}
+# beaker.set
+# noinspection PyUnusedLocal
+def optimizer(func, x_guess):
+ lb = np.array([1e-1])
+ ub = np.array([1e1])
+ bounds = (lb, ub)
+ options = {'c1': 1e-3, 'c2': 1e-3, 'w': 0.9}
+ mini_optimizer = ps.single.global_best.GlobalBestPSO(n_particles=100, dimensions=1,
+ options=options, bounds=bounds)
+ f_opt, x_opt = mini_optimizer.optimize(func, iters=100)
+
+ return x_opt
+
+
minimizer_kwargs = {"method": 'SLSQP',
"bounds": [(1e-1, 1e1)],
"constraints": (),
"options": {'disp': True, 'maxiter': 1000, 'ftol': 1e-6}}
-est_enthalpy = beaker.fit(minimizer_kwargs)
+# est_enthalpy = beaker.fit(optimizer=optimizer)
+est_enthalpy = beaker.fit()
print(est_enthalpy)
-# info_dict = {"Nd(H(A)2)3(org)": {"h0": est_enthalpy}}
-#
-beaker.update_xml(est_enthalpy)
-beaker.parity_plot()
+
+# beaker.update_xml(est_enthalpy)
+# beaker.parity_plot()
+# print(beaker.r_squared())