From 07d6a0ac3c65757475045cb93806b4a496619c12 Mon Sep 17 00:00:00 2001
From: titusquah <46580668+titusquah@users.noreply.github.com>
Date: Fri, 5 Jun 2020 14:22:19 -0600
Subject: [PATCH] added docs
---
.../1_getting_started-checkpoint.ipynb | 6 +
docs/Examples/1_getting_started.ipynb | 533 ++++++++++++++++++
2 files changed, 539 insertions(+)
create mode 100644 docs/Examples/.ipynb_checkpoints/1_getting_started-checkpoint.ipynb
create mode 100644 docs/Examples/1_getting_started.ipynb
diff --git a/docs/Examples/.ipynb_checkpoints/1_getting_started-checkpoint.ipynb b/docs/Examples/.ipynb_checkpoints/1_getting_started-checkpoint.ipynb
new file mode 100644
index 0000000..7fec515
--- /dev/null
+++ b/docs/Examples/.ipynb_checkpoints/1_getting_started-checkpoint.ipynb
@@ -0,0 +1,6 @@
+{
+ "cells": [],
+ "metadata": {},
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/docs/Examples/1_getting_started.ipynb b/docs/Examples/1_getting_started.ipynb
new file mode 100644
index 0000000..6f93a81
--- /dev/null
+++ b/docs/Examples/1_getting_started.ipynb
@@ -0,0 +1,533 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# REEPS Tutorial - Getting started\n",
+ "## Introduction\n",
+ "In this notebook, you will learn how to use REEPS to fit thermodynamic parameters to experimental data and explore how well the parameters fit.\n",
+ "## Installation\n",
+ "In your terminal run
\n",
+ "```$ git clone https://xgitlab.cels.anl.gov/summer-2020/parameter-estimation.git```
\n",
+ "Navigate into the folder with
\n",
+ "```$ cd parameter-estimation```
\n",
+ "And run
\n",
+ "```$ pip install -e .```
\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Import and instantiate REEPS"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "First, you will need to import the package and instantiate REEPS with a few parameters."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import sys # Navigating to reeps.py since not a package yet\n",
+ "sys.path.append('../../')\n",
+ "from reeps import REEPS\n",
+ "searcher_parameters = {'exp_csv_filename': '../../data/csvs/exp_data.csv',\n",
+ " 'phases_xml_filename': '../../data/xmls/twophase.xml',\n",
+ " 'opt_dict': {'Nd(H(A)2)3(org)': {'h0': -4662344.64}},\n",
+ " 'phase_names': ['HCl_electrolyte', 'PC88A_liquid'],\n",
+ " 'aq_solvent_name': 'H2O(L)',\n",
+ " 'extractant_name': '(HA)2(org)',\n",
+ " 'diluant_name': 'dodecane',\n",
+ " 'complex_name': 'Nd(H(A)2)3(org)',\n",
+ " 'rare_earth_ion_name': 'Nd+++',\n",
+ " 'aq_solvent_rho': 1000.0,\n",
+ " 'extractant_rho': 960.0,\n",
+ " 'diluant_rho': 750.0}\n",
+ "searcher = REEPS(**searcher_parameters)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Parameters explanation "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### exp_csv_filename"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "exp_csv_filename is the file name for the csv containing experimental data.
\n",
+ "Let us explore the format of this file with pandas"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " HI(m) | \n",
+ " REeq(m) | \n",
+ " D(m) | \n",
+ " ZI(m) | \n",
+ " Zeq | \n",
+ " Heq | \n",
+ " REI | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 0.01 | \n",
+ " 0.0239 | \n",
+ " 1.0921 | \n",
+ " 1 | \n",
+ " 0.921696 | \n",
+ " 0.088304 | \n",
+ " 0.050001 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 0.01 | \n",
+ " 0.0683 | \n",
+ " 0.4641 | \n",
+ " 1 | \n",
+ " 0.904906 | \n",
+ " 0.105094 | \n",
+ " 0.099998 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 0.01 | \n",
+ " 0.1170 | \n",
+ " 0.2821 | \n",
+ " 1 | \n",
+ " 0.900983 | \n",
+ " 0.109017 | \n",
+ " 0.150006 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 0.01 | \n",
+ " 0.1680 | \n",
+ " 0.1905 | \n",
+ " 1 | \n",
+ " 0.903988 | \n",
+ " 0.106012 | \n",
+ " 0.200004 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 0.01 | \n",
+ " 0.2637 | \n",
+ " 0.1377 | \n",
+ " 1 | \n",
+ " 0.891066 | \n",
+ " 0.118934 | \n",
+ " 0.300011 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " HI(m) REeq(m) D(m) ZI(m) Zeq Heq REI\n",
+ "0 0.01 0.0239 1.0921 1 0.921696 0.088304 0.050001\n",
+ "1 0.01 0.0683 0.4641 1 0.904906 0.105094 0.099998\n",
+ "2 0.01 0.1170 0.2821 1 0.900983 0.109017 0.150006\n",
+ "3 0.01 0.1680 0.1905 1 0.903988 0.106012 0.200004\n",
+ "4 0.01 0.2637 0.1377 1 0.891066 0.118934 0.300011"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "exp_df = pd.read_csv(searcher_parameters['exp_csv_filename'])\n",
+ "exp_df"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Each row is one experiment, and each column is a measured quantity.
\n",
+ "REEPS reads these columns by position, so it is important that your experimental file uses this column order. Column names do not matter.
\n",
+ "Below is a table explaining the meaning of the column headers"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "| Column | Meaning |\n",
+ "|---------|-----------------------------------------------------------------------------------------|\n",
+ "| HI(m) | Initial Concentration of H+ ions (mol/L) |\n",
+ "| REeq(m) | Equilibrium concentration of Rare Earth ions (mol/L) |\n",
+ "| D(m) | Equilibrium ratio of the amount of rare earth elements in organic to the amount in aqueous |\n",
+ "| ZI(m) | Initial concentration of extractant (mol/L) |\n",
+ "| Zeq | Equilibrium concentration of extractant (mol/L) |\n",
+ "| Heq | Equilibrium concentration of H+ ions (mol/L) |\n",
+ "| REI | Initial concentration of rare earth ions (mol/L) |"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### phases_xml_filename"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This is the xml file containing information to be loaded into Cantera, the thermodynamic modeling package.
\n",
+ "Please see parameter-estimation/data/xmls for file examples.
\n",
+ "We can explore what has been loaded."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[, ]\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(searcher.get_phases())"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "It is a list of two Cantera solutions so we will dig in a little further and see what species these solutions contain."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "HCl_electrolyte\n",
+ "['H2O(L)', 'H+', 'OH-', 'Cl-', 'Nd+++']\n",
+ "PC88A_liquid\n",
+ "['(HA)2(org)', 'dodecane', 'Nd(H(A)2)3(org)']\n"
+ ]
+ }
+ ],
+ "source": [
+ "for phase in searcher.get_phases():\n",
+ " print(phase.name)\n",
+ " print(phase.species_names)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "You can explore Cantera solutions further by visiting https://cantera.org/ and reading Cantera's documentation."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### opt_dict"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This is a dictionary that contains the information about what species and what thermodynamic properties are to be modified.
\n",
+ "The number after the thermodynamic property is the initial guess for the optimizer.
\n",
+ "In this example, we chose to optimize the standard enthalpy (h0) of the neodymium-PC88A complex ('Nd(H(A)2)3(org)') and give it an initial guess of -4662344.64. Thus,
\n",
+ "```python \n",
+ "opt_dict={'Nd(H(A)2)3(org)': {'h0': -4662344.64}}```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Say we also wanted to modify the extractant ('(HA)2(org)') and, for both species, change both the standard enthalpy (h0) and the molar volume (molarVolume). Then the dictionary would be\n",
+ "```python \n",
+ "opt_dict={'Nd(H(A)2)3(org)': {'h0': -4662344.64, 'molarVolume':1.01},\n",
+ " '(HA)2(org)': {'h0': -4662344.64, 'molarVolume':1.01}}```"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### phase_names"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "This is a list of the phase names in the xml file; each name can be found in the phase id field."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "#### Names and rhos"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "| Parameter | Meaning | Example value |\n",
+ "|---------------------|----------------------------------------------|-------------------|\n",
+ "| aq_solvent_name | Name of solvent in aqueous phase | 'H2O(L)' |\n",
+ "| extractant_name | Name of extractant in organic phase | '(HA)2(org)' |\n",
+ "| diluant_name | Name of diluent in organic phase | 'dodecane' |\n",
+ "| complex_name | Name of rare earth complex in organic phase | 'Nd(H(A)2)3(org)' |\n",
+ "| rare_earth_ion_name | Name of rare earth ion in aqueous phase | 'Nd+++' |\n",
+ "| rhos | Density of species (g/L) | 1000 for 'H2O(L)' |"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "For the parameters containing \"rho\", the value can be left as \"None\", in which case density is calculated from molecular weight and molar volume.
However, the molar volume values may be inaccurate and distort the calculations, so it is recommended to look up density values and use them in place of the default values."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Fitting thermodynamic properties to data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now that the thermodynamic properties have been set, we need to set up the optimizer.
The default optimizer is scipy.optimize.minimize, called with the arguments below."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "minimizer_kwargs = {\"method\": 'SLSQP',\n",
+ " \"bounds\": [(1e-1, 1e1)],\n",
+ " \"constraints\": (),\n",
+ " \"options\": {'disp': True, \n",
+ " 'maxiter': 1000, \n",
+ " 'ftol': 1e-6}}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "With the minimizer arguments defined, we can perform our fit.
\n",
+ "This minimizes the log mean squared error between the predicted and experimental distribution ratios (D)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Optimization terminated successfully. (Exit mode 0)\n",
+ " Current function value: 0.025193550841886146\n",
+ " Iterations: 5\n",
+ " Function evaluations: 19\n",
+ " Gradient evaluations: 5\n",
+ "{'Nd(H(A)2)3(org)': {'h0': -4704703.645715787}}\n"
+ ]
+ }
+ ],
+ "source": [
+ "est_enthalpy = searcher.fit()\n",
+ "print(est_enthalpy)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can see that the fit function returns a dictionary with the same structure as opt_dict."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Updating the xml"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now that we have our new values, let us write them to our original xml to replace the old values."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "searcher.update_xml(est_enthalpy)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Visualization and analysis"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can also see how well this new xml data fits the experimental data with a parity plot."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "searcher.parity_plot()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "We can also compute the r-squared value. The closer it is to 1, the better the model's predictions match the experimental data."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "0.997071413389365\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(searcher.r_squared())"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Yay! Good job! That is an amazing fit."
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.7.7"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}