LLEPE/docs/_Examples/iterative_fitter_eval_graph...

#  LLEPE: Liquid-Liquid Equilibrium Parameter Estimator
#  Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
#  Released under the modified BSD license. See LICENSE for more details.

import llepe
import pandas as pd
import numpy as np
import json
import matplotlib.pyplot as plt
import matplotlib
import re


def set_size(w, h, ax=None):
    """ w, h: width, height in inches """
    if not ax:
        ax = plt.gca()
    left = ax.figure.subplotpars.left
    right = ax.figure.subplotpars.right
    top = ax.figure.subplotpars.top
    bottom = ax.figure.subplotpars.bottom
    fig_width = float(w) / (right - left)
    fig_height = float(h) / (top - bottom)
    ax.figure.set_size_inches(fig_width, fig_height)


font = {'family': 'sans serif',
        'size': 24}
matplotlib.rc('font', **font)
matplotlib.rc('xtick', labelsize=18)
matplotlib.rc('ytick', labelsize=18)
matplotlib.rcParams['lines.linewidth'] = 4
matplotlib.rcParams['lines.markersize'] = 10


def ext_to_complex(h0, custom_obj_dict, mini_species):
    linear_params = custom_obj_dict['lin_param_df']
    row = linear_params[linear_params['species'] == mini_species]
    return row['slope'].values[0] * h0[0] + row['intercept'].values[0]


def mod_lin_param_df(lp_df, input_val, mini_species, mini_lin_param):
    new_lp_df = lp_df.copy()
    index = new_lp_df.index[new_lp_df['species'] == mini_species].tolist()[0]
    new_lp_df.at[index, mini_lin_param] = input_val
    return new_lp_df


info_df = pd.read_csv('outputs/iterative_fitter_output4.csv')
test_row = -1
pitzer_params_filename = "../../data/jsons/min_h0_pitzer_params.txt"
with open(pitzer_params_filename) as file:
    pitzer_params_dict = json.load(file)
pitzer_params_df = pd.DataFrame(pitzer_params_dict)
species_list = 'Nd,Pr,Ce,La,Dy,Sm,Y'.split(',')
pitzer_param_list = ['beta0', 'beta1']
labeled_data = pd.read_csv("../../data/csvs/"
                           "PC88A_HCL_NdPrCeLaDySmY.csv")
labeled_data = labeled_data.sort_values(['Feed Pr[M]', 'Feed Ce[M]'],
                                        ascending=True)
exp_data = labeled_data.drop(labeled_data.columns[0], axis=1)
xml_file = "PC88A_HCL_NdPrCeLaDySmY_w_pitzer.xml"
lin_param_df = pd.read_csv("../../data/csvs"
                           "/zeroes_removed_min_h0_pitzer_lin_params.csv")
estimator_params = {'exp_data': exp_data,
                    'phases_xml_filename': xml_file,
                    'phase_names': ['HCl_electrolyte', 'PC88A_liquid'],
                    'aq_solvent_name': 'H2O(L)',
                    'extractant_name': '(HA)2(org)',
                    'diluant_name': 'dodecane',
                    'complex_names': ['{0}(H(A)2)3(org)'.format(species)
                                      for species in species_list],
                    'extracted_species_ion_names': ['{0}+++'.format(species)
                                                    for species in
                                                    species_list],
                    'aq_solvent_rho': 1000.0,
                    'extractant_rho': 960.0,
                    'diluant_rho': 750.0,
                    'temp_xml_file_path': 'outputs/temp.xml',
                    'objective_function': llepe.lmse_perturbed_obj
                    }
dependant_params_dict = {}
for species, complex_name in zip(species_list,
                                 estimator_params['complex_names']):
    inner_dict = {'upper_element_name': 'species',
                  'upper_attrib_name': 'name',
                  'upper_attrib_value': complex_name,
                  'lower_element_name': 'h0',
                  'lower_attrib_name': None,
                  'lower_attrib_value': None,
                  'input_format': '{0}',
                  'function': ext_to_complex,
                  'kwargs': {"mini_species": species},
                  'independent_params': '(HA)2(org)_h0'}
    dependant_params_dict['{0}_h0'.format(complex_name)] = inner_dict
info_dict = {'(HA)2(org)_h0': {'upper_element_name': 'species',
                               'upper_attrib_name': 'name',
                               'upper_attrib_value': '(HA)2(org)',
                               'lower_element_name': 'h0',
                               'lower_attrib_name': None,
                               'lower_attrib_value': None,
                               'input_format': '{0}',
                               'input_value':
                                   info_df.iloc[test_row, :]['best_ext_h0']}}
for species in species_list:
    for pitzer_param in pitzer_param_list:
        pitzer_str = "{0}_{1}".format(species, pitzer_param)
        value = info_df.iloc[test_row, :][pitzer_str]
        pitzer_params_dict[pitzer_str]['input_value'] = value
    lin_str = "{0}_slope".format(species)
    inner_dict = {'custom_object_name': 'lin_param_df',
                  'function': mod_lin_param_df,
                  'kwargs': {'mini_species': species,
                             'mini_lin_param': 'slope'},
                  'input_value': 3
                  }
    info_dict[lin_str] = inner_dict
    lin_str = "{0}_intercept".format(species)
    value = info_df.iloc[test_row, :][lin_str]
    inner_dict = {'custom_object_name': 'lin_param_df',
                  'function': mod_lin_param_df,
                  'kwargs': {'mini_species': species,
                             'mini_lin_param': 'intercept'},
                  'input_value': value
                  }
    info_dict[lin_str] = inner_dict

info_dict.update(pitzer_params_dict)
estimator = llepe.LLEPE(**estimator_params)
estimator.set_custom_objects_dict({'lin_param_df': lin_param_df})
estimator.update_custom_objects_dict(info_dict)
estimator.update_xml(info_dict,
                     dependant_params_dict=dependant_params_dict)
compared_value = 'La_org_eq'
plot_title = None
legend = True
predicted_dict = estimator.get_predicted_dict()
exp_df = estimator.get_exp_df()
pred = pd.DataFrame(predicted_dict)[compared_value].fillna(0).values
meas = exp_df[compared_value].fillna(0).values
name_breakdown = re.findall('[^_\W]+', compared_value)
compared_species = name_breakdown[0]
data_labels = list(labeled_data['label'])
if compared_species == 'h':
    feed_molarity = exp_df['h_i'].fillna(0).values
elif compared_species == 'z':
    feed_molarity = exp_df['z_i'].fillna(0).values
else:
    feed_molarity = exp_df[
        '{0}_aq_i'.format(compared_species)].fillna(0).values
combined_df = pd.DataFrame({'pred': pred,
                            'meas': meas,
                            'label': data_labels,
                            'feed_molarity': feed_molarity})
combined_df = combined_df[(combined_df['feed_molarity'] != 0)]
meas = combined_df['meas'].values
pred = combined_df['pred'].values

min_data = np.min([pred, meas])
max_data = np.max([pred, meas])
min_max_data = np.array([min_data, max_data])

if compared_species == 'h':
    default_title = '$H^+$ eq. conc. (mol/L)'
elif compared_species == 'z':
    default_title = '{0} eq. conc. (mol/L)'.format(extractant_name)
else:
    phase = name_breakdown[1]
    if phase == 'aq':
        extracted_species_charge = extracted_species_charges[
            extracted_species_list.index(
                compared_species)]
        default_title = '$%s^{%d+}$ eq. conc. (mol/L)' \
                        % (compared_species, extracted_species_charge)
    elif phase == 'd':
        default_title = '{0} distribution ratio'.format(
            compared_species)
    else:
        default_title = '{0} complex eq. conc. (mol/L)'.format(
            compared_species)
fig, ax = plt.subplots(figsize=(8, 6))

if isinstance(data_labels, list):
    # unique_labels = list(set(data_labels))
    unique_labels = ['Li (1987)',
                     'Kim (2012)',
                     'Formiga (2016)',
                     'Banda (2014)',
                     ]
    color_list = ['r', 'g', 'b', 'm']
    marker_list = ['o', 's', 'P', 'X', ]
    for ind, label in enumerate(unique_labels):
        filtered_data = combined_df[combined_df['label'] == label]
        filtered_meas = filtered_data['meas']
        filtered_pred = filtered_data['pred']
        if len(filtered_pred) != 0:
            ax.scatter(filtered_meas,
                       filtered_pred,
                       label=label,
                       color=color_list[ind],
                       marker=marker_list[ind])
    if legend:
        ax.legend(loc=4)
ax.plot(min_max_data, min_max_data, color="b", label="")

ax.text(min_max_data[0],
        min_max_data[1] * 0.9,
        '$R^2$={0:.2f}'.format(estimator.r_squared(compared_value)))

ax.set(xlabel='Measured', ylabel='Predicted')
if plot_title is None:
    ax.set_title(default_title)
set_size(8, 6)
plt.tight_layout()
plt.show()
# exp_data = estimator.get_exp_df()
# feed_cols = []
# for col in exp_data.columns:
#     if 'aq_i' in col:
#         feed_cols.append(col)
# exp_data['total_re'] = exp_data[feed_cols].sum(axis=1)
# label_list = []
# for index, row in exp_data[feed_cols].iterrows():
#     bool_list = list((row > 0).values)
#     label = ''
#     for species, el in zip(species_list, bool_list):
#         if el:
#             label = '{0}-{1}'.format(label, species)
#     label = label[1:]
#     label_list.append(label)
# r2s = ""
# for species in species_list:
#     # if species=='La':
#     # save_name = 'outputs' \
#     #             '/parity_iterative_fitter_{0}_org_eq'.format(species)
#     save_name = None
#     fig, ax = estimator.parity_plot('{0}_org_eq'.format(species),
#                                     c_data=
#                                     exp_data['total_re'].values,
#                                     c_label='Feed total RE '
#                                             'molarity (mol/L)',
#                                     print_r_squared=False,
#                                     plot_title='')
#     ax.plot([0, 0.05], [0, 0.05], c='b')
#     ax.text(0.01, 0.04,
#             '$R^2$={0:.2f}'.format(estimator.r_squared(
#                 '{0}_org_eq'.format(species))))
#     ax.set_xlim((0, 0.05))
#     ax.set_ylim((0, 0.05))
#     r2s += str(estimator.r_squared('{0}_org_eq'.format(species))) + ','
#
#     # fig, ax = estimator.parity_plot('{0}_org_eq'.format(species),
#     #                                 data_labels=list(labeled_data['label']),
#     #                                 print_r_squared=True,
#     #                                 save_path=save_name)
#     # ax.legend(loc=4)
# pred_df = pd.DataFrame(estimator.get_predicted_dict())
# new_cols = []
# for col in pred_df.columns:
#     new_cols.append("pred_{0}".format(col))
# pred_df.columns = new_cols
# new_cols = ['label',
#             'h_i',
#             'h_eq',
#             'z_i',
#             'z_eq'
#             ]
# for species in species_list:
#     new_cols.append("{0}_aq_i".format(species))
#     new_cols.append("{0}_aq_eq".format(species))
#     new_cols.append("{0}_d_eq".format(species))
# labeled_data.columns = new_cols
# total_df = labeled_data.join(pred_df)
# total_df.to_csv('if_mse_total_df.csv')
# short_info_dict = {}
# for key, value in info_dict.items():
#     short_info_dict[key] = value['input_value']
# with open("outputs/iterative_fitter_short_info_dict.txt", 'w') as file:
#     json.dump(short_info_dict, file)