Source code for chamber.analysis.experiments

"""
Analysis of test results.

Description to come, see:
http://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html

Notes
-----
This could actually be a class that extends DataFrame. In this case,
the module level functions would become methods of the class. But for now,
the following implementation will allow futher development.

Functions
---------
- `mass_transfer` -- perform analysis on the experimental data.
- `preprocess` -- ready a `DataFrame` for analysis.
- `results_from_csv` -- get analysis results from a .csv file.

"""
from collections import defaultdict
from itertools import repeat
import math
import multiprocessing as multi

from CoolProp.HumidAirProp import HAPropsSI
import pandas as pd
import numpy as np
import scipy.signal as signal

from chamber.analysis import chi2
from chamber.models import models
from chamber.models import uncertainty as unc

TC_LIST = ['TC{0}'.format(i) for i in range(4, 14)]
TC_SET = set(TC_LIST)

FILTER_LENGTH = 3601
RH_STEP_PCT = 5

R_TUBE = 1.5e-2
A_TUBE = math.pi * pow(R_TUBE, 2)

CPU_COUNT = multi.cpu_count()


# --------------------------------------------------------------------------- #
# Public Functions
# --------------------------------------------------------------------------- #
[docs]def preprocess( dataframe, param_list=['PressureSmooth', 'TeSmooth', 'DewPointSmooth'], purge=False ): """ Readies a `DataFrame` for further analysis. This function takes in a `DataFrame` and calls all of the module helper functions to produce a `DataFrame` that is ready for analysis. The following steps are included in 'preprocess': '_zero_time' '_format_temp' '_format_dew_point' '_format_pressure' '_add_avg_te' '_add_smooth_avg_te' '_add_smooth_dew_point' '_add_smooth_pressure' '_add_rh' See respective docstrings for more details. Parameters ---------- dataframe: DataFrame A `DataFrame` of experimental data to be preprocessed. param_list: list(str) List of parameters to use to calculate the relative humidity using the CoolProp API, see `_get_coolprop_rh` docstring for more detail. Defaults to: `['PressureSmooth', 'TeSmooth', 'DewPointSmooth']`. purge: bool If `True` the original raw data is removed from the returned `DataFrame`. If `False` the raw data remains in the 'DataFrame'. Defaults to `False`. Returns ------- DataFrame The preprocessed data. Examples -------- .. todo:: Examples. """ dataframe = _zero_time(dataframe) dataframe = _format_temp(dataframe) dataframe = _format_dew_point(dataframe) dataframe = _format_pressure(dataframe) dataframe = _add_avg_te(dataframe, purge=purge) dataframe = _add_smooth_avg_te(dataframe, purge=purge) dataframe = _add_smooth_dew_point(dataframe, purge=purge) dataframe = _add_smooth_pressure(dataframe, purge=purge) dataframe = _add_rh(dataframe, param_list=param_list, purge=purge) return dataframe
[docs]def mass_transfer(dataframe, sigma=4e-8, steps=100, plot=False): """ Perform analysis of experiment. Use the dataframe and uncertainty to perform an opening window analysis. This function finds all valid relative humidity targets for a given `DataFrame`. For each of these targets, the function determines where the RH occurs, generates windows up to `max_half_length`, performs analysis on each window, and returns the results. Parameters ---------- dataframe: DataFrame Processed experimental data. sigma: float Standard deviation of mass measurement. Defaults to 4e-8 accoriding to the spec sheet. steps: int Steps to increase the `half_length`. Returns ------- DataFrame Results of the analysis of the test. Attributes include: - `a`: y-intercept - `sig_a`: standard deviation of `a` - `b`: slope (mass-transfer) - `b_sig`: standard deviation of `b` - `chi_2`: chi-square metric - `Q`: goodness of fit score (survival function) - `nu`: degrees of freedom - `RH`: target relative humidity - `SigRH`: target relative humidity - `spalding_mdpp`: mass flux determinded by Spalding Model Examples -------- .. todo:: Examples. """ print('Starting analysis...') rh_targets = _get_valid_rh_targets(dataframe) res = [] pool = multi.Pool(CPU_COUNT) no_format_list = pool.starmap( _multi_analysis, zip( repeat(dataframe), repeat(sigma), repeat(steps), repeat(plot), rh_targets ) ) for col in no_format_list: res += col pool.close() pool.join() res_df = pd.DataFrame( res, columns=['A', 'SigA', 'B', 'SigB', 'Chi2', 'Q', 'Nu', 'RH', 'ObsIdx'] ) print('Analysis complete.') return res_df
[docs]def rh_targets_df(dataframe, res_df): """Build the RHTargets table and run Spalding analysis.""" rh_targets = defaultdict(list) rh_list = list(res_df['RH'].unique()) pool = multi.Pool(CPU_COUNT) rh_target_columns = pool.starmap( _multi_rh_targets, zip(rh_list, repeat(dataframe), repeat(res_df)) ) pool.close() pool.join() rh_targets_df = pd.DataFrame( rh_target_columns, columns=['RH', 'SigRH', 'Nu'] ) return rh_targets_df
def _multi_rh_targets(rh, dataframe, res_df): """Build a row for the RHTargets table.""" rh_specific_df = res_df[res_df['RH'] == rh] analysis_row = _get_df_row(rh_specific_df) data_idx = analysis_row['ObsIdx'].iloc[0].item() data_row = dataframe.iloc[data_idx] sig_rh = data_row['SigRH'].item() nu = analysis_row['Nu'].iloc[0].item() rh_target_columns = [rh, sig_rh, nu] return rh_target_columns def _multi_analysis(dataframe, sigma, steps, plot, rh): """Extract values from dataframe and build the analyzed rows as lists.""" idx = _get_target_idx(dataframe, rh) stats = [] for _len in _half_len_gen(dataframe, idx, steps=steps): stats.append(_add_mass(dataframe, rh, _len, idx, steps, sigma, plot)) return stats def _add_mass(dataframe, rh, len_, idx, steps, sigma, plot): """Calculate the Chi2 statistics for a single row of data.""" time, mass = _get_stat_group(dataframe, idx, len_) stats = chi2.chi2(time, mass, sigma, plot=plot) # Check SigRH for accuracy later. Probably fine. stats.append(rh) stats.append(idx) return stats # --------------------------------------------------------------------------- # # Internal Use Functions (Weakly Enforced) # --------------------------------------------------------------------------- # def _zero_time(dataframe): """Remove offset from `Idx` attribute of `DataFrame` in place.""" dataframe.Idx = dataframe.Idx - min(dataframe.Idx) return dataframe # --------------------------------------------------------------------------- # # Formatting # --------------------------------------------------------------------------- # def _format_temp(dataframe): """ Format observed temperature. Round thermocouple attributes of the `DataFrame` to one decimal place. Operation is performed in place. thermocouple attributes: `TC4`, `TC5`, `TC6`, `TC7`, `TC8`, `TC9`, `TC10`, `TC11`, `TC12`, `TC13` """ dataframe.loc[:, TC_LIST] = dataframe.loc[:, TC_LIST].round(1) return dataframe def _format_dew_point(dataframe): """ Format dew point observation. Round `DewPoint` attribute of the `DataFrame` to one decimal place. Operation is performed in place. """ dataframe.DewPoint = dataframe.DewPoint.round(1) return dataframe def _format_pressure(dataframe): """ Format pressure observation. Convert `Pressure` attribute of the `DataFrame` to an integer. Operation is performed in place. """ dataframe.Pressure = dataframe.Pressure.round(0).astype(int) return dataframe # --------------------------------------------------------------------------- # # Averaging # --------------------------------------------------------------------------- # def _add_avg_te(dataframe, purge=False): """ Add average temperature to the `DataFrame`. Add `Te` attribute to the `DataFrame` that is an average of all other thermocouple attributes. Operation is performed in place. thermocouple attributes ----------------------- `TC4`, `TC5`, `TC6`, `TC7`, `TC8`, `TC9`, `TC10`, `TC11`, `TC12`, `TC13` Parameters ---------- dataframe: DataFrame Experimental data containing TC4-TC13 attributes. purge: bool If `True` delete TC4-TC13 attributes from `DataFrame` in place. Defaults to `False`. Returns ------- DataFrame: Tansformed `DataFrame` containing a new `Te` attribute. If `purge == True` then TC4-TC13 will have been dropped. """ pool = multi.Pool(CPU_COUNT) df_stack = np.array_split(dataframe, CPU_COUNT) dataframe['Te'] = pd.concat(pool.map(_multi_te, df_stack)) pool.close() pool.join() if purge: dataframe.drop(columns=TC_LIST, inplace=True) return dataframe def _multi_te(dataframe): """Add average temperature to the `DataFrame`.""" df_te = dataframe.loc[:, TC_LIST].apply(np.mean, axis=1).round(1) return df_te # --------------------------------------------------------------------------- # # Smoothing # --------------------------------------------------------------------------- # def _add_smooth_avg_te(dataframe, purge=False): """ Add smoothed temperature. Add `TeSmooth` attribute to the `DataFrame`. Operation is performed in place. Parameters ---------- dataframe: DataFrame Experimental data containing `Te` attribute. purge: bool If `True` delete `Te` attributes from `DataFrame` in place. Defaults to `False`. Returns ------- DataFrame Transformed `DataFrame` containing a new `TeSmooth` attribute. If `purge == True` then `Te` attribute was dropped. """ dataframe['TeSmooth'] = signal.savgol_filter( dataframe.Te, FILTER_LENGTH, 1 ) dataframe.TeSmooth = dataframe.TeSmooth.round(1) if purge: dataframe.drop(columns='Te', inplace=True) return dataframe def _add_smooth_dew_point(dataframe, purge=False): """ Add smoothed dew point. Add `DewPointSmooth` attribute to the `DataFrame`. Operation is performed in place. Parameters ---------- dataframe: DataFrame Experimental data containing `DewPoint` attribute. purge: bool If `True` delete `DewPoint` attributes from `DataFrame` in place. Defaults to `False`. Returns ------- DataFrame Transformed `DataFrame` containing a new `DewPointSmooth` attribute. If `purge == True` then `DewPoint`attribute was dropped. """ dataframe['DewPointSmooth'] = signal.savgol_filter( dataframe.DewPoint, FILTER_LENGTH, 1 ) dataframe.DewPointSmooth = dataframe.DewPointSmooth.round(1) if purge: dataframe.drop(columns='DewPoint', inplace=True) return dataframe def _add_smooth_pressure(dataframe, purge=False): """ Add smoothed pressure. Add `PressureSmooth` attribute to the `DataFrame`. Operation is performed in place. Parameters ---------- dataframe: DataFrame Experimental data containing `Pressure` attribute. purge: bool If `True` delete `Pressure` attributes from `DataFrame` in place. Defaults to `False`. Returns ------- DataFrame Transformed `DataFrame` containing a new `DewPointSmooth` attribute. If `purge == True` then `DewPoint` attribute was dropped. """ dataframe['PressureSmooth'] = signal.savgol_filter( dataframe.Pressure, FILTER_LENGTH, 1 ) dataframe.Pressure = dataframe.Pressure.round(0).astype(int) if purge: dataframe.drop(columns='Pressure', inplace=True) return dataframe # --------------------------------------------------------------------------- # # Relative Humidity # --------------------------------------------------------------------------- # def _get_coolprop_rh(params): """ Get relative humidity. This is a wrapper for the CoolProp.HumidAirProp.HAPropsSI API in Python. It accepts a list of pressure (Pa), temperature (K), and dew-point (K) and converts it to the required API. This allows the use of pandas.DataFrame.apply(_get_rh, axis=1) which is more readable. Parameters ---------- params: list(float) A list of at least three elements where the first, second, and third elements are the values of pressure (Pa), temperature (K), and dew-point (K) respectively. Returns ------- float Dimensionless relative humidity as specified by the `param` input list. """ pressure = params[0] temp = params[1] dew_point = params[2] return HAPropsSI('RH', 'P', pressure, 'T', temp, 'Tdp', dew_point) def _get_coolprop_rh_err(params): """ Get relative humidity error. This is a wrapper for the CoolProp.HumidAirProp.HAPropsSI API in Python. It accepts a list of pressure (Pa), temperature (K), and dew-point (K) and converts it to the required API. This allows the use of pandas.DataFrame.apply(_get_rh, axis=1) which is more readable. Parameters ---------- params: list(float) A list of at least three elements where the first, second, and third elements are the values of pressure (Pa), temperature (K), and dew-point (K) respectively. Returns ------- float Dimensionless relative humidity error as specified by the `param` input list. """ pressure = params[0] temp = params[1] dew_point = params[2] high_rh = HAPropsSI('RH', 'P', pressure, 'T', temp, 'Tdp', dew_point + 0.2) low_rh = HAPropsSI('RH', 'P', pressure, 'T', temp, 'Tdp', dew_point - 0.2) rh = HAPropsSI('RH', 'P', pressure, 'T', temp, 'Tdp', dew_point) upper_err = abs(high_rh - rh) lower_err = abs(rh - low_rh) if upper_err > lower_err: return upper_err else: return lower_err def _add_rh( dataframe, param_list=['PressureSmooth', 'TeSmooth', 'DewPointSmooth'], purge=False ): """ Add relative humidity to the `DataFrame`. Add `RH` attribute to the `DataFrame`. Operation is performed in place and uses the `_get_coolprop_rh` wrapper for the CoolProp.HumidAirProp.HAPropsSI API in Python. Parameters ---------- dataframe: DataFrame Experimental data containing (by default) `PressureSmooth`, `TeSmooth`, and `DewPointSmooth` attributes. This behavior can be modified using the optional `param_list` parameter, see below. param_list: list(str) List of parameters to use to calculate the relative humidity using the CoolProp API, see `_get_coolprop_rh` docstring for more detail. purge: bool If `True` delete the third element from the `param_list`. This third element (index = 2) represents the dew-point. It could be `DewPoint`, `DewPointSmooth`, etc. Defaults to `False`. Returns ------- DataFrame Transformed `DataFrame` containing a new `RH` attribute. If `purge == True` then the attribute representing the dew-point was dropped. """ pool = multi.Pool(CPU_COUNT) df_stack = np.array_split(dataframe, CPU_COUNT) dataframe['RH'] = pd.concat( pool.starmap(_multi_rh, zip(df_stack, repeat(param_list))) ) dataframe['SigRH'] = pd.concat( pool.starmap(_multi_rh_err, zip(df_stack, repeat(param_list))) ) pool.close() pool.join() if purge: dataframe.drop(columns=param_list[2], inplace=True) return dataframe def _multi_rh(dataframe, param_list): """Apply `_get_coolprop_rh` to the argument dataframe.""" df = dataframe.loc[:, param_list].apply(_get_coolprop_rh, axis=1) return df def _multi_rh_err(dataframe, param_list): """Apply `_get_coolprop_rh_err` to the argument dataframe.""" df = dataframe.loc[:, param_list].apply(_get_coolprop_rh_err, axis=1) return df # --------------------------------------------------------------------------- # # Selecting Data For Fits # --------------------------------------------------------------------------- # def _get_max_half_len(dataframe, target_idx, max_=10000): """ Get maximum window half length. Return the maximum half-length of datapoints before hitting either the first element (left) or the last element (right). The half-length is used to extract a sample of 2*half-length + 1 data points centered at the target index. Parameters ---------- dataframe: DataFrame Experimental data. The DataFrame is only used to determine the number of elements it contains. The dataframe is input rather than its length explicitly to increase readability and code consistancy. Other much more substantial bottlenecks exist in this module; for example, the adding relative humidity. target_idx: int Index in the dataframe that corresponds to the relative humidity. When the time-step is one second, the `Idx` attribute and the DataFrame.index will be the same, however, this is not always the case. Because the sampling rate may change, the decision is made to make the DataFrame.index the standard, unless regression analysis is being performed. Returns ------- int Maximum half window length. Raises ------ IndexError If calculated `max_half_length is zero or negative. This occurs when the `target_idx` is negative, zero, >= len(DataFrame). """ to_the_right = (len(dataframe)-1) - target_idx to_the_left = target_idx max_half_len = min(to_the_left, to_the_right) if max_half_len > max_: max_half_len = max_ if max_half_len > 0: return max_half_len else: raise IndexError( "no 'half length' available for the target index: " "target must have at least one element to right or left" ) def _get_target_idx(dataframe, target_rh): """ Get target index. Get the target_idx from the `DataFrame` given a target dimensionless relative humidity. As noted in the `_get_max_half_len()` docstring, the target_idx refers to the DataFrame.index rather than the attribute `Idx`, which was specific to data acquisition. It should be noted that the implementation selects the first occurance of the closest realtive humidity, approacing from the bottom. In other words, the `target_rh` - `DataFrame.RH` will be positive when the observed RH is less than the target. As a result, the function looks for a minimum positive value. Parameters ---------- dataframe: DataFrame Experimental data containing `RH` attribute. target_rh: float Dimensionless relative humidity, typically between zero and one. Returns ------- int The DataFrame.index coresponding to the earliert time the observed dimensionless relative humidity obtained the value before it exceeded or equaled the target. """ rh = dataframe.RH.copy() rh = target_rh - rh target_idx = rh[rh > 0].idxmin() return target_idx def _get_stat_group(dataframe, target_idx, half_len): """ Get statistics group. Use the `target_idx` and `half_length` to return a group of observations on which to perform perform statistial analysis. Note, this function returns a copy of the data for statistical analysis so that there is no risk of contamination durring analysis. Notes ----- This method employs pandas.DataFrame.loc[], which is inclusive unline python's list indexing. Parameters ---------- dataframe: DataFrame Experimental data containing `Idx` and `Mass` attributes. target_idx: int Index in the dataframe that corresponds to the relative humidity. When the time-step is one second, the `Idx` attribute and the DataFrame.index will be the same, however, this is not always the case. Because the sampling rate may change, the decision is made to make the DataFrame.index the standard, unless regression analysis is being performed. half_len: int The number of elements to select on either side of the target index. Returns ------- list List of length 2. The first element (index 0) is a copy of the `Idx` attribute of the `DataFrame` for analysis. The second element (index 1) is a copy of the `Mass` attribute of the `DataFrame` for analysis. """ start = target_idx - half_len stop = target_idx + half_len return [dataframe.loc[start:stop, 'Idx'].copy(), dataframe.loc[start:stop, 'Mass'].copy()] def _get_valid_rh_targets(dataframe, rh_step_pct=5): """ Use the dataframe to return a list of valid RH values for analysis. Parameters ---------- dataframe: DataFrame Experimental data containing `RH` attribute. rh_ste_pct: int Size of RH steps in percent. Defaults to 5. Returns ------- list List of all the valid relative humidity targets for the given `DataFrame`. """ # Multipy by 100 to get into percent rh_min = dataframe.RH.min() * 100 rh_max = dataframe.RH.max() * 100 rh_min_valid = int(math.ceil(rh_min/rh_step_pct)) * rh_step_pct rh_max_valid = int(math.floor(rh_max/rh_step_pct)) * rh_step_pct # Devide by 100 to get back to dimensionless RH my_map = map( lambda x: x/100, range(rh_min_valid, rh_max_valid + 5, rh_step_pct) ) return list(my_map) def _half_len_gen(dataframe, target_idx, steps=100): """ Generate window half length. Progressively increase the the `half_length` until you exceed the maximum half length as specified by `_get_max_half_len()`. Parameters ---------- dataframe: DataFrame Experimental data required for `_get_max_half_length()`. Yeilds ------- int The next `half_lenth` to be used to grab data. """ max_half_len = _get_max_half_len(dataframe, target_idx) half_len = steps while half_len <= max_half_len: yield half_len half_len += steps def _get_df_row(res_df): """ Get the `DataFrame` row with the index of the last occurence of Q >= 0.49. Get the row of data from a `DataFrame` corresponding to the index found by stepping through the 'Results' data backwards and selecting the index of the first occurence of Q >= 0.49 from the reversed data. This is assuming that a Chi2 fit is appropreate for the data and that the Q values drop fro 1, through 0.5, to 0. Parameters ---------- res_df: DataFrame `DataFrame` object with a 'Q' value column. Returns ------- DataFrame The one row `DataFrame` with the specified index. Examples -------- >>> df_dict = dict(A=[0, 1, 2, 3, 4], B=[a, b, c, d, e] Q=[1, 0.6, 0.51, 0.1, 0]) >>> df = pd.DataFrame(df_dict) >>> _get_df_row(df) A B Q 3 3 d 0.51 """ q = 0 for i in range(len(res_df)-1, -1, -1): if q < res_df.iloc[i].Q: q = res_df.iloc[i].Q if q >= 0.49: df_row = res_df[res_df.index == res_df.index[i]] return df_row return res_df[res_df.index == res_df.index[0]]