# Source code for descmap.sampling

"""Functionality related to sampling"""
from warnings import warn
from collections import namedtuple

import numpy as np
from scipy.stats import variation
import pandas as pd
from pyDOE import lhs

# Record describing one sampled value of one descriptor.
DescPoint = namedtuple(
    'DescPoint',
    [
        'name',       # descriptor name
        'i',          # index of the sample within the descriptor
        'val',        # sampled value
        'field_len',  # character width derived from the sample count
        'sampling',   # sampling scheme label, e.g. 'linear' or 'lhs'
    ],
)

def get_linspace_map(descriptors_data):
    """Build an evenly spaced sampling map for every descriptor.

    Parameters
    ----------
        descriptors_data : list of dict
            One dictionary per descriptor. Each dictionary is read for the
            keys ``name``, ``low_value``, ``high_value`` and ``n``.
    Returns
    -------
        linspace_map : list of list of DescPoint namedtuples
            Outer list has one entry per descriptor, in input order. Each
            inner list holds ``n`` DescPoint namedtuples whose ``val``
            comes from ``np.linspace(low_value, high_value, n)``. Every
            point carries ``name``, ``i`` (position in the inner list),
            ``val``, ``field_len`` (number of characters in ``str(n)``)
            and ``sampling`` (always ``'linear'``). e.g.
            [
                [('A', 0, 0.1, 1, 'linear'),
                 ('A', 1, 0.2, 1, 'linear'),
                 ('A', 2, 0.3, 1, 'linear')],
                [('B', 0, 0.5, 1, 'linear'),
                 ('B', 1, 1., 1, 'linear')]
            ]
    """
    all_points = []
    for entry in descriptors_data:
        label = entry['name']
        # Width is taken from the sample count itself, not the largest index.
        width = len(str(entry['n']))
        grid = np.linspace(entry['low_value'],
                           entry['high_value'],
                           entry['n'])
        all_points.append([
            DescPoint(name=label, i=idx, val=grid_val,
                      field_len=width, sampling='linear')
            for idx, grid_val in enumerate(grid)
        ])
    return all_points
def get_lhs_map(descriptors_data):
    """Returns Latin hypercube sampling mapping

    Parameters
    ----------
        descriptors_data : list of dict
            Each element of the list has information related to a
            descriptor. The dictionary is expected to have the fields:
            ``name``, ``low_value``, ``high_value``, ``n``. Note that all
            ``n`` are expected to be the same. If there is a discrepancy,
            a warning is issued and the maximum value is used.
    Returns
    -------
        lhs_map : list of list of DescPoint namedtuples
            Latin hypercube sampling mapping of all the descriptors. Each
            element of the top list has info about each descriptor. The
            next list contains the runs. The DescPoint namedtuple has the
            attributes ``name``, ``i``, ``val``, ``field_len`` and
            ``sampling`` (always ``'lhs'``). An empty ``descriptors_data``
            yields an empty list. e.g.
            [
                [('A', 0, 0.37, 1, 'lhs'),
                 ('A', 1, 0.98, 1, 'lhs'),
                 ('A', 2, 0.24, 1, 'lhs')],
                [('B', 0, 0.60, 1, 'lhs'),
                 ('B', 1, 0.03, 1, 'lhs'),
                 ('B', 2, 0.95, 1, 'lhs')]
            ]
    """
    # Number of descriptors (dimensions of the hypercube)
    n_descriptors = len(descriptors_data)
    if n_descriptors == 0:
        # np.max raises on an empty array; return an empty map instead,
        # matching what get_linspace_map does for empty input.
        return []

    # Number of samples: a single count is shared by all descriptors
    descriptors_n = np.array([row['n'] for row in descriptors_data])
    samples = np.max(descriptors_n)

    # Warn user if # of data points are inconsistent. Comparing against the
    # maximum directly avoids a std/mean ratio, which is NaN for a zero mean
    # and can fall under the isclose tolerance for very large counts.
    if np.any(descriptors_n != samples):
        warn_msg = ('Number of samples for each descriptor in Latin hypercube '
                    'sampling must remain constant. Using maximum value, {}.'
                    ''.format(samples))
        warn(warn_msg)

    # Field length is the number of characters in the sample count
    field_len = len(str(samples))

    # Create the LHS array; column i is read below as descriptor i's samples
    lhs_array = lhs(n=n_descriptors, samples=samples)

    # Format the LHS array to desired mapping format
    lhs_map = []
    for i, descriptor_data in enumerate(descriptors_data):
        name = descriptor_data['name']
        low_val = descriptor_data['low_value']
        high_val = descriptor_data['high_value']
        val_range = high_val - low_val
        # Rescale each raw sample onto [low_val, high_val]
        # (presumably lhs() returns values in [0, 1] -- see pyDOE docs).
        lhs_map.append([
            DescPoint(name=name, i=j, val=val*val_range + low_val,
                      field_len=field_len, sampling='lhs')
            for j, val in enumerate(lhs_array[:, i])
        ])
    return lhs_map
# Dispatch table: sampling scheme name -> function that builds its mapping.
sampling_map = {
    'linear': get_linspace_map,
    'lhs': get_lhs_map,
}
"""dict: Keys represent sampling type. Values represent function handles."""