Source code for cave.utils.io

import os
import warnings

import numpy as np
import pandas as pd
from ConfigSpace.util import deactivate_inactive_hyperparameters, fix_types
from bokeh.io import export_png

from cave.utils.timing import timing


@timing
def export_bokeh(plot, path, logger):
    """Save a bokeh-plot as png-file, creating the target directory on demand.

    Exceptions raised while exporting are caught and only logged: the
    traceback goes to debug-level, a short hint goes to warning-level.

    Parameters
    ----------
    plot: bokeh.plotting.figure
        bokeh plot to export
    path: str
        path to save plot to
    logger: Logger
        logger for debugging
    """
    target_dir = os.path.dirname(path)
    logger.debug("Exporting to %s (base: %s)", path, target_dir)

    # Remove the fill colors of background and border before exporting.
    plot.background_fill_color = None
    plot.border_fill_color = None

    if target_dir and not os.path.exists(target_dir):
        logger.debug("%s does not exist. Creating...", target_dir)
        os.makedirs(target_dir)

    try:
        # Record warnings emitted during the export so they can be forwarded to the logger.
        with warnings.catch_warnings(record=True) as recorded:
            warnings.simplefilter('always')
            export_png(plot, filename=path)
        for entry in recorded:
            logger.debug("During export a %s was raised: %s", str(entry.category), entry.message)
    except Exception as failure:
        # Pick the hint matching the kind of failure (checked in order of priority).
        if isinstance(failure, (RuntimeError, TypeError)):
            hint = ("Exporting bokeh-plot to \"%s\" failed. "
                    "To activate automatic png-export, please follow instructions on CAVE's GitHub "
                    "(install selenium and phantomjs-prebuilt).")
        elif isinstance(failure, SystemError):
            hint = ("Exporting bokeh-plot to \"%s\" failed. "
                    "This issue is known, but not yet solved. "
                    "However it seems to appear with too few data-points. "
                    "Feel free to report your example on https://github.com/automl/CAVE/issues.")
        else:
            hint = "Exporting bokeh-plot to \"%s\" failed. (run --verbose DEBUG for more info)"
        logger.debug("Exporting failed", exc_info=1)
        logger.warning(hint, path)
def _to_numeric_if_possible(column):
    """Return ``column`` converted to a numeric dtype, or unchanged if it cannot be parsed."""
    try:
        return pd.to_numeric(column)
    except (ValueError, TypeError):
        return column


def load_csv_to_pandaframe(csv_path, logger, apply_numeric=True, delimiter=','):
    """Load csv-file and return pd.DataFrame.

    First non-empty line of the file is expected to be the header. Every
    entry is stripped of surrounding double quotes, spaces and newlines.
    Completely empty lines are skipped.

    Parameters
    ----------
    csv_path: str
        path to csv-file
    logger: logging.Logger
        logger, for debugging
    apply_numeric: boolean
        whether an attempt should be made to turn columns into numeric
        values. Columns that cannot be parsed as numbers are kept as they are.
    delimiter: str
        can be used to determine custom delimiter

    Returns
    -------
    data_frame: pd.DataFrame
        csv-dataframe

    Raises
    ------
    ValueError
        if the file contains no header, if a line has a different number of
        entries than the header or if a column name appears more than once
    """
    rows = []
    with open(csv_path, 'r') as csv_file:
        for line_number, line in enumerate(csv_file, start=1):
            if not line.rstrip('\r\n'):
                # An empty line (e.g. at the end of the file) carries no data.
                continue
            cells = [cell.strip('" \n') for cell in line.split(delimiter)]
            if rows and len(cells) != len(rows[0]):
                raise ValueError("Line %d of the csv-file \"%s\" has %d entries, but the header has %d."
                                 % (line_number, csv_path, len(cells), len(rows[0])))
            rows.append(cells)

    if not rows:
        raise ValueError("The csv-file \"%s\" is empty (no header found)." % csv_path)

    header, records = rows[0], rows[1:]
    data = pd.DataFrame(records, columns=header)
    logger.debug("Headers in \'%s\': %s", csv_path, data.columns.values)
    if not len(data.columns) == len(set(data.columns)):
        raise ValueError("Detected a duplicate in the columns of the "
                         "csv-file \"%s\"." % csv_path)
    if apply_numeric:
        # pd.to_numeric(..., errors='ignore') is deprecated, so the fallback
        # to the unconverted column is done explicitly per column.
        data = data.apply(_to_numeric_if_possible)
    return data
def load_config_csv(path, cs, logger):
    """Read a configurations.csv and build one Configuration per row.

    The file is expected in the following format:

    +-----------+-----------------+-----------------+-----+
    | CONFIG_ID | parameter_name1 | parameter_name2 | ... |
    +===========+=================+=================+=====+
    | 0         | value1          | value2          | ... |
    +-----------+-----------------+-----------------+-----+
    | ...       | ...             | ...             | ... |
    +-----------+-----------------+-----------------+-----+

    Parameters
    ----------
    path: str
        path to csv-file
    cs: ConfigurationSpace
        configspace with matching parameters
    logger: Logger
        logger for debugs

    Returns
    -------
    (parameters, id_to_config): (pd.Index, dict)
        parameter-names (the csv-columns without CONFIG_ID) and dict mapping
        ids to Configurations

    Raises
    ------
    ValueError
        if the parameter-names of the csv-file and of the configspace differ
    """
    logger.debug("Trying to read configuration-csv-file: %s.", path)
    # Values stay strings here; only the id-column is turned into numbers.
    table = load_csv_to_pandaframe(path, logger, apply_numeric=False)
    table['CONFIG_ID'] = table['CONFIG_ID'].apply(pd.to_numeric)
    table = table.set_index('CONFIG_ID')

    csv_parameters = table.columns
    pcs_parameters = cs.get_hyperparameter_names()
    logger.debug("Found parameters: %s", csv_parameters)
    logger.debug("Parameters in pcs: %s", pcs_parameters)

    mismatch = set(csv_parameters) ^ set(pcs_parameters)
    if mismatch:
        raise ValueError("Provided pcs does not match configuration-file "
                         "'%s' (check parameters %s)" % (path, mismatch))

    id_to_config = {}
    for config_id, row in table.iterrows():
        # Falsy entries (e.g. empty cells) are left out of the value-dict.
        assigned = {}
        for name in csv_parameters:
            if row[name]:
                assigned[name] = row[name]
        typed_values = fix_types(assigned, cs)
        id_to_config[config_id] = deactivate_inactive_hyperparameters(typed_values, cs)
    return csv_parameters, id_to_config