# Source code for predictatops.main

# -*- coding: utf-8 -*-
"""
    The main.py module of predictatops merely holds a few utility functions leveraged by other modules.
"""

import pandas as pd
import numpy as np
import itertools
import matplotlib.pyplot as plt

# %matplotlib inline
import welly
from welly import Well
import lasio
import glob
from sklearn.externals import joblib

### """Main module."""


def printHello():
    """
    Print a fixed greeting to standard output.

    Intended as a quick smoke test that the main module can be imported and run.

    Returns
    -------
    None
    """
    print("hello, you've run the main module")


def load_prev_results_at_path(full_path_to_results_file, key="df"):
    """
    Load the object stored under a given key in an HDF5 (.h5) file.

    This is a thin wrapper around ``pandas.read_hdf``; no error handling is
    added, so any exception raised by pandas (for example when the file does
    not exist) propagates to the caller unchanged.

    Parameters
    ----------
    full_path_to_results_file: string
        A path to a .h5 file that contains a wells dataframe.

    key: string
        The key identifying the dataframe inside the h5 file found at
        full_path_to_results_file. Default is "df".

    Returns
    -------
    wells_df_from_wellsKNN: dataframe
        The dataframe of wells read from the file at the given path and key.
    """
    return pd.read_hdf(full_path_to_results_file, key=key)


def getMainDFsavedInStep(path_to_results, path_to_directory, file_name, ending):
    """
    Build the full path string to a results file from its component parts.

    Despite its name, this function does not read any file or return a
    dataframe: it only joins the pieces with "/" and appends the ending
    directly to the file name. Pass the returned path to a loader such as
    load_prev_results_at_path to actually read the data.

    Parameters
    ----------
    path_to_results: string
        A path to a top-level results folder.

    path_to_directory: string
        A folder within the results folder that has the file in question.

    file_name: string
        The name of the file, without its ending, inside path_to_directory.

    ending: string
        String representation of the file type like ".h5" or ".csv". It should include the dot!

    Returns
    -------
    full_path_to_results_file: string
        The path "<path_to_results>/<path_to_directory>/<file_name><ending>".

    """
    # No separator goes between file_name and ending, which is why the ending
    # must carry its own leading dot.
    return "/".join((path_to_results, path_to_directory, file_name + ending))


def get_df_results_from_step_X(output_data_inst, directory, filename, key="df"):
    """
    Load a dataframe saved by a previous step, locating it via an output configuration object.

    The file path is assembled by getMainDFsavedInStep from the configuration
    object's base path, the given directory and filename, and the
    configuration object's default file ending; the file is then read with
    load_prev_results_at_path.

    Parameters
    ----------
    output_data_inst: object
        An object exposing the attributes ``base_path_for_all_results`` (the
        top-level results folder) and ``default_results_file_format`` (used
        here as the file ending appended to filename).

    directory: string
        A folder within ``output_data_inst.base_path_for_all_results`` that holds the file.

    filename: string
        Name of the file in question, without its ending.

    key: string
        A string representation of a key used to find the dataframe in the h5 file. Default is "df".

    Returns
    -------
    wells_df_of_results: dataframe
        Returns a dataframe of wells that existed at the path defined via the given input arguments.
    """
    # Pull the pieces of the path that live on the output configuration object.
    ending = output_data_inst.default_results_file_format
    base_path_for_all_results = output_data_inst.base_path_for_all_results
    # Combine everything into one full path to the results file.
    full_path_to_features_results = getMainDFsavedInStep(
        base_path_for_all_results, directory, filename, ending
    )
    # Read the dataframe stored under `key` at that path.
    return load_prev_results_at_path(full_path_to_features_results, key)


def getJobLibPickleResults(output_data_inst, subfolder, filename):
    """
    Build the path to a joblib pickle file under the results folder and return its loaded contents.

    Parameters
    ----------
    output_data_inst: object
        An object exposing the attribute ``base_path_for_all_results``, the
        top-level folder of previously output data.

    subfolder: string
        A folder within ``output_data_inst.base_path_for_all_results`` that holds the file.

    filename: string
        Name of the file in question. No ending is appended here, so it must
        already include any file extension.

    Returns
    -------
    object
        Whatever object ``joblib.load`` reconstructs from the file at
        "<base_path_for_all_results>/<subfolder>/<filename>". The type
        depends on what was pickled; presumably a dataframe -- verify against
        the code that wrote the file.
    """
    full_path_to_pickle = "/".join(
        (output_data_inst.base_path_for_all_results, subfolder, filename)
    )
    # SECURITY: joblib.load unpickles the file, which can execute arbitrary
    # code embedded in it. Only call this on files from a trusted source.
    # NOTE(review): `joblib` comes from the module-level
    # `from sklearn.externals import joblib`; newer scikit-learn releases no
    # longer ship that module -- confirm the import resolves in the target
    # environment.
    return joblib.load(full_path_to_pickle)