Taiwanese Bankruptcy Prediction 🇹🇼
__author__ = "Donald Ghazi"
__email__ = "donald@donaldghazi.com"
__website__ = "donaldghazi.com"
import gzip
import json
import pickle

import ipywidgets as widgets
import matplotlib.pyplot as plt
import pandas as pd
from imblearn.over_sampling import RandomOverSampler
from ipywidgets import interact
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.metrics import (
    ConfusionMatrixDisplay,
    classification_report,
    confusion_matrix,
)
from sklearn.model_selection import GridSearchCV, cross_val_score, train_test_split
from sklearn.pipeline import make_pipeline
I'll load the contents of the "data/taiwan-bankruptcy-data.json.gz" file and assign it to the variable taiwan_data. taiwan_data should be a dictionary, which I'll turn into a DataFrame later in this project.
%%bash
cd data
gzip -dkf taiwan-bankruptcy-data.json.gz
# Open file and load JSON
with gzip.open("data/taiwan-bankruptcy-data.json.gz", "r") as read_file:
    taiwan_data = json.load(read_file)
print(type(taiwan_data))
<class 'dict'>
# Extract the key names from taiwan_data and assign them to the variable taiwan_data_keys
taiwan_data_keys = taiwan_data.keys()
print(taiwan_data_keys)
dict_keys(['schema', 'metadata', 'observations'])
Now I want to calculate how many companies are in taiwan_data and assign the result to n_companies.
taiwan_data.keys()
dict_keys(['schema', 'metadata', 'observations'])
type(taiwan_data["observations"])
list
taiwan_data["observations"][0]
{'id': 1, 'bankrupt': True, 'feat_1': 0.3705942573, 'feat_2': 0.4243894461, 'feat_3': 0.4057497725, 'feat_4': 0.6014572133, 'feat_5': 0.6014572133, 'feat_6': 0.9989692032, 'feat_7': 0.7968871459, 'feat_8': 0.8088093609, 'feat_9': 0.3026464339, 'feat_10': 0.7809848502, 'feat_11': 0.0001256969, 'feat_12': 0.0, 'feat_13': 0.4581431435, 'feat_14': 0.0007250725, 'feat_15': 0.0, 'feat_16': 0.1479499389, 'feat_17': 0.1479499389, 'feat_18': 0.1479499389, 'feat_19': 0.1691405881, 'feat_20': 0.3116644267, 'feat_21': 0.0175597804, 'feat_22': 0.0959205276, 'feat_23': 0.1387361603, 'feat_24': 0.0221022784, 'feat_25': 0.8481949945, 'feat_26': 0.6889794628, 'feat_27': 0.6889794628, 'feat_28': 0.2175353862, 'feat_29': 4980000000.0, 'feat_30': 0.0003269773, 'feat_31': 0.2630999837, 'feat_32': 0.363725271, 'feat_33': 0.0022589633, 'feat_34': 0.0012077551, 'feat_35': 0.629951302, 'feat_36': 0.0212659244, 'feat_37': 0.2075762615, 'feat_38': 0.7924237385, 'feat_39': 0.0050244547, 'feat_40': 0.3902843544, 'feat_41': 0.0064785025, 'feat_42': 0.095884834, 'feat_43': 0.1377573335, 'feat_44': 0.3980356983, 'feat_45': 0.0869565217, 'feat_46': 0.0018138841, 'feat_47': 0.0034873643, 'feat_48': 0.0001820926, 'feat_49': 0.0001165007, 'feat_50': 0.0329032258, 'feat_51': 0.034164182, 'feat_52': 0.3929128695, 'feat_53': 0.0371353016, 'feat_54': 0.6727752925, 'feat_55': 0.1666729588, 'feat_56': 0.1906429591, 'feat_57': 0.004094406, 'feat_58': 0.0019967709, 'feat_59': 0.000147336, 'feat_60': 0.1473084504, 'feat_61': 0.3340151713, 'feat_62': 0.2769201582, 'feat_63': 0.00103599, 'feat_64': 0.6762691762, 'feat_65': 0.7212745515, 'feat_66': 0.3390770068, 'feat_67': 0.025592368, 'feat_68': 0.9032247712, 'feat_69': 0.002021613, 'feat_70': 0.0648557077, 'feat_71': 701000000.0, 'feat_72': 6550000000.0, 'feat_73': 0.593830504, 'feat_74': 458000000.0, 'feat_75': 0.6715676536, 'feat_76': 0.4242057622, 'feat_77': 0.6762691762, 'feat_78': 0.3390770068, 'feat_79': 0.1265494878, 'feat_80': 0.6375553953, 'feat_81': 0.4586091477, 'feat_82': 0.5203819179, 'feat_83': 0.3129049481, 'feat_84': 0.1182504766, 'feat_85': 0, 'feat_86': 0.7168453432, 'feat_87': 0.00921944, 'feat_88': 0.6228789594, 'feat_89': 0.6014532901, 'feat_90': 0.827890214, 'feat_91': 0.2902018928, 'feat_92': 0.0266006308, 'feat_93': 0.5640501123, 'feat_94': 1, 'feat_95': 0.0164687409}
len(taiwan_data["observations"])
6137
# Calculate how many companies are in taiwan_data and assign the result to n_companies
n_companies = len(taiwan_data["observations"])
print(n_companies)
6137
Now I want to calculate the number of features associated with each company and assign the result to n_features.
len(taiwan_data["observations"][0])
97
# Calculate the number of features associated with each company and assign the result to n_features
n_features = len(taiwan_data["observations"][0])
print(n_features)
97
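A note on that count: the 97 keys include the id and bankrupt entries, not just the financial ratios. As a quick sanity check (my own addition, not part of the original task), I can count only the keys that actually start with "feat_":
# Count only the true feature keys, excluding "id" and "bankrupt"
feat_keys = [k for k in taiwan_data["observations"][0] if k.startswith("feat_")]
print(len(feat_keys))  # 95, i.e. 97 keys minus "id" and "bankrupt"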
I can now create a wrangle function that takes the path of a compressed JSON file as input and returns the file's contents as a DataFrame, with the company IDs as the index. Once the function is complete, I'll use it to load the data into the DataFrame df.
# Iterate through companies, checking every record has the same number of keys
for item in taiwan_data["observations"]:
    if len(item) != 97:
        print("ALERT!!")
# Open compressed file and load contents
with gzip.open("data/taiwan-bankruptcy-data.json.gz", "r") as read_file:
    taiwan_data_gz = json.load(read_file)
print(type(taiwan_data_gz))
<class 'dict'>
# Explore taiwan_data_gz
print(taiwan_data_gz.keys())
print(len(taiwan_data_gz["observations"]))
print(len(taiwan_data_gz["observations"][0]))
dict_keys(['schema', 'metadata', 'observations'])
6137
97
df = pd.DataFrame.from_dict(taiwan_data_gz["observations"]).set_index("id")
print(df.shape)
df.head()
(6137, 96)
id | bankrupt | feat_1 | feat_2 | feat_3 | feat_4 | feat_5 | feat_6 | feat_7 | feat_8 | feat_9 | ... | feat_86 | feat_87 | feat_88 | feat_89 | feat_90 | feat_91 | feat_92 | feat_93 | feat_94 | feat_95
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---
1 | True | 0.370594 | 0.424389 | 0.405750 | 0.601457 | 0.601457 | 0.998969 | 0.796887 | 0.808809 | 0.302646 | ... | 0.716845 | 0.009219 | 0.622879 | 0.601453 | 0.827890 | 0.290202 | 0.026601 | 0.564050 | 1 | 0.016469 |
2 | True | 0.464291 | 0.538214 | 0.516730 | 0.610235 | 0.610235 | 0.998946 | 0.797380 | 0.809301 | 0.303556 | ... | 0.795297 | 0.008323 | 0.623652 | 0.610237 | 0.839969 | 0.283846 | 0.264577 | 0.570175 | 1 | 0.020794 |
3 | True | 0.426071 | 0.499019 | 0.472295 | 0.601450 | 0.601364 | 0.998857 | 0.796403 | 0.808388 | 0.302035 | ... | 0.774670 | 0.040003 | 0.623841 | 0.601449 | 0.836774 | 0.290189 | 0.026555 | 0.563706 | 1 | 0.016474 |
4 | True | 0.399844 | 0.451265 | 0.457733 | 0.583541 | 0.583541 | 0.998700 | 0.796967 | 0.808966 | 0.303350 | ... | 0.739555 | 0.003252 | 0.622929 | 0.583538 | 0.834697 | 0.281721 | 0.026697 | 0.564663 | 1 | 0.023982 |
5 | True | 0.465022 | 0.538432 | 0.522298 | 0.598783 | 0.598783 | 0.998973 | 0.797366 | 0.809304 | 0.303475 | ... | 0.795016 | 0.003878 | 0.623521 | 0.598782 | 0.839973 | 0.278514 | 0.024752 | 0.575617 | 1 | 0.035490 |
5 rows × 96 columns
# Create wrangle function
def wrangle(filename):
    # Open compressed file, load into dict
    with gzip.open(filename, "r") as f:
        data = json.load(f)
    # Turn dict into DataFrame
    df = pd.DataFrame.from_dict(data["observations"]).set_index("id")
    return df
df = wrangle("data/taiwan-bankruptcy-data.json.gz")
print("df shape:", df.shape)
df.head()
df shape: (6137, 96)
id | bankrupt | feat_1 | feat_2 | feat_3 | feat_4 | feat_5 | feat_6 | feat_7 | feat_8 | feat_9 | ... | feat_86 | feat_87 | feat_88 | feat_89 | feat_90 | feat_91 | feat_92 | feat_93 | feat_94 | feat_95
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---
1 | True | 0.370594 | 0.424389 | 0.405750 | 0.601457 | 0.601457 | 0.998969 | 0.796887 | 0.808809 | 0.302646 | ... | 0.716845 | 0.009219 | 0.622879 | 0.601453 | 0.827890 | 0.290202 | 0.026601 | 0.564050 | 1 | 0.016469 |
2 | True | 0.464291 | 0.538214 | 0.516730 | 0.610235 | 0.610235 | 0.998946 | 0.797380 | 0.809301 | 0.303556 | ... | 0.795297 | 0.008323 | 0.623652 | 0.610237 | 0.839969 | 0.283846 | 0.264577 | 0.570175 | 1 | 0.020794 |
3 | True | 0.426071 | 0.499019 | 0.472295 | 0.601450 | 0.601364 | 0.998857 | 0.796403 | 0.808388 | 0.302035 | ... | 0.774670 | 0.040003 | 0.623841 | 0.601449 | 0.836774 | 0.290189 | 0.026555 | 0.563706 | 1 | 0.016474 |
4 | True | 0.399844 | 0.451265 | 0.457733 | 0.583541 | 0.583541 | 0.998700 | 0.796967 | 0.808966 | 0.303350 | ... | 0.739555 | 0.003252 | 0.622929 | 0.583538 | 0.834697 | 0.281721 | 0.026697 | 0.564663 | 1 | 0.023982 |
5 | True | 0.465022 | 0.538432 | 0.522298 | 0.598783 | 0.598783 | 0.998973 | 0.797366 | 0.809304 | 0.303475 | ... | 0.795016 | 0.003878 | 0.623521 | 0.598782 | 0.839973 | 0.278514 | 0.024752 | 0.575617 | 1 | 0.035490 |
5 rows × 96 columns
I want to see if there is any missing data in the dataset. I'll create a Series whose index contains the names of the columns in df and whose values are the number of NaNs in each column, and assign the result to nans_by_col. Neither the Series itself nor its index requires a name.
df.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 6137 entries, 1 to 6819
Data columns (total 96 columns):
 #   Column    Non-Null Count  Dtype
---  ------    --------------  -----
 0   bankrupt  6137 non-null   bool
 1   feat_1    6137 non-null   float64
 2   feat_2    6137 non-null   float64
...
 85  feat_85   6137 non-null   int64
...
 94  feat_94   6137 non-null   int64
 95  feat_95   6137 non-null   float64
dtypes: bool(1), float64(93), int64(2)
memory usage: 4.5 MB
df.isnull().sum().sum()
0
df.nunique()
bankrupt       2
feat_1      3159
feat_2      2985
feat_3      2984
feat_4      3580
            ...
feat_91     6136
feat_92     5599
feat_93     5608
feat_94        1
feat_95     6137
Length: 96, dtype: int64
df.isna().sum()
bankrupt    0
feat_1      0
feat_2      0
feat_3      0
feat_4      0
           ..
feat_91     0
feat_92     0
feat_93     0
feat_94     0
feat_95     0
Length: 96, dtype: int64
nans_by_col = df.isna().sum()  # isna().sum() already returns a Series
print("nans_by_col shape:", nans_by_col.shape)
nans_by_col.head()
nans_by_col shape: (96,)
bankrupt    0
feat_1      0
feat_2      0
feat_3      0
feat_4      0
dtype: int64
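As an optional extra check (not required here), I can filter the Series down to only the columns that actually contain missing values; for this dataset the result is empty:
# Show only columns with at least one NaN; empty for this dataset
print(nans_by_col[nans_by_col > 0])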
To see if the data is imbalanced, I can create a bar chart that shows the normalized value counts for the column df["bankrupt"].
df["bankrupt"].value_counts()
False    5947
True      190
Name: bankrupt, dtype: int64
df["bankrupt"].value_counts(normalize=True)
False    0.96904
True     0.03096
Name: bankrupt, dtype: float64
# Plot class balance
df["bankrupt"].value_counts(normalize=True).plot(
    kind="bar",
    xlabel="Bankrupt",  # Label x-axis "Bankrupt"
    ylabel="Frequency",  # Label y-axis "Frequency"
    title="Class Balance",  # Title "Class Balance"
);
# Create my feature matrix X and target vector y
target = "bankrupt"  # My target is "bankrupt"
X = df.drop(columns=target)  # Drop the target column to build the feature matrix
y = df[target]
print("X shape:", X.shape)
print("y shape:", y.shape)
X shape: (6137, 95)
y shape: (6137,)
# Divide my dataset into training and test sets using a randomized split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42  # Test set is 20% of my data; random_state is 42
)
print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)
print("X_test shape:", X_test.shape)
print("y_test shape:", y_test.shape)
X_train shape: (4909, 95)
y_train shape: (4909,)
X_test shape: (1228, 95)
y_test shape: (1228,)
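One judgment call worth flagging: with a positive class this rare (about 3%), a plain random split can leave the test set with a slightly different bankruptcy rate than the training set. A stratified split, sketched below, is a common alternative; I'm keeping the plain split above to stay consistent with the rest of the project.
# Alternative (not used above): stratify=y keeps the ~3% bankruptcy
# rate identical across the training and test sets
X_train_s, X_test_s, y_train_s, y_test_s = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)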
# Create a new feature matrix X_train_over and target vector y_train_over by performing random over-sampling on the training data
over_sampler = RandomOverSampler(random_state=42) # Set the random_state to 42
X_train_over, y_train_over = over_sampler.fit_resample(X_train, y_train)
print("X_train_over shape:", X_train_over.shape)
X_train_over.head()
X_train_over shape: (9512, 95)
  | feat_1 | feat_2 | feat_3 | feat_4 | feat_5 | feat_6 | feat_7 | feat_8 | feat_9 | feat_10 | ... | feat_86 | feat_87 | feat_88 | feat_89 | feat_90 | feat_91 | feat_92 | feat_93 | feat_94 | feat_95
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---
0 | 0.535855 | 0.599160 | 0.594411 | 0.627099 | 0.627099 | 0.999220 | 0.797686 | 0.809591 | 0.303518 | 0.781865 | ... | 0.834091 | 0.022025 | 0.624364 | 0.627101 | 0.841977 | 0.275384 | 0.026791 | 0.565158 | 1 | 0.147943 |
1 | 0.554136 | 0.612734 | 0.595000 | 0.607388 | 0.607388 | 0.999120 | 0.797614 | 0.809483 | 0.303600 | 0.781754 | ... | 0.840293 | 0.002407 | 0.624548 | 0.607385 | 0.842645 | 0.276532 | 0.026791 | 0.565158 | 1 | 0.062544 |
2 | 0.549554 | 0.603467 | 0.599122 | 0.620166 | 0.620166 | 0.999119 | 0.797569 | 0.809470 | 0.303524 | 0.781740 | ... | 0.840403 | 0.000840 | 0.624010 | 0.620163 | 0.842873 | 0.277249 | 0.026800 | 0.565200 | 1 | 0.047929 |
3 | 0.543801 | 0.603249 | 0.606992 | 0.622515 | 0.622515 | 0.999259 | 0.797728 | 0.809649 | 0.303510 | 0.781930 | ... | 0.831514 | 0.006176 | 0.626775 | 0.622513 | 0.842989 | 0.280013 | 0.026839 | 0.565375 | 1 | 0.028386 |
4 | 0.498659 | 0.562364 | 0.546978 | 0.603670 | 0.603670 | 0.998904 | 0.797584 | 0.809459 | 0.304000 | 0.781713 | ... | 0.811988 | 0.004256 | 0.623674 | 0.603669 | 0.841105 | 0.277628 | 0.026897 | 0.565618 | 1 | 0.043080 |
5 rows × 95 columns
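Before moving on, it's worth confirming the over-sampler did what I expect: after fit_resample, both classes should appear in equal numbers (9,512 rows is exactly twice the 4,756 majority-class rows in the training set).
# Verify the resampled training labels are now balanced
print(y_train_over.value_counts())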
# Calculate the baseline accuracy score for my model
acc_baseline = y_train.value_counts(normalize=True).max()
print("Baseline Accuracy:", round(acc_baseline, 4))
Baseline Accuracy: 0.9688
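This baseline is just the relative frequency of the majority class: a model that always predicts "not bankrupt" is right about 96.9% of the time, which is why accuracy alone is a weak yardstick for this problem. The same number can be computed directly:
# Equivalent calculation: the share of "not bankrupt" labels in y_train
print((~y_train).mean())  # ~0.9688, since y_train is a boolean Series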
Random Forest Classifier
# Create a classifier clf that can be trained on (X_train_over, y_train_over)
clf = RandomForestClassifier(random_state=42)
print(clf)
RandomForestClassifier(random_state=42)
# Perform cross-validation with my classifier using the over-sampled training data, and assign my results to cv_scores
cv_scores = cross_val_score(clf, X_train_over, y_train_over, cv=5, n_jobs=-1) # Set the cv argument to 5
print(cv_scores)
[0.99316868 0.99474514 0.99369085 0.99369085 0.9957939 ]
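The fold scores are high and tightly clustered, but one caveat applies: because the over-sampling happened before cross-validation, duplicated minority rows can land in both the training and validation folds, which tends to inflate these scores. A quick summary of the five folds:
# Summarize the fold scores; cv_scores is a NumPy array
print("Mean CV accuracy:", cv_scores.mean().round(4), "+/-", cv_scores.std().round(4))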
# Create a dictionary params with the range of hyperparameters that I want to evaluate for my classifier
params = {
    "n_estimators": range(25, 100, 25),
    "max_depth": range(10, 50, 10),
}
params
{'n_estimators': range(25, 100, 25), 'max_depth': range(10, 50, 10)}
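As a quick sanity check on the size of this search, sklearn's ParameterGrid can enumerate the grid: 3 values of n_estimators times 4 values of max_depth gives 12 candidate models, each fit 5 times during cross-validation (matching the "60 fits" message below).
# Count the hyperparameter combinations the grid search will try
from sklearn.model_selection import ParameterGrid
print(len(ParameterGrid(params)))  # 12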
# Create a GridSearchCV named model that includes my classifier and hyperparameter grid
model = GridSearchCV(clf, param_grid=params, cv=5, n_jobs=-1, verbose=1) # Set cv to 5, n_jobs to -1, and verbose to 1
model
GridSearchCV(cv=5, estimator=RandomForestClassifier(random_state=42), n_jobs=-1,
             param_grid={'max_depth': range(10, 50, 10),
                         'n_estimators': range(25, 100, 25)},
             verbose=1)
# Fit my model to the over-sampled training data
model.fit(X_train_over, y_train_over)
Fitting 5 folds for each of 12 candidates, totalling 60 fits
GridSearchCV(cv=5, estimator=RandomForestClassifier(random_state=42), n_jobs=-1,
             param_grid={'max_depth': range(10, 50, 10),
                         'n_estimators': range(25, 100, 25)},
             verbose=1)
# Extract the cross-validation results from my model, and load them into a DataFrame named cv_results
cv_results = pd.DataFrame(model.cv_results_)
cv_results.head(5)
  | mean_fit_time | std_fit_time | mean_score_time | std_score_time | param_max_depth | param_n_estimators | params | split0_test_score | split1_test_score | split2_test_score | split3_test_score | split4_test_score | mean_test_score | std_test_score | rank_test_score
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---
0 | 0.466611 | 0.030509 | 0.006319 | 0.002242 | 10 | 25 | {'max_depth': 10, 'n_estimators': 25} | 0.981608 | 0.980032 | 0.979495 | 0.981073 | 0.980547 | 0.980551 | 0.000745 | 11 |
1 | 0.906622 | 0.022882 | 0.009819 | 0.002386 | 10 | 50 | {'max_depth': 10, 'n_estimators': 50} | 0.983184 | 0.981608 | 0.977918 | 0.980021 | 0.981073 | 0.980761 | 0.001750 | 10 |
2 | 1.409413 | 0.038556 | 0.013992 | 0.002892 | 10 | 75 | {'max_depth': 10, 'n_estimators': 75} | 0.983184 | 0.981083 | 0.977392 | 0.979495 | 0.981073 | 0.980445 | 0.001925 | 12 |
3 | 0.566776 | 0.029568 | 0.008576 | 0.002357 | 20 | 25 | {'max_depth': 20, 'n_estimators': 25} | 0.990541 | 0.986863 | 0.985804 | 0.990011 | 0.989485 | 0.988541 | 0.001863 | 9 |
4 | 1.125918 | 0.056730 | 0.014820 | 0.003020 | 20 | 50 | {'max_depth': 20, 'n_estimators': 50} | 0.990541 | 0.990016 | 0.987907 | 0.990011 | 0.990011 | 0.989697 | 0.000918 | 7 |
# Extract the best hyperparameters from my model and assign them to best_params
best_params = model.best_params_
print(best_params)
{'max_depth': 40, 'n_estimators': 75}
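Besides best_params_, GridSearchCV also exposes the mean cross-validated score of the winning combination and the refit estimator itself (output not shown here):
# Mean cross-validated score of the best candidate, and the refit model
print(model.best_score_)
print(model.best_estimator_)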
# Test the quality of my model by calculating accuracy scores for the training and test data
acc_train = model.score(X_train, y_train)
acc_test = model.score(X_test, y_test)
print("Model Training Accuracy:", round(acc_train, 4))
print("Model Test Accuracy:", round(acc_test, 4))
Model Training Accuracy: 1.0
Model Test Accuracy: 0.9764
# Plot a confusion matrix that shows how my model performed on my test set
ConfusionMatrixDisplay.from_estimator(model, X_test, y_test);
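The display above is built from the same counts that confusion_matrix (already imported) returns, so I can also pull the raw numbers directly:
# Raw confusion matrix: rows are true classes (False, True),
# columns are predicted classes
cm = confusion_matrix(y_test, model.predict(X_test))
print(cm)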
# Generate a classification report for my model's performance on the test data and assign it to class_report
class_report = classification_report(y_test, model.predict(X_test))
print(class_report)
              precision    recall  f1-score   support

       False       0.98      0.99      0.99      1191
        True       0.68      0.41      0.51        37

    accuracy                           0.98      1228
   macro avg       0.83      0.70      0.75      1228
weighted avg       0.97      0.98      0.97      1228
I will create a horizontal bar chart with the 10 most important features for my model.
# Get feature names from training data
features = X_train_over.columns
# Extract importances from model
importances = model.best_estimator_.feature_importances_
# Create a series with feature names and importances
feat_imp = pd.Series(importances, index=features).sort_values()
# Plot 10 most important features
feat_imp.tail(10).plot(kind="barh")
plt.xlabel("Gini Importance")
plt.ylabel("Feature")
plt.title("Feature Importance");
# Save my best-performing model to a file named "model-5-5.pkl"
with open("model-5-5.pkl", "wb") as f:
    pickle.dump(model, f)
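A habit worth keeping when pickling models: immediately reload the file and confirm it still predicts. A minimal round-trip check (my own addition):
# Round-trip check: reload the pickled model and generate a few predictions
with open("model-5-5.pkl", "rb") as f:
    loaded_model = pickle.load(f)
print(loaded_model.predict(X_test)[:5])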
Now I can open the file my_predictor_taiwan.py, add my wrangle function, and create a make_predictions function that takes two arguments: data_filepath and model_filepath.
%%bash
cat my_predictor_taiwan.py
# Import libraries
import gzip
import json
import pickle

import pandas as pd


def wrangle(filename):
    # Open compressed file, load into dict
    with gzip.open(filename, "r") as f:
        data = json.load(f)
    # Turn dict into DataFrame
    df = pd.DataFrame.from_dict(data["observations"]).set_index("id")
    return df


def make_predictions(data_filepath, model_filepath):
    # Wrangle JSON file
    X_test = wrangle(data_filepath)
    # Load model
    with open(model_filepath, "rb") as f:
        model = pickle.load(f)
    # Generate predictions
    y_test_pred = model.predict(X_test)
    # Put predictions into Series w/ name "bankrupt", and same index as X_test
    y_test_pred = pd.Series(y_test_pred, index=X_test.index, name="bankrupt")
    return y_test_pred
# Import my module
from my_predictor_taiwan import make_predictions
# Generate predictions
y_test_pred = make_predictions(
    data_filepath="data/taiwan-bankruptcy-data-test-features.json.gz",
    model_filepath="model-5-5.pkl",
)
print("predictions shape:", y_test_pred.shape)
y_test_pred.head()
predictions shape: (682,)
id
18    False
20    False
24    False
32    False
38    False
Name: bankrupt, dtype: bool