import seaborn as sns
from sklearn.metrics import confusion_matrix
from sklearn.ensemble import RandomForestClassifier
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from statsmodels.tools import categorical
from sklearn import tree
# ------------------ Loading Dataset --------------------------#
# Download the abalone data and build the model inputs/targets.
# The URL is written as adjacent string literals inside the call: a raw line
# break inside a quoted string is a syntax error.
# header=None: the UCI abalone.data file has no header row, so without it the
# first record would be consumed as column names.
dataframe = pd.read_csv(
    "http://archive.ics.uci.edu/ml/machine-learning-"
    "databases/abalone/abalone.data",
    header=None,
)
# Work on the last 1000 records only.
dataframe = dataframe.tail(1000)

# Column 0 is treated as the categorical feature, columns 1..8 as numeric.
numericdf = dataframe.iloc[:, 1:9]
categordf = dataframe.iloc[:, 0]

# Encode the categorical column and keep only the first two encoded columns.
# NOTE(review): presumably `categorical(..., drop=True)` yields one indicator
# column per category; newer statsmodels releases may no longer ship this
# helper -- confirm against the installed version.
categordf_en = categorical(categordf.values, drop=True)
categordf_en = categordf_en[:, 0:2]

numeric_arr = numericdf.to_numpy()
categor_arr = np.asarray(categordf_en)

# Target: last numeric column (ring count per the UCI description -- confirm).
# It is used as a class label by the classifiers below.
Output = numeric_arr[:, 7]
# NOTE(review): the slice 0:6 leaves numeric column 6 out of the features;
# unclear whether that is intentional -- confirm before changing.
Input_numeric = numeric_arr[:, 0:6]
Input_categor = categor_arr
# Final feature matrix: six numeric columns followed by the two encoded ones.
Input = np.concatenate((Input_numeric, Input_categor), axis=1)
#---------------------------------------------------------------#
# Random forest (5 trees, fixed seed): fit on the full feature matrix, then
# predict on that same matrix, so the confusion matrix below reflects
# training-set performance only.
RF = RandomForestClassifier(random_state=12, n_estimators=5).fit(Input, Output)
Z_RF = RF.predict(Input)
# Rows are true labels, columns are predicted labels.
CM_RF = confusion_matrix(y_true=Output, y_pred=Z_RF)
#---------------------------------------------------------------
# Single decision tree: fit on the full feature matrix, then predict on that
# same matrix, so the confusion matrix reflects training-set performance only.
# random_state is fixed (same seed as the random forest) so repeated runs
# build the same tree; without it scikit-learn permutes features randomly at
# each split and tie-breaking can differ between runs.
DT = tree.DecisionTreeClassifier(random_state=12)
DT.fit(Input, Output)
Z_DT = DT.predict(Input)
# Rows are true labels, columns are predicted labels.
CM_DT = confusion_matrix(Output, Z_DT)
#---------------------------------------------------------------
# Show both confusion matrices side by side: random forest in the left panel,
# decision tree in the right. Each matrix is transposed so that heatmap rows
# are predicted labels and columns are true labels, matching the axis labels.
for _panel, _matrix in ((121, CM_RF), (122, CM_DT)):
    plt.subplot(_panel)
    sns.heatmap(_matrix.T, square=True, annot=True, fmt='d', cbar=False)
    plt.xlabel('true label')
    plt.ylabel('predicted label')
plt.show()