本文共 1223 字,大约阅读时间需要 4 分钟。
# Show your work here - the plot below was helpful for me
# https://stackoverflow.com/questions/44101458/random-forest-feature-importance-chart-using-python

# Column labels of the whole frame. The original sliced with
# [:diabetes.shape[1]], i.e. up to the column count, which selects every column.
features = diabetes.columns
print("features=", features)

# Importance scores exposed by the best estimator found by the search.
importances = random_search.best_estimator_.feature_importances_
print("importances=", importances)

# argsort yields the positions that order the scores from smallest to largest.
order = np.argsort(importances)
print("indicies=", order)
print("importances[indicies]=", importances[order])

# NOTE(review): labels are picked by position, so this assumes the columns the
# model was trained on come first in `diabetes`, in training order - confirm.
sorted_scores = importances[order]
sorted_labels = features[order]
positions = range(len(order))

# Figure 1: horizontal bars, one per feature, smallest score at the bottom.
plt.figure(1)
plt.barh(positions, sorted_scores, color='b', align='center')
plt.yticks(positions, sorted_labels)
# The plotted values are feature importances, not correlations.
plt.xlabel("relative importance")

# Figure 2: the same data as vertical bars with rotated tick labels.
plt.figure(2)
plt.bar(positions, sorted_scores)
plt.xticks(positions, sorted_labels, rotation=45)
# Output:
features= Index(['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome'], dtype='object')
importances= [ 0.04620102 0.35829186 0.06762202 0.04479566 0.0593829 0.17193048 0.10250037 0.14927569]
indicies= [3 0 4 2 6 7 5 1]
importances[indicies]= [ 0.04479566 0.04620102 0.0593829 0.06762202 0.10250037 0.14927569 0.17193048 0.35829186]
下面两图展示的是模型(best_estimator_)给出的各个特征的重要性(feature_importances_,按从小到大排序),而不是特征与输出标签的相关系数;分别用横向和纵向柱状图进行描绘。
转载地址:http://lsfti.baihongyu.com/