溫馨提示×

溫馨提示×

您好,登錄后才能下訂單哦!

密碼登錄×
登錄注冊×
其他方式登錄
點擊 登錄注冊 即表示同意《億速云用戶服務條款》

異方差 Python運行

發(fā)布時間:2020-07-24 10:18:13 來源:網絡 閱讀:229 作者:專注地一哥 欄目:編程語言

import pandas as pd

?

import numpy as np

?

from patsy import dmatrices

?

from statsmodels.stats.outliers_influence import variance_inflation_factor

?

import statsmodels.api as sm

?

import scipy.stats as stats

?

from sklearn.metrics import mean_squared_error

?

import seaborn as sns

?

import matplotlib.pyplot as plt

?

import matplotlib.mlab as mlab

import scipy.io

plt.rcParams['font.sans-serif']=['SimHei']

plt.rcParams['axes.unicode_minus'] = False

# 數據讀取

?

# #ccpp = pd.read_excel( 'CCPP.xlsx')ccpp.describe()

# data = scipy.io.loadmat('ENCDATA-2hp.mat') # 讀取mat文件

# # path = scio.loadmat('FFT-2hp.mat')['FFT-2hp']

# print(data)

# train1=data['train3hp']

# test1=data['test3hp']

# train_y=data['train_y3hp']

# test_y=data['test_y3hp']

# data1=train1[:,1]

# print("train1",train1.shape)

# print("data",data1.shape)

# # sns.pairplot(data)

# # plt.show()

# #y, X = dmatrices( data1, data = train1, return_type= 'dataframe')

# fit2 = sm.formula.ols( data1,data = train1).fit()

# print("fit2",fit2)

# fit2.summary()

# pred2 = fit2.predict()

# print("pred2",pred2)

#

# np.sqrt(mean_squared_error(train1.PE, pred2))

# resid = fit2.resid

# plt.scatter(fit2.predict(), (fit2.resid-fit2.resid.mean())/fit2.resid.std())

# plt.xlabel( '預測值')

# plt.ylabel( '標準化殘差')

#

# # 添加水平參考線

#

# plt.axhline(y = 0, color = 'r', linewidth = 2)

# plt.show()

?

ccpp = pd.read_excel( 'CCPP.xlsx')

ccpp.describe()

sns.pairplot(ccpp)

plt.show()

?

# 發(fā)電量與自變量之間的相關系數

ccpp.corrwith(ccpp.PE)

y, X = dmatrices( 'PE~AT+V+AP', data = ccpp, return_type= 'dataframe')

?

# 構造空的數據框

?

vif = pd.DataFrame()

vif[ "VIF Factor"] = [variance_inflation_factor(X.values, i) for i in range(X.shape[ 1])]

# vif[ "features"] = X.columnsvif

# print(vif[ "features"])

# 構造PEATVAP之間的線性模型

?

fit = sm.formula.ols( 'PE~AT+V+AP',data = ccpp).fit()

fit.summary()

print("fit",fit)

# 計算模型的RMSE

?

pred = fit.predict()

np.sqrt(mean_squared_error(ccpp.PE, pred))

?

# 離群點檢驗

?

outliers = fit.get_influence()

?

# 高杠桿值點(帽子矩陣)

?

leverage = outliers.hat_matrix_diag

?

# dffits

?

dffits = outliers.dffits[ 0]

?

# 學生化殘差

?

resid_stu = outliers.resid_studentized_external

?

# cook距離

?

cook = outliers.cooks_distance[ 0]

?

# covratio

?

covratio = outliers.cov_ratio

?

# 將上面的幾種異常值檢驗統(tǒng)計量與原始數據集合并

?

contat1 = pd.concat([pd.Series(leverage, name = 'leverage'),

?????????????????????pd.Series(dffits, name = 'dffits'),

?????????????????????pd.Series(resid_stu,name = 'resid_stu'),

?

?

?

?????????????????????pd.Series(cook, name = 'cook'),

?????????????????????pd.Series(covratio, name = 'covratio'),],axis = 1)

ccpp_outliers = pd.concat([ccpp,contat1], axis = 1)

ccpp_outliers.head()

print("contat1",contat1)

?

# 重新建模

?

fit2 = sm.formula.ols( 'PE~AT+V+AP',data = ccpp_outliers).fit()

fit2.summary()

?

# 計算模型的RMSE

?

pred2 = fit2.predict()

np.sqrt(mean_squared_error(ccpp_outliers.PE, pred2))

function(){ //K線圖 http://www.kaifx.cn/mt4/kaifx/1770.html

resid = fit2.resid

# 標準化殘差與預測值之間的散點圖

?

plt.scatter(fit2.predict(), (fit2.resid-fit2.resid.mean())/fit2.resid.std())

plt.xlabel( '預測值',fontdict={'family' : 'sans-serif', 'size' : 20})

plt.ylabel( '標準化殘差',fontdict={'family' : 'sans-serif', 'size' : 20})

?

# 添加水平參考線

?

plt.axhline(y = 0, color = 'r', linewidth = 2)

plt.show()


向AI問一下細節(jié)

免責聲明:本站發(fā)布的內容(圖片、視頻和文字)以原創(chuàng)、轉載和分享為主,文章觀點不代表本網站立場,如果涉及侵權請聯(lián)系站長郵箱:is@yisu.com進行舉報,并提供相關證據,一經查實,將立刻刪除涉嫌侵權內容。

AI