當你有一個數據集，每一條數據都有 M 種屬性，然后你想知道 M 種屬性對數據集的影響的時候，你需要用到協方差矩陣。
求協方差矩陣之前請一定要知道協方差矩陣是干嘛的：它是表示屬性之間關系的矩陣，協方差矩陣的規模只與屬性數量有關，和數據總量無關。blog.sciencenet.cn/blog-455004-805926.html 這里講的很清楚。
Python 代碼如下：
class PCA:
    """Pure-Python helpers for building a sample covariance matrix.

    ``data`` is a list of rows (one per sample); every row is a list with
    one numeric value per attribute (column). The covariance matrix is
    square with one row/column per attribute, so its size depends only on
    the number of attributes, not on the number of samples.
    """

    def avg(self, data):
        """Return the per-column mean of *data* as a list of floats.

        The number of columns is taken from the first row.

        Raises:
            IndexError: if *data* is empty, or a row is longer than the
                first row.
        """
        row_count = len(data)
        totals = [0] * len(data[0])
        for row in data:
            for col, value in enumerate(row):
                totals[col] += value
        # float() keeps the division exact on Python 2 for integer input.
        return [float(total) / row_count for total in totals]

    def getCovMatrix(self, data, avg):
        """Return the sample covariance matrix of the columns of *data*.

        Args:
            data: list of rows, one numeric value per column.
            avg: per-column means, as returned by :meth:`avg`.

        Returns:
            A list of lists where entry ``[i][j]`` is the sample covariance
            between column ``i`` and column ``j``.

        Raises:
            IndexError: if *data* is empty.
            ZeroDivisionError: if *data* holds exactly one row.
        """
        col_count = len(data[0])
        matrix = [[0] * col_count for _ in range(col_count)]
        for i in range(col_count):
            # cov(i, j) == cov(j, i): compute the upper triangle once and
            # mirror it instead of evaluating every pair twice.
            for j in range(i, col_count):
                matrix[i][j] = matrix[j][i] = self.getCov(data, i, j, avg)
        return matrix

    def getCov(self, data, col1, col2, avg):
        """Return the sample covariance between two columns of *data*.

        Uses the unbiased estimator, i.e. the sum of products of deviations
        from the means divided by ``len(data) - 1``.

        Args:
            data: list of rows, one numeric value per column.
            col1: index of the first column.
            col2: index of the second column.
            avg: per-column means; ``avg[col1]`` and ``avg[col2]`` are read.

        Raises:
            ZeroDivisionError: if *data* holds exactly one row.
        """
        mean1, mean2 = avg[col1], avg[col2]
        total = sum((row[col1] - mean1) * (row[col2] - mean2) for row in data)
        # float() avoids truncating integer division on Python 2 when the
        # caller supplies integer data together with integer means.
        return float(total) / (len(data) - 1)
# Demo: seven samples (rows) with three attributes (columns) each.
data = [
    [-1, -1, 1],
    [-2, -1, 4],
    [-3, -2, -2],
    [1, 1, 1],
    [2, 1, 2],
    [3, 2, 1],
    [1, 2, 4],
]
example = PCA()
avgdata = example.avg(data)
# A parenthesised single argument prints identically under the Python 2
# print statement and the Python 3 print function.
print(example.getCovMatrix(data, avgdata))