The utils.py Script of GCN (Graph Convolution)

Uncomment the parts that are commented out with # and print some of the data: this makes it clear how GCN actually processes its inputs, and it also helps when converting your own data into the same format.
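For reference, the script reads its pickle files from a data/ directory next to it; for cora (the dataset loaded at the bottom of the script) that means data/ind.cora.x, data/ind.cora.y, data/ind.cora.tx, data/ind.cora.ty, data/ind.cora.allx, data/ind.cora.ally, data/ind.cora.graph and data/ind.cora.test.index, exactly the files documented in the load_data docstring below.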

import numpy as np
import pickle as pkl
import networkx as nx
import scipy.sparse as sp
from scipy.sparse.linalg import eigsh  # the old scipy.sparse.linalg.eigen.arpack path was removed in SciPy 1.8
import sys


def parse_index_file(filename):
    """Parse index file."""
    index = []
    for line in open(filename):
        index.append(int(line.strip()))
    return index


def sample_mask(idx, l):
    """Create mask."""
    mask = np.zeros(l)
    mask[idx] = 1
    return np.array(mask, dtype=bool)  # np.bool was removed in NumPy 1.24
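# A quick sanity check of sample_mask (hypothetical call):
#   sample_mask([0, 2], 4)  ->  array([ True, False,  True, False])
# i.e. a boolean vector of length l with True at the given indices.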


def load_data(dataset_str):
    """
    Loads input data from gcn/data directory

    ind.dataset_str.x => the feature vectors of the training instances as scipy.sparse.csr.csr_matrix object;
    ind.dataset_str.tx => the feature vectors of the test instances as scipy.sparse.csr.csr_matrix object;
    ind.dataset_str.allx => the feature vectors of both labeled and unlabeled training instances
        (a superset of ind.dataset_str.x) as scipy.sparse.csr.csr_matrix object;
    ind.dataset_str.y => the one-hot labels of the labeled training instances as numpy.ndarray object;
    ind.dataset_str.ty => the one-hot labels of the test instances as numpy.ndarray object;
    ind.dataset_str.ally => the labels for instances in ind.dataset_str.allx as numpy.ndarray object;
    ind.dataset_str.graph => a dict in the format {index: [index_of_neighbor_nodes]} as collections.defaultdict
        object;
    ind.dataset_str.test.index => the indices of test instances in graph, for the inductive setting as list object.

    All objects above must be saved using the Python pickle module.

    :param dataset_str: Dataset name
    :return: All data input files loaded (as well as the training/test splits).
    """
    names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
    objects = []
    for i in range(len(names)):
        with open("data/ind.{}.{}".format(dataset_str, names[i]), 'rb') as f:
            if sys.version_info > (3, 0):
                data = pkl.load(f, encoding='latin1')
                # if(names[i].find('graph')==-1):
                #     print(f)
                #     print(data.shape)
                #     for j in range(data.shape[0]):
                #         print('********',names[i],j,data[j].shape,'**********')
                #         print(data[j])
                # else:
                #     print(f)
                #     print(data)
                objects.append(data)

            else:
                objects.append(pkl.load(f))

    x, y, tx, ty, allx, ally, graph = tuple(objects)

    ## Training set
    # print(x[0][0],x.shape,type(x))  ## x is a sparse matrix (only the positions of the 1s are stored); 140 instances, each a 1433-dim feature vector -> (140, 1433)
    # print(y[0],y.shape)   ## y holds the one-hot labels; 7 classes, 140 instances -> (140, 7)

    ## Test set
    # print(tx[0][0],tx.shape,type(tx))  ## tx is a sparse matrix; 1000 instances, each a 1433-dim feature vector -> (1000, 1433)
    # print(ty[0],ty.shape)   ## ty holds the one-hot labels; 7 classes, 1000 instances -> (1000, 7)

    ## allx and ally have the same layout as above
    # print(allx[0][0],allx.shape,type(allx))  ## allx is a sparse matrix; 1708 instances, each a 1433-dim feature vector -> (1708, 1433)
    # print(ally[0],ally.shape)   ## ally holds the one-hot labels; 7 classes, 1708 instances -> (1708, 7)


    ##graph is a dict {index: [indices of neighbor nodes]}; the full graph has 2708 nodes in total
    # for i in graph:
    #     print(i,graph[i])


    test_idx_reorder = parse_index_file("data/ind.{}.test.index".format(dataset_str))
    test_idx_range = np.sort(test_idx_reorder)

    # print(test_idx_range)

    if dataset_str == 'citeseer':
        # Fix citeseer dataset (there are some isolated nodes in the graph)
        # Find isolated nodes, add them as zero-vecs into the right position
        test_idx_range_full = range(min(test_idx_reorder), max(test_idx_reorder)+1)
        tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
        tx_extended[test_idx_range-min(test_idx_range), :] = tx
        tx = tx_extended
        ty_extended = np.zeros((len(test_idx_range_full), y.shape[1]))
        ty_extended[test_idx_range-min(test_idx_range), :] = ty
        ty = ty_extended

    features = sp.vstack((allx, tx)).tolil()
    features[test_idx_reorder, :] = features[test_idx_range, :]
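    # Note: the rows of tx are stored in the shuffled order given by
    # ind.<dataset>.test.index, while vstack has placed them at the sorted
    # positions test_idx_range. This row permutation puts every test row back
    # at its true node index, so row i of `features` matches node i in `graph`.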
    adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))
    # print(adj,adj.shape)

    labels = np.vstack((ally, ty))
    labels[test_idx_reorder, :] = labels[test_idx_range, :]

    idx_test = test_idx_range.tolist()
    idx_train = range(len(y))
    idx_val = range(len(y), len(y)+500)
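    # Split used here: the first len(y) nodes are the training set (140 for
    # cora), the next 500 are validation, and the nodes listed in the test
    # index file are the test set (1000 for cora).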

    train_mask = sample_mask(idx_train, labels.shape[0])
    val_mask = sample_mask(idx_val, labels.shape[0])
    test_mask = sample_mask(idx_test, labels.shape[0])

    y_train = np.zeros(labels.shape)
    y_val = np.zeros(labels.shape)
    y_test = np.zeros(labels.shape)
    y_train[train_mask, :] = labels[train_mask, :]
    y_val[val_mask, :] = labels[val_mask, :]
    y_test[test_mask, :] = labels[test_mask, :]
    

    return adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask


def sparse_to_tuple(sparse_mx):
    """Convert sparse matrix to tuple representation."""
    def to_tuple(mx):
        if not sp.isspmatrix_coo(mx):
            mx = mx.tocoo()
        coords = np.vstack((mx.row, mx.col)).transpose()
        values = mx.data
        shape = mx.shape
        return coords, values, shape

    if isinstance(sparse_mx, list):
        for i in range(len(sparse_mx)):
            sparse_mx[i] = to_tuple(sparse_mx[i])
    else:
        sparse_mx = to_tuple(sparse_mx)

    return sparse_mx
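# Example of the tuple representation (hypothetical 2x2 input [[0, 1], [2, 0]]):
#   coords = [[0, 1], [1, 0]], values = [1, 2], shape = (2, 2)
# i.e. exactly the (indices, values, dense_shape) triple that a TensorFlow
# sparse placeholder feed expects.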


def preprocess_features(features):
    """Row-normalize feature matrix and convert to tuple representation"""
    rowsum = np.array(features.sum(1))
    r_inv = np.power(rowsum, -1).flatten()
    r_inv[np.isinf(r_inv)] = 0.
    r_mat_inv = sp.diags(r_inv)
    features = r_mat_inv.dot(features)
    return sparse_to_tuple(features)
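# Row normalization computes D^-1 * X, where D is the diagonal matrix of row
# sums: each feature vector is rescaled to sum to 1. All-zero rows would give
# an infinite 1/rowsum, so those entries of r_inv are reset to 0.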


def normalize_adj(adj):
    """Symmetrically normalize adjacency matrix."""
    adj = sp.coo_matrix(adj)
    rowsum = np.array(adj.sum(1))
    d_inv_sqrt = np.power(rowsum, -0.5).flatten()
    d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.
    d_mat_inv_sqrt = sp.diags(d_inv_sqrt)
    return adj.dot(d_mat_inv_sqrt).transpose().dot(d_mat_inv_sqrt).tocoo()
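# This returns D^-1/2 * A * D^-1/2. The transpose trick works because
# (A * D^-1/2)^T * D^-1/2 = D^-1/2 * A^T * D^-1/2, and A is symmetric for an
# undirected graph.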


def preprocess_adj(adj):
    """Preprocessing of adjacency matrix for simple GCN model and conversion to tuple representation."""
    adj_normalized = normalize_adj(adj + sp.eye(adj.shape[0]))
    return sparse_to_tuple(adj_normalized)
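# The "renormalization trick" of Kipf & Welling (ICLR 2017): add self-loops
# first, then normalize symmetrically, giving D~^-1/2 * (A + I) * D~^-1/2 as
# the propagation matrix of the plain GCN layer.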


def construct_feed_dict(features, support, labels, labels_mask, placeholders):
    """Construct feed dictionary."""
    feed_dict = dict()
    feed_dict.update({placeholders['labels']: labels})
    feed_dict.update({placeholders['labels_mask']: labels_mask})
    feed_dict.update({placeholders['features']: features})
    feed_dict.update({placeholders['support'][i]: support[i] for i in range(len(support))})
    feed_dict.update({placeholders['num_features_nonzero']: features[1].shape})
    return feed_dict
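# Note: `features` is expected in the tuple representation produced by
# sparse_to_tuple, so features[1] is the 1-D array of non-zero values; its
# shape is fed as num_features_nonzero (used by the model for sparse dropout).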


def chebyshev_polynomials(adj, k):
    """Calculate Chebyshev polynomials up to order k. Return a list of sparse matrices (tuple representation)."""
    print("Calculating Chebyshev polynomials up to order {}...".format(k))

    adj_normalized = normalize_adj(adj)
    laplacian = sp.eye(adj.shape[0]) - adj_normalized
    largest_eigval, _ = eigsh(laplacian, 1, which='LM')
    scaled_laplacian = (2. / largest_eigval[0]) * laplacian - sp.eye(adj.shape[0])

    t_k = list()
    t_k.append(sp.eye(adj.shape[0]))
    t_k.append(scaled_laplacian)

    def chebyshev_recurrence(t_k_minus_one, t_k_minus_two, scaled_lap):
        s_lap = sp.csr_matrix(scaled_lap, copy=True)
        return 2 * s_lap.dot(t_k_minus_one) - t_k_minus_two

    for i in range(2, k+1):
        t_k.append(chebyshev_recurrence(t_k[-1], t_k[-2], scaled_laplacian))

    return sparse_to_tuple(t_k)
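# The recurrence above is the Chebyshev one, T_k(x) = 2x * T_{k-1}(x) - T_{k-2}(x),
# applied to the rescaled Laplacian L~ = 2L/lambda_max - I (so its spectrum
# lies in [-1, 1]), with T_0 = I and T_1 = L~.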


if __name__ == '__main__':
    load_data('cora')
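To see how the helpers chain together, here is a minimal sketch of the usual preprocessing pipeline (hypothetical variable names, assuming the cora files are in place):

adj, features, y_train, y_val, y_test, train_mask, val_mask, test_mask = load_data('cora')
features = preprocess_features(features)  # tuple representation of D^-1 * X
support = [preprocess_adj(adj)]           # tuple representation of D~^-1/2 * (A + I) * D~^-1/2
# Chebyshev variant instead (e.g. up to order 3):
# support = chebyshev_polynomials(adj, 3)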