Paper
Anomaly Detection on Attributed Networks via Contrastive Self-Supervised Learning
代码实现
异常注入
inject_anomaly.py
inject_anomaly.py
注入异常过程,处理原始数据集,并添加结构和属性扰动,注入结构属性异常。
import numpy as np
import scipy.sparse as sp
import random
import scipy.io as sio
import argparse
import pickle as pkl
import networkx as nx
import sys
import os
import os.path as osp
from sklearn import preprocessing
from scipy.spatial.distance import euclidean
def dense_to_sparse(dense_matrix):
    """Convert a dense 2-D array to a scipy CSC sparse matrix.

    Only strictly positive entries are kept (the original per-element scan
    tested ``r > 0``); zeros and negative values are dropped.

    Args:
        dense_matrix: 2-D numpy array.

    Returns:
        scipy.sparse.csc_matrix holding the positive entries of the input.
    """
    # Vectorized replacement for the original O(rows*cols) Python double
    # loop: np.where yields the (row, col) indices of all positive entries
    # at once, and fancy indexing pulls the matching values.
    row, col = np.where(dense_matrix > 0)
    data = dense_matrix[row, col]
    sparse_matrix = sp.coo_matrix((data, (row, col)), shape=dense_matrix.shape).tocsc()
    return sparse_matrix
def parse_index_file(filename):
    """Parse an index file containing one integer per line.

    Args:
        filename: path to a text file with one integer index per line.

    Returns:
        List of ints in file order.
    """
    # Use a context manager so the file handle is always closed;
    # the original opened the file without ever closing it.
    with open(filename) as f:
        return [int(line.strip()) for line in f]
def load_citation_datadet(dataset_str):
    """Load a Planetoid-style citation dataset (cora/citeseer/pubmed).

    Reads the pickled ``ind.<dataset>.*`` files plus the test-index file
    from ``raw_dataset/<dataset>/`` and reassembles the full attribute
    matrix, adjacency matrix, and labels, with test rows permuted back
    into their original node positions.

    Args:
        dataset_str: dataset name, e.g. 'cora', 'citeseer', 'pubmed'.

    Returns:
        Tuple ``(attribute_dense, adj_dense, cat_labels)``: dense float64
        attribute matrix, dense float64 adjacency matrix, and a uint8
        column vector of class indices (argmax over the one-hot labels).
    """
    # Pickled pieces: train features/labels (x, y), test (tx, ty),
    # all-but-test (allx, ally), and the adjacency dict (graph).
    names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
    objects = []
    for i in range(len(names)):
        with open("raw_dataset/{}/ind.{}.{}".format(dataset_str, dataset_str, names[i]), 'rb') as f:
            if sys.version_info > (3, 0):
                # The Planetoid pickles were written by Python 2;
                # latin1 decoding keeps the byte values intact on Python 3.
                objects.append(pkl.load(f, encoding='latin1'))
            else:
                objects.append(pkl.load(f))
    x, y, tx, ty, allx, ally, graph = tuple(objects)
    # Test indices on disk are shuffled; the sorted copy gives the
    # contiguous row positions of tx/ty in the stacked matrices.
    test_idx_reorder = parse_index_file("raw_dataset/{}/ind.{}.test.index".format(dataset_str, dataset_str))
    test_idx_range = np.sort(test_idx_reorder)
    if dataset_str == 'citeseer':
        # Fix citeseer dataset (there are some isolated nodes in the graph)
        # Find isolated nodes, add them as zero-vecs into the right position
        test_idx_range_full = range(min(test_idx_reorder), max(test_idx_reorder)+1)
        tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
        tx_extended[test_idx_range-min(test_idx_range), :] = tx
        tx = tx_extended
        ty_extended = np.zeros((len(test_idx_range_full), y.shape[1]))
        ty_extended[test_idx_range-min(test_idx_range), :] = ty
        ty = ty_extended
    # Stack train+test features, then swap test rows back into the
    # node order used by the graph.
    features = sp.vstack((allx, tx)).tolil()
    features[test_idx_reorder, :] = features[test_idx_range, :]
    adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))
    # Same permutation applied to the one-hot label matrix.
    labels = np.vstack((ally, ty))
    labels[test_idx_reorder, :] = labels[test_idx_range, :]
    adj_dense = np.array(adj.todense(), dtype=np.float64)
    attribute_dense = np.array(features.todense(), dtype=np.float64)
    # One-hot -> integer class indices, kept as a column vector.
    cat_labels = np.array(np.argmax(labels, axis = 1).reshape(-1,1), dtype=np.uint8)
    return attribute_dense, adj_dense, cat_labels
# Command-line configuration for the anomaly-injection script.
parser = argparse.ArgumentParser()
parser.add_argument('--dataset', type=str, default='cora') #'BlogCatalog' 'Flickr' 'cora' 'citeseer' 'pubmed'
parser.add_argument('--seed', type=int, default=1) #random seed
parser.add_argument('--m', type=int, default=15) #num of fully connected nodes
parser.add_argument('--n', type=int)
parser.add_argument('--k', type=int, default=50) #num of clusters
args = parser.parse_args()

AD_dataset_list = ['BlogCatalog', 'Flickr']
Citation_dataset_list = ['cora', 'citeseer', 'pubmed']

# Set hyperparameters of disturbing
dataset_str = args.dataset #'BlogCatalog' 'Flickr' 'cora' 'citeseer' 'pubmed'
seed = args.seed
m = args.m #num of fully connected nodes #10 15 20 5
k = args.k

# Default n per dataset when --n is not supplied; for an unknown dataset
# with no --n, n is left unbound (same behavior as the original chain).
if args.n is not None:
    n = args.n
else:
    _n_defaults = {'cora': 5, 'citeseer': 5, 'BlogCatalog': 10, 'Flickr': 15, 'pubmed': 20}
    if dataset_str in _n_defaults:
        n = _n_defaults[dataset_str]
if __name__ == "__main__":
# Set seed
print('Random seed: {:d}. \n'.format(seed))
np.random.seed(seed)
random.seed(seed)
# Load data
print('Loading data: {}...'.format(dataset_str))
if dataset_str in AD_dataset_list:
data = sio.loadmat('./raw_dataset/{}/{}.mat'.format(dataset_str, dataset_str))
attribute_dense = np.array(data['Attributes'].todense())
attribute_dense = preprocessing.no
标签:via,idx,Self,Contrastive,dataset,np,str,test,import
From: https://blog.csdn.net/Misnearch/article/details/139624723