新增管理员页面和用户申诉、迁移审核页面,推荐系统
Change-Id: Ief5646321feb98fadb17da4b4e91caeaacdbacc5
diff --git a/recommend/utils/dataloader.py b/recommend/utils/dataloader.py
new file mode 100644
index 0000000..d519f17
--- /dev/null
+++ b/recommend/utils/dataloader.py
@@ -0,0 +1,182 @@
+from utils.parse_args import args
+from os import path
+from tqdm import tqdm
+import numpy as np
+import scipy.sparse as sp
+import torch
+import networkx as nx
+from copy import deepcopy
+from collections import defaultdict
+import pandas as pd
+
+
+class EdgeListData:
+    """User-item interaction graph loaded from tab-separated edge-list files.
+
+    A training line is ``user<TAB>items[<TAB>times]``; ``items`` and ``times``
+    are space-separated integers. A test line starts with ``user<TAB>items``.
+
+    Attributes:
+        edgelist: ``(num_edges, 2)`` int32 array of ``(user, item)`` pairs.
+        edge_time: per-edge time step, ``1 +`` the bucketed timestamp.
+        graph: ``scipy.sparse.coo_matrix`` of shape ``(num_users, num_items)``.
+        train_user_dict, test_user_dict: ``user -> list of item ids``.
+        user_hist_dict: ``user -> list of item ids`` with a (possibly empty)
+            entry for every id in ``range(num_users)``.
+        edge_time_dict: only when ``has_time``; ``node -> {neighbour: step}``
+            where item nodes are numbered ``item + num_users``.
+    """
+
+    def __init__(self, train_file, test_file, phase='pretrain', pre_dataset=None, has_time=True):
+        """Load both files and build the graph and lookup dictionaries.
+
+        Args:
+            train_file: path of the training edge-list file.
+            test_file: path of the test edge-list file.
+            phase: ``'pretrain'`` buckets time with ``args.hour_interval_pre``;
+                any other value uses ``args.hour_interval_f``.
+            pre_dataset: optional dataset whose ``num_users`` / ``num_items``
+                are reused instead of being inferred from the files.
+            has_time: whether training lines carry a timestamp column.
+        """
+        self.phase = phase
+        self.has_time = has_time
+        self.pre_dataset = pre_dataset
+
+        self.hour_interval = args.hour_interval_pre if phase == 'pretrain' else args.hour_interval_f
+
+        self.edgelist = []
+        self.edge_time = []
+        self.num_users = 0
+        self.num_items = 0
+        self.num_edges = 0
+
+        self.train_user_dict = {}
+        self.test_user_dict = {}
+
+        self._load_data(train_file, test_file, has_time)
+
+        if phase == 'pretrain':
+            # Same object as train_user_dict, so the padding below shows up there too.
+            self.user_hist_dict = self.train_user_dict
+        else:
+            # Without this branch user_hist_dict would not exist outside the
+            # pretrain phase; a copy keeps the padding out of train_user_dict.
+            self.user_hist_dict = deepcopy(self.train_user_dict)
+
+        # Users without any training interaction get an empty history.
+        for u in range(self.num_users):
+            self.user_hist_dict.setdefault(u, [])
+
+    def _read_file(self, train_file, test_file, has_time=True):
+        """Parse both files into the edge lists and per-user item dictionaries.
+
+        Appends to ``self.edgelist`` / ``self.edge_time``, fills
+        ``self.train_user_dict`` / ``self.test_user_dict`` and sets
+        ``self.test_edge_num``. Without timestamps every edge gets time ``0``.
+        Blank lines are skipped.
+
+        Raises:
+            ValueError: if a training line has a different number of items and
+                timestamps, which would misalign edges and edge times.
+        """
+        with open(train_file, 'r') as f:
+            for line_no, line in enumerate(f, start=1):
+                line = line.strip()
+                if not line:
+                    continue
+                fields = line.split('\t')
+                if has_time:
+                    user, items, times = fields
+                    times = [int(t) for t in times.split(" ")]
+                else:
+                    user, items = fields[:2]
+                    times = None
+                user = int(user)
+                items = [int(i) for i in items.split(" ")]
+                if times is None:
+                    times = [0] * len(items)
+                elif len(times) != len(items):
+                    raise ValueError(
+                        f"{train_file}:{line_no}: {len(items)} items but {len(times)} timestamps"
+                    )
+
+                self.edgelist.extend((user, i) for i in items)
+                self.edge_time.extend(times)
+                self.train_user_dict[user] = items
+
+        self.test_edge_num = 0
+        with open(test_file, 'r') as f:
+            for line in f:
+                line = line.strip()
+                if not line:
+                    continue
+                user, items = line.split('\t')[:2]
+                items = [int(i) for i in items.split(" ")]
+                self.test_user_dict[int(user)] = items
+                self.test_edge_num += len(items)
+
+    def _load_data(self, train_file, test_file, has_time=True):
+        """Read the files, then derive the arrays, entity counts and graph.
+
+        Entity counts come from ``pre_dataset`` when one was given; otherwise
+        they are the largest id seen in either file plus one.
+        """
+        self._read_file(train_file, test_file, has_time)
+
+        # reshape keeps the array two-dimensional even when there are no edges.
+        self.edgelist = np.array(self.edgelist, dtype=np.int32).reshape(-1, 2)
+        self.edge_time = 1 + self.timestamp_to_time_step(np.array(self.edge_time, dtype=np.int32))
+        self.num_edges = len(self.edgelist)
+        if self.pre_dataset is not None:
+            self.num_users = self.pre_dataset.num_users
+            self.num_items = self.pre_dataset.num_items
+        else:
+            user_ids = list(self.test_user_dict)
+            item_ids = [i for items in self.test_user_dict.values() for i in items]
+            if self.num_edges:
+                user_ids.append(int(self.edgelist[:, 0].max()))
+                item_ids.append(int(self.edgelist[:, 1].max()))
+            # ``default`` covers an empty split instead of failing on max([]).
+            self.num_users = max(user_ids, default=-1) + 1
+            self.num_items = max(item_ids, default=-1) + 1
+
+        self.graph = sp.coo_matrix(
+            (np.ones(self.num_edges), (self.edgelist[:, 0], self.edgelist[:, 1])),
+            shape=(self.num_users, self.num_items),
+        )
+
+        if self.has_time:
+            # Bipartite adjacency with time steps: item nodes are shifted by
+            # num_users so user and item ids share one key space.
+            self.edge_time_dict = defaultdict(dict)
+            for (user, item), step in zip(self.edgelist, self.edge_time):
+                item_node = item + self.num_users
+                self.edge_time_dict[user][item_node] = step
+                self.edge_time_dict[item_node][user] = step
+
+    def timestamp_to_time_step(self, timestamp_arr, least_time=None):
+        """Bucket timestamps into steps of ``self.hour_interval`` hours.
+
+        Args:
+            timestamp_arr: array-like of timestamps; the ``* 3600`` conversion
+                implies seconds (presumably Unix time, to be confirmed).
+            least_time: timestamp mapped to step 0; defaults to the minimum of
+                ``timestamp_arr``.
+
+        Returns:
+            ``(timestamp_arr - least_time) // (hour_interval * 3600)`` as a
+            NumPy array; a float ``hour_interval`` produces a float array.
+
+        Raises:
+            ValueError: if ``self.hour_interval`` is not positive.
+        """
+        interval_seconds = self.hour_interval * 3600
+        if interval_seconds <= 0:
+            raise ValueError(f"hour_interval must be positive, got {self.hour_interval}")
+        timestamp_arr = np.asarray(timestamp_arr)
+        if timestamp_arr.size == 0:
+            return timestamp_arr  # np.min would raise on empty input
+        if least_time is None:
+            least_time = np.min(timestamp_arr)
+        return (timestamp_arr - least_time) // interval_seconds
diff --git a/recommend/utils/parse_args.py b/recommend/utils/parse_args.py
new file mode 100644
index 0000000..3e86a47
--- /dev/null
+++ b/recommend/utils/parse_args.py
@@ -0,0 +1,62 @@
+import argparse
+
+
+def parse_args():
+    """Build the GraphPro command-line parser.
+
+    Despite the name, this returns the configured ``ArgumentParser`` itself;
+    the module-level code below does the actual parsing.
+    """
+    parser = argparse.ArgumentParser(description='GraphPro')
+    parser.add_argument('--phase', type=str, default='pretrain')
+    parser.add_argument('--plugin', action='store_true', default=False)
+    parser.add_argument('--save_path', type=str, default="saved", help='where to save model and logs')
+    parser.add_argument('--data_path', type=str, default="dataset/yelp", help='where to load data')
+    parser.add_argument('--exp_name', type=str, default='1')
+    parser.add_argument('--desc', type=str, default='')
+    parser.add_argument('--ab', type=str, default='full')
+    parser.add_argument('--log', type=int, default=1)
+
+    parser.add_argument('--device', type=str, default="cuda")
+    parser.add_argument('--model', type=str, default='GraphPro')
+    parser.add_argument('--pre_model', type=str, default='GraphPro')
+    parser.add_argument('--f_model', type=str, default='GraphPro')
+    parser.add_argument('--pre_model_path', type=str, default='pretrained_model.pt')
+
+    # Interval widths in hours used to bucket edge timestamps; the pretrain one accepts fractions.
+    parser.add_argument('--hour_interval_pre', type=float, default=1)
+    parser.add_argument('--hour_interval_f', type=int, default=1)
+    parser.add_argument('--emb_dropout', type=float, default=0)
+
+    parser.add_argument('--updt_inter', type=int, default=1)
+    parser.add_argument('--samp_decay', type=float, default=0.05)
+
+    parser.add_argument('--edge_dropout', type=float, default=0.5)
+    parser.add_argument('--emb_size', type=int, default=64)
+    parser.add_argument('--batch_size', type=int, default=2048)
+    parser.add_argument('--eval_batch_size', type=int, default=512)
+    parser.add_argument('--seed', type=int, default=2023)
+    parser.add_argument('--num_epochs', type=int, default=300)
+    parser.add_argument('--neighbor_sample_num', type=int, default=5)
+    parser.add_argument('--lr', type=float, default=0.001)
+    parser.add_argument('--weight_decay', type=float, default=1e-4)
+    parser.add_argument('--metrics', type=str, default='recall;ndcg')
+    parser.add_argument('--metrics_k', type=str, default='20')
+    parser.add_argument('--early_stop_patience', type=int, default=10)
+    parser.add_argument('--neg_num', type=int, default=1)
+
+    parser.add_argument('--num_layers', type=int, default=3)
+
+    return parser
+
+
+parser = parse_args()
+# parse_known_args ignores options this parser does not define, so the module
+# can be imported from entry points that have their own command line. Do not
+# re-parse with parser.parse_args(): it exits on any unknown option and would
+# discard this tolerant result.
+args = parser.parse_known_args()[0]
+# args.model follows the pretrain model unless that is LightGCN paired with a
+# different fine-tune model, in which case the --model value is kept.
+if args.pre_model == args.f_model or args.pre_model != 'LightGCN':
+    args.model = args.pre_model
\ No newline at end of file