Add admin pages, user appeal and migration review pages, and the recommendation system

Change-Id: Ief5646321feb98fadb17da4b4e91caeaacdbacc5
diff --git a/recommend/utils/dataloader.py b/recommend/utils/dataloader.py
new file mode 100644
index 0000000..d519f17
--- /dev/null
+++ b/recommend/utils/dataloader.py
@@ -0,0 +1,92 @@
+from utils.parse_args import args
+from os import path
+from tqdm import tqdm
+import numpy as np
+import scipy.sparse as sp
+import torch
+import networkx as nx
+from copy import deepcopy
+from collections import defaultdict
+import pandas as pd
+
+
+class EdgeListData:
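+    """Edge-list dataset used by the recommendation model.
+
+    Reads tab-separated interaction files (one user per line), builds a sparse
+    user-item matrix and, when timestamps are present, a per-edge time-step
+    dictionary. A fine-tuning split can reuse the user/item counts of a
+    pre-training split via ``pre_dataset``.
+
+    Hypothetical usage (the paths are placeholders, not part of this change):
+        pre_data = EdgeListData("train.txt", "test.txt", phase='pretrain')
+        ft_data = EdgeListData("ft_train.txt", "ft_test.txt",
+                               phase='finetune', pre_dataset=pre_data)
+    """
+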
+    def __init__(self, train_file, test_file, phase='pretrain', pre_dataset=None, has_time=True):
+        self.phase = phase
+        self.has_time = has_time
+        self.pre_dataset = pre_dataset
+
+        self.hour_interval = args.hour_interval_pre if phase == 'pretrain' else args.hour_interval_f
+
+        self.edgelist = []
+        self.edge_time = []
+        self.num_users = 0
+        self.num_items = 0
+        self.num_edges = 0
+
+        self.train_user_dict = {}
+        self.test_user_dict = {}
+
+        self._load_data(train_file, test_file, has_time)
+
+        if phase == 'pretrain':
+            self.user_hist_dict = self.train_user_dict
+        else:
+            # assumption: for non-pretrain phases the user history defaults to
+            # this split's own training interactions (copied so the fill below
+            # does not mutate train_user_dict)
+            self.user_hist_dict = deepcopy(self.train_user_dict)
+
+        # users that never appear in the training edges get an empty history
+        users_has_hist = set(self.user_hist_dict.keys())
+        all_users = set(range(self.num_users))
+        users_no_hist = all_users - users_has_hist
+        for u in users_no_hist:
+            self.user_hist_dict[u] = []
+
+    def _read_file(self, train_file, test_file, has_time=True):
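+        # Expected line format (inferred from the parsing below):
+        #   train file: user \t item1 item2 ... [\t time1 time2 ...]
+        #   test file:  user \t item1 item2 ...
+        # The time column is only read when has_time is True.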
+        with open(train_file, 'r') as f:
+            for line in f:
+                line = line.strip().split('\t')
+                if not has_time:
+                    user, items = line[:2]
+                    times = " ".join(["0"] * len(items.split(" ")))
+                else:
+                    user, items, times = line
+                    
+                for i in items.split(" "):
+                    self.edgelist.append((int(user), int(i)))
+                for i in times.split(" "):
+                    self.edge_time.append(int(i))
+                self.train_user_dict[int(user)] = [int(i) for i in items.split(" ")]
+
+        self.test_edge_num = 0
+        with open(test_file, 'r') as f:
+            for line in f:
+                line = line.strip().split('\t')
+                user, items = line[:2]
+                self.test_user_dict[int(user)] = [int(i) for i in items.split(" ")]
+                self.test_edge_num += len(self.test_user_dict[int(user)])
+
+    def _load_data(self, train_file, test_file, has_time=True):
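+        # parse the raw files, then derive user/item counts, the interaction
+        # matrix and (optionally) per-edge time steps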
+        self._read_file(train_file, test_file, has_time)
+
+        self.edgelist = np.array(self.edgelist, dtype=np.int32)
+        self.edge_time = 1 + self.timestamp_to_time_step(np.array(self.edge_time, dtype=np.int32))
+        self.num_edges = len(self.edgelist)
+        if self.pre_dataset is not None:
+            self.num_users = self.pre_dataset.num_users
+            self.num_items = self.pre_dataset.num_items
+        else:
+            self.num_users = max(np.max(self.edgelist[:, 0]) + 1,
+                                 np.max(list(self.test_user_dict.keys())) + 1)
+            self.num_items = max(np.max(self.edgelist[:, 1]) + 1,
+                                 np.max([np.max(items) for items in self.test_user_dict.values()]) + 1)
+
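+        # binary user-item interaction matrix in COO format (users x items)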
+        self.graph = sp.coo_matrix((np.ones(self.num_edges), (self.edgelist[:, 0], self.edgelist[:, 1])), shape=(self.num_users, self.num_items))
+
+        if self.has_time:
+            self.edge_time_dict = defaultdict(dict)
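+            # item ids are offset by num_users so users and items share one
+            # node id space; the edge time is stored in both directions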
+            for i in range(len(self.edgelist)):
+                self.edge_time_dict[self.edgelist[i][0]][self.edgelist[i][1]+self.num_users] = self.edge_time[i]
+                self.edge_time_dict[self.edgelist[i][1]+self.num_users][self.edgelist[i][0]] = self.edge_time[i]
+
+    def timestamp_to_time_step(self, timestamp_arr, least_time=None):
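+        # bucket raw timestamps into discrete steps of `hour_interval` hours,
+        # counted from the earliest timestamp (or from `least_time` if given)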
+        interval_hour = self.hour_interval
+        if least_time is None:
+            least_time = np.min(timestamp_arr)
+        timestamp_arr = timestamp_arr - least_time
+        timestamp_arr = timestamp_arr // (interval_hour * 3600)
+        return timestamp_arr
diff --git a/recommend/utils/parse_args.py b/recommend/utils/parse_args.py
new file mode 100644
index 0000000..3e86a47
--- /dev/null
+++ b/recommend/utils/parse_args.py
@@ -0,0 +1,57 @@
+import argparse
+
+def parse_args():
+    parser = argparse.ArgumentParser(description='GraphPro')
+    parser.add_argument('--phase', type=str, default='pretrain')
+    parser.add_argument('--plugin', action='store_true', default=False)
+    parser.add_argument('--save_path', type=str, default="saved", help='where to save model and logs')
+    parser.add_argument('--data_path', type=str, default="dataset/yelp", help='where to load data')
+    parser.add_argument('--exp_name', type=str, default='1')
+    parser.add_argument('--desc', type=str, default='')
+    parser.add_argument('--ab', type=str, default='full')
+    parser.add_argument('--log', type=int, default=1)
+
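+    # device and model selection: pre_model / f_model name the models used in
+    # the pre-training and fine-tuning phases (see the sync at the end of file)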
+    parser.add_argument('--device', type=str, default="cuda")
+    parser.add_argument('--model', type=str, default='GraphPro')
+    parser.add_argument('--pre_model', type=str, default='GraphPro')
+    parser.add_argument('--f_model', type=str, default='GraphPro')
+    parser.add_argument('--pre_model_path', type=str, default='pretrained_model.pt')
+
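+    # granularity (in hours) of the time buckets built by EdgeListData for the
+    # pre-training and fine-tuning splits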
+    parser.add_argument('--hour_interval_pre', type=float, default=1)
+    parser.add_argument('--hour_interval_f', type=int, default=1)
+    parser.add_argument('--emb_dropout', type=float, default=0)
+
+    parser.add_argument('--updt_inter', type=int, default=1)
+    parser.add_argument('--samp_decay', type=float, default=0.05)
+    
+    parser.add_argument('--edge_dropout', type=float, default=0.5)
+    parser.add_argument('--emb_size', type=int, default=64)
+    parser.add_argument('--batch_size', type=int, default=2048)
+    parser.add_argument('--eval_batch_size', type=int, default=512)
+    parser.add_argument('--seed', type=int, default=2023)
+    parser.add_argument('--num_epochs', type=int, default=300)
+    parser.add_argument('--neighbor_sample_num', type=int, default=5)
+    parser.add_argument('--lr', type=float, default=0.001)
+    parser.add_argument('--weight_decay', type=float, default=1e-4)
+    parser.add_argument('--metrics', type=str, default='recall;ndcg')
+    parser.add_argument('--metrics_k', type=str, default='20')
+    parser.add_argument('--early_stop_patience', type=int, default=10)
+    parser.add_argument('--neg_num', type=int, default=1)
+
+    parser.add_argument('--num_layers', type=int, default=3)
+
+    return parser
+
+parser = parse_args()
+args = parser.parse_args()
+
+# keep args.model in sync with the chosen backbones: use the pre-training model
+# unless a plain LightGCN pre-model is paired with a different fine-tuning model
+if args.pre_model == args.f_model or args.pre_model != 'LightGCN':
+    args.model = args.pre_model