新增管理员页面和用户申诉、迁移审核页面,推荐系统
Change-Id: Ief5646321feb98fadb17da4b4e91caeaacdbacc5
diff --git a/recommend/model/LightGCN.py b/recommend/model/LightGCN.py
new file mode 100644
index 0000000..b6b447e
--- /dev/null
+++ b/recommend/model/LightGCN.py
@@ -0,0 +1,121 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import numpy as np
+import scipy.sparse as sp
+import math
+import networkx as nx
+import random
+from copy import deepcopy
+from utils.parse_args import args
+from model.base_model import BaseModel
+from model.operators import EdgelistDrop
+from model.operators import scatter_add, scatter_sum
+
+
# Alias: Xavier/Glorot uniform initializer used for fresh embedding tables.
init = nn.init.xavier_uniform_

class LightGCN(BaseModel):
    """Linear graph-convolution recommender over the user-item bipartite graph.

    Embeddings are propagated through ``args.num_layers`` neighbor-aggregation
    steps using symmetrically normalized edge weights, and the per-layer
    results are summed (layer 0 included) to form the final representations.
    """

    def __init__(self, dataset, pretrained_model=None, phase='pretrain'):
        """
        Args:
            dataset: supplies ``graph`` (scipy sparse user-item matrix) and
                the user/item counts consumed by BaseModel.
            pretrained_model: model exposing ``generate()``; only read when
                ``phase == 'finetune'``.
            phase: 'pretrain' / 'vanilla' / 'for_tune' build fresh Xavier
                embeddings; 'finetune' warm-starts from ``pretrained_model``;
                'continue_tune' re-initializes so a state dict can be loaded
                on top externally.
        """
        super().__init__(dataset)
        # Bi-normalized (num_users+num_items)^2 sparse adjacency built by BaseModel.
        self.adj = self._make_binorm_adj(dataset.graph)
        self.edges = self.adj._indices().t()   # (num_edges, 2) src/dst index pairs
        self.edge_norm = self.adj._values()    # per-edge 1/sqrt(deg*deg) weight

        self.phase = phase

        # Identity by default; kept as a hook so subclasses/variants can gate embeddings.
        self.emb_gate = lambda x: x

        if self.phase == 'pretrain' or self.phase == 'vanilla' or self.phase == 'for_tune':
            self.user_embedding = nn.Parameter(init(torch.empty(self.num_users, self.emb_size)))
            self.item_embedding = nn.Parameter(init(torch.empty(self.num_items, self.emb_size)))


        elif self.phase == 'finetune':
            # Warm start from the propagated embeddings of a pretrained model.
            pre_user_emb, pre_item_emb = pretrained_model.generate()
            self.user_embedding = nn.Parameter(pre_user_emb).requires_grad_(True)
            self.item_embedding = nn.Parameter(pre_item_emb).requires_grad_(True)

        elif self.phase == 'continue_tune':
            # re-initialize for loading state dict
            self.user_embedding = nn.Parameter(init(torch.empty(self.num_users, self.emb_size)))
            self.item_embedding = nn.Parameter(init(torch.empty(self.num_items, self.emb_size)))

        self.edge_dropout = EdgelistDrop()

    def _agg(self, all_emb, edges, edge_norm):
        """One propagation step: norm-weighted sum of source embeddings into targets."""
        src_emb = all_emb[edges[:, 0]]

        # bi-norm
        src_emb = src_emb * edge_norm.unsqueeze(1)

        # conv
        dst_emb = scatter_sum(src_emb, edges[:, 1], dim=0, dim_size=self.num_users+self.num_items)
        return dst_emb

    def _edge_binorm(self, edges):
        """Recompute per-edge symmetric normalization 1/sqrt(deg_u) * 1/sqrt(deg_i).

        NOTE(review): ``torch.ones_like(edges[:, 0])`` inherits the index dtype
        (presumably integer), and ``torch.pow`` with a negative exponent errors
        on integer tensors — confirm this helper is actually exercised, or cast
        the degree counts to float first. Also, items are indexed with raw ids
        (``dim_size=self.num_items``) while ``self.edges`` stores item ids
        offset by ``num_users`` — verify which edge id space callers pass in.
        """
        user_degs = scatter_add(torch.ones_like(edges[:, 0]), edges[:, 0], dim=0, dim_size=self.num_users)
        user_degs = user_degs[edges[:, 0]]
        item_degs = scatter_add(torch.ones_like(edges[:, 1]), edges[:, 1], dim=0, dim_size=self.num_items)
        item_degs = item_degs[edges[:, 1]]
        norm = torch.pow(user_degs, -0.5) * torch.pow(item_degs, -0.5)
        return norm

    def forward(self, edges, edge_norm, return_layers=False):
        """Run ``args.num_layers`` propagation steps from the ego embeddings.

        Returns:
            (user_emb, item_emb) summed across all layers (layer 0 included)
            when ``return_layers`` is False; otherwise two lists of per-layer
            embeddings split into user and item parts.
        """
        all_emb = torch.cat([self.user_embedding, self.item_embedding], dim=0)
        all_emb = self.emb_gate(all_emb)
        res_emb = [all_emb]
        for l in range(args.num_layers):
            all_emb = self._agg(res_emb[-1], edges, edge_norm)
            res_emb.append(all_emb)
        if not return_layers:
            res_emb = sum(res_emb)
            user_res_emb, item_res_emb = res_emb.split([self.num_users, self.num_items], dim=0)
        else:
            user_res_emb, item_res_emb = [], []
            for emb in res_emb:
                u_emb, i_emb = emb.split([self.num_users, self.num_items], dim=0)
                user_res_emb.append(u_emb)
                item_res_emb.append(i_emb)
        return user_res_emb, item_res_emb

    def cal_loss(self, batch_data):
        """BPR loss + weighted L2 regularization on one (user, pos, neg) batch.

        Applies edge dropout (keep rate ``1 - args.edge_dropout``) to the cached
        edge list before the forward pass. Returns (scalar loss, dict of floats
        for logging).
        """
        edges, dropout_mask = self.edge_dropout(self.edges, 1-args.edge_dropout, return_mask=True)
        # Keep edge norms aligned with the surviving edges.
        edge_norm = self.edge_norm[dropout_mask]

        # forward
        users, pos_items, neg_items = batch_data
        user_emb, item_emb = self.forward(edges, edge_norm)
        batch_user_emb = user_emb[users]
        pos_item_emb = item_emb[pos_items]
        neg_item_emb = item_emb[neg_items]
        rec_loss = self._bpr_loss(batch_user_emb, pos_item_emb, neg_item_emb)
        reg_loss = args.weight_decay * self._reg_loss(users, pos_items, neg_items)

        loss = rec_loss + reg_loss
        loss_dict = {
            "rec_loss": rec_loss.item(),
            "reg_loss": reg_loss.item(),
        }
        return loss, loss_dict

    @torch.no_grad()
    def generate(self, return_layers=False):
        """Full-graph inference pass (no dropout, no gradients)."""
        return self.forward(self.edges, self.edge_norm, return_layers=return_layers)

    @torch.no_grad()
    def generate_lgn(self, return_layers=False):
        """Alias of :meth:`generate`; body is identical (kept for caller compatibility)."""
        return self.forward(self.edges, self.edge_norm, return_layers=return_layers)

    @torch.no_grad()
    def rating(self, user_emb, item_emb):
        """Dense score matrix: dot product of every user with every item."""
        return torch.matmul(user_emb, item_emb.t())

    def _reg_loss(self, users, pos_items, neg_items):
        """L2 penalty on the ego (layer-0) embeddings of the batch, averaged by batch size."""
        u_emb = self.user_embedding[users]
        pos_i_emb = self.item_embedding[pos_items]
        neg_i_emb = self.item_embedding[neg_items]
        reg_loss = (1/2)*(u_emb.norm(2).pow(2) +
                          pos_i_emb.norm(2).pow(2) +
                          neg_i_emb.norm(2).pow(2))/float(len(users))
        return reg_loss
diff --git a/recommend/model/LightGCN_pretrained.pt b/recommend/model/LightGCN_pretrained.pt
new file mode 100644
index 0000000..825e0e2
--- /dev/null
+++ b/recommend/model/LightGCN_pretrained.pt
Binary files differ
diff --git a/recommend/model/base_model.py b/recommend/model/base_model.py
new file mode 100644
index 0000000..819442a
--- /dev/null
+++ b/recommend/model/base_model.py
@@ -0,0 +1,111 @@
+import torch
+import torch.nn as nn
+from utils.parse_args import args
+from scipy.sparse import csr_matrix
+import scipy.sparse as sp
+import numpy as np
+import torch.nn.functional as F
+
+
class BaseModel(nn.Module):
    """Common base for the graph recommenders in this package.

    Holds the user/item counts, builds (bi-)normalized sparse adjacency
    matrices from a scipy user-item interaction matrix, and provides the
    shared loss functions (BPR / NCE / InfoNCE).
    """

    def __init__(self, dataloader):
        """
        Args:
            dataloader: object exposing ``num_users`` and ``num_items``.
        """
        super(BaseModel, self).__init__()
        self.num_users = dataloader.num_users
        self.num_items = dataloader.num_items
        self.emb_size = args.emb_size

    def forward(self):
        # To be overridden by subclasses.
        pass

    def cal_loss(self, batch_data):
        # To be overridden by subclasses.
        pass

    def _check_inf(self, loss, pos_score, neg_score, edge_weight):
        """Fail fast if ``loss`` contains inf/NaN, dumping the offending scores.

        Raises:
            ValueError: when any loss entry is inf or NaN.
        """
        # find inf idx
        inf_idx = torch.isinf(loss) | torch.isnan(loss)
        if inf_idx.any():
            print("find inf in loss")
            # edge_weight may be a plain int sentinel (e.g. 0 or 1) when no
            # per-edge weights are used; only index it when it is a tensor.
            if not isinstance(edge_weight, int):
                print(edge_weight[inf_idx])
            print(f"pos_score: {pos_score[inf_idx]}")
            print(f"neg_score: {neg_score[inf_idx]}")
            raise ValueError("find inf in loss")

    def _make_binorm_adj(self, mat):
        """Build the symmetrically normalized joint adjacency D^-1/2 A D^-1/2.

        Args:
            mat: scipy sparse user-item interaction matrix.
        Returns:
            torch sparse COO tensor of shape
            (num_users+num_items, num_users+num_items) on ``args.device``.
        """
        a = csr_matrix((self.num_users, self.num_users))
        b = csr_matrix((self.num_items, self.num_items))
        # Assemble [[0, R], [R^T, 0]] and binarize interactions.
        mat = sp.vstack(
            [sp.hstack([a, mat]), sp.hstack([mat.transpose(), b])])
        mat = (mat != 0) * 1.0
        # mat = (mat + sp.eye(mat.shape[0])) * 1.0# MARK
        degree = np.array(mat.sum(axis=-1))
        d_inv_sqrt = np.reshape(np.power(degree, -0.5), [-1])
        d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.0  # isolated nodes: 0 instead of inf
        d_inv_sqrt_mat = sp.diags(d_inv_sqrt)
        mat = mat.dot(d_inv_sqrt_mat).transpose().dot(
            d_inv_sqrt_mat).tocoo()

        # make torch tensor (sparse_coo_tensor replaces the deprecated
        # torch.sparse.FloatTensor constructor; dtype comes from vals)
        idxs = torch.from_numpy(np.vstack([mat.row, mat.col]).astype(np.int64))
        vals = torch.from_numpy(mat.data.astype(np.float32))
        shape = torch.Size(mat.shape)
        return torch.sparse_coo_tensor(idxs, vals, shape).to(args.device)

    def _make_binorm_adj_self_loop(self, mat):
        """Same as :meth:`_make_binorm_adj` but with self-loops added before normalizing."""
        a = csr_matrix((self.num_users, self.num_users))
        b = csr_matrix((self.num_items, self.num_items))
        mat = sp.vstack(
            [sp.hstack([a, mat]), sp.hstack([mat.transpose(), b])])
        mat = (mat != 0) * 1.0
        mat = (mat + sp.eye(mat.shape[0])) * 1.0  # self loop
        degree = np.array(mat.sum(axis=-1))
        d_inv_sqrt = np.reshape(np.power(degree, -0.5), [-1])
        d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.0
        d_inv_sqrt_mat = sp.diags(d_inv_sqrt)
        mat = mat.dot(d_inv_sqrt_mat).transpose().dot(
            d_inv_sqrt_mat).tocoo()

        # make torch tensor
        idxs = torch.from_numpy(np.vstack([mat.row, mat.col]).astype(np.int64))
        vals = torch.from_numpy(mat.data.astype(np.float32))
        shape = torch.Size(mat.shape)
        return torch.sparse_coo_tensor(idxs, vals, shape).to(args.device)

    def _sp_matrix_to_sp_tensor(self, sp_matrix):
        """Convert any scipy sparse matrix to a coalesced torch sparse tensor."""
        coo = sp_matrix.tocoo()
        indices = torch.LongTensor([coo.row, coo.col])
        values = torch.FloatTensor(coo.data)
        return torch.sparse_coo_tensor(indices, values, coo.shape).coalesce().to(args.device)

    def _bpr_loss(self, user_emb, pos_item_emb, neg_item_emb):
        """Bayesian Personalized Ranking loss: -log sigmoid(pos - neg), mean over batch.

        The 1e-10 term guards log(0) when sigmoid underflows.
        """
        pos_score = (user_emb * pos_item_emb).sum(dim=1)
        neg_score = (user_emb * neg_item_emb).sum(dim=1)
        loss = -torch.log(1e-10 + torch.sigmoid((pos_score - neg_score)))
        self._check_inf(loss, pos_score, neg_score, 0)
        return loss.mean()

    def _nce_loss(self, pos_score, neg_score, edge_weight=1):
        """Noise-contrastive loss: -log( e^pos / (e^pos + sum_j e^neg_j) ), optionally edge-weighted."""
        numerator = torch.exp(pos_score)
        denominator = torch.exp(pos_score) + torch.exp(neg_score).sum(dim=1)
        loss = -torch.log(numerator/denominator) * edge_weight
        self._check_inf(loss, pos_score, neg_score, edge_weight)
        return loss.mean()

    def _infonce_loss(self, pos_1, pos_2, negs, tau):
        """InfoNCE contrastive loss with temperature ``tau``.

        Projects all views through ``self.cl_mlp`` (expected to be defined by
        the subclass), L2-normalizes, and contrasts pos_1 against pos_2 vs negs.
        """
        pos_1 = self.cl_mlp(pos_1)
        pos_2 = self.cl_mlp(pos_2)
        negs = self.cl_mlp(negs)
        pos_1 = F.normalize(pos_1, dim=-1)
        pos_2 = F.normalize(pos_2, dim=-1)
        negs = F.normalize(negs, dim=-1)
        pos_score = torch.mul(pos_1, pos_2).sum(dim=1)
        # B, 1, E * B, E, N -> B, N
        neg_score = torch.bmm(pos_1.unsqueeze(1), negs.transpose(1, 2)).squeeze(1)
        # infonce loss
        numerator = torch.exp(pos_score / tau)
        denominator = torch.exp(pos_score / tau) + torch.exp(neg_score / tau).sum(dim=1)
        loss = -torch.log(numerator/denominator)
        self._check_inf(loss, pos_score, neg_score, 0)
        return loss.mean()
+
\ No newline at end of file
diff --git a/recommend/model/operators.py b/recommend/model/operators.py
new file mode 100644
index 0000000..a508966
--- /dev/null
+++ b/recommend/model/operators.py
@@ -0,0 +1,52 @@
+import torch
+from typing import Optional, Tuple
+from torch import nn
+
def broadcast(src: torch.Tensor, other: torch.Tensor, dim: int):
    """Expand 1-D ``src`` so it is broadcastable against ``other`` along ``dim``.

    Inserts singleton dimensions before ``dim`` and after ``src``'s own
    dimensions, then expands to ``other``'s full shape (torch_scatter-style
    helper for building scatter index tensors).
    """
    if dim < 0:
        dim = other.dim() + dim
    if src.dim() == 1:
        for _ in range(0, dim):
            src = src.unsqueeze(0)
    for _ in range(src.dim(), other.dim()):
        src = src.unsqueeze(-1)
    src = src.expand(other.size())
    return src

def scatter_sum(src: torch.Tensor, index: torch.Tensor, dim: int = -1,
                out: Optional[torch.Tensor] = None,
                dim_size: Optional[int] = None) -> torch.Tensor:
    """Sum entries of ``src`` into ``out`` grouped by ``index`` along ``dim``.

    Args:
        src: values to scatter.
        index: 1-D (or src-shaped) target indices along ``dim``.
        dim: dimension to scatter along.
        out: optional pre-allocated accumulator; created as zeros when None.
        dim_size: size of ``dim`` in the output; inferred from ``index.max()``
            when omitted.
    Returns:
        The accumulator tensor (``out`` when supplied).
    """
    index = broadcast(index, src, dim)
    if out is None:
        size = list(src.size())
        if dim_size is not None:
            size[dim] = dim_size
        elif index.numel() == 0:
            size[dim] = 0
        else:
            size[dim] = int(index.max()) + 1
        out = torch.zeros(size, dtype=src.dtype, device=src.device)
    # Original had identical scatter_add_ calls in both branches; collapsed here.
    return out.scatter_add_(dim, index, src)

def scatter_add(src: torch.Tensor, index: torch.Tensor, dim: int = -1,
                out: Optional[torch.Tensor] = None,
                dim_size: Optional[int] = None) -> torch.Tensor:
    """Alias of :func:`scatter_sum` (torch_scatter naming compatibility)."""
    return scatter_sum(src, index, dim, out, dim_size)
+
+
class EdgelistDrop(nn.Module):
    """Randomly drops rows (edges) from an edge list tensor.

    Each edge is kept independently with probability ``keep_rate``.
    """

    def __init__(self):
        super(EdgelistDrop, self).__init__()

    def forward(self, edgeList, keep_rate, return_mask=False):
        """
        Args:
            edgeList: (num_edges, k) tensor of edges.
            keep_rate: probability in [0, 1] of keeping each edge.
            return_mask: when True also return the boolean keep mask.
        Returns:
            The surviving edges, or (edges, mask) when ``return_mask`` is True.
        """
        if keep_rate == 1.0:
            # Fast path: keep everything. Bug fix: the original returned a
            # (edges, mask) tuple here even when return_mask was False.
            mask = torch.ones(edgeList.size(0)).type(torch.bool)
            return (edgeList, mask) if return_mask else edgeList
        edgeNum = edgeList.size(0)
        # rand in [0,1) + keep_rate floors to 1 with probability keep_rate.
        mask = (torch.rand(edgeNum) + keep_rate).floor().type(torch.bool)
        newEdgeList = edgeList[mask, :]
        if return_mask:
            return newEdgeList, mask
        else:
            return newEdgeList