新增管理员页面和用户申诉、迁移审核页面,推荐系统

Change-Id: Ief5646321feb98fadb17da4b4e91caeaacdbacc5
diff --git a/recommend/model/LightGCN.py b/recommend/model/LightGCN.py
new file mode 100644
index 0000000..b6b447e
--- /dev/null
+++ b/recommend/model/LightGCN.py
@@ -0,0 +1,121 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+import numpy as np
+import scipy.sparse as sp
+import math
+import networkx as nx
+import random
+from copy import deepcopy
+from utils.parse_args import args
+from model.base_model import BaseModel
+from model.operators import EdgelistDrop
+from model.operators import scatter_add, scatter_sum
+
+
+init = nn.init.xavier_uniform_
+
+class LightGCN(BaseModel):
+    def __init__(self, dataset, pretrained_model=None, phase='pretrain'):
+        super().__init__(dataset)
+        self.adj = self._make_binorm_adj(dataset.graph)
+        self.edges = self.adj._indices().t()
+        self.edge_norm = self.adj._values()
+
+        self.phase = phase
+
+        self.emb_gate = lambda x: x
+
+        if self.phase == 'pretrain' or self.phase == 'vanilla' or self.phase == 'for_tune':
+            self.user_embedding = nn.Parameter(init(torch.empty(self.num_users, self.emb_size)))
+            self.item_embedding = nn.Parameter(init(torch.empty(self.num_items, self.emb_size)))
+
+
+        elif self.phase == 'finetune':
+            pre_user_emb, pre_item_emb = pretrained_model.generate()
+            self.user_embedding = nn.Parameter(pre_user_emb).requires_grad_(True)
+            self.item_embedding = nn.Parameter(pre_item_emb).requires_grad_(True)
+
+        elif self.phase == 'continue_tune':
+            # re-initialize for loading state dict
+            self.user_embedding = nn.Parameter(init(torch.empty(self.num_users, self.emb_size)))
+            self.item_embedding = nn.Parameter(init(torch.empty(self.num_items, self.emb_size)))
+
+        self.edge_dropout = EdgelistDrop()
+
+    def _agg(self, all_emb, edges, edge_norm):
+        src_emb = all_emb[edges[:, 0]]
+
+        # bi-norm
+        src_emb = src_emb * edge_norm.unsqueeze(1)
+
+        # conv
+        dst_emb = scatter_sum(src_emb, edges[:, 1], dim=0, dim_size=self.num_users+self.num_items)
+        return dst_emb
+    
+    def _edge_binorm(self, edges):
+        user_degs = scatter_add(torch.ones_like(edges[:, 0]), edges[:, 0], dim=0, dim_size=self.num_users)
+        user_degs = user_degs[edges[:, 0]]
+        item_degs = scatter_add(torch.ones_like(edges[:, 1]), edges[:, 1], dim=0, dim_size=self.num_items)
+        item_degs = item_degs[edges[:, 1]]
+        norm = torch.pow(user_degs, -0.5) * torch.pow(item_degs, -0.5)
+        return norm
+
+    def forward(self, edges, edge_norm, return_layers=False):
+        all_emb = torch.cat([self.user_embedding, self.item_embedding], dim=0)
+        all_emb = self.emb_gate(all_emb)
+        res_emb = [all_emb]
+        for l in range(args.num_layers):
+            all_emb = self._agg(res_emb[-1], edges, edge_norm)
+            res_emb.append(all_emb)
+        if not return_layers:
+            res_emb = sum(res_emb)
+            user_res_emb, item_res_emb = res_emb.split([self.num_users, self.num_items], dim=0)
+        else:
+            user_res_emb, item_res_emb = [], []
+            for emb in res_emb:
+                u_emb, i_emb = emb.split([self.num_users, self.num_items], dim=0)
+                user_res_emb.append(u_emb)
+                item_res_emb.append(i_emb)
+        return user_res_emb, item_res_emb
+    
+    def cal_loss(self, batch_data):
+        edges, dropout_mask = self.edge_dropout(self.edges, 1-args.edge_dropout, return_mask=True)
+        edge_norm = self.edge_norm[dropout_mask]
+
+        # forward
+        users, pos_items, neg_items = batch_data
+        user_emb, item_emb = self.forward(edges, edge_norm)
+        batch_user_emb = user_emb[users]
+        pos_item_emb = item_emb[pos_items]
+        neg_item_emb = item_emb[neg_items]
+        rec_loss = self._bpr_loss(batch_user_emb, pos_item_emb, neg_item_emb)
+        reg_loss = args.weight_decay * self._reg_loss(users, pos_items, neg_items)
+
+        loss = rec_loss + reg_loss
+        loss_dict = {
+            "rec_loss": rec_loss.item(),
+            "reg_loss": reg_loss.item(),
+        }
+        return loss, loss_dict
+    
+    @torch.no_grad()
+    def generate(self, return_layers=False):
+        return self.forward(self.edges, self.edge_norm, return_layers=return_layers)
+    
+    @torch.no_grad()
+    def generate_lgn(self, return_layers=False):
+        return self.forward(self.edges, self.edge_norm, return_layers=return_layers)
+    
+    @torch.no_grad()
+    def rating(self, user_emb, item_emb):
+        return torch.matmul(user_emb, item_emb.t())
+    
+    def _reg_loss(self, users, pos_items, neg_items):
+        u_emb = self.user_embedding[users]
+        pos_i_emb = self.item_embedding[pos_items]
+        neg_i_emb = self.item_embedding[neg_items]
+        reg_loss = (1/2)*(u_emb.norm(2).pow(2) +
+                          pos_i_emb.norm(2).pow(2) +
+                          neg_i_emb.norm(2).pow(2))/float(len(users))
+        return reg_loss
diff --git a/recommend/model/LightGCN_pretrained.pt b/recommend/model/LightGCN_pretrained.pt
new file mode 100644
index 0000000..825e0e2
--- /dev/null
+++ b/recommend/model/LightGCN_pretrained.pt
Binary files differ
diff --git a/recommend/model/base_model.py b/recommend/model/base_model.py
new file mode 100644
index 0000000..819442a
--- /dev/null
+++ b/recommend/model/base_model.py
@@ -0,0 +1,111 @@
+import torch
+import torch.nn as nn
+from utils.parse_args import args
+from scipy.sparse import csr_matrix
+import scipy.sparse as sp
+import numpy as np
+import torch.nn.functional as F
+
+
+class BaseModel(nn.Module):
+    def __init__(self, dataloader):
+        super(BaseModel, self).__init__()
+        self.num_users = dataloader.num_users
+        self.num_items = dataloader.num_items
+        self.emb_size = args.emb_size
+
+    def forward(self):
+        pass
+
+    def cal_loss(self, batch_data):
+        pass
+
+    def _check_inf(self, loss, pos_score, neg_score, edge_weight):
+        # find inf idx
+        inf_idx = torch.isinf(loss) | torch.isnan(loss)
+        if inf_idx.any():
+            print("find inf in loss")
+            if type(edge_weight) != int:
+                print(edge_weight[inf_idx])
+            print(f"pos_score: {pos_score[inf_idx]}")
+            print(f"neg_score: {neg_score[inf_idx]}")
+            raise ValueError("find inf in loss")
+
+    def _make_binorm_adj(self, mat):
+        a = csr_matrix((self.num_users, self.num_users))
+        b = csr_matrix((self.num_items, self.num_items))
+        mat = sp.vstack(
+            [sp.hstack([a, mat]), sp.hstack([mat.transpose(), b])])
+        mat = (mat != 0) * 1.0
+        # mat = (mat + sp.eye(mat.shape[0])) * 1.0# MARK
+        degree = np.array(mat.sum(axis=-1))
+        d_inv_sqrt = np.reshape(np.power(degree, -0.5), [-1])
+        d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.0
+        d_inv_sqrt_mat = sp.diags(d_inv_sqrt)
+        mat = mat.dot(d_inv_sqrt_mat).transpose().dot(
+            d_inv_sqrt_mat).tocoo()
+
+        # make torch tensor
+        idxs = torch.from_numpy(np.vstack([mat.row, mat.col]).astype(np.int64))
+        vals = torch.from_numpy(mat.data.astype(np.float32))
+        shape = torch.Size(mat.shape)
+        return torch.sparse.FloatTensor(idxs, vals, shape).to(args.device)
+    
+    def _make_binorm_adj_self_loop(self, mat):
+        a = csr_matrix((self.num_users, self.num_users))
+        b = csr_matrix((self.num_items, self.num_items))
+        mat = sp.vstack(
+            [sp.hstack([a, mat]), sp.hstack([mat.transpose(), b])])
+        mat = (mat != 0) * 1.0
+        mat = (mat + sp.eye(mat.shape[0])) * 1.0 # self loop
+        degree = np.array(mat.sum(axis=-1))
+        d_inv_sqrt = np.reshape(np.power(degree, -0.5), [-1])
+        d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0.0
+        d_inv_sqrt_mat = sp.diags(d_inv_sqrt)
+        mat = mat.dot(d_inv_sqrt_mat).transpose().dot(
+            d_inv_sqrt_mat).tocoo()
+
+        # make torch tensor
+        idxs = torch.from_numpy(np.vstack([mat.row, mat.col]).astype(np.int64))
+        vals = torch.from_numpy(mat.data.astype(np.float32))
+        shape = torch.Size(mat.shape)
+        return torch.sparse.FloatTensor(idxs, vals, shape).to(args.device)
+
+
+    def _sp_matrix_to_sp_tensor(self, sp_matrix):
+        coo = sp_matrix.tocoo()
+        indices = torch.LongTensor([coo.row, coo.col])
+        values = torch.FloatTensor(coo.data)
+        return torch.sparse.FloatTensor(indices, values, coo.shape).coalesce().to(args.device)
+
+    def _bpr_loss(self, user_emb, pos_item_emb, neg_item_emb):
+        pos_score = (user_emb * pos_item_emb).sum(dim=1)
+        neg_score = (user_emb * neg_item_emb).sum(dim=1)
+        loss = -torch.log(1e-10 + torch.sigmoid((pos_score - neg_score)))
+        self._check_inf(loss, pos_score, neg_score, 0)
+        return loss.mean()
+    
+    def _nce_loss(self, pos_score, neg_score, edge_weight=1):
+        numerator = torch.exp(pos_score)
+        denominator = torch.exp(pos_score) + torch.exp(neg_score).sum(dim=1)
+        loss = -torch.log(numerator/denominator) * edge_weight
+        self._check_inf(loss, pos_score, neg_score, edge_weight)
+        return loss.mean()
+    
+    def _infonce_loss(self, pos_1, pos_2, negs, tau):
+        pos_1 = self.cl_mlp(pos_1)
+        pos_2 = self.cl_mlp(pos_2)
+        negs = self.cl_mlp(negs)
+        pos_1 = F.normalize(pos_1, dim=-1)
+        pos_2 = F.normalize(pos_2, dim=-1)
+        negs = F.normalize(negs, dim=-1)
+        pos_score = torch.mul(pos_1, pos_2).sum(dim=1)
+        # B, 1, E * B, E, N -> B, N
+        neg_score = torch.bmm(pos_1.unsqueeze(1), negs.transpose(1, 2)).squeeze(1)
+        # infonce loss
+        numerator = torch.exp(pos_score / tau)
+        denominator = torch.exp(pos_score / tau) + torch.exp(neg_score / tau).sum(dim=1)
+        loss = -torch.log(numerator/denominator)
+        self._check_inf(loss, pos_score, neg_score, 0)
+        return loss.mean()
+    
\ No newline at end of file
diff --git a/recommend/model/operators.py b/recommend/model/operators.py
new file mode 100644
index 0000000..a508966
--- /dev/null
+++ b/recommend/model/operators.py
@@ -0,0 +1,52 @@
+import torch
+from typing import Optional, Tuple
+from torch import nn
+
+def broadcast(src: torch.Tensor, other: torch.Tensor, dim: int):
+    if dim < 0:
+        dim = other.dim() + dim
+    if src.dim() == 1:
+        for _ in range(0, dim):
+            src = src.unsqueeze(0)
+    for _ in range(src.dim(), other.dim()):
+        src = src.unsqueeze(-1)
+    src = src.expand(other.size())
+    return src
+
+def scatter_sum(src: torch.Tensor, index: torch.Tensor, dim: int = -1,
+                out: Optional[torch.Tensor] = None,
+                dim_size: Optional[int] = None) -> torch.Tensor:
+    index = broadcast(index, src, dim)
+    if out is None:
+        size = list(src.size())
+        if dim_size is not None:
+            size[dim] = dim_size
+        elif index.numel() == 0:
+            size[dim] = 0
+        else:
+            size[dim] = int(index.max()) + 1
+        out = torch.zeros(size, dtype=src.dtype, device=src.device)
+        return out.scatter_add_(dim, index, src)
+    else:
+        return out.scatter_add_(dim, index, src)
+
+def scatter_add(src: torch.Tensor, index: torch.Tensor, dim: int = -1,
+                out: Optional[torch.Tensor] = None,
+                dim_size: Optional[int] = None) -> torch.Tensor:
+    return scatter_sum(src, index, dim, out, dim_size)
+
+
+class EdgelistDrop(nn.Module):
+    def __init__(self):
+        super(EdgelistDrop, self).__init__()
+
+    def forward(self, edgeList, keep_rate, return_mask=False):
+        if keep_rate == 1.0:
+            return edgeList, torch.ones(edgeList.size(0)).type(torch.bool)
+        edgeNum = edgeList.size(0)
+        mask = (torch.rand(edgeNum) + keep_rate).floor().type(torch.bool)
+        newEdgeList = edgeList[mask, :]
+        if return_mask:
+            return newEdgeList, mask
+        else:
+            return newEdgeList