推荐系统

Change-Id: I49b9205568f1ccf88b32b08511aff8b0bea8d1bd
diff --git a/rhj/backend/app/models/recall/hot_recall.py b/rhj/backend/app/models/recall/hot_recall.py
new file mode 100644
index 0000000..dbc716c
--- /dev/null
+++ b/rhj/backend/app/models/recall/hot_recall.py
@@ -0,0 +1,163 @@
+import pymysql
+from typing import List, Tuple, Dict
+import numpy as np
+
+class HotRecall:
+    """Popularity-based ("hot") recall.
+
+    Ranks every published post by a heat score that blends the stored
+    ``posts.heat`` value with weighted per-type behavior counts and an
+    exponential age decay, and serves the top of that ranking.
+    """
+
+    # Score contributed by each distinct user per behavior type.
+    BEHAVIOR_WEIGHTS = {
+        'like': 1.0,
+        'favorite': 2.0,
+        'comment': 3.0,
+        'view': 0.1,
+        'share': 5.0,
+    }
+    # Blend between the stored heat value and the behavior score.
+    BASE_HEAT_WEIGHT = 0.3
+    BEHAVIOR_SCORE_WEIGHT = 0.7
+    # Time constant of exp(-age_days / DECAY_DAYS): the score falls to 1/e
+    # after this many days (for 30 the half-life is about 20.8 days).
+    DECAY_DAYS = 30.0
+
+    def __init__(self, db_config: dict):
+        """Store the connection settings; no query is run here.
+
+        Args:
+            db_config: Keyword arguments forwarded to ``pymysql.connect``.
+        """
+        self.db_config = db_config
+        # (post_id, score) pairs, highest score first; filled by train().
+        self.hot_items: List[Tuple[int, float]] = []
+
+    def _calculate_heat_scores(self):
+        """Recompute ``self.hot_items`` from the posts and behaviors tables.
+
+        ``self.hot_items`` is only replaced once every row has been scored,
+        so a failed refresh leaves the previous ranking in place.
+        """
+        conn = pymysql.connect(**self.db_config)
+        try:
+            # The cursor is created inside the try block and closed by its
+            # own context manager, so a failing conn.cursor() cannot raise
+            # a NameError from the cleanup code and hide the real error.
+            with conn.cursor() as cursor:
+                # One row per published post: stored heat, distinct-user
+                # count per behavior type, and age in days.
+                cursor.execute("""
+                    SELECT
+                        p.id,
+                        p.heat,
+                        COUNT(DISTINCT CASE WHEN b.type = 'like' THEN b.user_id END) as like_count,
+                        COUNT(DISTINCT CASE WHEN b.type = 'favorite' THEN b.user_id END) as favorite_count,
+                        COUNT(DISTINCT CASE WHEN b.type = 'comment' THEN b.user_id END) as comment_count,
+                        COUNT(DISTINCT CASE WHEN b.type = 'view' THEN b.user_id END) as view_count,
+                        COUNT(DISTINCT CASE WHEN b.type = 'share' THEN b.user_id END) as share_count,
+                        DATEDIFF(NOW(), p.created_at) as days_since_created
+                    FROM posts p
+                    LEFT JOIN behaviors b ON p.id = b.post_id
+                    WHERE p.status = 'published'
+                    GROUP BY p.id, p.heat, p.created_at
+                """)
+                results = cursor.fetchall()
+        finally:
+            conn.close()
+
+        weights = self.BEHAVIOR_WEIGHTS
+        items_with_scores = []
+        for row in results:
+            (post_id, heat, like_count, favorite_count, comment_count,
+             view_count, share_count, days_since_created) = row
+
+            # SQL NULL arrives as None and counts as 0.
+            behavior_score = (
+                (like_count or 0) * weights['like'] +
+                (favorite_count or 0) * weights['favorite'] +
+                (comment_count or 0) * weights['comment'] +
+                (view_count or 0) * weights['view'] +
+                (share_count or 0) * weights['share']
+            )
+
+            # float() also accepts the Decimal a DECIMAL column yields,
+            # which would raise TypeError when multiplied by a float.
+            base_heat = float(heat or 0)
+
+            # Newer posts score higher. A negative age (created_at in the
+            # future) is clamped to 0 so it cannot inflate the score.
+            age_days = max(float(days_since_created or 0), 0.0)
+            time_decay = np.exp(-age_days / self.DECAY_DAYS)
+
+            final_score = (
+                base_heat * self.BASE_HEAT_WEIGHT +
+                behavior_score * self.BEHAVIOR_SCORE_WEIGHT
+            ) * time_decay
+            items_with_scores.append((post_id, final_score))
+
+        # Highest score first; the sort is stable for equal scores.
+        self.hot_items = sorted(items_with_scores, key=lambda x: x[1], reverse=True)
+
+    def _fetch_interacted_post_ids(self, user_id: int) -> set:
+        """Return the ids of posts the user liked, favorited or commented on."""
+        conn = pymysql.connect(**self.db_config)
+        try:
+            with conn.cursor() as cursor:
+                cursor.execute("""
+                    SELECT DISTINCT post_id
+                    FROM behaviors
+                    WHERE user_id = %s AND type IN ('like', 'favorite', 'comment')
+                """, (user_id,))
+                return {row[0] for row in cursor.fetchall()}
+        finally:
+            conn.close()
+
+    def train(self):
+        """Build the hot-item ranking from the database."""
+        print("开始计算热度分数...")
+        self._calculate_heat_scores()
+        print(f"热度召回模型训练完成,共{len(self.hot_items)}个物品")
+
+    def recall(self, user_id: int, num_items: int = 50) -> List[Tuple[int, float]]:
+        """Return the hottest posts the user has not strongly interacted with.
+
+        Runs train() first when the ranking is empty. Posts the user liked,
+        favorited or commented on are removed; viewed or shared ones stay.
+
+        Args:
+            user_id: User to recall for.
+            num_items: Maximum number of items to return.
+
+        Returns:
+            Up to ``num_items`` (item_id, score) tuples, highest score first.
+        """
+        if not getattr(self, 'hot_items', None):
+            self.train()
+
+        interacted = self._fetch_interacted_post_ids(user_id)
+        filtered_items = [
+            (item_id, score) for item_id, score in self.hot_items
+            if item_id not in interacted
+        ]
+
+        # The filter above is already the strong-interaction one, so there
+        # is nothing left to relax; a shortfall is only reported.
+        if len(filtered_items) < num_items:
+            print(f"热度召回:过滤后候选不足({len(filtered_items)})")
+
+        # max() keeps a negative num_items from slicing from the end.
+        return filtered_items[:max(num_items, 0)]
+
+    def get_top_hot_items(self, num_items: int = 100) -> List[Tuple[int, float]]:
+        """Return the global top of the ranking, ignoring the user.
+
+        Args:
+            num_items: Maximum number of items to return.
+
+        Returns:
+            Up to ``num_items`` (item_id, score) tuples; empty before train().
+        """
+        return self.hot_items[:max(num_items, 0)]