Blame - rhj/backend/app/models/recall/ad_recall.py - API-TRM

blob: 0fe3b0a996118916dc16cc97bc48324d612dd16a [file] [log] [blame]

Raver	d789517	2025-06-18 17:54:38 +0800	[diff] [blame]	1	import pymysql
				2	from typing import List, Tuple, Dict
				3	import random
				4
				5	class AdRecall:
				6	"""
				7	广告召回算法实现
				8	专门用于召回广告类型的内容
				9	"""
				10
				11	def __init__(self, db_config: dict):
				12	"""
				13	初始化广告召回模型
				14
				15	Args:
				16	db_config: 数据库配置
				17	"""
				18	self.db_config = db_config
				19	self.ad_items = []
				20
				21	def _get_ad_items(self):
				22	"""获取广告物品列表"""
				23	conn = pymysql.connect(**self.db_config)
				24	try:
				25	cursor = conn.cursor()
				26
				27	# 获取所有广告帖子，按热度和发布时间排序
				28	cursor.execute("""
				29	SELECT
				30	p.id,
				31	p.heat,
				32	p.created_at,
				33	COUNT(DISTINCT b.user_id) as interaction_count,
				34	DATEDIFF(NOW(), p.created_at) as days_since_created
				35	FROM posts p
				36	LEFT JOIN behaviors b ON p.id = b.post_id
				37	WHERE p.is_advertisement = 1 AND p.status = 'published'
				38	GROUP BY p.id, p.heat, p.created_at
				39	ORDER BY p.heat DESC, p.created_at DESC
				40	""")
				41
				42	results = cursor.fetchall()
				43
				44	# 计算广告分数
				45	items_with_scores = []
				46	for row in results:
				47	post_id, heat, created_at, interaction_count, days_since_created = row
				48
				49	# 处理None值
				50	heat = heat or 0
				51	interaction_count = interaction_count or 0
				52	days_since_created = days_since_created or 0
				53
				54	# 广告分数计算：热度 + 交互数 - 时间惩罚
				55	# 新发布的广告给予更高权重
				56	freshness_bonus = max(0, 30 - days_since_created) / 30.0 # 30天内的新鲜度奖励
				57
				58	ad_score = (
				59	heat * 0.6 +
				60	interaction_count * 0.3 +
				61	freshness_bonus * 100 # 新鲜度奖励
				62	)
				63
				64	items_with_scores.append((post_id, ad_score))
				65
				66	# 按广告分数排序
				67	self.ad_items = sorted(items_with_scores, key=lambda x: x[1], reverse=True)
				68
				69	finally:
				70	cursor.close()
				71	conn.close()
				72
				73	def train(self):
				74	"""训练广告召回模型"""
				75	print("开始获取广告物品...")
				76	self._get_ad_items()
				77	print(f"广告召回模型训练完成，共{len(self.ad_items)}个广告物品")
				78
				79	def recall(self, user_id: int, num_items: int = 10) -> List[Tuple[int, float]]:
				80	"""
				81	为用户召回广告物品
				82
				83	Args:
				84	user_id: 用户ID
				85	num_items: 召回物品数量
				86
				87	Returns:
				88	List of (item_id, score) tuples
				89	"""
				90	# 如果尚未训练，先进行训练
				91	if not hasattr(self, 'ad_items') or not self.ad_items:
				92	self.train()
				93
				94	# 获取用户已交互的广告，避免重复推荐
				95	conn = pymysql.connect(**self.db_config)
				96	try:
				97	cursor = conn.cursor()
				98	cursor.execute("""
				99	SELECT DISTINCT b.post_id
				100	FROM behaviors b
				101	JOIN posts p ON b.post_id = p.id
				102	WHERE b.user_id = %s AND p.is_advertisement = 1
				103	AND b.type IN ('like', 'favorite', 'comment', 'view')
				104	""", (user_id,))
				105
				106	user_interacted_ads = set(row[0] for row in cursor.fetchall())
				107
				108	# 获取用户的兴趣标签（基于历史行为）
				109	cursor.execute("""
				110	SELECT t.name, COUNT(*) as count
				111	FROM behaviors b
				112	JOIN posts p ON b.post_id = p.id
				113	JOIN post_tags pt ON p.id = pt.post_id
				114	JOIN tags t ON pt.tag_id = t.id
				115	WHERE b.user_id = %s AND b.type IN ('like', 'favorite', 'comment')
				116	GROUP BY t.name
				117	ORDER BY count DESC
				118	LIMIT 10
				119	""", (user_id,))
				120
				121	user_interest_tags = set(row[0] for row in cursor.fetchall())
				122
				123	finally:
				124	cursor.close()
				125	conn.close()
				126
				127	# 过滤掉用户已交互的广告
				128	filtered_ads = [
				129	(item_id, score) for item_id, score in self.ad_items
				130	if item_id not in user_interacted_ads
				131	]
				132
				133	# 如果没有未交互的广告，但有广告数据，返回评分最高的广告（可能用户会再次感兴趣）
				134	if not filtered_ads and self.ad_items:
				135	print(f"用户 {user_id} 已与所有广告交互，返回评分最高的广告")
				136	filtered_ads = self.ad_items[:num_items]
				137
				138	# 如果用户有兴趣标签，可以进一步个性化广告推荐
				139	if user_interest_tags and filtered_ads:
				140	filtered_ads = self._personalize_ads(filtered_ads, user_interest_tags)
				141
				142	return filtered_ads[:num_items]
				143
				144	def _personalize_ads(self, ad_list: List[Tuple[int, float]], user_interest_tags: set) -> List[Tuple[int, float]]:
				145	"""
				146	根据用户兴趣标签个性化广告推荐
				147
				148	Args:
				149	ad_list: 广告列表
				150	user_interest_tags: 用户兴趣标签
				151
				152	Returns:
				153	个性化后的广告列表
				154	"""
				155	conn = pymysql.connect(**self.db_config)
				156	try:
				157	cursor = conn.cursor()
				158
				159	personalized_ads = []
				160	for ad_id, ad_score in ad_list:
				161	# 获取广告的标签
				162	cursor.execute("""
				163	SELECT t.name
				164	FROM post_tags pt
				165	JOIN tags t ON pt.tag_id = t.id
				166	WHERE pt.post_id = %s
				167	""", (ad_id,))
				168
				169	ad_tags = set(row[0] for row in cursor.fetchall())
				170
				171	# 计算标签匹配度
				172	tag_match_score = len(ad_tags & user_interest_tags) / max(len(user_interest_tags), 1)
				173
				174	# 调整广告分数
				175	final_score = ad_score * (1 + tag_match_score)
				176	personalized_ads.append((ad_id, final_score))
				177
				178	# 重新排序
				179	personalized_ads.sort(key=lambda x: x[1], reverse=True)
				180	return personalized_ads
				181
				182	finally:
				183	cursor.close()
				184	conn.close()
				185
				186	def get_random_ads(self, num_items: int = 5) -> List[Tuple[int, float]]:
				187	"""
				188	获取随机广告（用于多样性）
				189
				190	Args:
				191	num_items: 返回物品数量
				192
				193	Returns:
				194	List of (item_id, score) tuples
				195	"""
				196	if len(self.ad_items) <= num_items:
				197	return self.ad_items
				198
				199	# 随机选择但倾向于高分广告
				200	weights = [score for _, score in self.ad_items]
				201	selected_indices = random.choices(
				202	range(len(self.ad_items)),
				203	weights=weights,
				204	k=num_items
				205	)
				206
				207	return [self.ad_items[i] for i in selected_indices]