import datetime
import os
from collections import defaultdict

import pymysql
| |
# Connection settings for the redbook MySQL database.
# Each value may be overridden through an environment variable so that
# credentials do not have to be edited in source; the literals below are the
# previous hard-coded values, kept as fallbacks so behavior is unchanged when
# no variable is set.
SqlURL = os.environ.get("REDBOOK_DB_HOST", "10.126.59.25")
SqlPort = int(os.environ.get("REDBOOK_DB_PORT", "3306"))
Database = os.environ.get("REDBOOK_DB_NAME", "redbook")
SqlUsername = os.environ.get("REDBOOK_DB_USER", "root")
SqlPassword = os.environ.get("REDBOOK_DB_PASSWORD", "123456")
| |
| |
def fetch_user_post_data():
    """Fetch user-post interaction rows from the ``behaviors`` table.

    Only behaviors of type like/favorite/comment/view/share are selected, and
    only for posts whose ``status`` is ``'published'`` (enforced by the join
    against ``posts``).

    Returns:
        The result of ``cursor.fetchall()``: rows of
        ``(user_id, post_id, type, value, created_at)`` ordered by
        ``created_at``.
    """
    query = """
        SELECT b.user_id, b.post_id, b.type, b.value, b.created_at
        FROM behaviors b
        INNER JOIN posts p ON b.post_id = p.id
        WHERE b.type IN ('like', 'favorite', 'comment', 'view', 'share')
        AND p.status = 'published'
        ORDER BY b.created_at
    """
    conn = pymysql.connect(
        host=SqlURL,
        port=SqlPort,
        user=SqlUsername,
        password=SqlPassword,
        database=Database,
        charset="utf8mb4",
    )
    # try/finally (rather than `with conn:`) so the connection is released even
    # when the query fails, independent of how the installed PyMySQL version
    # implements the connection's context-manager protocol.
    try:
        with conn.cursor() as cursor:
            cursor.execute(query)
            return cursor.fetchall()
    finally:
        conn.close()
| |
| |
def process_records(behavior_rows, behavior_weights=None):
    """Convert raw behavior rows into weighted interaction records.

    Args:
        behavior_rows: Iterable of 5-tuples
            ``(user_id, post_id, behavior_type, value, created_at)``.
        behavior_weights: Optional mapping from behavior type to its base
            weight. When omitted, the built-in table is used
            (view=1, like=2, comment=3, share=4, favorite=5).

    Returns:
        A tuple ``(records, user_set, post_set)`` where ``records`` is a list
        of ``(user_id, post_id, ts, weight)`` in input order, and the two sets
        hold every user id and post id seen.
    """
    if behavior_weights is None:
        behavior_weights = {
            'view': 1,
            'like': 2,
            'comment': 3,
            'share': 4,
            'favorite': 5,
        }

    records = []
    user_set = set()
    post_set = set()

    for user_id, post_id, behavior_type, value, created_at in behavior_rows:
        user_set.add(user_id)
        post_set.add(post_id)

        # Anything that is not a datetime (e.g. NULL) gets timestamp 0.
        if isinstance(created_at, datetime.datetime):
            ts = int(created_at.timestamp())
        else:
            ts = 0

        # Unknown behavior types fall back to a base weight of 1; a falsy
        # value (None or 0) is treated as a multiplier of 1.
        weight = behavior_weights.get(behavior_type, 1) * (value or 1)
        records.append((user_id, post_id, ts, weight))

    return records, user_set, post_set
| |
| |
def build_id_maps(user_set, post_set):
    """Build mappings from raw user/post ids to contiguous 0-based indices.

    Ids are indexed in ascending sorted order. Inputs are de-duplicated first,
    so passing a list with repeated ids still yields gap-free indices.

    Args:
        user_set: Iterable of user ids (mutually comparable, hashable).
        post_set: Iterable of post ids (mutually comparable, hashable).

    Returns:
        A tuple ``(user2idx, post2idx)`` of dicts mapping id -> index.
    """
    user2idx = {uid: idx for idx, uid in enumerate(sorted(set(user_set)))}
    post2idx = {pid: idx for idx, pid in enumerate(sorted(set(post_set)))}
    return user2idx, post2idx
| |
| |
def group_and_write(records, user2idx, post2idx, output_path="./app/user_post_graph.txt"):
    """Group records by user and write one line per user to ``output_path``.

    Each output line has four tab-separated fields: the user index, then the
    space-separated post indices, timestamps and weights of that user's
    records, in the order the records were given. Lines are sorted by user
    index.

    Args:
        records: Iterable of ``(user_id, post_id, ts, weight)`` tuples.
        user2idx: Mapping from raw user id to index.
        post2idx: Mapping from raw post id to index.
        output_path: Destination file; its parent directory is created if it
            does not exist.

    Raises:
        KeyError: If a record references an id missing from the mappings.
    """
    # One entry per user index holding three parallel lists:
    # (post indices, timestamps, weights).
    grouped = defaultdict(lambda: ([], [], []))
    for user_id, post_id, ts, weight in records:
        uid = user2idx[user_id]
        pid = post2idx[post_id]
        items, times, weights = grouped[uid]
        items.append(str(pid))
        times.append(str(ts))
        weights.append(str(weight))

    # Without this, a missing output directory makes open() fail.
    parent = os.path.dirname(output_path)
    if parent:
        os.makedirs(parent, exist_ok=True)

    with open(output_path, "w", encoding="utf-8") as f:
        f.writelines(
            f"{uid}\t{' '.join(grouped[uid][0])}"
            f"\t{' '.join(grouped[uid][1])}"
            f"\t{' '.join(grouped[uid][2])}\n"
            for uid in sorted(grouped)
        )
| |
| |
def build_user_post_graph(return_mapping=False, output_path=None):
    """Build the user-post interaction graph and write it to disk.

    Runs the full pipeline: fetch behavior rows, weight them, build the id
    mappings, then group by user and write the graph file.

    Args:
        return_mapping: When true, return the id mappings.
        output_path: Optional destination file. When omitted,
            ``group_and_write`` uses its own default path.

    Returns:
        ``(user2idx, post2idx)`` if ``return_mapping`` is true, else ``None``.
    """
    behavior_rows = fetch_user_post_data()
    records, user_set, post_set = process_records(behavior_rows)
    user2idx, post2idx = build_id_maps(user_set, post_set)

    # Only forward output_path when given, so the default stays defined in
    # one place (group_and_write's signature).
    if output_path is None:
        group_and_write(records, user2idx, post2idx)
    else:
        group_and_write(records, user2idx, post2idx, output_path=output_path)

    if return_mapping:
        return user2idx, post2idx
    return None