# Source blob: becf5df98ecc0ad5c270619c99ad97cb6dea1003
import pymysql
import datetime
from collections import defaultdict
# Connection settings for the MySQL server queried by fetch_user_seed_data().
# They are passed to pymysql.connect() as host, port, database, user and
# password respectively.
# NOTE(review): the credentials are hard-coded in source; consider loading
# them from the environment or a config file that is not committed.
SqlURL = "10.126.59.25"  # server host address
SqlPort = 3306  # server TCP port
Database = "pt_database_test"  # schema name to connect to
SqlUsername = "root"  # login user
SqlPassword = "123456"  # login password
def fetch_user_seed_data():
    """Load the raw download and favorite rows from MySQL.

    Opens a connection using the module-level settings (SqlURL, SqlPort,
    SqlUsername, SqlPassword, Database), runs two SELECT statements and
    returns their results.

    Returns:
        A ``(download_rows, favorite_rows)`` pair, each being whatever
        ``cursor.fetchall()`` yields for its query:
        - download_rows: ``(user_id, seed_id, download_start)`` rows from
          the ``SeedDownload`` table.
        - favorite_rows: ``(user_id, seed_id, created_at)`` rows from the
          ``UserFavorite`` table.

    Raises:
        Any exception raised by pymysql while connecting or querying. The
        cursor and the connection are closed before the exception
        propagates.
    """
    conn = pymysql.connect(
        host=SqlURL,
        port=SqlPort,
        user=SqlUsername,
        password=SqlPassword,
        database=Database,
        charset="utf8mb4",
    )
    try:
        # The cursor context manager closes the cursor even if a query fails.
        with conn.cursor() as cursor:
            cursor.execute(
                "SELECT user_id, seed_id, download_start FROM SeedDownload"
            )
            download_rows = cursor.fetchall()
            cursor.execute(
                "SELECT user_id, seed_id, created_at FROM UserFavorite"
            )
            favorite_rows = cursor.fetchall()
    finally:
        # Always release the connection, including on query errors; the
        # original leaked it whenever execute()/fetchall() raised.
        conn.close()
    return download_rows, favorite_rows
def process_records(download_rows, favorite_rows):
    """Merge download and favorite rows into one list of interaction records.

    Args:
        download_rows: Iterable of 3-tuples ``(user_id, seed_id, when)``.
        favorite_rows: Iterable of 3-tuples ``(user_id, seed_id, when)``.

    Returns:
        A ``(records, user_set, seed_set)`` triple where
        - records is a list of ``(user_id, seed_id, ts)`` tuples, all
          download rows first, then all favorite rows, in input order;
        - user_set is the set of every user_id seen;
        - seed_set is the set of every seed_id seen.

        ``ts`` is ``int(when.timestamp())`` when ``when`` is a
        ``datetime.datetime`` and ``0`` for anything else (e.g. ``None``).
    """
    interactions = []
    seen_users = set()
    seen_seeds = set()
    # Both sources share the same row layout, so one loop body handles both;
    # the outer tuple fixes the order: downloads before favorites.
    for source in (download_rows, favorite_rows):
        for user_id, seed_id, moment in source:
            seen_users.add(user_id)
            seen_seeds.add(seed_id)
            epoch = (
                int(moment.timestamp())
                if isinstance(moment, datetime.datetime)
                else 0
            )
            interactions.append((user_id, seed_id, epoch))
    return interactions, seen_users, seen_seeds
def build_id_maps(user_set, seed_set):
    """Assign dense zero-based indices to user ids and seed ids.

    Each collection is sorted and every id is mapped to its position in
    that sorted order.

    Args:
        user_set: Collection of mutually comparable user ids.
        seed_set: Collection of mutually comparable seed ids.

    Returns:
        A ``(user2idx, seed2idx)`` pair of dicts mapping id -> index.
    """
    ordered_users = sorted(user_set)
    user2idx = dict(zip(ordered_users, range(len(ordered_users))))

    ordered_seeds = sorted(seed_set)
    seed2idx = dict(zip(ordered_seeds, range(len(ordered_seeds))))

    return user2idx, seed2idx
def group_and_write(records, user2idx, seed2idx, output_path="./user_seed_graph.txt"):
    """Group records per user and write them as a tab-separated text file.

    Each output line has the form ``<user_idx>\\t<seed idxs>\\t<timestamps>``
    where the seed indices and timestamps are space-separated and listed in
    the order the records were given. Lines are ordered by ascending user
    index.

    Args:
        records: Iterable of ``(user_id, seed_id, ts)`` tuples.
        user2idx: Mapping from user_id to its index.
        seed2idx: Mapping from seed_id to its index.
        output_path: Destination file; it is overwritten.

    Raises:
        KeyError: If a record's user_id or seed_id is missing from the
            corresponding mapping (raised before the file is opened).
    """
    # user index -> (list of seed indices, list of timestamps), kept in step.
    per_user = {}
    for raw_user, raw_seed, stamp in records:
        user_idx = user2idx[raw_user]
        seed_idx = seed2idx[raw_seed]
        seed_list, stamp_list = per_user.setdefault(user_idx, ([], []))
        seed_list.append(seed_idx)
        stamp_list.append(stamp)

    with open(output_path, "w", encoding="utf-8") as out:
        for uid in sorted(per_user):
            seed_list, stamp_list = per_user[uid]
            items = " ".join(map(str, seed_list))
            times = " ".join(map(str, stamp_list))
            out.write(f"{uid}\t{items}\t{times}\n")
def build_user_seed_graph(return_mapping=False):
    """Run the full pipeline: fetch rows, index ids, write the graph file.

    Calls fetch_user_seed_data(), process_records(), build_id_maps() and
    group_and_write() in sequence; the file goes to group_and_write()'s
    default output path.

    Args:
        return_mapping: When truthy, return the id -> index mappings.

    Returns:
        ``(user2idx, seed2idx)`` if ``return_mapping`` is truthy, otherwise
        ``None``.
    """
    downloads, favorites = fetch_user_seed_data()
    records, users, seeds = process_records(downloads, favorites)
    mappings = build_id_maps(users, seeds)
    group_and_write(records, mappings[0], mappings[1])
    if not return_mapping:
        return None
    return mappings