22301110 | f2e3c09 | 2025-06-05 01:24:43 +0800 | [diff] [blame^] | 1 | import os |
| 2 | import urllib.request |
| 3 | from recommend import train_and_save_itemcf |
# Remote gzip archive fetched by download_model (fastText cc.zh.300 binary).
MODEL_URL = (
    "https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.zh.300.bin.gz"
)
# Local directory that holds the model files.
MODEL_DIR = "./models"
# Final location of the decompressed model binary.
MODEL_PATH = os.path.join(MODEL_DIR, "cc.zh.300.bin")
# Where the downloaded archive is stored before it is decompressed.
COMPRESSED_PATH = f"{MODEL_PATH}.gz"
| 8 | |
def download_model():
    """Download and decompress the fastText model unless it already exists.

    Creates ``MODEL_DIR`` if needed. When ``MODEL_PATH`` is already present
    the function prints a notice and returns without touching the network.
    Otherwise it downloads ``MODEL_URL`` to ``COMPRESSED_PATH``, gunzips it,
    and places the result at ``MODEL_PATH``.

    The archive is decompressed into a ``.part`` file that is renamed onto
    ``MODEL_PATH`` only after decompression finishes, so an interrupted run
    never leaves a truncated file that a later run would mistake for a
    complete model. The downloaded archive and any leftover ``.part`` file
    are removed whether the run succeeds or fails.

    Raises:
        Whatever ``urllib.request.urlretrieve``, ``gzip`` or the file
        operations raise (e.g. ``urllib.error.URLError``, ``OSError``);
        errors are not swallowed.
    """
    import gzip
    import shutil

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(MODEL_DIR, exist_ok=True)

    if os.path.exists(MODEL_PATH):
        print("✅ 模型已存在,跳过下载。")
        return

    partial_path = MODEL_PATH + ".part"
    try:
        print("⏬ 下载 fastText 中文模型...")
        urllib.request.urlretrieve(MODEL_URL, COMPRESSED_PATH)

        print("📦 解压模型文件...")
        with gzip.open(COMPRESSED_PATH, "rb") as f_in, \
                open(partial_path, "wb") as f_out:
            shutil.copyfileobj(f_in, f_out)

        # Publish the model only once it is complete; os.replace overwrites
        # the destination in a single rename step.
        os.replace(partial_path, MODEL_PATH)
    finally:
        # Clean up temporary files on both success and failure. After a
        # successful rename the .part file is already gone, hence the
        # tolerated FileNotFoundError.
        for leftover in (COMPRESSED_PATH, partial_path):
            try:
                os.remove(leftover)
            except FileNotFoundError:
                pass

    print("✅ 模型下载并解压完成!")
| 29 | |
if __name__ == "__main__":
    # Script entry point: run train_and_save_itemcf (imported from
    # recommend) first, then download_model. The steps run strictly in this
    # order, and an exception in the first prevents the second from running.
    for step in (train_and_save_itemcf, download_model):
        step()