今日目标
o 理解推荐系统的基本概念和类型
o 掌握协同过滤算法(基于用户和基于物品)
o 学会基于内容的推荐方法
o 了解矩阵分解和深度学习推荐
o 掌握推荐系统评估和优化技术
推荐系统概述
推荐系统是一种信息过滤系统,用于预测用户对物品的偏好。常见的推荐方法包括:
o 协同过滤:基于用户或物品的相似性
o 基于内容:基于物品特征和用户偏好
o 混合推荐:结合多种推荐方法
o 深度学习:使用神经网络进行推荐
推荐系统应用领域
# 主要应用领域:
# - 电商平台:商品推荐
# - 视频网站:内容推荐
# - 音乐平台:歌曲推荐
# - 社交媒体:好友推荐
# - 新闻网站:文章推荐
# - 游戏平台:游戏推荐
推荐系统基础
1. 安装和导入
pip install pandas numpy matplotlib seaborn scikit-learn scipy surprise
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import NMF, TruncatedSVD
from scipy.sparse import csr_matrix
import warnings
# Silence library warnings so the tutorial output stays readable.
# NOTE: this also hides convergence/deprecation warnings; comment it out when debugging.
warnings.filterwarnings('ignore')

# Apply the seaborn style FIRST: sns.set_style() rewrites 'font.family' and
# 'font.sans-serif', so configuring the CJK font before it would be undone
# and Chinese labels would render as empty boxes.
sns.set_style("whitegrid")

# CJK-capable fonts in priority order; matplotlib uses the first one that is
# installed. SimHei alone is normally only available on Windows.
plt.rcParams['font.sans-serif'] = [
    'SimHei', 'Microsoft YaHei', 'PingFang SC', 'Heiti TC',
    'Noto Sans CJK SC', 'WenQuanYi Micro Hei', 'Arial Unicode MS', 'DejaVu Sans',
]
# Draw the minus sign as ASCII '-' because many CJK fonts lack U+2212.
plt.rcParams['axes.unicode_minus'] = False

print("推荐系统环境设置完成")
2. 创建示例数据
def create_recommendation_data():
    """Create the synthetic data set shared by all examples in this tutorial.

    Seeds NumPy's global RNG with 42, then generates:

    * ratings: 100 users each rate 10-20 distinct items (out of 50) with an
      integer score from 1 to 5;
    * item metadata: name, category, genre, price and a text description;
    * user metadata: age (18-64), gender and a preferred category.

    It also prints summary statistics and shows a 2x2 overview figure.

    Returns:
        dict: ``'ratings_df'`` (long-format ratings), ``'items_df'``,
        ``'users_df'`` and ``'rating_matrix'`` (users x items DataFrame in
        which 0 means "not rated").
    """
    np.random.seed(42)
    print("=== 创建推荐系统数据 ===")

    n_users = 100
    n_items = 50

    # 1. Long-format ratings. The order of the random draws is kept stable so
    #    the generated data is reproducible.
    ratings_data = []
    for user_id in range(n_users):
        n_ratings = np.random.randint(10, 21)
        item_ids = np.random.choice(n_items, n_ratings, replace=False)
        for item_id in item_ids:
            rating = np.random.randint(1, 6)
            ratings_data.append({
                'user_id': user_id,
                'item_id': item_id,
                'rating': rating,
            })
    ratings_df = pd.DataFrame(ratings_data)

    # 2. Item metadata.
    categories = ['电影', '音乐', '书籍', '游戏', '电子产品']
    genres = ['动作', '喜剧', '科幻', '恐怖', '爱情', '纪录片', '动画']
    items_data = []
    for item_id in range(n_items):
        category = np.random.choice(categories)
        genre = np.random.choice(genres)
        price = np.random.randint(10, 1000)
        items_data.append({
            'item_id': item_id,
            'name': f'物品{item_id}',
            'category': category,
            'genre': genre,
            'price': price,
            'description': f'这是一个{category}类别的{genre}作品,价格{price}元',
        })
    items_df = pd.DataFrame(items_data)

    # 3. User metadata.
    ages = np.random.randint(18, 65, n_users)
    genders = np.random.choice(['男', '女'], n_users)
    users_data = [
        {
            'user_id': user_id,
            'age': ages[user_id],
            'gender': genders[user_id],
            'preference': np.random.choice(categories),
        }
        for user_id in range(n_users)
    ]
    users_df = pd.DataFrame(users_data)

    print(f"用户数量: {n_users}")
    print(f"物品数量: {n_items}")
    print(f"评分数量: {len(ratings_df)}")
    # Sparsity is the share of EMPTY cells, i.e. 1 - density.
    density = len(ratings_df) / (n_users * n_items)
    print(f"稀疏度: {(1 - density) * 100:.2f}%")

    # 4. Users x items matrix. Reindexing guarantees one row per user and one
    #    column per item even if some item happened to receive no rating, so
    #    row/column positions always match the ids.
    rating_matrix = (
        ratings_df.pivot(index='user_id', columns='item_id', values='rating')
        .reindex(index=pd.RangeIndex(n_users, name='user_id'),
                 columns=pd.RangeIndex(n_items, name='item_id'))
        .fillna(0)
    )

    # 5. Overview figure.
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))

    # Rating histogram: bin edges at x.5 so each bar is centred on an integer score.
    axes[0, 0].hist(ratings_df['rating'], bins=np.arange(0.5, 6.5, 1),
                    alpha=0.7, edgecolor='black')
    axes[0, 0].set_title('评分分布')
    axes[0, 0].set_xlabel('评分')
    axes[0, 0].set_ylabel('频次')
    axes[0, 0].grid(True, alpha=0.3)

    # Number of ratings per user.
    user_rating_counts = ratings_df.groupby('user_id').size()
    axes[0, 1].hist(user_rating_counts, bins=20, alpha=0.7, edgecolor='black')
    axes[0, 1].set_title('用户评分数量分布')
    axes[0, 1].set_xlabel('评分数量')
    axes[0, 1].set_ylabel('用户数量')
    axes[0, 1].grid(True, alpha=0.3)

    # Number of ratings per item.
    item_rating_counts = ratings_df.groupby('item_id').size()
    axes[1, 0].hist(item_rating_counts, bins=20, alpha=0.7, edgecolor='black')
    axes[1, 0].set_title('物品评分数量分布')
    axes[1, 0].set_xlabel('评分数量')
    axes[1, 0].set_ylabel('物品数量')
    axes[1, 0].grid(True, alpha=0.3)

    # Item category shares.
    category_counts = items_df['category'].value_counts()
    axes[1, 1].pie(category_counts.values, labels=category_counts.index, autopct='%1.1f%%')
    axes[1, 1].set_title('物品类别分布')

    plt.tight_layout()
    plt.show()

    return {
        'ratings_df': ratings_df,
        'items_df': items_df,
        'users_df': users_df,
        'rating_matrix': rating_matrix,
    }
# Build the shared example data set; every later example reads this module-level dict.
rec_data = create_recommendation_data()
协同过滤算法
1. 基于用户的协同过滤
def user_based_collaborative_filtering():
    """Demonstrate user-based collaborative filtering on ``rec_data``.

    Computes cosine similarity between user rating vectors, predicts scores
    for unrated items as the similarity-weighted mean of the ratings given by
    the most similar users, prints the top recommendations for user 0, shows
    a similarity heat map, and runs a small hold-out evaluation on the first
    20 users.

    Returns:
        dict: ``'user_similarity'`` (users x users DataFrame),
        ``'recommendations'`` (list of ``(item_id, predicted_rating)`` for
        the example user) and ``'evaluation_results'`` (list of
        ``(precision, recall)`` tuples).
    """
    rating_matrix = rec_data['rating_matrix']
    # Look items up by their id rather than by DataFrame row label.
    items_info = rec_data['items_df'].set_index('item_id')
    print("=== 基于用户的协同过滤 ===")

    def compute_user_similarity(matrix):
        """Return the cosine similarity between the rows (users) of *matrix*."""
        return pd.DataFrame(cosine_similarity(matrix),
                            index=matrix.index,
                            columns=matrix.index)

    # 1. User-user similarity on the full matrix.
    user_similarity_df = compute_user_similarity(rating_matrix)
    print(f"用户相似度矩阵形状: {user_similarity_df.shape}")

    # 2. Recommendation.
    def recommend_for_user(user_id, n_recommendations=5, matrix=None,
                           similarity_df=None, n_neighbors=10):
        """Return up to *n_recommendations* ``(item_id, score)`` pairs.

        *matrix* and *similarity_df* default to the full data; the evaluation
        passes masked versions so hidden ratings cannot leak into the result.
        """
        if matrix is None:
            matrix = rating_matrix
        if similarity_df is None:
            similarity_df = user_similarity_df
        if user_id not in matrix.index:
            return []

        user_ratings = matrix.loc[user_id]
        unrated_items = user_ratings[user_ratings == 0].index
        if len(unrated_items) == 0:
            return []

        # Remove the user explicitly instead of assuming it sorts first:
        # ties (identical users) or an all-zero row would break that assumption.
        neighbors = similarity_df.loc[user_id].drop(user_id).nlargest(n_neighbors)
        neighbors = neighbors[neighbors > 0]  # only positively similar users vote
        if neighbors.empty:
            return []

        neighbor_ratings = matrix.loc[neighbors.index, unrated_items].to_numpy(dtype=float)
        weights = neighbors.to_numpy()
        # Unrated cells are 0, so they add nothing to the numerator; the
        # denominator only counts neighbours who actually rated the item.
        numerator = weights @ neighbor_ratings
        denominator = weights @ (neighbor_ratings > 0).astype(float)
        has_votes = denominator > 0

        predictions = zip(unrated_items[has_votes],
                          numerator[has_votes] / denominator[has_votes])
        ranked = sorted(predictions, key=lambda pair: pair[1], reverse=True)
        return ranked[:n_recommendations]

    # 3. Recommendations for the example user.
    example_user = 0
    recommendations = recommend_for_user(example_user, n_recommendations=10)
    print(f"\n为用户{example_user}的推荐结果:")
    for item_id, predicted_rating in recommendations:
        item_name = items_info.loc[item_id, 'name']
        category = items_info.loc[item_id, 'category']
        print(f"物品{item_id} ({item_name}, {category}): 预测评分 {predicted_rating:.2f}")

    # 4. Similarity heat map for the first 20 users.
    plt.figure(figsize=(10, 8))
    sns.heatmap(user_similarity_df.iloc[:20, :20], cmap='coolwarm', center=0)
    plt.title('用户相似度热力图 (前20个用户)')
    plt.xlabel('用户ID')
    plt.ylabel('用户ID')
    plt.show()

    # 5. Hold-out evaluation.
    def evaluate_recommendations(user_id, n_recommendations=5):
        """Hide some of the user's ratings and check whether they are recommended.

        Returns ``(precision, recall)``, or ``None`` when the user has fewer
        than two ratings.
        """
        user_ratings = rating_matrix.loc[user_id]
        rated_items = user_ratings[user_ratings > 0]
        if len(rated_items) < 2:
            return None

        test_size = min(len(rated_items) // 2, 5)
        test_items = np.random.choice(rated_items.index, test_size, replace=False)

        # Hide the test ratings on a copy so the shared matrix is never
        # mutated, and recompute the similarities from the masked data so the
        # hidden ratings cannot influence the neighbours.
        masked_matrix = rating_matrix.copy()
        masked_matrix.loc[user_id, test_items] = 0
        masked_similarity = compute_user_similarity(masked_matrix)

        recommended = recommend_for_user(user_id, n_recommendations,
                                         matrix=masked_matrix,
                                         similarity_df=masked_similarity)
        recommended_items = [item_id for item_id, _ in recommended]

        hits = len(set(recommended_items) & set(test_items))
        precision = hits / len(recommended_items) if len(recommended_items) > 0 else 0
        recall = hits / len(test_items) if len(test_items) > 0 else 0
        return precision, recall

    # Evaluate the first 20 users, addressed by label rather than by position.
    evaluation_results = []
    for user_id in rating_matrix.index[:20]:
        result = evaluate_recommendations(user_id)
        if result is not None:
            evaluation_results.append(result)

    if evaluation_results:
        avg_precision = np.mean([r[0] for r in evaluation_results])
        avg_recall = np.mean([r[1] for r in evaluation_results])
        print(f"\n推荐质量评估 (基于{len(evaluation_results)}个用户):")
        print(f"平均精确率: {avg_precision:.3f}")
        print(f"平均召回率: {avg_recall:.3f}")

    return {
        'user_similarity': user_similarity_df,
        'recommendations': recommendations,
        'evaluation_results': evaluation_results,
    }
# Run the user-based collaborative filtering example.
user_cf_results = user_based_collaborative_filtering()
2. 基于物品的协同过滤
def item_based_collaborative_filtering():
    """Demonstrate item-based collaborative filtering on ``rec_data``.

    Computes cosine similarity between item rating columns, predicts a score
    for each unrated item as the similarity-weighted mean of the user's own
    ratings, prints the top recommendations for user 0, shows a similarity
    heat map and lists the items most similar to item 0.

    Returns:
        dict: ``'item_similarity'`` (items x items DataFrame),
        ``'recommendations'`` (list of ``(item_id, predicted_rating)``) and
        ``'similar_items'`` (Series of similarities indexed by item id).
    """
    rating_matrix = rec_data['rating_matrix']
    # Look items up by their id rather than by DataFrame row label.
    items_info = rec_data['items_df'].set_index('item_id')
    print("=== 基于物品的协同过滤 ===")

    # 1. Item-item similarity (columns of the rating matrix are items).
    item_similarity_df = pd.DataFrame(cosine_similarity(rating_matrix.T),
                                      index=rating_matrix.columns,
                                      columns=rating_matrix.columns)
    print(f"物品相似度矩阵形状: {item_similarity_df.shape}")

    # 2. Recommendation.
    def recommend_for_user_item_based(user_id, n_recommendations=5):
        """Return up to *n_recommendations* ``(item_id, score)`` pairs."""
        if user_id not in rating_matrix.index:
            return []

        user_ratings = rating_matrix.loc[user_id]
        unrated_items = user_ratings[user_ratings == 0].index
        # The user's rated items do not depend on the candidate, so look them up once.
        rated = user_ratings[user_ratings > 0]
        if len(unrated_items) == 0 or rated.empty:
            return []

        # Rows: candidate items, columns: items the user rated.
        sims = item_similarity_df.loc[unrated_items, rated.index].to_numpy(dtype=float)
        sims = np.where(sims > 0, sims, 0.0)  # only positive similarities vote
        numerator = sims @ rated.to_numpy(dtype=float)
        denominator = sims.sum(axis=1)
        has_votes = denominator > 0

        predictions = zip(unrated_items[has_votes],
                          numerator[has_votes] / denominator[has_votes])
        ranked = sorted(predictions, key=lambda pair: pair[1], reverse=True)
        return ranked[:n_recommendations]

    # 3. Recommendations for the example user.
    example_user = 0
    recommendations = recommend_for_user_item_based(example_user, n_recommendations=10)
    print(f"\n基于物品的协同过滤为用户{example_user}的推荐结果:")
    for item_id, predicted_rating in recommendations:
        item_name = items_info.loc[item_id, 'name']
        category = items_info.loc[item_id, 'category']
        print(f"物品{item_id} ({item_name}, {category}): 预测评分 {predicted_rating:.2f}")

    # 4. Similarity heat map for the first 20 items.
    plt.figure(figsize=(10, 8))
    sns.heatmap(item_similarity_df.iloc[:20, :20], cmap='coolwarm', center=0)
    plt.title('物品相似度热力图 (前20个物品)')
    plt.xlabel('物品ID')
    plt.ylabel('物品ID')
    plt.show()

    # 5. Nearest items.
    def find_similar_items(item_id, n_similar=5):
        """Return a Series with the *n_similar* items most similar to *item_id*.

        An unknown *item_id* yields an empty Series, so callers can always
        iterate the result with ``.items()``.
        """
        if item_id not in item_similarity_df.index:
            return pd.Series(dtype=float)
        # Drop the item itself explicitly; it is not guaranteed to sort first
        # (ties with identical items, or an item nobody rated).
        return item_similarity_df.loc[item_id].drop(item_id).nlargest(n_similar)

    example_item = 0
    similar_items = find_similar_items(example_item, n_similar=5)
    print(f"\n与物品{example_item}最相似的物品:")
    for item_id, similarity in similar_items.items():
        item_name = items_info.loc[item_id, 'name']
        category = items_info.loc[item_id, 'category']
        print(f"物品{item_id} ({item_name}, {category}): 相似度 {similarity:.3f}")

    return {
        'item_similarity': item_similarity_df,
        'recommendations': recommendations,
        'similar_items': similar_items,
    }
# Run the item-based collaborative filtering example.
item_cf_results = item_based_collaborative_filtering()
基于内容的推荐
1. 内容特征提取
def content_based_recommendation():
    """Demonstrate content-based recommendation on ``rec_data``.

    Turns each item description into a TF-IDF vector, builds a user profile
    as the rating-weighted sum of the vectors of the items the user rated,
    scores every unrated item by its dot product with that profile, prints
    the top recommendations for user 0 and plots the item vectors after a
    2-component PCA.

    Returns:
        dict: ``'item_features'`` (sparse TF-IDF matrix),
        ``'item_content_similarity'`` (items x items DataFrame indexed by
        item id), ``'recommendations'`` (list of ``(item_id, score)``) and
        ``'pca_features'`` (n_items x 2 array).
    """
    items_df = rec_data['items_df']
    ratings_df = rec_data['ratings_df']
    print("=== 基于内容的推荐 ===")

    # 1. Item features.
    # The descriptions are unsegmented Chinese text. The default word analyzer
    # would treat each whole clause as a single token, making almost every
    # description unique, so character bigrams are used instead.
    tfidf = TfidfVectorizer(analyzer='char', ngram_range=(2, 2), max_features=100)
    item_features = tfidf.fit_transform(items_df['description'])
    print(f"物品特征矩阵形状: {item_features.shape}")

    # Row i of item_features belongs to the i-th row of items_df. Keep an
    # explicit id -> row mapping instead of assuming item_id equals the row number.
    item_ids = items_df['item_id'].tolist()
    row_of_item = {item_id: row for row, item_id in enumerate(item_ids)}
    item_info = items_df.set_index('item_id')

    # 2. Item-item content similarity, labelled by item id.
    item_content_similarity_df = pd.DataFrame(cosine_similarity(item_features),
                                              index=item_ids,
                                              columns=item_ids)

    # 3. User profile.
    def build_user_profile(user_id):
        """Return the user's normalised preference vector, or ``None`` if the user has no ratings."""
        user_ratings = ratings_df[ratings_df['user_id'] == user_id]
        if user_ratings.empty:
            return None

        rows = [row_of_item[item_id] for item_id in user_ratings['item_id']]
        weights = user_ratings['rating'].to_numpy(dtype=float)
        # Rating-weighted sum of the rated items' feature vectors.
        user_profile = np.asarray(item_features[rows].T @ weights).ravel()

        # Scale the profile to sum to 1 (TF-IDF weights are non-negative).
        total = user_profile.sum()
        if total > 0:
            user_profile = user_profile / total
        return user_profile

    # 4. Recommendation.
    def recommend_content_based(user_id, n_recommendations=5):
        """Return up to *n_recommendations* ``(item_id, score)`` pairs for unrated items."""
        user_profile = build_user_profile(user_id)
        if user_profile is None:
            return []

        # A set gives O(1) membership tests while filtering candidates.
        rated_items = set(ratings_df.loc[ratings_df['user_id'] == user_id, 'item_id'])
        scores = np.asarray(item_features @ user_profile).ravel()
        predictions = [
            (item_id, scores[row])
            for item_id, row in row_of_item.items()
            if item_id not in rated_items
        ]
        predictions.sort(key=lambda pair: pair[1], reverse=True)
        return predictions[:n_recommendations]

    # 5. Recommendations for the example user.
    example_user = 0
    recommendations = recommend_content_based(example_user, n_recommendations=10)
    print(f"\n基于内容的推荐为用户{example_user}的推荐结果:")
    for item_id, score in recommendations:
        item_name = item_info.loc[item_id, 'name']
        category = item_info.loc[item_id, 'category']
        description = item_info.loc[item_id, 'description']
        print(f"物品{item_id} ({item_name}, {category}): 偏好分数 {score:.3f}")
        print(f" 描述: {description}")

    # 6. Visualise the item feature space after reducing it to two components.
    from sklearn.decomposition import PCA
    pca = PCA(n_components=2)
    item_features_2d = pca.fit_transform(item_features.toarray())

    plt.figure(figsize=(12, 8))
    # One colour per category.
    categories = items_df['category'].unique()
    colors = plt.cm.Set3(np.linspace(0, 1, len(categories)))
    for i, category in enumerate(categories):
        mask = (items_df['category'] == category).to_numpy()
        plt.scatter(item_features_2d[mask, 0], item_features_2d[mask, 1],
                    c=[colors[i]], label=category, alpha=0.7, s=50)
    plt.title('物品特征空间可视化 (PCA降维)')
    plt.xlabel('主成分1')
    plt.ylabel('主成分2')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.show()

    return {
        'item_features': item_features,
        'item_content_similarity': item_content_similarity_df,
        'recommendations': recommendations,
        'pca_features': item_features_2d,
    }
# Run the content-based recommendation example.
content_based_results = content_based_recommendation()
矩阵分解方法
1. 非负矩阵分解 (NMF) 与奇异值分解 (SVD)
def matrix_factorization():
    """Demonstrate NMF and truncated SVD recommendations on ``rec_data``.

    Factorises the users x items rating matrix with NMF and with
    TruncatedSVD (10 components each), ranks the items user 0 has not rated
    by the reconstructed score, prints both top-5 lists and shows heat maps
    of the factor matrices.

    Note:
        Both models treat a 0 ("not rated") as an observed value of zero;
        neither masks missing entries. The reconstructed values are therefore
        ranking scores rather than calibrated 1-5 ratings.

    Returns:
        dict: ``'user_factors_nmf'``, ``'item_factors_nmf'``,
        ``'user_factors_svd'``, ``'item_factors_svd'``,
        ``'nmf_recommendations'`` and ``'svd_recommendations'`` (lists of
        ``(item_id, score)``).
    """
    rating_matrix = rec_data['rating_matrix']
    # Look items up by their id rather than by DataFrame row label.
    items_info = rec_data['items_df'].set_index('item_id')
    print("=== 矩阵分解方法 ===")

    n_components = 10

    # Ratings are 1-5 with 0 meaning "not rated", so the matrix is already
    # non-negative and needs no offset. Adding 1 would turn every missing
    # entry into a rating of 1 and shift real ratings to 2-6.
    rating_matrix_sparse = csr_matrix(rating_matrix.values)

    # 1. Non-negative matrix factorisation.
    nmf = NMF(n_components=n_components, random_state=42, max_iter=200)
    user_factors = nmf.fit_transform(rating_matrix_sparse)
    item_factors = nmf.components_
    print(f"用户因子矩阵形状: {user_factors.shape}")
    print(f"物品因子矩阵形状: {item_factors.shape}")
    print(f"NMF重构误差: {nmf.reconstruction_err_:.4f}")

    # 2. Truncated singular value decomposition of the same matrix.
    svd = TruncatedSVD(n_components=n_components, random_state=42)
    user_factors_svd = svd.fit_transform(rating_matrix_sparse)
    item_factors_svd = svd.components_
    print(f"SVD解释方差比例: {svd.explained_variance_ratio_.sum():.4f}")

    # 3. Recommendation from a pair of factor matrices.
    def recommend_matrix_factorization(user_id, user_factors, item_factors, n_recommendations=5):
        """Return up to *n_recommendations* ``(item_id, score)`` pairs for unrated items."""
        if user_id not in rating_matrix.index:
            return []
        # Factor rows are positional, so translate the user label to its row number.
        user_pos = rating_matrix.index.get_loc(user_id)

        predictions = user_factors[user_pos] @ item_factors
        already_rated = rating_matrix.iloc[user_pos].to_numpy() > 0

        # Pair every score with its column LABEL so positions and ids are never mixed.
        candidates = [
            (item_id, score)
            for item_id, score, seen in zip(rating_matrix.columns, predictions, already_rated)
            if not seen
        ]
        candidates.sort(key=lambda pair: pair[1], reverse=True)
        return candidates[:n_recommendations]

    # 4. Compare the two methods for the example user.
    example_user = 0
    nmf_recommendations = recommend_matrix_factorization(example_user, user_factors, item_factors, 5)
    svd_recommendations = recommend_matrix_factorization(example_user, user_factors_svd, item_factors_svd, 5)

    print(f"\n矩阵分解推荐结果 (用户{example_user}):")
    print("NMF推荐:")
    for item_id, score in nmf_recommendations:
        item_name = items_info.loc[item_id, 'name']
        category = items_info.loc[item_id, 'category']
        print(f" 物品{item_id} ({item_name}, {category}): 预测评分 {score:.3f}")
    print("\nSVD推荐:")
    for item_id, score in svd_recommendations:
        item_name = items_info.loc[item_id, 'name']
        category = items_info.loc[item_id, 'category']
        print(f" 物品{item_id} ({item_name}, {category}): 预测评分 {score:.3f}")

    # 5. Heat maps of the factor matrices.
    fig, axes = plt.subplots(2, 2, figsize=(15, 10))

    sns.heatmap(user_factors[:20, :10], ax=axes[0, 0], cmap='viridis')
    axes[0, 0].set_title('NMF用户因子矩阵 (前20用户, 前10因子)')
    axes[0, 0].set_xlabel('因子')
    axes[0, 0].set_ylabel('用户')

    sns.heatmap(item_factors[:10, :20], ax=axes[0, 1], cmap='viridis')
    axes[0, 1].set_title('NMF物品因子矩阵 (前10因子, 前20物品)')
    axes[0, 1].set_xlabel('物品')
    axes[0, 1].set_ylabel('因子')

    # SVD factors can be negative, hence the diverging colour map centred on 0.
    sns.heatmap(user_factors_svd[:20, :10], ax=axes[1, 0], cmap='coolwarm', center=0)
    axes[1, 0].set_title('SVD用户因子矩阵 (前20用户, 前10因子)')
    axes[1, 0].set_xlabel('因子')
    axes[1, 0].set_ylabel('用户')

    sns.heatmap(item_factors_svd[:10, :20], ax=axes[1, 1], cmap='coolwarm', center=0)
    axes[1, 1].set_title('SVD物品因子矩阵 (前10因子, 前20物品)')
    axes[1, 1].set_xlabel('物品')
    axes[1, 1].set_ylabel('因子')

    plt.tight_layout()
    plt.show()

    return {
        'user_factors_nmf': user_factors,
        'item_factors_nmf': item_factors,
        'user_factors_svd': user_factors_svd,
        'item_factors_svd': item_factors_svd,
        'nmf_recommendations': nmf_recommendations,
        'svd_recommendations': svd_recommendations,
    }
# Run the matrix factorisation example.
matrix_factorization_results = matrix_factorization()
推荐系统评估
1. 评估指标
def recommendation_evaluation():
    """Evaluate user-based collaborative filtering with a train/test split.

    Holds out 20% of the ratings, fits the recommender on the remaining 80%,
    and reports precision, recall and a binary-relevance NDCG of the top-10
    recommendations for up to 50 test users. Also draws a bar chart of the
    three metrics.

    Returns:
        dict: ``'train_data'``, ``'test_data'`` (long-format DataFrames) and
        ``'user_cf_results'`` (dict with ``'precision'``, ``'recall'``,
        ``'ndcg'`` and ``'coverage'``, the number of users evaluated).
    """
    ratings_df = rec_data['ratings_df']
    rating_matrix = rec_data['rating_matrix']
    print("=== 推荐系统评估 ===")

    # 1. Train/test split.
    def split_data(ratings_df, test_ratio=0.2):
        """Randomly split the ratings into ``(train_data, test_data)``."""
        # A local generator yields the same draws as np.random.seed(42)
        # without resetting the global random state as a side effect.
        rng = np.random.RandomState(42)
        test_positions = rng.choice(len(ratings_df),
                                    size=int(len(ratings_df) * test_ratio),
                                    replace=False)
        # The draws are row POSITIONS, so select both parts positionally;
        # DataFrame.drop() would interpret them as index labels.
        is_test = np.zeros(len(ratings_df), dtype=bool)
        is_test[test_positions] = True
        test_data = ratings_df.iloc[test_positions].copy()
        train_data = ratings_df[~is_test].copy()
        return train_data, test_data

    train_data, test_data = split_data(ratings_df)
    print(f"训练集大小: {len(train_data)}")
    print(f"测试集大小: {len(test_data)}")

    # 2. Training matrix. Keep a column for every known item so that items
    #    whose ratings all landed in the test set can still be recommended.
    train_matrix = (
        train_data.pivot(index='user_id', columns='item_id', values='rating')
        .reindex(columns=rating_matrix.columns)
        .fillna(0)
    )

    # The similarities depend only on the training data: compute them once
    # instead of once per recommendation call.
    user_similarity_df = pd.DataFrame(cosine_similarity(train_matrix),
                                      index=train_matrix.index,
                                      columns=train_matrix.index)

    # 3. Evaluation loop.
    def evaluate_recommendations(train_matrix, test_data, recommend_func, n_recommendations=10):
        """Return mean precision/recall/NDCG of *recommend_func* over the test users."""
        precisions = []
        recalls = []
        ndcgs = []

        test_users = test_data['user_id'].unique()
        for user_id in test_users[:50]:  # cap the number of users to keep it fast
            if user_id not in train_matrix.index:
                continue
            user_test_items = test_data[test_data['user_id'] == user_id]
            actual_items = set(user_test_items['item_id'].tolist())
            if len(actual_items) == 0:
                continue

            # Stay best-effort (skip a failing user) but only for lookup or
            # value errors, and report the skip instead of hiding it.
            try:
                recommendations = recommend_func(user_id, n_recommendations)
            except (KeyError, IndexError, ValueError) as exc:
                print(f"跳过用户{user_id}: 推荐失败 ({exc!r})")
                continue

            recommended_items = {item_id for item_id, _ in recommendations}
            hits = len(actual_items & recommended_items)
            precision = hits / len(recommended_items) if len(recommended_items) > 0 else 0
            recall = hits / len(actual_items) if len(actual_items) > 0 else 0

            # Simplified NDCG with binary relevance: a hit at rank i gains 1 / log2(i + 2).
            dcg = sum(1 / np.log2(rank + 2)
                      for rank, (item_id, _) in enumerate(recommendations)
                      if item_id in actual_items)
            ideal_hits = min(len(actual_items), len(recommendations))
            idcg = sum(1 / np.log2(rank + 2) for rank in range(ideal_hits))
            ndcg = dcg / idcg if idcg > 0 else 0

            # Append all three together so the lists always stay aligned.
            precisions.append(precision)
            recalls.append(recall)
            ndcgs.append(ndcg)

        def mean_or_zero(values):
            """Mean of *values*, or 0.0 for an empty list (np.mean would give NaN)."""
            return float(np.mean(values)) if values else 0.0

        return {
            'precision': mean_or_zero(precisions),
            'recall': mean_or_zero(recalls),
            'ndcg': mean_or_zero(ndcgs),
            'coverage': len(precisions),
        }

    # 4. Recommender under test.
    def user_cf_recommend(user_id, n_recommendations=10):
        """User-based CF on the training matrix; returns ``(item_id, score)`` pairs."""
        if user_id not in train_matrix.index:
            return []

        user_ratings = train_matrix.loc[user_id]
        unrated_items = user_ratings[user_ratings == 0].index
        if len(unrated_items) == 0:
            return []

        # Exclude the user explicitly rather than assuming it sorts first.
        neighbors = user_similarity_df.loc[user_id].drop(user_id).nlargest(10)
        neighbors = neighbors[neighbors > 0]
        if neighbors.empty:
            return []

        neighbor_ratings = train_matrix.loc[neighbors.index, unrated_items].to_numpy(dtype=float)
        weights = neighbors.to_numpy()
        # Unrated cells are 0 and add nothing to the numerator; the
        # denominator only counts neighbours who rated the item.
        numerator = weights @ neighbor_ratings
        denominator = weights @ (neighbor_ratings > 0).astype(float)
        has_votes = denominator > 0

        predictions = zip(unrated_items[has_votes],
                          numerator[has_votes] / denominator[has_votes])
        ranked = sorted(predictions, key=lambda pair: pair[1], reverse=True)
        return ranked[:n_recommendations]

    # 5. Run the evaluation.
    print("评估推荐系统性能...")
    user_cf_results = evaluate_recommendations(train_matrix, test_data, user_cf_recommend)
    print(f"\n基于用户的协同过滤评估结果:")
    print(f"精确率: {user_cf_results['precision']:.3f}")
    print(f"召回率: {user_cf_results['recall']:.3f}")
    print(f"NDCG: {user_cf_results['ndcg']:.3f}")
    print(f"覆盖用户数: {user_cf_results['coverage']}")

    # 6. Bar chart of the metrics.
    metrics = ['精确率', '召回率', 'NDCG']
    values = [user_cf_results['precision'], user_cf_results['recall'], user_cf_results['ndcg']]
    plt.figure(figsize=(10, 6))
    bars = plt.bar(metrics, values, color=['skyblue', 'lightgreen', 'lightcoral'])
    # Print each value just above its bar.
    for bar, value in zip(bars, values):
        plt.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.01,
                 f'{value:.3f}', ha='center', va='bottom')
    plt.title('推荐系统评估指标')
    plt.ylabel('指标值')
    plt.ylim(0, 1)
    plt.grid(True, alpha=0.3)
    plt.show()

    return {
        'train_data': train_data,
        'test_data': test_data,
        'user_cf_results': user_cf_results,
    }
# Run the recommender evaluation example.
evaluation_results = recommendation_evaluation()
今日总结
今天我们学习了推荐系统的基础知识:
1. 推荐系统基础:概念、类型、应用领域
2. 协同过滤算法:基于用户和物品的协同过滤
3. 基于内容的推荐:特征提取、用户偏好建模
4. 矩阵分解方法:NMF、SVD分解
5. 推荐系统评估:精确率、召回率、NDCG等指标
推荐系统是现代互联网应用的核心技术,掌握这些算法可以构建个性化推荐服务。