feat: 评论去重 + 热度/新鲜度加权选文

评论去重逻辑：
- 查询今日已评论的文章ID，选文时已评论的文章权重降为10%
- 若选中已评论文章：改为回复其他用户的评论（虚拟用户互动链）
- 若选中未评论文章：正常发新评论，评论成功后随机回复他人评论

热度+新鲜度加权选文规则：
- 热度分 = commentNum×3 + praiseNum×2 + readNum×1
- 新鲜度 = max(1, 3 − 发布小时数/36)：72小时内的新文章获得最高3倍加成，随时间线性衰减，72小时后恒为1
- 综合权重 = (热度分+1) × 新鲜度（最低为1），确保真实用户互动多的新文章优先被虚拟用户关注
2026-04-02 17:33:07 +08:00
parent 958eaeda8a
commit 7203f04be6
2 changed files with 157 additions and 34 deletions
--- a/backend/app/services/news_service.py
+++ b/backend/app/services/news_service.py
@@ -407,7 +407,54 @@ class NewsPlatformService:
                             if (x.get("recordId") or x.get("id")) not in INVALID_IDS]
                    logger.info(f"[广场新闻] {user.account} 获取到 {len(items)} 条（已过滤本人+无效文章）")
                    import random as _rand
-                    return _rand.sample(items, min(count, len(items))) if items else []
+                    from datetime import datetime as _dt
+                    import math as _math
+
+                    # ── 热度 + 新鲜度加权选取 ─────────────────────────────────
+                    # 规则：真实用户互动量越大 + 发布时间越新 → 虚拟用户越倾向互动
+                    def _hot_weight(a):
+                        comment_n = int(a.get("commentNum") or 0)
+                        praise_n  = int(a.get("praiseNum")  or 0)
+                        read_n    = int(a.get("readNum")    or 0)
+                        # 热度分：评论权重3倍，点赞2倍，阅读1倍
+                        hot_score = comment_n * 3 + praise_n * 2 + read_n
+
+                        # 新鲜度衰减：发布时间越近权重越高（72小时内为新鲜文章）
+                        freshness = 1.0
+                        pub_time_str = a.get("publishTime") or a.get("createTime") or ""
+                        if pub_time_str:
+                            try:
+                                for fmt in ["%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%S"]:
+                                    try:
+                                        pub_dt = _dt.strptime(pub_time_str[:19], fmt)
+                                        hours_old = (_dt.now() - pub_dt).total_seconds() / 3600
+                                        # 72小时内新鲜文章：新鲜度加成最高3倍
+                                        freshness = max(1.0, 3.0 - hours_old / 36.0)
+                                        break
+                                    except Exception:
+                                        continue
+                            except Exception:
+                                pass
+
+                        # 综合权重：热度 * 新鲜度（基础权重最少为1）
+                        return max(1.0, (hot_score + 1) * freshness)
+
+                    if len(items) <= count:
+                        return items
+                    # 加权随机采样
+                    weights = [_hot_weight(a) for a in items]
+                    selected = []
+                    pool = list(range(len(items)))
+                    w_pool = list(weights)
+                    for _ in range(min(count, len(items))):
+                        if not pool:
+                            break
+                        chosen_idx = _rand.choices(pool, weights=w_pool, k=1)[0]
+                        selected.append(items[chosen_idx])
+                        i = pool.index(chosen_idx)
+                        pool.pop(i)
+                        w_pool.pop(i)
+                    return selected
                logger.warning(f"[广场新闻] {user.account} code={d.get('code')} msg={d.get('message')}")
        except Exception as e:
            logger.error(f"[广场新闻] {user.account}: {e}")
--- a/backend/app/services/scheduler.py
+++ b/backend/app/services/scheduler.py
@@ -230,10 +230,43 @@ class SchedulerService:
                        )
                        return

-                article = random.choice(articles)
+                # ── 文章去重 + 热度加权选取 ─────────────────────────────────
+                # 查询今日已评论过的文章ID（避免重复评论同一篇）
+                from sqlalchemy import func as _func
+                from datetime import date as _date
+                today_str = datetime.now().date()
+                dup_result = await db.execute(
+                    select(InteractionRecord.article_id).where(
+                        InteractionRecord.user_id == user_id,
+                        InteractionRecord.interact_type == "comment",
+                        InteractionRecord.status == 1,
+                        _func.date(InteractionRecord.executed_at) == today_str,
+                    )
+                )
+                already_commented = {r[0] for r in dup_result.all()}
+
+                # 按热度加权：commentNum + praiseNum + readNum 越高权重越大
+                # 同时优先未评论过的文章
+                def _article_weight(a):
+                    aid = str(a.get("recordId") or a.get("id", ""))
+                    base = (
+                        int(a.get("commentNum") or 0) * 3 +
+                        int(a.get("praiseNum")  or 0) * 2 +
+                        int(a.get("readNum")    or 0)
+                    )
+                    # 已评论的文章权重大幅降低（但不为0，还可以点赞/收藏）
+                    penalty = 0.1 if aid in already_commented else 1.0
+                    return max(1, base) * penalty
+
+                weights = [_article_weight(a) for a in articles]
+                article = random.choices(articles, weights=weights, k=1)[0]
+
+                # 判断是否已评论此文章（用于后续逻辑）
+                news_id    = str(article.get("recordId") or article.get("id", ""))
+                already_commented_this = news_id in already_commented
+
                # 接口返回字段: id/newsTitle/content/digest/createUser
                # 广场接口字段：recordId=新闻实际ID, id=广场记录ID, title=标题
-                news_id    = str(article.get("recordId") or article.get("id", ""))
                news_title = article.get("title") or article.get("newsTitle") or "未知文章"
                news_content = article.get("content") or article.get("digest") or news_title
                news_author  = str(article.get("createUser") or "")
@@ -278,11 +311,48 @@ class SchedulerService:
                        interactions_done.append("forward")
                        await self._incr_total(db, user_id)

-                # ⑤ 评论（AI生成内容，调用 POST /message/comment）
-                if can_comment and random.random() < comment_prob and personality:
+                # ⑤ 评论/回复逻辑（去重：已评论过的文章改为回复他人评论）
+                if can_comment and personality:
                    style_prompt = personality.comment_style_prompt or ""
-                    # 字数上限最多80字，避免超出 max_tokens 被截断
                    safe_word_max = min(personality.word_count_max, 80)
+
+                    if already_commented_this:
+                        # 已评论过此文章 → 改为回复其他用户的评论（虚拟用户互动）
+                        if random.random() < reply_prob:
+                            existing = await news_service.get_comments(db, user, news_id)
+                            if existing:
+                                # 优先回复虚拟用户的评论（促进互动），过滤掉自己的评论
+                                from app.core.redis_client import get_session as _gs
+                                my_sess = await _gs(user.id)
+                                my_uid = my_sess.get("platform_uid", "") if my_sess else ""
+                                others = [c for c in existing
+                                          if str(c.get("userId") or c.get("createUser") or "") != my_uid]
+                                if others:
+                                    target = random.choice(others)
+                                    cid = str(target.get("id") or target.get("commentId") or "")
+                                    parent_content = target.get("content") or ""
+                                    if cid:
+                                        reply_text, r_tokens = await ai_service.generate_reply(
+                                            db, news_title, parent_content,
+                                            style_prompt,
+                                            personality.word_count_min,
+                                            safe_word_max
+                                        )
+                                        if reply_text:
+                                            r_ok, r_err = await news_service.post_reply(
+                                                db, user, news_id, cid, reply_text
+                                            )
+                                            await self._save_record(
+                                                db, user, news_id, news_title, "reply",
+                                                reply_text, r_tokens, r_ok, r_err,
+                                                parent_comment_id=cid
+                                            )
+                                            if r_ok:
+                                                interactions_done.append("reply")
+                                                logger.info(f"💬 {user.account} 回复了已评论文章的评论（去重逻辑）")
+                    else:
+                        # 未评论过此文章 → 正常发评论
+                        if random.random() < comment_prob:
                            comment_text, tokens = await ai_service.generate_comment(
                                db, news_title, news_content,
                                style_prompt, personality.word_count_min, safe_word_max
@@ -306,30 +376,36 @@ class SchedulerService:
                                        )
                                    )

-                            # ⑥ 回复评论（评论成功后，随机回复别人的评论）
+                                    # ⑥ 评论成功后，随机回复其他用户的评论（互动链）
                                    if random.random() < reply_prob:
                                        existing = await news_service.get_comments(db, user, news_id)
                                        if existing:
-                                    target = random.choice(existing)
-                                    cid = str(target.get("id") or target.get("commentId") or "")
-                                    parent_content = target.get("content") or ""
-                                    if cid:
-                                        reply_text, r_tokens = await ai_service.generate_reply(
-                                            db, news_title, parent_content,
+                                            from app.core.redis_client import get_session as _gs2
+                                            my_sess2 = await _gs2(user.id)
+                                            my_uid2 = my_sess2.get("platform_uid", "") if my_sess2 else ""
+                                            others2 = [c for c in existing
+                                                       if str(c.get("userId") or c.get("createUser") or "") != my_uid2]
+                                            if others2:
+                                                target2 = random.choice(others2)
+                                                cid2 = str(target2.get("id") or target2.get("commentId") or "")
+                                                parent_content2 = target2.get("content") or ""
+                                                if cid2:
+                                                    reply_text2, r_tokens2 = await ai_service.generate_reply(
+                                                        db, news_title, parent_content2,
                                                        style_prompt,
                                                        personality.word_count_min,
-                                            personality.word_count_max
+                                                        safe_word_max
                                                    )
-                                        if reply_text:
-                                            r_ok, r_err = await news_service.post_reply(
-                                                db, user, news_id, cid, reply_text
+                                                    if reply_text2:
+                                                        r_ok2, r_err2 = await news_service.post_reply(
+                                                            db, user, news_id, cid2, reply_text2
                                                        )
                                                        await self._save_record(
                                                            db, user, news_id, news_title, "reply",
-                                                reply_text, r_tokens, r_ok, r_err,
-                                                parent_comment_id=cid
+                                                            reply_text2, r_tokens2, r_ok2, r_err2,
+                                                            parent_comment_id=cid2
                                                        )
-                                            if r_ok:
+                                                        if r_ok2:
                                                            interactions_done.append("reply")

                await db.commit()