fix: 今日文章不足时混入历史文章，避免所有用户扎堆同一篇

问题：今日只有 1 篇文章时，所有虚拟用户都只互动这同一篇。
原因：Phase 1 找到今日文章后直接返回，不管数量多少。
修复逻辑：
- 今日有效文章 → 每用户最多取 count//3 篇（至少 1 篇，且不超过今日有效篇数）
- 剩余名额（count - today_quota）从历史文章补充
- 历史文章：按当前小时对应页拉取（与 Phase 2 相同），随机打散
- 历史文章排除已选中的今日文章 ID 和当天发布的文章，保证内容不重复
- 最终返回：今日 N 篇 + 历史 M 篇，总量接近 count
效果：今日 1 篇文章时 → 用户取 1 篇今日 + 4 篇历史（以 count=5 为例），互动多样性恢复。
2026-04-08 11:49:57 +08:00
parent c944fbb0ea
commit 79d57da769
1 changed files with 85 additions and 16 deletions
--- a/backend/app/services/news_service.py
+++ b/backend/app/services/news_service.py
@@ -508,7 +508,7 @@ class NewsPlatformService:
            logger.warning(f"[广场新闻-今日] {user.account} 请求异常: {e}")
        if today_articles:
-            # 今日文章：从新到旧排序（createTime 降序）
+            # 今日文章：从新到旧排序
            today_articles.sort(
                key=lambda x: x.get("publishTime") or x.get("createTime") or x.get("pushTime") or "",
                reverse=True
@@ -520,24 +520,93 @@ class NewsPlatformService:
                if aid and aid not in seen:
                    seen.add(aid)
                    unique.append(a)
            logger.info(
                f"[广场新闻] {user.account} 今日新文章 {len(unique)} 篇，"
                f"顺序轮询（最多取 {count} 篇）"
            )
            candidates = unique[:max(count * 2, 20)]   # 取候选池，稍多于 count 以备校验失败
-            # 有效性校验后返回
+            # 有效性校验今日文章
-            valid = []
+            today_valid = []
-            for a in candidates:
+            for a in unique:
                if len(valid) >= count:
                    break
                aid = str(a.get("recordId") or a.get("id", ""))
                if await self.validate_article(db, user, aid):
-                    valid.append(a)
+                    today_valid.append(a)
-            if valid and not force_history:
+
-                return valid
+            if today_valid:
-            if not valid:
+                # ── 混合策略：今日文章不足时，补充历史文章，避免所有用户扎堆同一篇 ──
-                logger.info(f"[广场新闻] {user.account} 今日文章校验后全部无效，转历史")
+                # 每个用户最多分配 max(1, count // 3) 篇今日文章（不超过今日有效篇数），其余名额从历史文章随机补充
                today_quota = min(len(today_valid), max(1, count // 3))
                today_pick  = today_valid[:today_quota]
                today_ids   = {str(a.get("recordId") or a.get("id","")) for a in today_pick}
                hist_needed = count - today_quota
                logger.info(
                    f"[广场新闻] {user.account} 今日有效 {len(today_valid)} 篇，"
                    f"取 {today_quota} 篇今日 + {hist_needed} 篇历史"
                )
                hist_valid = []
                if hist_needed > 0:
                    import math as _math2
                    # 拉历史页（与 Phase 2 相同的小时分页逻辑）
                    total_pages_h = 1
                    page1_items_h = []
                    try:
                        async with httpx.AsyncClient(timeout=10) as _hc:
                            _hr = await _hc.get(
                                f"{biz}/business/square/list",
                                headers=self._bearer(token),
                                params=_build(1),
                            )
                        _hd = _hr.json()
                        if _hd.get("code") in [0, 200]:
                            _hnd = _hd.get("data", {})
                            total_pages_h = max(1, _math2.ceil(_hnd.get("totalSize", 0) / 50))
                            page1_items_h = _filter(
                                _hnd.get("data") or _hnd.get("list") or _hnd.get("records") or []
                            )
                    except Exception:
                        pass
                    max_pages_h = min(total_pages_h, 10)
                    hour_page_h = (_dt.now().hour % max_pages_h) + 1
                    hist_pool   = page1_items_h if hour_page_h == 1 else []
                    if hour_page_h != 1:
                        try:
                            async with httpx.AsyncClient(timeout=12) as _hc2:
                                _hr2 = await _hc2.get(
                                    f"{biz}/business/square/list",
                                    headers=self._bearer(token),
                                    params=_build(hour_page_h),
                                )
                            if _hr2.status_code == 200:
                                _hd2 = _hr2.json()
                                if _hd2.get("code") in [0, 200]:
                                    _hnd2 = _hd2.get("data", {})
                                    hist_pool = _filter(
                                        _hnd2.get("data") or _hnd2.get("list") or _hnd2.get("records") or []
                                    )
                        except Exception:
                            hist_pool = page1_items_h  # fallback to page 1
                    # 排除今日文章，避免重复
                    hist_pool = [x for x in hist_pool
                                 if (x.get("recordId") or x.get("id","")) not in today_ids
                                 and not _is_today(x)]
                    _rand.shuffle(hist_pool)
                    for a in hist_pool:
                        if len(hist_valid) >= hist_needed:
                            break
                        aid = str(a.get("recordId") or a.get("id", ""))
                        if await self.validate_article(db, user, aid):
                            hist_valid.append(a)
                result = today_pick + hist_valid
                logger.info(
                    f"[广场新闻] {user.account} 返回 {len(result)} 篇"
                    f"（今日 {len(today_pick)} + 历史 {len(hist_valid)}）"
                )
                return result
            logger.info(f"[广场新闻] {user.account} 今日文章校验后全部无效，转历史")
        # ── Phase 2: 无今日新文章 → 从最新(第1页)开始往旧顺序遍历 ────
        # 规则：始终从第1页（最新）开始，按页顺序 1→2→3...往旧方向走