fix: 今日文章不足时混入历史文章,避免所有用户扎堆同一篇

问题:今日只有1篇文章时,所有虚拟用户都只互动这同一篇
原因:Phase 1 找到今日文章后直接返回,不管数量多少

修复逻辑:
- 今日有效文章 → 每用户最多取 max(1, count//3) 篇(至少 1 篇,且不超过今日有效文章数)
- 剩余名额(count - today_quota)从历史文章补充
- 历史文章:按当前小时对应页拉取(与Phase 2相同),随机打散
- 历史文章排除今日文章ID和当天发布的文章,保证内容不重复
- 最终返回:今日N篇 + 历史M篇,总量接近 count

效果:今日1篇文章时(以 count=5 为例)→ 用户取1篇今日 + 4篇历史,互动多样性恢复
This commit is contained in:
stefanfeng
2026-04-08 11:49:57 +08:00
parent c944fbb0ea
commit 79d57da769

View File

@@ -508,7 +508,7 @@ class NewsPlatformService:
logger.warning(f"[广场新闻-今日] {user.account} 请求异常: {e}") logger.warning(f"[广场新闻-今日] {user.account} 请求异常: {e}")
if today_articles: if today_articles:
# 今日文章:从新到旧排序createTime 降序) # 今日文章:从新到旧排序
today_articles.sort( today_articles.sort(
key=lambda x: x.get("publishTime") or x.get("createTime") or x.get("pushTime") or "", key=lambda x: x.get("publishTime") or x.get("createTime") or x.get("pushTime") or "",
reverse=True reverse=True
@@ -520,24 +520,93 @@ class NewsPlatformService:
if aid and aid not in seen: if aid and aid not in seen:
seen.add(aid) seen.add(aid)
unique.append(a) unique.append(a)
logger.info(
f"[广场新闻] {user.account} 今日新文章 {len(unique)} 篇,"
f"顺序轮询(最多取 {count} 篇)"
)
candidates = unique[:max(count * 2, 20)] # 取候选池,稍多于 count 以备校验失败
# 有效性校验后返回 # 有效性校验今日文章
valid = [] today_valid = []
for a in candidates: for a in unique:
if len(valid) >= count:
break
aid = str(a.get("recordId") or a.get("id", "")) aid = str(a.get("recordId") or a.get("id", ""))
if await self.validate_article(db, user, aid): if await self.validate_article(db, user, aid):
valid.append(a) today_valid.append(a)
if valid and not force_history:
return valid if today_valid:
if not valid: # ── 混合策略:今日文章不足时,补充历史文章,避免所有用户扎堆同一篇 ──
logger.info(f"[广场新闻] {user.account} 今日文章校验后全部无效,转历史") # 每个用户最多分配 1 篇今日文章,其余名额从历史文章随机补充
today_quota = min(len(today_valid), max(1, count // 3))
today_pick = today_valid[:today_quota]
today_ids = {str(a.get("recordId") or a.get("id","")) for a in today_pick}
hist_needed = count - today_quota
logger.info(
f"[广场新闻] {user.account} 今日有效 {len(today_valid)} 篇,"
f"{today_quota} 篇今日 + {hist_needed} 篇历史"
)
hist_valid = []
if hist_needed > 0:
import math as _math2
# 拉历史页(与 Phase 2 相同的小时分页逻辑)
total_pages_h = 1
page1_items_h = []
try:
async with httpx.AsyncClient(timeout=10) as _hc:
_hr = await _hc.get(
f"{biz}/business/square/list",
headers=self._bearer(token),
params=_build(1),
)
_hd = _hr.json()
if _hd.get("code") in [0, 200]:
_hnd = _hd.get("data", {})
total_pages_h = max(1, _math2.ceil(_hnd.get("totalSize", 0) / 50))
page1_items_h = _filter(
_hnd.get("data") or _hnd.get("list") or _hnd.get("records") or []
)
except Exception:
pass
max_pages_h = min(total_pages_h, 10)
hour_page_h = (_dt.now().hour % max_pages_h) + 1
hist_pool = page1_items_h if hour_page_h == 1 else []
if hour_page_h != 1:
try:
async with httpx.AsyncClient(timeout=12) as _hc2:
_hr2 = await _hc2.get(
f"{biz}/business/square/list",
headers=self._bearer(token),
params=_build(hour_page_h),
)
if _hr2.status_code == 200:
_hd2 = _hr2.json()
if _hd2.get("code") in [0, 200]:
_hnd2 = _hd2.get("data", {})
hist_pool = _filter(
_hnd2.get("data") or _hnd2.get("list") or _hnd2.get("records") or []
)
except Exception:
hist_pool = page1_items_h # fallback to page 1
# 排除今日文章,避免重复
hist_pool = [x for x in hist_pool
if (x.get("recordId") or x.get("id","")) not in today_ids
and not _is_today(x)]
_rand.shuffle(hist_pool)
for a in hist_pool:
if len(hist_valid) >= hist_needed:
break
aid = str(a.get("recordId") or a.get("id", ""))
if await self.validate_article(db, user, aid):
hist_valid.append(a)
result = today_pick + hist_valid
logger.info(
f"[广场新闻] {user.account} 返回 {len(result)}"
f"(今日 {len(today_pick)} + 历史 {len(hist_valid)}"
)
return result
logger.info(f"[广场新闻] {user.account} 今日文章校验后全部无效,转历史")
# ── Phase 2: 无今日新文章 → 从最新(第1页)开始往旧顺序遍历 ──── # ── Phase 2: 无今日新文章 → 从最新(第1页)开始往旧顺序遍历 ────
# 规则始终从第1页最新开始按页顺序 1→2→3...往旧方向走 # 规则始终从第1页最新开始按页顺序 1→2→3...往旧方向走