fix: Phase2始终从第1页(最新)开始,按小时递进页码
问题:
- 原实现用 hour % max_pages 决定起始页(max_pages = min(total_pages, 10))
- 13点时 13%3=1,start_page=2,直接跳过第1页(最新文章)
- 导致虚拟用户永远不互动最新发布的文章

修复:
- 第1页(最新文章)始终在获取总页数时一并拉取,零额外开销
- hour_page = (hour % max_pages) + 1,每小时推进一页(1→2→3...循环)
- 0点=第1页最新,1点=第2页,依此类推,形成完整的新→旧覆盖
- 若当前时段页为空则顺序回退,最终兜底第1页

Phase 1(今日新文章)逻辑不变
This commit is contained in:
@@ -488,13 +488,14 @@ class NewsPlatformService:
|
||||
return valid
|
||||
logger.info(f"[广场新闻] {user.account} 今日文章校验后全部无效,转历史")
|
||||
|
||||
# ── Phase 2: 无今日新文章 → 从最新到旧顺序翻页(随机从某页开始)────
|
||||
# 规则:从第1页(最新)开始依次往后,轮到哪页由当前小时决定,
|
||||
# 保证不同时段覆盖不同页,模拟"从新到旧"逐步互动
|
||||
# ── Phase 2: 无今日新文章 → 从最新(第1页)开始往旧顺序遍历 ────
|
||||
# 规则:始终从第1页(最新)开始,按页顺序 1→2→3...往旧方向走
|
||||
# 每小时推进一页,保证不同时段覆盖不同深度的文章
|
||||
logger.info(f"[广场新闻] {user.account} 无今日新文章,从最新向旧顺序翻页")
|
||||
|
||||
# 获取总页数
|
||||
# 获取总页数(第1页同时也是数据源)
|
||||
total_pages = 1
|
||||
page1_items = []
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=10) as _c:
|
||||
_r = await _c.get(
|
||||
@@ -506,20 +507,30 @@ class NewsPlatformService:
|
||||
if _d.get("code") in [0, 200]:
|
||||
total_size = _d.get("data", {}).get("totalSize", 0)
|
||||
total_pages = max(1, math.ceil(total_size / 50))
|
||||
nd = _d.get("data", {})
|
||||
page1_items = _filter(nd.get("data") or nd.get("list") or nd.get("records") or [])
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
max_pages = min(total_pages, 10) # 最多翻前10页
|
||||
# 用小时数取模决定起始页,保证同一小时内不同用户分散在不同页
|
||||
# 从最新(page=1)开始往旧的方向走
|
||||
hour_slot = _dt.now().hour % max_pages
|
||||
start_page = hour_slot + 1 # 1-based
|
||||
max_pages = min(total_pages, 10)
|
||||
# 用小时决定本轮目标页:0点→第1页(最新),每小时推进一页,循环往复
|
||||
hour_page = (_dt.now().hour % max_pages) + 1 # 1-based,范围 1~max_pages
|
||||
|
||||
items = []
|
||||
# 尝试从 start_page 开始,若该页为空则顺序往后再往前找
|
||||
pages_to_try = list(range(start_page, max_pages + 1)) + list(range(1, start_page))
|
||||
tried_page = start_page
|
||||
tried_page = 1
|
||||
|
||||
if hour_page == 1:
|
||||
# 当前时段对应第1页,直接使用已获取的数据
|
||||
items = page1_items
|
||||
tried_page = 1
|
||||
else:
|
||||
# 先尝试当前时段对应的目标页
|
||||
pages_to_try = [hour_page] + list(range(1, hour_page)) + list(range(hour_page + 1, max_pages + 1))
|
||||
for page in pages_to_try:
|
||||
if page == 1 and page1_items:
|
||||
items = page1_items
|
||||
tried_page = 1
|
||||
break
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=15) as c:
|
||||
r = await c.get(
|
||||
@@ -531,8 +542,7 @@ class NewsPlatformService:
|
||||
d = r.json()
|
||||
if d.get("code") in [0, 200]:
|
||||
nd = d.get("data", {})
|
||||
_items = nd.get("data") or nd.get("list") or nd.get("records") or []
|
||||
_items = _filter(_items)
|
||||
_items = _filter(nd.get("data") or nd.get("list") or nd.get("records") or [])
|
||||
if _items:
|
||||
items = _items
|
||||
tried_page = page
|
||||
@@ -540,7 +550,7 @@ class NewsPlatformService:
|
||||
except Exception as e:
|
||||
logger.error(f"[广场新闻-历史] {user.account} page={page}: {e}")
|
||||
|
||||
logger.info(f"[广场新闻] {user.account} 历史第{tried_page}页获取到 {len(items)} 条")
|
||||
logger.info(f"[广场新闻] {user.account} 历史第{tried_page}/{max_pages}页获取到 {len(items)} 条(时段对应第{hour_page}页)")
|
||||
|
||||
if not items:
|
||||
return []
|
||||
|
||||
Reference in New Issue
Block a user