feat: 调度优先今日新文章,无新文章时随机历史翻页
新调度规则:

Phase 1 — 今日新文章优先(从新到旧轮询):
- 从第 1 页开始拉取(接口返回最新优先)
- 只保留今日发布的文章,按 createTime 降序排列(新→旧)
- 最多扫描 3 页;某页出现非今日文章时,处理完该页后立即停止翻页
- 对今日文章逐篇执行 validate_article 校验后返回

Phase 2 — 历史兜底(今日无新文章,或今日文章校验后全部无效时触发):
- 在前 1~10 页(不超过总页数)中随机选取一页历史
- 热度 + 新鲜度加权采样(热度 = commentNum×3 + praiseNum×2 + readNum)
- 经 validate_article 校验后返回

两阶段均包含:
- 本人发布文章过滤
- 静态 + 运行时无效 ID 过滤
- 文章有效性校验(无法打开或正文 <100 字的文章自动加入缓存黑名单)
This commit is contained in:
@@ -378,37 +378,125 @@ class NewsPlatformService:
|
||||
|
||||
async def get_news_list(self, db, user, count=5, interest_tags=None) -> list:
|
||||
"""
|
||||
GET /business/member/square/list 广场数据分页查询
|
||||
type=1 表示新闻,orgId 选填(不填则查全平台新闻,无需配置 orgId)
|
||||
返回字段:id(广场ID), recordId(新闻实际ID), title, orgId, orgName
|
||||
获取文章列表,优先返回今日新发布的文章(从新到旧轮询),
|
||||
无今日新文章时才随机翻历史页。
|
||||
"""
|
||||
import math, random as _rand
|
||||
from datetime import datetime as _dt
|
||||
|
||||
sess = await get_session(user.id)
|
||||
if not sess:
|
||||
return []
|
||||
biz = await self._biz_url(db)
|
||||
cfg = await self._client(db)
|
||||
org_id = sess.get("org_id") or cfg.get("orgId") or ""
|
||||
platform_uid = sess.get("platform_uid", "")
|
||||
token = sess["token"]
|
||||
|
||||
# 先查总数,再随机翻页,避免每次都取第1页相同内容
|
||||
import math
|
||||
# 第一次查询获取总页数
|
||||
first_params = self._build_form({
|
||||
"pageNum": 1,
|
||||
"pageSize": 50,
|
||||
"type": "1",
|
||||
"isPlatformShow": "true",
|
||||
"isAdmin": "false",
|
||||
# 已知静态无效ID(直接过滤,无需 API 校验)
|
||||
INVALID_IDS = {
|
||||
"1965670408480907266","2029092495693975554","1960652956793597953",
|
||||
"1960651987045347330","1960596408620838914","1960596083193180161",
|
||||
"1960595664341594113","1952296583257133058",
|
||||
} | news_service._invalid_ids_cache
|
||||
|
||||
def _build(page, size=50):
|
||||
p = self._build_form({
|
||||
"pageNum": page, "pageSize": size,
|
||||
"type": "1", "isPlatformShow": "true", "isAdmin": "false",
|
||||
}, cfg)
|
||||
if org_id:
|
||||
first_params["orgId"] = org_id
|
||||
p["orgId"] = org_id
|
||||
return p
|
||||
|
||||
def _filter(items):
|
||||
"""过滤本人发布 + 无效 ID"""
|
||||
if platform_uid:
|
||||
items = [x for x in items if x.get("createUser") != platform_uid]
|
||||
items = [x for x in items
|
||||
if (x.get("recordId") or x.get("id")) not in INVALID_IDS]
|
||||
return items
|
||||
|
||||
def _is_today(item):
|
||||
t = item.get("createTime") or item.get("publishTime") or ""
|
||||
if not t:
|
||||
return False
|
||||
try:
|
||||
pub = _dt.strptime(t[:10], "%Y-%m-%d")
|
||||
return pub.date() == _dt.now().date()
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
# ── Phase 1: 今日新发布文章(从新到旧轮询,最多查3页)──────────────
|
||||
today_articles = []
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=12) as c:
|
||||
for page in range(1, 4): # 第1页最新,逐页往前
|
||||
r = await c.get(
|
||||
f"{biz}/business/member/square/list",
|
||||
headers=self._bearer(token),
|
||||
params=_build(page),
|
||||
)
|
||||
if r.status_code != 200:
|
||||
break
|
||||
d = r.json()
|
||||
if d.get("code") not in [0, 200]:
|
||||
break
|
||||
nd = d.get("data", {})
|
||||
items = nd.get("data") or nd.get("list") or nd.get("records") or []
|
||||
items = _filter(items)
|
||||
|
||||
# 只保留今日发布的文章,按 createTime 降序(接口本就如此)
|
||||
today_page = [x for x in items if _is_today(x)]
|
||||
today_articles.extend(today_page)
|
||||
|
||||
# 如果该页已经有非今日文章,说明今日文章已全部抓完
|
||||
has_older = any(not _is_today(x) for x in items)
|
||||
if has_older or not items:
|
||||
break
|
||||
except Exception as e:
|
||||
logger.warning(f"[广场新闻-今日] {user.account} 请求异常: {e}")
|
||||
|
||||
if today_articles:
|
||||
# 今日文章:从新到旧排序(createTime 降序)
|
||||
today_articles.sort(
|
||||
key=lambda x: x.get("createTime") or x.get("publishTime") or "",
|
||||
reverse=True
|
||||
)
|
||||
# 去重(按 recordId)
|
||||
seen, unique = set(), []
|
||||
for a in today_articles:
|
||||
aid = str(a.get("recordId") or a.get("id", ""))
|
||||
if aid and aid not in seen:
|
||||
seen.add(aid)
|
||||
unique.append(a)
|
||||
logger.info(
|
||||
f"[广场新闻] {user.account} 今日新文章 {len(unique)} 篇,"
|
||||
f"顺序轮询(最多取 {count} 篇)"
|
||||
)
|
||||
candidates = unique[:max(count * 2, 20)] # 取候选池,稍多于 count 以备校验失败
|
||||
|
||||
# 有效性校验后返回
|
||||
valid = []
|
||||
for a in candidates:
|
||||
if len(valid) >= count:
|
||||
break
|
||||
aid = str(a.get("recordId") or a.get("id", ""))
|
||||
if await self.validate_article(db, user, aid):
|
||||
valid.append(a)
|
||||
if valid:
|
||||
return valid
|
||||
logger.info(f"[广场新闻] {user.account} 今日文章校验后全部无效,转历史")
|
||||
|
||||
# ── Phase 2: 无今日新文章 → 历史随机翻页(热度+新鲜度加权)────────
|
||||
logger.info(f"[广场新闻] {user.account} 无今日新文章,随机历史翻页")
|
||||
total_pages = 1
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=10) as _c:
|
||||
_r = await _c.get(
|
||||
f"{biz}/business/member/square/list",
|
||||
headers=self._bearer(sess["token"]),
|
||||
params=first_params
|
||||
headers=self._bearer(token),
|
||||
params=_build(1),
|
||||
)
|
||||
_d = _r.json()
|
||||
if _d.get("code") in [0, 200]:
|
||||
@@ -417,120 +505,78 @@ class NewsPlatformService:
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# 随机选择一页
|
||||
import random as _random
|
||||
rand_page = _random.randint(1, min(total_pages, 10)) # 最多取前10页随机
|
||||
|
||||
params = self._build_form({
|
||||
"pageNum": rand_page,
|
||||
"pageSize": 50,
|
||||
"type": "1",
|
||||
"isPlatformShow": "true",
|
||||
"isAdmin": "false",
|
||||
}, cfg)
|
||||
if org_id:
|
||||
params["orgId"] = org_id # 选填,有则按组织过滤
|
||||
|
||||
rand_page = _rand.randint(1, min(total_pages, 10))
|
||||
items = []
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=15) as c:
|
||||
r = await c.get(
|
||||
f"{biz}/business/member/square/list",
|
||||
headers=self._bearer(sess["token"]),
|
||||
params=params
|
||||
headers=self._bearer(token),
|
||||
params=_build(rand_page),
|
||||
)
|
||||
if r.status_code == 200:
|
||||
d = r.json()
|
||||
if d.get("code") in [0, 200]:
|
||||
nd = d.get("data", {})
|
||||
items = nd.get("data") or nd.get("list") or nd.get("records") or []
|
||||
# 过滤本人发布的文章
|
||||
platform_uid = sess.get("platform_uid", "")
|
||||
if platform_uid:
|
||||
items = [x for x in items if x.get("createUser") != platform_uid]
|
||||
# 过滤已知无效新闻(详情为空或不存在)
|
||||
# 已知静态无效ID(直接过滤,无需 API 校验)
|
||||
INVALID_IDS = {
|
||||
"1965670408480907266","2029092495693975554","1960652956793597953",
|
||||
"1960651987045347330","1960596408620838914","1960596083193180161",
|
||||
"1960595664341594113","1952296583257133058",
|
||||
} | news_service._invalid_ids_cache # 合并运行时缓存
|
||||
items = [x for x in items
|
||||
if (x.get("recordId") or x.get("id")) not in INVALID_IDS]
|
||||
logger.info(f"[广场新闻] {user.account} 获取到 {len(items)} 条(已过滤本人+无效文章)")
|
||||
import random as _rand
|
||||
from datetime import datetime as _dt
|
||||
import math as _math
|
||||
items = _filter(items)
|
||||
except Exception as e:
|
||||
logger.error(f"[广场新闻-历史] {user.account}: {e}")
|
||||
|
||||
# ── 热度 + 新鲜度加权选取 ─────────────────────────────────
|
||||
# 规则:真实用户互动量越大 + 发布时间越新 → 虚拟用户越倾向互动
|
||||
logger.info(f"[广场新闻] {user.account} 历史第{rand_page}页获取到 {len(items)} 条")
|
||||
|
||||
if not items:
|
||||
return []
|
||||
|
||||
# 热度 + 新鲜度加权采样
|
||||
def _hot_weight(a):
|
||||
comment_n = int(a.get("commentNum") or 0)
|
||||
praise_n = int(a.get("praiseNum") or 0)
|
||||
read_n = int(a.get("readNum") or 0)
|
||||
# 热度分:评论权重3倍,点赞2倍,阅读1倍
|
||||
hot_score = comment_n * 3 + praise_n * 2 + read_n
|
||||
|
||||
# 新鲜度衰减:发布时间越近权重越高(72小时内为新鲜文章)
|
||||
hot = (int(a.get("commentNum") or 0) * 3 +
|
||||
int(a.get("praiseNum") or 0) * 2 +
|
||||
int(a.get("readNum") or 0))
|
||||
freshness = 1.0
|
||||
pub_time_str = a.get("publishTime") or a.get("createTime") or ""
|
||||
if pub_time_str:
|
||||
t = a.get("createTime") or a.get("publishTime") or ""
|
||||
if t:
|
||||
try:
|
||||
for fmt in ["%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%S"]:
|
||||
try:
|
||||
pub_dt = _dt.strptime(pub_time_str[:19], fmt)
|
||||
hours_old = (_dt.now() - pub_dt).total_seconds() / 3600
|
||||
# 72小时内新鲜文章:新鲜度加成最高3倍
|
||||
freshness = max(1.0, 3.0 - hours_old / 36.0)
|
||||
break
|
||||
except Exception:
|
||||
continue
|
||||
pub = _dt.strptime(t[:19], "%Y-%m-%d %H:%M:%S")
|
||||
h = (_dt.now() - pub).total_seconds() / 3600
|
||||
freshness = max(1.0, 3.0 - h / 36.0)
|
||||
except Exception:
|
||||
pass
|
||||
return max(1.0, (hot + 1) * freshness)
|
||||
|
||||
# 综合权重:热度 * 新鲜度(基础权重最少为1)
|
||||
return max(1.0, (hot_score + 1) * freshness)
|
||||
|
||||
if len(items) <= count:
|
||||
return items
|
||||
# 加权随机采样
|
||||
weights = [_hot_weight(a) for a in items]
|
||||
pool_idx = list(range(len(items)))
|
||||
selected = []
|
||||
pool = list(range(len(items)))
|
||||
w_pool = list(weights)
|
||||
for _ in range(min(count, len(items))):
|
||||
if not pool:
|
||||
for _ in range(min(count * 2, len(items))):
|
||||
if not pool_idx:
|
||||
break
|
||||
chosen_idx = _rand.choices(pool, weights=w_pool, k=1)[0]
|
||||
selected.append(items[chosen_idx])
|
||||
i = pool.index(chosen_idx)
|
||||
pool.pop(i)
|
||||
w_pool.pop(i)
|
||||
# ── 文章有效性校验(过滤不可开、字数<100的文章)─────
|
||||
ci = _rand.choices(pool_idx, weights=[weights[i] for i in pool_idx], k=1)[0]
|
||||
selected.append(items[ci])
|
||||
pool_idx.remove(ci)
|
||||
|
||||
# 有效性校验
|
||||
valid = []
|
||||
for _a in selected:
|
||||
_aid = str(_a.get("recordId") or _a.get("id", ""))
|
||||
if await self.validate_article(db, user, _aid):
|
||||
valid.append(_a)
|
||||
# 若校验失败,尝试从候选池补充
|
||||
# 若有效文章不够,从剩余候选中按权重补充
|
||||
if len(valid) < count and len(pool) > 0:
|
||||
remaining = [items[i] for i in pool]
|
||||
_w2 = [weights[pool.index(i)] if i in pool else 1 for i in range(len(remaining))]
|
||||
import random as _r2
|
||||
_r2.shuffle(remaining)
|
||||
for _a2 in remaining:
|
||||
remaining = [i for i in pool_idx]
|
||||
for a in selected:
|
||||
if len(valid) >= count:
|
||||
break
|
||||
_aid2 = str(_a2.get("recordId") or _a2.get("id", ""))
|
||||
if await self.validate_article(db, user, _aid2):
|
||||
valid.append(_a2)
|
||||
aid = str(a.get("recordId") or a.get("id", ""))
|
||||
if await self.validate_article(db, user, aid):
|
||||
valid.append(a)
|
||||
|
||||
# 不够则从剩余池补充
|
||||
for ri in remaining:
|
||||
if len(valid) >= count:
|
||||
break
|
||||
a = items[ri]
|
||||
aid = str(a.get("recordId") or a.get("id", ""))
|
||||
if await self.validate_article(db, user, aid):
|
||||
valid.append(a)
|
||||
|
||||
if not valid:
|
||||
logger.warning(f"[广场新闻] {user.account} 校验后无可用文章")
|
||||
return valid
|
||||
logger.warning(f"[广场新闻] {user.account} code={d.get('code')} msg={d.get('message')}")
|
||||
except Exception as e:
|
||||
logger.error(f"[广场新闻] {user.account}: {e}")
|
||||
return []
|
||||
|
||||
|
||||
async def read_news(self, db, user, news_id: str) -> bool:
|
||||
sess = await get_session(user.id)
|
||||
|
||||
Reference in New Issue
Block a user