From 053d22965c4c014692ecf7bc8a5a9bcadd8af90b Mon Sep 17 00:00:00 2001 From: stefanfeng Date: Fri, 3 Apr 2026 11:33:31 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E8=B0=83=E5=BA=A6=E4=BC=98=E5=85=88?= =?UTF-8?q?=E4=BB=8A=E6=97=A5=E6=96=B0=E6=96=87=E7=AB=A0=EF=BC=8C=E6=97=A0?= =?UTF-8?q?=E6=96=B0=E6=96=87=E7=AB=A0=E6=97=B6=E9=9A=8F=E6=9C=BA=E5=8E=86?= =?UTF-8?q?=E5=8F=B2=E7=BF=BB=E9=A1=B5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 新调度规则: Phase 1 — 今日新文章优先(从新到旧轮询): - 从第1页开始拉取(接口返回最新优先) - 只保留今日发布的文章,按 createTime 降序排列(新→旧) - 最多扫描3页,发现非今日文章立即停止 - 对今日文章逐篇 validate_article 校验后返回 Phase 2 — 历史兜底(今日无新文章,或今日文章全部校验无效时触发): - 在前 10 页(不超过总页数)中随机选取 1 页历史 - 热度+新鲜度加权采样(commentNum×3 + praiseNum×2 + readNum) - validate_article 校验后返回 两阶段均包含: - 本人发布文章过滤 - 静态+运行时无效ID过滤 - 文章有效性校验(不可开/正文<100字自动加入缓存黑名单) --- backend/app/services/news_service.py | 294 ++++++++++++++++----------- 1 file changed, 170 insertions(+), 124 deletions(-) diff --git a/backend/app/services/news_service.py b/backend/app/services/news_service.py index 98ee1d2..f815007 100755 --- a/backend/app/services/news_service.py +++ b/backend/app/services/news_service.py @@ -378,37 +378,125 @@ class NewsPlatformService: async def get_news_list(self, db, user, count=5, interest_tags=None) -> list: """ - GET /business/member/square/list 广场数据分页查询 - type=1 表示新闻,orgId 选填(不填则查全平台新闻,无需配置 orgId) - 返回字段:id(广场ID), recordId(新闻实际ID), title, orgId, orgName + 获取文章列表,优先返回今日新发布的文章(从新到旧轮询), + 无今日新文章时才随机翻历史页。 """ + import math, random as _rand + from datetime import datetime as _dt + sess = await get_session(user.id) if not sess: return [] - biz = await self._biz_url(db) - cfg = await self._client(db) - org_id = sess.get("org_id") or cfg.get("orgId") or "" + biz = await self._biz_url(db) + cfg = await self._client(db) + org_id = sess.get("org_id") or cfg.get("orgId") or "" + platform_uid = sess.get("platform_uid", "") + token = sess["token"] - # 先查总数,再随机翻页,避免每次都取第1页相同内容 - import math - # 第一次查询获取总页数 - first_params = 
self._build_form({ - "pageNum": 1, - "pageSize": 50, - "type": "1", - "isPlatformShow": "true", - "isAdmin": "false", - }, cfg) - if org_id: - first_params["orgId"] = org_id + # 已知静态无效ID(直接过滤,无需 API 校验) + INVALID_IDS = { + "1965670408480907266","2029092495693975554","1960652956793597953", + "1960651987045347330","1960596408620838914","1960596083193180161", + "1960595664341594113","1952296583257133058", + } | news_service._invalid_ids_cache + def _build(page, size=50): + p = self._build_form({ + "pageNum": page, "pageSize": size, + "type": "1", "isPlatformShow": "true", "isAdmin": "false", + }, cfg) + if org_id: + p["orgId"] = org_id + return p + + def _filter(items): + """过滤本人发布 + 无效 ID""" + if platform_uid: + items = [x for x in items if x.get("createUser") != platform_uid] + items = [x for x in items + if (x.get("recordId") or x.get("id")) not in INVALID_IDS] + return items + + def _is_today(item): + t = item.get("createTime") or item.get("publishTime") or "" + if not t: + return False + try: + pub = _dt.strptime(t[:10], "%Y-%m-%d") + return pub.date() == _dt.now().date() + except Exception: + return False + + # ── Phase 1: 今日新发布文章(从新到旧轮询,最多查3页)────────────── + today_articles = [] + try: + async with httpx.AsyncClient(timeout=12) as c: + for page in range(1, 4): # 第1页最新,逐页往前 + r = await c.get( + f"{biz}/business/member/square/list", + headers=self._bearer(token), + params=_build(page), + ) + if r.status_code != 200: + break + d = r.json() + if d.get("code") not in [0, 200]: + break + nd = d.get("data", {}) + items = nd.get("data") or nd.get("list") or nd.get("records") or [] + items = _filter(items) + + # 只保留今日发布的文章,按 createTime 降序(接口本就如此) + today_page = [x for x in items if _is_today(x)] + today_articles.extend(today_page) + + # 如果该页已经有非今日文章,说明今日文章已全部抓完 + has_older = any(not _is_today(x) for x in items) + if has_older or not items: + break + except Exception as e: + logger.warning(f"[广场新闻-今日] {user.account} 请求异常: {e}") + + if today_articles: + # 
今日文章:从新到旧排序(createTime 降序) + today_articles.sort( + key=lambda x: x.get("createTime") or x.get("publishTime") or "", + reverse=True + ) + # 去重(按 recordId) + seen, unique = set(), [] + for a in today_articles: + aid = str(a.get("recordId") or a.get("id", "")) + if aid and aid not in seen: + seen.add(aid) + unique.append(a) + logger.info( + f"[广场新闻] {user.account} 今日新文章 {len(unique)} 篇," + f"顺序轮询(最多取 {count} 篇)" + ) + candidates = unique[:max(count * 2, 20)] # 取候选池,稍多于 count 以备校验失败 + + # 有效性校验后返回 + valid = [] + for a in candidates: + if len(valid) >= count: + break + aid = str(a.get("recordId") or a.get("id", "")) + if await self.validate_article(db, user, aid): + valid.append(a) + if valid: + return valid + logger.info(f"[广场新闻] {user.account} 今日文章校验后全部无效,转历史") + + # ── Phase 2: 无今日新文章 → 历史随机翻页(热度+新鲜度加权)──────── + logger.info(f"[广场新闻] {user.account} 无今日新文章,随机历史翻页") total_pages = 1 try: async with httpx.AsyncClient(timeout=10) as _c: _r = await _c.get( f"{biz}/business/member/square/list", - headers=self._bearer(sess["token"]), - params=first_params + headers=self._bearer(token), + params=_build(1), ) _d = _r.json() if _d.get("code") in [0, 200]: @@ -417,120 +505,78 @@ class NewsPlatformService: except Exception: pass - # 随机选择一页 - import random as _random - rand_page = _random.randint(1, min(total_pages, 10)) # 最多取前10页随机 - - params = self._build_form({ - "pageNum": rand_page, - "pageSize": 50, - "type": "1", - "isPlatformShow": "true", - "isAdmin": "false", - }, cfg) - if org_id: - params["orgId"] = org_id # 选填,有则按组织过滤 - + rand_page = _rand.randint(1, min(total_pages, 10)) + items = [] try: async with httpx.AsyncClient(timeout=15) as c: r = await c.get( f"{biz}/business/member/square/list", - headers=self._bearer(sess["token"]), - params=params + headers=self._bearer(token), + params=_build(rand_page), ) if r.status_code == 200: d = r.json() if d.get("code") in [0, 200]: - nd = d.get("data", {}) + nd = d.get("data", {}) items = nd.get("data") or nd.get("list") or 
nd.get("records") or [] - # 过滤本人发布的文章 - platform_uid = sess.get("platform_uid", "") - if platform_uid: - items = [x for x in items if x.get("createUser") != platform_uid] - # 过滤已知无效新闻(详情为空或不存在) - # 已知静态无效ID(直接过滤,无需 API 校验) - INVALID_IDS = { - "1965670408480907266","2029092495693975554","1960652956793597953", - "1960651987045347330","1960596408620838914","1960596083193180161", - "1960595664341594113","1952296583257133058", - } | news_service._invalid_ids_cache # 合并运行时缓存 - items = [x for x in items - if (x.get("recordId") or x.get("id")) not in INVALID_IDS] - logger.info(f"[广场新闻] {user.account} 获取到 {len(items)} 条(已过滤本人+无效文章)") - import random as _rand - from datetime import datetime as _dt - import math as _math - - # ── 热度 + 新鲜度加权选取 ───────────────────────────────── - # 规则:真实用户互动量越大 + 发布时间越新 → 虚拟用户越倾向互动 - def _hot_weight(a): - comment_n = int(a.get("commentNum") or 0) - praise_n = int(a.get("praiseNum") or 0) - read_n = int(a.get("readNum") or 0) - # 热度分:评论权重3倍,点赞2倍,阅读1倍 - hot_score = comment_n * 3 + praise_n * 2 + read_n - - # 新鲜度衰减:发布时间越近权重越高(72小时内为新鲜文章) - freshness = 1.0 - pub_time_str = a.get("publishTime") or a.get("createTime") or "" - if pub_time_str: - try: - for fmt in ["%Y-%m-%d %H:%M:%S", "%Y-%m-%dT%H:%M:%S"]: - try: - pub_dt = _dt.strptime(pub_time_str[:19], fmt) - hours_old = (_dt.now() - pub_dt).total_seconds() / 3600 - # 72小时内新鲜文章:新鲜度加成最高3倍 - freshness = max(1.0, 3.0 - hours_old / 36.0) - break - except Exception: - continue - except Exception: - pass - - # 综合权重:热度 * 新鲜度(基础权重最少为1) - return max(1.0, (hot_score + 1) * freshness) - - if len(items) <= count: - return items - # 加权随机采样 - weights = [_hot_weight(a) for a in items] - selected = [] - pool = list(range(len(items))) - w_pool = list(weights) - for _ in range(min(count, len(items))): - if not pool: - break - chosen_idx = _rand.choices(pool, weights=w_pool, k=1)[0] - selected.append(items[chosen_idx]) - i = pool.index(chosen_idx) - pool.pop(i) - w_pool.pop(i) - # ── 文章有效性校验(过滤不可开、字数<100的文章)───── - 
valid = [] - for _a in selected: - _aid = str(_a.get("recordId") or _a.get("id", "")) - if await self.validate_article(db, user, _aid): - valid.append(_a) - # 若校验失败,尝试从候选池补充 - # 若有效文章不够,从剩余候选中按权重补充 - if len(valid) < count and len(pool) > 0: - remaining = [items[i] for i in pool] - _w2 = [weights[pool.index(i)] if i in pool else 1 for i in range(len(remaining))] - import random as _r2 - _r2.shuffle(remaining) - for _a2 in remaining: - if len(valid) >= count: - break - _aid2 = str(_a2.get("recordId") or _a2.get("id", "")) - if await self.validate_article(db, user, _aid2): - valid.append(_a2) - if not valid: - logger.warning(f"[广场新闻] {user.account} 校验后无可用文章") - return valid - logger.warning(f"[广场新闻] {user.account} code={d.get('code')} msg={d.get('message')}") + items = _filter(items) except Exception as e: - logger.error(f"[广场新闻] {user.account}: {e}") - return [] + logger.error(f"[广场新闻-历史] {user.account}: {e}") + + logger.info(f"[广场新闻] {user.account} 历史第{rand_page}页获取到 {len(items)} 条") + + if not items: + return [] + + # 热度 + 新鲜度加权采样 + def _hot_weight(a): + hot = (int(a.get("commentNum") or 0) * 3 + + int(a.get("praiseNum") or 0) * 2 + + int(a.get("readNum") or 0)) + freshness = 1.0 + t = a.get("createTime") or a.get("publishTime") or "" + if t: + try: + pub = _dt.strptime(t[:19], "%Y-%m-%d %H:%M:%S") + h = (_dt.now() - pub).total_seconds() / 3600 + freshness = max(1.0, 3.0 - h / 36.0) + except Exception: + pass + return max(1.0, (hot + 1) * freshness) + + weights = [_hot_weight(a) for a in items] + pool_idx = list(range(len(items))) + selected = [] + for _ in range(min(count * 2, len(items))): + if not pool_idx: + break + ci = _rand.choices(pool_idx, weights=[weights[i] for i in pool_idx], k=1)[0] + selected.append(items[ci]) + pool_idx.remove(ci) + + # 有效性校验 + valid = [] + remaining = [i for i in pool_idx] + for a in selected: + if len(valid) >= count: + break + aid = str(a.get("recordId") or a.get("id", "")) + if await self.validate_article(db, user, aid): + 
valid.append(a) + + # 不够则从剩余池补充 + for ri in remaining: + if len(valid) >= count: + break + a = items[ri] + aid = str(a.get("recordId") or a.get("id", "")) + if await self.validate_article(db, user, aid): + valid.append(a) + + if not valid: + logger.warning(f"[广场新闻] {user.account} 校验后无可用文章") + return valid + async def read_news(self, db, user, news_id: str) -> bool: sess = await get_session(user.id)