diff --git a/backend/app/services/news_service.py b/backend/app/services/news_service.py
index d44833d..98ee1d2 100755
--- a/backend/app/services/news_service.py
+++ b/backend/app/services/news_service.py
@@ -29,6 +29,11 @@ class NewsPlatformService:
     async def _biz_url(self, db: AsyncSession) -> str:
         return await self._cfg(db, "news_platform_base_url", "http://192.168.1.200:63120")
 
+    # Process-wide cache of article IDs that failed validation for a reason tied
+    # to the article itself (rejected by the API, not found, body too short).
+    # Shared by every instance and never evicted, so it lasts until restart.
+    _invalid_ids_cache: set = set()
+
     async def _auth_url(self, db: AsyncSession) -> str:
         return await self._cfg(db, "auth_base_url", "http://192.168.1.200:60040")
 
@@ -326,6 +331,74 @@ class NewsPlatformService:
         return False
 
     # ─── 新闻列表 ──────────────────────────────────────────────
+    async def validate_article(self, db, user, article_id: str) -> bool:
+        """Return True when the article can be opened and has enough text.
+
+        An article is usable when the detail endpoint answers HTTP 200 with a
+        business ``code`` of 0 or 200 and its text, with HTML tags stripped,
+        is at least 100 characters long. The text is taken from ``content``,
+        falling back to ``digest`` and then ``newsTitle``.
+
+        IDs rejected for a reason tied to the article itself are added to
+        ``_invalid_ids_cache`` so later calls return False without a request.
+        Auth failures (401/403), timeouts and throttling (408/429), 5xx answers
+        and request exceptions are not cached: the cache is shared by all
+        users, so one expired token or outage must not blacklist an article.
+
+        Args:
+            db: Session passed to ``_biz_url`` to resolve the base URL.
+            user: Caller whose ``id`` selects the session holding the token.
+            article_id: ID of the article to check; empty values are rejected.
+
+        Returns:
+            True if every check passed, False if the article should be skipped.
+        """
+        if not article_id:
+            return False
+        # Reach the cache through ``self`` (a class attribute, so the same set
+        # for every instance) rather than through a module-level global.
+        invalid_ids = self._invalid_ids_cache
+        if article_id in invalid_ids:
+            return False
+
+        sess = await get_session(user.id)
+        if not sess:
+            return False
+        biz = await self._biz_url(db)
+        token = sess.get("token", "")
+        try:
+            async with httpx.AsyncClient(timeout=8) as c:
+                r = await c.get(
+                    f"{biz}/news/{article_id}",
+                    headers=self._bearer(token),
+                )
+            status = r.status_code
+            if status != 200:
+                # Cache only answers that describe the article, not the caller
+                # or the server's current health.
+                if status < 500 and status not in (401, 403, 408, 429):
+                    invalid_ids.add(article_id)
+                return False
+            d = r.json()
+            if d.get("code") not in [0, 200]:
+                logger.info(f"[文章校验] {article_id} 无效: code={d.get('code')} {d.get('message','')}")
+                invalid_ids.add(article_id)
+                return False
+            data = d.get("data") or {}
+            raw = data.get("content") or data.get("digest") or data.get("newsTitle") or ""
+            # Function-scope import: the module's import block is outside this patch.
+            import re as _re
+            text = _re.sub(r"<[^>]+>", "", raw).strip()
+            if len(text) < 100:
+                logger.info(f"[文章校验] {article_id} 正文过短({len(text)}字),跳过")
+                invalid_ids.add(article_id)
+                return False
+            return True
+        except Exception as e:
+            # Best effort: a failed check skips the article for this call only.
+            logger.warning(f"[文章校验] {article_id} 请求异常: {e}")
+            return False
+
     async def get_news_list(self, db, user, count=5, interest_tags=None) -> list:
         """
         GET /business/member/square/list 广场数据分页查询
@@ -398,11 +471,12 @@ class NewsPlatformService:
                 if platform_uid:
                     items = [x for x in items if x.get("createUser") != platform_uid]
                 # 过滤已知无效新闻(详情为空或不存在)
+                # Static IDs known to be invalid: filtered here without an API check
                 INVALID_IDS = {
                     "1965670408480907266","2029092495693975554","1960652956793597953",
                     "1960651987045347330","1960596408620838914","1960596083193180161",
-                    "1960595664341594113",
-                }
+                    "1960595664341594113","1952296583257133058",
+                } | self._invalid_ids_cache  # merge in the IDs cached at runtime
                 items = [x for x in items if (x.get("recordId") or x.get("id")) not in INVALID_IDS]
                 logger.info(f"[广场新闻] {user.account} 获取到 {len(items)} 条(已过滤本人+无效文章)")
 
@@ -454,7 +528,28 @@ class NewsPlatformService:
                     i = pool.index(chosen_idx)
                     pool.pop(i)
                     w_pool.pop(i)
-                return selected
+                # ── Validate the picks: drop articles that fail validate_article ──
+                valid = []
+                for _a in selected:
+                    _aid = str(_a.get("recordId") or _a.get("id") or "")
+                    if await self.validate_article(db, user, _aid):
+                        valid.append(_a)
+                # Top up from the candidates left in the pool. They are tried in
+                # uniformly random order (weights are not used) until `count` is met.
+                if len(valid) < count and pool:
+                    remaining = [items[i] for i in pool]
+                    # Keep the alias: a plain `import random` here would make
+                    # `random` a local name for the whole function.
+                    import random as _r2
+                    _r2.shuffle(remaining)
+                    for _a2 in remaining:
+                        if len(valid) >= count:
+                            break
+                        _aid2 = str(_a2.get("recordId") or _a2.get("id") or "")
+                        if await self.validate_article(db, user, _aid2):
+                            valid.append(_a2)
+                if not valid:
+                    logger.warning(f"[广场新闻] {user.account} 校验后无可用文章")
+                return valid
             logger.warning(f"[广场新闻] {user.account} code={d.get('code')} msg={d.get('message')}")
         except Exception as e:
             logger.error(f"[广场新闻] {user.account}: {e}")