使用 aiortc 实现桌面实时屏幕流媒体传输（基于 MSS 屏幕捕获）

聖光之護

发布时间：2026-02-12 12:23:17

1009人浏览过

来源于php中文网

原创

使用 aiortc 实现桌面实时屏幕流媒体传输（基于 MSS 屏幕捕获）

本文详解如何在 aiortc 中用实时屏幕画面替代静态视频文件作为视频源，结合 mss 库实现低延迟、高帧率的本地桌面实时 WebRTC 流媒体推流，涵盖自定义 VideoStreamTrack、线程安全帧队列、时间戳同步等关键实践。

在基于 aiortc 的 WebRTC 服务中，MediaPlayer 仅适用于预录制的音视频文件（如 video.mp4），无法满足实时屏幕共享需求。要将用户当前桌面画面作为实时视频源推送到浏览器客户端，必须实现一个符合 aiortc 接口规范的自定义 VideoStreamTrack，并集成高效、轻量的屏幕捕获方案。mss 是 Python 中性能优异的纯 Python 屏幕捕获库，支持跨平台、无依赖、高帧率（可达 30–60 FPS），是理想选择。

以下是一个完整、可运行的 ScreenCapturing 类实现，它继承 VideoStreamTrack，通过后台线程持续抓取屏幕，并利用线程安全队列（queue.Queue）向主线程异步提供帧：

from aiortc import VideoStreamTrack
from av import VideoFrame
import numpy as np
import threading
import asyncio
import queue
import mss

class ScreenCapturing(VideoStreamTrack):
    """Video track that streams the primary monitor, captured with mss.

    A daemon thread grabs ``sct.monitors[1]`` in a loop and pushes every
    screenshot (as a NumPy array) into a bounded queue. :meth:`recv` pops
    frames from that queue, wraps them in a ``bgr24`` ``VideoFrame`` and
    stamps them with the PTS / time base returned by ``next_timestamp()``.

    Call :meth:`start` to begin capturing and :meth:`stop` to end the track.
    """

    # Seconds recv() waits for a captured frame before emitting a black one.
    _FRAME_TIMEOUT = 5.0
    # Seconds between queue polls while recv() is waiting for a frame.
    _POLL_INTERVAL = 0.005
    # (height, width, channels) of the black fallback frame.
    _FALLBACK_SHAPE = (720, 1280, 3)

    def __init__(self) -> None:
        super().__init__()
        # Bounded buffer between the capture thread and recv(); the cap
        # keeps memory use limited when frames are produced faster than sent.
        self.queue = queue.Queue(maxsize=10)
        self._started = False
        # Handle of the running capture thread, or None when not capturing.
        self._thread = None

    async def recv(self) -> VideoFrame:
        """Return the next video frame; called by aiortc for every frame.

        Waits up to ``_FRAME_TIMEOUT`` seconds for a captured image without
        blocking the event loop. If none arrives, a black frame is returned
        instead so the stream keeps flowing.

        Raises:
            Whatever ``next_timestamp()`` raises; in aiortc that is a
            ``MediaStreamError`` once the track is no longer live.
        """
        img = await self._wait_for_frame()

        if img is None:
            pixels = np.zeros(self._FALLBACK_SHAPE, dtype=np.uint8)
        elif img.shape[2] == 4:
            # mss screenshots are BGRA: drop alpha and keep B, G, R. The
            # slice is a strided view, so copy it into a packed array before
            # handing it to PyAV (NOTE(review): some PyAV code paths reject
            # non-contiguous rows; the copy is safe either way).
            pixels = np.ascontiguousarray(img[:, :, :3])
        else:
            pixels = img

        frame = VideoFrame.from_ndarray(pixels, format="bgr24")
        # Timestamps are mandatory: without them playback stutters or drifts.
        pts, time_base = await self.next_timestamp()
        frame.pts = pts
        frame.time_base = time_base
        return frame

    async def _wait_for_frame(self):
        """Poll the queue for a frame, yielding to the event loop in between.

        Returns the captured array, or ``None`` when the timeout expires or
        the track is no longer live. A blocking ``queue.get(timeout=...)``
        here would stall every other coroutine on the loop.
        """
        loop = asyncio.get_running_loop()
        deadline = loop.time() + self._FRAME_TIMEOUT
        while True:
            try:
                return self.queue.get_nowait()
            except queue.Empty:
                if self.readyState != "live" or loop.time() >= deadline:
                    return None
                await asyncio.sleep(self._POLL_INTERVAL)

    def start(self) -> None:
        """Start the capture thread; does nothing if it is already running.

        The thread is a daemon so it never keeps the interpreter alive.
        """
        if self._started:
            return
        self._started = True
        self._thread = threading.Thread(target=self._capture_loop, daemon=True)
        self._thread.start()

    def _capture_loop(self) -> None:
        """Capture-thread body: grab the primary monitor and enqueue frames.

        Runs until ``stop()`` is called or capture fails. On exit it clears
        ``_started`` so that ``start()`` can launch a fresh thread.
        """
        me = threading.current_thread()
        try:
            with mss.mss() as sct:
                # monitors[0] is the virtual full screen; [1] is the primary.
                monitor = sct.monitors[1]
                # The identity check stops a thread that stop() gave up
                # waiting for from running alongside its replacement.
                while self._started and self._thread is me:
                    self._enqueue_latest(np.array(sct.grab(monitor)))
        except Exception as e:
            # Thread boundary: report the failure instead of dying silently.
            print(f"[ScreenCapturing] 捕获异常: {e}")
        finally:
            if self._thread is me:
                self._started = False

    def _enqueue_latest(self, image) -> None:
        """Queue *image*, evicting the oldest frame when the buffer is full."""
        try:
            self.queue.put_nowait(image)
        except queue.Full:
            # Prefer fresh frames over old ones to keep latency low.
            try:
                self.queue.get_nowait()
                self.queue.put_nowait(image)
            except (queue.Empty, queue.Full):
                # Lost a race with the consumer; just drop this frame.
                pass

    def stop(self) -> None:
        """Stop capturing, drop buffered frames and end the track.

        Safe to call more than once. Intended for use from shutdown hooks
        and when the owning peer connection closes.
        """
        self._started = False
        worker = self._thread
        self._thread = None
        # Let the capture thread finish before draining, otherwise it could
        # push a frame back in behind us. Bounded so a hung grab cannot
        # block the caller forever.
        if worker is not None and worker is not threading.current_thread():
            worker.join(timeout=1.0)
        # Empty the queue so no frame buffers stay referenced.
        while True:
            try:
                self.queue.get_nowait()
            except queue.Empty:
                break
        # Run the base-class stop logic as well; the original override
        # skipped it, so the track was never marked as ended.
        super().stop()

接下来，在主服务逻辑中替换原有的 create_local_tracks 函数，并确保在 offer 处理流程中正确初始化和启动该轨道：

CodeWP

针对 WordPress 训练的AI代码生成器

下载

# Replaces the original create_local_tracks function
async def create_local_tracks():
    """Build a ScreenCapturing track and start its capture thread.

    Returns the started track, ready to be passed to ``pc.addTrack()``.
    """
    screen_track = ScreenCapturing()
    # The constructor does not start capturing; start() must be called
    # explicitly to launch the capture thread.
    screen_track.start()
    return screen_track

# Modified offer handler (the key change)
async def offer(request):
    """Answer a browser's SDP offer with a live screen-capture video track.

    Reads ``sdp`` and ``type`` from the JSON request body, creates a peer
    connection that sends the screen track, and returns the local SDP
    answer as a JSON response.

    The capture track is stopped when the connection fails or closes, or
    when negotiation raises, so capture threads do not pile up across
    connections.
    """
    params = await request.json()
    # Named differently from the function to avoid shadowing `offer`.
    remote_description = RTCSessionDescription(
        sdp=params["sdp"], type=params["type"]
    )
    pc = RTCPeerConnection()
    pcs.add(pc)

    # Use the live screen track instead of MediaPlayer.
    # create_local_tracks is a coroutine function, so it must be awaited.
    video = await create_local_tracks()
    pc.addTrack(video)

    @pc.on("connectionstatechange")
    async def on_connectionstatechange():
        # Release the capture thread once this peer is gone.
        if pc.connectionState == "failed":
            await pc.close()
        if pc.connectionState in ("failed", "closed"):
            video.stop()
            # NOTE(review): assumes `pcs` is a set (it is used with .add above).
            pcs.discard(pc)

    try:
        await pc.setRemoteDescription(remote_description)
        answer = await pc.createAnswer()
        await pc.setLocalDescription(answer)
    except Exception:
        # Negotiation failed: clean up before propagating the error.
        video.stop()
        pcs.discard(pc)
        await pc.close()
        raise

    return web.Response(
        content_type="application/json",
        text=json.dumps(
            {"sdp": pc.localDescription.sdp, "type": pc.localDescription.type}
        ),
    )

重要注意事项与优化建议：

显示器选择：sct.monitors[1] 对应主显示器；可通过 print(sct.monitors) 查看所有屏幕区域，按需调整索引或指定 {"top": ..., "left": ..., "width": ..., "height": ...}。
性能调优：
- 缩小捕获区域（如 monitor.update({"width": 1280, "height": 720})，即只抓取该显示器左上角 1280×720 的区域；这是裁剪而非缩放）可显著提升帧率；若需要整屏画面但分辨率更低，应在捕获线程中对帧进行缩放；
- mss 本身不依赖 numpy；其截图对象实现了 __array_interface__，可直接用 np.array() 转换为 BGRA 通道顺序的 ndarray，无需额外配置；
- 避免在 recv() 中执行耗时操作（如图像缩放、编码），应在捕获线程中预处理。
时间戳（PTS）：await self.next_timestamp() 是强制要求——它确保帧按正确时间间隔渲染。忽略会导致浏览器解码器卡顿、音画不同步。
资源清理：在 on_shutdown 中，除关闭 RTCPeerConnection 外，建议调用 track.stop()（若已扩展该接口），防止后台线程残留。
安全性与权限：部分平台需要为 Python 进程授予屏幕录制权限（如 macOS 的“屏幕录制”隐私设置），否则 sct.grab() 将返回黑屏或报错。