Add pose detector implementation with YOLOv8 and supporting files

2026-02-02 17:16:43 +08:00 · 2026-02-02 17:16:43 +08:00 · 15129c3635
parent 7505de4f6c
commit 15129c3635
5 changed files with 449 additions and 0 deletions
--- a/USAGE.md
+++ b/USAGE.md
@ -0,0 +1,138 @@
 # 姿态识别Demo 使用说明
 ## 功能介绍
 这是一个基于MediaPipe的人像姿态识别Demo，支持：
 - **摄像头实时检测**：实时检测摄像头中的人像姿态
 - **图片检测**：检测单张图片中的人像姿态
 - **33个关键点检测**：全身姿态关键点识别
 - **骨架可视化**：在图像上绘制姿态骨架
 - **多人检测接口（预留）**：静态图片模式提供多人结果列表与多色绘制接口（不同颜色区分）；当前基于 MediaPipe Pose，每张图片最多返回一个人物的姿态
 ## 环境要求
 - Python 3.8+（requirements.txt 中固定的 mediapipe 0.10.8 与 Pillow 10.1.0 均不支持 Python 3.7）
 - Windows/Linux/macOS
 ## 安装依赖
 ```bash
 pip install -r requirements.txt
 ```
 ## 使用方法
 ### 1. 摄像头实时检测
 直接运行程序，将自动打开摄像头进行实时姿态检测：
 ```bash
 python pose_detector.py
 ```
 **操作说明：**
 - 程序会自动打开默认摄像头（索引0）
 - 实时显示姿态检测结果
 - 按 `q` 键退出程序
 ### 2. 图片检测
 指定图片路径进行检测：
 ```bash
 python pose_detector.py path/to/your/image.jpg
 ```
 **示例：**
 ```bash
 python pose_detector.py test.jpg
 python pose_detector.py test.webp
 ```
 **输出信息：**
 - 显示带有姿态骨架的图片
 - 打印每个人物的姿态关键点坐标信息
 - 按任意键关闭窗口
 ## 姿态关键点说明
 程序检测以下主要关键点（MediaPipe格式，共33个关键点）：
 - **头部**：鼻子、左右眼、左右耳、嘴等
 - **肩膀**：左肩、右肩
 - **手臂**：左肘、右肘、左腕、右腕
 - **躯干**：左臀、右臀
 - **腿部**：左膝、右膝、左踝、右踝
 - **手部**：左右手各3个关键点（小指、食指、拇指根部）；完整的21个手部关键点需使用 MediaPipe Hands
 - **脚部**：左右脚各2个关键点（脚跟、脚尖）
 ## 代码结构
 ```
 pose_detector.py
 ├── PoseDetector 类
 │   ├── __init__()           # 初始化检测器（静态+流式）
 │   ├── detect_pose()        # 单人姿态检测（流式模式）
 │   ├── detect_poses()       # 多人姿态检测（静态模式）
 │   ├── draw_pose()          # 绘制单个骨架
 │   ├── draw_poses()         # 绘制多个骨架
 │   ├── get_pose_info()      # 获取单人姿态信息
 │   └── get_poses_info()     # 获取多人姿态信息
 ├── run_webcam()             # 摄像头检测模式
 ├── run_image()              # 图片检测模式（支持多人）
 └── main()                   # 主函数
 ```
 ## 技术说明
 - **MediaPipe Pose**：Google开源的姿态估计库
 - **OpenCV**：图像处理和显示
 - **双检测器设计**：
  - `pose_static`：用于静态图像，每张图片独立检测、不做帧间平滑与追踪（仍为单人检测）
  - `pose_stream`：用于视频流，优化实时性能
 - **模型复杂度**：使用中等复杂度模型（model_complexity=1）
 - **检测置信度**：最小检测置信度0.5，最小追踪置信度0.5
 - **颜色区分**：多人检测时使用不同颜色（绿、红、蓝、黄、紫、青）
 ## 多人检测说明
 MediaPipe的Pose模型在静态模式下可以更好地检测姿态，但仍然倾向于检测最显著的人物。对于复杂的多人场景：
 - 当前实现对每张图片只调用一次 MediaPipe Pose，因此最多返回一个人物的姿态
 - 绘制接口会为列表中的每个人物循环使用不同颜色，便于后续接入真正的多人检测
 - 如果需要更精确的多人检测，建议配合目标检测器（如YOLO）
 ## 常见问题
 ### 1. 摄像头无法打开
 - 检查摄像头是否被其他程序占用
 - 尝试修改代码中的摄像头索引（0, 1, 2...）
 ### 2. 检测效果不佳
 - 确保光线充足
 - 确保人物在画面中完整可见
 - 避免遮挡和复杂背景
 ### 3. 依赖安装失败
 - 使用国内镜像源安装：
  ```bash
  pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
  ```
 ### 4. 多人检测不准确
 - MediaPipe Pose主要优化单人检测
 - 对于复杂多人场景，检测结果可能不完整
 - 可以尝试调整人物位置或使用其他多人检测方案
 ## 扩展建议
 - 添加姿态识别功能（如站立、坐着、举手等）
 - 支持视频文件输入
 - 添加姿态数据保存和加载功能
 - 集成目标检测器实现更精确的多人检测
 - 添加姿态异常检测功能
 - 实现姿态轨迹跟踪
 ## 许可证
 本项目遵循原项目LICENSE文件中的许可证规定。
--- a/pose_detector.py
+++ b/pose_detector.py
@ -0,0 +1,303 @@
 #!/usr/bin/env python3
 """
 姿态识别Demo
 使用MediaPipe检测人像姿态，支持摄像头实时检测和图片检测
 """
 import cv2
 import mediapipe as mp
 import sys
 from pathlib import Path
 class PoseDetector:
    """Wrapper around MediaPipe Pose for detection, drawing and data export.

    Two estimators are created up front:

    * ``pose_static`` -- ``static_image_mode=True`` with landmark smoothing
      disabled; used by :meth:`detect_poses`.
    * ``pose_stream`` -- ``static_image_mode=False`` with landmark smoothing
      enabled; used by :meth:`detect_pose`.

    MediaPipe solution objects expose ``close()``; call :meth:`close` (or use
    the detector in a ``with`` statement) once it is no longer needed so both
    estimators are released deterministically.
    """

    # Landmark name -> index into ``pose_landmarks.landmark`` for the subset
    # of landmarks exported by get_pose_info().
    KEY_POINTS = {
        'nose': 0,
        'left_shoulder': 11,
        'right_shoulder': 12,
        'left_elbow': 13,
        'right_elbow': 14,
        'left_wrist': 15,
        'right_wrist': 16,
        'left_hip': 23,
        'right_hip': 24,
        'left_knee': 25,
        'right_knee': 26,
        'left_ankle': 27,
        'right_ankle': 28,
    }

    # Skeleton colours in OpenCV's BGR order, cycled per detected person.
    COLORS = (
        (0, 255, 0),    # green
        (0, 0, 255),    # red
        (255, 0, 0),    # blue
        (0, 255, 255),  # yellow
        (255, 0, 255),  # magenta
        (255, 255, 0),  # cyan
    )

    def __init__(self):
        """Create both MediaPipe pose estimators and the drawing helpers."""
        self.mp_pose = mp.solutions.pose
        # Options shared by both estimators; only the image mode and the
        # landmark smoothing differ between them.
        shared_options = dict(
            model_complexity=1,
            enable_segmentation=False,
            smooth_segmentation=False,
            min_detection_confidence=0.5,
            min_tracking_confidence=0.5,
        )
        self.pose_static = self.mp_pose.Pose(
            static_image_mode=True,
            smooth_landmarks=False,
            **shared_options
        )
        self.pose_stream = self.mp_pose.Pose(
            static_image_mode=False,
            smooth_landmarks=True,
            **shared_options
        )
        self.mp_draw = mp.solutions.drawing_utils
        # Landmark index pairs that form the skeleton lines.
        self.connections = self.mp_pose.POSE_CONNECTIONS

    def close(self):
        """Release both estimators. Safe to call more than once.

        After closing, ``pose_static`` and ``pose_stream`` are ``None`` and
        the detection methods can no longer be used.
        """
        for attr in ("pose_static", "pose_stream"):
            estimator = getattr(self, attr, None)
            if estimator is not None:
                estimator.close()
                # Drop the reference so a second close() is a no-op.
                setattr(self, attr, None)

    def __enter__(self):
        """Return the detector itself for use in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the estimators on leaving the ``with`` block."""
        self.close()
        return False

    def detect_pose(self, image):
        """Run the streaming estimator on one frame.

        Args:
            image: Input image in BGR channel order (as read by OpenCV).

        Returns:
            The MediaPipe result object returned by ``pose_stream.process``.
        """
        # MediaPipe expects RGB input, OpenCV delivers BGR.
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        return self.pose_stream.process(image_rgb)

    def detect_poses(self, image):
        """Run the static-image estimator and return the results as a list.

        The image is processed exactly once and the result is appended only
        when landmarks were found, so the list currently holds zero or one
        element. The list shape lets callers treat the output uniformly if
        real multi-person detection is added later.

        Args:
            image: Input image in BGR channel order (as read by OpenCV).

        Returns:
            A list of MediaPipe result objects, one per detected pose.
        """
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        results = self.pose_static.process(image_rgb)
        return [results] if results.pose_landmarks else []

    def draw_pose(self, image, results, color=(0, 255, 0)):
        """Draw one pose skeleton onto ``image`` in place.

        Args:
            image: Image to draw on (modified in place).
            results: MediaPipe result object for one pose.
            color: Colour used for both landmarks and connections.

        Returns:
            The same ``image`` object; unchanged when no landmarks exist.
        """
        if results.pose_landmarks:
            self.mp_draw.draw_landmarks(
                image,
                results.pose_landmarks,
                self.connections,
                landmark_drawing_spec=self.mp_draw.DrawingSpec(
                    color=color,
                    thickness=2,
                    circle_radius=2
                ),
                connection_drawing_spec=self.mp_draw.DrawingSpec(
                    color=color,
                    thickness=2
                )
            )
        return image

    def draw_poses(self, image, pose_results):
        """Draw every pose in ``pose_results``, cycling through ``COLORS``.

        Args:
            image: Image to draw on.
            pose_results: List of MediaPipe result objects.

        Returns:
            The image with all skeletons drawn.
        """
        palette = self.COLORS
        for idx, results in enumerate(pose_results):
            # Wrap around when there are more poses than colours.
            image = self.draw_pose(image, results, palette[idx % len(palette)])
        return image

    def get_pose_info(self, results):
        """Extract the ``KEY_POINTS`` landmarks of one pose as plain dicts.

        Args:
            results: MediaPipe result object for one pose.

        Returns:
            A dict mapping landmark name to a dict with the keys ``x``,
            ``y``, ``z`` and ``visibility``; empty when ``results`` has no
            landmarks.
        """
        if not results.pose_landmarks:
            return {}
        landmarks = results.pose_landmarks.landmark
        pose_info = {}
        for name, idx in self.KEY_POINTS.items():
            landmark = landmarks[idx]
            pose_info[name] = {
                'x': landmark.x,
                'y': landmark.y,
                'z': landmark.z,
                'visibility': landmark.visibility
            }
        return pose_info

    def get_poses_info(self, pose_results):
        """Extract landmark dicts for every pose, skipping empty ones.

        Args:
            pose_results: List of MediaPipe result objects.

        Returns:
            A list of non-empty dicts as produced by :meth:`get_pose_info`.
        """
        extracted = (self.get_pose_info(results) for results in pose_results)
        return [pose_info for pose_info in extracted if pose_info]
 def run_webcam(camera_index=0):
    """Run live pose detection on a webcam until 'q' is pressed.

    Args:
        camera_index: Device index passed to ``cv2.VideoCapture``. Defaults
            to 0; try 1, 2, ... when several cameras are attached.
    """
    print("启动摄像头姿态检测...")
    print("按 'q' 键退出")
    # Open the camera first so an unavailable device does not cost a model
    # load.
    cap = cv2.VideoCapture(camera_index)
    if not cap.isOpened():
        print("错误: 无法打开摄像头")
        cap.release()
        return
    detector = None
    try:
        detector = PoseDetector()
        while True:
            ret, frame = cap.read()
            if not ret:
                print("错误: 无法读取摄像头画面")
                break
            # Detect and overlay the skeleton on the current frame.
            results = detector.detect_pose(frame)
            frame = detector.draw_pose(frame, results)
            # On-screen status label, shown only while a pose is detected.
            if results.pose_landmarks:
                cv2.putText(frame, "Pose Detected", (10, 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            cv2.imshow('Pose Detection', frame)
            # waitKey also pumps the GUI event loop; mask to the low byte
            # before comparing with 'q'.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()
        # Release the MediaPipe estimators held by the detector.
        if detector is not None:
            for estimator in (detector.pose_static, detector.pose_stream):
                if estimator is not None:
                    estimator.close()
 def run_image(image_path):
    """Detect poses in one image file, show the result and print keypoints.

    Prints an error and returns early when the file is missing or cannot be
    decoded. Otherwise the annotated image is shown in a window that stays
    open until any key is pressed.

    Args:
        image_path: Path of the image file (``str`` or path-like).
    """
    print(f"检测图片: {image_path}")
    if not Path(image_path).exists():
        print(f"错误: 文件不存在 - {image_path}")
        return
    # Decode before building the detector so an unreadable file does not
    # cost a model load. str() lets path-like objects through as well.
    image = cv2.imread(str(image_path))
    if image is None:
        print(f"错误: 无法读取图片 - {image_path}")
        return
    detector = PoseDetector()
    try:
        pose_results = detector.detect_poses(image)
        image = detector.draw_poses(image, pose_results)
        poses_info = detector.get_poses_info(pose_results)
        cv2.imshow('Pose Detection', image)
        # Print the extracted keypoints for every detected person.
        if poses_info:
            print(f"\n检测到 {len(poses_info)} 个人物:")
            for idx, pose_info in enumerate(poses_info):
                print(f"\n人物 {idx + 1}:")
                for name, info in pose_info.items():
                    print(f"  {name}: x={info['x']:.3f}, y={info['y']:.3f}, visibility={info['visibility']:.3f}")
        else:
            print("未检测到姿态")
        print("\n按任意键关闭窗口...")
        cv2.waitKey(0)
    finally:
        # Runs on Ctrl+C or errors too, so no window or estimator is left
        # behind.
        cv2.destroyAllWindows()
        for estimator in (detector.pose_static, detector.pose_stream):
            if estimator is not None:
                estimator.close()
 def main():
    """Command-line entry point.

    With no arguments the webcam mode is started; otherwise the first
    argument is treated as an image path and any further ones are ignored.
    """
    cli_args = sys.argv[1:]
    if cli_args:
        # An image path was supplied on the command line.
        run_image(cli_args[0])
        return
    # No path given: fall back to the live camera.
    run_webcam()
 # Start the CLI only when this file is executed as a script, not on import.
 if __name__ == "__main__":
    main()
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1,8 @@
 # Python dependencies
 # Add your dependencies here, one per line
 # Example:
 # requests==2.31.0
 # numpy==1.24.3
 opencv-python==4.8.1.78
 mediapipe==0.10.8
 Pillow==10.1.0
--- a/test.webp
+++ b/test.webp
--- a/yolov8s.onnx
+++ b/yolov8s.onnx