Add pose detector implementation with YOLOv8 and supporting files

2026-02-02 17:16:43 +08:00 · 2026-02-02 17:16:43 +08:00 · 15129c3635
parent 7505de4f6c
commit 15129c3635
5 changed files with 449 additions and 0 deletions
--- a/USAGE.md
+++ b/USAGE.md
@ -0,0 +1,138 @@
+# 姿态识别Demo 使用说明
+
+## 功能介绍
+
+这是一个基于MediaPipe的人像姿态识别Demo，支持：
+- **摄像头实时检测**：实时检测摄像头中的人像姿态
+- **图片检测**：检测单张图片中的人像姿态
+- **33个关键点检测**：全身姿态关键点识别
+- **骨架可视化**：在图像上绘制姿态骨架
+- **多人检测接口（预留）**：静态图片模式提供多人检测接口并以不同颜色区分人物，但 MediaPipe Pose 每张图片目前最多返回一个人物的姿态
+
+## 环境要求
+
+- Python 3.8+（requirements.txt 中的 Pillow 10.1.0 要求 Python ≥ 3.8）
+- Windows/Linux/macOS
+
+## 安装依赖
+
+```bash
+pip install -r requirements.txt
+```
+
+## 使用方法
+
+### 1. 摄像头实时检测
+
+直接运行程序，将自动打开摄像头进行实时姿态检测：
+
+```bash
+python pose_detector.py
+```
+
+**操作说明：**
+- 程序会自动打开默认摄像头（索引0）
+- 实时显示姿态检测结果
+- 按 `q` 键退出程序
+
+### 2. 图片检测
+
+指定图片路径进行检测：
+
+```bash
+python pose_detector.py path/to/your/image.jpg
+```
+
+**示例：**
+```bash
+python pose_detector.py test.jpg
+python pose_detector.py test.webp
+```
+
+**输出信息：**
+- 显示带有姿态骨架的图片
+- 打印每个人物的姿态关键点坐标信息
+- 按任意键关闭窗口
+
+## 姿态关键点说明
+
+程序检测以下主要关键点（MediaPipe格式，共33个关键点）：
+
+- **头部**：鼻子、左右眼、左右耳、嘴等
+- **肩膀**：左肩、右肩
+- **手臂**：左肘、右肘、左腕、右腕
+- **躯干**：左臀、右臀
+- **腿部**：左膝、右膝、左踝、右踝
+- **手部**：左右手各3个关键点（小指、食指、拇指）；完整的21个手部关键点需使用 MediaPipe Hands
+- **脚部**：左右脚的脚跟和脚尖关键点
+
+## 代码结构
+
+```
+pose_detector.py
+├── PoseDetector 类
+│   ├── __init__()           # 初始化检测器（静态+流式）
+│   ├── detect_pose()        # 单人姿态检测（流式模式）
+│   ├── detect_poses()       # 多人姿态检测（静态模式）
+│   ├── draw_pose()          # 绘制单个骨架
+│   ├── draw_poses()         # 绘制多个骨架
+│   ├── get_pose_info()      # 获取单人姿态信息
+│   └── get_poses_info()     # 获取多人姿态信息
+├── run_webcam()             # 摄像头检测模式
+├── run_image()              # 图片检测模式（支持多人）
+└── main()                   # 主函数
+```
+
+## 技术说明
+
+- **MediaPipe Pose**：Google开源的姿态估计库
+- **OpenCV**：图像处理和显示
+- **双检测器设计**：
+  - `pose_static`：用于静态图像，每张图片独立检测（不依赖帧间追踪）
+  - `pose_stream`：用于视频流，优化实时性能
+- **模型复杂度**：使用中等复杂度模型（model_complexity=1）
+- **检测置信度**：最小检测置信度0.5，最小追踪置信度0.5
+- **颜色区分**：多人检测时使用不同颜色（绿、红、蓝、黄、紫、青）
+
+## 多人检测说明
+
+MediaPipe的Pose模型每次只输出一个人物的姿态，静态模式下检测更稳定，但仍然只会检测画面中最显著的人物。对于复杂的多人场景：
+
+- 当前实现每张图片最多返回一个人物的姿态
+- 多人绘制接口已预留：若检测到多个人物，每个人物会用不同颜色显示
+- 如果需要真正的多人检测，建议配合目标检测器（如YOLO）先裁剪出每个人物，再逐一进行姿态检测
+
+## 常见问题
+
+### 1. 摄像头无法打开
+- 检查摄像头是否被其他程序占用
+- 尝试修改代码中的摄像头索引（0, 1, 2...）
+
+### 2. 检测效果不佳
+- 确保光线充足
+- 确保人物在画面中完整可见
+- 避免遮挡和复杂背景
+
+### 3. 依赖安装失败
+- 使用国内镜像源安装：
+  ```bash
+  pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
+  ```
+
+### 4. 多人检测不准确
+- MediaPipe Pose主要优化单人检测
+- 对于复杂多人场景，检测结果可能不完整
+- 可以尝试调整人物位置或使用其他多人检测方案
+
+## 扩展建议
+
+- 添加姿态识别功能（如站立、坐着、举手等）
+- 支持视频文件输入
+- 添加姿态数据保存和加载功能
+- 集成目标检测器实现更精确的多人检测
+- 添加姿态异常检测功能
+- 实现姿态轨迹跟踪
+
+## 许可证
+
+本项目遵循原项目LICENSE文件中的许可证规定。
--- a/pose_detector.py
+++ b/pose_detector.py
@ -0,0 +1,303 @@
+#!/usr/bin/env python3
+"""
+姿态识别Demo
+使用MediaPipe检测人像姿态，支持摄像头实时检测和图片检测
+"""
+
+import cv2
+import mediapipe as mp
+import sys
+from pathlib import Path
+
+
+class PoseDetector:
+    """Wrapper around MediaPipe Pose for still images and video streams.
+
+    Two ``Pose`` solutions are created up front: ``pose_static`` is configured
+    with ``static_image_mode=True`` (every image is processed independently)
+    and ``pose_stream`` with ``static_image_mode=False`` (intended for
+    consecutive video frames). MediaPipe Pose reports a single person per
+    image, so the "multi-person" helpers currently yield at most one result.
+
+    The instance owns native MediaPipe resources. Call :meth:`close` when
+    finished, or use the detector as a context manager.
+    """
+
+    # BGR colours cycled through when several skeletons are drawn.
+    SKELETON_COLORS = (
+        (0, 255, 0),    # green
+        (0, 0, 255),    # red
+        (255, 0, 0),    # blue
+        (0, 255, 255),  # yellow
+        (255, 0, 255),  # magenta
+        (255, 255, 0),  # cyan
+    )
+
+    # MediaPipe Pose landmark indices reported by get_pose_info().
+    KEY_POINTS = {
+        'nose': 0,
+        'left_shoulder': 11,
+        'right_shoulder': 12,
+        'left_elbow': 13,
+        'right_elbow': 14,
+        'left_wrist': 15,
+        'right_wrist': 16,
+        'left_hip': 23,
+        'right_hip': 24,
+        'left_knee': 25,
+        'right_knee': 26,
+        'left_ankle': 27,
+        'right_ankle': 28,
+    }
+
+    def __init__(self):
+        """Create the static-image and streaming MediaPipe Pose solutions."""
+        self.mp_pose = mp.solutions.pose
+        # One solution for still images, one for video streams.
+        self.pose_static = self.mp_pose.Pose(
+            static_image_mode=True,
+            model_complexity=1,
+            smooth_landmarks=False,
+            enable_segmentation=False,
+            smooth_segmentation=False,
+            min_detection_confidence=0.5,
+            min_tracking_confidence=0.5
+        )
+        self.pose_stream = self.mp_pose.Pose(
+            static_image_mode=False,
+            model_complexity=1,
+            smooth_landmarks=True,
+            enable_segmentation=False,
+            smooth_segmentation=False,
+            min_detection_confidence=0.5,
+            min_tracking_confidence=0.5
+        )
+        self.mp_draw = mp.solutions.drawing_utils
+
+        # Landmark pairs that form the skeleton.
+        self.connections = self.mp_pose.POSE_CONNECTIONS
+
+        # Guards close() against releasing the solutions twice.
+        self._closed = False
+
+    def __enter__(self):
+        """Return the detector itself so it can be used in a ``with`` block."""
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        """Release the MediaPipe solutions; exceptions are not suppressed."""
+        self.close()
+        return False
+
+    def close(self):
+        """Release both MediaPipe solutions. Safe to call more than once."""
+        if self._closed:
+            return
+        self._closed = True
+        for solution in (self.pose_static, self.pose_stream):
+            solution.close()
+
+    @staticmethod
+    def _to_rgb(image):
+        """Convert a BGR image to a read-only RGB copy for MediaPipe."""
+        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+        # MediaPipe's samples mark the input read-only so it can be passed
+        # by reference; the caller's BGR image is left untouched.
+        image_rgb.flags.writeable = False
+        return image_rgb
+
+    def detect_pose(self, image):
+        """
+        Detect the pose in one frame using the streaming solution.
+
+        Args:
+            image: input image in BGR channel order
+
+        Returns:
+            results: MediaPipe pose result; ``results.pose_landmarks`` is
+                falsy when nothing was detected
+        """
+        return self.pose_stream.process(self._to_rgb(image))
+
+    def detect_poses(self, image):
+        """
+        Detect poses in a still image using the static-image solution.
+
+        The list-based return type leaves room for several people, but only
+        one ``process`` call is made, so the list holds zero or one result.
+
+        Args:
+            image: input image in BGR channel order
+
+        Returns:
+            pose_results: list of MediaPipe pose results (empty when no pose
+                was detected)
+        """
+        results = self.pose_static.process(self._to_rgb(image))
+        return [results] if results.pose_landmarks else []
+
+    def draw_pose(self, image, results, color=(0, 255, 0)):
+        """
+        Draw one pose skeleton onto the image (in place).
+
+        Args:
+            image: image to draw on
+            results: MediaPipe pose result
+            color: BGR colour used for both landmarks and connections
+
+        Returns:
+            image: the same image object, with the skeleton drawn when
+                landmarks are present
+        """
+        if results.pose_landmarks:
+            self.mp_draw.draw_landmarks(
+                image,
+                results.pose_landmarks,
+                self.connections,
+                landmark_drawing_spec=self.mp_draw.DrawingSpec(
+                    color=color,
+                    thickness=2,
+                    circle_radius=2
+                ),
+                connection_drawing_spec=self.mp_draw.DrawingSpec(
+                    color=color,
+                    thickness=2
+                )
+            )
+        return image
+
+    def draw_poses(self, image, pose_results):
+        """
+        Draw several pose skeletons, cycling through SKELETON_COLORS.
+
+        Args:
+            image: image to draw on
+            pose_results: list of MediaPipe pose results
+
+        Returns:
+            image: the image with every skeleton drawn
+        """
+        palette = self.SKELETON_COLORS
+        for idx, results in enumerate(pose_results):
+            image = self.draw_pose(image, results, palette[idx % len(palette)])
+        return image
+
+    def get_pose_info(self, results):
+        """
+        Extract the landmarks listed in KEY_POINTS from one pose result.
+
+        Args:
+            results: MediaPipe pose result
+
+        Returns:
+            pose_info: dict mapping key-point name to a dict with ``x``,
+                ``y``, ``z`` and ``visibility``; empty when the result has
+                no landmarks
+        """
+        if not results.pose_landmarks:
+            return {}
+
+        landmarks = results.pose_landmarks.landmark
+        pose_info = {}
+        for name, idx in self.KEY_POINTS.items():
+            landmark = landmarks[idx]
+            pose_info[name] = {
+                'x': landmark.x,
+                'y': landmark.y,
+                'z': landmark.z,
+                'visibility': landmark.visibility
+            }
+        return pose_info
+
+    def get_poses_info(self, pose_results):
+        """
+        Extract key-point information for every pose result.
+
+        Args:
+            pose_results: list of MediaPipe pose results
+
+        Returns:
+            poses_info: list of non-empty dicts as returned by
+                get_pose_info(); results without landmarks are skipped
+        """
+        return [info for info in map(self.get_pose_info, pose_results) if info]
+
+
+def run_webcam(camera_index=0):
+    """
+    Run real-time pose detection on a webcam until 'q' is pressed.
+
+    Args:
+        camera_index: index of the capture device handed to
+            ``cv2.VideoCapture``; defaults to 0, the previous hard-coded value
+    """
+    print("启动摄像头姿态检测...")
+    print("按 'q' 键退出")
+
+    # Open the camera first so the pose models are not loaded when no
+    # camera is available.
+    cap = cv2.VideoCapture(camera_index)
+    if not cap.isOpened():
+        print("错误: 无法打开摄像头")
+        return
+
+    detector = None
+    try:
+        detector = PoseDetector()
+        while True:
+            ret, frame = cap.read()
+            if not ret:
+                print("错误: 无法读取摄像头画面")
+                break
+
+            # Detect and draw the pose on the current frame.
+            results = detector.detect_pose(frame)
+            frame = detector.draw_pose(frame, results)
+
+            # Overlay a status label when a pose was found.
+            if results.pose_landmarks:
+                cv2.putText(frame, "Pose Detected", (10, 30),
+                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
+
+            cv2.imshow('Pose Detection', frame)
+
+            # Mask to the low byte before comparing with 'q'.
+            if cv2.waitKey(1) & 0xFF == ord('q'):
+                break
+    finally:
+        cap.release()
+        cv2.destroyAllWindows()
+        # Release the MediaPipe solutions held by the detector.
+        if detector is not None:
+            for solution in (detector.pose_static, detector.pose_stream):
+                solution.close()
+
+
+def run_image(image_path):
+    """
+    Run pose detection on one image file, show it and print the key points.
+
+    Args:
+        image_path: path of the image file to analyse
+    """
+    # Local import: numpy is only needed here, for Unicode-safe file reading.
+    import numpy as np
+
+    print(f"检测图片: {image_path}")
+
+    if not Path(image_path).exists():
+        print(f"错误: 文件不存在 - {image_path}")
+        return
+
+    # cv2.imread cannot open paths with non-ASCII characters on Windows, so
+    # read the raw bytes with numpy and decode them in memory instead.
+    try:
+        raw = np.fromfile(image_path, dtype=np.uint8)
+    except OSError:
+        raw = None
+
+    image = None
+    if raw is not None and raw.size:
+        # imdecode returns None when the bytes are not a decodable image.
+        image = cv2.imdecode(raw, cv2.IMREAD_COLOR)
+
+    if image is None:
+        print(f"错误: 无法读取图片 - {image_path}")
+        return
+
+    # Load the models only once the image is known to be usable.
+    detector = PoseDetector()
+    try:
+        pose_results = detector.detect_poses(image)
+        image = detector.draw_poses(image, pose_results)
+        poses_info = detector.get_poses_info(pose_results)
+
+        cv2.imshow('Pose Detection', image)
+
+        # Print the key points of every detected person.
+        if poses_info:
+            print(f"\n检测到 {len(poses_info)} 个人物:")
+            for idx, pose_info in enumerate(poses_info):
+                print(f"\n人物 {idx + 1}:")
+                for name, info in pose_info.items():
+                    print(f"  {name}: x={info['x']:.3f}, y={info['y']:.3f}, visibility={info['visibility']:.3f}")
+        else:
+            print("未检测到姿态")
+
+        print("\n按任意键关闭窗口...")
+        cv2.waitKey(0)
+    finally:
+        cv2.destroyAllWindows()
+        # Release the MediaPipe solutions held by the detector.
+        for solution in (detector.pose_static, detector.pose_stream):
+            solution.close()
+
+
+def main():
+    """Entry point: analyse the image named by the first CLI argument, or
+    fall back to the webcam when no argument is given."""
+    cli_args = sys.argv[1:]
+    if cli_args:
+        # Only the first argument is used; it is treated as an image path.
+        run_image(cli_args[0])
+        return
+    # No argument: live webcam mode.
+    run_webcam()
+
+
+if __name__ == "__main__":
+    main()
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1,8 @@
+# Python dependencies
+# Add your dependencies here, one per line
+# Example:
+# requests==2.31.0
+# numpy==1.24.3
+opencv-python==4.8.1.78
+mediapipe==0.10.8
+Pillow==10.1.0
--- a/test.webp
+++ b/test.webp
--- a/yolov8s.onnx
+++ b/yolov8s.onnx