I'm trying to run the MoveNet pose estimation model on a video, but for some reason my keypoints are very inaccurate. I don't think the problem is the predictions themselves, but rather how I scale and draw the points from the estimates. I can't find where the inaccuracy comes from.
import tensorflow as tf
import numpy as np
from matplotlib import pyplot as plt
import cv2

interpreter = tf.lite.Interpreter(model_path='lite-model_movenet_singlepose_lightning_3.tflite')
interpreter.allocate_tensors()

def draw_keypoints(frame, keypoints, confidence_threshold):
    y, x, c = frame.shape
    shaped = np.squeeze(np.multiply(keypoints, [y, x, 1]))
    for kp in shaped:
        ky, kx, kp_conf = kp
        if kp_conf > confidence_threshold:
            cv2.circle(frame, (int(kx), int(ky)), 4, (0, 255, 0), -1)

cap = cv2.VideoCapture("pushup-stock-compressed.mp4")
while cap.isOpened():
    ret, frame = cap.read()

    # Reshape image
    img = frame.copy()
    img = tf.image.resize_with_pad(np.expand_dims(img, axis=0), 192, 192)
    input_image = tf.cast(img, dtype=tf.float32)

    # Setup input and output
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    # Make predictions
    interpreter.set_tensor(input_details[0]['index'], np.array(input_image))
    interpreter.invoke()
    keypoints_with_scores = interpreter.get_tensor(output_details[0]['index'])

    # Rendering
    draw_keypoints(frame, keypoints_with_scores, 0.4)

    cv2.imshow('MoveNet Lightning', frame)
    if cv2.waitKey(10) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
Answer:
The keypoints MoveNet returns are normalized to the 192x192 input that tf.image.resize_with_pad produces, not to the original frame, so the padding has to be undone before drawing. First scale the keypoints up to the resized image and draw them there (image_with_keypoints_resized.png in the code below). The example contains some wrong predictions from the network (look at the right leg). Now apply the inverse affine transform to map these keypoints onto the original image (image_with_keypoints_original.png). As we can see, the keypoints are drawn at the same positions as on the resized+padded image.
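To make the padding concrete, here is a minimal sketch of the arithmetic, assuming a hypothetical 720x1280 (height x width) frame; the sizes and the keypoint are made up for illustration:

h, w = 720, 1280
scale = min(192 / h, 192 / w)      # 0.15 -- the longer side fills all 192 px
pad_x = (192 - scale * w) / 2      # 0.0  -- width needs no padding
pad_y = (192 - scale * h) / 2      # 42.0 -- black bars above and below

# MoveNet returns normalized (y, x) on the padded image; undo pad and scale:
y_n, x_n = 0.5, 0.5                # hypothetical keypoint at the input center
y_orig = (y_n * 192 - pad_y) / scale   # 360.0 = center row of the frame
x_orig = (x_n * 192 - pad_x) / scale   # 640.0 = center column of the frame
print(y_orig, x_orig)

The inverse affine transform in the full example below performs exactly this subtraction and division, just expressed as a matrix.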
Full example:
import tensorflow as tf
import numpy as np
import cv2

interpreter = tf.lite.Interpreter(
    model_path="lite-model_movenet_singlepose_lightning_3.tflite"
)
interpreter.allocate_tensors()

def draw_keypoints(frame, keypoints, confidence_threshold):
    # keypoints are (y, x, score) rows in pixel coordinates
    for ky, kx, kp_conf in keypoints:
        if kp_conf > confidence_threshold:
            cv2.circle(frame, (int(kx), int(ky)), 4, (0, 255, 0), -1)

def get_affine_transform_to_fixed_sizes_with_padding(size, new_sizes):
    width, height = new_sizes
    scale = min(height / float(size[1]), width / float(size[0]))
    M = np.float32([[scale, 0, 0], [0, scale, 0]])
    M[0][2] = (width - scale * size[0]) / 2
    M[1][2] = (height - scale * size[1]) / 2
    return M

frame = cv2.imread("gym.png")

# Reshape image
img = frame.copy()
img = tf.image.resize_with_pad(np.expand_dims(img, axis=0), 192, 192)
input_image = tf.cast(img, dtype=tf.float32)

# Setup input and output
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Make predictions
interpreter.set_tensor(input_details[0]["index"], np.array(input_image))
interpreter.invoke()
keypoints_with_scores = interpreter.get_tensor(output_details[0]["index"])[0, 0]

# First draw the keypoints on the resized+padded 192x192 input image
img_resized = np.array(input_image).astype(np.uint8)[0]
keypoints_for_resized = keypoints_with_scores.copy()
keypoints_for_resized[:, 0] *= img_resized.shape[0]  # column 0 is normalized y
keypoints_for_resized[:, 1] *= img_resized.shape[1]  # column 1 is normalized x
draw_keypoints(img_resized, keypoints_for_resized, 0.4)
cv2.imwrite("image_with_keypoints_resized.png", img_resized)

# Map the keypoints back onto the original frame. frame.shape[:2] is
# (height, width); passing the sizes in this order matches the model's
# (y, x) keypoint order, so one matrix handles both coordinates.
orig_h, orig_w = frame.shape[:2]
M = get_affine_transform_to_fixed_sizes_with_padding((orig_h, orig_w), (192, 192))
# M has shape 2x3, but we need a square matrix to find an inverse
M = np.vstack((M, [0, 0, 1]))
M_inv = np.linalg.inv(M)[:2]
xy_keypoints = keypoints_with_scores[:, :2] * 192
xy_keypoints = cv2.transform(np.array([xy_keypoints]), M_inv)[0]
keypoints_with_scores = np.hstack((xy_keypoints, keypoints_with_scores[:, 2:]))

# Rendering
draw_keypoints(frame, keypoints_with_scores, 0.4)
cv2.imwrite("image_with_keypoints_original.png", frame)