I'm trying to run the MoveNet pose estimation model on a video, but for some reason my keypoints are very inaccurate. I don't think the problem is the predictions themselves, but rather how I scale and draw the points from the estimates. I can't find where the inaccuracy comes from.
import tensorflow as tf
import numpy as np
from matplotlib import pyplot as plt
import cv2

interpreter = tf.lite.Interpreter(model_path='lite-model_movenet_singlepose_lightning_3.tflite')
interpreter.allocate_tensors()

def draw_keypoints(frame, keypoints, confidence_threshold):
    y, x, c = frame.shape
    shaped = np.squeeze(np.multiply(keypoints, [y, x, 1]))
    for kp in shaped:
        ky, kx, kp_conf = kp
        if kp_conf > confidence_threshold:
            cv2.circle(frame, (int(kx), int(ky)), 4, (0, 255, 0), -1)

cap = cv2.VideoCapture("pushup-stock-compressed.mp4")
while cap.isOpened():
    ret, frame = cap.read()

    # Reshape image
    img = frame.copy()
    img = tf.image.resize_with_pad(np.expand_dims(img, axis=0), 192, 192)
    input_image = tf.cast(img, dtype=tf.float32)

    # Setup input and output
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    # Make predictions
    interpreter.set_tensor(input_details[0]['index'], np.array(input_image))
    interpreter.invoke()
    keypoints_with_scores = interpreter.get_tensor(output_details[0]['index'])

    # Rendering
    draw_keypoints(frame, keypoints_with_scores, 0.4)

    cv2.imshow('MoveNet Lightning', frame)
    if cv2.waitKey(10) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()
Answer:
The keypoints MoveNet returns are normalized to the 192x192 input that tf.image.resize_with_pad produces, not to the original frame, so the padding has to be undone before drawing. First scale the keypoints up to the resized image and draw them there (image_with_keypoints_resized.png in the code below). The example contains some wrong predictions from the network (look at the right leg). Now apply the inverse affine transform to map these keypoints onto the original image (image_with_keypoints_original.png). As we can see, the keypoints are drawn at the same positions as on the resized+padded image.
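To make the padding concrete, here is a minimal sketch of the arithmetic, assuming a hypothetical 720x1280 (height x width) frame; the sizes and the keypoint are made up for illustration:

h, w = 720, 1280
scale = min(192 / h, 192 / w)      # 0.15 -- the longer side fills all 192 px
pad_x = (192 - scale * w) / 2      # 0.0  -- width needs no padding
pad_y = (192 - scale * h) / 2      # 42.0 -- black bars above and below

# MoveNet returns normalized (y, x) on the padded image; undo pad and scale:
y_n, x_n = 0.5, 0.5                # hypothetical keypoint at the input center
y_orig = (y_n * 192 - pad_y) / scale   # 360.0 = center row of the frame
x_orig = (x_n * 192 - pad_x) / scale   # 640.0 = center column of the frame
print(y_orig, x_orig)

The inverse affine transform in the full example below performs exactly this subtraction and division, just expressed as a matrix.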
Full example:
import tensorflow as tf
import numpy as np
import cv2

interpreter = tf.lite.Interpreter(
    model_path="lite-model_movenet_singlepose_lightning_3.tflite"
)
interpreter.allocate_tensors()

def draw_keypoints(frame, keypoints, confidence_threshold):
    # keypoints are (y, x, score) rows in pixel coordinates
    for ky, kx, kp_conf in keypoints:
        if kp_conf > confidence_threshold:
            cv2.circle(frame, (int(kx), int(ky)), 4, (0, 255, 0), -1)

def get_affine_transform_to_fixed_sizes_with_padding(size, new_sizes):
    width, height = new_sizes
    scale = min(height / float(size[1]), width / float(size[0]))
    M = np.float32([[scale, 0, 0], [0, scale, 0]])
    M[0][2] = (width - scale * size[0]) / 2
    M[1][2] = (height - scale * size[1]) / 2
    return M

frame = cv2.imread("gym.png")

# Reshape image
img = frame.copy()
img = tf.image.resize_with_pad(np.expand_dims(img, axis=0), 192, 192)
input_image = tf.cast(img, dtype=tf.float32)

# Setup input and output
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Make predictions
interpreter.set_tensor(input_details[0]["index"], np.array(input_image))
interpreter.invoke()
keypoints_with_scores = interpreter.get_tensor(output_details[0]["index"])[0, 0]

# First draw the keypoints on the resized+padded 192x192 input image
img_resized = np.array(input_image).astype(np.uint8)[0]
keypoints_for_resized = keypoints_with_scores.copy()
keypoints_for_resized[:, 0] *= img_resized.shape[0]  # column 0 is normalized y
keypoints_for_resized[:, 1] *= img_resized.shape[1]  # column 1 is normalized x
draw_keypoints(img_resized, keypoints_for_resized, 0.4)
cv2.imwrite("image_with_keypoints_resized.png", img_resized)

# Map the keypoints back onto the original frame. frame.shape[:2] is
# (height, width); passing the sizes in this order matches the model's
# (y, x) keypoint order, so one matrix handles both coordinates.
orig_h, orig_w = frame.shape[:2]
M = get_affine_transform_to_fixed_sizes_with_padding((orig_h, orig_w), (192, 192))
# M has shape 2x3, but we need a square matrix to find an inverse
M = np.vstack((M, [0, 0, 1]))
M_inv = np.linalg.inv(M)[:2]
xy_keypoints = keypoints_with_scores[:, :2] * 192
xy_keypoints = cv2.transform(np.array([xy_keypoints]), M_inv)[0]
keypoints_with_scores = np.hstack((xy_keypoints, keypoints_with_scores[:, 2:]))

# Rendering
draw_keypoints(frame, keypoints_with_scores, 0.4)
cv2.imwrite("image_with_keypoints_original.png", frame)