环境
- Ubuntu 22.04-local(x86-64)
- Ubuntu 22.04-rk3588(aarch64)
- RK3588
- python3.8
本机-x86-64
配置rknn-toolkit2环境
# Create and activate a dedicated conda environment named "rknn" with Python 3.8
conda create -n rknn python=3.8
conda activate rknn
# Use the Tsinghua (TUNA) PyPI mirror for the pip installs below
pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
# reference: https://github.com/rockchip-linux/rknn-toolkit2
# (the 2.3.2 packages below are fetched from the airockchip/rknn-toolkit2 repository)
# Download and install the cp38 requirements list for rknn-toolkit2 2.3.2 (x86_64)
wget https://ghfast.top/github.com/airockchip/rknn-toolkit2/blob/master/rknn-toolkit2/packages/x86_64/requirements_cp38-2.3.2.txt
pip install -r requirements_cp38-2.3.2.txt
# Download and install the rknn-toolkit2 2.3.2 wheel built for cp38 / x86_64
wget https://ghfast.top/github.com/airockchip/rknn-toolkit2/blob/master/rknn-toolkit2/packages/x86_64/rknn_toolkit2-2.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
pip install rknn_toolkit2-2.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
# Clone the airockchip ultralytics_yolov8 repository and install it in editable mode
git clone https://github.com/airockchip/ultralytics_yolov8.git
cd ultralytics_yolov8
pip install -e .
# Verify the installation through the yolo command-line tool
yolo check
模型转换
# Step 1: export the model to ONNX
cd ultralytics_yolov8
# Edit the `model` file path in ./ultralytics/cfg/default.yaml before exporting
export PYTHONPATH=./
# Run the export
python ./ultralytics/engine/exporter.py
# Step 2: convert the ONNX model to RKNN
git clone https://github.com/airockchip/rknn_model_zoo.git
cd rknn_model_zoo/examples/yolov8/python
# convert
# presumably the arguments are: <onnx path> <target platform> <dtype> <output .rknn path>
# — confirm against the rknn_model_zoo yolov8 README
python convert.py /your_path/yolov8n.onnx rk3588 fp yolov8n.rknn
# Copy yolov8n.rknn to your RK3588 device (for example with scp)
rk3588-aarch64
安装miniconda
# 1. download: https://mirrors.tuna.tsinghua.edu.cn/anaconda/miniconda/Miniconda3-latest-Linux-aarch64.sh
# 2. install miniconda3
chmod +x Miniconda3-latest-Linux-aarch64.sh
# NOTE(review): running the installer under sudo will presumably leave the
# installation directory owned by root — confirm this is intended
sudo bash Miniconda3-latest-Linux-aarch64.sh
# When the installer asks for the installation path, enter e.g.: /home/hello/miniconda3
# Relative path: assumes the current directory is the parent of miniconda3
# (/home/hello with the installation path entered above)
miniconda3/bin/conda init bash
# Reload the shell configuration so that the conda command becomes available
source ~/.bashrc
配置rknn-toolkit-lite2环境
# 1. create conda env (Python 3.8, named "rknn") and activate it
conda create -n rknn python=3.8
conda activate rknn
# Use the Tsinghua (TUNA) PyPI mirror for the pip installs below
pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
# 2. install opencv pytorch numpy... (everything listed in the arm64 cp38 requirements file)
wget https://ghfast.top/github.com/airockchip/rknn-toolkit2/blob/master/rknn-toolkit2/packages/arm64/arm64_requirements_cp38.txt
pip install -r arm64_requirements_cp38.txt
# 3. install rknn-toolkit-lite2 (2.3.2 wheel built for cp38 / aarch64)
wget https://ghfast.top/github.com/airockchip/rknn-toolkit2/blob/master/rknn-toolkit-lite2/packages/rknn_toolkit_lite2-2.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
pip install rknn_toolkit_lite2-2.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
推理视频测试
from rknnlite.api import RKNNLite
import cv2
import numpy as np
import os
# Path of the converted RKNN model loaded at start-up.
MODEL_PATH = 'weights/yolov8n.rknn'

# Video source handed to cv2.VideoCapture; an RTSP URL works as well, e.g.
# VIDEO_PATH = 'rtsp://192.168.10.178/live/camera0'
VIDEO_PATH = 'test.mp4'

# Minimum score for a detection to be kept, and the IoU limit used by NMS.
OBJ_THRESH = 0.25
NMS_THRESH = 0.45

# Network input resolution as (width, height).
IMG_SIZE = (640, 640)

# Class labels; the position in the tuple is the class id.
CLASSES = (
    "person", "bicycle", "car", "motorbike", "aeroplane",
    "bus", "train", "truck", "boat", "traffic light",
    "fire hydrant", "stop sign", "parking meter", "bench", "bird",
    "cat", "dog", "horse", "sheep", "cow",
    "elephant", "bear", "zebra", "giraffe", "backpack",
    "umbrella", "handbag", "tie", "suitcase", "frisbee",
    "skis", "snowboard", "sports ball", "kite", "baseball bat",
    "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",
    "wine glass", "cup", "fork", "knife", "spoon",
    "bowl", "banana", "apple", "sandwich", "orange",
    "broccoli", "carrot", "hot dog", "pizza", "donut",
    "cake", "chair", "sofa", "pottedplant", "bed",
    "diningtable", "toilet", "tvmonitor", "laptop", "mouse",
    "remote", "keyboard", "cell phone", "microwave", "oven",
    "toaster", "sink", "refrigerator", "book", "clock",
    "vase", "scissors", "teddy bear", "hair drier", "toothbrush",
)
def filter_boxes(boxes, box_confidences, box_class_probs, obj_thresh=None):
    """Keep only the candidates whose best class score reaches the threshold.

    Args:
        boxes: Array indexed by candidate along axis 0.
        box_confidences: Per-candidate confidence; flattened to 1-D here.
        box_class_probs: Array whose last axis holds the per-class scores.
        obj_thresh: Minimum score (best class score * confidence) to keep a
            candidate. Defaults to the module-level ``OBJ_THRESH``.

    Returns:
        Tuple ``(boxes, classes, scores)`` restricted to the kept candidates,
        where ``classes`` is the arg-max class index and ``scores`` the
        combined score of each kept candidate.
    """
    if obj_thresh is None:
        obj_thresh = OBJ_THRESH

    box_confidences = box_confidences.reshape(-1)
    best_class = np.argmax(box_class_probs, axis=-1)
    # Combined score is computed once and reused for both the mask and output.
    scores = np.max(box_class_probs, axis=-1) * box_confidences

    keep = scores >= obj_thresh
    return boxes[keep], best_class[keep], scores[keep]
def nms_boxes(boxes, scores, nms_thresh=None):
    """Greedy non-maximum suppression over one set of boxes.

    Args:
        boxes: Array of shape (N, 4) holding ``x1, y1, x2, y2`` per row.
        scores: Array of shape (N,) with the score of each box.
        nms_thresh: A box is dropped when its IoU with an already kept,
            higher-scoring box exceeds this value. Defaults to the
            module-level ``NMS_THRESH``.

    Returns:
        Integer index array (into ``boxes``) of the kept boxes, ordered by
        descending score. An empty input yields an empty *integer* array so
        the result is always usable for indexing.
    """
    if nms_thresh is None:
        nms_thresh = NMS_THRESH

    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    areas = (x2 - x1) * (y2 - y1)

    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        best = order[0]
        rest = order[1:]
        keep.append(best)

        # Intersection of the best box with every remaining box.
        xx1 = np.maximum(x1[best], x1[rest])
        yy1 = np.maximum(y1[best], y1[rest])
        xx2 = np.minimum(x2[best], x2[rest])
        yy2 = np.minimum(y2[best], y2[rest])
        inter_w = np.maximum(0.0, xx2 - xx1 + 0.00001)
        inter_h = np.maximum(0.0, yy2 - yy1 + 0.00001)
        inter = inter_w * inter_h

        iou = inter / (areas[best] + areas[rest] - inter)
        # Continue only with boxes that do not overlap the kept one too much.
        order = rest[iou <= nms_thresh]

    return np.array(keep, dtype=np.intp)
def dfl(position):
    """Decode DFL (Distribution Focal Loss) box outputs - numpy implementation.

    The channel axis is split into 4 groups of ``c // 4`` bins. Each group is
    turned into a probability distribution with a softmax, and the expected
    bin index of that distribution is returned.

    Args:
        position: Array of shape (n, c, h, w); ``c`` must be divisible by 4.

    Returns:
        Array of shape (n, 4, h, w) with the expected bin index per group.
    """
    n, c, h, w = position.shape
    p_num = 4
    mc = c // p_num

    logits = position.reshape(n, p_num, mc, h, w)

    # Softmax over the bin axis; subtracting the max keeps exp() from overflowing.
    exp_logits = np.exp(logits - np.max(logits, axis=2, keepdims=True))
    probs = exp_logits / np.sum(exp_logits, axis=2, keepdims=True)

    # Expected value: sum over bins of (bin index * probability).
    bin_index = np.arange(mc).reshape(1, 1, mc, 1, 1).astype(np.float32)
    return (probs * bin_index).sum(axis=2)
def box_process(position):
    """Convert one branch's raw box tensor into ``x1, y1, x2, y2`` boxes.

    Args:
        position: Array of shape (n, c, grid_h, grid_w) that is decoded by
            ``dfl`` into four per-cell distances.

    Returns:
        Array of shape (n, 4, grid_h, grid_w) with box corners scaled by the
        stride derived from ``IMG_SIZE`` and the grid size.
    """
    grid_h, grid_w = position.shape[2:4]

    # Cell indices: channel 0 is the column (x), channel 1 is the row (y).
    col, row = np.meshgrid(np.arange(0, grid_w), np.arange(0, grid_h))
    col = col.reshape(1, 1, grid_h, grid_w)
    row = row.reshape(1, 1, grid_h, grid_w)
    grid = np.concatenate((col, row), axis=1)

    # IMG_SIZE is (width, height). The stride vector must follow the same
    # (x, y) channel order as ``grid``: x uses width / grid_w, y uses
    # height / grid_h.
    stride = np.array([IMG_SIZE[0] // grid_w, IMG_SIZE[1] // grid_h]).reshape(1, 2, 1, 1)

    distances = dfl(position)
    # Channels 0:2 are subtracted from the cell centre, channels 2:4 added to it.
    top_left = grid + 0.5 - distances[:, 0:2, :, :]
    bottom_right = grid + 0.5 + distances[:, 2:4, :, :]
    return np.concatenate((top_left * stride, bottom_right * stride), axis=1)
def post_process(input_data):
    """Turn raw model outputs into final detections.

    The outputs are treated as 3 branches of ``len(input_data) // 3`` tensors
    each. Within a branch, the first tensor is the box tensor and the second
    holds the per-class confidences. Every candidate gets an objectness of 1,
    so the class confidence alone decides the score.

    Args:
        input_data: Sequence of arrays shaped (n, c, h, w).

    Returns:
        ``(boxes, classes, scores)`` arrays after thresholding and per-class
        NMS, or ``(None, None, None)`` when nothing is detected.

    Raises:
        ValueError: If there are fewer than two tensors per branch.
    """
    default_branch = 3
    pair_per_branch = len(input_data) // default_branch
    if pair_per_branch < 2:
        raise ValueError(
            'expected at least {} output tensors, got {}'.format(
                2 * default_branch, len(input_data)))

    boxes, scores, classes_conf = [], [], []
    for i in range(default_branch):
        box_tensor = input_data[pair_per_branch * i]
        cls_tensor = input_data[pair_per_branch * i + 1]
        boxes.append(box_process(box_tensor))
        classes_conf.append(cls_tensor)
        scores.append(np.ones_like(cls_tensor[:, :1, :, :], dtype=np.float32))

    def sp_flatten(_in):
        # (n, c, h, w) -> (n * h * w, c): one row per candidate.
        ch = _in.shape[1]
        return _in.transpose(0, 2, 3, 1).reshape(-1, ch)

    boxes = np.concatenate([sp_flatten(_v) for _v in boxes])
    classes_conf = np.concatenate([sp_flatten(_v) for _v in classes_conf])
    scores = np.concatenate([sp_flatten(_v) for _v in scores])

    boxes, classes, scores = filter_boxes(boxes, scores, classes_conf)

    # NMS runs per class so boxes of different classes never suppress each
    # other. np.unique gives a sorted, deterministic class order.
    nboxes, nclasses, nscores = [], [], []
    for class_id in np.unique(classes):
        inds = np.where(classes == class_id)
        cls_boxes = boxes[inds]
        cls_ids = classes[inds]
        cls_scores = scores[inds]
        keep = nms_boxes(cls_boxes, cls_scores)
        if len(keep) != 0:
            nboxes.append(cls_boxes[keep])
            nclasses.append(cls_ids[keep])
            nscores.append(cls_scores[keep])

    if not nclasses and not nscores:
        return None, None, None

    return np.concatenate(nboxes), np.concatenate(nclasses), np.concatenate(nscores)
def letter_box(im, new_shape=(640, 640), pad_color=(0, 0, 0)):
    """Resize an image with unchanged aspect ratio and pad it to ``new_shape``.

    Args:
        im: Image array whose first two axes are (height, width).
        new_shape: Target size as (height, width), or a single int for a
            square target.
        pad_color: Border colour passed to ``cv2.copyMakeBorder``.

    Returns:
        ``(image, ratio, (dw, dh))`` where ``ratio`` is the scale factor
        repeated twice and ``dw`` / ``dh`` are half of the total horizontal /
        vertical padding. These halves may be fractional; the border that is
        actually applied is rounded to whole pixels.
    """
    src_h, src_w = im.shape[:2]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # One common scale so the whole image fits inside the target.
    r = min(new_shape[0] / src_h, new_shape[1] / src_w)
    ratio = r, r
    new_unpad = int(round(src_w * r)), int(round(src_h * r))  # (width, height)

    dw = (new_shape[1] - new_unpad[0]) / 2
    dh = (new_shape[0] - new_unpad[1]) / 2

    if (src_w, src_h) != new_unpad:
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)

    # The -0.1 / +0.1 offsets split an odd padding as n on one side, n + 1 on the other.
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=pad_color)
    return im, ratio, (dw, dh)
def draw(image, boxes, scores, classes):
    """Draw every detection (rectangle plus "<class> <score>" label) on ``image``.

    Args:
        image: Image that is drawn on in place.
        boxes: Iterable of 4-value boxes; the first pair is used as one
            rectangle corner and the second pair as the opposite corner.
        scores: Score of each box, printed with two decimals.
        classes: Class index of each box, used to look up ``CLASSES``.
    """
    for box, score, cl in zip(boxes, scores, classes):
        x1, y1, x2, y2 = [int(_b) for _b in box]
        cv2.rectangle(image, (x1, y1), (x2, y2), (255, 0, 0), 2)
        # The label is placed slightly above the first corner.
        cv2.putText(image, '{0} {1:.2f}'.format(CLASSES[cl], score),
                    (x1, y1 - 6), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
def _init_runtime(rknn_lite):
    """Initialise the RKNN runtime; return True on success, False otherwise."""
    print('--> Init runtime environment')
    try:
        # First try without a target and let the runtime detect the platform.
        ret = rknn_lite.init_runtime()
        if ret != 0:
            print('Init runtime environment failed!')
            return False
        return True
    except Exception as e:  # deliberate best-effort: fall back to explicit targets
        print(f'Init runtime environment failed with error: {e}')
        print('\nTrying alternative initialization methods...')

    # Fall back to naming the target explicitly.
    targets_to_try = ['rk3588', 'rk3568', 'rk3566', 'rk3562', 'rk3576']
    for target in targets_to_try:
        try:
            print(f'  Trying target={target}...')
            ret = rknn_lite.init_runtime(target=target)
            if ret == 0:
                print(f'  Successfully initialized with target={target}')
                return True
        except Exception as e2:
            print(f'  Failed with target={target}: {e2}')
            continue

    print('\nAll initialization attempts failed.')
    return False


def main():
    """Run YOLOv8 detection on VIDEO_PATH and show the annotated frames.

    Loads MODEL_PATH into RKNNLite, initialises the runtime, then reads the
    video frame by frame: letterbox, inference, post-processing, drawing and
    display. Press 'q' in the window to stop early. The capture, the display
    windows and the RKNN runtime are released on every exit path, including
    early returns and exceptions.
    """
    # Initialize RKNNLite
    rknn_lite = RKNNLite()
    cap = None
    frame_count = 0
    try:
        # Load RKNN model
        print('--> Load RKNN model')
        ret = rknn_lite.load_rknn(MODEL_PATH)
        if ret != 0:
            print('Load RKNN model failed!')
            return

        # Init runtime environment
        if not _init_runtime(rknn_lite):
            return

        # Open video file
        cap = cv2.VideoCapture(VIDEO_PATH)
        if not cap.isOpened():
            print(f'Cannot open video: {VIDEO_PATH}')
            return

        # Get video info
        fps = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        print(f'Video info: {width}x{height} @ {fps} FPS')

        # letter_box expects (height, width) while IMG_SIZE is (width, height).
        input_shape = (IMG_SIZE[1], IMG_SIZE[0])

        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frame_count += 1
            print(f'Processing frame {frame_count}', end='\r')

            # Preprocess. letter_box returns new arrays, so the frame itself
            # is left untouched and no defensive copy is needed.
            img, ratio, (dw, dh) = letter_box(frame, new_shape=input_shape, pad_color=(0, 0, 0))
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            # RKNN needs a 4-D input (batch, height, width, channels)
            img = np.expand_dims(img, axis=0)

            # Inference
            try:
                outputs = rknn_lite.inference(inputs=[img])
            except Exception as e:
                print(f'\nInference failed: {e}')
                continue
            if outputs is None or len(outputs) == 0:
                print('\nNo outputs from inference')
                continue

            # Post-process
            boxes, classes, scores = post_process(outputs)

            if boxes is not None:
                # Undo the letterbox (padding, then scale) and clip to the
                # frame. The frame's own shape is used because capture
                # properties can be reported as 0 for some sources.
                frame_h, frame_w = frame.shape[:2]
                boxes[:, [0, 2]] = np.clip((boxes[:, [0, 2]] - dw) / ratio[1], 0, frame_w)
                boxes[:, [1, 3]] = np.clip((boxes[:, [1, 3]] - dh) / ratio[0], 0, frame_h)
                # Draw results
                draw(frame, boxes, scores, classes)

            # Show result
            cv2.imshow('YOLOv8 Detection', frame)
            # Press 'q' to quit
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release resources. The runtime is released even if closing the
        # capture or the windows raises.
        try:
            if cap is not None:
                cap.release()
                cv2.destroyAllWindows()
        finally:
            rknn_lite.release()

    print(f'\nProcessed {frame_count} frames')
# Run the demo only when executed as a script, not when imported.
if __name__ == '__main__':
    main()