环境

Ubuntu 22.04-local(x86-64)
Ubuntu 22.04-rk3588(aarch64)
RK3588
python3.8

本机-x86-64

配置rknn-toolkit2环境

conda create -n rknn python=3.8
conda activate rknn
pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple

# reference: https://github.com/airockchip/rknn-toolkit2 (formerly https://github.com/rockchip-linux/rknn-toolkit2)

wget https://ghfast.top/github.com/airockchip/rknn-toolkit2/blob/master/rknn-toolkit2/packages/x86_64/requirements_cp38-2.3.2.txt

pip install -r requirements_cp38-2.3.2.txt

wget https://ghfast.top/github.com/airockchip/rknn-toolkit2/blob/master/rknn-toolkit2/packages/x86_64/rknn_toolkit2-2.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

pip install rknn_toolkit2-2.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

git clone https://github.com/airockchip/ultralytics_yolov8.git
cd ultralytics_yolov8
pip install -e .

yolo check

模型转换

# 先转为onnx:
cd ultralytics_yolov8
# 编辑./ultralytics/cfg/default.yaml 中 model 文件路径
export PYTHONPATH=./
# 转换
python ./ultralytics/engine/exporter.py

# onnx转为rknn
git clone https://github.com/airockchip/rknn_model_zoo.git
cd rknn_model_zoo/examples/yolov8/python

# convert
python convert.py /your_path/yolov8n.onnx rk3588 fp yolov8n.rknn

# Copy the generated yolov8n.rknn to your RK3588 device (the inference script below loads it from weights/yolov8n.rknn)

rk3588-aarch64

安装miniconda

# 1. download: https://mirrors.tuna.tsinghua.edu.cn/anaconda/miniconda/Miniconda3-latest-Linux-aarch64.sh

# 2. install miniconda3
chmod +x Miniconda3-latest-Linux-aarch64.sh
sudo bash Miniconda3-latest-Linux-aarch64.sh

# input installation path: /home/hello/miniconda3

miniconda3/bin/conda init bash

source ~/.bashrc

配置rknn-toolkit-lite2环境

# 1. create conda env
conda create -n rknn python=3.8
conda activate rknn
pip config set global.index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple

# 2. install opencv pytorch numpy...
wget https://ghfast.top/github.com/airockchip/rknn-toolkit2/blob/master/rknn-toolkit2/packages/arm64/arm64_requirements_cp38.txt
pip install -r arm64_requirements_cp38.txt

# 3. install rknn-toolkit-lite2
wget https://ghfast.top/github.com/airockchip/rknn-toolkit2/blob/master/rknn-toolkit-lite2/packages/rknn_toolkit_lite2-2.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl
pip install rknn_toolkit_lite2-2.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl

推理视频测试

from rknnlite.api import RKNNLite
import cv2
import numpy as np
import os

# Path of the converted RKNN model that main() loads.
MODEL_PATH = 'weights/yolov8n.rknn'
# Video source handed to cv2.VideoCapture. Example of an RTSP source:
# VIDEO_PATH = 'rtsp://192.168.10.178/live/camera0'
VIDEO_PATH = 'test.mp4'

# Minimum score a candidate needs to survive filter_boxes().
OBJ_THRESH = 0.25
# Boxes whose IoU with a higher-scoring box exceeds this are dropped by nms_boxes().
NMS_THRESH = 0.45
# Network input size used for letterboxing and for the grid stride in box_process().
IMG_SIZE = (640, 640)  # (width, height)

# 80 class labels; draw() looks a label up by the predicted class index.
CLASSES = ("person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", "boat", "traffic light",
           "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant",
           "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball", "kite",
           "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup", "fork", "knife",
           "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "sofa",
           "pottedplant", "bed", "diningtable", "toilet", "tvmonitor", "laptop", "mouse", "remote", "keyboard", "cell phone", "microwave",
           "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush")

def filter_boxes(boxes, box_confidences, box_class_probs):
    """Keep only the candidates whose score reaches OBJ_THRESH.

    The score of a candidate is its best class probability multiplied by
    its box confidence.

    Args:
        boxes: Array with one row per candidate.
        box_confidences: Per-candidate confidence; flattened to 1-D here.
        box_class_probs: 2-D array of shape (candidates, classes).

    Returns:
        Tuple ``(boxes, classes, scores)`` restricted to the surviving
        candidates, where ``classes`` holds the arg-max class index and
        ``scores`` the combined score.
    """
    flat_conf = box_confidences.reshape(-1)
    # Unpacking into exactly two names also rejects non-2-D probability arrays.
    _num_candidates, _num_classes = box_class_probs.shape

    best_prob = np.max(box_class_probs, axis=-1)
    best_class = np.argmax(box_class_probs, axis=-1)

    combined = best_prob * flat_conf
    selected = np.where(combined >= OBJ_THRESH)

    return boxes[selected], best_class[selected], combined[selected]

def nms_boxes(boxes, scores):
    """Greedy non-maximum suppression.

    Args:
        boxes: Array of shape (N, 4) read as (x1, y1, x2, y2).
        scores: Array of N scores; higher scores are processed first.

    Returns:
        1-D integer array with the indices (into ``boxes``) of the boxes
        that are kept. The array is empty, but still integer-typed, when
        there are no input boxes, so it can always be used as an index.
    """
    x = boxes[:, 0]
    y = boxes[:, 1]
    w = boxes[:, 2] - boxes[:, 0]
    h = boxes[:, 3] - boxes[:, 1]

    areas = w * h
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = order[0]
        rest = order[1:]
        keep.append(i)

        # Intersection rectangle between the current best box and the rest.
        xx1 = np.maximum(x[i], x[rest])
        yy1 = np.maximum(y[i], y[rest])
        xx2 = np.minimum(x[i] + w[i], x[rest] + w[rest])
        yy2 = np.minimum(y[i] + h[i], y[rest] + h[rest])

        w1 = np.maximum(0.0, xx2 - xx1 + 0.00001)
        h1 = np.maximum(0.0, yy2 - yy1 + 0.00001)
        inter = w1 * h1

        ovr = inter / (areas[i] + areas[rest] - inter)
        # Only boxes that overlap the current one by at most NMS_THRESH stay
        # in the running for the next round.
        order = rest[np.where(ovr <= NMS_THRESH)[0]]

    # An explicit integer dtype keeps an empty result usable as an index
    # (np.array([]) would default to float64).
    return np.array(keep, dtype=np.intp)

def dfl(position):
    """Decode a DFL-style box head into one expected offset per box side.

    The channel axis is split into 4 groups of ``c // 4`` bins. A softmax
    is taken over the bins of each group and the result is the expectation
    of the bin index under that distribution.

    Args:
        position: Array of shape (n, c, h, w).

    Returns:
        Array of shape (n, 4, h, w).
    """
    batch, channels, rows, cols = position.shape
    sides = 4
    bins = channels // sides

    logits = position.reshape(batch, sides, bins, rows, cols)

    # Subtract the per-group maximum before exponentiating for numerical stability.
    weights = np.exp(logits - logits.max(axis=2, keepdims=True))
    probs = weights / weights.sum(axis=2, keepdims=True)

    bin_index = np.arange(bins, dtype=np.float32).reshape(1, 1, bins, 1, 1)
    return (probs * bin_index).sum(axis=2)

def box_process(position):
    """Convert one branch's raw box output into pixel-space corner boxes.

    Args:
        position: Array of shape (n, c, grid_h, grid_w) that dfl() turns
            into 4 per-cell distances, used here as (left, top, right,
            bottom) offsets from the cell centre.

    Returns:
        Array of shape (n, 4, grid_h, grid_w) holding (x1, y1, x2, y2) in
        network-input pixels.
    """
    grid_h, grid_w = position.shape[2:4]
    col, row = np.meshgrid(np.arange(0, grid_w), np.arange(0, grid_h))
    col = col.reshape(1, 1, grid_h, grid_w)
    row = row.reshape(1, 1, grid_h, grid_w)
    # Channel 0 is the column (x) index, channel 1 the row (y) index.
    grid = np.concatenate((col, row), axis=1)

    # IMG_SIZE is (width, height). The stride must follow the grid's (x, y)
    # channel order, so x uses width // grid_w and y uses height // grid_h.
    # Identical to the previous ordering for square inputs.
    stride = np.array([IMG_SIZE[0] // grid_w, IMG_SIZE[1] // grid_h]).reshape(1, 2, 1, 1)

    position = dfl(position)
    box_xy = grid + 0.5 - position[:, 0:2, :, :]
    box_xy2 = grid + 0.5 + position[:, 2:4, :, :]
    xyxy = np.concatenate((box_xy * stride, box_xy2 * stride), axis=1)

    return xyxy

def post_process(input_data):
    """Turn raw model outputs into final detections.

    The outputs are treated as 3 branches of equal length. Within a
    branch, the first tensor is the box output and the second the class
    confidences. Candidates are decoded, thresholded with filter_boxes()
    and then suppressed per class with nms_boxes().

    Args:
        input_data: Sequence of output arrays from the model.

    Returns:
        Tuple ``(boxes, classes, scores)`` of concatenated arrays, or
        ``(None, None, None)`` when nothing survives.
    """
    branch_count = 3
    outputs_per_branch = len(input_data) // branch_count

    def to_rows(tensor):
        # (n, ch, h, w) -> (n * h * w, ch)
        channels = tensor.shape[1]
        return tensor.transpose(0, 2, 3, 1).reshape(-1, channels)

    box_maps, conf_maps, score_maps = [], [], []
    for branch in range(branch_count):
        base = outputs_per_branch * branch
        box_maps.append(box_process(input_data[base]))
        conf_maps.append(input_data[base + 1])
        # No separate objectness is used: every candidate gets confidence 1.
        score_maps.append(np.ones_like(input_data[base + 1][:, :1, :, :], dtype=np.float32))

    all_boxes = np.concatenate([to_rows(m) for m in box_maps])
    all_conf = np.concatenate([to_rows(m) for m in conf_maps])
    all_scores = np.concatenate([to_rows(m) for m in score_maps])

    boxes, classes, scores = filter_boxes(all_boxes, all_scores, all_conf)

    kept_boxes, kept_classes, kept_scores = [], [], []
    for class_id in set(classes):
        members = np.where(classes == class_id)
        cls_boxes = boxes[members]
        cls_ids = classes[members]
        cls_scores = scores[members]
        keep = nms_boxes(cls_boxes, cls_scores)

        if len(keep) == 0:
            continue
        kept_boxes.append(cls_boxes[keep])
        kept_classes.append(cls_ids[keep])
        kept_scores.append(cls_scores[keep])

    if not kept_classes and not kept_scores:
        return None, None, None

    return (
        np.concatenate(kept_boxes),
        np.concatenate(kept_classes),
        np.concatenate(kept_scores),
    )

def letter_box(im, new_shape=(640, 640), pad_color=(0, 0, 0)):
    """Resize an image with unchanged aspect ratio and pad it to new_shape.

    Args:
        im: Image array whose first two dimensions are (height, width).
        new_shape: Target (height, width), or a single int for a square.
        pad_color: Constant border colour passed to cv2.copyMakeBorder.

    Returns:
        Tuple ``(image, (r, r), (dw, dh))``: the padded image, the scale
        factor repeated twice, and the horizontal / vertical padding per
        side (possibly fractional, before rounding to whole pixels).
    """
    dims = im.shape[:2]
    src_h, src_w = dims[0], dims[1]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)
    dst_h, dst_w = new_shape[0], new_shape[1]

    # One scale for both axes so the aspect ratio is preserved.
    scale = min(dst_h / src_h, dst_w / src_w)
    resized_wh = (int(round(src_w * scale)), int(round(src_h * scale)))

    # Total padding is split evenly between the two sides.
    dw = (dst_w - resized_wh[0]) / 2
    dh = (dst_h - resized_wh[1]) / 2

    if (src_w, src_h) != resized_wh:
        im = cv2.resize(im, resized_wh, interpolation=cv2.INTER_LINEAR)

    # The -0.1 / +0.1 nudges send an odd padding pixel to the bottom / right.
    top = int(round(dh - 0.1))
    bottom = int(round(dh + 0.1))
    left = int(round(dw - 0.1))
    right = int(round(dw + 0.1))
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=pad_color)

    return im, (scale, scale), (dw, dh)

def draw(image, boxes, scores, classes):
    """Draw every detection on ``image`` in place.

    Args:
        image: Image array that is modified by the OpenCV drawing calls.
        boxes: Iterable of 4-element boxes read as (x1, y1, x2, y2).
        scores: Iterable of scores, shown with two decimals.
        classes: Iterable of indices into CLASSES.
    """
    for box, score, class_id in zip(boxes, scores, classes):
        x1, y1, x2, y2 = (int(coord) for coord in box)
        cv2.rectangle(image, (x1, y1), (x2, y2), (255, 0, 0), 2)
        label = '{0} {1:.2f}'.format(CLASSES[class_id], score)
        # Place the label slightly above the top-left corner of the box.
        cv2.putText(image, label, (x1, y1 - 6),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)

def _init_runtime(rknn_lite):
    """Initialise the RKNN runtime; return True on success, False otherwise.

    The runtime is first initialised without a target. Only if that call
    raises are the explicit targets tried one after another.
    """
    try:
        # Try without a target first and let the runtime detect the platform.
        ret = rknn_lite.init_runtime()
    except Exception as e:
        print(f'Init runtime environment failed with error: {e}')
        print('\nTrying alternative initialization methods...')
    else:
        if ret != 0:
            print('Init runtime environment failed!')
            return False
        return True

    # Fall back to explicitly named targets.
    for target in ('rk3588', 'rk3568', 'rk3566', 'rk3562', 'rk3576'):
        try:
            print(f'  Trying target={target}...')
            ret = rknn_lite.init_runtime(target=target)
        except Exception as e2:
            print(f'  Failed with target={target}: {e2}')
            continue
        if ret == 0:
            print(f'  Successfully initialized with target={target}')
            return True

    print('\nAll initialization attempts failed.')
    return False


def _process_stream(rknn_lite, cap):
    """Run detection on every frame of ``cap`` and display the result.

    Stops at the end of the stream or when 'q' is pressed.

    Returns:
        The number of frames read.
    """
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    print(f'Video info: {width}x{height} @ {fps} FPS')

    # letter_box() reads new_shape as (height, width) while IMG_SIZE is
    # stored as (width, height); swap so non-square inputs are handled.
    net_shape = (IMG_SIZE[1], IMG_SIZE[0])

    frame_count = 0
    while True:
        ret, frame = cap.read()
        if not ret:
            break

        frame_count += 1
        print(f'Processing frame {frame_count}', end='\r')

        # Preprocess. letter_box() and cvtColor() return new arrays, so the
        # original frame stays untouched without an extra copy.
        img, ratio, (dw, dh) = letter_box(frame, new_shape=net_shape, pad_color=(0, 0, 0))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # RKNN needs a 4-D input (batch, height, width, channels).
        img = np.expand_dims(img, axis=0)

        # Inference
        try:
            outputs = rknn_lite.inference(inputs=[img])
        except Exception as e:
            print(f'\nInference failed: {e}')
            continue

        if outputs is None or len(outputs) == 0:
            print('\nNo outputs from inference')
            continue

        # Post-process
        boxes, classes, scores = post_process(outputs)

        if boxes is not None:
            # Undo the letterbox (padding, then scale) and clip to the real
            # frame size. The frame itself is used because the capture
            # properties can be unreliable for some sources.
            frame_h, frame_w = frame.shape[:2]
            boxes[:, [0, 2]] = np.clip((boxes[:, [0, 2]] - dw) / ratio[0], 0, frame_w)
            boxes[:, [1, 3]] = np.clip((boxes[:, [1, 3]] - dh) / ratio[1], 0, frame_h)

            draw(frame, boxes, scores, classes)

        # Show result
        cv2.imshow('YOLOv8 Detection', frame)

        # Press 'q' to quit
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    return frame_count


def main():
    """Load the RKNN model, run detection on VIDEO_PATH and show the frames.

    The RKNNLite handle is released on every exit path, including early
    returns and exceptions. The capture and the display windows are
    released once the video has been opened successfully.
    """
    rknn_lite = RKNNLite()
    try:
        # Load RKNN model
        print('--> Load RKNN model')
        ret = rknn_lite.load_rknn(MODEL_PATH)
        if ret != 0:
            print('Load RKNN model failed!')
            return

        # Init runtime environment
        print('--> Init runtime environment')
        if not _init_runtime(rknn_lite):
            return

        # Open video file
        cap = cv2.VideoCapture(VIDEO_PATH)
        if not cap.isOpened():
            print(f'Cannot open video: {VIDEO_PATH}')
            return

        try:
            frame_count = _process_stream(rknn_lite, cap)
        finally:
            cap.release()
            cv2.destroyAllWindows()
    finally:
        rknn_lite.release()

    print(f'\nProcessed {frame_count} frames')

# Run the demo only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()