PipeLine/pl 无法获取rgb565/rgb888图像

Viewed 49

问题描述


使用模型训练生成的1.3版本的视频识别的代码文件,进行图像分类的任务,我要实现的任务是这样的:使用该代码进行识别图片类型,同时将摄像头获取的图像显示在一个ui界面上,例如下图中间区域显示摄像头的画面,(这个画面是我先创建了一个img_main,再通过sensor获取摄像头图像img 进行裁剪后,将其使用img_main.draw_image(img),绘制在img_main上的)
image.png
我想的是这样:
image.png
但是没有找到如何将numpy格式的图像转化成rgb565/rgb888的api
代码如下:

"""Question code: image classification driven by the PipeLine wrapper.

The commented-out ``to_rgb888()`` / ``to_rgb565()`` calls in the main loop
show the missing API the asker is looking for: PipeLine only hands back the
AI-channel frame, not an RGB565/RGB888 image usable for custom UI drawing.
"""
import os, gc
from libs.PlatTasks import ClassificationApp
from libs.PipeLine import PipeLine
from libs.Utils import *
from media.sensor import *
# Deployment package produced by the model-training tool; deploy_config.json
# describes the kmodel and its inference parameters.
root_path = "/sdcard/mp_deployment_source/"
deploy_conf = read_json(root_path + "/deploy_config.json")
kmodel_path = root_path + deploy_conf["kmodel_path"]              # KModel path
labels = deploy_conf["categories"]                                # Label list
confidence_threshold = deploy_conf["confidence_threshold"]        # Confidence threshold
model_input_size = deploy_conf["img_size"]                        # Model input size
num_classes = deploy_conf["num_classes"]                          # Number of classes
inference_mode = "video"                                          # Inference mode: 'video'
debug_mode = 1                                                    # Debug mode flag

# Create and initialize the video/display pipeline
# (AI channel 800x480 RGB888P, ST7701 LCD at 800x480).
pl = PipeLine(rgb888p_size=[800, 480],display_size=[800,480], display_mode="st7701")
pl.create()
display_size = pl.get_display_size()
# NOTE(review): display_size is fetched but the hardcoded [800, 480] lists are
# passed below instead — presumably they match; verify on other panels.
cls_app = ClassificationApp(inference_mode,kmodel_path,labels,model_input_size,confidence_threshold,[800, 480],[800, 480],debug_mode=debug_mode)
cls_app.config_preprocess()

# NOTE(review): prints the class object itself — likely leftover debug output.
print(PipeLine)

# Main loop: capture, run inference, display results
while True:
    with ScopedTiming("total", 1):
        img = pl.get_frame()                          # Capture current frame
        # The asker's attempts — no such conversion API exists on this object:
        # img2 = img.to_rgb888()
        # print(img2)
        # img3 = img.to_rgb565()
        # print(img3)
        res = cls_app.run(img)                        # Run inference
        cls_app.draw_result(pl.osd_img, res)          # Draw classification result
        pl.show_image()                               # Show result on display
        gc.collect()                                  # Run garbage collection to free memory

# Cleanup: These lines will only run if the loop is interrupted (e.g., by an IDE break or external interruption)
# NOTE(review): unreachable after `while True` unless an exception/IDE break
# escapes the loop.
cls_app.deinit()                                      # De-initialize classification app
pl.destroy()                                          # Destroy pipeline instance

文档参考如下:
我知道PipeLine是将摄像头与屏幕输出相关的api封装了,但是找不到如何像使用img = sensor.snapshot(chn=CAM_CHN_ID_0)一样通过PipeLine获取rgb格式的图像的方式
image.png
image.png
image.png

硬件板卡


庐山派

软件版本


CanMV_K230_LCKFB_micropython_v1.4-0-g6cce59c_nncase_v2.9.0.img

1 Answer

可以将获取图像部分改为直接调用sensor的代码:

"""Answer code: bypass PipeLine and drive the sensor/display directly.

The sensor is configured with three output channels, so an RGB565 frame
(channel 2) is available to the user for UI drawing alongside the display
feed (channel 0, YUV420SP) and the AI inference feed (channel 1, RGB888P).
"""
import os, gc
from libs.PlatTasks import ClassificationApp
from libs.PipeLine import PipeLine
from libs.Utils import *
from media.sensor import *
from media.display import *
root_path = "/sdcard/mp_deployment_source/"
deploy_conf = read_json(root_path + "/deploy_config.json")
kmodel_path = root_path + deploy_conf["kmodel_path"]              # KModel path
labels = deploy_conf["categories"]                                # Label list
confidence_threshold = deploy_conf["confidence_threshold"]        # Confidence threshold
model_input_size = deploy_conf["img_size"]                        # Model input size
num_classes = deploy_conf["num_classes"]                          # Number of classes
inference_mode = "video"                                          # Inference mode: 'video'
debug_mode = 0                                                    # Debug mode flag

rgb888p_size=[800,480]   # AI-channel resolution (RGB888P)
display_size=[800,480]   # display-channel resolution (YUV420SP)
rgb565_size=[800,480]    # user-accessible RGB565 channel resolution

sensor = Sensor()
sensor.reset()
# Horizontal mirror / vertical flip differ per board; enable whichever pair
# makes the picture upright on your hardware.
#sensor.set_hmirror(False)
#sensor.set_vflip(False)

# Configure multi-channel output: each channel may use its own format and
# resolution (at most three channels; see the sensor API documentation).
# Channel 0 feeds the display (VO) directly in YUV420SP.
sensor.set_framesize(width = display_size[0], height = display_size[1],chn=CAM_CHN_ID_0)
sensor.set_pixformat(Sensor.YUV420SP,chn=CAM_CHN_ID_0)
# Channel 1 feeds the AI pipeline in planar RGB888.
sensor.set_framesize(width = rgb888p_size[0] , height = rgb888p_size[1], chn=CAM_CHN_ID_1)
sensor.set_pixformat(Sensor.RGBP888, chn=CAM_CHN_ID_1)
# Channel 2 provides RGB565 frames for the user's own UI drawing.
# (Fixed: the original comment said "chn1" here.)
sensor.set_framesize(width = rgb565_size[0] , height = rgb565_size[1], chn=CAM_CHN_ID_2)
sensor.set_pixformat(Sensor.RGB565, chn=CAM_CHN_ID_2)

# Bind channel 0 straight to the screen so that slow AI inference on the
# other channel cannot stall the live preview.
sensor_bind_info = sensor.bind_info(x = 0, y = 0, chn = CAM_CHN_ID_0)
Display.bind_layer(**sensor_bind_info, layer = Display.LAYER_VIDEO1)

# OSD layer: a transparent full-screen image used to draw AI results on top.
osd_img = image.Image(display_size[0], display_size[1], image.ARGB8888)

# ST7701 LCD, default 800*480 (640*480 etc. also supported; see Display docs).
Display.init(Display.ST7701, width=display_size[0],height=display_size[1],osd_num=1, to_ide=True)

# Cap the bound channel's frame rate so the producer cannot outrun the display.
sensor._set_chn_fps(chn = CAM_CHN_ID_0, fps = Display.fps())

# Initialize media buffers, then start streaming.
MediaManager.init()
sensor.run()

# Use the size variables (same 800x480 values) instead of repeating literals.
cls_app = ClassificationApp(inference_mode,kmodel_path,labels,model_input_size,confidence_threshold,rgb888p_size,display_size,debug_mode=debug_mode)
cls_app.config_preprocess()

# Main loop: capture, run inference, display results
while True:
    with ScopedTiming("total", 1):
        # RGB565 frame, directly usable for custom UI composition.
        img_rgb565=sensor.snapshot(chn=CAM_CHN_ID_2)
        print(img_rgb565)

        # RGB888P frame for inference; to_numpy_ref() wraps it without a copy.
        img_rgb888p = sensor.snapshot(chn=CAM_CHN_ID_1)
        img_np=img_rgb888p.to_numpy_ref()
        res = cls_app.run(img_np)                     # Run inference
        cls_app.draw_result(osd_img, res)             # Draw classification result
        Display.show_image(osd_img,0, 0, Display.LAYER_OSD3)
        gc.collect()                                  # Run garbage collection to free memory

# Cleanup: only reached if the loop is interrupted (e.g., by an IDE break).
# Fixed: the original called pl.destroy(), but no PipeLine object exists in
# this script (NameError); release the resources this script acquired instead.
cls_app.deinit()                                      # De-initialize classification app
sensor.stop()                                         # Stop the sensor stream
Display.deinit()                                      # Release the display
MediaManager.deinit()                                 # Release media buffers