问题描述
如标题,一个嘉楠官方的车辆识别模型,在预处理阶段完全不报错,一到kpu_run()就直接卡死,报错:terminate called without an active exception
我的代码如下,实在不知道哪有问题,还kpu_run呢就卡死了,这个模型用官方代码能跑
硬件板卡
k230_canmv_v3
软件版本
rtos+linux
复现步骤
#include
#include
#include
#include "my_model.h"
#include "util.h"
#define max_float(a, b) ((a) > (b) ? (a) : (b))
#define min_float(a, b) ((a) < (b) ? (a) : (b))
#define swap_float(a, b) do { typeof(a) tmp = (a); (a) = (b); (b) = tmp; } while(0)
Model::Model(const char *model_name, const char *kmodel_file): model_name_(model_name)
{
std::ifstream ifs(kmodel_file, std::ios::binary);
interp_.load_model(ifs).expect("load_model failed");
printf("inputs_size:%d\n",interp_.inputs_size());
auto s = interp_.input_shape(0);
std::cout << "input_shape(0)维度: " << s.size() << std::endl;
for (size_t i = 0; i < interp_.inputs_size(); i++)
{
auto desc = interp_.input_desc(i);
auto shape = interp_.input_shape(i);
auto tensor = host_runtime_tensor::create(desc.datatype, shape, hrt::pool_shared).expect("cannot create input tensor");
interp_.input_tensor(i, tensor).expect("cannot set input tensor");
}
}
Model::~Model()
{
}
void Model::preprocess(uintptr_t vaddr, uintptr_t paddr)
{
dims_t in_shape{1,3,720,1280};
auto in_tensor = host_runtime_tensor::create(dt_uint8, in_shape, { (gsl::byte *)vaddr, compute_size(in_shape) },false,hrt::pool_shared,paddr).expect("cannot create input tensor");
hrt::sync(in_tensor, sync_op_t::sync_write_back, true).expect("write back input failed");
auto out_shape = interp_.input_shape(0);
std::cout<<"shape:"<<out_shape[0]<<"x"<<out_shape[1]<<"x"<<out_shape[2]<<std::endl;
ai2d_datatype_t ai2d_dtype { ai2d_format::NCHW_FMT, ai2d_format::NCHW_FMT, dt_uint8,dt_uint8};
ai2d_crop_param_t crop_param { false, 0, 0, 720,1280};
ai2d_shift_param_t shift_param { false, 0 };
// 原图正确分辨率:1280x720
int src_w = 1280;
int src_h = 720;
ai2d_pad_param_t pad_param {
true,
{ {0,0}, {0,0}, {140,140}, {0,0} },
ai2d_pad_mode::constant,
{114,114,114}
};
ai2d_resize_param_t resize_param { true, ai2d_interp_method::tf_bilinear, ai2d_interp_mode::half_pixel};
ai2d_affine_param_t affine_param { false};
ai2d_out_tensor_ = input_tensor(0);
ai2d_builder builder { in_shape, out_shape, ai2d_dtype, crop_param, shift_param, pad_param, resize_param, affine_param };
builder.build_schedule().expect("error occurred in ai2d build_schedule");
builder.invoke(in_tensor, ai2d_out_tensor_).expect("error occurred in ai2d invoke");
}
int Model::run(uint64_t vaddr, uint64_t paddr)
{
auto start = std::chrono::steady_clock::now();
preprocess(vaddr, paddr);
kpu_run();
printf("run over!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
auto stop = std::chrono::steady_clock::now();
double duration = std::chrono::duration<double, std::milli>(stop - start).count();
std::cout << "ai2d run: duration = " << duration << " ms, fps = " << 1000 / duration << std::endl;
while(1);
return postprocess();
}
// ====================================================================================
// 官方 YOLOv8n 1×84×2100 后处理 - 仅识别 car(2)、motorcycle(3)
// ====================================================================================
int Model::nms_filter(const float* model_output, float conf_threshold, float iou_threshold)
{
const int NUM_BOXES = 2100;
const int NUM_CLASSES = 80;
const int CAR = 2;
const int MOTORCYCLE = 3;
const int MODEL_W = 320;
const int MODEL_H = 320;
const int SRC_W = 1280;
const int SRC_H = 720;
int valid_idx[NUM_BOXES];
int valid_count = 0;
k_vo_draw_frame frame{};
for (int i = 0; i < 16; i++) {
frame.draw_en = false;
frame.frame_num = i;
kd_mpi_vo_draw_frame(&frame);
}
// 正确解析 YOLOv8 输出 [1,84,2100]
for (int i = 0; i < NUM_BOXES; i++) {
const float* feat = model_output + i;
float x = feat[0 * NUM_BOXES];
float y = feat[1 * NUM_BOXES];
float w = feat[2 * NUM_BOXES];
float h = feat[3 * NUM_BOXES];
float conf = max_float(feat[(4+CAR)*NUM_BOXES], feat[(4+MOTORCYCLE)*NUM_BOXES]);
if (conf >= conf_threshold) {
valid_idx[valid_count++] = i;
}
}
if (valid_count == 0) return 0;
// 置信度排序
for (int i = 0; i < valid_count-1; i++) {
for (int j = i+1; j < valid_count; j++) {
int a = valid_idx[i];
int b = valid_idx[j];
float ca = max_float(model_output[(4+CAR)*NUM_BOXES+a], model_output[(4+MOTORCYCLE)*NUM_BOXES+a]);
float cb = max_float(model_output[(4+CAR)*NUM_BOXES+b], model_output[(4+MOTORCYCLE)*NUM_BOXES+b]);
if (ca < cb) {
int t = valid_idx[i];
valid_idx[i] = valid_idx[j];
valid_idx[j] = t;
}
}
}
// NMS
int removed[NUM_BOXES] = {0};
for (int i = 0; i < valid_count; i++) {
if (removed[i]) continue;
int a = valid_idx[i];
float x1 = model_output[0*NUM_BOXES+a] - model_output[2*NUM_BOXES+a]*0.5f;
float y1 = model_output[1*NUM_BOXES+a] - model_output[3*NUM_BOXES+a]*0.5f;
float x2 = model_output[0*NUM_BOXES+a] + model_output[2*NUM_BOXES+a]*0.5f;
float y2 = model_output[1*NUM_BOXES+a] + model_output[3*NUM_BOXES+a]*0.5f;
for (int j = i+1; j < valid_count; j++) {
if (removed[j]) continue;
int b = valid_idx[j];
float bx1 = model_output[0*NUM_BOXES+b] - model_output[2*NUM_BOXES+b]*0.5f;
float by1 = model_output[1*NUM_BOXES+b] - model_output[3*NUM_BOXES+b]*0.5f;
float bx2 = model_output[0*NUM_BOXES+b] + model_output[2*NUM_BOXES+b]*0.5f;
float by2 = model_output[1*NUM_BOXES+b] + model_output[3*NUM_BOXES+b]*0.5f;
float ix1 = max_float(x1, bx1);
float iy1 = max_float(y1, by1);
float ix2 = min_float(x2, bx2);
float iy2 = min_float(y2, by2);
float iw = ix2 - ix1;
float ih = iy2 - iy1;
if (iw <= 0 || ih <= 0) continue;
float inter = iw * ih;
float union_ = (x2-x1)*(y2-y1) + (bx2-bx1)*(by2-by1) - inter;
float iou = inter / union_;
if (iou > iou_threshold) removed[j] = 1;
}
}
// 坐标还原到原图 1280x720
float scale = (float)MODEL_W / max_float(SRC_W, SRC_H);
int pad_w = (MODEL_W - SRC_W * scale) / 2;
int pad_h = (MODEL_H - SRC_H * scale) / 2;
int draw_idx = 0;
for (int i = 0; i < valid_count; i++) {
if (removed[i]) continue;
int idx = valid_idx[i];
float x = model_output[0*NUM_BOXES+idx];
float y = model_output[1*NUM_BOXES+idx];
float w = model_output[2*NUM_BOXES+idx];
float h = model_output[3*NUM_BOXES+idx];
float rx1 = (x - w*0.5f - pad_w) / scale;
float ry1 = (y - h*0.5f - pad_h) / scale;
float rx2 = (x + w*0.5f - pad_w) / scale;
float ry2 = (y + h*0.5f - pad_h) / scale;
rx1 = max_float(0.0f, min_float(rx1, (float)SRC_W));
ry1 = max_float(0.0f, min_float(ry1, (float)SRC_H));
rx2 = max_float(0.0f, min_float(rx2, (float)SRC_W));
ry2 = max_float(0.0f, min_float(ry2, (float)SRC_H));
frame.draw_en = true;
frame.line_x_start = rx1;
frame.line_y_start = ry1;
frame.line_x_end = rx2;
frame.line_y_end = ry2;
frame.frame_num = draw_idx++;
kd_mpi_vo_draw_frame(&frame);
printf("✅ DETECT: x=%.1f y=%.1f w=%.1f h=%.1f\n", rx1, ry1, rx2-rx1, ry2-ry1);
}
return draw_idx;
}
int Model::postprocess()
{
auto tensor = output_tensor(0);
auto buf = tensor.impl()->to_host().unwrap()->buffer().as_host().unwrap().map(map_access_::map_read).unwrap().buffer();
const float* out = reinterpret_cast<const float*>(buf.data());
auto out_shape = interp_.output_shape(0);
printf("✅ YOLOv8 output: %d %d %d\n", out_shape[0], out_shape[1], out_shape[2]);
int final_boxes = nms_filter(out, 0.25f, 0.45f);
printf("✅ final_boxes: %d\n", final_boxes);
return final_boxes;
}
std::string Model::model_name() const
{
return model_name_;
}
void Model::kpu_run()
{
#if ENABLE_PROFILING
ScopedTiming st(model_name() + " " + FUNCTION);
#endif
interp_.run().expect("error occurred in running model");
}
runtime_tensor Model::input_tensor(size_t idx)
{
return interp_.input_tensor(idx).expect("cannot get input tensor");
}
void Model::input_tensor(size_t idx, runtime_tensor &tensor)
{
interp_.input_tensor(idx, tensor).expect("cannot set input tensor");
}
runtime_tensor Model::output_tensor(size_t idx)
{
return interp_.output_tensor(idx).expect("cannot get output tensor");
}
dims_t Model::input_shape(size_t idx)
{
return interp_.input_shape(idx);
}
dims_t Model::output_shape(size_t idx)
{
return interp_.output_shape(idx);
}
硬件板卡
k230_canmv_v3
软件版本
rtos+linux