6

AlexeyAB DarkNet框架总览

 2 years ago
source link: https://qiyuan-z.github.io/2020/02/21/AlexeyAB-DarkNet%E6%A1%86%E6%9E%B6%E6%80%BB%E8%A7%88/
Go to the source link to view the article. You can view the picture content, updated content and better typesetting reading experience. If the link is broken, please click the button below to view the snapshot at that time.
neoserver,ios ssh client

AlexeyAB DarkNet框架总览

发表于2020-02-21|更新于2022-03-28|学习
字数总计:2.9k|阅读时长:14分钟|阅读量:6

Darknet框架分析主线

分析主线的确定

Darknet相比当前用于训练的C/C++主流框架(如Caffe)来讲,具有编译速度快,依赖少,易部署等众多优点,我们先定位到src/darknet.c里面的main函数,这是这个框架实现分类,定位,回归,分割等功能的初始入口。这一节的核心代码如下,需要注意的是run_yolo只提供了yolo目标检测算法的原始实现,而run_detector函数提供了AlexeyAB添加了各种新特性的目标检测算法,所以之后我们会从这个函数跟进去来解析Darknet框架。Darknet提供的其他功能如run_super(超分辨率重建),run_classifier(图像分类),run_char_rnn(字符级RNN文本生成)有兴趣可以自己去读(这个框架用来做目标检测比较好,其他算法建议还是去其它框架实现吧),本系列只讲目标检测。

// Sub-command dispatch: argv[1] selects which tool runs. Each branch either
// forwards the whole (argc, argv) to a run_* entry point or passes selected
// positional arguments directly to a helper.
if (0 == strcmp(argv[1], "average")){
    average(argc, argv);
} else if (0 == strcmp(argv[1], "yolo")){
    run_yolo(argc, argv);
} else if (0 == strcmp(argv[1], "voxel")){
    run_voxel(argc, argv);
} else if (0 == strcmp(argv[1], "super")){
    run_super(argc, argv);
} else if (0 == strcmp(argv[1], "detector")){
    run_detector(argc, argv);
} else if (0 == strcmp(argv[1], "detect")){
    // Shortcut for a detector test run against the fixed "cfg/coco.data" file.
    float thresh = find_float_arg(argc, argv, "-thresh", .24);
    int ext_output = find_arg(argc, argv, "-ext_output");
    char *filename = (argc > 4) ? argv[4]: 0;   // optional input file
    test_detector("cfg/coco.data", argv[2], argv[3], filename, thresh, 0.5, 0, ext_output, 0, NULL, 0, 0);
} else if (0 == strcmp(argv[1], "cifar")){
    run_cifar(argc, argv);
} else if (0 == strcmp(argv[1], "go")){
    run_go(argc, argv);
} else if (0 == strcmp(argv[1], "rnn")){
    run_char_rnn(argc, argv);
} else if (0 == strcmp(argv[1], "vid")){
    run_vid_rnn(argc, argv);
} else if (0 == strcmp(argv[1], "coco")){
    run_coco(argc, argv);
} else if (0 == strcmp(argv[1], "classify")){
    predict_classifier("cfg/imagenet1k.data", argv[2], argv[3], argv[4], 5);
} else if (0 == strcmp(argv[1], "classifier")){
    run_classifier(argc, argv);
} else if (0 == strcmp(argv[1], "art")){
    run_art(argc, argv);
} else if (0 == strcmp(argv[1], "tag")){
    run_tag(argc, argv);
} else if (0 == strcmp(argv[1], "compare")){
    run_compare(argc, argv);
} else if (0 == strcmp(argv[1], "dice")){
    run_dice(argc, argv);
} else if (0 == strcmp(argv[1], "writing")){
    run_writing(argc, argv);
} else if (0 == strcmp(argv[1], "3d")){
    composite_3d(argv[2], argv[3], argv[4], (argc > 5) ? atof(argv[5]) : 0);
} else if (0 == strcmp(argv[1], "test")){
    test_resize(argv[2]);
} else if (0 == strcmp(argv[1], "captcha")){
    run_captcha(argc, argv);
} else if (0 == strcmp(argv[1], "nightmare")){
    run_nightmare(argc, argv);
} else if (0 == strcmp(argv[1], "rgbgr")){
    rgbgr_net(argv[2], argv[3], argv[4]);
} else if (0 == strcmp(argv[1], "reset")){
    reset_normalize_net(argv[2], argv[3], argv[4]);
} else if (0 == strcmp(argv[1], "denormalize")){
    denormalize_net(argv[2], argv[3], argv[4]);
} else if (0 == strcmp(argv[1], "statistics")){
    statistics_net(argv[2], argv[3]);
} else if (0 == strcmp(argv[1], "normalize")){
    normalize_net(argv[2], argv[3], argv[4]);
} else if (0 == strcmp(argv[1], "rescale")){
    rescale_net(argv[2], argv[3], argv[4]);
} else if (0 == strcmp(argv[1], "ops")){
    operations(argv[2]);
} else if (0 == strcmp(argv[1], "speed")){
    speed(argv[2], (argc > 3 && argv[3]) ? atoi(argv[3]) : 0);
} else if (0 == strcmp(argv[1], "oneoff")){
    oneoff(argv[2], argv[3], argv[4]);
} else if (0 == strcmp(argv[1], "partial")){
    // The numeric fourth operand is mandatory here: without this check
    // atoi() would receive NULL (argv[argc]) or read past the end of argv.
    if (argc > 5) {
        partial(argv[2], argv[3], argv[4], atoi(argv[5]));
    } else {
        fprintf(stderr, "usage: %s partial [cfg] [weights] [outfile] [max]\n", argv[0]);
    }
} else if (0 == strcmp(argv[1], "visualize")){
    visualize(argv[2], (argc > 3) ? argv[3] : 0);
} else if (0 == strcmp(argv[1], "imtest")){
    // Same helper as the "test" sub-command.
    test_resize(argv[2]);
} else {
    fprintf(stderr, "Not an option: %s\n", argv[1]);
}

跟进run_detector

run_detector函数在src/detector.c里面,这个函数首先有很多超参数可以设置,然后我们可以看到这个函数包含了训练验证,测试,计算Anchors,demo展示,计算map值和recall值等功能。由于训练,测试,验证阶段差不多,我们跟进去一个看看就好,至于后面那几个功能是AlexeyAB添加的,之后再逐一解释。

/*
 * Entry point of the "detector" sub-command.
 *
 * Reads the optional flags from the command line, then dispatches on argv[2]
 * to one of: test, train, valid, recall, map, calc_anchors, demo.
 * Positional arguments: argv[3] = data cfg, argv[4] = network cfg,
 * argv[5] = weights (optional), argv[6] = input file name (optional).
 *
 * NOTE(review): the positional reads presumably rely on the find_* helpers
 * removing the flags they consume from argv — confirm against their
 * implementation before reordering any of these calls.
 */
void run_detector(int argc, char **argv)
{
    int dont_show = find_arg(argc, argv, "-dont_show");           // flag controlling the image display window
    int benchmark = find_arg(argc, argv, "-benchmark");           // benchmark (performance evaluation) mode
    int benchmark_layers = find_arg(argc, argv, "-benchmark_layers");
    //if (benchmark_layers) benchmark = 1;
    if (benchmark) dont_show = 1;                                  // benchmark mode always sets dont_show
    int show = find_arg(argc, argv, "-show");
    int letter_box = find_arg(argc, argv, "-letter_box");         // whether to apply the letter-box transform to images
    int calc_map = find_arg(argc, argv, "-map");                  // whether to compute mAP (forwarded to training)
    int map_points = find_int_arg(argc, argv, "-points", 0);
    check_mistakes = find_arg(argc, argv, "-check_mistakes");     // global: check the data for mistakes
    int show_imgs = find_arg(argc, argv, "-show_imgs");           // show images (forwarded to training)
    int mjpeg_port = find_int_arg(argc, argv, "-mjpeg_port", -1);
    int json_port = find_int_arg(argc, argv, "-json_port", -1);
    char *http_post_host = find_char_arg(argc, argv, "-http_post_host", 0);
    int time_limit_sec = find_int_arg(argc, argv, "-time_limit_sec", 0);
    char *out_filename = find_char_arg(argc, argv, "-out_filename", 0);
    char *outfile = find_char_arg(argc, argv, "-out", 0);
    char *prefix = find_char_arg(argc, argv, "-prefix", 0);       // prefix string forwarded to demo()
    float thresh = find_float_arg(argc, argv, "-thresh", .25);    // confidence threshold
    float iou_thresh = find_float_arg(argc, argv, "-iou_thresh", .5);    // 0.5 for mAP
    float hier_thresh = find_float_arg(argc, argv, "-hier", .5);
    int cam_index = find_int_arg(argc, argv, "-c", 0);            // camera index
    int frame_skip = find_int_arg(argc, argv, "-s", 0);           // frame-skip interval for detection
    int num_of_clusters = find_int_arg(argc, argv, "-num_of_clusters", 5);
    int width = find_int_arg(argc, argv, "-width", -1);           // network input image width
    int height = find_int_arg(argc, argv, "-height", -1);         // network input image height
    // extended output in test mode (output of rect bound coords)
    // and for recall mode (extended output table-like format with results for best_class fit)
    int ext_output = find_arg(argc, argv, "-ext_output");
    int save_labels = find_arg(argc, argv, "-save_labels");
    if (argc < 4) {
        fprintf(stderr, "usage: %s %s [train/test/valid/demo/map] [data] [cfg] [weights (optional)]\n", argv[0], argv[1]);
        return;
    }

    char *gpu_list = find_char_arg(argc, argv, "-gpus", 0);       // comma-separated device list for multi-GPU training
    int *gpus = 0;
    int gpu = 0;
    int ngpus = 0;
    if (gpu_list) {
        printf("%s\n", gpu_list);
        int len = (int)strlen(gpu_list);
        ngpus = 1;
        int i;
        // One device per comma-separated field.
        for (i = 0; i < len; ++i) {
            if (gpu_list[i] == ',') ++ngpus;
        }
        gpus = (int*)xcalloc(ngpus, sizeof(int));
        // Walk the list with a separate cursor so gpu_list keeps pointing at
        // the original string, and never do arithmetic on the NULL that
        // strchr() returns after the last field.
        const char *cursor = gpu_list;
        for (i = 0; i < ngpus; ++i) {
            gpus[i] = atoi(cursor);
            const char *comma = strchr(cursor, ',');
            if (!comma) break;
            cursor = comma + 1;
        }
    }
    else {
        // No -gpus option: use the single globally selected device.
        gpu = gpu_index;
        gpus = &gpu;
        ngpus = 1;
    }

    int clear = find_arg(argc, argv, "-clear");

    char *datacfg = argv[3];                        // data cfg: train/valid lists, class names, etc.
    char *cfg = argv[4];                            // network cfg file
    char *weights = (argc > 5) ? argv[5] : 0;       // optional pre-trained weights
    // Strip a trailing carriage return (0x0d) from the weights path.
    if (weights)
        if (strlen(weights) > 0)
            if (weights[strlen(weights) - 1] == 0x0d) weights[strlen(weights) - 1] = 0;
    char *filename = (argc > 6) ? argv[6] : 0;

    if (0 == strcmp(argv[2], "test")) test_detector(datacfg, cfg, weights, filename, thresh, hier_thresh, dont_show, ext_output, save_labels, outfile, letter_box, benchmark_layers);   // run detector inference
    else if (0 == strcmp(argv[2], "train")) train_detector(datacfg, cfg, weights, gpus, ngpus, clear, dont_show, calc_map, mjpeg_port, show_imgs, benchmark_layers);                  // train the detector
    else if (0 == strcmp(argv[2], "valid")) validate_detector(datacfg, cfg, weights, outfile);                                                                                        // validate the detector
    else if (0 == strcmp(argv[2], "recall")) validate_detector_recall(datacfg, cfg, weights);                                                                                         // compute recall
    else if (0 == strcmp(argv[2], "map")) validate_detector_map(datacfg, cfg, weights, thresh, iou_thresh, map_points, letter_box, NULL);                                             // compute mAP
    else if (0 == strcmp(argv[2], "calc_anchors")) calc_anchors(datacfg, num_of_clusters, width, height, show);                                                                       // compute anchors
    else if (0 == strcmp(argv[2], "demo")) {                                                                                                                                          // live demo
        list *options = read_data_cfg(datacfg);
        int classes = option_find_int(options, "classes", 20);
        char *name_list = option_find_str(options, "names", "data/names.list");
        char **names = get_labels(name_list);
        // Strip a trailing carriage return (0x0d) from the input file name.
        if (filename)
            if (strlen(filename) > 0)
                if (filename[strlen(filename) - 1] == 0x0d) filename[strlen(filename) - 1] = 0;
        demo(cfg, weights, thresh, hier_thresh, cam_index, filename, names, classes, frame_skip, prefix, out_filename,
            mjpeg_port, json_port, dont_show, ext_output, letter_box, time_limit_sec, http_post_host, benchmark, benchmark_layers);

        free_list_contents_kvp(options);
        free_list(options);
    }
    else printf(" There isn't such command: %s", argv[2]);

    // gpus is heap-allocated exactly when -gpus was given (otherwise it
    // points at the local `gpu`), so release it for any device count.
    if (gpu_list) free(gpus);
}

跟进train_detector

由于训练,验证和测试阶段代码几乎是差不多的,只不过训练多了一个反向传播的过程。所以我们主要分析一下训练过程,训练过程是一个比较复杂的过程,不过宏观上大致分为解析网络配置文件,加载训练样本图像和标签,开启训练,结束训练保存模型这样一个过程,部分代码如下(我省略了很多代码,因为这一节是框架总览,后面会详细解释的):

// Trains a detector (abridged excerpt: the remainder of the body is elided at the `...` line).
// Visible steps: read the data cfg, optionally prepare an extra network for mAP
// calculation, then build one network per GPU and load the initial weights.
void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear, int dont_show, int calc_map, int mjpeg_port, int show_imgs, int benchmark_layers)
{
// Look up the training-list path under the "train" key of the data cfg; when the key is
// absent the default is "data/train.txt". The validation list ("valid") falls back to the
// training list, and the backup directory ("backup") defaults to "/backup/".
// NOTE(review): the format of the list file and of the labels is not visible in this
// excerpt — confirm against the data-loading code.
list *options = read_data_cfg(datacfg);
char *train_images = option_find_str(options, "train", "data/train.txt");
char *valid_images = option_find_str(options, "valid", train_images);
char *backup_directory = option_find_str(options, "backup", "/backup/");

network net_map;
// Only when mAP calculation was requested: net_map stays uninitialized otherwise.
if (calc_map) {
FILE* valid_file = fopen(valid_images, "r");
if (!valid_file) {
printf("\n Error: There is no %s file for mAP calculation!\n Don't use -map flag.\n Or set valid=%s in your %s file. \n", valid_images, train_images, datacfg);
getchar();
exit(-1);
}
else fclose(valid_file);

cuda_set_device(gpus[0]);
printf(" Prepare additional network for mAP calculation...\n");
net_map = parse_network_cfg_custom(cfgfile, 1, 1);
// Number of classes, taken from the last layer of the network
const int net_classes = net_map.layers[net_map.n - 1].classes;

int k; // free memory unnecessary arrays
for (k = 0; k < net_map.n - 1; ++k) free_layer_custom(net_map.layers[k], 1);

char *name_list = option_find_str(options, "names", "data/names.list");
int names_size = 0;
// Load the class names and check that their count matches the network's class count
char **names = get_labels_custom(name_list, &names_size);
if (net_classes != names_size) {
printf(" Error: in the file %s number of names %d that isn't equal to classes=%d in the file %s \n",
name_list, names_size, net_classes, cfgfile);
if (net_classes > names_size) getchar();
}
// NOTE(review): frees `classes` entries although names_size entries were reported above;
// the two can differ on the mismatch path — confirm this is safe.
free_ptrs((void**)names, net_map.layers[net_map.n - 1].classes);
}

srand(time(0));
// Derive a base name from the cfg file path, used only for printing
// (presumably e.g. "yolo" from "cfg/yolo.cfg" — confirm against basecfg).
char *base = basecfg(cfgfile);
printf("%s\n", base);
float avg_loss = -1;
// Allocate one network slot per GPU (ngpus entries).
network* nets = (network*)xcalloc(ngpus, sizeof(network));

// Seed the RNG and draw a single seed that every network below is re-seeded with
srand(time(0));
int seed = rand();
int i;
// One iteration per GPU: each pass re-seeds the RNG with the same seed, parses the same
// cfg file and, when a weight file was given, loads the same initial weights.
for (i = 0; i < ngpus; ++i) {
srand(seed);
#ifdef GPU
cuda_set_device(gpus[i]);
#endif
// Parse the network configuration file
nets[i] = parse_network_cfg(cfgfile);
// Forward the per-layer benchmarking flag to the network
nets[i].benchmark_layers = benchmark_layers;
// Load the pre-trained weights when a weight file was supplied
if (weightfile) {
load_weights(&nets[i], weightfile);
}
// With `clear` set, reset the network's `seen` counter; then scale the learning rate by the GPU count
if (clear) *nets[i].seen = 0;
nets[i].learning_rate *= ngpus;
}
...
}

解析配置文件

截图部分yolov3.cfg网络配置文件如下:

可以看到配置参数大概分为2类:

  • 与训练相关的项,以 [net] 行开头的段. 其中包含的参数有: batch_size, width,height,channel,momentum,decay,angle,saturation, exposure,hue,learning_rate,burn_in,max_batches,policy,steps,scales
  • 不同类型的层的配置参数. 如[convolutional], [shortcut], [yolo], [route], [upsample]层等。

在src/parse.c中我们会看到一行代码,net->batch /= net->subdivisions;,也就是说cfg文件中设置的batch_size 在 darknet 内部又被均分为 net->subdivisions份, 成为更小的batch_size(此后net->batch保存的就是均分后的值)。 但是这些小的 batch_size 最终又被汇总, 因此 darknet 中的batch_size = net->batch / net->subdivisions * net->subdivisions(其中net->batch指均分之前cfg中的取值)。此外,和这个参数相关的计算训练图片数目的时候是这样,int imgs = net->batch * net->subdivisions * ngpus;(这里的net->batch是均分之后的值),这样可以保证imgs可以被subdivisions整除,因此,通常将这个参数设为8的倍数。从这里也可以看出每个gpu或者cpu在每次迭代中都会训练cfg中batch个样本。

我们知道了参数是什么样子,那么darknet是如何保存这些参数的呢?这就要看下基本数据结构了。


About Joyk


Aggregate valuable and interesting links.
Joyk means Joy of geeK