目标检测 -darknet 之模型初始化

关注
发布于: 2020 年 11 月 14 日
﻿首先看的是darknet中对于net struct 的具体定义
﻿
	// 首先看网络中定义的network这个大的struct,包含了网络的所有参数
	typedef struct network {
	    int n;           //网络的层数, 调用make_network(int n)时赋值
	    int batch;       //一批训练中的图片参数, 和subdivsions参数相关
	    uint64_t *seen;  //目前已经读入的图片张数(网络已经处理的图片张数)
	    int *cur_iteration; // 当前的iteration
	    float loss_scale;
	    int *t;
	    float epoch;     //到目前为止训练了整个数据集的次数
	    int subdivisions;
	    layer *layers;   //用于存储网络中的所有层
	    float *output;
	    learningratepolicy policy;  // 学习率下降策略
	    int benchmark_layers;
	    int *total_bbox;
	    int *rewritten_bbox;
	    // 梯度下降法相关参数
	    float learning_rate;      // 学习率
	    float learningratemin;  // 学习率最小值
	    float learningratemax;  // 学习率最大值
	    int batchespercycle;
	    int batchescyclemult;
	    float momentum;
	    float decay;
	    float gamma;
	    float scale;
	    float power;
	    int time_steps;
	    int step;
	    int max_batches;
	    int num_boxes;
	    int trainimagesnum;
	    float *seq_scales;
	    float *scales;
	    int   *steps;
	    int num_steps;
	    int burn_in;
	    int cudnn_half;
	    // ADAM优化方法相关策略
	    int adam;
	    float B1;
	    float B2;
	    float eps;
	
	    int inputs;
	    int outputs;
	    int truths;
	    int notruth;
	    int h, w, c;
	    int max_crop;
	    int min_crop;
	    float max_ratio;
	    float min_ratio;
	    int center;
	    int flip; // horizontal flip 50% probability augmentaiont for classifier training (default = 1)
	    int gaussian_noise;
	    int blur;
	    int mixup;
	    float labelsmootheps;
	    int resize_step;
	    int attention;
	    int adversarial;    
	    float adversarial_lr;
	    float maxchartloss;
	    int letter_box;
	    int mosaic_bound;
	    int contrastive;
	    int contrastivejitflip;
	    int unsupervised;
	    float angle;
	    float aspect;
	    float exposure;
	    float saturation;
	    float hue;
	    int random;
	    int track;
	    int augment_speed;
	    int sequential_subdivisions;
	    int initsequentialsubdivisions;
	    int current_subdivision;
	    int tryfixnan;
	    //darknet 为每个GPU维护一个相同的network, 每个network以gpu_index区分
	    int gpu_index;
	    tree *hierarchy;
	    //中间变量, 用来暂存某层网络的输入(包含一个batch的输入,比如某层网络完成前向,
	    //将其输出赋给该变量,作为下一层的输入,可以参看network.c中的forward_network()
	    float *input;
	    // 中间变量,与上面的 input 对应,用来暂存 input 数据对应的标签数据(真实数据)
	    float *truth;
	    // 中间变量,用来暂存某层网络的梯度图(反向传播处理当前层时,用来存储上一层的敏
	    //感度图,因为当前层会计算部分上一层的敏感度图,可以参看network.c中的backward_network()函数)
	    float *delta;
	    // 网络的工作空间, 指的是所有层中占用运算空间最大的那个层的workspace_size,
	    // 因为实际上在GPU或CPU中某个时刻只有一个层在做前向或反向运算
	    float *workspace;
	    // 网络是否处于训练阶段的标志参数,如果是则值为1. 这个参数一般用于训练与测试阶段有不
	    // 同操作的情况,比如 dropout 层,在训练阶段才需要进行 forwarddropoutlayer()
	    // 函数, 测试阶段则不需要进入到该函数
	    int train;
	    // 标志参数,当前网络的活跃层
	    int index;
	    //每一层的损失,只有[yolo]层有值
	    float *cost;
	    float clip;
	
		//#ifdef GPU
	    //float *input_gpu;
	    //float *truth_gpu;
	    float *delta_gpu;
	    float *output_gpu;
	
	    float *inputstategpu;
	    float *inputpinnedcpu;
	    int inputpinnedcpu_flag;
	
	    float **input_gpu;
	    float **truth_gpu;
	    float **input16_gpu;
	    float **output16_gpu;
	    sizet *maxinput16_size;
	    sizet *maxoutput16_size;
	    int wait_stream;
	
	    float *globaldeltagpu;
	    float *statedeltagpu;
	    sizet maxdeltagpusize;
	//#endif  // GPU
	    int optimized_memory;
	    int dynamic_minibatch;
	    sizet workspacesize_limit;
	} network;
	
	// train_detector()读取完数据参数，就开始初始化网络了
	// 这里可以看出对于多卡训练的方式是在每个GPU上初始化一个模型。    
	network nets = (network)xcalloc(ngpus, sizeof(network));
	
接着通过 parse_network_cfg_custom() 读取 cfg 配置文件并构建网络：
﻿
		network parsenetworkcfg_custom(char *filename, int batch, int time_steps)
	{
    // 从神经网络结构参数文件中读入所有神经网络层的结构参数, 存储到sections中,
    // sections的每个node包含一层神经网络的所有结构参数
    list *sections = read_cfg(filename);  // 每创建一个网络, 都要读一把cfg配置文件
    // 获取sections的第一个节点, 可以查看一下cfg/***.cfg文件, 其实第一块参数(以[net]开头)不是某层神经网络的参数,
    // 而是关于整个网络的一些通用参数, 比如学习率, 衰减率, 输入图像宽高, batch大小等,
    // 具体的关于某个网络层的参数是从第二块开始的,如[convolutional],[maxpool]...,
    // 这些层并没有编号,只说明了层的属性,但层的参数都是按顺序在文件中排好的,读入时,
    // sections链表上的顺序就是文件中的排列顺序。
    node *n = sections->front;  // sections->front为大链上的第一个节点, sections可以称之为大链的虚拟头节点
    if(!n) error("Config file has no sections");
    // 创建网络结构并动态分配内存：输入网络层数为sections->size - 1,sections的第一段不是网络层,而是通用网络参数
    network net = make_network(sections->size - 1);  // 构建网络, sections->size-1表示网络层数
    // 所用显卡的卡号(gpu_index在cuda.c中用extern关键字声明)
    // 在调用parsenetworkcfg()之前,使用了cudasetdevice()设置了gpu_index的值号为当前活跃GPU卡号
    net.gpuindex = gpuindex;
    size_params params;
﻿
    if (batch > 0) params.train = 0;    // allocates memory for Detection only
    else params.train = 1;              // allocates memory for Detection & Training
﻿
    section s = (section )n->val;     // n->val是一个void*类型的指针.用时需要进行强制类型转换
    list *options = s->options;         // 获取大链上第一个节点上的val下面的options, 其实也就是cfg中[net]段对应的一系列配置
    if(!is_network(s)) error("First section must be [net] or [network]");
    parsenetoptions(options, &net);   // 这里主要是解析***.cfg中的[net]字段部分的配置, 并为net网络赋值相关参数
    
接下来看初始化网络
		
		// Creates a `network` value for `n` layers: zero-initializes the struct and allocates
		// the layer array plus the small heap-allocated counters. Returns the struct by value.
		network make_network(int n)
	{
	    network net = {0};  // zero every member first (NULL pointers are zero as well)
	    net.n = n;          // layer count; the caller passes sections->size - 1 (the [net] section is not a layer)
	    net.layers = (layer*)xcalloc(net.n, sizeof(layer));      // one slot per layer
	    net.seen = (uint64_t*)xcalloc(1, sizeof(uint64_t));      // number of images trained on so far
	    net.cur_iteration = (int*)xcalloc(1, sizeof(int));       // current iteration
	    net.total_bbox = (int*)xcalloc(1, sizeof(int));
	    net.rewritten_bbox = (int*)xcalloc(1, sizeof(int));
	    // Reset the counters themselves. Assigning 0 to the pointers instead would leak both
	    // allocations and leave NULL pointers behind.
	    *net.rewritten_bbox = *net.total_bbox = 0;
	#ifdef GPU
	    // These members are `float **` / `size_t *`: allocate one element of the pointed-to type.
	    net.input_gpu = (float**)xcalloc(1, sizeof(float*));
	    net.truth_gpu = (float**)xcalloc(1, sizeof(float*));

	    net.input16_gpu = (float**)xcalloc(1, sizeof(float*));
	    net.output16_gpu = (float**)xcalloc(1, sizeof(float*));
	    net.max_input16_size = (size_t*)xcalloc(1, sizeof(size_t));
	    net.max_output16_size = (size_t*)xcalloc(1, sizeof(size_t));
	#endif
	    return net;
	}
    
  处理 cfg 中 [net] 段参数的函数比较长，其中很多参数都有默认值：在 cfg 文件中没有明确配置的情况下，按默认值取值。
  
		// Parses the [net] section of a cfg file and stores the values in `net`.
		//   options: key/value list of the [net] section
		//   net:     network to fill in; net->seen and net->cur_iteration must already be allocated
		// Options missing from the cfg fall back to the default passed to option_find_*().
		void parse_net_options(list *options, network *net)
	{   // max_batches is the total number of training iterations; each iteration sees one batch of images
	    net->max_batches = option_find_int(options, "max_batches", 0);
	    net->batch = option_find_int(options, "batch",1);
	    net->learning_rate = option_find_float(options, "learning_rate", .001);
	    net->learning_rate_min = option_find_float_quiet(options, "learning_rate_min", .00001);
	    net->batches_per_cycle = option_find_int_quiet(options, "sgdr_cycle", net->max_batches);
	    net->batches_cycle_mult = option_find_int_quiet(options, "sgdr_mult", 2);
	    net->momentum = option_find_float(options, "momentum", .9);
	    net->decay = option_find_float(options, "decay", .0001);
	    int subdivs = option_find_int(options, "subdivisions",1);
	    // time_steps is an RNN concept: a gradient update considers not only the current batch
	    // but also the preceding time_steps batches, which is why net->batch is multiplied by
	    // time_steps below. For object detection there is no history, so time_steps is 1.
	    net->time_steps = option_find_int_quiet(options, "time_steps",1);
	    net->track = option_find_int_quiet(options, "track", 0);
	    net->augment_speed = option_find_int_quiet(options, "augment_speed", 2);
	    net->init_sequential_subdivisions = net->sequential_subdivisions = option_find_int_quiet(options, "sequential_subdivisions", subdivs);
	    if (net->sequential_subdivisions > subdivs) net->init_sequential_subdivisions = net->sequential_subdivisions = subdivs;
	    net->try_fix_nan = option_find_int_quiet(options, "try_fix_nan", 0);
	    // net->batch is divided by subdivs and then multiplied by time_steps; the latter serves RNNs.
	    // Reference: https://blog.csdn.net/avideointerfaces/article/details/90695438
	    net->batch /= subdivs;          // mini_batch: each iteration consumes batch images, split into subdivs mini-batches
	    const int mini_batch = net->batch;
	    net->batch *= net->time_steps;  // mini_batch * time_steps
	    net->subdivisions = subdivs;    // number of mini_batches

	    *net->seen = 0;            // number of images trained on so far
	    *net->cur_iteration = 0;   // current iteration of the network
	    net->loss_scale = option_find_float_quiet(options, "loss_scale", 1);
	    net->dynamic_minibatch = option_find_int_quiet(options, "dynamic_minibatch", 0);
	    net->optimized_memory = option_find_int_quiet(options, "optimized_memory", 0);
	    // Upper limit of the network workspace, configured in MB and stored in bytes.
	    net->workspace_size_limit = (size_t)1024*1024 * option_find_float_quiet(options, "workspace_size_limit_MB", 1024);
	    // ADAM optimizer parameters: B1, B2 and eps are only read when adam is enabled
	    net->adam = option_find_int_quiet(options, "adam", 0);
	    if(net->adam){
	        net->B1 = option_find_float(options, "B1", .9);
	        net->B2 = option_find_float(options, "B2", .999);
	        net->eps = option_find_float(options, "eps", .000001);
	    }

	    net->h = option_find_int_quiet(options, "height",0);    // input image height
	    net->w = option_find_int_quiet(options, "width",0);     // input image width
	    net->c = option_find_int_quiet(options, "channels",0);  // input image channel count
	    // Data is stored linearly in memory, so inputs defaults to the element count of one image.
	    net->inputs = option_find_int_quiet(options, "inputs", net->h * net->w * net->c);
	    // max_crop/min_crop: per the article, the shorter side of the input should lie within
	    // [min_crop, max_crop] and is rescaled otherwise (TODO confirm against the data loader).
	    // min_crop defaults to net->w; max_crop defaults to 2 * net->w.
	    net->max_crop = option_find_int_quiet(options, "max_crop",net->w*2);
	    net->min_crop = option_find_int_quiet(options, "min_crop",net->w);
	    // Data augmentation switches: flip, blur and noise are general-purpose;
	    // cutmix, mixup and mosaic are mainly used for object detection.
	    net->flip = option_find_int_quiet(options, "flip", 1);
	    net->blur = option_find_int_quiet(options, "blur", 0);
	    net->gaussian_noise = option_find_int_quiet(options, "gaussian_noise", 0);
	    net->mixup = option_find_int_quiet(options, "mixup", 0);
	    int cutmix = option_find_int_quiet(options, "cutmix", 0);
	    int mosaic = option_find_int_quiet(options, "mosaic", 0);
	    // cutmix/mosaic override mixup: 2 = cutmix, 3 = mosaic, 4 = both
	    if (mosaic && cutmix) net->mixup = 4;
	    else if (cutmix) net->mixup = 2;
	    else if (mosaic) net->mixup = 3;
	    net->letter_box = option_find_int_quiet(options, "letter_box", 0);
	    net->mosaic_bound = option_find_int_quiet(options, "mosaic_bound", 0);
	    net->contrastive = option_find_int_quiet(options, "contrastive", 0);
	    net->contrastive_jit_flip = option_find_int_quiet(options, "contrastive_jit_flip", 0);
	    net->unsupervised = option_find_int_quiet(options, "unsupervised", 0);
	    if (net->contrastive && mini_batch < 2) {
	        printf(" Error: mini_batch size (batch/subdivisions) should be higher than 1 for Contrastive loss \n");
	        exit(0);
	    }
	    net->label_smooth_eps = option_find_float_quiet(options, "label_smooth_eps", 0.0f);  // label smoothing
	    net->resize_step = option_find_float_quiet(options, "resize_step", 32);
	    net->attention = option_find_int_quiet(options, "attention", 0);
	    net->adversarial_lr = option_find_float_quiet(options, "adversarial_lr", 0);
	    net->max_chart_loss = option_find_float_quiet(options, "max_chart_loss", 20.0);
	    // More data augmentation parameters
	    net->angle = option_find_float_quiet(options, "angle", 0);            // image rotation angle
	    net->aspect = option_find_float_quiet(options, "aspect", 1);          // aspect ratio
	    net->saturation = option_find_float_quiet(options, "saturation", 1);  // saturation
	    net->exposure = option_find_float_quiet(options, "exposure", 1);      // exposure
	    net->hue = option_find_float_quiet(options, "hue", 0);                // hue
	    // NOTE(review): presumably the exponent used by the POLY learning-rate policy — confirm
	    net->power = option_find_float_quiet(options, "power", 4);

	    if(!net->inputs && !(net->h && net->w && net->c)) error("No input parameters supplied");
	    // Learning-rate schedule policy
	    char *policy_s = option_find_str(options, "policy", "constant");
	    net->policy = get_policy(policy_s);
	    net->burn_in = option_find_int_quiet(options, "burn_in", 0);  // warmup
	#ifdef GPU
	    if (net->gpu_index >= 0) {
	        char device_name[1024];
	        int compute_capability = get_gpu_compute_capability(net->gpu_index, device_name);
	#ifdef CUDNN_HALF
	        if (compute_capability >= 700) net->cudnn_half = 1;
	        else net->cudnn_half = 0;
	#endif// CUDNN_HALF
	        fprintf(stderr, " %d : compute_capability = %d, cudnn_half = %d, GPU: %s \n", net->gpu_index, compute_capability, net->cudnn_half, device_name);
	    }
	    else fprintf(stderr, " GPU isn't used \n");
	#endif// GPU
	    if(net->policy == STEP){
	        net->step = option_find_int(options, "step", 1);
	        net->scale = option_find_float(options, "scale", 1);
	    }
	    else if (net->policy == STEPS || net->policy == SGDR){
	        char *l = option_find(options, "steps");
	        char *p = option_find(options, "scales");
	        char *s = option_find(options, "seq_scales");

	        if(net->policy == STEPS && (!l || !p))
	            error("STEPS policy must have steps and scales in cfg file");

	        if (l) {
	            int len = strlen(l);
	            int n = 1;
	            int i;
	            for (i = 0; i < len; ++i) {
	                if (l[i] == '#') break;   // stop at a trailing comment
	                if (l[i] == ',') ++n;     // count ',' to get the number of steps
	            }
	            int *steps = (int*)xcalloc(n, sizeof(int));
	            float *scales = (float*)xcalloc(n, sizeof(float));
	            float *seq_scales = (float*)xcalloc(n, sizeof(float));
	            for (i = 0; i < n; ++i) {
	                // Each list is consumed one comma-separated entry at a time. When a list
	                // runs out (strchr returns NULL) its pointer becomes NULL and later entries
	                // keep the default 1.0, instead of forming an invalid pointer with NULL + 1.
	                float scale = 1.0;
	                if (p) {
	                    scale = atof(p);
	                    p = strchr(p, ',');
	                    if (p) ++p;
	                }
	                float sequence_scale = 1.0;
	                if (s) {
	                    sequence_scale = atof(s);
	                    s = strchr(s, ',');
	                    if (s) ++s;
	                }
	                int step = atoi(l);
	                l = strchr(l, ',');
	                if (l) ++l;
	                steps[i] = step;
	                scales[i] = scale;
	                seq_scales[i] = sequence_scale;
	            }
	            net->scales = scales;
	            net->steps = steps;
	            net->seq_scales = seq_scales;
	            net->num_steps = n;
	        }
	    } else if (net->policy == EXP){
	        net->gamma = option_find_float(options, "gamma", 1);
	    } else if (net->policy == SIG){
	        net->gamma = option_find_float(options, "gamma", 1);
	        net->step = option_find_int(options, "step", 1);
	    } else if (net->policy == POLY || net->policy == RANDOM){
	        // nothing to read here: net->power is already set above
	    }

	}
﻿
有人提出了一个很好的问题：为什么不直接把 cfg 文件中的参数读入到 net 结构体中？目前看到 BBUF 的一个解释，觉得还是很有道理的：先读入缓存（sections 链表），再传到结构体内，可以避免 cfg 文件发生改变对最终网络初始化造成影响。
﻿
后面会发的是具体的加载预训练参数的流程。
﻿
发布于: 2020 年 11 月 14 日阅读数: 39
Dreamer

关注
一个不想做搜索的NLPer不是一个好的CVer 2019.12.18 加入
还未添加个人简介
发布
暂无评论
创作场景

目标检测 -darknet 之模型初始化

﻿

Dreamer

评论