diff --git a/.vs/yolo_cpu/v14/.suo b/.vs/yolo_cpu/v14/.suo index 53d8e43..a289d8d 100644 Binary files a/.vs/yolo_cpu/v14/.suo and b/.vs/yolo_cpu/v14/.suo differ diff --git a/bin/coco.data b/bin/coco.data deleted file mode 100644 index c8fb232..0000000 --- a/bin/coco.data +++ /dev/null @@ -1,8 +0,0 @@ -classes= 80 -train = data/coco/trainvalno5k.txt -valid = data/5k.txt -#valid = data/coco_val_5k.list -names = coco.names -backup = backup/ -eval=coco - diff --git a/bin/coco.names b/bin/coco.names deleted file mode 100644 index ca76c80..0000000 --- a/bin/coco.names +++ /dev/null @@ -1,80 +0,0 @@ -person -bicycle -car -motorbike -aeroplane -bus -train -truck -boat -traffic light -fire hydrant -stop sign -parking meter -bench -bird -cat -dog -horse -sheep -cow -elephant -bear -zebra -giraffe -backpack -umbrella -handbag -tie -suitcase -frisbee -skis -snowboard -sports ball -kite -baseball bat -baseball glove -skateboard -surfboard -tennis racket -bottle -wine glass -cup -fork -knife -spoon -bowl -banana -apple -sandwich -orange -broccoli -carrot -hot dog -pizza -donut -cake -chair -sofa -pottedplant -bed -diningtable -toilet -tvmonitor -laptop -mouse -remote -keyboard -cell phone -microwave -oven -toaster -sink -refrigerator -book -clock -vase -scissors -teddy bear -hair drier -toothbrush diff --git a/bin/opencv_ffmpeg340_64.dll b/bin/opencv_ffmpeg340_64.dll new file mode 100644 index 0000000..45dc839 Binary files /dev/null and b/bin/opencv_ffmpeg340_64.dll differ diff --git a/bin/opencv_world340.dll b/bin/opencv_world340.dll new file mode 100644 index 0000000..8836c52 Binary files /dev/null and b/bin/opencv_world340.dll differ diff --git a/bin/predictions.png b/bin/predictions.png index 7cec9c8..dcb94b0 100644 Binary files a/bin/predictions.png and b/bin/predictions.png differ diff --git a/bin/video_yolo.sh b/bin/video_yolo.sh deleted file mode 100644 index e1f06c5..0000000 --- a/bin/video_yolo.sh +++ /dev/null @@ -1,3 +0,0 @@ -./darknet detector demo coco.names yolov3.cfg yolov3.weights -thresh 0.24 test.mp4 - - diff --git a/bin/xnor_voc.cmd b/bin/xnor_voc.cmd index 6b02ffe..c5dc02f 100644 --- a/bin/xnor_voc.cmd +++ b/bin/xnor_voc.cmd @@ -1 +1 @@ -yolo_cpu.exe detector test tiny-yolo-voc_xnor/voc.names tiny-yolo-voc_xnor/tiny_yolo_xnor.cfg tiny-yolo-voc_xnor/tiny_yolo_xnor.weights -thresh 0.15 dog.jpg \ No newline at end of file +yolo_cpu.exe detector test tiny-yolo-voc_xnor/voc.names tiny-yolo-voc_xnor/tiny_yolo_xnor.cfg tiny-yolo-voc_xnor/tiny_yolo_xnor.weights -thresh 0.25 dog.jpg \ No newline at end of file diff --git a/bin/yolo_cpu.exe b/bin/yolo_cpu.exe index 3f38c21..de162fa 100644 Binary files a/bin/yolo_cpu.exe and b/bin/yolo_cpu.exe differ diff --git a/bin/yolo_cpu.ilk b/bin/yolo_cpu.ilk deleted file mode 100644 index 51b2f5e..0000000 Binary files a/bin/yolo_cpu.ilk and /dev/null differ diff --git a/bin/yolo_cpu.iobj b/bin/yolo_cpu.iobj index 48710c7..fb64733 100644 Binary files a/bin/yolo_cpu.iobj and b/bin/yolo_cpu.iobj differ diff --git a/bin/yolo_cpu.ipdb b/bin/yolo_cpu.ipdb index fd276c3..7e1d059 100644 Binary files a/bin/yolo_cpu.ipdb and b/bin/yolo_cpu.ipdb differ diff --git a/bin/yolo_cpu.pdb b/bin/yolo_cpu.pdb index c0a0f6e..65e738f 100644 Binary files a/bin/yolo_cpu.pdb and b/bin/yolo_cpu.pdb differ diff --git a/bin/yolo_cpu_demo.cmd b/bin/yolo_cpu_demo.cmd deleted file mode 100644 index e0ea40d..0000000 --- a/bin/yolo_cpu_demo.cmd +++ /dev/null @@ -1,6 +0,0 @@ - - -yolo_cpu.exe detector demo coco.names yolov3.cfg yolov3.weights -thresh 0.24 test.mp4 - - -pause \ No newline at end of file diff --git a/src/additionally.c b/src/additionally.c index c123a2c..16253a0 100644 --- a/src/additionally.c +++ b/src/additionally.c @@ -1,14 +1,6 @@ #include "additionally.h" #include "gpu.h" -#ifdef OPENCL -#include "ocl.h" -#endif - -#ifdef CUDNN -#pragma comment(lib, "cudnn.lib") -#endif - #ifdef _OPENMP #include #endif @@ -89,17 +81,6 @@ void yolov2_fuse_conv_batchnorm(network net) } l->batch_normalize = 0; -#ifdef GPU - if (gpu_index >= 0) { - push_convolutional_layer(*l); - } -#endif - -#ifdef OPENCL - //if (gpu_index >= 0) { - ocl_push_convolutional_layer(*l); - //} -#endif } } else { @@ -219,25 +200,6 @@ void binary_align_weights(convolutional_layer *l) l->mean_arr = calloc(l->n, sizeof(float)); get_mean_array(align_weights, align_weights_size, l->n, l->mean_arr); -#ifdef GPU - cudaError_t status; - l->align_workspace_size = l->bit_align * l->size * l->size * l->c; - status = cudaMalloc((void **)&l->align_workspace_gpu, l->align_workspace_size * sizeof(float)); - status = cudaMalloc((void **)&l->transposed_align_workspace_gpu, l->align_workspace_size * sizeof(float)); - check_error(status); - - //l->align_bit_weights_gpu = cuda_make_array(l->align_bit_weights, l->align_bit_weights_size * sizeof(char)/sizeof(float)); - status = cudaMalloc((void **)&l->align_bit_weights_gpu, l->align_bit_weights_size); - check_error(status); - status = cudaMemcpy(l->align_bit_weights_gpu, l->align_bit_weights, l->align_bit_weights_size, cudaMemcpyHostToDevice); - check_error(status); - status = cudaMemcpy(l->binary_weights_gpu, l->binary_weights, m*k * sizeof(float), cudaMemcpyHostToDevice); - check_error(status); - - l->mean_arr_gpu = cuda_make_array(l->mean_arr, l->n); - cudaDeviceSynchronize(); -#endif // GPU - free(align_weights); } @@ -279,72 +241,6 @@ static inline unsigned char get_bit(unsigned char const*const src, size_t index) return val; } -/* -static inline unsigned char reverse_byte_1(char a) -{ - return ((a & 0x1) << 7) | ((a & 0x2) << 5) | - ((a & 0x4) << 3) | ((a & 0x8) << 1) | - ((a & 0x10) >> 1) | ((a & 0x20) >> 3) | - ((a & 0x40) >> 5) | ((a & 0x80) >> 7); -} - -static inline unsigned char reverse_byte(unsigned char a) -{ - return ((a * 0x0802LU & 0x22110LU) | (a * 0x8020LU & 0x88440LU)) * 0x10101LU >> 16; -} - -static unsigned char lookup[16] = { - 0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe, - 0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf, }; - -static inline unsigned char reverse_byte_3(unsigned char n) { - // Reverse the top and bottom nibble then swap them. - return (lookup[n & 0b1111] << 4) | lookup[n >> 4]; -} - - -static inline void transpose8rS32_reversed_diagonale(unsigned char* A, int m, int n, unsigned char* B) -{ - unsigned x, y, t; - - // Load the array and pack it into x and y. - x = (A[0] << 24) | (A[m] << 16) | (A[2 * m] << 8) | A[3 * m]; - y = (A[4 * m] << 24) | (A[5 * m] << 16) | (A[6 * m] << 8) | A[7 * m]; - - t = (x ^ (x >> 7)) & 0x00AA00AA; x = x ^ t ^ (t << 7); - t = (y ^ (y >> 7)) & 0x00AA00AA; y = y ^ t ^ (t << 7); - - t = (x ^ (x >> 14)) & 0x0000CCCC; x = x ^ t ^ (t << 14); - t = (y ^ (y >> 14)) & 0x0000CCCC; y = y ^ t ^ (t << 14); - - t = (x & 0xF0F0F0F0) | ((y >> 4) & 0x0F0F0F0F); - y = ((x << 4) & 0xF0F0F0F0) | (y & 0x0F0F0F0F); - x = t; - - B[7 * n] = reverse_byte(x >> 24); B[6 * n] = reverse_byte(x >> 16); B[5 * n] = reverse_byte(x >> 8); B[4 * n] = reverse_byte(x); - B[3 * n] = reverse_byte(y >> 24); B[2 * n] = reverse_byte(y >> 16); B[1 * n] = reverse_byte(y >> 8); B[0 * n] = reverse_byte(y); -} - -void transpose_bin(char *A, char *B, const int n, const int m, - const int lda, const int ldb, const int block_size) -{ - int i; - #pragma omp parallel for - for (i = 0; i < n; i += 8) { - int j; - for (j = 0; j < m - 8; j += 8) { - int a_index = i*lda + j; - int b_index = j*ldb + i; - //transpose_8x8_bits_my(&A[a_index/8], &B[b_index/8], lda/8, ldb/8); - transpose8rS32_reversed_diagonale(&A[a_index / 8], lda / 8, ldb / 8, &B[b_index / 8]); - } - for (; j < m; ++j) { - if (get_bit(A, i*lda + j)) set_bit(B, j*ldb + i); - } - } -} -*/ - uint8_t reverse_8_bit(uint8_t a) { return ((a * 0x0802LU & 0x22110LU) | (a * 0x8020LU & 0x88440LU)) * 0x10101LU >> 16; } @@ -434,563 +330,6 @@ void transpose_bin(uint32_t *A, uint32_t *B, const int n, const int m, // -------------- blas.c -------------- - -#ifdef AVX - -#ifdef _WIN64 -// Windows -#include -#else -// Linux -#include -#endif - -#include -#include -#include -#include -// https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=broad&expand=561 - -// https://software.intel.com/sites/landingpage/IntrinsicsGuide -void gemm_nn(int M, int N, int K, float ALPHA, - float *A, int lda, - float *B, int ldb, - float *C, int ldc) -{ - int i, j, k; - for (i = 0; i < M; ++i) { - for (k = 0; k < K; ++k) { - float A_PART = ALPHA*A[i*lda + k]; - __m256 a256, b256, c256, result256; // AVX - a256 = _mm256_set1_ps(A_PART); - for (j = 0; j < N - 8; j += 8) { - b256 = _mm256_loadu_ps(&B[k*ldb + j]); - c256 = _mm256_loadu_ps(&C[i*ldc + j]); - // FMA - Intel Haswell (2013), AMD Piledriver (2012) - //result256 = _mm256_fmadd_ps(a256, b256, c256); - result256 = _mm256_mul_ps(a256, b256); - result256 = _mm256_add_ps(result256, c256); - _mm256_storeu_ps(&C[i*ldc + j], result256); - } - - int prev_end = (N % 8 == 0) ? (N - 8) : (N / 8) * 8; - for (j = prev_end; j < N; ++j) - C[i*ldc + j] += A_PART*B[k*ldb + j]; - } - } -} - - -#if defined(_MSC_VER) && _MSC_VER <= 1900 -static inline __int32 _mm256_extract_epi64(__m256i a, const int index) { - return a.m256i_i64[index]; -} - -static inline __int32 _mm256_extract_epi32(__m256i a, const int index) { - return a.m256i_i32[index]; -} -#endif - -static inline float _castu32_f32(uint32_t a) { - return *((float *)&a); -} - -#if defined(_MSC_VER) -// Windows -static inline float _mm256_extract_float32(__m256 a, const int index) { - return a.m256_f32[index]; -} -#else -// Linux -static inline float _mm256_extract_float32(__m256 a, const int index) { - return _castu32_f32(_mm256_extract_epi32(_mm256_castps_si256(a), index)); -} -#endif - -//From Berkeley Vision's Caffe! -//https://github.com/BVLC/caffe/blob/master/LICENSE -void im2col_cpu_custom(float* data_im, - int channels, int height, int width, - int ksize, int stride, int pad, float* data_col) -{ - - int c; - const int height_col = (height + 2 * pad - ksize) / stride + 1; - const int width_col = (width + 2 * pad - ksize) / stride + 1; - const int channels_col = channels * ksize * ksize; - - // optimized version - if (height_col == height && width_col == width && stride == 1 && pad == 1)// && is_fma_avx()) - { - #pragma omp parallel for - for (c = 0; c < channels_col; ++c) { - int h, w; - int w_offset = c % ksize; - int h_offset = (c / ksize) % ksize; - int c_im = c / ksize / ksize; - for (h = pad; h < height_col - pad; ++h) { - for (w = pad; w < width_col - pad - 8; w += 8) { - int im_row = h_offset + h - pad; - int im_col = w_offset + w - pad; - int col_index = (c * height_col + h) * width_col + w; - - //data_col[col_index] = data_im[im_col + width*(im_row + height*c_im)]; - __m256 src256 = _mm256_loadu_ps((float *)(&data_im[im_col + width*(im_row + height*c_im)])); - _mm256_storeu_ps(&data_col[col_index], src256); - } - - for (; w < width_col - pad; ++w) { - int im_row = h_offset + h - pad; - int im_col = w_offset + w - pad; - int col_index = (c * height_col + h) * width_col + w; - - data_col[col_index] = data_im[im_col + width*(im_row + height*c_im)]; - } - } - - { - w = 0; - for (h = 0; h < height_col; ++h) { - int im_row = h_offset + h; - int im_col = w_offset + w; - int col_index = (c * height_col + h) * width_col + w; - data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, - im_row, im_col, c_im, pad); - } - } - - { - w = width_col - 1; - for (h = 0; h < height_col; ++h) { - int im_row = h_offset + h; - int im_col = w_offset + w; - int col_index = (c * height_col + h) * width_col + w; - data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, - im_row, im_col, c_im, pad); - } - } - - { - h = 0; - for (w = 0; w < width_col; ++w) { - int im_row = h_offset + h; - int im_col = w_offset + w; - int col_index = (c * height_col + h) * width_col + w; - data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, - im_row, im_col, c_im, pad); - } - } - - { - h = height_col - 1; - for (w = 0; w < width_col; ++w) { - int im_row = h_offset + h; - int im_col = w_offset + w; - int col_index = (c * height_col + h) * width_col + w; - data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, - im_row, im_col, c_im, pad); - } - } - } - - } - else { - //printf("\n Error: is no non-optimized version \n"); - im2col_cpu(data_im, channels, height, width, ksize, stride, pad, data_col); - } -} - -//From Berkeley Vision's Caffe! -//https://github.com/BVLC/caffe/blob/master/LICENSE -void im2col_cpu_custom_bin(float* data_im, - int channels, int height, int width, - int ksize, int stride, int pad, float* data_col, int bit_align) -{ - int c; - const int height_col = (height + 2 * pad - ksize) / stride + 1; - const int width_col = (width + 2 * pad - ksize) / stride + 1; - const int channels_col = channels * ksize * ksize; - - // optimized version - if (height_col == height && width_col == width && stride == 1 && pad == 1) - { - //__m256i all256_sing1 = _mm256_set_epi32(0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000); - __m256 float_zero256 = _mm256_set1_ps(0.00); - - int new_ldb = bit_align; - - #pragma omp parallel for - for (c = 0; c < channels_col; ++c) { - int h, w; - int w_offset = c % ksize; - int h_offset = (c / ksize) % ksize; - int c_im = c / ksize / ksize; - for (h = pad; h < height_col - pad; ++h) { - for (w = pad; w < width_col - pad - 8; w += 8) { - int im_row = h_offset + h - pad; - int im_col = w_offset + w - pad; - //int col_index = (c * height_col + h) * width_col + w; - int col_index = c * new_ldb + h * width_col + w; - - //__m256i src256 = _mm256_loadu_si256((__m256i *)(&data_im[im_col + width*(im_row + height*c_im)])); - //__m256i result256 = _mm256_and_si256(src256, all256_sing1); // check sign in 8 x 32-bit floats - //uint16_t mask = _mm256_movemask_ps(_mm256_castsi256_ps(result256)); // (val >= 0) ? 0 : 1 - //mask = ~mask; // inverse mask, (val >= 0) ? 1 : 0 - - __m256 src256 = _mm256_loadu_ps((float *)(&data_im[im_col + width*(im_row + height*c_im)])); - __m256 result256 = _mm256_cmp_ps(src256, float_zero256, _CMP_GT_OS); - uint16_t mask = _mm256_movemask_ps(result256); // (val > 0) ? 0 : 1 - - uint16_t *dst_ptr = &((unsigned char*)data_col)[col_index / 8]; - *dst_ptr |= (mask << (col_index % 8)); - } - - for (; w < width_col - pad; ++w) { - int im_row = h_offset + h - pad; - int im_col = w_offset + w - pad; - //int col_index = (c * height_col + h) * width_col + w; - int col_index = c * new_ldb + h * width_col + w; - - //data_col[col_index] = data_im[im_col + width*(im_row + height*c_im)]; - float val = data_im[im_col + width*(im_row + height*c_im)]; - if (val > 0) set_bit(data_col, col_index); - } - } - - { - w = 0; - for (h = 0; h < height_col; ++h) { - int im_row = h_offset + h; - int im_col = w_offset + w; - //int col_index = (c * height_col + h) * width_col + w; - int col_index = c * new_ldb + h * width_col + w; - - //data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad); - float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad); - if (val > 0) set_bit(data_col, col_index); - } - } - - { - w = width_col - 1; - for (h = 0; h < height_col; ++h) { - int im_row = h_offset + h; - int im_col = w_offset + w; - //int col_index = (c * height_col + h) * width_col + w; - int col_index = c * new_ldb + h * width_col + w; - - //data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad); - float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad); - if (val > 0) set_bit(data_col, col_index); - } - } - - { - h = 0; - for (w = 0; w < width_col; ++w) { - int im_row = h_offset + h; - int im_col = w_offset + w; - //int col_index = (c * height_col + h) * width_col + w; - int col_index = c * new_ldb + h * width_col + w; - - //data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad); - float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad); - if (val > 0) set_bit(data_col, col_index); - } - } - - { - h = height_col - 1; - for (w = 0; w < width_col; ++w) { - int im_row = h_offset + h; - int im_col = w_offset + w; - //int col_index = (c * height_col + h) * width_col + w; - int col_index = c * new_ldb + h * width_col + w; - - //data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad); - float val = im2col_get_pixel(data_im, height, width, channels, im_row, im_col, c_im, pad); - if (val > 0) set_bit(data_col, col_index); - } - } - } - - } - else { - printf("\n Error: is no non-optimized version \n"); - //im2col_cpu(data_im, channels, height, width, ksize, stride, pad, data_col); // must be aligned for transpose after float_to_bin - // float_to_bit(b, t_input, src_size); - // transpose_bin(t_input, *t_bit_input, k, n, bit_align, new_ldb, 8); - } -} - -void activate_array_cpu_custom(float *x, const int n, const ACTIVATION a) -{ - int i = 0; - if (a == LINEAR) - { - } - else if (a == LEAKY) - { - { - __m256i all256_sing1 = _mm256_set_epi32(0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000); - __m256 all256_01 = _mm256_set1_ps(0.1F); - - for (i = 0; i < n - 8; i += 8) { - //x[i] = (x[i]>0) ? x[i] : .1*x[i]; - - __m256 src256 = _mm256_loadu_ps(&x[i]); - __m256 mult256 = _mm256_mul_ps((src256), all256_01); // mult * 0.1 - - __m256i sign256 = _mm256_and_si256(_mm256_castps_si256(src256), all256_sing1); // check sign in 8 x 32-bit floats - - __m256 result256 = _mm256_blendv_ps(src256, mult256, _mm256_castsi256_ps(sign256)); // (sign>0) ? src : mult; - _mm256_storeu_ps(&x[i], result256); - } - } - - for (; i < n; ++i) { - x[i] = (x[i]>0) ? x[i] : .1*x[i]; - } - } - else { - for (i = 0; i < n; ++i) { - x[i] = activate(x[i], a); - } - } -} - - -void forward_maxpool_layer_avx(float *src, float *dst, int *indexes, int size, int w, int h, int out_w, int out_h, int c, - int pad, int stride, int batch) -{ - - const int w_offset = -pad / 2; - const int h_offset = -pad / 2; - int b, k; - - for (b = 0; b < batch; ++b) { - #pragma omp parallel for - for (k = 0; k < c; ++k) { - int i, j, m, n; - for (i = 0; i < out_h; ++i) { - //for (j = 0; j < out_w; ++j) { - j = 0; - - if (stride == 1) { - for (j = 0; j < out_w - 8 - (size - 1); j += 8) { - int out_index = j + out_w*(i + out_h*(k + c*b)); - __m256 max256 = _mm256_set1_ps(-FLT_MAX); - for (n = 0; n < size; ++n) { - for (m = 0; m < size; ++m) { - int cur_h = h_offset + i*stride + n; - int cur_w = w_offset + j*stride + m; - int index = cur_w + w*(cur_h + h*(k + b*c)); - int valid = (cur_h >= 0 && cur_h < h && - cur_w >= 0 && cur_w < w); - if (!valid) continue; - - __m256 src256 = _mm256_loadu_ps(&src[index]); - max256 = _mm256_max_ps(src256, max256); - } - } - _mm256_storeu_ps(&dst[out_index], max256); - - } - } - else if (size == 2 && stride == 2) { - for (j = 0; j < out_w - 4; j += 4) { - int out_index = j + out_w*(i + out_h*(k + c*b)); - float max = -FLT_MAX; - int max_i = -1; - __m128 max128 = _mm_set1_ps(-FLT_MAX); - - for (n = 0; n < size; ++n) { - //for (m = 0; m < size; ++m) - m = 0; - { - int cur_h = h_offset + i*stride + n; - int cur_w = w_offset + j*stride + m; - int index = cur_w + w*(cur_h + h*(k + b*c)); - int valid = (cur_h >= 0 && cur_h < h && - cur_w >= 0 && cur_w < w); - if (!valid) continue; - - __m256 src256 = _mm256_loadu_ps(&src[index]); - __m256 src256_2 = _mm256_permute_ps(src256, (1 << 0) | (3 << 4)); - __m256 max256 = _mm256_max_ps(src256, src256_2); - - __m128 src128_0 = _mm256_extractf128_ps(max256, 0); - __m128 src128_1 = _mm256_extractf128_ps(max256, 1); - __m128 src128 = _mm_shuffle_ps(src128_0, src128_1, (2 << 2) | (2 << 6)); - - max128 = _mm_max_ps(src128, max128); - } - } - _mm_storeu_ps(&dst[out_index], max128); - } - } - - for (; j < out_w; ++j) { - int out_index = j + out_w*(i + out_h*(k + c*b)); - float max = -FLT_MAX; - int max_i = -1; - for (n = 0; n < size; ++n) { - for (m = 0; m < size; ++m) { - int cur_h = h_offset + i*stride + n; - int cur_w = w_offset + j*stride + m; - int index = cur_w + w*(cur_h + h*(k + b*c)); - int valid = (cur_h >= 0 && cur_h < h && - cur_w >= 0 && cur_w < w); - float val = (valid != 0) ? src[index] : -FLT_MAX; - max_i = (val > max) ? index : max_i; - max = (val > max) ? val : max; - } - } - dst[out_index] = max; - indexes[out_index] = max_i; - } - } - } - } -} - - -// http://graphics.stanford.edu/~seander/bithacks.html -// https://stackoverflow.com/questions/17354971/fast-counting-the-number-of-set-bits-in-m128i-register -// https://arxiv.org/pdf/1611.07612.pdf - -static inline int popcnt128(__m128i n) { - const __m128i n_hi = _mm_unpackhi_epi64(n, n); -#ifdef _MSC_VER - return __popcnt64(_mm_cvtsi128_si64(n)) + __popcnt64(_mm_cvtsi128_si64(n_hi)); -#else - return __popcntq(_mm_cvtsi128_si64(n)) + __popcntq(_mm_cvtsi128_si64(n_hi)); -#endif -} - -static inline int popcnt256(__m256i n) { - return popcnt128(_mm256_extractf128_si256(n, 0)) + popcnt128(_mm256_extractf128_si256(n, 1)); -} - -static inline __m256i count256(__m256i v) { - __m256i lookup = - _mm256_setr_epi8(0, 1, 1, 2, 1, 2, 2, 3, 1, 2, - 2, 3, 2, 3, 3, 4, 0, 1, 1, 2, 1, 2, 2, 3, - 1, 2, 2, 3, 2, 3, 3, 4); - - __m256i low_mask = _mm256_set1_epi8(0x0f); - - __m256i lo = _mm256_and_si256(v, low_mask); - __m256i hi = _mm256_and_si256(_mm256_srli_epi32(v, 4), low_mask); - __m256i popcnt1 = _mm256_shuffle_epi8(lookup, lo); - __m256i popcnt2 = _mm256_shuffle_epi8(lookup, hi); - __m256i total = _mm256_add_epi8(popcnt1, popcnt2); - - return _mm256_sad_epu8(total, _mm256_setzero_si256()); -} - -static inline int popcnt256_custom(__m256i n) { - __m256i val = count256(n); - - //return val.m256i_i64[0] + - //val.m256i_i64[1] + - //val.m256i_i64[2] + - //val.m256i_i64[3]; - return _mm256_extract_epi64(val, 0) - + _mm256_extract_epi64(val, 1) - + _mm256_extract_epi64(val, 2) - + _mm256_extract_epi64(val, 3); -} - -// 5x times faster than gemm()-float32 -// further optimizations: do mean-mult only for the last layer -void gemm_nn_custom_bin_mean_transposed(int M, int N, int K, float ALPHA_UNUSED, - unsigned char *A, int lda, - unsigned char *B, int ldb, - float *C, int ldc, float *mean_arr) -{ - -#if defined(_OPENMP) - static int max_num_threads = 0; - if (max_num_threads == 0) { - max_num_threads = omp_get_max_threads(); - //omp_set_num_threads(max_num_threads / 2); - } -#endif - - int i; - #pragma omp parallel for - for (i = 0; i < M; ++i) - { // l.n - filters [16 - 55 - 1024] - float mean_val = mean_arr[i]; - int j, k; - __m256i all_1 = _mm256_set1_epi8(255); - - for (j = 0; j < N; ++j) { // out_h*out_w - one channel output size [169 - 173056] - int count = 0; - const int bit_step = 256; - __m256i count_sum = _mm256_set1_epi8(0); - - for (k = 0; k < K; k += bit_step) { // l.size*l.size*l.c - one filter size [27 - 9216] - __m256i a_bit256 = _mm256_loadu_si256((__m256i *)(A + (i*lda + k) / 8)); - __m256i b_bit256 = _mm256_loadu_si256((__m256i *)(B + (j*ldb + k) / 8)); - __m256i xor256 = _mm256_xor_si256(a_bit256, b_bit256); // xnor = not(xor(a,b)) - __m256i c_bit256 = _mm256_andnot_si256(xor256, all_1); // can be optimized - we can do other NOT for wegihts once and do not do this NOT - - count_sum = _mm256_add_epi64(count256(c_bit256), count_sum); // Mula’s algorithm - - //count += popcnt256(c_bit256); - - //binary_int64_printf(c_bit64); - //printf(", count = %d \n\n", tmp_count); - } - - // count of 1 bits - //count = count_sum.m256i_i64[0] + - // count_sum.m256i_i64[1] + - // count_sum.m256i_i64[2] + - // count_sum.m256i_i64[3]; - count = _mm256_extract_epi64(count_sum, 0) - + _mm256_extract_epi64(count_sum, 1) - + _mm256_extract_epi64(count_sum, 2) - + _mm256_extract_epi64(count_sum, 3); - - int f1 = (K % bit_step == 0) ? 0 : (bit_step - (K % bit_step)); - count = count - f1; // remove extra bits (from empty space for align only) - - C[i*ldc + j] = (2 * count - K) * mean_val; - } - } -} - - - -void float_to_bit(float *src, unsigned char *dst, size_t size) -{ - size_t dst_size = size / 8 + 1; - memset(dst, 0, dst_size); - - size_t i; - //__m256i all256_sing1 = _mm256_set_epi32(0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000); - __m256 float_zero256 = _mm256_set1_ps(0.0); - - for (i = 0; i < size; i += 8) - { - //__m256i src256 = _mm256_loadu_si256((__m256i *)(&src[i])); - //__m256i result256 = _mm256_and_si256(src256, all256_sing1); // check sign in 8 x 32-bit floats - //uint32_t mask = _mm256_movemask_ps(_mm256_castsi256_ps(result256)); // (val >= 0) ? 0 : 1 - ////mask = ~mask; // inverse mask, (val >= 0) ? 1 : 0 - - __m256 src256 = _mm256_loadu_ps((float *)(&src[i])); - __m256 result256 = _mm256_cmp_ps(src256, float_zero256, _CMP_GT_OS); - uint32_t mask = _mm256_movemask_ps(result256); // (val > 0) ? 0 : 1 - - dst[i / 8] = mask; - } -} - -#else // AVX - void gemm_nn(int M, int N, int K, float ALPHA, float *A, int lda, float *B, int ldb, @@ -1259,7 +598,6 @@ void float_to_bit(float *src, unsigned char *dst, size_t size) } free(byte_arr); } -#endif // __x86_64 /* void gemm_nn(int M, int N, int K, float ALPHA, @@ -1718,9 +1056,6 @@ char **get_labels(char *filename) // network.c float *get_network_output(network net) { -#ifdef GPU - if (gpu_index >= 0) return get_network_output_gpu(net); -#endif int i; for (i = net.n - 1; i > 0; --i) if (net.layers[i].type != COST) break; return net.layers[i].output; @@ -1741,10 +1076,6 @@ network make_network(int n) net.n = n; net.layers = calloc(net.n, sizeof(layer)); net.seen = calloc(1, sizeof(uint64_t)); -#ifdef GPU - net.input_gpu = calloc(1, sizeof(float *)); - net.truth_gpu = calloc(1, sizeof(float *)); -#endif return net; } @@ -1760,89 +1091,8 @@ void free_network(network net) free(net.steps); free(net.seen); -#ifdef GPU - if (gpu_index >= 0) cuda_free(net.workspace); - else free(net.workspace); - if (net.input_state_gpu) cuda_free(net.input_state_gpu); - if (*net.input_gpu) cuda_free(*net.input_gpu); - if (*net.truth_gpu) cuda_free(*net.truth_gpu); - if (net.input_gpu) free(net.input_gpu); - if (net.truth_gpu) free(net.truth_gpu); - - //if (*net.input16_gpu) cuda_free(*net.input16_gpu); - //if (*net.output16_gpu) cuda_free(*net.output16_gpu); - //if (net.input16_gpu) free(net.input16_gpu); - //if (net.output16_gpu) free(net.output16_gpu); - //if (net.max_input16_size) free(net.max_input16_size); - //if (net.max_output16_size) free(net.max_output16_size); -#else free(net.workspace); -#endif -} - - -// network.c -#ifdef GPU -#ifdef CUDNN -void cudnn_convolutional_setup(layer *l) -{ -#if(CUDNN_MAJOR >= 7) - cudnnSetConvolutionMathType(l->convDesc, CUDNN_TENSOR_OP_MATH); -#if((CUDNN_MAJOR*10 + CUDNN_MINOR) >= 72) // cuDNN >= 7.2 - cudnnSetConvolutionMathType(l->convDesc, CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION); -#endif //(CUDNN_MAJOR >= 7.2) -#endif //(CUDNN_MAJOR >= 7) - - if (l->quantized) - { - cudnnDataType_t cudnn_data_type = CUDNN_DATA_INT8x4; - cudnnTensorFormat_t tensor_format = CUDNN_TENSOR_NCHW_VECT_C; - cudnnTensorFormat_t dst_tensor_format = CUDNN_TENSOR_NCHW; - -#if((CUDNN_MAJOR*10 + CUDNN_MINOR) >= 72) - //if (l->c % 32 == 0) cudnn_data_type = CUDNN_DATA_INT8x32; // Tensor Cores for INT8 -#endif //(CUDNN_MAJOR >= 7.2) - - cudnnSetTensor4dDescriptor(l->srcTensorDesc, CUDNN_TENSOR_NCHW_VECT_C, cudnn_data_type, l->batch, l->c, l->h, l->w); - cudnnSetFilter4dDescriptor(l->weightDesc, cudnn_data_type, CUDNN_TENSOR_NCHW_VECT_C, l->n, l->c, l->size, l->size); - cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w); - cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION, CUDNN_DATA_INT32); // cudnn 7 - - l->fw_algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM; // only supported for DP4A INT8x4 - - // BIAS float - cudnnSetTensor4dDescriptor(l->biasTensorDesc, dst_tensor_format, CUDNN_DATA_FLOAT, 1, l->out_c, 1, 1); - - // https://en.wikipedia.org/wiki/Activation_function - // CUDNN_ACTIVATION_IDENTITY - cudnnSetActivationDescriptor(l->activationDesc, CUDNN_ACTIVATION_IDENTITY, CUDNN_NOT_PROPAGATE_NAN, 0); - //cudnnSetActivationDescriptor(activationDesc, CUDNN_ACTIVATION_RELU, CUDNN_NOT_PROPAGATE_NAN, 0.1); // RELU or ELU can't replace LEAKY_RELU - } - else { - cudnnSetTensor4dDescriptor(l->srcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->c, l->h, l->w); - cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w); - cudnnSetFilter4dDescriptor(l->weightDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, l->n, l->c, l->size, l->size); -#if(CUDNN_MAJOR >= 6) - cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION, CUDNN_DATA_FLOAT); // cudnn 6.0 -#else - cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION); // cudnn 5.1 -#endif - cudnnGetConvolutionForwardAlgorithm(cudnn_handle(), - l->srcTensorDesc, - l->weightDesc, - l->convDesc, - l->dstTensorDesc, - CUDNN_CONVOLUTION_FWD_PREFER_FASTEST, - 0, - &l->fw_algo); - - //l->fw_algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM; // un-comment to use Tensor Cores for cuDNN >= 7.2 - } } -#endif -#endif - - // network.c void set_batch_network(network *net, int b) @@ -1852,11 +1102,6 @@ void set_batch_network(network *net, int b) for (i = 0; i < net->n; ++i) { layer l = net->layers[i]; l.batch = b; -#ifdef CUDNN - if (l.type == CONVOLUTIONAL) { - cudnn_convolutional_setup(net->layers + i); - } -#endif } } @@ -1868,9 +1113,6 @@ void free_layer(layer l) { if (l.type == DROPOUT) { if (l.rand) free(l.rand); -#ifdef GPU - if (l.rand_gpu) cuda_free(l.rand_gpu); -#endif return; } if (l.cweights) free(l.cweights); @@ -1917,55 +1159,6 @@ void free_layer(layer l) if (l.r_cpu) free(l.r_cpu); if (l.h_cpu) free(l.h_cpu); if (l.binary_input) free(l.binary_input); - -#ifdef GPU - if (l.indexes_gpu) cuda_free((float *)l.indexes_gpu); - - if (l.z_gpu) cuda_free(l.z_gpu); - if (l.r_gpu) cuda_free(l.r_gpu); - if (l.h_gpu) cuda_free(l.h_gpu); - if (l.m_gpu) cuda_free(l.m_gpu); - if (l.v_gpu) cuda_free(l.v_gpu); - if (l.prev_state_gpu) cuda_free(l.prev_state_gpu); - if (l.forgot_state_gpu) cuda_free(l.forgot_state_gpu); - if (l.forgot_delta_gpu) cuda_free(l.forgot_delta_gpu); - if (l.state_gpu) cuda_free(l.state_gpu); - if (l.state_delta_gpu) cuda_free(l.state_delta_gpu); - if (l.gate_gpu) cuda_free(l.gate_gpu); - if (l.gate_delta_gpu) cuda_free(l.gate_delta_gpu); - if (l.save_gpu) cuda_free(l.save_gpu); - if (l.save_delta_gpu) cuda_free(l.save_delta_gpu); - if (l.concat_gpu) cuda_free(l.concat_gpu); - if (l.concat_delta_gpu) cuda_free(l.concat_delta_gpu); - if (l.binary_input_gpu) cuda_free(l.binary_input_gpu); - if (l.binary_weights_gpu) cuda_free(l.binary_weights_gpu); - if (l.mean_gpu) cuda_free(l.mean_gpu); - if (l.variance_gpu) cuda_free(l.variance_gpu); - if (l.rolling_mean_gpu) cuda_free(l.rolling_mean_gpu); - if (l.rolling_variance_gpu) cuda_free(l.rolling_variance_gpu); - if (l.variance_delta_gpu) cuda_free(l.variance_delta_gpu); - if (l.mean_delta_gpu) cuda_free(l.mean_delta_gpu); - if (l.x_gpu) cuda_free(l.x_gpu); - if (l.x_norm_gpu) cuda_free(l.x_norm_gpu); - - if (l.align_bit_weights_gpu) cuda_free(l.align_bit_weights_gpu); - if (l.mean_arr_gpu) cuda_free(l.mean_arr_gpu); - if (l.align_workspace_gpu) cuda_free(l.align_workspace_gpu); - if (l.transposed_align_workspace_gpu) cuda_free(l.transposed_align_workspace_gpu); - - if (l.weights_gpu) cuda_free(l.weights_gpu); - //if (l.weight_updates_gpu) cuda_free(l.weight_updates_gpu); - if (l.biases_gpu) cuda_free(l.biases_gpu); - //if (l.bias_updates_gpu) cuda_free(l.bias_updates_gpu); - if (l.scales_gpu) cuda_free(l.scales_gpu); - //if (l.scale_updates_gpu) cuda_free(l.scale_updates_gpu); - if (l.output_gpu) cuda_free(l.output_gpu); - if (l.output_gpu_int8) cuda_free(l.output_gpu_int8); - if (l.delta_gpu) cuda_free(l.delta_gpu); - if (l.rand_gpu) cuda_free(l.rand_gpu); - if (l.squared_gpu) cuda_free(l.squared_gpu); - if (l.norms_gpu) cuda_free(l.norms_gpu); -#endif } @@ -1988,21 +1181,6 @@ softmax_layer make_softmax_layer(int batch, int inputs, int groups) // commented only for this custom version of Yolo v2 //l.forward = forward_softmax_layer; //l.backward = backward_softmax_layer; -#ifdef GPU - // commented only for this custom version of Yolo v2 - //l.forward_gpu = forward_softmax_layer_gpu; - //l.backward_gpu = backward_softmax_layer_gpu; - - l.output_gpu = cuda_make_array(l.output, inputs*batch); - cudaError_t status; - status = cudaMalloc((void **)&(l.output_gpu_int8), sizeof(int8_t)*inputs*batch); - //l.delta_gpu = cuda_make_array(l.delta, inputs*batch); -#endif - -#ifdef OPENCL - l.output_ocl = ocl_make_array(l.output, inputs*batch); -#endif - return l; } @@ -2033,11 +1211,6 @@ layer make_upsample_layer(int batch, int w, int h, int c, int stride) l.output = calloc(l.outputs*batch, sizeof(float));; //l.forward = forward_upsample_layer; -#ifdef GPU - //l.forward_gpu = forward_upsample_layer_gpu; - - l.output_gpu = cuda_make_array(l.output, l.outputs*batch); -#endif if (l.reverse) fprintf(stderr, "downsample %2dx %4d x%4d x%4d -> %4d x%4d x%4d\n", stride, w, h, c, l.out_w, l.out_h, l.out_c); else fprintf(stderr, "upsample %2dx %4d x%4d x%4d -> %4d x%4d x%4d\n", stride, w, h, c, l.out_w, l.out_h, l.out_c); return l; @@ -2063,10 +1236,7 @@ layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int l.index = index; - l.output = calloc(l.outputs*batch, sizeof(float));; -#ifdef GPU - l.output_gpu = cuda_make_array(l.output, l.outputs*batch); -#endif + l.output = calloc(l.outputs*batch, sizeof(float)); return l; } @@ -2104,20 +1274,6 @@ layer make_reorg_layer(int batch, int w, int h, int c, int stride, int reverse) // commented only for this custom version of Yolo v2 //l.forward = forward_reorg_layer; //l.backward = backward_reorg_layer; -#ifdef GPU - // commented only for this custom version of Yolo v2 - //l.forward_gpu = forward_reorg_layer_gpu; - //l.backward_gpu = backward_reorg_layer_gpu; - - l.output_gpu = cuda_make_array(l.output, output_size); - cudaError_t status; - status = cudaMalloc((void **)&(l.output_gpu_int8), sizeof(int8_t)*output_size); - //l.delta_gpu = cuda_make_array(l.delta, output_size); -#endif - -#ifdef OPENCL - l.output_ocl = ocl_make_array(l.output, output_size); -#endif return l; } @@ -2149,20 +1305,6 @@ route_layer make_route_layer(int batch, int n, int *input_layers, int *input_siz // commented only for this custom version of Yolo v2 //l.forward = forward_route_layer; //l.backward = backward_route_layer; -#ifdef GPU - // commented only for this custom version of Yolo v2 - //l.forward_gpu = forward_route_layer_gpu; - //l.backward_gpu = backward_route_layer_gpu; - - //l.delta_gpu = cuda_make_array(l.delta, outputs*batch); - l.output_gpu = cuda_make_array(l.output, outputs*batch); - cudaError_t status; - status = cudaMalloc((void **)&(l.output_gpu_int8), sizeof(int8_t)*outputs*batch); -#endif - -#ifdef OPENCL - l.output_ocl = ocl_make_array(l.output, outputs*batch); -#endif return l; } @@ -2204,10 +1346,6 @@ layer make_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int l.biases[i] = .5; } -#ifdef GPU - l.output_gpu = cuda_make_array(l.output, batch*l.outputs); -#endif - fprintf(stderr, "yolo\n"); srand(0); @@ -2244,20 +1382,6 @@ region_layer make_region_layer(int batch, int w, int h, int n, int classes, int // commented only for this custom version of Yolo v2 //l.forward = forward_region_layer; //l.backward = backward_region_layer; -#ifdef GPU - // commented only for this custom version of Yolo v2 - //l.forward_gpu = forward_region_layer_gpu; - //l.backward_gpu = backward_region_layer_gpu; - l.output_gpu = cuda_make_array(l.output, batch*l.outputs); - cudaError_t status; - status = cudaMalloc((void **)&(l.output_gpu_int8), sizeof(int8_t)*l.outputs*batch); - //l.delta_gpu = cuda_make_array(l.delta, batch*l.outputs); -#endif - -#ifdef OPENCL - l.output_ocl = ocl_make_array(l.output, batch*l.outputs); -#endif - fprintf(stderr, "detection\n"); srand(0); @@ -2292,40 +1416,6 @@ maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int s // commented only for this custom version of Yolo v2 //l.forward = forward_maxpool_layer; //l.backward = backward_maxpool_layer; -#ifdef GPU - // commented only for this custom version of Yolo v2 - //l.forward_gpu = forward_maxpool_layer_gpu; - //l.backward_gpu = backward_maxpool_layer_gpu; - l.indexes_gpu = cuda_make_int_array(output_size); - l.output_gpu = cuda_make_array(l.output, output_size); - cudaError_t status; - status = cudaMalloc((void **)&(l.output_gpu_int8), sizeof(int8_t)*output_size); - //l.delta_gpu = cuda_make_array(l.delta, output_size); - - cudnnStatus_t maxpool_status; - maxpool_status = cudnnCreatePoolingDescriptor(&l.poolingDesc); - - maxpool_status = cudnnSetPooling2dDescriptor( - l.poolingDesc, - CUDNN_POOLING_MAX, - CUDNN_PROPAGATE_NAN, // CUDNN_PROPAGATE_NAN, CUDNN_NOT_PROPAGATE_NAN - l.size, - l.size, - 0, //l.pad, - 0, //l.pad, - l.stride, - l.stride); - - cudnnCreateTensorDescriptor(&l.srcTensorDesc); - cudnnCreateTensorDescriptor(&l.dstTensorDesc); - cudnnSetTensor4dDescriptor(l.srcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l.batch, l.c, l.h, l.w); - cudnnSetTensor4dDescriptor(l.dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l.batch, l.out_c, l.out_h, l.out_w); -#endif - -#ifdef OPENCL - l.indexes_ocl = ocl_make_int_array(output_size); - l.output_ocl = ocl_make_array(l.output, output_size); -#endif fprintf(stderr, "max %d x %d / %d %4d x%4d x%4d -> %4d x%4d x%4d\n", size, size, stride, w, h, c, l.out_w, l.out_h, l.out_c); return l; } @@ -2335,38 +1425,6 @@ maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int s // convolutional_layer.c size_t get_workspace_size(layer l) { -#ifdef CUDNN - if (gpu_index >= 0) { - size_t most = 0; - size_t s = 0; - cudnnGetConvolutionForwardWorkspaceSize(cudnn_handle(), - l.srcTensorDesc, - l.weightDesc, - l.convDesc, - l.dstTensorDesc, - l.fw_algo, - &s); - /* - if (s > most) most = s; - cudnnGetConvolutionBackwardFilterWorkspaceSize(cudnn_handle(), - l.srcTensorDesc, - l.ddstTensorDesc, - l.convDesc, - l.dweightDesc, - l.bf_algo, - &s); - if (s > most) most = s; - cudnnGetConvolutionBackwardDataWorkspaceSize(cudnn_handle(), - l.weightDesc, - l.ddstTensorDesc, - l.convDesc, - l.dsrcTensorDesc, - l.bd_algo, - &s);*/ - if (s > most) most = s; - return most; - } -#endif if (l.xnor) return (size_t)l.bit_align*l.size*l.size*l.c * sizeof(float); return (size_t)l.out_h*l.out_w*l.size*l.size*l.c * sizeof(float); } @@ -2468,86 +1526,6 @@ convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int l.v = calloc(c*n*size*size, sizeof(float)); } -#ifdef GPU - // commented only for this custom version of Yolo v2 - //l.forward_gpu = forward_convolutional_layer_gpu; - //l.backward_gpu = backward_convolutional_layer_gpu; - //l.update_gpu = update_convolutional_layer_gpu; - - if (gpu_index >= 0) { - //if (adam) { - // l.m_gpu = cuda_make_array(l.m, c*n*size*size); - // l.v_gpu = cuda_make_array(l.v, c*n*size*size); - //} - - l.weights_gpu = cuda_make_array(l.weights, c*n*size*size); - //l.weight_updates_gpu = cuda_make_array(l.weight_updates, c*n*size*size); - - l.biases_gpu = cuda_make_array(l.biases, n); - //l.bias_updates_gpu = cuda_make_array(l.bias_updates, n); - - //l.delta_gpu = cuda_make_array(l.delta, l.batch*out_h*out_w*n); - l.output_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n); - - cudaError_t status; - status = cudaMalloc((void **)&(l.output_gpu_int8), sizeof(int8_t)*l.batch*out_h*out_w*n); - - //if (binary) { - // l.binary_weights_gpu = cuda_make_array(l.weights, c*n*size*size); - //} - if (xnor) { - l.binary_weights_gpu = cuda_make_array(l.weights, c*n*size*size); - l.binary_input_gpu = cuda_make_array(0, l.inputs*l.batch); - } - - if (batch_normalize) { - //l.mean_gpu = cuda_make_array(l.mean, n); - //l.variance_gpu = cuda_make_array(l.variance, n); - - l.rolling_mean_gpu = cuda_make_array(l.mean, n); - l.rolling_variance_gpu = cuda_make_array(l.variance, n); - - //l.mean_delta_gpu = cuda_make_array(l.mean, n); - //l.variance_delta_gpu = cuda_make_array(l.variance, n); - - l.scales_gpu = cuda_make_array(l.scales, n); - //l.scale_updates_gpu = cuda_make_array(l.scale_updates, n); - - l.x_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n); - //l.x_norm_gpu = cuda_make_array(l.output, l.batch*out_h*out_w*n); - } -#ifdef CUDNN - cudnnCreateTensorDescriptor(&l.biasTensorDesc); - cudnnCreateActivationDescriptor(&l.activationDesc); - cudnnCreateTensorDescriptor(&l.srcTensorDesc); - cudnnCreateTensorDescriptor(&l.dstTensorDesc); - cudnnCreateFilterDescriptor(&l.weightDesc); - //cudnnCreateTensorDescriptor(&l.dsrcTensorDesc); - //cudnnCreateTensorDescriptor(&l.ddstTensorDesc); - //cudnnCreateFilterDescriptor(&l.dweightDesc); - cudnnCreateConvolutionDescriptor(&l.convDesc); - cudnn_convolutional_setup(&l); -#endif - } -#endif - -#ifdef OPENCL - //if (gpu_index >= 0) { - - l.weights_ocl = ocl_make_array(l.weights, c*n*size*size); - l.biases_ocl = ocl_make_array(l.biases, n); - l.output_ocl = ocl_make_array(l.output, l.batch*out_h*out_w*n); - - if (batch_normalize) { - l.rolling_mean_ocl = ocl_make_array(l.rolling_mean, n); // l.mean - l.rolling_variance_ocl = ocl_make_array(l.rolling_variance, n); // l.variance - l.scales_ocl = ocl_make_array(l.scales, n); - - l.x_ocl = ocl_make_array(l.output, l.batch*out_h*out_w*n); - } - //} -#endif - l.workspace_size = get_workspace_size(l); l.activation = activation; @@ -3117,35 +2095,11 @@ void load_convolutional_weights_cpu(layer l, FILE *fp) fread(l.rolling_variance, sizeof(float), l.n, fp); } fread(l.weights, sizeof(float), num, fp); - /* if (l.adam) { - fread(l.m, sizeof(float), num, fp); - fread(l.v, sizeof(float), num, fp); - } - if (l.flipped) { - transpose_matrix(l.weights, l.c*l.size*l.size, l.n); - }*/ - //if (l.binary) binarize_weights(l.weights, l.n, l.c*l.size*l.size, l.weights); -#ifdef GPU - if (gpu_index >= 0) { - push_convolutional_layer(l); - } -#endif - -#ifdef OPENCL - //if (gpu_index >= 0) { - ocl_push_convolutional_layer(l); - //} -#endif } // parser.c void load_weights_upto_cpu(network *net, char *filename, int cutoff) { -#ifdef GPU - if (net->gpu_index >= 0) { - cuda_set_device(net->gpu_index); - } -#endif fprintf(stderr, "Loading weights from %s...", filename); fflush(stdout); FILE *fp = fopen(filename, "rb"); @@ -3704,26 +2658,7 @@ network parse_network_cfg(char *filename, int batch, int quantized) net.output = get_network_output(net); if (workspace_size) { //printf("%ld\n", workspace_size); -#ifdef GPU - if (gpu_index >= 0) { - net.workspace = cuda_make_array(0, (workspace_size - 1) / sizeof(float) + 1); - int size = net.layers[0].inputs * net.batch; //get_network_input_size(net) * net.batch; - net.input_state_gpu = cuda_make_array(0, size); - } - else { - net.workspace = calloc(1, workspace_size); - } -#else // GPU net.workspace = calloc(1, workspace_size); -#endif // GPU - -#ifdef OPENCL - //if (gpu_index >= 0) { - net.workspace_ocl = ocl_make_array(0, workspace_size / sizeof(float)); - //net.workspace_ocl = ocl_make_array(0, (workspace_size - 1) / sizeof(float) + 1); - //net.workspace_ocl = ocl_make_array(NULL, 1024*1024*1024); - //} -#endif // OPENCL } return net; } @@ -4281,18 +3216,6 @@ void validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, float //char *id = basecfg(path); float *X = val_resized[t].data; //network_predict(net, X); -#ifdef GPU - if (quantized) { - network_predict_gpu_cudnn_quantized(net, X); // quantized - //nms = 0.2; - } - else { - network_predict_gpu_cudnn(net, X); - } -#else // GPU -#ifdef OPENCL - network_predict_opencl(net, X); -#else // OPENCL if (quantized) { network_predict_quantized(net, X); // quantized //nms = 0.2; @@ -4300,8 +3223,6 @@ void validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, float else { network_predict_cpu(net, X); } -#endif // OPENCL -#endif // GPU int nboxes = 0; int letterbox = (args.type == LETTERBOX_DATA); @@ -4566,17 +3487,6 @@ void validate_calibrate_valid(char *datacfg, char *cfgfile, char *weightfile, in yolov2_fuse_conv_batchnorm(net); srand(time(0)); -#ifdef GPU - size_t workspace_size = 0; - for (j = 0; j < net.n; ++j) { - layer l = net.layers[j]; - size_t cur_workspace_size = (size_t)l.out_h*l.out_w*l.size*l.size*l.c * sizeof(float); - if (cur_workspace_size > workspace_size) workspace_size = cur_workspace_size; - } - cudaFree(net.workspace); - net.workspace = calloc(1, workspace_size); -#endif // GPU - list *plist = get_paths(valid_images); char **paths = (char **)list_to_array(plist); diff --git a/src/additionally.h b/src/additionally.h index daf461f..f82ca92 100644 --- a/src/additionally.h +++ b/src/additionally.h @@ -14,20 +14,6 @@ #include #include -#ifdef CUDNN -#include "cudnn.h" -#endif - -#ifdef GPU -#include "cuda_runtime.h" -#include "curand.h" -#include "cublas_v2.h" -#endif - -#ifdef OPENCL -#include "CL/cl.h" -#endif - #ifdef OPENCV #include #include "opencv2/highgui/highgui_c.h" @@ -184,46 +170,6 @@ extern "C" { // float32 to bit-1 and align weights for ALL layers void calculate_binary_weights(struct network net); - // -------------- XNOR-net GPU ------------ - -#ifdef GPU - void swap_binary(convolutional_layer *l); - - void binarize_weights_gpu(float *weights, int n, int size, float *binary); - - void binarize_gpu(float *x, int n, float *binary); - - void im2col_align_ongpu(float *im, - int channels, int height, int width, - int ksize, int stride, int pad, float *data_col, int bit_align); - - void im2col_align_bin_ongpu(float *im, - int channels, int height, int width, - int ksize, int stride, int pad, float *data_col, int bit_align); - - void float_to_bit_gpu(float *src, unsigned char *dst, size_t size); - - void transpose_bin_gpu(unsigned char *A, unsigned char *B, const int n, const int m, - const int lda, const int ldb, const int block_size); - - void fill_int8_gpu(unsigned char *src, unsigned char val, size_t size); - - //void gemm_nn_custom_bin_mean_transposed_gpu(int M, int N, int K, - // unsigned char *A, int lda, - // unsigned char *B, int ldb, - // float *C, int ldc, float *mean_arr); - - void gemm_nn_custom_bin_mean_transposed_gpu(int M, int N, int K, - unsigned char *A, int lda, - unsigned char *B, int ldb, - float *C, int ldc, float *mean_arr, float *bias); - - void gemm_nn_custom_bin_mean_transposed_sequentially_gpu(int M, int N, int K, - unsigned char *A, int lda, - unsigned char *B, int ldb, - float *C, int ldc, float *mean_arr); - -#endif // GPU // -------------- blas.h -------------- @@ -611,85 +557,6 @@ extern "C" { float *binary_input; size_t workspace_size; - -#ifdef GPU - float *z_gpu; - float *r_gpu; - float *h_gpu; - - int *indexes_gpu; - float * prev_state_gpu; - float * forgot_state_gpu; - float * forgot_delta_gpu; - float * state_gpu; - float * state_delta_gpu; - float * gate_gpu; - float * gate_delta_gpu; - float * save_gpu; - float * save_delta_gpu; - float * concat_gpu; - float * concat_delta_gpu; - - float *binary_input_gpu; - float *binary_weights_gpu; - - float * mean_gpu; - float * variance_gpu; - - float * rolling_mean_gpu; - float * rolling_variance_gpu; - - float * variance_delta_gpu; - float * mean_delta_gpu; - - float * col_image_gpu; - - float * x_gpu; - float * x_norm_gpu; - float * weights_gpu; - //float * weight_updates_gpu; - int8_t * weights_int8_gpu; - int8_t * weights_int8_int8x4_gpu; - - float * biases_gpu; - //float * bias_updates_gpu; - float * biases_quant_gpu; - - float * scales_gpu; - //float * scale_updates_gpu; - - float * output_gpu; - int8_t *output_gpu_int8; - float * delta_gpu; - float * rand_gpu; - float * squared_gpu; - float * norms_gpu; -#ifdef CUDNN - cudnnTensorDescriptor_t biasTensorDesc; - cudnnActivationDescriptor_t activationDesc; - cudnnTensorDescriptor_t srcTensorDesc, dstTensorDesc; - //cudnnTensorDescriptor_t dsrcTensorDesc, ddstTensorDesc; - cudnnFilterDescriptor_t weightDesc; - //cudnnFilterDescriptor_t dweightDesc; - cudnnConvolutionDescriptor_t convDesc; - cudnnConvolutionFwdAlgo_t fw_algo; - //cudnnConvolutionBwdDataAlgo_t bd_algo; - //cudnnConvolutionBwdFilterAlgo_t bf_algo; - cudnnPoolingDescriptor_t poolingDesc; -#endif -#endif - -#ifdef OPENCL - cl_mem weights_ocl; - cl_mem biases_ocl; - cl_mem scales_ocl; - cl_mem rolling_mean_ocl; - cl_mem rolling_variance_ocl; - - cl_mem output_ocl; - cl_mem indexes_ocl; - cl_mem x_ocl; -#endif }; typedef layer local_layer; @@ -756,17 +623,6 @@ extern "C" { int gpu_index; tree *hierarchy; int do_input_calibration; - -#ifdef GPU - float *input_state_gpu; - - float **input_gpu; - float **truth_gpu; -#endif - -#ifdef OPENCL - cl_mem workspace_ocl; -#endif } network; typedef struct network_state { @@ -778,29 +634,12 @@ extern "C" { int train; int index; network net; -#ifdef OPENCL - cl_mem input_ocl; - cl_mem workspace_ocl; -#endif } network_state; // network.c network make_network(int n); - - // network.c -#ifdef GPU -#ifdef CUDNN - void cudnn_convolutional_setup(layer *l); - void cuda_set_device(int n); -#endif -#endif - -#ifdef OPENCL - bool ocl_initialize(); -#endif - // network.c void set_batch_network(network *net, int b); @@ -957,25 +796,6 @@ extern "C" { // -------------- yolov2_forward_network_gpu.c -------------------- -#ifdef GPU - // detect on GPU: yolov2_forward_network_gpu.cu - float *network_predict_gpu_cudnn(network net, float *input); - - // detect on GPU: yolov2_forward_network_gpu.cu - quantized INT8x4 - float *network_predict_gpu_cudnn_quantized(network net, float *input); - - // // init weights and cuDNN for quantized IINT8x4 - void init_gpu_int8x4(network net); -#endif - - // -------------- yolov2_forward_network_ocl.c -------------------- - -#ifdef OPENCL - // detect using OpenCL: yolov2_forward_network_gpu.cpp - float *network_predict_opencl(network net, float *input); -#endif - - // -------------- gettimeofday for Windows-------------------- #if defined(_MSC_VER) diff --git a/src/main.c b/src/main.c index 95dbb5a..959ca22 100644 --- a/src/main.c +++ b/src/main.c @@ -110,13 +110,6 @@ void draw_detections_v3(image im, detection *dets, int num, float thresh, char * if (width < 1) width = 1; - /* - if(0){ - width = pow(prob, 1./2.)*10+1; - alphabet = 0; - } - */ - //printf("%d %s: %.0f%%\n", i, names[selected_detections[i].best_class], prob*100); int offset = selected_detections[i].best_class * 123457 % classes; float red = get_color(2, offset, classes); @@ -195,19 +188,8 @@ void test_detector_cpu(char **names, char *cfgfile, char *weightfile, char *file float *X = sized.data; time = clock(); - //network_predict(net, X); -#ifdef GPU - if (quantized) { - network_predict_gpu_cudnn_quantized(net, X); // quantized works only with Yolo v2 - //nms = 0.2; - } - else { - network_predict_gpu_cudnn(net, X); - } -#else -#ifdef OPENCL - network_predict_opencl(net, X); -#else + + //network_predict(net, X); if (quantized) { network_predict_quantized(net, X); // quantized works only with Yolo v2 nms = 0.2; @@ -215,9 +197,8 @@ void test_detector_cpu(char **names, char *cfgfile, char *weightfile, char *file else { network_predict_cpu(net, X); } -#endif -#endif - printf("%s: Predicted in %f seconds.\n", input, (float)(clock() - time) / CLOCKS_PER_SEC); //sec(clock() - time)); + + printf("%s: Predicted in %f seconds.\n", input, (float)(clock() - time) / CLOCKS_PER_SEC); //sec(clock() - time)); //get_region_boxes_cpu(l, 1, 1, thresh, probs, boxes, 0, 0); // get_region_boxes(): region_layer.c // nms (non maximum suppression) - if (IoU(box[i], box[j]) > nms) then remove one of two boxes with lower probability @@ -238,6 +219,7 @@ void test_detector_cpu(char **names, char *cfgfile, char *weightfile, char *file free_image(sized); // image.c free(boxes); free_ptrs((void **)probs, l.w*l.h*l.n); // utils.c + #ifdef OPENCV cvWaitKey(0); cvDestroyAllWindows(); @@ -246,340 +228,6 @@ void test_detector_cpu(char **names, char *cfgfile, char *weightfile, char *file } } - -// --------------- Detect on the Video --------------- - -#ifdef OPENCV -static char **demo_names; -static int demo_classes; -static int demo_quantized; - -static float **probs; -static box *boxes; -static network net; -static image in; -static image in_s; -static image det; -static image det_s; -static image disp = { 0 }; -static CvCapture * cap; -static float fps = 0; -static float demo_thresh = 0; - -IplImage* in_img; -IplImage* det_img; -IplImage* show_img; - -// draw bounded boxes of found objects on the image, from: image.c -void draw_detections_cv_v3(IplImage* show_img, detection *dets, int num, float thresh, char **names, image **alphabet, int classes, int ext_output) -{ - int i, j; - if (!show_img) return; - static int frame_id = 0; - frame_id++; - - for (i = 0; i < num; ++i) { - char labelstr[4096] = { 0 }; - int class_id = -1; - for (j = 0; j < classes; ++j) { - if (dets[i].prob[j] > thresh) { - if (class_id < 0) { - strcat(labelstr, names[j]); - class_id = j; - } - else { - strcat(labelstr, ", "); - strcat(labelstr, names[j]); - } - printf("%s: %.0f%% ", names[j], dets[i].prob[j] * 100); - } - } - if (class_id >= 0) { - int width = show_img->height * .006; - - //printf("%d %s: %.0f%%\n", i, names[class_id], prob*100); - int offset = class_id * 123457 % classes; - float red = get_color(2, offset, classes); - float green = get_color(1, offset, classes); - float blue = get_color(0, offset, classes); - float rgb[3]; - - //width = prob*20+2; - - rgb[0] = red; - rgb[1] = green; - rgb[2] = blue; - box b = dets[i].bbox; - //printf("%f %f %f %f\n", b.x, b.y, b.w, b.h); - - int left = (b.x - b.w / 2.)*show_img->width; - int right = (b.x + b.w / 2.)*show_img->width; - int top = (b.y - b.h / 2.)*show_img->height; - int bot = (b.y + b.h / 2.)*show_img->height; - - if (left < 0) left = 0; - if (right > show_img->width - 1) right = show_img->width - 1; - if (top < 0) top = 0; - if (bot > show_img->height - 1) bot = show_img->height - 1; - - float const font_size = show_img->height / 1000.F; - CvPoint pt1, pt2, pt_text, pt_text_bg1, pt_text_bg2; - pt1.x = left; - pt1.y = top; - pt2.x = right; - pt2.y = bot; - pt_text.x = left; - pt_text.y = top - 12; - pt_text_bg1.x = left; - pt_text_bg1.y = top - (10 + 25 * font_size); - pt_text_bg2.x = right; - pt_text_bg2.y = top; - CvScalar color; - color.val[0] = red * 256; - color.val[1] = green * 256; - color.val[2] = blue * 256; - - cvRectangle(show_img, pt1, pt2, color, width, 8, 0); - if (ext_output) - printf("\t(left_x: %4.0f top_y: %4.0f width: %4.0f height: %4.0f)\n", - (float)left, (float)top, b.w*show_img->width, b.h*show_img->height); - else - printf("\n"); - cvRectangle(show_img, pt_text_bg1, pt_text_bg2, color, width, 8, 0); - cvRectangle(show_img, pt_text_bg1, pt_text_bg2, color, CV_FILLED, 8, 0); // filled - CvScalar black_color; - black_color.val[0] = 0; - CvFont font; - cvInitFont(&font, CV_FONT_HERSHEY_SIMPLEX, font_size, font_size, 0, font_size * 3, 8); - cvPutText(show_img, labelstr, pt_text, &font, black_color); - } - } -} - - - -image get_image_from_stream_resize_cpu(CvCapture *cap, int w, int h, IplImage** in_img) -{ - IplImage* src = cvQueryFrame(cap); - if (!src) return make_empty_image(0, 0, 0); - IplImage* new_img = cvCreateImage(cvSize(w, h), IPL_DEPTH_8U, 3); - *in_img = cvCreateImage(cvSize(src->width, src->height), IPL_DEPTH_8U, 3); - cvResize(src, *in_img, CV_INTER_LINEAR); - cvResize(src, new_img, CV_INTER_LINEAR); - image im = ipl_to_image(new_img); - cvReleaseImage(&new_img); - rgbgr_image(im); - return im; -} - -static void *fetch_in_thread(void *ptr) -{ - in = get_image_from_stream_resize_cpu(cap, net.w, net.h, &in_img); // image.c - if (!in.data) { - error("Stream closed."); - } - in_s = make_image(in.w, in.h, in.c); // image.c - memcpy(in_s.data, in.data, in.h*in.w*in.c * sizeof(float)); - - return 0; -} - -static void *detect_in_thread(void *ptr) -{ - float nms = .4; - layer l = net.layers[net.n - 1]; - float *X = det_s.data; - - //float *prediction = network_predict(net, X); -#ifdef GPU - if (demo_quantized) { - network_predict_gpu_cudnn_quantized(net, X); // quantized works only with Yolo v2 - //nms = 0.2; - } - else { - network_predict_gpu_cudnn(net, X); - } -#else -#ifdef OPENCL - network_predict_opencl(net, X); -#else - if (demo_quantized) { - network_predict_quantized(net, X); // quantized works only with Yolo v2 - nms = 0.2; - } - else { - network_predict_cpu(net, X); - } -#endif -#endif - - free_image(det_s); - //get_region_boxes_cpu(l, 1, 1, demo_thresh, probs, boxes, 0, 0); // get_region_boxes(): region_layer.c - //if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, l.classes, nms); // box.c - float hier_thresh = 0.5; - int ext_output = 1, letterbox = 0, nboxes = 0; - detection *dets = NULL; - if (letterbox) - dets = get_network_boxes(&net, in_img->width, in_img->height, demo_thresh, demo_thresh, 0, 1, &nboxes, 1); // letter box - else - dets = get_network_boxes(&net, det_s.w, det_s.h, demo_thresh, demo_thresh, 0, 1, &nboxes, 0); // resized - //if (nms) do_nms_obj(dets, nboxes, l.classes, nms); // bad results - if (nms) do_nms_sort(dets, nboxes, l.classes, nms); - draw_detections_cv_v3(det_img, dets, nboxes, demo_thresh, demo_names, NULL, demo_classes, ext_output); - free_detections(dets, nboxes); - - printf("\033[2J"); - printf("\033[1;1H"); - printf("\nFPS:%.1f\n", fps); - printf("Objects:\n\n"); - - return 0; -} - -static double get_wall_time() -{ - struct timeval time; - if (gettimeofday(&time, NULL)) { - return 0; - } - return (double)time.tv_sec + (double)time.tv_usec * .000001; -} - - -// Detect on Video: this function uses other functions not from this file -void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, - int frame_skip, char *prefix, int quantized, char *out_filename, int dont_show) -{ - int delay = frame_skip; - demo_names = names; - demo_classes = classes; - demo_thresh = thresh; - printf("Demo\n"); - net = parse_network_cfg(cfgfile, 1, quantized); - if (weightfile) { - //load_weights(&net, weightfile); // parser.c - load_weights_upto_cpu(&net, weightfile, net.n); - } - //set_batch_network(&net, 1); - yolov2_fuse_conv_batchnorm(net); - calculate_binary_weights(net); - if (quantized) { - printf("\n\n Quantinization! \n\n"); - demo_quantized = 1; - quantinization_and_get_multipliers(net); - } - srand(2222222); - - if (filename) { - printf("video file: %s\n", filename); - cap = cvCaptureFromFile(filename); - } - else { - cap = cvCaptureFromCAM(cam_index); - } - - if (!cap) error("Couldn't connect to webcam.\n"); - - layer l = net.layers[net.n - 1]; - int j; - - boxes = (box *)calloc(l.w*l.h*l.n, sizeof(box)); - probs = (float **)calloc(l.w*l.h*l.n, sizeof(float *)); - for (j = 0; j < l.w*l.h*l.n; ++j) probs[j] = (float *)calloc(l.classes, sizeof(float *)); - - pthread_t fetch_thread; - pthread_t detect_thread; - - fetch_in_thread(0); - det_img = in_img; - det = in; - det_s = in_s; - - fetch_in_thread(0); - detect_in_thread(0); - disp = det; - show_img = det_img; - det_img = in_img; - det = in; - det_s = in_s; - - int count = 0; - if (!prefix && !dont_show) { - cvNamedWindow("Demo", CV_WINDOW_NORMAL); - cvMoveWindow("Demo", 0, 0); - cvResizeWindow("Demo", 1352, 1013); - } - - CvVideoWriter* output_video_writer = NULL; // cv::VideoWriter output_video; - if (out_filename) - { - CvSize size; - size.width = det_img->width, size.height = det_img->height; - int src_fps = 25; - src_fps = cvGetCaptureProperty(cap, CV_CAP_PROP_FPS); - output_video_writer = cvCreateVideoWriter(out_filename, CV_FOURCC('D', 'I', 'V', 'X'), src_fps, size, 1); - } - - double before = get_wall_time(); - - while (1) { - ++count; - if (pthread_create(&fetch_thread, 0, fetch_in_thread, 0)) error("Thread creation failed"); - if (pthread_create(&detect_thread, 0, detect_in_thread, 0)) error("Thread creation failed"); - - if (!prefix) { - if (!dont_show) { - //show_image(disp, "Demo"); - show_image_cv_ipl(show_img, "Demo"); - int c = cvWaitKey(1); - } - } - else { - char buff[256]; - sprintf(buff, "%s_%08d", prefix, count); - save_image_png(disp, buff); - } - - // save video file - if (output_video_writer && show_img) { - cvWriteFrame(output_video_writer, show_img); - //printf("\n cvWriteFrame \n"); - } - - cvReleaseImage(&show_img); - - pthread_join(fetch_thread, 0); - pthread_join(detect_thread, 0); - - if (delay == 0) { - free_image(disp); - disp = det; - show_img = det_img; - } - det_img = in_img; - det = in; - det_s = in_s; - - --delay; - if (delay < 0) { - delay = frame_skip; - - double after = get_wall_time(); - float curr = 1. / (after - before); - fps = curr; - before = after; - } - } -} -#else -void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, - int frame_skip, char *prefix, int quantized, char *out_filename, int dont_show) -{ - fprintf(stderr, "Demo needs OpenCV for webcam images.\n"); -} -#endif - - // get command line parameters and load objects names void run_detector(int argc, char **argv) { @@ -592,8 +240,9 @@ void run_detector(int argc, char **argv) int quantized = find_arg(argc, argv, "-quantized"); int input_calibration = find_int_arg(argc, argv, "-input_calibration", 0); int frame_skip = find_int_arg(argc, argv, "-s", 0); - if (argc < 4) { - fprintf(stderr, "usage: %s %s [demo/test/] [cfg] [weights (optional)]\n", argv[0], argv[1]); + + if (argc < 4) { + fprintf(stderr, "usage: %s %s [test] [cfg] [weights (optional)]\n", argv[0], argv[1]); return; } @@ -620,15 +269,7 @@ void run_detector(int argc, char **argv) int classes = obj_count; if (0 == strcmp(argv[2], "test")) test_detector_cpu(names, cfg, weights, filename, thresh, quantized, dont_show); - //else if (0 == strcmp(argv[2], "train")) train_detector(datacfg, cfg, weights, gpus, ngpus, clear); - //else if (0 == strcmp(argv[2], "valid")) validate_detector(datacfg, cfg, weights); - //else if (0 == strcmp(argv[2], "recall")) validate_detector_recall(datacfg, cfg, weights); - else if (0 == strcmp(argv[2], "map")) validate_detector_map(obj_names, cfg, weights, thresh, quantized, iou_thresh); - else if (0 == strcmp(argv[2], "calibrate")) validate_calibrate_valid(obj_names, cfg, weights, input_calibration); - else if (0 == strcmp(argv[2], "demo")) { - demo(cfg, weights, thresh, cam_index, filename, names, classes, frame_skip, prefix, quantized, out_filename, dont_show); - } - + int i; for (i = 0; i < obj_count; ++i) free(names[i]); free(names); diff --git a/x64/cpu_Release/additionally.obj b/x64/cpu_Release/additionally.obj new file mode 100644 index 0000000..5ce2b62 Binary files /dev/null and b/x64/cpu_Release/additionally.obj differ diff --git a/x64/cpu_Release/box.obj b/x64/cpu_Release/box.obj new file mode 100644 index 0000000..4b3c793 Binary files /dev/null and b/x64/cpu_Release/box.obj differ diff --git a/x64/cpu_Release/main.obj b/x64/cpu_Release/main.obj new file mode 100644 index 0000000..2147715 Binary files /dev/null and b/x64/cpu_Release/main.obj differ diff --git a/x64/cpu_Release/vc140.pdb b/x64/cpu_Release/vc140.pdb new file mode 100644 index 0000000..838640e Binary files /dev/null and b/x64/cpu_Release/vc140.pdb differ diff --git a/x64/cpu_Release/yolo_cpu.Build.CppClean.log b/x64/cpu_Release/yolo_cpu.Build.CppClean.log new file mode 100644 index 0000000..3b764e4 --- /dev/null +++ b/x64/cpu_Release/yolo_cpu.Build.CppClean.log @@ -0,0 +1,16 @@ +f:\github\yolo2_light_cpu\x64\cpu_release\yolov2_forward_network.obj +f:\github\yolo2_light_cpu\x64\cpu_release\yolov2_forward_network_quantized.obj +f:\github\yolo2_light_cpu\x64\cpu_release\main.obj +f:\github\yolo2_light_cpu\x64\cpu_release\additionally.obj +f:\github\yolo2_light_cpu\x64\cpu_release\box.obj +f:\github\yolo2_light_cpu\x64\cpu_release\vc140.pdb +f:\github\yolo2_light_cpu\bin\yolo_cpu.exe +f:\github\yolo2_light_cpu\bin\yolo_cpu.ipdb +f:\github\yolo2_light_cpu\bin\yolo_cpu.iobj +f:\github\yolo2_light_cpu\bin\yolo_cpu.pdb +f:\github\yolo2_light_cpu\x64\cpu_release\yolo_cpu.tlog\cl.command.1.tlog +f:\github\yolo2_light_cpu\x64\cpu_release\yolo_cpu.tlog\cl.read.1.tlog +f:\github\yolo2_light_cpu\x64\cpu_release\yolo_cpu.tlog\cl.write.1.tlog +f:\github\yolo2_light_cpu\x64\cpu_release\yolo_cpu.tlog\link.command.1.tlog +f:\github\yolo2_light_cpu\x64\cpu_release\yolo_cpu.tlog\link.read.1.tlog +f:\github\yolo2_light_cpu\x64\cpu_release\yolo_cpu.tlog\link.write.1.tlog diff --git a/x64/cpu_Release/yolo_cpu.log b/x64/cpu_Release/yolo_cpu.log new file mode 100644 index 0000000..4a698a7 --- /dev/null +++ b/x64/cpu_Release/yolo_cpu.log @@ -0,0 +1,198 @@ + additionally.c + box.c + main.c + yolov2_forward_network.c + yolov2_forward_network_quantized.c +src\box.c(104): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data +src\box.c(139): warning C4305: 'initializing': truncation from 'double' to 'float' +src\box.c(140): warning C4305: 'initializing': truncation from 'double' to 'float' +src\box.c(141): warning C4305: 'initializing': truncation from 'double' to 'float' +src\box.c(142): warning C4305: 'initializing': truncation from 'double' to 'float' +src\box.c(144): warning C4305: 'initializing': truncation from 'double' to 'float' +src\box.c(161): warning C4305: 'initializing': truncation from 'double' to 'float' +src\box.c(162): warning C4305: 'initializing': truncation from 'double' to 'float' +src\box.c(163): warning C4305: 'initializing': truncation from 'double' to 'float' +src\box.c(164): warning C4305: 'initializing': truncation from 'double' to 'float' +src\box.c(166): warning C4305: 'initializing': truncation from 'double' to 'float' +src\box.c(186): warning C4305: 'initializing': truncation from 'double' to 'float' +src\box.c(187): warning C4305: 'initializing': truncation from 'double' to 'float' +src\box.c(188): warning C4305: 'initializing': truncation from 'double' to 'float' +src\box.c(189): warning C4305: 'initializing': truncation from 'double' to 'float' +src\box.c(191): warning C4305: 'initializing': truncation from 'double' to 'float' +src\box.c(226): warning C4244: '=': conversion from 'double' to 'float', possible loss of data +src\box.c(227): warning C4244: '=': conversion from 'double' to 'float', possible loss of data +src\box.c(228): warning C4244: '=': conversion from 'double' to 'float', possible loss of data +src\box.c(229): warning C4244: '=': conversion from 'double' to 'float', possible loss of data +src\box.c(355): warning C4244: '=': conversion from 'double' to 'float', possible loss of data +src\box.c(356): warning C4244: '=': conversion from 'double' to 'float', possible loss of data +src\box.c(365): warning C4244: '=': conversion from 'double' to 'float', possible loss of data +src\box.c(366): warning C4244: '=': conversion from 'double' to 'float', possible loss of data +f:\github\yolo2_light_cpu\src\additionally.h(61): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\yolov2_forward_network.c) +f:\github\yolo2_light_cpu\src\additionally.h(62): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\yolov2_forward_network.c) +f:\github\yolo2_light_cpu\src\additionally.h(61): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\additionally.c) +f:\github\yolo2_light_cpu\src\additionally.h(60): warning C4244: 'initializing': conversion from 'double' to 'int', possible loss of data (compiling source file src\yolov2_forward_network.c) +f:\github\yolo2_light_cpu\src\additionally.h(61): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\yolov2_forward_network_quantized.c) +f:\github\yolo2_light_cpu\src\additionally.h(62): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\additionally.c) +f:\github\yolo2_light_cpu\src\additionally.h(61): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\main.c) +f:\github\yolo2_light_cpu\src\additionally.h(71): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\yolov2_forward_network.c) +f:\github\yolo2_light_cpu\src\additionally.h(62): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\yolov2_forward_network_quantized.c) +f:\github\yolo2_light_cpu\src\additionally.h(60): warning C4244: 'initializing': conversion from 'double' to 'int', possible loss of data (compiling source file src\additionally.c) +f:\github\yolo2_light_cpu\src\additionally.h(62): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\main.c) +f:\github\yolo2_light_cpu\src\additionally.h(72): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\yolov2_forward_network.c) +f:\github\yolo2_light_cpu\src\additionally.h(60): warning C4244: 'initializing': conversion from 'double' to 'int', possible loss of data (compiling source file src\yolov2_forward_network_quantized.c) +f:\github\yolo2_light_cpu\src\additionally.h(71): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\additionally.c) +f:\github\yolo2_light_cpu\src\additionally.h(60): warning C4244: 'initializing': conversion from 'double' to 'int', possible loss of data (compiling source file src\main.c) +f:\github\yolo2_light_cpu\src\additionally.h(74): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\yolov2_forward_network.c) +f:\github\yolo2_light_cpu\src\additionally.h(71): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\yolov2_forward_network_quantized.c) +f:\github\yolo2_light_cpu\src\additionally.h(72): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\additionally.c) +f:\github\yolo2_light_cpu\src\additionally.h(71): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\main.c) +f:\github\yolo2_light_cpu\src\additionally.h(75): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\yolov2_forward_network.c) +f:\github\yolo2_light_cpu\src\additionally.h(72): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\yolov2_forward_network_quantized.c) +f:\github\yolo2_light_cpu\src\additionally.h(74): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\additionally.c) +f:\github\yolo2_light_cpu\src\additionally.h(72): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\main.c) +f:\github\yolo2_light_cpu\src\additionally.h(76): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\yolov2_forward_network.c) +f:\github\yolo2_light_cpu\src\additionally.h(74): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\yolov2_forward_network_quantized.c) +f:\github\yolo2_light_cpu\src\additionally.h(75): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\additionally.c) +f:\github\yolo2_light_cpu\src\additionally.h(74): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\main.c) +f:\github\yolo2_light_cpu\src\additionally.h(77): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\yolov2_forward_network.c) +f:\github\yolo2_light_cpu\src\additionally.h(75): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\yolov2_forward_network_quantized.c) +f:\github\yolo2_light_cpu\src\additionally.h(76): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\additionally.c) +f:\github\yolo2_light_cpu\src\additionally.h(75): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\main.c) +f:\github\yolo2_light_cpu\src\additionally.h(78): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\yolov2_forward_network.c) +f:\github\yolo2_light_cpu\src\additionally.h(76): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\yolov2_forward_network_quantized.c) +f:\github\yolo2_light_cpu\src\additionally.h(77): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\additionally.c) +f:\github\yolo2_light_cpu\src\additionally.h(76): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\main.c) +f:\github\yolo2_light_cpu\src\additionally.h(81): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\yolov2_forward_network.c) +f:\github\yolo2_light_cpu\src\additionally.h(77): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\yolov2_forward_network_quantized.c) +f:\github\yolo2_light_cpu\src\additionally.h(78): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\additionally.c) +f:\github\yolo2_light_cpu\src\additionally.h(77): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\main.c) +f:\github\yolo2_light_cpu\src\additionally.h(82): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\yolov2_forward_network.c) +f:\github\yolo2_light_cpu\src\additionally.h(78): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\yolov2_forward_network_quantized.c) +f:\github\yolo2_light_cpu\src\additionally.h(81): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\additionally.c) +f:\github\yolo2_light_cpu\src\additionally.h(78): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\main.c) +f:\github\yolo2_light_cpu\src\additionally.h(83): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\yolov2_forward_network.c) +f:\github\yolo2_light_cpu\src\additionally.h(81): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\yolov2_forward_network_quantized.c) +f:\github\yolo2_light_cpu\src\additionally.h(82): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\additionally.c) +f:\github\yolo2_light_cpu\src\additionally.h(81): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\main.c) +f:\github\yolo2_light_cpu\src\additionally.h(88): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\yolov2_forward_network.c) +f:\github\yolo2_light_cpu\src\additionally.h(82): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\yolov2_forward_network_quantized.c) +f:\github\yolo2_light_cpu\src\additionally.h(83): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\additionally.c) +f:\github\yolo2_light_cpu\src\additionally.h(82): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\main.c) +f:\github\yolo2_light_cpu\src\additionally.h(89): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\yolov2_forward_network.c) +f:\github\yolo2_light_cpu\src\additionally.h(83): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\yolov2_forward_network_quantized.c) +f:\github\yolo2_light_cpu\src\additionally.h(88): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\additionally.c) +f:\github\yolo2_light_cpu\src\additionally.h(83): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\main.c) +f:\github\yolo2_light_cpu\src\additionally.h(88): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\yolov2_forward_network_quantized.c) +f:\github\yolo2_light_cpu\src\additionally.h(89): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\additionally.c) +f:\github\yolo2_light_cpu\src\additionally.h(88): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\main.c) +f:\github\yolo2_light_cpu\src\additionally.h(89): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\yolov2_forward_network_quantized.c) +f:\github\yolo2_light_cpu\src\additionally.h(89): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data (compiling source file src\main.c) +src\yolov2_forward_network.c(24): warning C4133: 'function': incompatible types - from 'float *' to 'uint32_t *' +src\yolov2_forward_network.c(24): warning C4133: 'function': incompatible types - from 'char *' to 'uint32_t *' +src\yolov2_forward_network.c(24): warning C4267: 'function': conversion from 'size_t' to 'const int', possible loss of data +src\yolov2_forward_network.c(127): warning C4267: 'function': conversion from 'size_t' to 'int', possible loss of data +src\yolov2_forward_network_quantized.c(111): warning C4244: '=': conversion from 'double' to 'int', possible loss of data +src\yolov2_forward_network_quantized.c(138): warning C4244: 'initializing': conversion from 'double' to 'int', possible loss of data +src\main.c(109): warning C4244: 'initializing': conversion from 'double' to 'int', possible loss of data +src\yolov2_forward_network_quantized.c(157): warning C4244: 'initializing': conversion from 'double' to 'int', possible loss of data +src\main.c(128): warning C4244: 'initializing': conversion from 'double' to 'int', possible loss of data +src\yolov2_forward_network_quantized.c(104): warning C4101: 'i': unreferenced local variable +src\main.c(129): warning C4244: 'initializing': conversion from 'double' to 'int', possible loss of data +src\main.c(130): warning C4244: 'initializing': conversion from 'double' to 'int', possible loss of data +src\main.c(131): warning C4244: 'initializing': conversion from 'double' to 'int', possible loss of data +src\yolov2_forward_network_quantized.c(554): warning C4133: '=': incompatible types - from 'int *' to 'int8_t *' +src\main.c(195): warning C4305: '=': truncation from 'double' to 'float' +src\yolov2_forward_network_quantized.c(558): warning C4244: 'initializing': conversion from 'float' to 'int16_t', possible loss of data +src\main.c(169): warning C4305: 'initializing': truncation from 'double' to 'float' +src\yolov2_forward_network_quantized.c(532): warning C4101: 'f': unreferenced local variable +src\yolov2_forward_network_quantized.c(757): warning C4244: '=': conversion from 'float' to 'conv_t', possible loss of data +src\yolov2_forward_network_quantized.c(764): warning C4244: '+=': conversion from 'float' to 'conv_t', possible loss of data +src\yolov2_forward_network_quantized.c(641): warning C4101: 'f': unreferenced local variable +src\yolov2_forward_network_quantized.c(1042): warning C4013: 'forward_maxpool_layer_cpu' undefined; assuming extern returning int +src\yolov2_forward_network_quantized.c(1046): warning C4013: 'forward_route_layer_cpu' undefined; assuming extern returning int +src\yolov2_forward_network_quantized.c(1050): warning C4013: 'forward_reorg_layer_cpu' undefined; assuming extern returning int +src\yolov2_forward_network_quantized.c(1054): warning C4013: 'forward_upsample_layer_cpu' undefined; assuming extern returning int +src\yolov2_forward_network_quantized.c(1058): warning C4013: 'forward_shortcut_layer_cpu' undefined; assuming extern returning int +src\yolov2_forward_network_quantized.c(1062): warning C4013: 'forward_yolo_layer_cpu' undefined; assuming extern returning int +src\yolov2_forward_network_quantized.c(1066): warning C4013: 'forward_region_layer_cpu' undefined; assuming extern returning int +src\yolov2_forward_network_quantized.c(1030): warning C4101: 'k': unreferenced local variable +src\yolov2_forward_network_quantized.c(1150): warning C4244: 'initializing': conversion from 'double' to 'int16_t', possible loss of data +src\yolov2_forward_network_quantized.c(1095): warning C4101: 'k': unreferenced local variable +src\yolov2_forward_network_quantized.c(1201): warning C4244: 'initializing': conversion from 'float' to 'int16_t', possible loss of data +src\yolov2_forward_network_quantized.c(1334): warning C4244: '+=': conversion from 'float' to 'uint64_t', possible loss of data +src\yolov2_forward_network_quantized.c(1370): warning C4244: '+=': conversion from 'double' to 'float', possible loss of data +src\yolov2_forward_network_quantized.c(1381): warning C4244: '=': conversion from 'int' to 'float', possible loss of data +src\yolov2_forward_network_quantized.c(1296): warning C4244: 'initializing': conversion from 'const float' to 'const int', possible loss of data +src\yolov2_forward_network_quantized.c(1385): warning C4244: 'initializing': conversion from 'double' to 'float', possible loss of data +src\yolov2_forward_network_quantized.c(1429): warning C4267: 'function': conversion from 'size_t' to 'int', possible loss of data +src\yolov2_forward_network_quantized.c(1444): warning C4244: 'function': conversion from 'float' to 'int', possible loss of data +src\yolov2_forward_network_quantized.c(1418): warning C4101: 'k': unreferenced local variable +src\additionally.c(77): warning C4267: 'initializing': conversion from 'size_t' to 'int', possible loss of data +src\additionally.c(100): warning C4244: '+=': conversion from 'double' to 'float', possible loss of data +src\additionally.c(113): warning C4244: '=': conversion from 'int' to 'float', possible loss of data +src\additionally.c(173): warning C4244: '=': conversion from 'double' to 'float', possible loss of data +src\additionally.c(182): warning C4267: '=': conversion from 'size_t' to 'int', possible loss of data +src\additionally.c(187): warning C4267: '=': conversion from 'size_t' to 'int', possible loss of data +src\additionally.c(245): warning C4244: 'return': conversion from 'unsigned long' to 'uint8_t', possible loss of data +src\additionally.c(304): warning C4068: unknown pragma +src\additionally.c(307): warning C4068: unknown pragma +src\additionally.c(326): warning C4133: 'function': incompatible types - from 'uint32_t *' to 'const unsigned char *const ' +src\additionally.c(326): warning C4133: 'function': incompatible types - from 'uint32_t *' to 'unsigned char *const ' +src\additionally.c(390): warning C4133: 'function': incompatible types - from 'float *' to 'unsigned char *const ' +src\additionally.c(401): warning C4133: 'function': incompatible types - from 'float *' to 'unsigned char *const ' +src\additionally.c(415): warning C4133: 'function': incompatible types - from 'float *' to 'unsigned char *const ' +src\additionally.c(429): warning C4133: 'function': incompatible types - from 'float *' to 'unsigned char *const ' +src\additionally.c(443): warning C4133: 'function': incompatible types - from 'float *' to 'unsigned char *const ' +src\additionally.c(457): warning C4133: 'function': incompatible types - from 'float *' to 'unsigned char *const ' +src\additionally.c(523): warning C4244: 'initializing': conversion from 'DWORD64' to 'int', possible loss of data +src\additionally.c(545): warning C4101: 'h': unreferenced local variable +src\additionally.c(657): warning C4267: 'function': conversion from 'size_t' to 'int', possible loss of data +src\additionally.c(675): warning C4267: 'function': conversion from 'size_t' to 'int', possible loss of data +src\additionally.c(744): warning C4244: '=': conversion from 'double' to 'float', possible loss of data +src\additionally.c(820): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data +src\additionally.c(849): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data +src\additionally.c(860): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data +src\additionally.c(888): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data +src\additionally.c(898): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data +src\additionally.c(1532): warning C4244: '=': conversion from 'double' to 'float', possible loss of data +src\additionally.c(1473): warning C4244: 'initializing': conversion from 'double' to 'float', possible loss of data +src\additionally.c(1729): warning C4244: '=': conversion from 'double' to 'float', possible loss of data +src\additionally.c(1754): warning C4244: '=': conversion from 'double' to 'float', possible loss of data +src\additionally.c(1880): warning C4244: 'initializing': conversion from 'double' to 'int', possible loss of data +src\additionally.c(1881): warning C4244: 'initializing': conversion from 'double' to 'int', possible loss of data +src\additionally.c(2016): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data +src\additionally.c(2024): warning C4244: 'return': conversion from 'double' to 'float', possible loss of data +src\additionally.c(2192): warning C4305: 'function': truncation from 'double' to 'float' +src\additionally.c(2213): warning C4267: 'initializing': conversion from 'size_t' to 'int', possible loss of data +src\additionally.c(2220): warning C4244: 'initializing': conversion from 'double' to 'float', possible loss of data +src\additionally.c(2233): warning C4267: 'initializing': conversion from 'size_t' to 'int', possible loss of data +src\additionally.c(2270): warning C4305: 'function': truncation from 'double' to 'float' +src\additionally.c(2279): warning C4267: 'initializing': conversion from 'size_t' to 'int', possible loss of data +src\additionally.c(2286): warning C4244: 'initializing': conversion from 'double' to 'float', possible loss of data +src\additionally.c(2372): warning C4267: 'initializing': conversion from 'size_t' to 'int', possible loss of data +src\additionally.c(2466): warning C4305: 'function': truncation from 'double' to 'float' +src\additionally.c(2467): warning C4305: 'function': truncation from 'double' to 'float' +src\additionally.c(2468): warning C4305: 'function': truncation from 'double' to 'float' +src\additionally.c(2494): warning C4305: 'function': truncation from 'double' to 'float' +src\additionally.c(2495): warning C4305: 'function': truncation from 'double' to 'float' +src\additionally.c(2496): warning C4305: 'function': truncation from 'double' to 'float' +src\additionally.c(2477): warning C4267: 'initializing': conversion from 'size_t' to 'int', possible loss of data +src\additionally.c(2486): warning C4244: 'initializing': conversion from 'double' to 'float', possible loss of data +src\additionally.c(2526): warning C4267: 'initializing': conversion from 'size_t' to 'int', possible loss of data +src\additionally.c(2536): warning C4244: 'initializing': conversion from 'double' to 'float', possible loss of data +src\additionally.c(2882): warning C4244: '=': conversion from 'double' to 'float', possible loss of data +src\additionally.c(2883): warning C4244: '=': conversion from 'double' to 'float', possible loss of data +src\additionally.c(2902): warning C4244: '=': conversion from 'double' to 'float', possible loss of data +src\additionally.c(2903): warning C4244: '=': conversion from 'double' to 'float', possible loss of data +src\additionally.c(3141): warning C4244: 'function': conversion from 'time_t' to 'unsigned int', possible loss of data +src\additionally.c(3160): warning C4305: 'initializing': truncation from 'double' to 'const float' +src\additionally.c(3161): warning C4305: 'initializing': truncation from 'double' to 'float' +src\additionally.c(3488): warning C4244: 'function': conversion from 'time_t' to 'unsigned int', possible loss of data +src\additionally.c(3500): warning C4305: 'initializing': truncation from 'double' to 'const float' +src\additionally.c(3501): warning C4305: 'initializing': truncation from 'double' to 'float' +src\additionally.c(3470): warning C4101: 'j': unreferenced local variable + Generating code + All 226 functions were compiled because no usable IPDB/IOBJ from previous compilation was found. + Finished generating code + yolo_cpu.vcxproj -> F:\GitHub\yolo2_light_cpu\bin\yolo_cpu.exe + yolo_cpu.vcxproj -> bin\yolo_cpu.pdb (Full PDB) diff --git a/x64/cpu_Release/yolo_cpu.tlog/CL.command.1.tlog b/x64/cpu_Release/yolo_cpu.tlog/CL.command.1.tlog new file mode 100644 index 0000000..fa0b43e Binary files /dev/null and b/x64/cpu_Release/yolo_cpu.tlog/CL.command.1.tlog differ diff --git a/x64/cpu_Release/yolo_cpu.tlog/CL.read.1.tlog b/x64/cpu_Release/yolo_cpu.tlog/CL.read.1.tlog new file mode 100644 index 0000000..f87400e Binary files /dev/null and b/x64/cpu_Release/yolo_cpu.tlog/CL.read.1.tlog differ diff --git a/x64/cpu_Release/yolo_cpu.tlog/CL.write.1.tlog b/x64/cpu_Release/yolo_cpu.tlog/CL.write.1.tlog new file mode 100644 index 0000000..81b29a4 Binary files /dev/null and b/x64/cpu_Release/yolo_cpu.tlog/CL.write.1.tlog differ diff --git a/x64/cpu_Release/yolo_cpu.tlog/link.command.1.tlog b/x64/cpu_Release/yolo_cpu.tlog/link.command.1.tlog new file mode 100644 index 0000000..b162be8 Binary files /dev/null and b/x64/cpu_Release/yolo_cpu.tlog/link.command.1.tlog differ diff --git a/x64/cpu_Release/yolo_cpu.tlog/link.read.1.tlog b/x64/cpu_Release/yolo_cpu.tlog/link.read.1.tlog new file mode 100644 index 0000000..8d36393 Binary files /dev/null and b/x64/cpu_Release/yolo_cpu.tlog/link.read.1.tlog differ diff --git a/x64/cpu_Release/yolo_cpu.tlog/link.write.1.tlog b/x64/cpu_Release/yolo_cpu.tlog/link.write.1.tlog new file mode 100644 index 0000000..1df0b3f Binary files /dev/null and b/x64/cpu_Release/yolo_cpu.tlog/link.write.1.tlog differ diff --git a/x64/cpu_Release/yolo_cpu.tlog/yolo_cpu.lastbuildstate b/x64/cpu_Release/yolo_cpu.tlog/yolo_cpu.lastbuildstate new file mode 100644 index 0000000..9aaea05 --- /dev/null +++ b/x64/cpu_Release/yolo_cpu.tlog/yolo_cpu.lastbuildstate @@ -0,0 +1,2 @@ +#TargetFrameworkVersion=v4.0:PlatformToolSet=v140:EnableManagedIncrementalBuild=false:VCToolArchitecture=Native32Bit:WindowsTargetPlatformVersion=8.1 +Release|x64|F:\GitHub\yolo2_light_cpu\| diff --git a/x64/cpu_Release/yolov2_forward_network.obj b/x64/cpu_Release/yolov2_forward_network.obj new file mode 100644 index 0000000..4fdbe88 Binary files /dev/null and b/x64/cpu_Release/yolov2_forward_network.obj differ diff --git a/x64/cpu_Release/yolov2_forward_network_quantized.obj b/x64/cpu_Release/yolov2_forward_network_quantized.obj new file mode 100644 index 0000000..b0fcb4a Binary files /dev/null and b/x64/cpu_Release/yolov2_forward_network_quantized.obj differ diff --git a/yolo_cpu.vcxproj b/yolo_cpu.vcxproj index 70fd9f1..4801e1c 100644 --- a/yolo_cpu.vcxproj +++ b/yolo_cpu.vcxproj @@ -128,7 +128,7 @@ true true C:\opencv_3.0\opencv\build\include;3rdparty\include;%(AdditionalIncludeDirectories) - AVX;OPENCV;_TIMESPEC_DEFINED;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + OPENCV;_TIMESPEC_DEFINED;_CRT_SECURE_NO_WARNINGS;WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) true true