diff --git a/include/caffe/filler.hpp b/include/caffe/filler.hpp index 988cfdf..dad9ad4 100644 --- a/include/caffe/filler.hpp +++ b/include/caffe/filler.hpp @@ -45,7 +45,6 @@ class ConstantFiller : public Filler { } }; -#ifdef NO_CAFFE_MOBILE /// @brief Fills a Blob with uniformly distributed values @f$ x\sim U(a, b) @f$. template class UniformFiller : public Filler { @@ -124,7 +123,6 @@ class PositiveUnitballFiller : public Filler { << "Sparsity not supported by this Filler."; } }; -#endif /** * @brief Fills a Blob with values @f$ x \sim U(-a, +a) @f$ where @f$ a @f$ is @@ -167,7 +165,6 @@ class XavierFiller : public Filler { } }; -#ifdef NO_CAFFE_MOBILE /** * @brief Fills a Blob with values @f$ x \sim N(0, \sigma^2) @f$ where * @f$ \sigma^2 @f$ is set inversely proportional to number of incoming @@ -263,7 +260,6 @@ class BilinearFiller : public Filler { << "Sparsity not supported by this Filler."; } }; -#endif /** * @brief Get a specific filler from the specification given in FillerParameter. @@ -276,22 +272,18 @@ Filler* GetFiller(const FillerParameter& param) { const std::string& type = param.type(); if (type == "constant") { return new ConstantFiller(param); -#ifdef NO_CAFFE_MOBILE } else if (type == "gaussian") { return new GaussianFiller(param); } else if (type == "positive_unitball") { return new PositiveUnitballFiller(param); } else if (type == "uniform") { return new UniformFiller(param); -#endif } else if (type == "xavier") { return new XavierFiller(param); -#ifdef NO_CAFFE_MOBILE } else if (type == "msra") { return new MSRAFiller(param); } else if (type == "bilinear") { return new BilinearFiller(param); -#endif } else { CHECK(false) << "Unknown filler name: " << param.type(); } diff --git a/include/caffe/fmath.hpp b/include/caffe/fmath.hpp deleted file mode 100644 index 5f0274e..0000000 --- a/include/caffe/fmath.hpp +++ /dev/null @@ -1,837 +0,0 @@ -#pragma once -/** - @brief fast math library for float - @author herumi - @url http://homepage1.nifty.com/herumi/ - @note modified new BSD license - http://opensource.org/licenses/BSD-3-Clause - - cl /Ox /Ob2 /arch:SSE2 /fp:fast bench.cpp -I../xbyak /EHsc /DNOMINMAX - g++ -O3 -fomit-frame-pointer -fno-operator-names -march=core2 -mssse3 -mfpmath=sse -ffast-math -fexcess-precision=fast -*/ -/* - function prototype list - - float fmath::exp(float); - double fmath::expd(double); - float fmath::log(float); - - __m128 fmath::exp_ps(__m128); - __m256 fmath::exp_ps256(__m256); - __m128 fmath::log_ps(__m128); - - double fmath::expd_v(double *, size_t n); - - if FMATH_USE_XBYAK is defined then Xbyak version are used -*/ -//#define FMATH_USE_XBYAK - -#include -#include -#include -#include -#include -#include -#include // for memcpy -#if defined(_WIN32) && !defined(__GNUC__) - #include - #ifndef MIE_ALIGN - #define MIE_ALIGN(x) __declspec(align(x)) - #endif -#else - #ifndef __GNUC_PREREQ - #define __GNUC_PREREQ(major, minor) ((((__GNUC__) << 16) + (__GNUC_MINOR__)) >= (((major) << 16) + (minor))) - #endif - #if __GNUC_PREREQ(4, 4) || !defined(__GNUC__) - /* GCC >= 4.4 and non-GCC compilers */ - #include - #elif __GNUC_PREREQ(4, 1) - /* GCC 4.1, 4.2, and 4.3 do not have x86intrin.h, directly include SSE2 header */ - #include - #endif - #ifndef MIE_ALIGN - #define MIE_ALIGN(x) __attribute__((aligned(x))) - #endif -#endif -#ifndef MIE_PACK - #define MIE_PACK(x, y, z, w) ((x) * 64 + (y) * 16 + (z) * 4 + (w)) -#endif -#ifdef FMATH_USE_XBYAK - #define XBYAK_NO_OP_NAMES - #include "xbyak/xbyak.h" - #include "xbyak/xbyak_util.h" -#endif - -namespace fmath { - -namespace local { - -const size_t EXP_TABLE_SIZE = 10; -const size_t EXPD_TABLE_SIZE = 11; -const size_t LOG_TABLE_SIZE = 12; - -typedef unsigned long long uint64_t; - -union fi { - float f; - unsigned int i; -}; - -union di { - double d; - uint64_t i; -}; - -inline unsigned int mask(int x) -{ - return (1U << x) - 1; -} - -inline uint64_t mask64(int x) -{ - return (1ULL << x) - 1; -} - -template -inline const T* cast_to(const void *p) -{ - return reinterpret_cast(p); -} - -template -size_t NumOfArray(const T (&)[N]) { return N; } - -/* - exp(88.722839f) = inf ; 0x42b17218 - exp(-87.33655f) = 1.175491e-038f(007fffe6) denormal ; 0xc2aeac50 - exp(-103.972081f) = 0 ; 0xc2cff1b5 -*/ -template -struct ExpVar { - enum { - s = N, - n = 1 << s, - f88 = 0x42b00000 /* 88.0 */ - }; - float minX[8]; - float maxX[8]; - float a[8]; - float b[8]; - float f1[8]; - unsigned int i127s[8]; - unsigned int mask_s[8]; - unsigned int i7fffffff[8]; - unsigned int tbl[n]; - ExpVar() - { - float log_2 = ::logf(2.0f); - for (int i = 0; i < 8; i++) { - maxX[i] = 88; - minX[i] = -88; - a[i] = n / log_2; - b[i] = log_2 / n; - f1[i] = 1.0f; - i127s[i] = 127 << s; - i7fffffff[i] = 0x7fffffff; - mask_s[i] = mask(s); - } - - for (int i = 0; i < n; i++) { - float y = pow(2.0f, (float)i / n); - fi fi; - fi.f = y; - tbl[i] = fi.i & mask(23); - } - } -}; - -template -struct ExpdVar { - enum { - sbit = sbit_, - s = 1UL << sbit, - adj = (1UL << (sbit + 10)) - (1UL << sbit) - }; - // A = 1, B = 1, C = 1/2, D = 1/6 - double C1[2]; // A - double C2[2]; // D - double C3[2]; // C/D - uint64_t tbl[s]; - double a; - double ra; - ExpdVar() - : a(s / ::log(2.0)) - , ra(1 / a) - { - for (int i = 0; i < 2; i++) { -#if 0 - C1[i] = 1.0; - C2[i] = 0.16667794882310216; - C3[i] = 2.9997969303278795; -#else - C1[i] = 1.0; - C2[i] = 0.16666666685227835064; - C3[i] = 3.0000000027955394; -#endif - } - for (int i = 0; i < s; i++) { - di di; - di.d = ::pow(2.0, i * (1.0 / s)); - tbl[i] = di.i & mask64(52); - } - } -}; - -template -struct LogVar { - enum { - LEN = N - 1 - }; - unsigned int m1[4]; // 0 - unsigned int m2[4]; // 16 - unsigned int m3[4]; // 32 - float m4[4]; // 48 - unsigned int m5[4]; // 64 - struct { - float app; - float rev; - } tbl[1 << LEN]; - float c_log2; - LogVar() - : c_log2(::logf(2.0f) / (1 << 23)) - { - const double e = 1 / double(1 << 24); - const double h = 1 / double(1 << LEN); - const size_t n = 1U << LEN; - for (size_t i = 0; i < n; i++) { - double x = 1 + double(i) / n; - double a = ::log(x); - tbl[i].app = (float)a; - if (i < n - 1) { - double b = ::log(x + h - e); - tbl[i].rev = (float)((b - a) / ((h - e) * (1 << 23))); - } else { - tbl[i].rev = (float)(1 / (x * (1 << 23))); - } - } - for (int i = 0; i < 4; i++) { - m1[i] = mask(8) << 23; - m2[i] = mask(LEN) << (23 - LEN); - m3[i] = mask(23 - LEN); - m4[i] = c_log2; - m5[i] = 127U << 23; - } - } -}; - -#ifdef FMATH_USE_XBYAK -struct ExpCode : public Xbyak::CodeGenerator { - float (*exp_)(float); - __m128 (*exp_ps_)(__m128); - template - ExpCode(const ExpVar *self) - { - Xbyak::util::Cpu cpu; - try { - makeExp(self, cpu); - exp_ = (float(*)(float))getCode(); - align(16); - exp_ps_ = (__m128(*)(__m128))getCurr(); - makeExpPs(self, cpu); - return; - } catch (std::exception& e) { - fprintf(stderr, "ExpCode ERR:%s\n", e.what()); - } catch (...) { - fprintf(stderr, "ExpCode ERR:unknown error\n"); - } - ::exit(1); - } - template - void makeExp(const ExpVar *self, const Xbyak::util::Cpu& /*cpu*/) - { - typedef ExpVar Self; - using namespace local; - using namespace Xbyak; - - inLocalLabel(); -#ifdef XBYAK64 - const Reg64& base = rcx; - const Reg64& a = rax; -#else - const Reg32& base = ecx; - const Reg32& a = eax; -#endif - - mov(base, (size_t)self); - -#ifdef XBYAK32 - movss(xm0, ptr [esp + 4]); -#endif - L(".retry"); - movaps(xm1, xm0); - movd(edx, xm0); - mulss(xm1, ptr [base + offsetof(Self, a)]); // t - and_(edx, 0x7fffffff); - cvtss2si(eax, xm1); - cmp(edx, ExpVar::f88); - jg(".overflow"); - lea(edx, ptr [eax + (127 << self->s)]); - cvtsi2ss(xm1, eax); - and_(eax, mask(self->s)); // v - mov(eax, ptr [base + a * 4 + offsetof(Self, tbl)]); // expVar.tbl[v] - shr(edx, self->s); - mulss(xm1, ptr [base + offsetof(Self, b)]); - shl(edx, 23); // u - subss(xm0, xm1); // t - or_(eax, edx); // fi.f - addss(xm0, ptr [base + offsetof(Self, f1)]); - movd(xm1, eax); - mulss(xm0, xm1); -#ifdef XBYAK32 - movss(ptr[esp + 4], xm0); - fld(dword[esp + 4]); -#endif - ret(); - L(".overflow"); - minss(xm0, ptr [base + offsetof(Self, maxX)]); - maxss(xm0, ptr [base + offsetof(Self, minX)]); - jmp(".retry"); - outLocalLabel(); - } - template - void makeExpPs(const ExpVar *self, const Xbyak::util::Cpu& cpu) - { - typedef ExpVar Self; - using namespace local; - using namespace Xbyak; - - inLocalLabel(); -#ifdef XBYAK64 - const Reg64& base = rcx; - const Reg64& a = rax; - const Reg64& d = rdx; -#else - const Reg32& base = ecx; - const Reg32& a = eax; - const Reg32& d = edx; -#endif - -/* - if abs(x) >= maxX then x = max(min(x, maxX), -maxX) and try - minps, maxps are very slow then avoid them -*/ - const bool useSSE41 = cpu.has(Xbyak::util::Cpu::tSSE41); -#if defined(XBYAK64_WIN) && !defined(__INTEL_COMPILER) - movaps(xm0, ptr [rcx]); -#endif - mov(base, (size_t)self); - L(".retry"); - movaps(xm5, xm0); - andps(xm5, ptr [base + offsetof(Self, i7fffffff)]); - movaps(xm3, ptr [base + offsetof(Self, a)]); - movaps(xm4, ptr [base + offsetof(Self, b)]); - pcmpgtd(xm5, ptr [base + offsetof(Self, maxX)]); - mulps(xm3, xm0); - movaps(xm1, ptr [base + offsetof(Self, i127s)]); - pmovmskb(eax, xm5); - movaps(xm5, ptr [base + offsetof(Self, mask_s)]); - cvtps2dq(xm2, xm3); - pand(xm5, xm2); - cvtdq2ps(xm3, xm2); - test(eax, eax); - jnz(".overflow"); - paddd(xm1, xm2); - movd(eax, xm5); - mulps(xm4, xm3); - pextrw(edx, xm5, 2); - subps(xm0, xm4); - movd(xm4, ptr [base + a * 4 + offsetof(Self, tbl)]); - addps(xm0, ptr [base + offsetof(Self, f1)]); - pextrw(eax, xm5, 4); - if (useSSE41) { - pinsrd(xm4, ptr [base + d * 4 + offsetof(Self, tbl)], 1); - } else { - movd(xm3, ptr [base + d * 4 + offsetof(Self, tbl)]); - movlhps(xm4, xm3); - } - pextrw(edx, xm5, 6); - psrld(xm1, self->s); - pslld(xm1, 23); - if (useSSE41) { - pinsrd(xm4, ptr [base + a * 4 + offsetof(Self, tbl)], 2); - pinsrd(xm4, ptr [base + d * 4 + offsetof(Self, tbl)], 3); - } else { - movd(xm2, ptr [base + a * 4 + offsetof(Self, tbl)]); - movd(xm3, ptr [base + d * 4 + offsetof(Self, tbl)]); - movlhps(xm2, xm3); - shufps(xm4, xm2, MIE_PACK(2, 0, 2, 0)); - } - por(xm1, xm4); - mulps(xm0, xm1); - ret(); - L(".overflow"); - minps(xm0, ptr [base + offsetof(Self, maxX)]); - maxps(xm0, ptr [base + offsetof(Self, minX)]); - jmp(".retry"); - outLocalLabel(); - } -}; -#endif - -/* to define static variables in fmath.hpp */ -template -struct C { - static const ExpVar expVar; - static const LogVar logVar; - static const ExpdVar expdVar; -#ifdef FMATH_USE_XBYAK - static const ExpCode& getInstance() { - static const ExpCode expCode(&expVar); - return expCode; - } -#endif -}; - -template -MIE_ALIGN(32) const ExpVar C::expVar; - -template -MIE_ALIGN(32) const LogVar C::logVar; - -template -MIE_ALIGN(32) const ExpdVar C::expdVar; - -} // fmath::local - -#ifdef FMATH_USE_XBYAK -inline float expC(float x) -#else -inline float exp(float x) -#endif -{ - using namespace local; - const ExpVar<>& expVar = C<>::expVar; - -#if 1 - __m128 x1 = _mm_set_ss(x); - - int limit = _mm_cvtss_si32(x1) & 0x7fffffff; - if (limit > ExpVar<>::f88) { - x1 = _mm_min_ss(x1, _mm_load_ss(expVar.maxX)); - x1 = _mm_max_ss(x1, _mm_load_ss(expVar.minX)); - } - - int r = _mm_cvtss_si32(_mm_mul_ss(x1, _mm_load_ss(expVar.a))); - unsigned int v = r & mask(expVar.s); - float t = _mm_cvtss_f32(x1) - r * expVar.b[0]; - int u = r >> expVar.s; - fi fi; - fi.i = ((u + 127) << 23) | expVar.tbl[v]; - return (1 + t) * fi.f; -#else - x = std::min(x, expVar.maxX[0]); - x = std::max(x, expVar.minX[0]); - float t = x * expVar.a[0]; - const float magic = (1 << 23) + (1 << 22); // to round - t += magic; - fi fi; - fi.f = t; - t = x - (t - magic) * expVar.b[0]; - int u = ((fi.i + (127 << expVar.s)) >> expVar.s) << 23; - unsigned int v = fi.i & mask(expVar.s); - fi.i = u | expVar.tbl[v]; - return (1 + t) * fi.f; -// return (1 + t) * pow(2, (float)u) * pow(2, (float)v / n); -#endif -} - -inline double expd(double x) -{ - if (x <= -708.39641853226408) return 0; - if (x >= 709.78271289338397) return std::numeric_limits::infinity(); - using namespace local; - const ExpdVar<>& c = C<>::expdVar; -#if 1 - const double _b = double(uint64_t(3) << 51); - __m128d b = _mm_load_sd(&_b); - __m128d xx = _mm_load_sd(&x); - __m128d d = _mm_add_sd(_mm_mul_sd(xx, _mm_load_sd(&c.a)), b); - uint64_t di = _mm_cvtsi128_si32(_mm_castpd_si128(d)); - uint64_t iax = c.tbl[di & mask(c.sbit)]; - __m128d _t = _mm_sub_sd(_mm_mul_sd(_mm_sub_sd(d, b), _mm_load_sd(&c.ra)), xx); - uint64_t u = ((di + c.adj) >> c.sbit) << 52; - double t; - _mm_store_sd(&t, _t); - double y = (c.C3[0] - t) * (t * t) * c.C2[0] - t + c.C1[0]; - double did; - u |= iax; - memcpy(&did, &u, sizeof(did)); - return y * did; -#else -/* - remark : -ffast-math option of gcc may generate bad code for fmath::expd -*/ - const uint64_t b = 3ULL << 51; - di di; - di.d = x * c.a + b; - uint64_t iax = c.tbl[di.i & mask(c.sbit)]; - - double t = (di.d - b) * c.ra - x; - uint64_t u = ((di.i + c.adj) >> c.sbit) << 52; - double y = (c.C3[0] - t) * (t * t) * c.C2[0] - t + c.C1[0]; - - di.i = u | iax; - return y * di.d; -#endif -} - -// not fast -#if 0 -inline __m128d exp_pd(__m128d x) -{ - using namespace local; - const ExpdVar<>& c = C<>::expdVar; - const double b = double(3ULL << 51); - const __m128d mC1 = *cast_to<__m128d>(c.C1); - const __m128d mC2 = *cast_to<__m128d>(c.C2); - const __m128d mC3 = *cast_to<__m128d>(c.C3); - const __m128d ma = _mm_set1_pd(c.a); - const __m128d mra = _mm_set1_pd(c.ra); - const __m128i madj = _mm_set1_epi32(c.adj); - MIE_ALIGN(16) const double expMax[2] = { 709.78271289338397, 709.78271289338397 }; - MIE_ALIGN(16) const double expMin[2] = { -708.39641853226408, -708.39641853226408 }; - x = _mm_min_pd(x, *(const __m128d*)expMax); - x = _mm_max_pd(x, *(const __m128d*)expMin); - - __m128d d = _mm_mul_pd(x, ma); - d = _mm_add_pd(d, _mm_set1_pd(b)); - int adr0 = _mm_cvtsi128_si32(_mm_castpd_si128(d)) & mask(c.sbit); - int adr1 = _mm_cvtsi128_si32(_mm_srli_si128(_mm_castpd_si128(d), 8)) & mask(c.sbit); -__m128i iaxL = _mm_castpd_si128(_mm_load_sd((const double*)&c.tbl[adr0])); - __m128i iax = _mm_castpd_si128(_mm_load_sd((const double*)&c.tbl[adr1])); - iax = _mm_unpacklo_epi64(iaxL, iax); - - __m128d t = _mm_sub_pd(_mm_mul_pd(_mm_sub_pd(d, _mm_set1_pd(b)), mra), x); - __m128i u = _mm_castpd_si128(d); - u = _mm_add_epi64(u, madj); - u = _mm_srli_epi64(u, c.sbit); - u = _mm_slli_epi64(u, 52); - u = _mm_or_si128(u, iax); - __m128d y = _mm_mul_pd(_mm_sub_pd(mC3, t), _mm_mul_pd(t, t)); - y = _mm_mul_pd(y, mC2); - y = _mm_add_pd(_mm_sub_pd(y, t), mC1); - y = _mm_mul_pd(y, _mm_castsi128_pd(u)); - return y; -} -#endif - -inline void expd_v(double *px, size_t n) -{ - using namespace local; - const ExpdVar<>& c = C<>::expdVar; - const double b = double(3ULL << 51); -#ifdef __AVX2__ - assert((n % 4) == 0); - const __m256d mC1 = _mm256_set1_pd(c.C1[0]); - const __m256d mC2 = _mm256_set1_pd(c.C2[0]); - const __m256d mC3 = _mm256_set1_pd(c.C3[0]); - const __m256d ma = _mm256_set1_pd(c.a); - const __m256d mra = _mm256_set1_pd(c.ra); - const __m256i madj = _mm256_set1_epi64x(c.adj); - const __m256i maskSbit = _mm256_set1_epi64x(mask(c.sbit)); - const __m256d expMax = _mm256_set1_pd(709.78272569338397); - const __m256d expMin = _mm256_set1_pd(-708.39641853226408); - for (size_t i = 0; i < n; i += 4) { - __m256d x = _mm256_load_pd(px); - x = _mm256_min_pd(x, expMax); - x = _mm256_max_pd(x, expMin); - - __m256d d = _mm256_mul_pd(x, ma); - d = _mm256_add_pd(d, _mm256_set1_pd(b)); - __m256i adr = _mm256_and_si256(_mm256_castpd_si256(d), maskSbit); - __m256i iax = _mm256_i64gather_epi64((const long long*)c.tbl, adr, 8); - __m256d t = _mm256_sub_pd(_mm256_mul_pd(_mm256_sub_pd(d, _mm256_set1_pd(b)), mra), x); - __m256i u = _mm256_castpd_si256(d); - u = _mm256_add_epi64(u, madj); - u = _mm256_srli_epi64(u, c.sbit); - u = _mm256_slli_epi64(u, 52); - u = _mm256_or_si256(u, iax); - __m256d y = _mm256_mul_pd(_mm256_sub_pd(mC3, t), _mm256_mul_pd(t, t)); - y = _mm256_mul_pd(y, mC2); - y = _mm256_add_pd(_mm256_sub_pd(y, t), mC1); - _mm256_store_pd(px, _mm256_mul_pd(y, _mm256_castsi256_pd(u))); - px += 4; - } -#else - assert((n % 2) == 0); - const __m128d mC1 = _mm_set1_pd(c.C1[0]); - const __m128d mC2 = _mm_set1_pd(c.C2[0]); - const __m128d mC3 = _mm_set1_pd(c.C3[0]); - const __m128d ma = _mm_set1_pd(c.a); - const __m128d mra = _mm_set1_pd(c.ra); -#if defined(__x86_64__) || defined(_WIN64) - const __m128i madj = _mm_set1_epi64x(c.adj); -#else - const __m128i madj = _mm_set_epi32(0, c.adj, 0, c.adj); -#endif - const __m128d expMax = _mm_set1_pd(709.78272569338397); - const __m128d expMin = _mm_set1_pd(-708.39641853226408); - for (size_t i = 0; i < n; i += 2) { - __m128d x = _mm_load_pd(px); - x = _mm_min_pd(x, expMax); - x = _mm_max_pd(x, expMin); - - __m128d d = _mm_mul_pd(x, ma); - d = _mm_add_pd(d, _mm_set1_pd(b)); - int adr0 = _mm_cvtsi128_si32(_mm_castpd_si128(d)) & mask(c.sbit); - int adr1 = _mm_cvtsi128_si32(_mm_srli_si128(_mm_castpd_si128(d), 8)) & mask(c.sbit); - - __m128i iaxL = _mm_castpd_si128(_mm_load_sd((const double*)&c.tbl[adr0])); - __m128i iax = _mm_castpd_si128(_mm_load_sd((const double*)&c.tbl[adr1])); - iax = _mm_unpacklo_epi64(iaxL, iax); - - __m128d t = _mm_sub_pd(_mm_mul_pd(_mm_sub_pd(d, _mm_set1_pd(b)), mra), x); - __m128i u = _mm_castpd_si128(d); - u = _mm_add_epi64(u, madj); - u = _mm_srli_epi64(u, c.sbit); - u = _mm_slli_epi64(u, 52); - u = _mm_or_si128(u, iax); - __m128d y = _mm_mul_pd(_mm_sub_pd(mC3, t), _mm_mul_pd(t, t)); - y = _mm_mul_pd(y, mC2); - y = _mm_add_pd(_mm_sub_pd(y, t), mC1); - _mm_store_pd(px, _mm_mul_pd(y, _mm_castsi128_pd(u))); - px += 2; - } -#endif -} - -#ifdef FMATH_USE_XBYAK -inline __m128 exp_psC(__m128 x) -#else -inline __m128 exp_ps(__m128 x) -#endif -{ - using namespace local; - const ExpVar<>& expVar = C<>::expVar; - - __m128i limit = _mm_castps_si128(_mm_and_ps(x, *cast_to<__m128>(expVar.i7fffffff))); - int over = _mm_movemask_epi8(_mm_cmpgt_epi32(limit, *cast_to<__m128i>(expVar.maxX))); - if (over) { - x = _mm_min_ps(x, _mm_load_ps(expVar.maxX)); - x = _mm_max_ps(x, _mm_load_ps(expVar.minX)); - } - - __m128i r = _mm_cvtps_epi32(_mm_mul_ps(x, *cast_to<__m128>(expVar.a))); - __m128 t = _mm_sub_ps(x, _mm_mul_ps(_mm_cvtepi32_ps(r), *cast_to<__m128>(expVar.b))); - t = _mm_add_ps(t, *cast_to<__m128>(expVar.f1)); - - __m128i v4 = _mm_and_si128(r, *cast_to<__m128i>(expVar.mask_s)); - __m128i u4 = _mm_add_epi32(r, *cast_to<__m128i>(expVar.i127s)); - u4 = _mm_srli_epi32(u4, expVar.s); - u4 = _mm_slli_epi32(u4, 23); - -#ifdef __AVX2__ // fast? - __m128i ti = _mm_i32gather_epi32((const int*)expVar.tbl, v4, 4); - __m128 t0 = _mm_castsi128_ps(ti); -#else - unsigned int v0, v1, v2, v3; - v0 = _mm_cvtsi128_si32(v4); - v1 = _mm_extract_epi16(v4, 2); - v2 = _mm_extract_epi16(v4, 4); - v3 = _mm_extract_epi16(v4, 6); -#if 1 - __m128 t0, t1, t2, t3; - - t0 = _mm_castsi128_ps(_mm_set1_epi32(expVar.tbl[v0])); - t1 = _mm_castsi128_ps(_mm_set1_epi32(expVar.tbl[v1])); - t2 = _mm_castsi128_ps(_mm_set1_epi32(expVar.tbl[v2])); - t3 = _mm_castsi128_ps(_mm_set1_epi32(expVar.tbl[v3])); - - t1 = _mm_movelh_ps(t1, t3); - t1 = _mm_castsi128_ps(_mm_slli_epi64(_mm_castps_si128(t1), 32)); - t0 = _mm_movelh_ps(t0, t2); - t0 = _mm_or_ps(t0, t1); -#else - __m128i ti = _mm_castps_si128(_mm_load_ss((const float*)&expVar.tbl[v0])); - ti = _mm_insert_epi32(ti, expVar.tbl[v1], 1); - ti = _mm_insert_epi32(ti, expVar.tbl[v2], 2); - ti = _mm_insert_epi32(ti, expVar.tbl[v3], 3); - __m128 t0 = _mm_castsi128_ps(ti); -#endif -#endif - t0 = _mm_or_ps(t0, _mm_castsi128_ps(u4)); - - t = _mm_mul_ps(t, t0); - - return t; -} -#ifdef __AVX2__ -inline __m256 exp_ps256(__m256 x) -{ - using namespace local; - const ExpVar<>& expVar = C<>::expVar; - - __m256i limit = _mm256_castps_si256(_mm256_and_ps(x, *(const __m256*)expVar.i7fffffff)); - int over = _mm256_movemask_epi8(_mm256_cmpgt_epi32(limit, *(const __m256i*)expVar.maxX)); - if (over) { - x = _mm256_min_ps(x, _mm256_load_ps(expVar.maxX)); - x = _mm256_max_ps(x, _mm256_load_ps(expVar.minX)); - } - __m256i r = _mm256_cvtps_epi32(_mm256_mul_ps(x, *(const __m256*)expVar.a)); - __m256 t = _mm256_sub_ps(x, _mm256_mul_ps(_mm256_cvtepi32_ps(r), *(const __m256*)expVar.b)); - t = _mm256_add_ps(t, *(const __m256*)expVar.f1); - __m256i v8 = _mm256_and_si256(r, *(const __m256i*)expVar.mask_s); - __m256i u8 = _mm256_add_epi32(r, *(const __m256i*)expVar.i127s); - u8 = _mm256_srli_epi32(u8, expVar.s); - u8 = _mm256_slli_epi32(u8, 23); -#if 1 - __m256i ti = _mm256_i32gather_epi32((const int*)expVar.tbl, v8, 4); -#else - unsigned int v0, v1, v2, v3, v4, v5, v6, v7; - v0 = _mm256_extract_epi16(v8, 0); - v1 = _mm256_extract_epi16(v8, 2); - v2 = _mm256_extract_epi16(v8, 4); - v3 = _mm256_extract_epi16(v8, 6); - v4 = _mm256_extract_epi16(v8, 8); - v5 = _mm256_extract_epi16(v8, 10); - v6 = _mm256_extract_epi16(v8, 12); - v7 = _mm256_extract_epi16(v8, 14); - __m256i ti = _mm256_setzero_si256(); - ti = _mm256_insert_epi32(ti, expVar.tbl[v0], 0); - ti = _mm256_insert_epi32(ti, expVar.tbl[v1], 1); - ti = _mm256_insert_epi32(ti, expVar.tbl[v2], 2); - ti = _mm256_insert_epi32(ti, expVar.tbl[v3], 3); - ti = _mm256_insert_epi32(ti, expVar.tbl[v4], 4); - ti = _mm256_insert_epi32(ti, expVar.tbl[v5], 5); - ti = _mm256_insert_epi32(ti, expVar.tbl[v6], 6); - ti = _mm256_insert_epi32(ti, expVar.tbl[v7], 7); -#endif - __m256 t0 = _mm256_castsi256_ps(ti); - t0 = _mm256_or_ps(t0, _mm256_castsi256_ps(u8)); - t = _mm256_mul_ps(t, t0); - return t; -} -#endif - -inline float log(float x) -{ - using namespace local; - const LogVar<>& logVar = C<>::logVar; - const size_t logLen = logVar.LEN; - - fi fi; - fi.f = x; - int a = fi.i & (mask(8) << 23); - unsigned int b1 = fi.i & (mask(logLen) << (23 - logLen)); - unsigned int b2 = fi.i & mask(23 - logLen); - int idx = b1 >> (23 - logLen); - float f = float(a - (127 << 23)) * logVar.c_log2 + logVar.tbl[idx].app + float(b2) * logVar.tbl[idx].rev; - return f; -} - -inline __m128 log_ps(__m128 x) -{ - using namespace local; - const LogVar<>& logVar = C<>::logVar; - - __m128i xi = _mm_castps_si128(x); - __m128i idx = _mm_srli_epi32(_mm_and_si128(xi, *cast_to<__m128i>(logVar.m2)), (23 - logVar.LEN)); - __m128 a = _mm_cvtepi32_ps(_mm_sub_epi32(_mm_and_si128(xi, *cast_to<__m128i>(logVar.m1)), *cast_to<__m128i>(logVar.m5))); - __m128 b2 = _mm_cvtepi32_ps(_mm_and_si128(xi, *cast_to<__m128i>(logVar.m3))); - - a = _mm_mul_ps(a, *cast_to<__m128>(logVar.m4)); // c_log2 - - unsigned int i0 = _mm_cvtsi128_si32(idx); - -#if 1 - unsigned int i1 = _mm_extract_epi16(idx, 2); - unsigned int i2 = _mm_extract_epi16(idx, 4); - unsigned int i3 = _mm_extract_epi16(idx, 6); -#else - idx = _mm_srli_si128(idx, 4); - unsigned int i1 = _mm_cvtsi128_si32(idx); - - idx = _mm_srli_si128(idx, 4); - unsigned int i2 = _mm_cvtsi128_si32(idx); - - idx = _mm_srli_si128(idx, 4); - unsigned int i3 = _mm_cvtsi128_si32(idx); -#endif - - __m128 app, rev; - __m128i L = _mm_loadl_epi64(cast_to<__m128i>(&logVar.tbl[i0].app)); - __m128i H = _mm_loadl_epi64(cast_to<__m128i>(&logVar.tbl[i1].app)); - __m128 t = _mm_castsi128_ps(_mm_unpacklo_epi64(L, H)); - L = _mm_loadl_epi64(cast_to<__m128i>(&logVar.tbl[i2].app)); - H = _mm_loadl_epi64(cast_to<__m128i>(&logVar.tbl[i3].app)); - rev = _mm_castsi128_ps(_mm_unpacklo_epi64(L, H)); - app = _mm_shuffle_ps(t, rev, MIE_PACK(2, 0, 2, 0)); - rev = _mm_shuffle_ps(t, rev, MIE_PACK(3, 1, 3, 1)); - - a = _mm_add_ps(a, app); - rev = _mm_mul_ps(b2, rev); - return _mm_add_ps(a, rev); -} - -#ifndef __CYGWIN__ -// cygwin defines log2() in global namespace! -// log2(x) = log(x) / log(2) -inline float log2(float x) { return fmath::log(x) * 1.442695f; } -#endif - -/* - for given y > 0 - get f_y(x) := pow(x, y) for x >= 0 -*/ -class PowGenerator { - enum { - N = 11 - }; - float tbl0_[256]; - struct { - float app; - float rev; - } tbl1_[1 << N]; -public: - PowGenerator(float y) - { - for (int i = 0; i < 256; i++) { - tbl0_[i] = ::powf(2, (i - 127) * y); - } - const double e = 1 / double(1 << 24); - const double h = 1 / double(1 << N); - const size_t n = 1U << N; - for (size_t i = 0; i < n; i++) { - double x = 1 + double(i) / n; - double a = ::pow(x, (double)y); - tbl1_[i].app = (float)a; - double b = ::pow(x + h - e, (double)y); - tbl1_[i].rev = (float)((b - a) / (h - e) / (1 << 23)); - } - } - float get(float x) const - { - using namespace local; - fi fi; - fi.f = x; - int a = (fi.i >> 23) & mask(8); - unsigned int b = fi.i & mask(23); - unsigned int b1 = b & (mask(N) << (23 - N)); - unsigned int b2 = b & mask(23 - N); - float f; - int idx = b1 >> (23 - N); - f = tbl0_[a] * (tbl1_[idx].app + float(b2) * tbl1_[idx].rev); - return f; - } -}; - -// for Xbyak version -#ifdef FMATH_USE_XBYAK -float (*const exp)(float) = local::C<>::getInstance().exp_; -__m128 (*const exp_ps)(__m128) = local::C<>::getInstance().exp_ps_; -#endif - -// exp2(x) = pow(2, x) -inline float exp2(float x) { return fmath::exp(x * 0.6931472f); } - -} // fmath diff --git a/patch/include/caffe/filler.hpp.patch b/patch/include/caffe/filler.hpp.patch deleted file mode 100644 index f1f3c4a..0000000 --- a/patch/include/caffe/filler.hpp.patch +++ /dev/null @@ -1,57 +0,0 @@ ---- caffe/include/caffe/filler.hpp 2017-01-27 09:51:55.350123200 +0800 -+++ ../include/caffe/filler.hpp 2017-02-04 22:35:34.594153197 +0800 -@@ -45,6 +45,7 @@ - } - }; - -+#ifdef NO_CAFFE_MOBILE - /// @brief Fills a Blob with uniformly distributed values @f$ x\sim U(a, b) @f$. - template - class UniformFiller : public Filler { -@@ -123,6 +124,7 @@ - << "Sparsity not supported by this Filler."; - } - }; -+#endif - - /** - * @brief Fills a Blob with values @f$ x \sim U(-a, +a) @f$ where @f$ a @f$ is -@@ -165,6 +167,7 @@ - } - }; - -+#ifdef NO_CAFFE_MOBILE - /** - * @brief Fills a Blob with values @f$ x \sim N(0, \sigma^2) @f$ where - * @f$ \sigma^2 @f$ is set inversely proportional to number of incoming -@@ -260,6 +263,7 @@ - << "Sparsity not supported by this Filler."; - } - }; -+#endif - - /** - * @brief Get a specific filler from the specification given in FillerParameter. -@@ -272,18 +276,22 @@ - const std::string& type = param.type(); - if (type == "constant") { - return new ConstantFiller(param); -+#ifdef NO_CAFFE_MOBILE - } else if (type == "gaussian") { - return new GaussianFiller(param); - } else if (type == "positive_unitball") { - return new PositiveUnitballFiller(param); - } else if (type == "uniform") { - return new UniformFiller(param); -+#endif - } else if (type == "xavier") { - return new XavierFiller(param); -+#ifdef NO_CAFFE_MOBILE - } else if (type == "msra") { - return new MSRAFiller(param); - } else if (type == "bilinear") { - return new BilinearFiller(param); -+#endif - } else { - CHECK(false) << "Unknown filler name: " << param.type(); - } diff --git a/patch/src/caffe/common.cpp.patch b/patch/src/caffe/common.cpp.patch index 7065a3d..416ec44 100644 --- a/patch/src/caffe/common.cpp.patch +++ b/patch/src/caffe/common.cpp.patch @@ -1,14 +1,16 @@ --- caffe/src/caffe/common.cpp 2017-01-27 09:51:56.283123700 +0800 -+++ ../src/caffe/common.cpp 2017-02-04 22:35:34.654099433 +0800 -@@ -1,5 +1,6 @@ ++++ ../src/caffe/common.cpp 2017-02-04 23:04:01.367500325 +0800 +@@ -1,5 +1,9 @@ +#ifdef USE_BOOST #include --#include ++#endif ++#ifdef USE_GLOG + #include +#endif #include #include #include -@@ -7,16 +8,36 @@ +@@ -7,16 +11,36 @@ #include "caffe/common.hpp" #include "caffe/util/rng.hpp" @@ -45,7 +47,7 @@ } // random seeding -@@ -41,12 +62,16 @@ +@@ -41,12 +65,16 @@ void GlobalInit(int* pargc, char*** pargv) { diff --git a/patch/src/caffe/layers/dropout_layer.cpp.patch b/patch/src/caffe/layers/dropout_layer.cpp.patch deleted file mode 100644 index 623ae72..0000000 --- a/patch/src/caffe/layers/dropout_layer.cpp.patch +++ /dev/null @@ -1,16 +0,0 @@ ---- caffe/src/caffe/layers/dropout_layer.cpp 2017-01-27 09:51:56.561122800 +0800 -+++ ../src/caffe/layers/dropout_layer.cpp 2017-02-04 22:35:34.654099433 +0800 -@@ -35,11 +35,13 @@ - unsigned int* mask = rand_vec_.mutable_cpu_data(); - const int count = bottom[0]->count(); - if (this->phase_ == TRAIN) { -+#ifdef NO_CAFFE_MOBILE - // Create random numbers - caffe_rng_bernoulli(count, 1. - threshold_, mask); - for (int i = 0; i < count; ++i) { - top_data[i] = bottom_data[i] * mask[i] * scale_; - } -+#endif - } else { - caffe_copy(bottom[0]->count(), bottom_data, top_data); - } diff --git a/patch/src/caffe/net.cpp.patch b/patch/src/caffe/net.cpp.patch index c677d81..f9f70b0 100644 --- a/patch/src/caffe/net.cpp.patch +++ b/patch/src/caffe/net.cpp.patch @@ -1,5 +1,5 @@ --- caffe/src/caffe/net.cpp 2017-01-27 09:51:56.935642700 +0800 -+++ ../src/caffe/net.cpp 2017-02-04 22:35:34.654099433 +0800 ++++ ../src/caffe/net.cpp 2017-02-04 23:04:40.954733321 +0800 @@ -5,19 +5,27 @@ #include #include @@ -390,33 +390,3 @@ template void Net::ClearParamDiffs() { -@@ -981,4 +1052,29 @@ - - INSTANTIATE_CLASS(Net); - -+#if 0 -+// FIXME need? -+/* force register */ -+#define FORCE_REG(type) \ -+ extern LayerRegistry g_creator_f_##type; \ -+ extern LayerRegistry g_creator_d_##type; \ -+ LayerRegistry *__g_creator_f_##type = &g_creator_f_##type; \ -+ LayerRegistry *__g_creator_d_##type = &g_creator_d_##type -+ -+FORCE_REG(TanH); -+FORCE_REG(Pooling); -+FORCE_REG(ReLU); -+FORCE_REG(Sigmoid); -+FORCE_REG(Softmax); -+FORCE_REG(Convolution); -+ -+FORCE_REG(Concat); -+FORCE_REG(BNLL); -+FORCE_REG(Flatten); -+FORCE_REG(InnerProduct); -+FORCE_REG(LRN); -+FORCE_REG(MemoryData); -+ -+FORCE_REG(Split); -+#endif - } // namespace caffe diff --git a/patch/src/caffe/util/benchmark.cpp.patch b/patch/src/caffe/util/benchmark.cpp.patch index 8169296..fbdc08c 100644 --- a/patch/src/caffe/util/benchmark.cpp.patch +++ b/patch/src/caffe/util/benchmark.cpp.patch @@ -1,24 +1,13 @@ --- caffe/src/caffe/util/benchmark.cpp 2017-01-27 09:51:57.449642800 +0800 -+++ ../src/caffe/util/benchmark.cpp 2017-02-04 22:35:34.654099433 +0800 -@@ -1,10 +1,17 @@ ++++ ../src/caffe/util/benchmark.cpp 2017-02-04 23:00:28.602281548 +0800 +@@ -1,4 +1,6 @@ +#ifdef USE_BOOST #include +#endif #include "caffe/common.hpp" #include "caffe/util/benchmark.hpp" - - namespace caffe { - -+static float time_diff_ms(struct timeval start, struct timeval stop) { -+ return (stop.tv_sec - start.tv_sec)*1000 -+ + (float)(stop.tv_usec - start.tv_usec)/1000; -+} -+ - Timer::Timer() - : initted_(false), - running_(false), -@@ -32,7 +39,11 @@ +@@ -32,7 +34,11 @@ NO_GPU; #endif } else { @@ -30,7 +19,7 @@ } running_ = true; has_run_at_least_once_ = true; -@@ -48,7 +59,11 @@ +@@ -48,7 +54,11 @@ NO_GPU; #endif } else { @@ -42,7 +31,7 @@ } running_ = false; } -@@ -74,7 +89,12 @@ +@@ -74,7 +84,12 @@ NO_GPU; #endif } else { @@ -55,7 +44,7 @@ } return elapsed_microseconds_; } -@@ -96,7 +116,12 @@ +@@ -96,7 +111,12 @@ NO_GPU; #endif } else { @@ -68,7 +57,7 @@ } return elapsed_milliseconds_; } -@@ -127,7 +152,11 @@ +@@ -127,7 +147,11 @@ void CPUTimer::Start() { if (!running()) { @@ -80,7 +69,7 @@ this->running_ = true; this->has_run_at_least_once_ = true; } -@@ -135,7 +164,11 @@ +@@ -135,7 +159,11 @@ void CPUTimer::Stop() { if (running()) { @@ -92,7 +81,7 @@ this->running_ = false; } } -@@ -148,8 +181,13 @@ +@@ -148,8 +176,13 @@ if (running()) { Stop(); } @@ -106,7 +95,7 @@ return this->elapsed_milliseconds_; } -@@ -161,8 +199,13 @@ +@@ -161,8 +194,13 @@ if (running()) { Stop(); } diff --git a/patch/src/caffe/util/io.cpp.patch b/patch/src/caffe/util/io.cpp.patch index bca071f..2da7aea 100644 --- a/patch/src/caffe/util/io.cpp.patch +++ b/patch/src/caffe/util/io.cpp.patch @@ -1,5 +1,5 @@ --- caffe/src/caffe/util/io.cpp 2017-01-27 09:51:57.498643500 +0800 -+++ ../src/caffe/util/io.cpp 2017-02-04 22:35:34.654099433 +0800 ++++ ../src/caffe/util/io.cpp 2017-02-04 23:01:28.591786478 +0800 @@ -18,6 +18,9 @@ #include "caffe/common.hpp" #include "caffe/proto/caffe.pb.h" @@ -10,12 +10,3 @@ const int kProtoReadBytesLimit = INT_MAX; // Max size of 2 GB minus 1 byte. -@@ -42,7 +45,7 @@ - } - - void WriteProtoToTextFile(const Message& proto, const char* filename) { -- int fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 0644); -+ int fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, 0644); - FileOutputStream* output = new FileOutputStream(fd); - CHECK(google::protobuf::TextFormat::Print(proto, output)); - delete output; diff --git a/patch/src/caffe/util/math_functions.cpp.patch b/patch/src/caffe/util/math_functions.cpp.patch index 1d35c80..8b6ba4e 100644 --- a/patch/src/caffe/util/math_functions.cpp.patch +++ b/patch/src/caffe/util/math_functions.cpp.patch @@ -1,5 +1,5 @@ --- caffe/src/caffe/util/math_functions.cpp 2017-01-27 09:51:57.502642700 +0800 -+++ ../src/caffe/util/math_functions.cpp 2017-02-04 22:35:34.654099433 +0800 ++++ ../src/caffe/util/math_functions.cpp 2017-02-04 23:03:03.164175136 +0800 @@ -1,5 +1,7 @@ +#ifdef USE_BOOST #include @@ -8,29 +8,21 @@ #include -@@ -42,7 +44,7 @@ - void caffe_cpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, - const int N, const double alpha, const double* A, const double* x, - const double beta, double* y) { -- cblas_dgemv(CblasRowMajor, TransA, M, N, alpha, A, N, x, 1, beta, y, 1); -+ cblas_dgemv(CblasRowMajor, TransA, M, N, alpha, A, N, x, 1, beta, y, 1); - } - - template <> -@@ -232,8 +234,12 @@ +@@ -232,8 +234,13 @@ template Dtype caffe_nextafter(const Dtype b) { +#ifdef USE_BOOST return boost::math::nextafter( b, std::numeric_limits::max()); -+#endif ++#else + return std::nextafter( + b, std::numeric_limits::max()); ++#endif } template -@@ -247,12 +253,19 @@ +@@ -247,12 +254,19 @@ CHECK_GE(n, 0); CHECK(r); CHECK_LE(a, b); @@ -50,7 +42,7 @@ } template -@@ -269,12 +282,19 @@ +@@ -269,12 +283,19 @@ CHECK_GE(n, 0); CHECK(r); CHECK_GT(sigma, 0); @@ -70,7 +62,7 @@ } template -@@ -291,12 +311,19 @@ +@@ -291,12 +312,19 @@ CHECK(r); CHECK_GE(p, 0); CHECK_LE(p, 1); @@ -90,7 +82,7 @@ } template -@@ -311,12 +338,19 @@ +@@ -311,12 +339,19 @@ CHECK(r); CHECK_GE(p, 0); CHECK_LE(p, 1); diff --git a/src/caffe/common.cpp b/src/caffe/common.cpp index b1fcb33..4856192 100644 --- a/src/caffe/common.cpp +++ b/src/caffe/common.cpp @@ -1,6 +1,9 @@ #ifdef USE_BOOST #include #endif +#ifdef USE_GLOG +#include +#endif #include #include #include diff --git a/src/caffe/layers/dropout_layer.cpp b/src/caffe/layers/dropout_layer.cpp index d323dbe..533ab26 100644 --- a/src/caffe/layers/dropout_layer.cpp +++ b/src/caffe/layers/dropout_layer.cpp @@ -35,13 +35,11 @@ void DropoutLayer::Forward_cpu(const vector*>& bottom, unsigned int* mask = rand_vec_.mutable_cpu_data(); const int count = bottom[0]->count(); if (this->phase_ == TRAIN) { -#ifdef NO_CAFFE_MOBILE // Create random numbers caffe_rng_bernoulli(count, 1. - threshold_, mask); for (int i = 0; i < count; ++i) { top_data[i] = bottom_data[i] * mask[i] * scale_; } -#endif } else { caffe_copy(bottom[0]->count(), bottom_data, top_data); } diff --git a/src/caffe/net.cpp b/src/caffe/net.cpp index 4a9c960..e904ac1 100644 --- a/src/caffe/net.cpp +++ b/src/caffe/net.cpp @@ -1052,29 +1052,4 @@ const shared_ptr > Net::layer_by_name( INSTANTIATE_CLASS(Net); -#if 0 -// FIXME need? -/* force register */ -#define FORCE_REG(type) \ - extern LayerRegistry g_creator_f_##type; \ - extern LayerRegistry g_creator_d_##type; \ - LayerRegistry *__g_creator_f_##type = &g_creator_f_##type; \ - LayerRegistry *__g_creator_d_##type = &g_creator_d_##type - -FORCE_REG(TanH); -FORCE_REG(Pooling); -FORCE_REG(ReLU); -FORCE_REG(Sigmoid); -FORCE_REG(Softmax); -FORCE_REG(Convolution); - -FORCE_REG(Concat); -FORCE_REG(BNLL); -FORCE_REG(Flatten); -FORCE_REG(InnerProduct); -FORCE_REG(LRN); -FORCE_REG(MemoryData); - -FORCE_REG(Split); -#endif } // namespace caffe diff --git a/src/caffe/util/benchmark.cpp b/src/caffe/util/benchmark.cpp index 9de960c..8f46c88 100644 --- a/src/caffe/util/benchmark.cpp +++ b/src/caffe/util/benchmark.cpp @@ -7,11 +7,6 @@ namespace caffe { -static float time_diff_ms(struct timeval start, struct timeval stop) { - return (stop.tv_sec - start.tv_sec)*1000 - + (float)(stop.tv_usec - start.tv_usec)/1000; -} - Timer::Timer() : initted_(false), running_(false), diff --git a/src/caffe/util/io.cpp b/src/caffe/util/io.cpp index 17a74af..e65fd4c 100644 --- a/src/caffe/util/io.cpp +++ b/src/caffe/util/io.cpp @@ -45,7 +45,7 @@ bool ReadProtoFromTextFile(const char* filename, Message* proto) { } void WriteProtoToTextFile(const Message& proto, const char* filename) { - int fd = open(filename, O_WRONLY|O_CREAT|O_TRUNC, 0644); + int fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 0644); FileOutputStream* output = new FileOutputStream(fd); CHECK(google::protobuf::TextFormat::Print(proto, output)); delete output; diff --git a/src/caffe/util/math_functions.cpp b/src/caffe/util/math_functions.cpp index 4a6af28..77a0cd9 100644 --- a/src/caffe/util/math_functions.cpp +++ b/src/caffe/util/math_functions.cpp @@ -44,7 +44,7 @@ template <> void caffe_cpu_gemv(const CBLAS_TRANSPOSE TransA, const int M, const int N, const double alpha, const double* A, const double* x, const double beta, double* y) { - cblas_dgemv(CblasRowMajor, TransA, M, N, alpha, A, N, x, 1, beta, y, 1); + cblas_dgemv(CblasRowMajor, TransA, M, N, alpha, A, N, x, 1, beta, y, 1); } template <> @@ -237,9 +237,10 @@ Dtype caffe_nextafter(const Dtype b) { #ifdef USE_BOOST return boost::math::nextafter( b, std::numeric_limits::max()); -#endif +#else return std::nextafter( b, std::numeric_limits::max()); +#endif } template