diff --git a/CMakeLists.txt b/CMakeLists.txt index 134523e7d7..682bafef46 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -192,6 +192,14 @@ else(MSVC) string(APPEND CMAKE_CXX_FLAGS " -Werror") string(APPEND CMAKE_CXX_FLAGS " -Wno-deprecated-declarations") string(APPEND CMAKE_CXX_FLAGS " -Wimplicit-fallthrough") + if(MINGW) + if(FBGEMM_LIBRARY_TYPE STREQUAL "static") + target_compile_definitions(fbgemm_generic PRIVATE ASMJIT_STATIC) + target_compile_definitions(fbgemm_avx2 PRIVATE ASMJIT_STATIC) + target_compile_definitions(fbgemm_avx512 PRIVATE ASMJIT_STATIC) + endif() + endif() + target_compile_options(fbgemm_avx2 PRIVATE "-m64" "-mavx2" "-mf16c" "-mfma") target_compile_options(fbgemm_avx512 PRIVATE @@ -244,7 +252,7 @@ if(NOT TARGET asmjit) #build asmjit #For MSVC shared build, asmjit needs to be shared also. - if (MSVC AND (FBGEMM_LIBRARY_TYPE STREQUAL "shared")) + if ((MSVC OR MINGW) AND (FBGEMM_LIBRARY_TYPE STREQUAL "shared")) set(ASMJIT_STATIC OFF) else() set(ASMJIT_STATIC ON) diff --git a/bench/AlignedVec.h b/bench/AlignedVec.h index f99866e287..21cddc1e0f 100644 --- a/bench/AlignedVec.h +++ b/bench/AlignedVec.h @@ -108,7 +108,7 @@ class aligned_allocator { // Mallocator wraps malloc(). void* pv = nullptr; int ret; -#ifdef _MSC_VER +#ifdef _WIN32 pv = _aligned_malloc(n * sizeof(T), Alignment); ret = 0; #else @@ -126,7 +126,7 @@ class aligned_allocator { } void deallocate(T* const p, const std::size_t /*n*/) const { -#ifdef _MSC_VER +#ifdef _WIN32 _aligned_free(p); #else free(p); diff --git a/bench/BenchUtils.h b/bench/BenchUtils.h index 91376e3c08..0470677490 100644 --- a/bench/BenchUtils.h +++ b/bench/BenchUtils.h @@ -367,19 +367,22 @@ void performance_test( } #if defined(USE_MKL) || defined(USE_BLAS) - // Compare results - for (size_t i = 0; i < C_ref[0].size(); i++) { - if (std::abs(C_ref[0][i] - C_fb[0][i]) > 1e-3) { - fprintf( - stderr, - "Error: too high diff between fp32 ref %f and fp16 %f at %ld\n", - C_ref[0][i], - C_fb[0][i], - i); - return; - } - } + // Compare results + for (size_t i = 0; i < C_ref[0].size(); i++) { + if (std::abs(C_ref[0][i] - C_fb[0][i]) > 1e-3) { + // Use different format specifiers for MinGW and others + #ifdef __MINGW32__ + fprintf(stderr, "Error: too high diff between fp32 ref %f and fp16 %f at %llu\n", + C_ref[0][i], C_fb[0][i], static_cast(i)); + #else + fprintf(stderr, "Error: too high diff between fp32 ref %f and fp16 %f at %zu\n", + C_ref[0][i], C_fb[0][i], i); + #endif + return; + } + } #endif + } #ifdef USE_MKL diff --git a/bench/CMakeLists.txt b/bench/CMakeLists.txt index 8b07789428..e06242e479 100644 --- a/bench/CMakeLists.txt +++ b/bench/CMakeLists.txt @@ -65,12 +65,13 @@ macro(add_benchmark BENCHNAME) if (${BLAS_FOUND}) target_compile_options(${BENCHNAME} PRIVATE "-DUSE_BLAS") target_link_libraries(${BENCHNAME} "${BLAS_LIBRARIES}") + target_include_directories(${BENCHNAME} PRIVATE "${BLAS_INCLUDE_DIRS}") endif() set_target_properties(${BENCHNAME} PROPERTIES FOLDER test) endmacro() -if(FBGEMM_BUILD_BENCHMARKS) +if(FBGEMM_BUILD_BENCHMARKS) set(BENCHMARKS "") @@ -80,6 +81,9 @@ if(FBGEMM_BUILD_BENCHMARKS) get_filename_component(BENCH_FILE_ONLY "${BENCH_FILE}" NAME) add_benchmark("${BENCH_NAME}" "${BENCH_FILE_ONLY}") + if(FBGEMM_LIBRARY_TYPE STREQUAL "static") #Needed by MSVC and MinGW + target_compile_definitions("${BENCH_NAME}" PRIVATE FBGEMM_STATIC) + endif() list(APPEND BENCHMARKS "${BENCH_NAME}") endforeach() diff --git a/bench/RowwiseAdagradBenchmark.cc b/bench/RowwiseAdagradBenchmark.cc index ac7fa01c42..39074f0ada 100644 --- a/bench/RowwiseAdagradBenchmark.cc +++ b/bench/RowwiseAdagradBenchmark.cc @@ -167,19 +167,27 @@ void run_benchmark( } } - for (size_t i = 0; i < w.size(); ++i) { - assert(fabs(w[i] - w_ref[i]) < 1e-5); - if (fabs(w[i] - w_ref[i]) >= 1e-5) { - fprintf(stderr, "%ld %f %f\n", i, w[i], w_ref[i]); - } +for (size_t i = 0; i < w.size(); ++i) { + assert(fabs(w[i] - w_ref[i]) < 1e-5); + if (fabs(w[i] - w_ref[i]) >= 1e-5) { + #ifdef __MINGW32__ + fprintf(stderr, "%llu %f %f\n", static_cast(i), w[i], w_ref[i]); + #else + fprintf(stderr, "%zu %f %f\n", i, w[i], w_ref[i]); + #endif } +} - for (size_t i = 0; i < h.size(); ++i) { - assert(fabs(h[i] - h_ref[i]) < 1e-5); - if (fabs(h[i] - h_ref[i]) >= 1e-5) { - fprintf(stderr, "%ld %f %f\n", i, h[i], h_ref[i]); - } +for (size_t i = 0; i < h.size(); ++i) { + assert(fabs(h[i] - h_ref[i]) < 1e-5); + if (fabs(h[i] - h_ref[i]) >= 1e-5) { + #ifdef __MINGW32__ + fprintf(stderr, "%llu %f %f\n", static_cast(i), h[i], h_ref[i]); + #else + fprintf(stderr, "%zu %f %f\n", i, h[i], h_ref[i]); + #endif } +} cout << "indices: " << (isIndex64b ? " 64bits " : " 32bits ") << " | "; cout << "weight_decay: " << setw(1) << adjust_weight_decay << " | "; diff --git a/bench/SparseAdagradBenchmark.cc b/bench/SparseAdagradBenchmark.cc index b871b8ff9d..951632c01f 100644 --- a/bench/SparseAdagradBenchmark.cc +++ b/bench/SparseAdagradBenchmark.cc @@ -174,19 +174,28 @@ void run_benchmark( } } - for (size_t i = 0; i < w.size(); ++i) { - assert(fabs(w[i] - w_ref[i]) < 1e-5); - if (fabs(w[i] - w_ref[i]) >= 1e-5) { - fprintf(stderr, "%ld %f %f\n", i, w[i], w_ref[i]); - } +for (size_t i = 0; i < w.size(); ++i) { + assert(fabs(w[i] - w_ref[i]) < 1e-5); + if (fabs(w[i] - w_ref[i]) >= 1e-5) { + #ifdef __MINGW32__ + fprintf(stderr, "%llu %f %f\n", static_cast(i), w[i], w_ref[i]); + #else + fprintf(stderr, "%zu %f %f\n", i, w[i], w_ref[i]); + #endif } +} - for (size_t i = 0; i < h.size(); ++i) { - assert(fabs(h[i] - h_ref[i]) < 1e-5); - if (fabs(h[i] - h_ref[i]) >= 1e-5) { - fprintf(stderr, "%ld %f %f\n", i, h[i], h_ref[i]); - } +for (size_t i = 0; i < h.size(); ++i) { + assert(fabs(h[i] - h_ref[i]) < 1e-5); + if (fabs(h[i] - h_ref[i]) >= 1e-5) { + #ifdef __MINGW32__ + fprintf(stderr, "%llu %f %f\n", static_cast(i), h[i], h_ref[i]); + #else + fprintf(stderr, "%zu %f %f\n", i, h[i], h_ref[i]); + #endif } +} + cout << "indices: " << (isIndex64b ? " 64bits " : " 32bits ") << " | "; cout << "weight_decay: " << setw(1) << adjust_weight_decay << " | "; diff --git a/bench/SparseDenseMMFP32Benchmark.cc b/bench/SparseDenseMMFP32Benchmark.cc index ba0240d267..cea84ffd9d 100644 --- a/bench/SparseDenseMMFP32Benchmark.cc +++ b/bench/SparseDenseMMFP32Benchmark.cc @@ -112,16 +112,18 @@ int main(int, char**) { // Compare results for (size_t i = 0; i < ctDataRef.size(); i++) { if (std::abs(ctDataRef[i] - ctDataIntrin[i]) > 1e-3) { - fprintf( - stderr, - "Error: Results differ ref %f and test %f at %ld\n", - ctDataRef[i], - ctDataIntrin[i], - i); + #ifdef __MINGW32__ + fprintf(stderr, "Error: Results differ ref %f and test %f at %llu\n", + ctDataRef[i], ctDataIntrin[i], static_cast(i)); + #else + fprintf(stderr, "Error: Results differ ref %f and test %f at %zu\n", + ctDataRef[i], ctDataIntrin[i], i); + #endif return 1; } } + double effective_gflops_intrin = effective_flop / secs_intrin / 1e9; cout << "[" << setw(5) << index << "]" << setw(7) << m << setw(7) << n << setw(7) << k << fixed << setw(7) << setprecision(2) << fnz diff --git a/bench/SparseDenseMMInt8Benchmark.cc b/bench/SparseDenseMMInt8Benchmark.cc index 3a92d1bf5c..00e2d1e5b0 100644 --- a/bench/SparseDenseMMInt8Benchmark.cc +++ b/bench/SparseDenseMMInt8Benchmark.cc @@ -162,18 +162,20 @@ int main(int, char**) { // "ctDataIntrin_u8"); // // Compare results - for (size_t i = 0; i < ctDataRef.size(); i++) { + for (size_t i = 0; i < ctDataRef.size(); i++) { if (std::abs(ctDataRef_u8[i] - ctDataIntrin_u8[i]) > 0) { - fprintf( - stderr, - "Error: Results differ ref %d and test %d at %ld\n", - ctDataRef_u8[i], - ctDataIntrin_u8[i], - i); + #ifdef __MINGW32__ + fprintf(stderr, "Error: Results differ ref %hhu and test %hhu at %llu\n", + ctDataRef_u8[i], ctDataIntrin_u8[i], static_cast(i)); + #else + fprintf(stderr, "Error: Results differ ref %hhu and test %hhu at %zu\n", + ctDataRef_u8[i], ctDataIntrin_u8[i], i); + #endif return 1; } } + double effective_gflops_intrin = effective_flop / secs_intrin / 1e9; cout << "[" << setw(5) << index << "]" << setw(7) << m << setw(7) << n << setw(7) << k << fixed << setw(7) << setprecision(2) << fnz diff --git a/src/Utils.cc b/src/Utils.cc index c61b1a4dd1..110f02854e 100644 --- a/src/Utils.cc +++ b/src/Utils.cc @@ -429,7 +429,7 @@ void* fbgemmAlignedAlloc( bool raiseException /*=false*/) { void* aligned_mem = nullptr; int ret; -#ifdef _MSC_VER +#ifdef _WIN32 aligned_mem = _aligned_malloc(size, align); ret = 0; #else @@ -443,7 +443,7 @@ void* fbgemmAlignedAlloc( } void fbgemmAlignedFree(void* p) { -#ifdef _MSC_VER +#ifdef _WIN32 _aligned_free(p); #else free(p); diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 2cffddf608..d9adf2524a 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -47,6 +47,7 @@ if(${OpenMP_FOUND}) endif() macro(add_gtest TESTNAME) + add_executable(${TESTNAME} ${ARGN} ../bench/BenchUtils.cc EmbeddingSpMDMTestUtils.cc @@ -62,6 +63,11 @@ macro(add_gtest TESTNAME) target_compile_definitions(${TESTNAME} PRIVATE FBGEMM_STATIC) endif() else(MSVC) + if(MINGW) + #if (FBGEMM_LIBRARY_TYPE STREQUAL "static") + target_compile_definitions(${TESTNAME} PRIVATE FBGEMM_STATIC) + #endif() + endif() target_compile_options(${TESTNAME} PRIVATE "-m64" "-mavx2" "-mfma" "-masm=intel") endif(MSVC)