From a6a2c80c5286c4ceceae778f2c40e8899666cf54 Mon Sep 17 00:00:00 2001 From: "Kevin A. Brown" Date: Mon, 8 Feb 2021 07:14:29 +0000 Subject: [PATCH 01/14] Updated incast (uses only non-blocking comm), added 3 new SWMs (allreduce, spread or one_to_many, & many_to_many), and added a SWM_Mark_Iteration to print iteration time. --- swm/src/Makefile.subdir | 14 + swm/src/allreduce/allreduce.cpp | 80 + swm/src/allreduce/allreduce.h | 66 + swm/src/allreduce/allreduce256_workload.json | 14 + swm/src/allreduce/allreduce32_workload.json | 14 + swm/src/allreduce/allreduce_workload.json | 14 + swm/src/incast/all_to_one_swm_user_code.cpp | 415 ++- swm/src/incast/all_to_one_swm_user_code.h | 3 + swm/src/incast/incast.json | 21 +- swm/src/incast/incast1.json | 21 +- swm/src/incast/incast2.json | 19 +- swm/src/lammps/.deps/.dirstamp | 0 swm/src/lammps/.deps/lammps.Plo | 1 - swm/src/lammps/.deps/lammps.Tpo | 2320 ----------------- swm/src/lammps/.dirstamp | 0 swm/src/lammps/lammps_workload1.json | 21 + swm/src/many_to_many/README | 12 + .../many_to_many_swm_user_code.cpp | 238 ++ .../many_to_many/many_to_many_swm_user_code.h | 93 + .../many_to_many/many_to_many_workload.json | 21 + .../many_to_many/many_to_many_workload1.json | 21 + .../.deps/nearest_neighbor_swm_user_code.Plo | 1 - .../nekbone/.deps/nekbone_swm_user_code.Plo | 1 - swm/src/nekbone/workload1.json | 31 + swm/src/spread/one_to_many_swm_user_code.cpp | 213 ++ swm/src/spread/one_to_many_swm_user_code.h | 91 + swm/src/spread/spread_workload.json | 21 + swm/src/swm-include.h | 3 + 28 files changed, 1203 insertions(+), 2566 deletions(-) create mode 100644 swm/src/allreduce/allreduce.cpp create mode 100644 swm/src/allreduce/allreduce.h create mode 100644 swm/src/allreduce/allreduce256_workload.json create mode 100644 swm/src/allreduce/allreduce32_workload.json create mode 100644 swm/src/allreduce/allreduce_workload.json delete mode 100644 swm/src/lammps/.deps/.dirstamp delete mode 100644 swm/src/lammps/.deps/lammps.Plo 
delete mode 100644 swm/src/lammps/.deps/lammps.Tpo delete mode 100644 swm/src/lammps/.dirstamp create mode 100644 swm/src/lammps/lammps_workload1.json create mode 100644 swm/src/many_to_many/README create mode 100644 swm/src/many_to_many/many_to_many_swm_user_code.cpp create mode 100644 swm/src/many_to_many/many_to_many_swm_user_code.h create mode 100644 swm/src/many_to_many/many_to_many_workload.json create mode 100644 swm/src/many_to_many/many_to_many_workload1.json delete mode 100644 swm/src/nearest_neighbor/.deps/nearest_neighbor_swm_user_code.Plo delete mode 100644 swm/src/nekbone/.deps/nekbone_swm_user_code.Plo create mode 100644 swm/src/nekbone/workload1.json create mode 100644 swm/src/spread/one_to_many_swm_user_code.cpp create mode 100644 swm/src/spread/one_to_many_swm_user_code.h create mode 100644 swm/src/spread/spread_workload.json diff --git a/swm/src/Makefile.subdir b/swm/src/Makefile.subdir index ad5a468..0eaa9bf 100644 --- a/swm/src/Makefile.subdir +++ b/swm/src/Makefile.subdir @@ -8,6 +8,9 @@ include_HEADERS = \ src/nearest_neighbor/nearest_neighbor_swm_user_code.h \ src/nearest_neighbor/boost_ptree_array_to_std_vector.h \ src/incast/all_to_one_swm_user_code.h \ + src/spread/one_to_many_swm_user_code.h \ + src/many_to_many/many_to_many_swm_user_code.h \ + src/allreduce/allreduce.h \ src/milc/milc_swm_user_code.h src_libswm_la_SOURCES = src/lammps/lammps.cpp \ @@ -15,15 +18,26 @@ src_libswm_la_SOURCES = src/lammps/lammps.cpp \ src/nekbone/nekbone_swm_user_code.cpp \ src/nearest_neighbor/nearest_neighbor_swm_user_code.cpp \ src/incast/all_to_one_swm_user_code.cpp \ + src/spread/one_to_many_swm_user_code.cpp \ + src/many_to_many/many_to_many_swm_user_code.cpp \ + src/allreduce/allreduce.cpp \ src/milc/milc_swm_user_code.cpp dist_data_DATA = src/lammps/lammps_workload.json \ + src/lammps/lammps_workload1.json \ src/nearest_neighbor/skeleton.json \ src/nekbone/workload.json \ + src/nekbone/workload1.json \ src/point_to_point/example.json \ 
src/incast/incast.json \ src/incast/incast1.json \ src/incast/incast2.json \ + src/spread/spread_workload.json \ + src/many_to_many/many_to_many_workload.json \ + src/many_to_many/many_to_many_workload1.json \ + src/allreduce/allreduce_workload.json \ + src/allreduce/allreduce32_workload.json \ + src/allreduce/allreduce256_workload.json \ src/milc/milc_skeleton.json diff --git a/swm/src/allreduce/allreduce.cpp b/swm/src/allreduce/allreduce.cpp new file mode 100644 index 0000000..2256625 --- /dev/null +++ b/swm/src/allreduce/allreduce.cpp @@ -0,0 +1,80 @@ +#include "allreduce.h" + +AllReduceSWMUserCode::AllReduceSWMUserCode( + boost::property_tree::ptree cfg, + void**& generic_ptrs + ) : + process_cnt(cfg.get("jobs.size", 1)), + iteration_cnt(cfg.get("jobs.cfg.iteration_cnt", 1)), + msg_req_bytes(cfg.get("jobs.cfg.msg_req_bytes", 1024)), + msg_rsp_bytes(cfg.get("jobs.cfg.msg_rsp_bytes", 0)), + compute_delay(cfg.get("jobs.cfg.compute_delay", 0)), + show_iterations(cfg.get("jobs.cfg.show_iterations", false)) +{ + + request_vc = 0; + response_vc = 0; + + process_id = *((int*)generic_ptrs[0]); +} + +void +AllReduceSWMUserCode::call() +{ + /* Print job description */ + if(process_id == 0) + { + std::cout << std::endl << "JOB: Allreduce | size: " << process_cnt; + std::cout << " | interation_cnt: " << iteration_cnt; + std::cout << " | compute_delay: " << compute_delay << std::endl; + } + + + uint32_t tag = 0; + for(uint32_t iter=0; iter < iteration_cnt; iter++) + { + + if (compute_delay) + SWM_Compute(compute_delay); + + //if(process_id == 0) + //{ + /* Print the start time of the Allreduce on the rank */ + if(show_iterations){ + SWM_Mark_Iteration(tag); + tag = tag +1; + } + //} + + SWM_Allreduce( + msg_req_bytes, // payload + msg_rsp_bytes, // pkt_rsp_bytes + SWM_COMM_WORLD, + request_vc, + response_vc, + NO_BUFFER, + NO_BUFFER); + + //if(process_id == 0) + //{ + /* Print the end time of the Allreduce call on the rank */ + if(show_iterations){ + SWM_Mark_Iteration(tag); 
+ tag = tag +1; + } + //} + } + + + + SWM_Finalize(); +} + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ft=c ts=8 sts=4 sw=4 expandtab + */ diff --git a/swm/src/allreduce/allreduce.h b/swm/src/allreduce/allreduce.h new file mode 100644 index 0000000..22e7a69 --- /dev/null +++ b/swm/src/allreduce/allreduce.h @@ -0,0 +1,66 @@ +/* + * ===================================================================================== + * + * Filename: spread_swm_user_code.h + * + * Description: + * + * Version: 1.0 + * Created: 09/26/2020 01:05:02 PM + * Revision: none + * Compiler: gcc + * + * Author: Kevin A. Brown, kb@anl.gov + * Company: Argonne Nat Lab + * + * ===================================================================================== + */ + +#ifndef _ALLREDUCE_TEMPLATE_USER_CODE_ +#define _ALLREDUCE_TEMPLATE_USER_CODE_ + +#define SWM_APP_TAG_BASE 0 + +#include + +#include +#include +#include +#include +#include +#include + +#include "swm-include.h" +using namespace std; + +class AllReduceSWMUserCode +{ + +public: + + AllReduceSWMUserCode( +// SWMUserIF* user_if, + boost::property_tree::ptree cfg, + void**& generic_ptrs + ); + + void call(); + +protected: + uint32_t request_vc; + uint32_t response_vc; + uint32_t msg_req_bytes; + uint32_t msg_rsp_bytes; + + uint32_t process_id; + uint32_t process_cnt; + uint32_t iteration_cnt; + uint32_t compute_delay; + + // for debugging + bool show_iterations; + bool debug; + +}; + +#endif diff --git a/swm/src/allreduce/allreduce256_workload.json b/swm/src/allreduce/allreduce256_workload.json new file mode 100644 index 0000000..8dd1360 --- /dev/null +++ b/swm/src/allreduce/allreduce256_workload.json @@ -0,0 +1,14 @@ +{ +"jobs" : { + "dll_path": "${FABSIM_APPS_PATH}/dll/allreduce.so", + "size": 256, + "cfg": { + "app": "allreduce", + "iteration_cnt": 10, + "compute_delay": 1000, + "msg_req_bytes" : 8, + "debug" : false, + "cpu_freq" : 1e9 + } + } +} diff --git 
a/swm/src/allreduce/allreduce32_workload.json b/swm/src/allreduce/allreduce32_workload.json new file mode 100644 index 0000000..fb77ad4 --- /dev/null +++ b/swm/src/allreduce/allreduce32_workload.json @@ -0,0 +1,14 @@ +{ +"jobs" : { + "dll_path": "${FABSIM_APPS_PATH}/dll/allreduce.so", + "size": 32, + "cfg": { + "app": "allreduce", + "iteration_cnt": 10, + "compute_delay": 1000, + "msg_req_bytes" : 8, + "debug" : false, + "cpu_freq" : 1e9 + } + } +} diff --git a/swm/src/allreduce/allreduce_workload.json b/swm/src/allreduce/allreduce_workload.json new file mode 100644 index 0000000..ec37f3d --- /dev/null +++ b/swm/src/allreduce/allreduce_workload.json @@ -0,0 +1,14 @@ +{ +"jobs" : { + "dll_path": "${FABSIM_APPS_PATH}/dll/allreduce.so", + "size": 32, + "cfg": { + "app": "allreduce", + "iteration_cnt": 100, + "compute_delay": 0, + "msg_req_bytes" : 8, + "debug" : false, + "cpu_freq" : 4e9 + } + } +} diff --git a/swm/src/incast/all_to_one_swm_user_code.cpp b/swm/src/incast/all_to_one_swm_user_code.cpp index 8ec6878..7c614d8 100644 --- a/swm/src/incast/all_to_one_swm_user_code.cpp +++ b/swm/src/incast/all_to_one_swm_user_code.cpp @@ -1,227 +1,220 @@ #include "all_to_one_swm_user_code.h" AllToOneSWMUserCode::AllToOneSWMUserCode( - boost::property_tree::ptree cfg, - void**& generic_ptrs -) : - process_cnt(cfg.get("jobs.size", 1)), - dst_rank_id(cfg.get("jobs.cfg.dst_rank_id",0)), - scattered_start(cfg.get("jobs.cfg.scattered_start", false)), - start_delay_max(cfg.get("jobs.cfg.start_delay_max", 0)), - synchronous(cfg.get("jobs.cfg.synchronous", 0)), - use_any_src(cfg.get("jobs.cfg.use_any_src", 0)), - blocking_comm(cfg.get("jobs.cfg.blocking_comm", 0)), - debug(cfg.get("jobs.cfg.debug", false)) + boost::property_tree::ptree cfg, + void**& generic_ptrs + ) : + process_cnt(cfg.get("jobs.size", 1)), + dst_rank_id(cfg.get("jobs.cfg.dst_rank_id",0)), + iteration_cnt(cfg.get("jobs.cfg.iteration_cnt", 1)), + msg_req_bytes(cfg.get("jobs.cfg.msg_req_bytes", 0)), + 
msg_rsp_bytes(cfg.get("jobs.cfg.msg_rsp_bytes", 0)), + compute_delay(cfg.get("jobs.cfg.compute_delay", 0)), + use_any_src(cfg.get("jobs.cfg.use_any_src", false)), + blocking_comm(cfg.get("jobs.cfg.blocking_comm", false)), + scattered_start(cfg.get("jobs.cfg.scattered_start", false)), + start_delay_max(cfg.get("jobs.cfg.start_delay_max", 0)), + randomize_comm_order(cfg.get("jobs.cfg.randomize_communication_order", false)), + show_iterations(cfg.get("jobs.cfg.show_iterations", false)), + debug(cfg.get("jobs.cfg.debug", false)) { - // extract the src/dst rank id intervals - int num = 0; - BOOST_FOREACH(const boost::property_tree::ptree::value_type &v, cfg.get_child("jobs.cfg.src_rank_id_interval")) - { - std::string value = v.second.data(); + // extract the src/dst rank id intervals + int num = 0; + BOOST_FOREACH(const boost::property_tree::ptree::value_type &v, cfg.get_child("jobs.cfg.src_rank_id_interval")) + { + std::string value = v.second.data(); - if(num == 0) min_source_id = atoi(value.c_str()); - if(num == 1) max_source_id = atoi(value.c_str()); + if(num == 0) min_source_id = atoi(value.c_str()); + if(num == 1) max_source_id = atoi(value.c_str()); - num++; - } - assert(num == 2); + num++; + } + assert(num == 2); - assert(dst_rank_id < process_cnt); + assert(dst_rank_id < process_cnt); + process_id = *((int*)generic_ptrs[0]); } void AllToOneSWMUserCode::call() { - uint32_t *send_handles = NULL; - uint32_t *recv_handles = NULL; - - uint32_t send_limit = 1; - uint32_t recv_limit = (max_source_id - min_source_id) + 1; - - //SWMPiggybackBase* dummy_piggyback = nullptr; - - if(synchronous) - { - send_handles = new uint32_t[send_limit * iteration_cnt]; - recv_handles = new uint32_t[recv_limit * iteration_cnt]; - } - - - if ((process_id != dst_rank_id) && (process_id >= min_source_id && process_id <= max_source_id) ) // do not send messages to self - { - - for(uint32_t iter=0; iter < iteration_cnt; iter++) - { - - //msg_traffic_desc msg_desc; - - 
//GetMsgDetails(&msg_desc); - - // if we want to scatter the start time, we mimic this delay with a compute delay - if(scattered_start) - { - assert(start_delay_max > 0); - /* TODO: Use a better random number generator here. */ - uint32_t start_delay = rand() % start_delay_max; - std::cout << "process_id: " << process_id << " delay start by " << start_delay << " cycles" << std::endl; - SWM_Compute(start_delay); - } - - /*if(!synchronous) - { - - SWM_Synthetic( - dst_rank_id, //dst - msg_desc.msg_req_vc, - msg_desc.msg_rsp_vc, - msg_desc.pkt_rsp_vc, - msg_desc.msg_req_bytes, - msg_desc.msg_rsp_bytes, - msg_desc.pkt_rsp_bytes, - msg_desc.msg_req_routing_type, - msg_desc.msg_rsp_routing_type, - msg_desc.pkt_rsp_routing_type, - dummy_piggyback, //NULL, - msg_desc.attribute -#ifdef FABSIM_EMULATION - , msg_desc.l2_encoding -#endif - ); - - - if(debug) - { - std::cout << "process_id: " << process_id << " sent synthetic message to destination: " << dst_rank_id << ", iter: " << iter << " @ " << SWM_Clock() << std::endl; - } - - } - else - {*/ - - //uint32_t process_id_offset = ( (process_id + 1) << 32); - //uint32_t iter_offset = ( (iter + 1) << 8); - //SWM_TAG this_tag = SWM_APP_TAG_BASE + process_id_offset + iter_offset; - uint32_t iter_offset = (process_cnt * (iter) ); - SWM_TAG this_tag = SWM_APP_TAG_BASE + (sizeof(SWM_TAG) * ( (process_id + 1) + iter_offset) ); //(iter+1) ); - //uint32_t send_handle[send_limit]; - uint32_t send_count = 0; - - if(!blocking_comm) - { - - SWM_Isend( - dst_rank_id, - SWM_COMM_WORLD, - this_tag, - -1, - -1, - NO_BUFFER, - 0, - 0, - &(send_handles[send_count]), - 0, - 0 - ); - } - else - { - SWM_Send( - dst_rank_id, - SWM_COMM_WORLD, - this_tag, - -1,// req-vc - -1, //resp-vc - NO_BUFFER, - 0, //req-bytes - 0, //resp-bytes - 0,//routing type - 0 //routing type - ); - } - - if(!blocking_comm) - { - SWM_Waitall(send_limit, send_handles); - } - - if(debug) - { - std::cout << "process_id: " << process_id << " sent message to destination: " << 
dst_rank_id << ", tag: " << this_tag << ", iter: " << iter << std::endl; - } - - //} // else(synchronous) - //MM comment: no def for SWM_Noop in codes - /*for(uint32_t noop=0; noop= min_source_id && process_id <= max_source_id) ) // do not send messages to self + { + // if we want to scatter the start time, we mimic this delay with a compute delay + if(scattered_start) + { + assert(start_delay_max > 0); + /* TODO: Use a better random number generator here. */ + uint32_t start_delay = rand() % start_delay_max; + std::cout << std::endl << "process_id: " << process_id << " delay start by " << start_delay << " cycles"; + SWM_Compute(start_delay); + } + uint32_t marker = 0; + for(uint32_t iter=0; iter < iteration_cnt; iter++) + { + if (compute_delay) + SWM_Compute(compute_delay); + + + if(show_iterations){ + SWM_Mark_Iteration(marker); + marker++; + } + + //uint32_t process_id_offset = ( (process_id + 1) << 32); + //uint32_t iter_offset = ( (iter + 1) << 8); + //SWM_TAG this_tag = SWM_APP_TAG_BASE + process_id_offset + iter_offset; + uint32_t iter_offset = (process_cnt * (iter) ); + SWM_TAG this_tag = SWM_APP_TAG_BASE + (sizeof(SWM_TAG) * ( (process_id + 1) + iter_offset) ); //(iter+1) ); + //uint32_t send_handle[send_limit]; + uint32_t send_count = 0; + + if(!blocking_comm) + { + + SWM_Isend( + dst_rank_id, + SWM_COMM_WORLD, + this_tag, + -1, + -1, + NO_BUFFER, + msg_req_bytes, + msg_rsp_bytes, + &(send_handles[send_count]), + 0, + 0 + ); + } + else + { + SWM_Send( + dst_rank_id, + SWM_COMM_WORLD, + this_tag, + -1,// req-vc + -1, //resp-vc + NO_BUFFER, + msg_req_bytes, //req-bytes + msg_rsp_bytes, //resp-bytes + 0,//routing type + 0 //routing type + ); + } + + if(!blocking_comm) + { + SWM_Waitall(send_limit, send_handles); + } + + if(debug) + { + std::cout << std::endl << "process_id: " << process_id << " sent message to destination: " << dst_rank_id << ", tag: " << this_tag << ", iter: " << iter ; + } + + if(show_iterations){ + SWM_Mark_Iteration(marker); + marker++; 
+ } + } // end-for(iteration_cnt) +} +else if(process_id == dst_rank_id) +{ + + // need to receive from everybody every iteration... + for(uint32_t iter = 0; iter < iteration_cnt; iter++) + { + + uint32_t count = 0; + + for(uint32_t index = min_source_id; index <= max_source_id; index++, count++) + { + + uint32_t iter_offset = (process_cnt * (iter) ); + //SWM_TAG this_tag = SWM_APP_TAG_BASE + (sizeof(SWM_TAG) * (index + 1) * (iter+1) ); + SWM_TAG this_tag = SWM_APP_TAG_BASE + (sizeof(SWM_TAG) * ( (index + 1) + iter_offset) ); + + uint32_t receive_from_proc = (!use_any_src) ? index : -1; + + if(debug) + { + std::cout << std::endl << "process_id: " << process_id << " expecting to recv data from: " << receive_from_proc << " with recv tag: " << this_tag << " | iter_" << iter; + } + + + if(!blocking_comm) + { + SWM_Irecv( + receive_from_proc, + SWM_COMM_WORLD, + this_tag, + NO_BUFFER, + &(recv_handles[count]) + ); + } + else + { + SWM_Recv( + receive_from_proc, + SWM_COMM_WORLD, + this_tag, + NO_BUFFER + ); + } + + if(debug) + { + std::cout << std::endl << "process_id: " << process_id << " received data from src: " << index << ", iteration: " << iter ; + } + + } // end of for-loop(all_sources) + + if(!blocking_comm) + { + SWM_Waitall(recv_limit, recv_handles); + } + + //SWM_Mark_Iteration(iter); + } // end for-loop(iteration_cnt) + +} + +SWM_Finalize(); } +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ft=c ts=8 sts=4 sw=4 expandtab + */ diff --git a/swm/src/incast/all_to_one_swm_user_code.h b/swm/src/incast/all_to_one_swm_user_code.h index 877fb1f..8dd1428 100644 --- a/swm/src/incast/all_to_one_swm_user_code.h +++ b/swm/src/incast/all_to_one_swm_user_code.h @@ -69,6 +69,8 @@ class AllToOneSWMUserCode uint32_t min_source_id; uint32_t max_source_id; + bool randomize_comm_order; + // are we staggering the start time of the srcs bool scattered_start; @@ -85,6 +87,7 @@ class AllToOneSWMUserCode bool blocking_comm; // for debugging 
+ bool show_iterations; bool debug; }; diff --git a/swm/src/incast/incast.json b/swm/src/incast/incast.json index d80c9db..2b580e2 100644 --- a/swm/src/incast/incast.json +++ b/swm/src/incast/incast.json @@ -1,21 +1,20 @@ { "jobs" : { "dll_path": "${FABSIM_APPS_PATH}/dll/incast.so", - "size": 9, + "size": 4, "cfg": { "app": "incast", - "iteration_cnt": 150, - "compute_delay": 0, - "noop_cnt": 0, - "msg_size": 13107200, - "start_delay_max" : 0, + "iteration_cnt": 1, + "compute_delay": 40000, + "msg_req_bytes": 16, + "msg_rsp_bytes": 0, + "start_delay_max" : 10000, "scattered_start" : false, - "synchronous": true, - "dst_rank_id":8, + "dst_rank_id":3, "randomize_communication_order": false, - "blocking_comm" : false, - "debug" : true, - "src_rank_id_interval": [0,7], + "blocking_comm" : true, + "debug" : false, + "src_rank_id_interval": [0,2], "cpu_freq" : 4e9 } } diff --git a/swm/src/incast/incast1.json b/swm/src/incast/incast1.json index b5f2b54..eef51b4 100644 --- a/swm/src/incast/incast1.json +++ b/swm/src/incast/incast1.json @@ -1,21 +1,20 @@ { "jobs" : { "dll_path": "${FABSIM_APPS_PATH}/dll/incast.so", - "size": 9, + "size": 512, "cfg": { "app": "incast", - "iteration_cnt": 150, - "compute_delay": 0, - "noop_cnt": 0, - "msg_size": 13107200, - "start_delay_max" : 0, + "iteration_cnt": 500, + "compute_delay": 80000, + "msg_req_bytes": 160, + "msg_rsp_bytes": 0, + "start_delay_max" : 10000, "scattered_start" : false, - "synchronous": true, - "dst_rank_id":8, + "dst_rank_id":511, "randomize_communication_order": false, - "blocking_comm" : false, - "debug" : true, - "src_rank_id_interval": [0,7], + "blocking_comm" : true, + "debug" : false, + "src_rank_id_interval": [0,510], "cpu_freq" : 4e9 } } diff --git a/swm/src/incast/incast2.json b/swm/src/incast/incast2.json index ee49c90..241ff91 100644 --- a/swm/src/incast/incast2.json +++ b/swm/src/incast/incast2.json @@ -1,21 +1,20 @@ { "jobs" : { "dll_path": "${FABSIM_APPS_PATH}/dll/incast.so", - "size": 9, + "size": 8, 
"cfg": { "app": "incast", - "iteration_cnt": 10, + "iteration_cnt": 2, "compute_delay": 0, - "noop_cnt": 0, - "msg_size": 13107200, - "start_delay_max" : 0, + "msg_req_bytes": 160, + "msg_rsp_bytes": 0, + "start_delay_max" : 10, "scattered_start" : false, - "synchronous": true, - "dst_rank_id":8, + "dst_rank_id":7, "randomize_communication_order": false, - "blocking_comm" : false, - "debug" : true, - "src_rank_id_interval": [0,7], + "blocking_comm" : false, + "debug" : true, + "src_rank_id_interval": [0,6], "cpu_freq" : 4e9 } } diff --git a/swm/src/lammps/.deps/.dirstamp b/swm/src/lammps/.deps/.dirstamp deleted file mode 100644 index e69de29..0000000 diff --git a/swm/src/lammps/.deps/lammps.Plo b/swm/src/lammps/.deps/lammps.Plo deleted file mode 100644 index 9ce06a8..0000000 --- a/swm/src/lammps/.deps/lammps.Plo +++ /dev/null @@ -1 +0,0 @@ -# dummy diff --git a/swm/src/lammps/.deps/lammps.Tpo b/swm/src/lammps/.deps/lammps.Tpo deleted file mode 100644 index 21a6333..0000000 --- a/swm/src/lammps/.deps/lammps.Tpo +++ /dev/null @@ -1,2320 +0,0 @@ -src/lammps/lammps.lo: src/lammps/lammps.cpp /usr/include/time.h \ - /usr/include/_types.h /usr/include/sys/_types.h \ - /usr/include/sys/cdefs.h /usr/include/sys/_symbol_aliasing.h \ - /usr/include/sys/_posix_availability.h /usr/include/machine/_types.h \ - /usr/include/i386/_types.h /usr/include/sys/_pthread/_pthread_types.h \ - /usr/include/sys/_types/_clock_t.h /usr/include/sys/_types/_null.h \ - /usr/include/sys/_types/_size_t.h /usr/include/sys/_types/_time_t.h \ - /usr/include/sys/_types/_timespec.h /usr/include/math.h \ - /usr/include/Availability.h /usr/include/AvailabilityInternal.h \ - src/lammps/lammps.h \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/list \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/__config \ - /usr/include/unistd.h /usr/include/sys/unistd.h \ - 
/usr/include/sys/_types/_posix_vdisable.h \ - /usr/include/sys/_types/_seek_set.h /usr/include/_types/_uint64_t.h \ - /usr/include/sys/_types/_ssize_t.h /usr/include/sys/_types/_uid_t.h \ - /usr/include/sys/_types/_gid_t.h /usr/include/sys/_types/_intptr_t.h \ - /usr/include/sys/_types/_off_t.h /usr/include/sys/_types/_pid_t.h \ - /usr/include/sys/_types/_useconds_t.h /usr/include/sys/select.h \ - /usr/include/sys/appleapiopts.h /usr/include/sys/_types/_fd_def.h \ - /usr/include/sys/_types/_timeval.h \ - /usr/include/sys/_types/_suseconds_t.h \ - /usr/include/sys/_types/_sigset_t.h \ - /usr/include/sys/_types/_fd_setsize.h \ - /usr/include/sys/_types/_fd_set.h /usr/include/sys/_types/_fd_clr.h \ - /usr/include/sys/_types/_fd_isset.h /usr/include/sys/_types/_fd_zero.h \ - /usr/include/sys/_types/_fd_copy.h /usr/include/sys/_select.h \ - /usr/include/sys/_types/_dev_t.h /usr/include/sys/_types/_mode_t.h \ - /usr/include/sys/_types/_uuid_t.h /usr/include/gethostuuid.h \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/memory \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/type_traits \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/cstddef \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../lib/clang/7.0.2/include/stddef.h \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/typeinfo \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/exception \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/cstdint \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../lib/clang/7.0.2/include/stdint.h \ - /usr/include/stdint.h 
/usr/include/sys/_types/_int8_t.h \ - /usr/include/sys/_types/_int16_t.h /usr/include/sys/_types/_int32_t.h \ - /usr/include/sys/_types/_int64_t.h /usr/include/_types/_uint8_t.h \ - /usr/include/_types/_uint16_t.h /usr/include/_types/_uint32_t.h \ - /usr/include/sys/_types/_uintptr_t.h /usr/include/_types/_intmax_t.h \ - /usr/include/_types/_uintmax_t.h \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/new \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/utility \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/__tuple \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/__tuple_03 \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/limits \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/__undef_min_max \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/iterator \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/__functional_base \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/__functional_base_03 \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/iosfwd \ - /usr/include/wchar.h /usr/include/sys/_types/_mbstate_t.h \ - /usr/include/sys/_types/_ct_rune_t.h /usr/include/sys/_types/_rune_t.h \ - /usr/include/sys/_types/_wchar_t.h \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../lib/clang/7.0.2/include/stdarg.h \ - /usr/include/stdio.h /usr/include/sys/_types/_va_list.h \ - /usr/include/sys/stdio.h /usr/include/_wctype.h \ - 
/usr/include/sys/_types/_wint_t.h /usr/include/_types/_wctype_t.h \ - /usr/include/ctype.h /usr/include/runetype.h \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/initializer_list \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/__debug \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/tuple \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/cstring \ - /usr/include/string.h /usr/include/sys/_types/_rsize_t.h \ - /usr/include/sys/_types/_errno_t.h /usr/include/strings.h \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/algorithm \ - /usr/local/include/boost/property_tree/ptree.hpp \ - /usr/local/include/boost/property_tree/ptree_fwd.hpp \ - /usr/local/include/boost/config.hpp \ - /usr/local/include/boost/config/user.hpp \ - /usr/local/include/boost/config/detail/select_compiler_config.hpp \ - /usr/local/include/boost/config/compiler/clang.hpp \ - /usr/local/include/boost/config/detail/select_stdlib_config.hpp \ - /usr/local/include/boost/config/stdlib/libcpp.hpp \ - /usr/local/include/boost/config/detail/select_platform_config.hpp \ - /usr/local/include/boost/config/platform/macos.hpp \ - /usr/local/include/boost/config/detail/posix_features.hpp \ - /usr/local/include/boost/config/detail/suffix.hpp \ - /usr/local/include/boost/optional/optional_fwd.hpp \ - /usr/local/include/boost/throw_exception.hpp \ - /usr/local/include/boost/detail/workaround.hpp \ - /usr/local/include/boost/config/workaround.hpp \ - /usr/local/include/boost/exception/exception.hpp \ - /usr/local/include/boost/current_function.hpp \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional \ - 
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/__functional_03 \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/string \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/cstdio \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/cwchar \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/cwctype \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/cctype \ - /usr/include/wctype.h /usr/include/_types/_wctrans_t.h \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/stdexcept \ - /usr/local/include/boost/property_tree/string_path.hpp \ - /usr/local/include/boost/property_tree/id_translator.hpp \ - /usr/local/include/boost/optional.hpp \ - /usr/local/include/boost/optional/optional.hpp \ - /usr/local/include/boost/assert.hpp /usr/include/assert.h \ - /usr/include/stdlib.h /usr/include/sys/wait.h \ - /usr/include/sys/_types/_id_t.h /usr/include/sys/signal.h \ - /usr/include/machine/signal.h /usr/include/i386/signal.h \ - /usr/include/machine/_mcontext.h /usr/include/i386/_mcontext.h \ - /usr/include/mach/i386/_structs.h \ - /usr/include/sys/_pthread/_pthread_attr_t.h \ - /usr/include/sys/_types/_sigaltstack.h \ - /usr/include/sys/_types/_ucontext.h /usr/include/sys/resource.h \ - /usr/include/machine/endian.h /usr/include/i386/endian.h \ - /usr/include/sys/_endian.h /usr/include/libkern/_OSByteOrder.h \ - /usr/include/libkern/i386/_OSByteOrder.h /usr/include/alloca.h \ - /usr/include/machine/types.h /usr/include/i386/types.h \ - /usr/local/include/boost/core/addressof.hpp \ - /usr/local/include/boost/core/enable_if.hpp \ - 
/usr/local/include/boost/core/explicit_operator_bool.hpp \ - /usr/local/include/boost/core/swap.hpp \ - /usr/local/include/boost/optional/bad_optional_access.hpp \ - /usr/local/include/boost/static_assert.hpp \ - /usr/local/include/boost/type.hpp \ - /usr/local/include/boost/type_traits/alignment_of.hpp \ - /usr/local/include/boost/type_traits/intrinsics.hpp \ - /usr/local/include/boost/type_traits/detail/config.hpp \ - /usr/local/include/boost/version.hpp \ - /usr/local/include/boost/type_traits/integral_constant.hpp \ - /usr/local/include/boost/type_traits/conditional.hpp \ - /usr/local/include/boost/type_traits/has_nothrow_constructor.hpp \ - /usr/local/include/boost/type_traits/is_default_constructible.hpp \ - /usr/local/include/boost/type_traits/is_pod.hpp \ - /usr/local/include/boost/type_traits/is_void.hpp \ - /usr/local/include/boost/type_traits/is_scalar.hpp \ - /usr/local/include/boost/type_traits/is_arithmetic.hpp \ - /usr/local/include/boost/type_traits/is_integral.hpp \ - /usr/local/include/boost/type_traits/is_floating_point.hpp \ - /usr/local/include/boost/type_traits/is_enum.hpp \ - /usr/local/include/boost/type_traits/is_pointer.hpp \ - /usr/local/include/boost/type_traits/is_member_pointer.hpp \ - /usr/local/include/boost/type_traits/is_member_function_pointer.hpp \ - /usr/local/include/boost/type_traits/detail/is_mem_fun_pointer_impl.hpp \ - /usr/local/include/boost/type_traits/remove_cv.hpp \ - /usr/local/include/boost/type_traits/type_with_alignment.hpp \ - /usr/local/include/boost/type_traits/remove_const.hpp \ - /usr/local/include/boost/type_traits/remove_reference.hpp \ - /usr/local/include/boost/type_traits/decay.hpp \ - /usr/local/include/boost/type_traits/is_array.hpp \ - /usr/local/include/boost/type_traits/is_function.hpp \ - /usr/local/include/boost/type_traits/is_reference.hpp \ - /usr/local/include/boost/type_traits/is_lvalue_reference.hpp \ - /usr/local/include/boost/type_traits/is_rvalue_reference.hpp \ - 
/usr/local/include/boost/type_traits/detail/is_function_ptr_helper.hpp \ - /usr/local/include/boost/type_traits/remove_bounds.hpp \ - /usr/local/include/boost/type_traits/remove_extent.hpp \ - /usr/local/include/boost/type_traits/add_pointer.hpp \ - /usr/local/include/boost/type_traits/is_base_of.hpp \ - /usr/local/include/boost/type_traits/is_base_and_derived.hpp \ - /usr/local/include/boost/type_traits/is_same.hpp \ - /usr/local/include/boost/type_traits/is_class.hpp \ - /usr/local/include/boost/type_traits/is_const.hpp \ - /usr/local/include/boost/type_traits/is_constructible.hpp \ - /usr/local/include/boost/type_traits/is_convertible.hpp \ - /usr/local/include/boost/type_traits/is_nothrow_move_assignable.hpp \ - /usr/local/include/boost/type_traits/has_trivial_move_assign.hpp \ - /usr/local/include/boost/type_traits/is_volatile.hpp \ - /usr/local/include/boost/type_traits/is_assignable.hpp \ - /usr/local/include/boost/type_traits/has_trivial_assign.hpp \ - /usr/local/include/boost/type_traits/has_nothrow_assign.hpp \ - /usr/local/include/boost/utility/enable_if.hpp \ - /usr/local/include/boost/type_traits/declval.hpp \ - /usr/local/include/boost/type_traits/add_rvalue_reference.hpp \ - /usr/local/include/boost/type_traits/is_nothrow_move_constructible.hpp \ - /usr/local/include/boost/type_traits/has_trivial_move_constructor.hpp \ - /usr/local/include/boost/type_traits/has_nothrow_copy.hpp \ - /usr/local/include/boost/type_traits/is_copy_constructible.hpp \ - /usr/local/include/boost/type_traits/detail/yes_no_type.hpp \ - /usr/local/include/boost/type_traits/add_reference.hpp \ - /usr/local/include/boost/noncopyable.hpp \ - /usr/local/include/boost/core/noncopyable.hpp \ - /usr/local/include/boost/move/utility.hpp \ - /usr/local/include/boost/move/detail/config_begin.hpp \ - /usr/local/include/boost/move/detail/workaround.hpp \ - /usr/local/include/boost/move/utility_core.hpp \ - /usr/local/include/boost/move/core.hpp \ - 
/usr/local/include/boost/move/detail/type_traits.hpp \ - /usr/local/include/boost/move/detail/meta_utils.hpp \ - /usr/local/include/boost/move/detail/meta_utils_core.hpp \ - /usr/local/include/boost/move/detail/config_end.hpp \ - /usr/local/include/boost/move/traits.hpp \ - /usr/local/include/boost/none.hpp /usr/local/include/boost/none_t.hpp \ - /usr/local/include/boost/utility/compare_pointees.hpp \ - /usr/local/include/boost/optional/detail/optional_config.hpp \ - /usr/local/include/boost/optional/detail/optional_factory_support.hpp \ - /usr/local/include/boost/optional/detail/optional_aligned_storage.hpp \ - /usr/local/include/boost/optional/detail/optional_trivially_copyable_base.hpp \ - /usr/local/include/boost/optional/detail/optional_reference_spec.hpp \ - /usr/local/include/boost/optional/detail/optional_relops.hpp \ - /usr/local/include/boost/optional/detail/optional_swap.hpp \ - /usr/local/include/boost/property_tree/exceptions.hpp \ - /usr/local/include/boost/any.hpp \ - /usr/local/include/boost/type_index.hpp \ - /usr/local/include/boost/type_index/stl_type_index.hpp \ - /usr/local/include/boost/type_index/type_index_facade.hpp \ - /usr/local/include/boost/core/demangle.hpp /usr/include/cxxabi.h \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/cstdlib \ - /usr/local/include/boost/mpl/if.hpp \ - /usr/local/include/boost/mpl/aux_/value_wknd.hpp \ - /usr/local/include/boost/mpl/aux_/static_cast.hpp \ - /usr/local/include/boost/mpl/aux_/config/workaround.hpp \ - /usr/local/include/boost/mpl/aux_/config/integral.hpp \ - /usr/local/include/boost/mpl/aux_/config/msvc.hpp \ - /usr/local/include/boost/mpl/aux_/config/eti.hpp \ - /usr/local/include/boost/mpl/aux_/na_spec.hpp \ - /usr/local/include/boost/mpl/lambda_fwd.hpp \ - /usr/local/include/boost/mpl/void_fwd.hpp \ - /usr/local/include/boost/mpl/aux_/adl_barrier.hpp \ - /usr/local/include/boost/mpl/aux_/config/adl.hpp \ - 
/usr/local/include/boost/mpl/aux_/config/intel.hpp \ - /usr/local/include/boost/mpl/aux_/config/gcc.hpp \ - /usr/local/include/boost/mpl/aux_/na.hpp \ - /usr/local/include/boost/mpl/bool.hpp \ - /usr/local/include/boost/mpl/bool_fwd.hpp \ - /usr/local/include/boost/mpl/integral_c_tag.hpp \ - /usr/local/include/boost/mpl/aux_/config/static_constant.hpp \ - /usr/local/include/boost/mpl/aux_/na_fwd.hpp \ - /usr/local/include/boost/mpl/aux_/config/ctps.hpp \ - /usr/local/include/boost/mpl/aux_/config/lambda.hpp \ - /usr/local/include/boost/mpl/aux_/config/ttp.hpp \ - /usr/local/include/boost/mpl/int.hpp \ - /usr/local/include/boost/mpl/int_fwd.hpp \ - /usr/local/include/boost/mpl/aux_/nttp_decl.hpp \ - /usr/local/include/boost/mpl/aux_/config/nttp.hpp \ - /usr/local/include/boost/mpl/aux_/integral_wrapper.hpp \ - /usr/local/include/boost/preprocessor/cat.hpp \ - /usr/local/include/boost/preprocessor/config/config.hpp \ - /usr/local/include/boost/mpl/aux_/lambda_arity_param.hpp \ - /usr/local/include/boost/mpl/aux_/template_arity_fwd.hpp \ - /usr/local/include/boost/mpl/aux_/arity.hpp \ - /usr/local/include/boost/mpl/aux_/config/dtp.hpp \ - /usr/local/include/boost/mpl/aux_/preprocessor/params.hpp \ - /usr/local/include/boost/mpl/aux_/config/preprocessor.hpp \ - /usr/local/include/boost/preprocessor/comma_if.hpp \ - /usr/local/include/boost/preprocessor/punctuation/comma_if.hpp \ - /usr/local/include/boost/preprocessor/control/if.hpp \ - /usr/local/include/boost/preprocessor/control/iif.hpp \ - /usr/local/include/boost/preprocessor/logical/bool.hpp \ - /usr/local/include/boost/preprocessor/facilities/empty.hpp \ - /usr/local/include/boost/preprocessor/punctuation/comma.hpp \ - /usr/local/include/boost/preprocessor/repeat.hpp \ - /usr/local/include/boost/preprocessor/repetition/repeat.hpp \ - /usr/local/include/boost/preprocessor/debug/error.hpp \ - /usr/local/include/boost/preprocessor/detail/auto_rec.hpp \ - /usr/local/include/boost/preprocessor/tuple/eat.hpp \ - 
/usr/local/include/boost/preprocessor/inc.hpp \ - /usr/local/include/boost/preprocessor/arithmetic/inc.hpp \ - /usr/local/include/boost/mpl/aux_/preprocessor/enum.hpp \ - /usr/local/include/boost/mpl/aux_/preprocessor/def_params_tail.hpp \ - /usr/local/include/boost/mpl/limits/arity.hpp \ - /usr/local/include/boost/preprocessor/logical/and.hpp \ - /usr/local/include/boost/preprocessor/logical/bitand.hpp \ - /usr/local/include/boost/preprocessor/identity.hpp \ - /usr/local/include/boost/preprocessor/facilities/identity.hpp \ - /usr/local/include/boost/preprocessor/empty.hpp \ - /usr/local/include/boost/preprocessor/arithmetic/add.hpp \ - /usr/local/include/boost/preprocessor/arithmetic/dec.hpp \ - /usr/local/include/boost/preprocessor/control/while.hpp \ - /usr/local/include/boost/preprocessor/list/fold_left.hpp \ - /usr/local/include/boost/preprocessor/list/detail/fold_left.hpp \ - /usr/local/include/boost/preprocessor/control/expr_iif.hpp \ - /usr/local/include/boost/preprocessor/list/adt.hpp \ - /usr/local/include/boost/preprocessor/detail/is_binary.hpp \ - /usr/local/include/boost/preprocessor/detail/check.hpp \ - /usr/local/include/boost/preprocessor/logical/compl.hpp \ - /usr/local/include/boost/preprocessor/list/fold_right.hpp \ - /usr/local/include/boost/preprocessor/list/detail/fold_right.hpp \ - /usr/local/include/boost/preprocessor/list/reverse.hpp \ - /usr/local/include/boost/preprocessor/control/detail/while.hpp \ - /usr/local/include/boost/preprocessor/tuple/elem.hpp \ - /usr/local/include/boost/preprocessor/facilities/expand.hpp \ - /usr/local/include/boost/preprocessor/facilities/overload.hpp \ - /usr/local/include/boost/preprocessor/variadic/size.hpp \ - /usr/local/include/boost/preprocessor/tuple/rem.hpp \ - /usr/local/include/boost/preprocessor/tuple/detail/is_single_return.hpp \ - /usr/local/include/boost/preprocessor/variadic/elem.hpp \ - /usr/local/include/boost/preprocessor/arithmetic/sub.hpp \ - 
/usr/local/include/boost/mpl/aux_/config/overload_resolution.hpp \ - /usr/local/include/boost/mpl/aux_/lambda_support.hpp \ - /usr/local/include/boost/mpl/or.hpp \ - /usr/local/include/boost/mpl/aux_/config/use_preprocessed.hpp \ - /usr/local/include/boost/mpl/aux_/nested_type_wknd.hpp \ - /usr/local/include/boost/mpl/aux_/include_preprocessed.hpp \ - /usr/local/include/boost/mpl/aux_/config/compiler.hpp \ - /usr/local/include/boost/preprocessor/stringize.hpp \ - /usr/local/include/boost/mpl/aux_/preprocessed/gcc/or.hpp \ - /usr/local/include/boost/functional/hash.hpp \ - /usr/local/include/boost/functional/hash/hash.hpp \ - /usr/local/include/boost/functional/hash/hash_fwd.hpp \ - /usr/local/include/boost/functional/hash/detail/hash_float.hpp \ - /usr/local/include/boost/functional/hash/detail/float_functions.hpp \ - /usr/local/include/boost/config/no_tr1/cmath.hpp \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/cmath \ - /usr/local/include/boost/functional/hash/detail/limits.hpp \ - /usr/local/include/boost/limits.hpp \ - /usr/local/include/boost/integer/static_log2.hpp \ - /usr/local/include/boost/integer_fwd.hpp \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/climits \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../lib/clang/7.0.2/include/limits.h \ - /usr/include/limits.h /usr/include/machine/limits.h \ - /usr/include/i386/limits.h /usr/include/i386/_limits.h \ - /usr/include/sys/syslimits.h /usr/local/include/boost/cstdint.hpp \ - /usr/local/include/boost/functional/hash/extensions.hpp \ - /usr/local/include/boost/detail/container_fwd.hpp \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/deque \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/__split_buffer \ - 
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/vector \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/__bit_reference \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/map \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/__tree \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/set \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/bitset \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/complex \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/sstream \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/ostream \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/ios \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/__locale \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/mutex \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/__mutex_base \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/chrono \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/ctime \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/ratio \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/system_error \ - 
/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/cerrno \ - /usr/include/errno.h /usr/include/sys/errno.h /usr/include/pthread.h \ - /usr/include/pthread/pthread_impl.h /usr/include/pthread/sched.h \ - /usr/include/sys/_pthread/_pthread_cond_t.h \ - /usr/include/sys/_pthread/_pthread_condattr_t.h \ - /usr/include/sys/_pthread/_pthread_key_t.h \ - /usr/include/sys/_pthread/_pthread_mutex_t.h \ - /usr/include/sys/_pthread/_pthread_mutexattr_t.h \ - /usr/include/sys/_pthread/_pthread_once_t.h \ - /usr/include/sys/_pthread/_pthread_rwlock_t.h \ - /usr/include/sys/_pthread/_pthread_rwlockattr_t.h \ - /usr/include/sys/_pthread/_pthread_t.h /usr/include/pthread/qos.h \ - /usr/include/sys/qos.h /usr/include/sys/_types/_mach_port_t.h \ - /usr/include/locale.h /usr/include/_locale.h /usr/include/xlocale.h \ - /usr/include/_xlocale.h /usr/include/xlocale/_ctype.h \ - /usr/include/xlocale/__wctype.h /usr/include/xlocale/_stdio.h \ - /usr/include/xlocale/_stdlib.h /usr/include/xlocale/_string.h \ - /usr/include/xlocale/_time.h /usr/include/xlocale/_wchar.h \ - /usr/include/xlocale/_wctype.h \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/streambuf \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/locale \ - /usr/include/nl_types.h /usr/include/sys/types.h \ - /usr/include/sys/_types/_blkcnt_t.h \ - /usr/include/sys/_types/_blksize_t.h \ - /usr/include/sys/_types/_in_addr_t.h \ - /usr/include/sys/_types/_in_port_t.h /usr/include/sys/_types/_ino_t.h \ - /usr/include/sys/_types/_ino64_t.h /usr/include/sys/_types/_key_t.h \ - /usr/include/sys/_types/_nlink_t.h \ - /usr/include/sys/_types/_fsblkcnt_t.h \ - /usr/include/sys/_types/_fsfilcnt_t.h /usr/include/_types/_nl_item.h \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/istream \ - 
/usr/local/include/boost/preprocessor/repetition/repeat_from_to.hpp \ - /usr/local/include/boost/preprocessor/repetition/enum_params.hpp \ - /Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/array \ - /usr/local/include/boost/property_tree/detail/exception_implementation.hpp \ - /usr/local/include/boost/property_tree/detail/ptree_utils.hpp \ - /usr/local/include/boost/mpl/has_xxx.hpp \ - /usr/local/include/boost/mpl/aux_/type_wrapper.hpp \ - /usr/local/include/boost/mpl/aux_/yes_no.hpp \ - /usr/local/include/boost/mpl/aux_/config/arrays.hpp \ - /usr/local/include/boost/mpl/aux_/config/has_xxx.hpp \ - /usr/local/include/boost/mpl/aux_/config/msvc_typename.hpp \ - /usr/local/include/boost/preprocessor/array/elem.hpp \ - /usr/local/include/boost/preprocessor/array/data.hpp \ - /usr/local/include/boost/preprocessor/array/size.hpp \ - /usr/local/include/boost/preprocessor/repetition/enum_trailing_params.hpp \ - /usr/local/include/boost/mpl/and.hpp \ - /usr/local/include/boost/mpl/aux_/preprocessed/gcc/and.hpp \ - /usr/local/include/boost/property_tree/stream_translator.hpp \ - /usr/local/include/boost/optional/optional_io.hpp \ - /usr/local/include/boost/multi_index_container.hpp \ - /usr/local/include/boost/detail/allocator_utilities.hpp \ - /usr/local/include/boost/mpl/eval_if.hpp \ - /usr/local/include/boost/detail/no_exceptions_support.hpp \ - /usr/local/include/boost/core/no_exceptions_support.hpp \ - /usr/local/include/boost/mpl/at.hpp \ - /usr/local/include/boost/mpl/at_fwd.hpp \ - /usr/local/include/boost/mpl/aux_/at_impl.hpp \ - /usr/local/include/boost/mpl/begin_end.hpp \ - /usr/local/include/boost/mpl/begin_end_fwd.hpp \ - /usr/local/include/boost/mpl/aux_/begin_end_impl.hpp \ - /usr/local/include/boost/mpl/sequence_tag_fwd.hpp \ - /usr/local/include/boost/mpl/void.hpp \ - /usr/local/include/boost/mpl/aux_/has_begin.hpp \ - /usr/local/include/boost/mpl/aux_/traits_lambda_spec.hpp \ - 
/usr/local/include/boost/mpl/sequence_tag.hpp \ - /usr/local/include/boost/mpl/aux_/has_tag.hpp \ - /usr/local/include/boost/mpl/aux_/is_msvc_eti_arg.hpp \ - /usr/local/include/boost/mpl/advance.hpp \ - /usr/local/include/boost/mpl/advance_fwd.hpp \ - /usr/local/include/boost/mpl/aux_/common_name_wknd.hpp \ - /usr/local/include/boost/mpl/less.hpp \ - /usr/local/include/boost/mpl/aux_/comparison_op.hpp \ - /usr/local/include/boost/mpl/aux_/numeric_op.hpp \ - /usr/local/include/boost/mpl/numeric_cast.hpp \ - /usr/local/include/boost/mpl/apply_wrap.hpp \ - /usr/local/include/boost/mpl/aux_/has_apply.hpp \ - /usr/local/include/boost/mpl/aux_/config/has_apply.hpp \ - /usr/local/include/boost/mpl/aux_/msvc_never_true.hpp \ - /usr/local/include/boost/mpl/aux_/preprocessed/gcc/apply_wrap.hpp \ - /usr/local/include/boost/mpl/tag.hpp \ - /usr/local/include/boost/mpl/aux_/numeric_cast_utils.hpp \ - /usr/local/include/boost/mpl/aux_/config/forwarding.hpp \ - /usr/local/include/boost/mpl/aux_/msvc_eti_base.hpp \ - /usr/local/include/boost/mpl/aux_/preprocessed/gcc/less.hpp \ - /usr/local/include/boost/mpl/negate.hpp \ - /usr/local/include/boost/mpl/integral_c.hpp \ - /usr/local/include/boost/mpl/integral_c_fwd.hpp \ - /usr/local/include/boost/mpl/long.hpp \ - /usr/local/include/boost/mpl/long_fwd.hpp \ - /usr/local/include/boost/mpl/aux_/advance_forward.hpp \ - /usr/local/include/boost/mpl/next.hpp \ - /usr/local/include/boost/mpl/next_prior.hpp \ - /usr/local/include/boost/mpl/aux_/preprocessed/gcc/advance_forward.hpp \ - /usr/local/include/boost/mpl/aux_/advance_backward.hpp \ - /usr/local/include/boost/mpl/prior.hpp \ - /usr/local/include/boost/mpl/aux_/preprocessed/gcc/advance_backward.hpp \ - /usr/local/include/boost/mpl/deref.hpp \ - /usr/local/include/boost/mpl/aux_/msvc_type.hpp \ - /usr/local/include/boost/mpl/contains.hpp \ - /usr/local/include/boost/mpl/contains_fwd.hpp \ - /usr/local/include/boost/mpl/aux_/contains_impl.hpp \ - /usr/local/include/boost/mpl/find.hpp 
\ - /usr/local/include/boost/mpl/find_if.hpp \ - /usr/local/include/boost/mpl/aux_/find_if_pred.hpp \ - /usr/local/include/boost/mpl/aux_/iter_apply.hpp \ - /usr/local/include/boost/mpl/apply.hpp \ - /usr/local/include/boost/mpl/apply_fwd.hpp \ - /usr/local/include/boost/mpl/aux_/preprocessed/gcc/apply_fwd.hpp \ - /usr/local/include/boost/mpl/placeholders.hpp \ - /usr/local/include/boost/mpl/arg.hpp \ - /usr/local/include/boost/mpl/arg_fwd.hpp \ - /usr/local/include/boost/mpl/aux_/na_assert.hpp \ - /usr/local/include/boost/mpl/assert.hpp \ - /usr/local/include/boost/mpl/not.hpp \ - /usr/local/include/boost/mpl/aux_/config/gpu.hpp \ - /usr/local/include/boost/mpl/aux_/config/pp_counter.hpp \ - /usr/local/include/boost/mpl/aux_/arity_spec.hpp \ - /usr/local/include/boost/mpl/aux_/arg_typedef.hpp \ - /usr/local/include/boost/mpl/aux_/preprocessed/gcc/arg.hpp \ - /usr/local/include/boost/mpl/aux_/preprocessed/gcc/placeholders.hpp \ - /usr/local/include/boost/mpl/lambda.hpp \ - /usr/local/include/boost/mpl/bind.hpp \ - /usr/local/include/boost/mpl/bind_fwd.hpp \ - /usr/local/include/boost/mpl/aux_/config/bind.hpp \ - /usr/local/include/boost/mpl/aux_/preprocessed/gcc/bind_fwd.hpp \ - /usr/local/include/boost/mpl/protect.hpp \ - /usr/local/include/boost/mpl/aux_/preprocessed/gcc/bind.hpp \ - /usr/local/include/boost/mpl/aux_/full_lambda.hpp \ - /usr/local/include/boost/mpl/quote.hpp \ - /usr/local/include/boost/mpl/aux_/has_type.hpp \ - /usr/local/include/boost/mpl/aux_/config/bcc.hpp \ - /usr/local/include/boost/mpl/aux_/preprocessed/gcc/quote.hpp \ - /usr/local/include/boost/mpl/aux_/template_arity.hpp \ - /usr/local/include/boost/mpl/aux_/preprocessed/gcc/template_arity.hpp \ - /usr/local/include/boost/mpl/aux_/preprocessed/gcc/full_lambda.hpp \ - /usr/local/include/boost/mpl/aux_/preprocessed/gcc/apply.hpp \ - /usr/local/include/boost/mpl/iter_fold_if.hpp \ - /usr/local/include/boost/mpl/logical.hpp \ - /usr/local/include/boost/mpl/always.hpp \ - 
/usr/local/include/boost/mpl/aux_/preprocessor/default_params.hpp \ - /usr/local/include/boost/mpl/pair.hpp \ - /usr/local/include/boost/mpl/aux_/iter_fold_if_impl.hpp \ - /usr/local/include/boost/mpl/identity.hpp \ - /usr/local/include/boost/mpl/aux_/preprocessed/gcc/iter_fold_if_impl.hpp \ - /usr/local/include/boost/mpl/same_as.hpp \ - /usr/local/include/boost/mpl/aux_/lambda_spec.hpp \ - /usr/local/include/boost/mpl/size.hpp \ - /usr/local/include/boost/mpl/size_fwd.hpp \ - /usr/local/include/boost/mpl/aux_/size_impl.hpp \ - /usr/local/include/boost/mpl/distance.hpp \ - /usr/local/include/boost/mpl/distance_fwd.hpp \ - /usr/local/include/boost/mpl/iter_fold.hpp \ - /usr/local/include/boost/mpl/O1_size.hpp \ - /usr/local/include/boost/mpl/O1_size_fwd.hpp \ - /usr/local/include/boost/mpl/aux_/O1_size_impl.hpp \ - /usr/local/include/boost/mpl/aux_/has_size.hpp \ - /usr/local/include/boost/mpl/aux_/iter_fold_impl.hpp \ - /usr/local/include/boost/mpl/aux_/preprocessed/gcc/iter_fold_impl.hpp \ - /usr/local/include/boost/mpl/iterator_range.hpp \ - /usr/local/include/boost/multi_index_container_fwd.hpp \ - /usr/local/include/boost/multi_index/identity.hpp \ - /usr/local/include/boost/multi_index/identity_fwd.hpp \ - /usr/local/include/boost/multi_index/indexed_by.hpp \ - /usr/local/include/boost/mpl/vector.hpp \ - /usr/local/include/boost/mpl/limits/vector.hpp \ - /usr/local/include/boost/mpl/vector/vector20.hpp \ - /usr/local/include/boost/mpl/vector/vector10.hpp \ - /usr/local/include/boost/mpl/vector/vector0.hpp \ - /usr/local/include/boost/mpl/vector/aux_/at.hpp \ - /usr/local/include/boost/mpl/vector/aux_/tag.hpp \ - /usr/local/include/boost/mpl/aux_/config/typeof.hpp \ - /usr/local/include/boost/mpl/vector/aux_/front.hpp \ - /usr/local/include/boost/mpl/front_fwd.hpp \ - /usr/local/include/boost/mpl/vector/aux_/push_front.hpp \ - /usr/local/include/boost/mpl/push_front_fwd.hpp \ - /usr/local/include/boost/mpl/vector/aux_/item.hpp \ - 
/usr/local/include/boost/mpl/vector/aux_/pop_front.hpp \ - /usr/local/include/boost/mpl/pop_front_fwd.hpp \ - /usr/local/include/boost/mpl/vector/aux_/push_back.hpp \ - /usr/local/include/boost/mpl/push_back_fwd.hpp \ - /usr/local/include/boost/mpl/vector/aux_/pop_back.hpp \ - /usr/local/include/boost/mpl/pop_back_fwd.hpp \ - /usr/local/include/boost/mpl/vector/aux_/back.hpp \ - /usr/local/include/boost/mpl/back_fwd.hpp \ - /usr/local/include/boost/mpl/vector/aux_/clear.hpp \ - /usr/local/include/boost/mpl/clear_fwd.hpp \ - /usr/local/include/boost/mpl/vector/aux_/vector0.hpp \ - /usr/local/include/boost/mpl/vector/aux_/iterator.hpp \ - /usr/local/include/boost/mpl/iterator_tags.hpp \ - /usr/local/include/boost/mpl/plus.hpp \ - /usr/local/include/boost/mpl/aux_/arithmetic_op.hpp \ - /usr/local/include/boost/mpl/aux_/largest_int.hpp \ - /usr/local/include/boost/mpl/aux_/preprocessed/gcc/plus.hpp \ - /usr/local/include/boost/mpl/minus.hpp \ - /usr/local/include/boost/mpl/aux_/preprocessed/gcc/minus.hpp \ - /usr/local/include/boost/mpl/vector/aux_/O1_size.hpp \ - /usr/local/include/boost/mpl/vector/aux_/size.hpp \ - /usr/local/include/boost/mpl/vector/aux_/empty.hpp \ - /usr/local/include/boost/mpl/empty_fwd.hpp \ - /usr/local/include/boost/mpl/vector/aux_/begin_end.hpp \ - /usr/local/include/boost/mpl/vector/aux_/include_preprocessed.hpp \ - /usr/local/include/boost/mpl/vector/aux_/preprocessed/typeof_based/vector10.hpp \ - /usr/local/include/boost/mpl/vector/aux_/preprocessed/typeof_based/vector20.hpp \ - /usr/local/include/boost/mpl/aux_/preprocessed/gcc/vector.hpp \ - /usr/local/include/boost/preprocessor/control/expr_if.hpp \ - /usr/local/include/boost/preprocessor/repetition/enum.hpp \ - /usr/local/include/boost/multi_index/ordered_index_fwd.hpp \ - /usr/local/include/boost/multi_index/detail/ord_index_args.hpp \ - /usr/local/include/boost/multi_index/tag.hpp \ - /usr/local/include/boost/multi_index/detail/no_duplicate_tags.hpp \ - 
/usr/local/include/boost/mpl/fold.hpp \ - /usr/local/include/boost/mpl/aux_/fold_impl.hpp \ - /usr/local/include/boost/mpl/aux_/preprocessed/gcc/fold_impl.hpp \ - /usr/local/include/boost/mpl/set/set0.hpp \ - /usr/local/include/boost/mpl/set/aux_/at_impl.hpp \ - /usr/local/include/boost/mpl/set/aux_/has_key_impl.hpp \ - /usr/local/include/boost/mpl/set/aux_/tag.hpp \ - /usr/local/include/boost/mpl/has_key_fwd.hpp \ - /usr/local/include/boost/mpl/aux_/overload_names.hpp \ - /usr/local/include/boost/mpl/aux_/ptr_to_ref.hpp \ - /usr/local/include/boost/mpl/aux_/config/operators.hpp \ - /usr/local/include/boost/mpl/set/aux_/clear_impl.hpp \ - /usr/local/include/boost/mpl/set/aux_/set0.hpp \ - /usr/local/include/boost/mpl/set/aux_/size_impl.hpp \ - /usr/local/include/boost/mpl/set/aux_/empty_impl.hpp \ - /usr/local/include/boost/mpl/set/aux_/insert_impl.hpp \ - /usr/local/include/boost/mpl/insert_fwd.hpp \ - /usr/local/include/boost/mpl/set/aux_/item.hpp \ - /usr/local/include/boost/mpl/base.hpp \ - /usr/local/include/boost/mpl/set/aux_/insert_range_impl.hpp \ - /usr/local/include/boost/mpl/insert_range_fwd.hpp \ - /usr/local/include/boost/mpl/insert.hpp \ - /usr/local/include/boost/mpl/aux_/insert_impl.hpp \ - /usr/local/include/boost/mpl/reverse_fold.hpp \ - /usr/local/include/boost/mpl/aux_/reverse_fold_impl.hpp \ - /usr/local/include/boost/mpl/aux_/preprocessed/gcc/reverse_fold_impl.hpp \ - /usr/local/include/boost/mpl/clear.hpp \ - /usr/local/include/boost/mpl/aux_/clear_impl.hpp \ - /usr/local/include/boost/mpl/push_front.hpp \ - /usr/local/include/boost/mpl/aux_/push_front_impl.hpp \ - /usr/local/include/boost/mpl/set/aux_/erase_impl.hpp \ - /usr/local/include/boost/mpl/erase_fwd.hpp \ - /usr/local/include/boost/mpl/set/aux_/erase_key_impl.hpp \ - /usr/local/include/boost/mpl/erase_key_fwd.hpp \ - /usr/local/include/boost/mpl/set/aux_/key_type_impl.hpp \ - /usr/local/include/boost/mpl/key_type_fwd.hpp \ - /usr/local/include/boost/mpl/set/aux_/value_type_impl.hpp 
\ - /usr/local/include/boost/mpl/value_type_fwd.hpp \ - /usr/local/include/boost/mpl/set/aux_/begin_end_impl.hpp \ - /usr/local/include/boost/mpl/set/aux_/iterator.hpp \ - /usr/local/include/boost/mpl/has_key.hpp \ - /usr/local/include/boost/mpl/aux_/has_key_impl.hpp \ - /usr/local/include/boost/mpl/transform.hpp \ - /usr/local/include/boost/mpl/pair_view.hpp \ - /usr/local/include/boost/mpl/iterator_category.hpp \ - /usr/local/include/boost/mpl/min_max.hpp \ - /usr/local/include/boost/mpl/is_sequence.hpp \ - /usr/local/include/boost/mpl/aux_/inserter_algorithm.hpp \ - /usr/local/include/boost/mpl/back_inserter.hpp \ - /usr/local/include/boost/mpl/push_back.hpp \ - /usr/local/include/boost/mpl/aux_/push_back_impl.hpp \ - /usr/local/include/boost/mpl/inserter.hpp \ - /usr/local/include/boost/mpl/front_inserter.hpp \ - /usr/local/include/boost/preprocessor/facilities/intercept.hpp \ - /usr/local/include/boost/preprocessor/repetition/enum_binary_params.hpp \ - /usr/local/include/boost/multi_index/detail/ord_index_impl_fwd.hpp \ - /usr/local/include/boost/multi_index/detail/access_specifier.hpp \ - /usr/local/include/boost/multi_index/detail/adl_swap.hpp \ - /usr/local/include/boost/multi_index/detail/base_type.hpp \ - /usr/local/include/boost/multi_index/detail/index_base.hpp \ - /usr/local/include/boost/multi_index/detail/copy_map.hpp \ - /usr/local/include/boost/multi_index/detail/auto_space.hpp \ - /usr/local/include/boost/multi_index/detail/raw_ptr.hpp \ - /usr/local/include/boost/multi_index/detail/do_not_copy_elements_tag.hpp \ - /usr/local/include/boost/multi_index/detail/node_type.hpp \ - /usr/local/include/boost/mpl/reverse_iter_fold.hpp \ - /usr/local/include/boost/mpl/aux_/reverse_iter_fold_impl.hpp \ - /usr/local/include/boost/mpl/aux_/preprocessed/gcc/reverse_iter_fold_impl.hpp \ - /usr/local/include/boost/multi_index/detail/header_holder.hpp \ - /usr/local/include/boost/multi_index/detail/index_node_base.hpp \ - 
/usr/local/include/boost/type_traits/aligned_storage.hpp \ - /usr/local/include/boost/archive/archive_exception.hpp \ - /usr/local/include/boost/archive/detail/decl.hpp \ - /usr/local/include/boost/archive/detail/abi_prefix.hpp \ - /usr/local/include/boost/config/abi_prefix.hpp \ - /usr/local/include/boost/archive/detail/abi_suffix.hpp \ - /usr/local/include/boost/config/abi_suffix.hpp \ - /usr/local/include/boost/serialization/access.hpp \ - /usr/local/include/boost/multi_index/detail/ignore_wstrict_aliasing.hpp \ - /usr/local/include/boost/multi_index/detail/restore_wstrict_aliasing.hpp \ - /usr/local/include/boost/multi_index/detail/is_index_list.hpp \ - /usr/local/include/boost/mpl/empty.hpp \ - /usr/local/include/boost/mpl/aux_/empty_impl.hpp \ - /usr/local/include/boost/multi_index/detail/vartempl_support.hpp \ - /usr/local/include/boost/preprocessor/seq/elem.hpp \ - /usr/local/include/boost/tuple/tuple.hpp \ - /usr/local/include/boost/ref.hpp /usr/local/include/boost/core/ref.hpp \ - /usr/local/include/boost/tuple/detail/tuple_basic.hpp \ - /usr/local/include/boost/type_traits/cv_traits.hpp \ - /usr/local/include/boost/type_traits/add_const.hpp \ - /usr/local/include/boost/type_traits/add_volatile.hpp \ - /usr/local/include/boost/type_traits/add_cv.hpp \ - /usr/local/include/boost/type_traits/remove_volatile.hpp \ - /usr/local/include/boost/type_traits/function_traits.hpp \ - /usr/local/include/boost/utility/swap.hpp \ - /usr/local/include/boost/multi_index/detail/index_loader.hpp \ - /usr/local/include/boost/serialization/nvp.hpp \ - /usr/local/include/boost/serialization/level.hpp \ - /usr/local/include/boost/type_traits/is_fundamental.hpp \ - /usr/local/include/boost/serialization/level_enum.hpp \ - /usr/local/include/boost/serialization/tracking.hpp \ - /usr/local/include/boost/mpl/equal_to.hpp \ - /usr/local/include/boost/mpl/aux_/preprocessed/gcc/equal_to.hpp \ - /usr/local/include/boost/mpl/greater.hpp \ - 
/usr/local/include/boost/mpl/aux_/preprocessed/gcc/greater.hpp \ - /usr/local/include/boost/serialization/tracking_enum.hpp \ - /usr/local/include/boost/serialization/type_info_implementation.hpp \ - /usr/local/include/boost/serialization/traits.hpp \ - /usr/local/include/boost/serialization/split_member.hpp \ - /usr/local/include/boost/serialization/base_object.hpp \ - /usr/local/include/boost/type_traits/is_polymorphic.hpp \ - /usr/local/include/boost/serialization/force_include.hpp \ - /usr/local/include/boost/serialization/void_cast_fwd.hpp \ - /usr/local/include/boost/serialization/wrapper.hpp \ - /usr/local/include/boost/multi_index/detail/index_saver.hpp \ - /usr/local/include/boost/multi_index/detail/index_matcher.hpp \ - /usr/local/include/boost/multi_index/detail/converter.hpp \ - /usr/local/include/boost/multi_index/detail/has_tag.hpp \ - /usr/local/include/boost/multi_index/detail/safe_mode.hpp \ - /usr/local/include/boost/multi_index/detail/scope_guard.hpp \ - /usr/local/include/boost/utility/base_from_member.hpp \ - /usr/local/include/boost/multi_index/detail/archive_constructed.hpp \ - /usr/local/include/boost/serialization/serialization.hpp \ - /usr/local/include/boost/serialization/strong_typedef.hpp \ - /usr/local/include/boost/operators.hpp \ - /usr/local/include/boost/multi_index/detail/serialization_version.hpp \ - /usr/local/include/boost/serialization/version.hpp \ - /usr/local/include/boost/mpl/comparison.hpp \ - /usr/local/include/boost/mpl/not_equal_to.hpp \ - /usr/local/include/boost/mpl/aux_/preprocessed/gcc/not_equal_to.hpp \ - /usr/local/include/boost/mpl/less_equal.hpp \ - /usr/local/include/boost/mpl/aux_/preprocessed/gcc/less_equal.hpp \ - /usr/local/include/boost/mpl/greater_equal.hpp \ - /usr/local/include/boost/mpl/aux_/preprocessed/gcc/greater_equal.hpp \ - /usr/local/include/boost/serialization/collection_size_type.hpp \ - /usr/local/include/boost/serialization/split_free.hpp \ - 
/usr/local/include/boost/serialization/is_bitwise_serializable.hpp \ - /usr/local/include/boost/multi_index/sequenced_index.hpp \ - /usr/local/include/boost/bind.hpp \ - /usr/local/include/boost/bind/bind.hpp \ - /usr/local/include/boost/mem_fn.hpp \ - /usr/local/include/boost/bind/mem_fn.hpp \ - /usr/local/include/boost/get_pointer.hpp \ - /usr/local/include/boost/config/no_tr1/memory.hpp \ - /usr/local/include/boost/bind/mem_fn_template.hpp \ - /usr/local/include/boost/bind/mem_fn_cc.hpp \ - /usr/local/include/boost/is_placeholder.hpp \ - /usr/local/include/boost/bind/arg.hpp \ - /usr/local/include/boost/visit_each.hpp \ - /usr/local/include/boost/core/is_same.hpp \ - /usr/local/include/boost/bind/storage.hpp \ - /usr/local/include/boost/bind/bind_template.hpp \ - /usr/local/include/boost/bind/bind_cc.hpp \ - /usr/local/include/boost/bind/bind_mf_cc.hpp \ - /usr/local/include/boost/bind/bind_mf2_cc.hpp \ - /usr/local/include/boost/bind/placeholders.hpp \ - /usr/local/include/boost/call_traits.hpp \ - /usr/local/include/boost/detail/call_traits.hpp \ - /usr/local/include/boost/foreach_fwd.hpp \ - /usr/local/include/boost/iterator/reverse_iterator.hpp \ - /usr/local/include/boost/iterator/iterator_adaptor.hpp \ - /usr/local/include/boost/iterator/iterator_categories.hpp \ - /usr/local/include/boost/iterator/detail/config_def.hpp \ - /usr/local/include/boost/iterator/detail/config_undef.hpp \ - /usr/local/include/boost/iterator/iterator_facade.hpp \ - /usr/local/include/boost/iterator/interoperable.hpp \ - /usr/local/include/boost/iterator/iterator_traits.hpp \ - /usr/local/include/boost/iterator/detail/facade_iterator_category.hpp \ - /usr/local/include/boost/detail/indirect_traits.hpp \ - /usr/local/include/boost/type_traits/remove_pointer.hpp \ - /usr/local/include/boost/iterator/detail/enable_if.hpp \ - /usr/local/include/boost/utility/addressof.hpp \ - /usr/local/include/boost/type_traits/add_lvalue_reference.hpp \ - 
/usr/local/include/boost/multi_index/detail/bidir_node_iterator.hpp \ - /usr/local/include/boost/multi_index/detail/seq_index_node.hpp \ - /usr/local/include/boost/multi_index/detail/seq_index_ops.hpp \ - /usr/local/include/boost/multi_index/sequenced_index_fwd.hpp \ - /usr/local/include/boost/multi_index/ordered_index.hpp \ - /usr/local/include/boost/multi_index/detail/ord_index_impl.hpp \ - /usr/local/include/boost/multi_index/detail/modify_key_adaptor.hpp \ - /usr/local/include/boost/multi_index/detail/ord_index_node.hpp \ - /usr/local/include/boost/multi_index/detail/uintptr_type.hpp \ - /usr/local/include/boost/multi_index/detail/ord_index_ops.hpp \ - /usr/local/include/boost/multi_index/detail/promotes_arg.hpp \ - /usr/local/include/boost/multi_index/detail/is_transparent.hpp \ - /usr/local/include/boost/multi_index/detail/unbounded.hpp \ - /usr/local/include/boost/multi_index/detail/value_compare.hpp \ - /usr/local/include/boost/multi_index/detail/duplicates_iterator.hpp \ - /usr/local/include/boost/multi_index/member.hpp \ - /usr/local/include/boost/property_tree/detail/ptree_implementation.hpp \ - src/lammps/lammps_model.h - -/usr/include/time.h: - -/usr/include/_types.h: - -/usr/include/sys/_types.h: - -/usr/include/sys/cdefs.h: - -/usr/include/sys/_symbol_aliasing.h: - -/usr/include/sys/_posix_availability.h: - -/usr/include/machine/_types.h: - -/usr/include/i386/_types.h: - -/usr/include/sys/_pthread/_pthread_types.h: - -/usr/include/sys/_types/_clock_t.h: - -/usr/include/sys/_types/_null.h: - -/usr/include/sys/_types/_size_t.h: - -/usr/include/sys/_types/_time_t.h: - -/usr/include/sys/_types/_timespec.h: - -/usr/include/math.h: - -/usr/include/Availability.h: - -/usr/include/AvailabilityInternal.h: - -src/lammps/lammps.h: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/list: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/__config: - 
-/usr/include/unistd.h: - -/usr/include/sys/unistd.h: - -/usr/include/sys/_types/_posix_vdisable.h: - -/usr/include/sys/_types/_seek_set.h: - -/usr/include/_types/_uint64_t.h: - -/usr/include/sys/_types/_ssize_t.h: - -/usr/include/sys/_types/_uid_t.h: - -/usr/include/sys/_types/_gid_t.h: - -/usr/include/sys/_types/_intptr_t.h: - -/usr/include/sys/_types/_off_t.h: - -/usr/include/sys/_types/_pid_t.h: - -/usr/include/sys/_types/_useconds_t.h: - -/usr/include/sys/select.h: - -/usr/include/sys/appleapiopts.h: - -/usr/include/sys/_types/_fd_def.h: - -/usr/include/sys/_types/_timeval.h: - -/usr/include/sys/_types/_suseconds_t.h: - -/usr/include/sys/_types/_sigset_t.h: - -/usr/include/sys/_types/_fd_setsize.h: - -/usr/include/sys/_types/_fd_set.h: - -/usr/include/sys/_types/_fd_clr.h: - -/usr/include/sys/_types/_fd_isset.h: - -/usr/include/sys/_types/_fd_zero.h: - -/usr/include/sys/_types/_fd_copy.h: - -/usr/include/sys/_select.h: - -/usr/include/sys/_types/_dev_t.h: - -/usr/include/sys/_types/_mode_t.h: - -/usr/include/sys/_types/_uuid_t.h: - -/usr/include/gethostuuid.h: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/memory: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/type_traits: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/cstddef: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../lib/clang/7.0.2/include/stddef.h: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/typeinfo: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/exception: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/cstdint: - 
-/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../lib/clang/7.0.2/include/stdint.h: - -/usr/include/stdint.h: - -/usr/include/sys/_types/_int8_t.h: - -/usr/include/sys/_types/_int16_t.h: - -/usr/include/sys/_types/_int32_t.h: - -/usr/include/sys/_types/_int64_t.h: - -/usr/include/_types/_uint8_t.h: - -/usr/include/_types/_uint16_t.h: - -/usr/include/_types/_uint32_t.h: - -/usr/include/sys/_types/_uintptr_t.h: - -/usr/include/_types/_intmax_t.h: - -/usr/include/_types/_uintmax_t.h: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/new: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/utility: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/__tuple: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/__tuple_03: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/limits: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/__undef_min_max: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/iterator: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/__functional_base: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/__functional_base_03: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/iosfwd: - -/usr/include/wchar.h: - -/usr/include/sys/_types/_mbstate_t.h: - -/usr/include/sys/_types/_ct_rune_t.h: - -/usr/include/sys/_types/_rune_t.h: - -/usr/include/sys/_types/_wchar_t.h: - 
-/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../lib/clang/7.0.2/include/stdarg.h: - -/usr/include/stdio.h: - -/usr/include/sys/_types/_va_list.h: - -/usr/include/sys/stdio.h: - -/usr/include/_wctype.h: - -/usr/include/sys/_types/_wint_t.h: - -/usr/include/_types/_wctype_t.h: - -/usr/include/ctype.h: - -/usr/include/runetype.h: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/initializer_list: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/__debug: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/tuple: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/cstring: - -/usr/include/string.h: - -/usr/include/sys/_types/_rsize_t.h: - -/usr/include/sys/_types/_errno_t.h: - -/usr/include/strings.h: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/algorithm: - -/usr/local/include/boost/property_tree/ptree.hpp: - -/usr/local/include/boost/property_tree/ptree_fwd.hpp: - -/usr/local/include/boost/config.hpp: - -/usr/local/include/boost/config/user.hpp: - -/usr/local/include/boost/config/detail/select_compiler_config.hpp: - -/usr/local/include/boost/config/compiler/clang.hpp: - -/usr/local/include/boost/config/detail/select_stdlib_config.hpp: - -/usr/local/include/boost/config/stdlib/libcpp.hpp: - -/usr/local/include/boost/config/detail/select_platform_config.hpp: - -/usr/local/include/boost/config/platform/macos.hpp: - -/usr/local/include/boost/config/detail/posix_features.hpp: - -/usr/local/include/boost/config/detail/suffix.hpp: - -/usr/local/include/boost/optional/optional_fwd.hpp: - -/usr/local/include/boost/throw_exception.hpp: - -/usr/local/include/boost/detail/workaround.hpp: - -/usr/local/include/boost/config/workaround.hpp: - 
-/usr/local/include/boost/exception/exception.hpp: - -/usr/local/include/boost/current_function.hpp: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/functional: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/__functional_03: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/string: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/cstdio: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/cwchar: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/cwctype: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/cctype: - -/usr/include/wctype.h: - -/usr/include/_types/_wctrans_t.h: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/stdexcept: - -/usr/local/include/boost/property_tree/string_path.hpp: - -/usr/local/include/boost/property_tree/id_translator.hpp: - -/usr/local/include/boost/optional.hpp: - -/usr/local/include/boost/optional/optional.hpp: - -/usr/local/include/boost/assert.hpp: - -/usr/include/assert.h: - -/usr/include/stdlib.h: - -/usr/include/sys/wait.h: - -/usr/include/sys/_types/_id_t.h: - -/usr/include/sys/signal.h: - -/usr/include/machine/signal.h: - -/usr/include/i386/signal.h: - -/usr/include/machine/_mcontext.h: - -/usr/include/i386/_mcontext.h: - -/usr/include/mach/i386/_structs.h: - -/usr/include/sys/_pthread/_pthread_attr_t.h: - -/usr/include/sys/_types/_sigaltstack.h: - -/usr/include/sys/_types/_ucontext.h: - -/usr/include/sys/resource.h: - -/usr/include/machine/endian.h: - -/usr/include/i386/endian.h: - -/usr/include/sys/_endian.h: - -/usr/include/libkern/_OSByteOrder.h: - 
-/usr/include/libkern/i386/_OSByteOrder.h: - -/usr/include/alloca.h: - -/usr/include/machine/types.h: - -/usr/include/i386/types.h: - -/usr/local/include/boost/core/addressof.hpp: - -/usr/local/include/boost/core/enable_if.hpp: - -/usr/local/include/boost/core/explicit_operator_bool.hpp: - -/usr/local/include/boost/core/swap.hpp: - -/usr/local/include/boost/optional/bad_optional_access.hpp: - -/usr/local/include/boost/static_assert.hpp: - -/usr/local/include/boost/type.hpp: - -/usr/local/include/boost/type_traits/alignment_of.hpp: - -/usr/local/include/boost/type_traits/intrinsics.hpp: - -/usr/local/include/boost/type_traits/detail/config.hpp: - -/usr/local/include/boost/version.hpp: - -/usr/local/include/boost/type_traits/integral_constant.hpp: - -/usr/local/include/boost/type_traits/conditional.hpp: - -/usr/local/include/boost/type_traits/has_nothrow_constructor.hpp: - -/usr/local/include/boost/type_traits/is_default_constructible.hpp: - -/usr/local/include/boost/type_traits/is_pod.hpp: - -/usr/local/include/boost/type_traits/is_void.hpp: - -/usr/local/include/boost/type_traits/is_scalar.hpp: - -/usr/local/include/boost/type_traits/is_arithmetic.hpp: - -/usr/local/include/boost/type_traits/is_integral.hpp: - -/usr/local/include/boost/type_traits/is_floating_point.hpp: - -/usr/local/include/boost/type_traits/is_enum.hpp: - -/usr/local/include/boost/type_traits/is_pointer.hpp: - -/usr/local/include/boost/type_traits/is_member_pointer.hpp: - -/usr/local/include/boost/type_traits/is_member_function_pointer.hpp: - -/usr/local/include/boost/type_traits/detail/is_mem_fun_pointer_impl.hpp: - -/usr/local/include/boost/type_traits/remove_cv.hpp: - -/usr/local/include/boost/type_traits/type_with_alignment.hpp: - -/usr/local/include/boost/type_traits/remove_const.hpp: - -/usr/local/include/boost/type_traits/remove_reference.hpp: - -/usr/local/include/boost/type_traits/decay.hpp: - -/usr/local/include/boost/type_traits/is_array.hpp: - 
-/usr/local/include/boost/type_traits/is_function.hpp: - -/usr/local/include/boost/type_traits/is_reference.hpp: - -/usr/local/include/boost/type_traits/is_lvalue_reference.hpp: - -/usr/local/include/boost/type_traits/is_rvalue_reference.hpp: - -/usr/local/include/boost/type_traits/detail/is_function_ptr_helper.hpp: - -/usr/local/include/boost/type_traits/remove_bounds.hpp: - -/usr/local/include/boost/type_traits/remove_extent.hpp: - -/usr/local/include/boost/type_traits/add_pointer.hpp: - -/usr/local/include/boost/type_traits/is_base_of.hpp: - -/usr/local/include/boost/type_traits/is_base_and_derived.hpp: - -/usr/local/include/boost/type_traits/is_same.hpp: - -/usr/local/include/boost/type_traits/is_class.hpp: - -/usr/local/include/boost/type_traits/is_const.hpp: - -/usr/local/include/boost/type_traits/is_constructible.hpp: - -/usr/local/include/boost/type_traits/is_convertible.hpp: - -/usr/local/include/boost/type_traits/is_nothrow_move_assignable.hpp: - -/usr/local/include/boost/type_traits/has_trivial_move_assign.hpp: - -/usr/local/include/boost/type_traits/is_volatile.hpp: - -/usr/local/include/boost/type_traits/is_assignable.hpp: - -/usr/local/include/boost/type_traits/has_trivial_assign.hpp: - -/usr/local/include/boost/type_traits/has_nothrow_assign.hpp: - -/usr/local/include/boost/utility/enable_if.hpp: - -/usr/local/include/boost/type_traits/declval.hpp: - -/usr/local/include/boost/type_traits/add_rvalue_reference.hpp: - -/usr/local/include/boost/type_traits/is_nothrow_move_constructible.hpp: - -/usr/local/include/boost/type_traits/has_trivial_move_constructor.hpp: - -/usr/local/include/boost/type_traits/has_nothrow_copy.hpp: - -/usr/local/include/boost/type_traits/is_copy_constructible.hpp: - -/usr/local/include/boost/type_traits/detail/yes_no_type.hpp: - -/usr/local/include/boost/type_traits/add_reference.hpp: - -/usr/local/include/boost/noncopyable.hpp: - -/usr/local/include/boost/core/noncopyable.hpp: - -/usr/local/include/boost/move/utility.hpp: - 
-/usr/local/include/boost/move/detail/config_begin.hpp: - -/usr/local/include/boost/move/detail/workaround.hpp: - -/usr/local/include/boost/move/utility_core.hpp: - -/usr/local/include/boost/move/core.hpp: - -/usr/local/include/boost/move/detail/type_traits.hpp: - -/usr/local/include/boost/move/detail/meta_utils.hpp: - -/usr/local/include/boost/move/detail/meta_utils_core.hpp: - -/usr/local/include/boost/move/detail/config_end.hpp: - -/usr/local/include/boost/move/traits.hpp: - -/usr/local/include/boost/none.hpp: - -/usr/local/include/boost/none_t.hpp: - -/usr/local/include/boost/utility/compare_pointees.hpp: - -/usr/local/include/boost/optional/detail/optional_config.hpp: - -/usr/local/include/boost/optional/detail/optional_factory_support.hpp: - -/usr/local/include/boost/optional/detail/optional_aligned_storage.hpp: - -/usr/local/include/boost/optional/detail/optional_trivially_copyable_base.hpp: - -/usr/local/include/boost/optional/detail/optional_reference_spec.hpp: - -/usr/local/include/boost/optional/detail/optional_relops.hpp: - -/usr/local/include/boost/optional/detail/optional_swap.hpp: - -/usr/local/include/boost/property_tree/exceptions.hpp: - -/usr/local/include/boost/any.hpp: - -/usr/local/include/boost/type_index.hpp: - -/usr/local/include/boost/type_index/stl_type_index.hpp: - -/usr/local/include/boost/type_index/type_index_facade.hpp: - -/usr/local/include/boost/core/demangle.hpp: - -/usr/include/cxxabi.h: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/cstdlib: - -/usr/local/include/boost/mpl/if.hpp: - -/usr/local/include/boost/mpl/aux_/value_wknd.hpp: - -/usr/local/include/boost/mpl/aux_/static_cast.hpp: - -/usr/local/include/boost/mpl/aux_/config/workaround.hpp: - -/usr/local/include/boost/mpl/aux_/config/integral.hpp: - -/usr/local/include/boost/mpl/aux_/config/msvc.hpp: - -/usr/local/include/boost/mpl/aux_/config/eti.hpp: - -/usr/local/include/boost/mpl/aux_/na_spec.hpp: - 
-/usr/local/include/boost/mpl/lambda_fwd.hpp: - -/usr/local/include/boost/mpl/void_fwd.hpp: - -/usr/local/include/boost/mpl/aux_/adl_barrier.hpp: - -/usr/local/include/boost/mpl/aux_/config/adl.hpp: - -/usr/local/include/boost/mpl/aux_/config/intel.hpp: - -/usr/local/include/boost/mpl/aux_/config/gcc.hpp: - -/usr/local/include/boost/mpl/aux_/na.hpp: - -/usr/local/include/boost/mpl/bool.hpp: - -/usr/local/include/boost/mpl/bool_fwd.hpp: - -/usr/local/include/boost/mpl/integral_c_tag.hpp: - -/usr/local/include/boost/mpl/aux_/config/static_constant.hpp: - -/usr/local/include/boost/mpl/aux_/na_fwd.hpp: - -/usr/local/include/boost/mpl/aux_/config/ctps.hpp: - -/usr/local/include/boost/mpl/aux_/config/lambda.hpp: - -/usr/local/include/boost/mpl/aux_/config/ttp.hpp: - -/usr/local/include/boost/mpl/int.hpp: - -/usr/local/include/boost/mpl/int_fwd.hpp: - -/usr/local/include/boost/mpl/aux_/nttp_decl.hpp: - -/usr/local/include/boost/mpl/aux_/config/nttp.hpp: - -/usr/local/include/boost/mpl/aux_/integral_wrapper.hpp: - -/usr/local/include/boost/preprocessor/cat.hpp: - -/usr/local/include/boost/preprocessor/config/config.hpp: - -/usr/local/include/boost/mpl/aux_/lambda_arity_param.hpp: - -/usr/local/include/boost/mpl/aux_/template_arity_fwd.hpp: - -/usr/local/include/boost/mpl/aux_/arity.hpp: - -/usr/local/include/boost/mpl/aux_/config/dtp.hpp: - -/usr/local/include/boost/mpl/aux_/preprocessor/params.hpp: - -/usr/local/include/boost/mpl/aux_/config/preprocessor.hpp: - -/usr/local/include/boost/preprocessor/comma_if.hpp: - -/usr/local/include/boost/preprocessor/punctuation/comma_if.hpp: - -/usr/local/include/boost/preprocessor/control/if.hpp: - -/usr/local/include/boost/preprocessor/control/iif.hpp: - -/usr/local/include/boost/preprocessor/logical/bool.hpp: - -/usr/local/include/boost/preprocessor/facilities/empty.hpp: - -/usr/local/include/boost/preprocessor/punctuation/comma.hpp: - -/usr/local/include/boost/preprocessor/repeat.hpp: - 
-/usr/local/include/boost/preprocessor/repetition/repeat.hpp: - -/usr/local/include/boost/preprocessor/debug/error.hpp: - -/usr/local/include/boost/preprocessor/detail/auto_rec.hpp: - -/usr/local/include/boost/preprocessor/tuple/eat.hpp: - -/usr/local/include/boost/preprocessor/inc.hpp: - -/usr/local/include/boost/preprocessor/arithmetic/inc.hpp: - -/usr/local/include/boost/mpl/aux_/preprocessor/enum.hpp: - -/usr/local/include/boost/mpl/aux_/preprocessor/def_params_tail.hpp: - -/usr/local/include/boost/mpl/limits/arity.hpp: - -/usr/local/include/boost/preprocessor/logical/and.hpp: - -/usr/local/include/boost/preprocessor/logical/bitand.hpp: - -/usr/local/include/boost/preprocessor/identity.hpp: - -/usr/local/include/boost/preprocessor/facilities/identity.hpp: - -/usr/local/include/boost/preprocessor/empty.hpp: - -/usr/local/include/boost/preprocessor/arithmetic/add.hpp: - -/usr/local/include/boost/preprocessor/arithmetic/dec.hpp: - -/usr/local/include/boost/preprocessor/control/while.hpp: - -/usr/local/include/boost/preprocessor/list/fold_left.hpp: - -/usr/local/include/boost/preprocessor/list/detail/fold_left.hpp: - -/usr/local/include/boost/preprocessor/control/expr_iif.hpp: - -/usr/local/include/boost/preprocessor/list/adt.hpp: - -/usr/local/include/boost/preprocessor/detail/is_binary.hpp: - -/usr/local/include/boost/preprocessor/detail/check.hpp: - -/usr/local/include/boost/preprocessor/logical/compl.hpp: - -/usr/local/include/boost/preprocessor/list/fold_right.hpp: - -/usr/local/include/boost/preprocessor/list/detail/fold_right.hpp: - -/usr/local/include/boost/preprocessor/list/reverse.hpp: - -/usr/local/include/boost/preprocessor/control/detail/while.hpp: - -/usr/local/include/boost/preprocessor/tuple/elem.hpp: - -/usr/local/include/boost/preprocessor/facilities/expand.hpp: - -/usr/local/include/boost/preprocessor/facilities/overload.hpp: - -/usr/local/include/boost/preprocessor/variadic/size.hpp: - -/usr/local/include/boost/preprocessor/tuple/rem.hpp: - 
-/usr/local/include/boost/preprocessor/tuple/detail/is_single_return.hpp: - -/usr/local/include/boost/preprocessor/variadic/elem.hpp: - -/usr/local/include/boost/preprocessor/arithmetic/sub.hpp: - -/usr/local/include/boost/mpl/aux_/config/overload_resolution.hpp: - -/usr/local/include/boost/mpl/aux_/lambda_support.hpp: - -/usr/local/include/boost/mpl/or.hpp: - -/usr/local/include/boost/mpl/aux_/config/use_preprocessed.hpp: - -/usr/local/include/boost/mpl/aux_/nested_type_wknd.hpp: - -/usr/local/include/boost/mpl/aux_/include_preprocessed.hpp: - -/usr/local/include/boost/mpl/aux_/config/compiler.hpp: - -/usr/local/include/boost/preprocessor/stringize.hpp: - -/usr/local/include/boost/mpl/aux_/preprocessed/gcc/or.hpp: - -/usr/local/include/boost/functional/hash.hpp: - -/usr/local/include/boost/functional/hash/hash.hpp: - -/usr/local/include/boost/functional/hash/hash_fwd.hpp: - -/usr/local/include/boost/functional/hash/detail/hash_float.hpp: - -/usr/local/include/boost/functional/hash/detail/float_functions.hpp: - -/usr/local/include/boost/config/no_tr1/cmath.hpp: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/cmath: - -/usr/local/include/boost/functional/hash/detail/limits.hpp: - -/usr/local/include/boost/limits.hpp: - -/usr/local/include/boost/integer/static_log2.hpp: - -/usr/local/include/boost/integer_fwd.hpp: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/climits: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../lib/clang/7.0.2/include/limits.h: - -/usr/include/limits.h: - -/usr/include/machine/limits.h: - -/usr/include/i386/limits.h: - -/usr/include/i386/_limits.h: - -/usr/include/sys/syslimits.h: - -/usr/local/include/boost/cstdint.hpp: - -/usr/local/include/boost/functional/hash/extensions.hpp: - -/usr/local/include/boost/detail/container_fwd.hpp: - 
-/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/deque: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/__split_buffer: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/vector: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/__bit_reference: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/map: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/__tree: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/set: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/bitset: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/complex: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/sstream: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/ostream: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/ios: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/__locale: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/mutex: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/__mutex_base: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/chrono: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/ctime: - 
-/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/ratio: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/system_error: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/cerrno: - -/usr/include/errno.h: - -/usr/include/sys/errno.h: - -/usr/include/pthread.h: - -/usr/include/pthread/pthread_impl.h: - -/usr/include/pthread/sched.h: - -/usr/include/sys/_pthread/_pthread_cond_t.h: - -/usr/include/sys/_pthread/_pthread_condattr_t.h: - -/usr/include/sys/_pthread/_pthread_key_t.h: - -/usr/include/sys/_pthread/_pthread_mutex_t.h: - -/usr/include/sys/_pthread/_pthread_mutexattr_t.h: - -/usr/include/sys/_pthread/_pthread_once_t.h: - -/usr/include/sys/_pthread/_pthread_rwlock_t.h: - -/usr/include/sys/_pthread/_pthread_rwlockattr_t.h: - -/usr/include/sys/_pthread/_pthread_t.h: - -/usr/include/pthread/qos.h: - -/usr/include/sys/qos.h: - -/usr/include/sys/_types/_mach_port_t.h: - -/usr/include/locale.h: - -/usr/include/_locale.h: - -/usr/include/xlocale.h: - -/usr/include/_xlocale.h: - -/usr/include/xlocale/_ctype.h: - -/usr/include/xlocale/__wctype.h: - -/usr/include/xlocale/_stdio.h: - -/usr/include/xlocale/_stdlib.h: - -/usr/include/xlocale/_string.h: - -/usr/include/xlocale/_time.h: - -/usr/include/xlocale/_wchar.h: - -/usr/include/xlocale/_wctype.h: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/streambuf: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/locale: - -/usr/include/nl_types.h: - -/usr/include/sys/types.h: - -/usr/include/sys/_types/_blkcnt_t.h: - -/usr/include/sys/_types/_blksize_t.h: - -/usr/include/sys/_types/_in_addr_t.h: - -/usr/include/sys/_types/_in_port_t.h: - -/usr/include/sys/_types/_ino_t.h: - -/usr/include/sys/_types/_ino64_t.h: - 
-/usr/include/sys/_types/_key_t.h: - -/usr/include/sys/_types/_nlink_t.h: - -/usr/include/sys/_types/_fsblkcnt_t.h: - -/usr/include/sys/_types/_fsfilcnt_t.h: - -/usr/include/_types/_nl_item.h: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/istream: - -/usr/local/include/boost/preprocessor/repetition/repeat_from_to.hpp: - -/usr/local/include/boost/preprocessor/repetition/enum_params.hpp: - -/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/../include/c++/v1/array: - -/usr/local/include/boost/property_tree/detail/exception_implementation.hpp: - -/usr/local/include/boost/property_tree/detail/ptree_utils.hpp: - -/usr/local/include/boost/mpl/has_xxx.hpp: - -/usr/local/include/boost/mpl/aux_/type_wrapper.hpp: - -/usr/local/include/boost/mpl/aux_/yes_no.hpp: - -/usr/local/include/boost/mpl/aux_/config/arrays.hpp: - -/usr/local/include/boost/mpl/aux_/config/has_xxx.hpp: - -/usr/local/include/boost/mpl/aux_/config/msvc_typename.hpp: - -/usr/local/include/boost/preprocessor/array/elem.hpp: - -/usr/local/include/boost/preprocessor/array/data.hpp: - -/usr/local/include/boost/preprocessor/array/size.hpp: - -/usr/local/include/boost/preprocessor/repetition/enum_trailing_params.hpp: - -/usr/local/include/boost/mpl/and.hpp: - -/usr/local/include/boost/mpl/aux_/preprocessed/gcc/and.hpp: - -/usr/local/include/boost/property_tree/stream_translator.hpp: - -/usr/local/include/boost/optional/optional_io.hpp: - -/usr/local/include/boost/multi_index_container.hpp: - -/usr/local/include/boost/detail/allocator_utilities.hpp: - -/usr/local/include/boost/mpl/eval_if.hpp: - -/usr/local/include/boost/detail/no_exceptions_support.hpp: - -/usr/local/include/boost/core/no_exceptions_support.hpp: - -/usr/local/include/boost/mpl/at.hpp: - -/usr/local/include/boost/mpl/at_fwd.hpp: - -/usr/local/include/boost/mpl/aux_/at_impl.hpp: - -/usr/local/include/boost/mpl/begin_end.hpp: - 
-/usr/local/include/boost/mpl/begin_end_fwd.hpp: - -/usr/local/include/boost/mpl/aux_/begin_end_impl.hpp: - -/usr/local/include/boost/mpl/sequence_tag_fwd.hpp: - -/usr/local/include/boost/mpl/void.hpp: - -/usr/local/include/boost/mpl/aux_/has_begin.hpp: - -/usr/local/include/boost/mpl/aux_/traits_lambda_spec.hpp: - -/usr/local/include/boost/mpl/sequence_tag.hpp: - -/usr/local/include/boost/mpl/aux_/has_tag.hpp: - -/usr/local/include/boost/mpl/aux_/is_msvc_eti_arg.hpp: - -/usr/local/include/boost/mpl/advance.hpp: - -/usr/local/include/boost/mpl/advance_fwd.hpp: - -/usr/local/include/boost/mpl/aux_/common_name_wknd.hpp: - -/usr/local/include/boost/mpl/less.hpp: - -/usr/local/include/boost/mpl/aux_/comparison_op.hpp: - -/usr/local/include/boost/mpl/aux_/numeric_op.hpp: - -/usr/local/include/boost/mpl/numeric_cast.hpp: - -/usr/local/include/boost/mpl/apply_wrap.hpp: - -/usr/local/include/boost/mpl/aux_/has_apply.hpp: - -/usr/local/include/boost/mpl/aux_/config/has_apply.hpp: - -/usr/local/include/boost/mpl/aux_/msvc_never_true.hpp: - -/usr/local/include/boost/mpl/aux_/preprocessed/gcc/apply_wrap.hpp: - -/usr/local/include/boost/mpl/tag.hpp: - -/usr/local/include/boost/mpl/aux_/numeric_cast_utils.hpp: - -/usr/local/include/boost/mpl/aux_/config/forwarding.hpp: - -/usr/local/include/boost/mpl/aux_/msvc_eti_base.hpp: - -/usr/local/include/boost/mpl/aux_/preprocessed/gcc/less.hpp: - -/usr/local/include/boost/mpl/negate.hpp: - -/usr/local/include/boost/mpl/integral_c.hpp: - -/usr/local/include/boost/mpl/integral_c_fwd.hpp: - -/usr/local/include/boost/mpl/long.hpp: - -/usr/local/include/boost/mpl/long_fwd.hpp: - -/usr/local/include/boost/mpl/aux_/advance_forward.hpp: - -/usr/local/include/boost/mpl/next.hpp: - -/usr/local/include/boost/mpl/next_prior.hpp: - -/usr/local/include/boost/mpl/aux_/preprocessed/gcc/advance_forward.hpp: - -/usr/local/include/boost/mpl/aux_/advance_backward.hpp: - -/usr/local/include/boost/mpl/prior.hpp: - 
-/usr/local/include/boost/mpl/aux_/preprocessed/gcc/advance_backward.hpp: - -/usr/local/include/boost/mpl/deref.hpp: - -/usr/local/include/boost/mpl/aux_/msvc_type.hpp: - -/usr/local/include/boost/mpl/contains.hpp: - -/usr/local/include/boost/mpl/contains_fwd.hpp: - -/usr/local/include/boost/mpl/aux_/contains_impl.hpp: - -/usr/local/include/boost/mpl/find.hpp: - -/usr/local/include/boost/mpl/find_if.hpp: - -/usr/local/include/boost/mpl/aux_/find_if_pred.hpp: - -/usr/local/include/boost/mpl/aux_/iter_apply.hpp: - -/usr/local/include/boost/mpl/apply.hpp: - -/usr/local/include/boost/mpl/apply_fwd.hpp: - -/usr/local/include/boost/mpl/aux_/preprocessed/gcc/apply_fwd.hpp: - -/usr/local/include/boost/mpl/placeholders.hpp: - -/usr/local/include/boost/mpl/arg.hpp: - -/usr/local/include/boost/mpl/arg_fwd.hpp: - -/usr/local/include/boost/mpl/aux_/na_assert.hpp: - -/usr/local/include/boost/mpl/assert.hpp: - -/usr/local/include/boost/mpl/not.hpp: - -/usr/local/include/boost/mpl/aux_/config/gpu.hpp: - -/usr/local/include/boost/mpl/aux_/config/pp_counter.hpp: - -/usr/local/include/boost/mpl/aux_/arity_spec.hpp: - -/usr/local/include/boost/mpl/aux_/arg_typedef.hpp: - -/usr/local/include/boost/mpl/aux_/preprocessed/gcc/arg.hpp: - -/usr/local/include/boost/mpl/aux_/preprocessed/gcc/placeholders.hpp: - -/usr/local/include/boost/mpl/lambda.hpp: - -/usr/local/include/boost/mpl/bind.hpp: - -/usr/local/include/boost/mpl/bind_fwd.hpp: - -/usr/local/include/boost/mpl/aux_/config/bind.hpp: - -/usr/local/include/boost/mpl/aux_/preprocessed/gcc/bind_fwd.hpp: - -/usr/local/include/boost/mpl/protect.hpp: - -/usr/local/include/boost/mpl/aux_/preprocessed/gcc/bind.hpp: - -/usr/local/include/boost/mpl/aux_/full_lambda.hpp: - -/usr/local/include/boost/mpl/quote.hpp: - -/usr/local/include/boost/mpl/aux_/has_type.hpp: - -/usr/local/include/boost/mpl/aux_/config/bcc.hpp: - -/usr/local/include/boost/mpl/aux_/preprocessed/gcc/quote.hpp: - -/usr/local/include/boost/mpl/aux_/template_arity.hpp: - 
-/usr/local/include/boost/mpl/aux_/preprocessed/gcc/template_arity.hpp: - -/usr/local/include/boost/mpl/aux_/preprocessed/gcc/full_lambda.hpp: - -/usr/local/include/boost/mpl/aux_/preprocessed/gcc/apply.hpp: - -/usr/local/include/boost/mpl/iter_fold_if.hpp: - -/usr/local/include/boost/mpl/logical.hpp: - -/usr/local/include/boost/mpl/always.hpp: - -/usr/local/include/boost/mpl/aux_/preprocessor/default_params.hpp: - -/usr/local/include/boost/mpl/pair.hpp: - -/usr/local/include/boost/mpl/aux_/iter_fold_if_impl.hpp: - -/usr/local/include/boost/mpl/identity.hpp: - -/usr/local/include/boost/mpl/aux_/preprocessed/gcc/iter_fold_if_impl.hpp: - -/usr/local/include/boost/mpl/same_as.hpp: - -/usr/local/include/boost/mpl/aux_/lambda_spec.hpp: - -/usr/local/include/boost/mpl/size.hpp: - -/usr/local/include/boost/mpl/size_fwd.hpp: - -/usr/local/include/boost/mpl/aux_/size_impl.hpp: - -/usr/local/include/boost/mpl/distance.hpp: - -/usr/local/include/boost/mpl/distance_fwd.hpp: - -/usr/local/include/boost/mpl/iter_fold.hpp: - -/usr/local/include/boost/mpl/O1_size.hpp: - -/usr/local/include/boost/mpl/O1_size_fwd.hpp: - -/usr/local/include/boost/mpl/aux_/O1_size_impl.hpp: - -/usr/local/include/boost/mpl/aux_/has_size.hpp: - -/usr/local/include/boost/mpl/aux_/iter_fold_impl.hpp: - -/usr/local/include/boost/mpl/aux_/preprocessed/gcc/iter_fold_impl.hpp: - -/usr/local/include/boost/mpl/iterator_range.hpp: - -/usr/local/include/boost/multi_index_container_fwd.hpp: - -/usr/local/include/boost/multi_index/identity.hpp: - -/usr/local/include/boost/multi_index/identity_fwd.hpp: - -/usr/local/include/boost/multi_index/indexed_by.hpp: - -/usr/local/include/boost/mpl/vector.hpp: - -/usr/local/include/boost/mpl/limits/vector.hpp: - -/usr/local/include/boost/mpl/vector/vector20.hpp: - -/usr/local/include/boost/mpl/vector/vector10.hpp: - -/usr/local/include/boost/mpl/vector/vector0.hpp: - -/usr/local/include/boost/mpl/vector/aux_/at.hpp: - -/usr/local/include/boost/mpl/vector/aux_/tag.hpp: - 
-/usr/local/include/boost/mpl/aux_/config/typeof.hpp: - -/usr/local/include/boost/mpl/vector/aux_/front.hpp: - -/usr/local/include/boost/mpl/front_fwd.hpp: - -/usr/local/include/boost/mpl/vector/aux_/push_front.hpp: - -/usr/local/include/boost/mpl/push_front_fwd.hpp: - -/usr/local/include/boost/mpl/vector/aux_/item.hpp: - -/usr/local/include/boost/mpl/vector/aux_/pop_front.hpp: - -/usr/local/include/boost/mpl/pop_front_fwd.hpp: - -/usr/local/include/boost/mpl/vector/aux_/push_back.hpp: - -/usr/local/include/boost/mpl/push_back_fwd.hpp: - -/usr/local/include/boost/mpl/vector/aux_/pop_back.hpp: - -/usr/local/include/boost/mpl/pop_back_fwd.hpp: - -/usr/local/include/boost/mpl/vector/aux_/back.hpp: - -/usr/local/include/boost/mpl/back_fwd.hpp: - -/usr/local/include/boost/mpl/vector/aux_/clear.hpp: - -/usr/local/include/boost/mpl/clear_fwd.hpp: - -/usr/local/include/boost/mpl/vector/aux_/vector0.hpp: - -/usr/local/include/boost/mpl/vector/aux_/iterator.hpp: - -/usr/local/include/boost/mpl/iterator_tags.hpp: - -/usr/local/include/boost/mpl/plus.hpp: - -/usr/local/include/boost/mpl/aux_/arithmetic_op.hpp: - -/usr/local/include/boost/mpl/aux_/largest_int.hpp: - -/usr/local/include/boost/mpl/aux_/preprocessed/gcc/plus.hpp: - -/usr/local/include/boost/mpl/minus.hpp: - -/usr/local/include/boost/mpl/aux_/preprocessed/gcc/minus.hpp: - -/usr/local/include/boost/mpl/vector/aux_/O1_size.hpp: - -/usr/local/include/boost/mpl/vector/aux_/size.hpp: - -/usr/local/include/boost/mpl/vector/aux_/empty.hpp: - -/usr/local/include/boost/mpl/empty_fwd.hpp: - -/usr/local/include/boost/mpl/vector/aux_/begin_end.hpp: - -/usr/local/include/boost/mpl/vector/aux_/include_preprocessed.hpp: - -/usr/local/include/boost/mpl/vector/aux_/preprocessed/typeof_based/vector10.hpp: - -/usr/local/include/boost/mpl/vector/aux_/preprocessed/typeof_based/vector20.hpp: - -/usr/local/include/boost/mpl/aux_/preprocessed/gcc/vector.hpp: - -/usr/local/include/boost/preprocessor/control/expr_if.hpp: - 
-/usr/local/include/boost/preprocessor/repetition/enum.hpp: - -/usr/local/include/boost/multi_index/ordered_index_fwd.hpp: - -/usr/local/include/boost/multi_index/detail/ord_index_args.hpp: - -/usr/local/include/boost/multi_index/tag.hpp: - -/usr/local/include/boost/multi_index/detail/no_duplicate_tags.hpp: - -/usr/local/include/boost/mpl/fold.hpp: - -/usr/local/include/boost/mpl/aux_/fold_impl.hpp: - -/usr/local/include/boost/mpl/aux_/preprocessed/gcc/fold_impl.hpp: - -/usr/local/include/boost/mpl/set/set0.hpp: - -/usr/local/include/boost/mpl/set/aux_/at_impl.hpp: - -/usr/local/include/boost/mpl/set/aux_/has_key_impl.hpp: - -/usr/local/include/boost/mpl/set/aux_/tag.hpp: - -/usr/local/include/boost/mpl/has_key_fwd.hpp: - -/usr/local/include/boost/mpl/aux_/overload_names.hpp: - -/usr/local/include/boost/mpl/aux_/ptr_to_ref.hpp: - -/usr/local/include/boost/mpl/aux_/config/operators.hpp: - -/usr/local/include/boost/mpl/set/aux_/clear_impl.hpp: - -/usr/local/include/boost/mpl/set/aux_/set0.hpp: - -/usr/local/include/boost/mpl/set/aux_/size_impl.hpp: - -/usr/local/include/boost/mpl/set/aux_/empty_impl.hpp: - -/usr/local/include/boost/mpl/set/aux_/insert_impl.hpp: - -/usr/local/include/boost/mpl/insert_fwd.hpp: - -/usr/local/include/boost/mpl/set/aux_/item.hpp: - -/usr/local/include/boost/mpl/base.hpp: - -/usr/local/include/boost/mpl/set/aux_/insert_range_impl.hpp: - -/usr/local/include/boost/mpl/insert_range_fwd.hpp: - -/usr/local/include/boost/mpl/insert.hpp: - -/usr/local/include/boost/mpl/aux_/insert_impl.hpp: - -/usr/local/include/boost/mpl/reverse_fold.hpp: - -/usr/local/include/boost/mpl/aux_/reverse_fold_impl.hpp: - -/usr/local/include/boost/mpl/aux_/preprocessed/gcc/reverse_fold_impl.hpp: - -/usr/local/include/boost/mpl/clear.hpp: - -/usr/local/include/boost/mpl/aux_/clear_impl.hpp: - -/usr/local/include/boost/mpl/push_front.hpp: - -/usr/local/include/boost/mpl/aux_/push_front_impl.hpp: - -/usr/local/include/boost/mpl/set/aux_/erase_impl.hpp: - 
-/usr/local/include/boost/mpl/erase_fwd.hpp: - -/usr/local/include/boost/mpl/set/aux_/erase_key_impl.hpp: - -/usr/local/include/boost/mpl/erase_key_fwd.hpp: - -/usr/local/include/boost/mpl/set/aux_/key_type_impl.hpp: - -/usr/local/include/boost/mpl/key_type_fwd.hpp: - -/usr/local/include/boost/mpl/set/aux_/value_type_impl.hpp: - -/usr/local/include/boost/mpl/value_type_fwd.hpp: - -/usr/local/include/boost/mpl/set/aux_/begin_end_impl.hpp: - -/usr/local/include/boost/mpl/set/aux_/iterator.hpp: - -/usr/local/include/boost/mpl/has_key.hpp: - -/usr/local/include/boost/mpl/aux_/has_key_impl.hpp: - -/usr/local/include/boost/mpl/transform.hpp: - -/usr/local/include/boost/mpl/pair_view.hpp: - -/usr/local/include/boost/mpl/iterator_category.hpp: - -/usr/local/include/boost/mpl/min_max.hpp: - -/usr/local/include/boost/mpl/is_sequence.hpp: - -/usr/local/include/boost/mpl/aux_/inserter_algorithm.hpp: - -/usr/local/include/boost/mpl/back_inserter.hpp: - -/usr/local/include/boost/mpl/push_back.hpp: - -/usr/local/include/boost/mpl/aux_/push_back_impl.hpp: - -/usr/local/include/boost/mpl/inserter.hpp: - -/usr/local/include/boost/mpl/front_inserter.hpp: - -/usr/local/include/boost/preprocessor/facilities/intercept.hpp: - -/usr/local/include/boost/preprocessor/repetition/enum_binary_params.hpp: - -/usr/local/include/boost/multi_index/detail/ord_index_impl_fwd.hpp: - -/usr/local/include/boost/multi_index/detail/access_specifier.hpp: - -/usr/local/include/boost/multi_index/detail/adl_swap.hpp: - -/usr/local/include/boost/multi_index/detail/base_type.hpp: - -/usr/local/include/boost/multi_index/detail/index_base.hpp: - -/usr/local/include/boost/multi_index/detail/copy_map.hpp: - -/usr/local/include/boost/multi_index/detail/auto_space.hpp: - -/usr/local/include/boost/multi_index/detail/raw_ptr.hpp: - -/usr/local/include/boost/multi_index/detail/do_not_copy_elements_tag.hpp: - -/usr/local/include/boost/multi_index/detail/node_type.hpp: - 
-/usr/local/include/boost/mpl/reverse_iter_fold.hpp: - -/usr/local/include/boost/mpl/aux_/reverse_iter_fold_impl.hpp: - -/usr/local/include/boost/mpl/aux_/preprocessed/gcc/reverse_iter_fold_impl.hpp: - -/usr/local/include/boost/multi_index/detail/header_holder.hpp: - -/usr/local/include/boost/multi_index/detail/index_node_base.hpp: - -/usr/local/include/boost/type_traits/aligned_storage.hpp: - -/usr/local/include/boost/archive/archive_exception.hpp: - -/usr/local/include/boost/archive/detail/decl.hpp: - -/usr/local/include/boost/archive/detail/abi_prefix.hpp: - -/usr/local/include/boost/config/abi_prefix.hpp: - -/usr/local/include/boost/archive/detail/abi_suffix.hpp: - -/usr/local/include/boost/config/abi_suffix.hpp: - -/usr/local/include/boost/serialization/access.hpp: - -/usr/local/include/boost/multi_index/detail/ignore_wstrict_aliasing.hpp: - -/usr/local/include/boost/multi_index/detail/restore_wstrict_aliasing.hpp: - -/usr/local/include/boost/multi_index/detail/is_index_list.hpp: - -/usr/local/include/boost/mpl/empty.hpp: - -/usr/local/include/boost/mpl/aux_/empty_impl.hpp: - -/usr/local/include/boost/multi_index/detail/vartempl_support.hpp: - -/usr/local/include/boost/preprocessor/seq/elem.hpp: - -/usr/local/include/boost/tuple/tuple.hpp: - -/usr/local/include/boost/ref.hpp: - -/usr/local/include/boost/core/ref.hpp: - -/usr/local/include/boost/tuple/detail/tuple_basic.hpp: - -/usr/local/include/boost/type_traits/cv_traits.hpp: - -/usr/local/include/boost/type_traits/add_const.hpp: - -/usr/local/include/boost/type_traits/add_volatile.hpp: - -/usr/local/include/boost/type_traits/add_cv.hpp: - -/usr/local/include/boost/type_traits/remove_volatile.hpp: - -/usr/local/include/boost/type_traits/function_traits.hpp: - -/usr/local/include/boost/utility/swap.hpp: - -/usr/local/include/boost/multi_index/detail/index_loader.hpp: - -/usr/local/include/boost/serialization/nvp.hpp: - -/usr/local/include/boost/serialization/level.hpp: - 
-/usr/local/include/boost/type_traits/is_fundamental.hpp: - -/usr/local/include/boost/serialization/level_enum.hpp: - -/usr/local/include/boost/serialization/tracking.hpp: - -/usr/local/include/boost/mpl/equal_to.hpp: - -/usr/local/include/boost/mpl/aux_/preprocessed/gcc/equal_to.hpp: - -/usr/local/include/boost/mpl/greater.hpp: - -/usr/local/include/boost/mpl/aux_/preprocessed/gcc/greater.hpp: - -/usr/local/include/boost/serialization/tracking_enum.hpp: - -/usr/local/include/boost/serialization/type_info_implementation.hpp: - -/usr/local/include/boost/serialization/traits.hpp: - -/usr/local/include/boost/serialization/split_member.hpp: - -/usr/local/include/boost/serialization/base_object.hpp: - -/usr/local/include/boost/type_traits/is_polymorphic.hpp: - -/usr/local/include/boost/serialization/force_include.hpp: - -/usr/local/include/boost/serialization/void_cast_fwd.hpp: - -/usr/local/include/boost/serialization/wrapper.hpp: - -/usr/local/include/boost/multi_index/detail/index_saver.hpp: - -/usr/local/include/boost/multi_index/detail/index_matcher.hpp: - -/usr/local/include/boost/multi_index/detail/converter.hpp: - -/usr/local/include/boost/multi_index/detail/has_tag.hpp: - -/usr/local/include/boost/multi_index/detail/safe_mode.hpp: - -/usr/local/include/boost/multi_index/detail/scope_guard.hpp: - -/usr/local/include/boost/utility/base_from_member.hpp: - -/usr/local/include/boost/multi_index/detail/archive_constructed.hpp: - -/usr/local/include/boost/serialization/serialization.hpp: - -/usr/local/include/boost/serialization/strong_typedef.hpp: - -/usr/local/include/boost/operators.hpp: - -/usr/local/include/boost/multi_index/detail/serialization_version.hpp: - -/usr/local/include/boost/serialization/version.hpp: - -/usr/local/include/boost/mpl/comparison.hpp: - -/usr/local/include/boost/mpl/not_equal_to.hpp: - -/usr/local/include/boost/mpl/aux_/preprocessed/gcc/not_equal_to.hpp: - -/usr/local/include/boost/mpl/less_equal.hpp: - 
-/usr/local/include/boost/mpl/aux_/preprocessed/gcc/less_equal.hpp: - -/usr/local/include/boost/mpl/greater_equal.hpp: - -/usr/local/include/boost/mpl/aux_/preprocessed/gcc/greater_equal.hpp: - -/usr/local/include/boost/serialization/collection_size_type.hpp: - -/usr/local/include/boost/serialization/split_free.hpp: - -/usr/local/include/boost/serialization/is_bitwise_serializable.hpp: - -/usr/local/include/boost/multi_index/sequenced_index.hpp: - -/usr/local/include/boost/bind.hpp: - -/usr/local/include/boost/bind/bind.hpp: - -/usr/local/include/boost/mem_fn.hpp: - -/usr/local/include/boost/bind/mem_fn.hpp: - -/usr/local/include/boost/get_pointer.hpp: - -/usr/local/include/boost/config/no_tr1/memory.hpp: - -/usr/local/include/boost/bind/mem_fn_template.hpp: - -/usr/local/include/boost/bind/mem_fn_cc.hpp: - -/usr/local/include/boost/is_placeholder.hpp: - -/usr/local/include/boost/bind/arg.hpp: - -/usr/local/include/boost/visit_each.hpp: - -/usr/local/include/boost/core/is_same.hpp: - -/usr/local/include/boost/bind/storage.hpp: - -/usr/local/include/boost/bind/bind_template.hpp: - -/usr/local/include/boost/bind/bind_cc.hpp: - -/usr/local/include/boost/bind/bind_mf_cc.hpp: - -/usr/local/include/boost/bind/bind_mf2_cc.hpp: - -/usr/local/include/boost/bind/placeholders.hpp: - -/usr/local/include/boost/call_traits.hpp: - -/usr/local/include/boost/detail/call_traits.hpp: - -/usr/local/include/boost/foreach_fwd.hpp: - -/usr/local/include/boost/iterator/reverse_iterator.hpp: - -/usr/local/include/boost/iterator/iterator_adaptor.hpp: - -/usr/local/include/boost/iterator/iterator_categories.hpp: - -/usr/local/include/boost/iterator/detail/config_def.hpp: - -/usr/local/include/boost/iterator/detail/config_undef.hpp: - -/usr/local/include/boost/iterator/iterator_facade.hpp: - -/usr/local/include/boost/iterator/interoperable.hpp: - -/usr/local/include/boost/iterator/iterator_traits.hpp: - -/usr/local/include/boost/iterator/detail/facade_iterator_category.hpp: - 
-/usr/local/include/boost/detail/indirect_traits.hpp: - -/usr/local/include/boost/type_traits/remove_pointer.hpp: - -/usr/local/include/boost/iterator/detail/enable_if.hpp: - -/usr/local/include/boost/utility/addressof.hpp: - -/usr/local/include/boost/type_traits/add_lvalue_reference.hpp: - -/usr/local/include/boost/multi_index/detail/bidir_node_iterator.hpp: - -/usr/local/include/boost/multi_index/detail/seq_index_node.hpp: - -/usr/local/include/boost/multi_index/detail/seq_index_ops.hpp: - -/usr/local/include/boost/multi_index/sequenced_index_fwd.hpp: - -/usr/local/include/boost/multi_index/ordered_index.hpp: - -/usr/local/include/boost/multi_index/detail/ord_index_impl.hpp: - -/usr/local/include/boost/multi_index/detail/modify_key_adaptor.hpp: - -/usr/local/include/boost/multi_index/detail/ord_index_node.hpp: - -/usr/local/include/boost/multi_index/detail/uintptr_type.hpp: - -/usr/local/include/boost/multi_index/detail/ord_index_ops.hpp: - -/usr/local/include/boost/multi_index/detail/promotes_arg.hpp: - -/usr/local/include/boost/multi_index/detail/is_transparent.hpp: - -/usr/local/include/boost/multi_index/detail/unbounded.hpp: - -/usr/local/include/boost/multi_index/detail/value_compare.hpp: - -/usr/local/include/boost/multi_index/detail/duplicates_iterator.hpp: - -/usr/local/include/boost/multi_index/member.hpp: - -/usr/local/include/boost/property_tree/detail/ptree_implementation.hpp: - -src/lammps/lammps_model.h: diff --git a/swm/src/lammps/.dirstamp b/swm/src/lammps/.dirstamp deleted file mode 100644 index e69de29..0000000 diff --git a/swm/src/lammps/lammps_workload1.json b/swm/src/lammps/lammps_workload1.json new file mode 100644 index 0000000..3f0a596 --- /dev/null +++ b/swm/src/lammps/lammps_workload1.json @@ -0,0 +1,21 @@ +{ + "jobs": { + "name": "StandaloneSWM", + "app": "dll", + "dll_path": "apps/dll/lammps.so", + "size": 2048, + "time": 0, + "cfg": + { + "num_x_replicas": 3, + "num_y_replicas": 3, + "num_z_replicas": 3, + "num_time_steps": 10, + 
"req_vc" : 0, + "resp_vc" : 1, + "router_freq" : 800e6, + "cpu_freq" : 4e9, + "cpu_sim_speedup" : 1e6 + } + } +} diff --git a/swm/src/many_to_many/README b/swm/src/many_to_many/README new file mode 100644 index 0000000..741ed50 --- /dev/null +++ b/swm/src/many_to_many/README @@ -0,0 +1,12 @@ +many_to_many SWM + +Each source in N sends 1 message to every destination in M, where N, M > 0. + +Example: For 4 processes [a, b, c, d] where a, b are sources and c, d are detinations: + +a -> c +a -> d +b -> c +b -> d + +This pattern is repeated for each iteration. diff --git a/swm/src/many_to_many/many_to_many_swm_user_code.cpp b/swm/src/many_to_many/many_to_many_swm_user_code.cpp new file mode 100644 index 0000000..b141f4c --- /dev/null +++ b/swm/src/many_to_many/many_to_many_swm_user_code.cpp @@ -0,0 +1,238 @@ +#include "many_to_many_swm_user_code.h" + +ManyToManySWMUserCode::ManyToManySWMUserCode( + boost::property_tree::ptree cfg, + void**& generic_ptrs + ) : + process_cnt(cfg.get("jobs.size", 1)), + iteration_cnt(cfg.get("jobs.cfg.iteration_cnt", 1)), + msg_req_bytes(cfg.get("jobs.cfg.msg_req_bytes", 0)), + msg_rsp_bytes(cfg.get("jobs.cfg.msg_rsp_bytes", 0)), + compute_delay(cfg.get("jobs.cfg.compute_delay", 0)), + use_any_src(cfg.get("jobs.cfg.use_any_src", false)), + blocking_comm(cfg.get("jobs.cfg.blocking_comm", false)), + scattered_start(cfg.get("jobs.cfg.scattered_start", false)), + fixed_pairs(cfg.get("jobs.cfg.fixed_pairs", false)), + start_delay_max(cfg.get("jobs.cfg.start_delay_max", 0)), + randomize_comm_order(cfg.get("jobs.cfg.randomize_communication_order", false)), + show_iterations(cfg.get("jobs.cfg.show_iterations", false)), + debug(cfg.get("jobs.cfg.debug", false)) +{ + + // extract the src/dst rank id intervals + int num = 0; + BOOST_FOREACH(const boost::property_tree::ptree::value_type &v, cfg.get_child("jobs.cfg.src_rank_id_interval")) + { + std::string value = v.second.data(); + + if(num == 0) min_src_id = atoi(value.c_str()); + if(num == 1) 
max_src_id = atoi(value.c_str()); + + num++; + } + assert(num == 2); + + // extract the src/dst rank id intervals + num = 0; + BOOST_FOREACH(const boost::property_tree::ptree::value_type &v, cfg.get_child("jobs.cfg.dst_rank_id_interval")) + { + std::string value = v.second.data(); + + if(num == 0) min_dst_id = atoi(value.c_str()); + if(num == 1) max_dst_id = atoi(value.c_str()); + + num++; + } + assert(num == 2); + + assert(max_src_id < process_cnt); + assert(max_dst_id < process_cnt); + + process_id = *((int*)generic_ptrs[0]); +} + +void +ManyToManySWMUserCode::call() +{ + /* Print job description */ + if(process_id == 0) + { + std::cout << std::endl << "JOB: Bulk_data | size: " << process_cnt; + std::cout << " | interation_cnt: " << iteration_cnt; + std::cout << " | msg_req_bytes: " << msg_req_bytes; + std::cout << " | msg_rsp_bytes: " << msg_rsp_bytes; + std::cout << " | src_rank_id_interval: " << min_src_id << "-" << max_src_id; + std::cout << " | dst_rank_id_interval: " << min_dst_id << "-" << max_dst_id; + std::cout << " | scattered_start: " << scattered_start; + std::cout << " | compute_delay: " << compute_delay << std::endl; + } + uint32_t *send_handles = NULL; + uint32_t *recv_handles = NULL; + + uint32_t send_limit = (max_dst_id - min_dst_id) + 1; + uint32_t recv_limit = (max_src_id - min_src_id) + 1; + + if (fixed_pairs) + { + send_limit = 1; + recv_limit = 1; + } + + send_handles = new uint32_t[send_limit * iteration_cnt]; + recv_handles = new uint32_t[recv_limit * iteration_cnt]; + + + if (process_id >= min_src_id && process_id <= max_src_id) // Sending processes + { + // if we want to scatter the start time, we mimic this delay with a compute delay + if(scattered_start) + { + assert(start_delay_max > 0); + /* TODO: Use a better random number generator here. 
*/ + uint32_t start_delay = rand() % start_delay_max; + std::cout << std::endl << "process_id: " << process_id << " delay start by " << start_delay << " cycles"; + SWM_Compute(start_delay); + } + + uint32_t marker = 0; + for(uint32_t iter=0; iter < iteration_cnt; iter++) + { + if (compute_delay) + SWM_Compute(compute_delay); + + + //if(show_iterations) + // SWM_Mark_Iteration(marker); + // marker++; + //} + uint32_t send_count = 0; + + uint32_t curr_target; + if (fixed_pairs) + { + curr_target = (process_id - min_src_id) + min_dst_id; + } + else + { + curr_target = (process_id % send_limit) + min_dst_id; + } + if (curr_target > max_dst_id) + { + std::cout << std::endl << "process_id: " << process_id << " - unused source. stopping."; + return; + } + + for(uint32_t sent=0; sent < send_limit; sent++, send_count++) + { + //uint32_t process_id_offset = ( (process_id + 1) << 32); + //uint32_t iter_offset = ( (iter + 1) << 8); + //SWM_TAG this_tag = SWM_APP_TAG_BASE + process_id_offset + iter_offset; + //uint32_t iter_offset = (process_cnt * (iter) ); + //SWM_TAG this_tag = SWM_APP_TAG_BASE + (sizeof(SWM_TAG) * ( (process_id + 1) + iter_offset) ); //(iter+1) ); + SWM_TAG this_tag = SWM_APP_TAG_BASE; + //uint32_t send_handle[send_limit]; + + SWM_Isend( + curr_target, + SWM_COMM_WORLD, + this_tag, + -1, + -1, + NO_BUFFER, + msg_req_bytes, + msg_rsp_bytes, + &(send_handles[send_count]), + 0, + 0 + ); + if(debug) + { + std::cout << std::endl << "process_id: " << process_id << " sent message to destination: " << curr_target << ", tag: " << this_tag << ", iter: " << iter ; + } + + if (!fixed_pairs) + { + curr_target++; + if (curr_target > max_dst_id) + { + curr_target = min_dst_id; + } + } + } + + SWM_Waitall(send_limit, send_handles); + + + if(show_iterations){ + SWM_Mark_Iteration(marker); + marker++; + } + } // end-for(iteration_cnt) + } + else if (process_id >= min_dst_id && process_id <= max_dst_id) // Recieving processes + { + uint32_t receive_from_proc = -1; + + // Are 
were using fixed sender-reciever pairs? Or do each reciever get data from all senders? + if (fixed_pairs) + { + min_src_id = (process_id - min_dst_id) + min_src_id; + if(min_src_id > max_src_id){ + std::cout << std::endl << "process_id: " << process_id << " - unused destination. stopping."; + return; + } + max_src_id = min_src_id; + } + // need to receive from each sender every iteration... + for(uint32_t iter = 0; iter < iteration_cnt; iter++) + { + uint32_t count = 0; + + for(uint32_t index = min_src_id; index <= max_src_id; index++, count++) + { + //uint32_t iter_offset = (process_cnt * (iter) ); + //SWM_TAG this_tag = SWM_APP_TAG_BASE + (sizeof(SWM_TAG) * (index + 1) * (iter+1) ); + //SWM_TAG this_tag = SWM_APP_TAG_BASE + (sizeof(SWM_TAG) * ( (index + 1) + iter_offset) ); + SWM_TAG this_tag = SWM_APP_TAG_BASE; + + //uint32_t receive_from_proc = (!use_any_src) ? index : -1; + + if(debug) + { + std::cout << std::endl << "process_id: " << process_id << " expecting to recv data from: " << index << " with recv tag: " << this_tag << " | iter_" << iter; + } + + + SWM_Irecv( + index, + SWM_COMM_WORLD, + this_tag, + NO_BUFFER, + &(recv_handles[count]) + ); + + } // end of for-loop(all_sources) + + SWM_Waitall(recv_limit, recv_handles); + if(debug) + { + std::cout << std::endl << "process_id: " << process_id << " received all data. 
iteration: " << iter ; + } + + //if(show_iterations) + // SWM_Mark_Iteration(iter); + } // end for-loop(iteration_cnt) + + } // end of else if(synchronous && (process_id == dst_rank_id) ) + + SWM_Finalize(); +} + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ft=c ts=8 sts=4 sw=4 expandtab + */ diff --git a/swm/src/many_to_many/many_to_many_swm_user_code.h b/swm/src/many_to_many/many_to_many_swm_user_code.h new file mode 100644 index 0000000..04d9c61 --- /dev/null +++ b/swm/src/many_to_many/many_to_many_swm_user_code.h @@ -0,0 +1,93 @@ +/* + * ===================================================================================== + * + * Filename: many_to_many_swm_user_code.h + * + * Description: + * + * Author: Kevin Brown, kabrown@anl.gov + * + * ===================================================================================== + */ + +#ifndef _MANY_TO_MANY_TEMPLATE_USER_CODE_ +#define _MANY_TO_MANY_TEMPLATE_USER_CODE_ + +#define SWM_APP_TAG_BASE 0 + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "swm-include.h" +using namespace std; + +class ManyToManySWMUserCode +{ + +public: + + ManyToManySWMUserCode( +// SWMUserIF* user_if, + boost::property_tree::ptree cfg, + void**& generic_ptrs + ); + + void call(); + +protected: + std::string req_vcs_string; + std::string rsp_vcs_string; + uint32_t msg_req_bytes; + uint32_t msg_rsp_bytes; + uint32_t pkt_rsp_bytes; + + uint32_t process_id; + uint32_t process_cnt; + uint32_t iteration_cnt; + uint32_t noop_cnt; + uint32_t compute_delay; + + std::vector req_vcs; + std::vector rsp_vcs; + + uint32_t min_src_id; + uint32_t max_src_id; + uint32_t min_dst_id; + uint32_t max_dst_id; + + bool randomize_comm_order; + + // are we staggering the start time of the srcs + bool scattered_start; + + // one-to-one pairs between sender and recv? 
+ bool fixed_pairs; + + // if using staggered start delay, this is the max used in the RNG when computing delay + uint32_t start_delay_max; + + // use isend/irecv instead of synthetic + bool synchronous; + + // use __ANY__ at the receive side for synchronous + bool use_any_src; + + // use blocking (Send/Recv) + bool blocking_comm; + + // for debugging + bool show_iterations; + bool debug; + +}; + +#endif diff --git a/swm/src/many_to_many/many_to_many_workload.json b/swm/src/many_to_many/many_to_many_workload.json new file mode 100644 index 0000000..82a89c4 --- /dev/null +++ b/swm/src/many_to_many/many_to_many_workload.json @@ -0,0 +1,21 @@ +{ +"jobs" : { + "dll_path": "${FABSIM_APPS_PATH}/dll/many_to_many.so", + "size": 1152, + "cfg": { + "app": "many_to_many", + "iteration_cnt": 1, + "compute_delay": 0, + "msg_req_bytes": 1048576, + "msg_rsp_bytes": 0, + "start_delay_max" : 10000, + "scattered_start" : false, + "randomize_communication_order": false, + "fixed_pairs" : false, + "debug" : false, + "src_rank_id_interval": [128,1151], + "dst_rank_id_interval": [0,63], + "cpu_freq" : 4e9 + } + } +} diff --git a/swm/src/many_to_many/many_to_many_workload1.json b/swm/src/many_to_many/many_to_many_workload1.json new file mode 100644 index 0000000..7d44e98 --- /dev/null +++ b/swm/src/many_to_many/many_to_many_workload1.json @@ -0,0 +1,21 @@ +{ +"jobs" : { + "dll_path": "${FABSIM_APPS_PATH}/dll/many_to_many.so", + "size": 256, + "cfg": { + "app": "many_to_many", + "iteration_cnt": 36, + "compute_delay": 0, + "msg_req_bytes": 4194304, + "msg_rsp_bytes": 0, + "start_delay_max" : 10000, + "scattered_start" : false, + "randomize_communication_order": false, + "fixed_pairs" : true, + "debug" : false, + "src_rank_id_interval": [128,255], + "dst_rank_id_interval": [0,127], + "cpu_freq" : 4e9 + } + } +} diff --git a/swm/src/nearest_neighbor/.deps/nearest_neighbor_swm_user_code.Plo b/swm/src/nearest_neighbor/.deps/nearest_neighbor_swm_user_code.Plo deleted file mode 100644 index 
9ce06a8..0000000 --- a/swm/src/nearest_neighbor/.deps/nearest_neighbor_swm_user_code.Plo +++ /dev/null @@ -1 +0,0 @@ -# dummy diff --git a/swm/src/nekbone/.deps/nekbone_swm_user_code.Plo b/swm/src/nekbone/.deps/nekbone_swm_user_code.Plo deleted file mode 100644 index 9ce06a8..0000000 --- a/swm/src/nekbone/.deps/nekbone_swm_user_code.Plo +++ /dev/null @@ -1 +0,0 @@ -# dummy diff --git a/swm/src/nekbone/workload1.json b/swm/src/nekbone/workload1.json new file mode 100644 index 0000000..9b10b07 --- /dev/null +++ b/swm/src/nekbone/workload1.json @@ -0,0 +1,31 @@ + +{ + "jobs": + { + "name": "StandaloneSWM", + "app": "nekbone", + "size": 729 , + "time": 0, + "cfg": { + "request_vc": 0, + "response_vc": 1, + "iteration_cnt": 1, + "rcube" : 13, + "ecube" : 13, + "Rx" : 9, + "Ry" : 9, + "Rz" : 9, + "Ex" : 13, + "Ey" : 13, + "Ez" : 13, + "Pbegin" : 8, + "Pend" : 12, + "Pstep" : 3, + "CGcount" : 15, + "NeighborCount" : 26, + "ByteSizeOf1DOF" : 8, + "cpu_freq" : 4e9, + "cpu_sim_speedup" : 1e6 + } + } +} diff --git a/swm/src/spread/one_to_many_swm_user_code.cpp b/swm/src/spread/one_to_many_swm_user_code.cpp new file mode 100644 index 0000000..b67e296 --- /dev/null +++ b/swm/src/spread/one_to_many_swm_user_code.cpp @@ -0,0 +1,213 @@ +/*********************************** + * Spread: one-to-many + * + * ********************************/ + +#include "one_to_many_swm_user_code.h" + +OneToManySWMUserCode::OneToManySWMUserCode( + boost::property_tree::ptree cfg, + void**& generic_ptrs + ) : + process_cnt(cfg.get("jobs.size", 1)), + src_rank_id(cfg.get("jobs.cfg.src_rank_id",0)), + iteration_cnt(cfg.get("jobs.cfg.iteration_cnt", 1)), + msg_req_bytes(cfg.get("jobs.cfg.msg_req_bytes", 0)), + msg_rsp_bytes(cfg.get("jobs.cfg.msg_rsp_bytes", 0)), + compute_delay(cfg.get("jobs.cfg.compute_delay", 0)), + use_any_src(cfg.get("jobs.cfg.use_any_src", false)), + blocking_comm(cfg.get("jobs.cfg.blocking_comm", false)), + scattered_start(cfg.get("jobs.cfg.scattered_start", false)), + 
start_delay_max(cfg.get("jobs.cfg.start_delay_max", 0)), + randomize_comm_order(cfg.get("jobs.cfg.randomize_communication_order", false)), + show_iterations(cfg.get("jobs.cfg.show_iterations", false)), + debug(cfg.get("jobs.cfg.debug", false)) +{ + + // extract the src/dst rank id intervals + int num = 0; + BOOST_FOREACH(const boost::property_tree::ptree::value_type &v, cfg.get_child("jobs.cfg.dst_rank_id_interval")) + { + std::string value = v.second.data(); + + if(num == 0) min_dst_id = atoi(value.c_str()); + if(num == 1) max_dst_id = atoi(value.c_str()); + + num++; + } + assert(num == 2); + + assert(src_rank_id < process_cnt); + process_id = *((int*)generic_ptrs[0]); +} + + void +OneToManySWMUserCode::call() +{ + + if(process_id == 0) + { + std::cout << std::endl << "JOB: Spread | size: " << process_cnt; + std::cout << " | interation_cnt: " << iteration_cnt; + std::cout << " | msg_req_bytes: " << msg_req_bytes; + std::cout << " | msg_rsp_bytes: " << msg_rsp_bytes; + std::cout << " | src_rank_id: " << src_rank_id; + std::cout << " | dst_rank_id_interval: " << min_dst_id << "-" << max_dst_id; + std::cout << " | scattered_start: " << scattered_start; + std::cout << " | compute_delay: " << compute_delay << std::endl; + } + uint32_t *send_handles = NULL; + uint32_t *recv_handles = NULL; + + uint32_t send_limit = (max_dst_id - min_dst_id) + 1; + uint32_t recv_limit = 1; + + //SWMPiggybackBase* dummy_piggyback = nullptr; + + if(!blocking_comm) + { + send_handles = new uint32_t[send_limit * iteration_cnt]; + recv_handles = new uint32_t[recv_limit * iteration_cnt]; + } + + + // Recieving processes + if ((process_id != src_rank_id) && (process_id >= min_dst_id && process_id <= max_dst_id) ) + { + + for(uint32_t iter=0; iter < iteration_cnt; iter++) + { + uint32_t count = 0; + uint32_t iter_offset = (process_cnt * (iter) ); + SWM_TAG this_tag = SWM_APP_TAG_BASE + (sizeof(SWM_TAG) * ( (process_id + 1) + iter_offset) ); + + if(debug) + { + std::cout << std::endl << 
"process_id: " << process_id << " expecting to recv data from: " << src_rank_id << " with recv tag: " << this_tag << " | iter_" << iter; + } + + if(!blocking_comm) + { + SWM_Irecv( + src_rank_id, + SWM_COMM_WORLD, + this_tag, + NO_BUFFER, + &(recv_handles[count]) + ); + } + else + { + SWM_Recv( + src_rank_id, + SWM_COMM_WORLD, + this_tag, + NO_BUFFER + ); + } + + if(!blocking_comm) + { + SWM_Waitall(recv_limit, recv_handles); + } + + if(debug) + { + std::cout << std::endl << "process_id: " << process_id << " received data from src: " << src_rank_id << ", iteration: " << iter ; + } + + if(show_iterations) + SWM_Mark_Iteration(iter); + } // end-for(iteration_cnt) + } + + // Sending process + else if(process_id == src_rank_id) + { + + // if we want to scatter the start time, we mimic this delay with a compute delay + if(scattered_start) + { + assert(start_delay_max > 0); + /* TODO: Use a better random number generator here. */ + uint32_t start_delay = rand() % start_delay_max; + std::cout << std::endl << "process_id: " << process_id << " delay start by " << start_delay << " cycles"; + SWM_Compute(start_delay); + } + + // need to send to everybody every iteration... + for(uint32_t iter = 0; iter < iteration_cnt; iter++) + { + uint32_t send_count = 0; + for(uint32_t index = min_dst_id; index <= max_dst_id; index++, send_count++) + { + uint32_t iter_offset = (process_cnt * (iter) ); + SWM_TAG this_tag = SWM_APP_TAG_BASE + (sizeof(SWM_TAG) * ( (index + 1) + iter_offset) ); + + uint32_t receive_from_proc = (!use_any_src) ? 
index : -1; + + if(!blocking_comm) + { + SWM_Isend( + index, + SWM_COMM_WORLD, + this_tag, + -1, + -1, + NO_BUFFER, + msg_req_bytes, + msg_rsp_bytes, + &(send_handles[send_count]), + 0, + 0 + ); + } + else + { + SWM_Send( + index, + SWM_COMM_WORLD, + this_tag, + -1,// req-vc + -1, //resp-vc + NO_BUFFER, + msg_req_bytes, //req-bytes + msg_rsp_bytes, //resp-bytes + 0,//routing type + 0 //routing type + ); + } + + + if(debug) + { + std::cout << std::endl << "process_id: " << process_id << " sent message to destination: " << index << ", tag: " << this_tag << ", iter: " << iter ; + } + + if (compute_delay) + SWM_Compute(compute_delay); + + } // end of for-loop(all_destinations) + if(!blocking_comm) + { + SWM_Waitall(send_limit, send_handles); + } + if(show_iterations) + SWM_Mark_Iteration(iter); + + } // end for-loop(iteration_cnt) + +} // end of else if(synchronous && (process_id == dst_rank_id) ) + +SWM_Finalize(); +} + + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ft=c ts=8 sts=4 sw=4 expandtab + */ diff --git a/swm/src/spread/one_to_many_swm_user_code.h b/swm/src/spread/one_to_many_swm_user_code.h new file mode 100644 index 0000000..e09ccd1 --- /dev/null +++ b/swm/src/spread/one_to_many_swm_user_code.h @@ -0,0 +1,91 @@ +/* + * ===================================================================================== + * + * Filename: one_to_many_swm_user_code.h + * + * Description: + * + * Version: 1.0 + * + * Author: Kevin A. 
Brown, kabrown@anl.gov + * + * ===================================================================================== + */ + +#ifndef _ONE_TO_MANY_TEMPLATE_USER_CODE_ +#define _ONE_TO_MANY_TEMPLATE_USER_CODE_ + +#define SWM_APP_TAG_BASE 0 + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "swm-include.h" +using namespace std; + +class OneToManySWMUserCode +{ + +public: + + OneToManySWMUserCode( +// SWMUserIF* user_if, + boost::property_tree::ptree cfg, + void**& generic_ptrs + ); + + void call(); + +protected: + std::string req_vcs_string; + std::string rsp_vcs_string; + uint32_t msg_req_bytes; + uint32_t msg_rsp_bytes; + uint32_t pkt_rsp_bytes; + uint32_t src_rank_id; + + uint32_t process_id; + uint32_t process_cnt; + uint32_t iteration_cnt; + uint32_t noop_cnt; + uint32_t compute_delay; + + std::vector req_vcs; + std::vector rsp_vcs; + + uint32_t min_dst_id; + uint32_t max_dst_id; + + bool randomize_comm_order; + + // are we staggering the start time of the srcs + bool scattered_start; + + // if using staggered start delay, this is the max used in the RNG when computing delay + uint32_t start_delay_max; + + // use isend/irecv instead of synthetic + bool synchronous; + + // use __ANY__ at the receive side for synchronous + bool use_any_src; + + // use blocking (Send/Recv) + bool blocking_comm; + + // for debugging + bool show_iterations; + bool debug; + +}; + +#endif diff --git a/swm/src/spread/spread_workload.json b/swm/src/spread/spread_workload.json new file mode 100644 index 0000000..387f242 --- /dev/null +++ b/swm/src/spread/spread_workload.json @@ -0,0 +1,21 @@ +{ +"jobs" : { + "dll_path": "${FABSIM_APPS_PATH}/dll/spread.so", + "size": 64, + "cfg": { + "app": "spread", + "iteration_cnt": 10, + "compute_delay": 10000, + "msg_req_bytes": 19922944, + "msg_rsp_bytes": 0, + "start_delay_max" : 10000, + "scattered_start" : false, + "src_rank_id":63, + "randomize_communication_order": false, + 
"blocking_comm" : false, + "debug" : false, + "dst_rank_id_interval": [0,62], + "cpu_freq" : 4e9 + } + } +} diff --git a/swm/src/swm-include.h b/swm/src/swm-include.h index a4683e6..ea1bce9 100644 --- a/swm/src/swm-include.h +++ b/swm/src/swm-include.h @@ -121,6 +121,9 @@ void SWM_Allreduce( SWM_ROUTING_TYPE reqrt, SWM_ROUTING_TYPE rsprt); +void SWM_Mark_Iteration( + SWM_TAG iter_tag); + void SWM_Finalize(); #endif From 654c598da1739576c60153d045beb824e553a785 Mon Sep 17 00:00:00 2001 From: Neil McGlohon Date: Fri, 26 Feb 2021 17:38:06 -0500 Subject: [PATCH 02/14] TEMP: neil's swm configs and fixes --- swm/src/incast/all_to_one_swm_user_code.cpp | 18 ++++++++++++----- swm/src/incast/all_to_one_swm_user_code.h | 2 ++ swm/src/incast/incast.json | 12 +++++------ swm/src/incast/incast2.json | 16 ++++++++------- swm/src/incast/incast4.json | 22 +++++++++++++++++++++ swm/src/lammps/lammps.cpp | 2 ++ swm/src/lammps/lammps_workload.json | 2 +- 7 files changed, 55 insertions(+), 19 deletions(-) create mode 100644 swm/src/incast/incast4.json diff --git a/swm/src/incast/all_to_one_swm_user_code.cpp b/swm/src/incast/all_to_one_swm_user_code.cpp index 8ec6878..b17e0e9 100644 --- a/swm/src/incast/all_to_one_swm_user_code.cpp +++ b/swm/src/incast/all_to_one_swm_user_code.cpp @@ -5,6 +5,8 @@ AllToOneSWMUserCode::AllToOneSWMUserCode( void**& generic_ptrs ) : process_cnt(cfg.get("jobs.size", 1)), + iteration_cnt(cfg.get("jobs.cfg.iteration_cnt", 1)), + msg_size(cfg.get("jobs.cfg.msg_size", 0)), dst_rank_id(cfg.get("jobs.cfg.dst_rank_id",0)), scattered_start(cfg.get("jobs.cfg.scattered_start", false)), start_delay_max(cfg.get("jobs.cfg.start_delay_max", 0)), @@ -14,6 +16,8 @@ AllToOneSWMUserCode::AllToOneSWMUserCode( debug(cfg.get("jobs.cfg.debug", false)) { + process_id = *((int*)generic_ptrs[0]); + // extract the src/dst rank id intervals int num = 0; BOOST_FOREACH(const boost::property_tree::ptree::value_type &v, cfg.get_child("jobs.cfg.src_rank_id_interval")) @@ -44,14 +48,12 @@ 
AllToOneSWMUserCode::call() if(synchronous) { - send_handles = new uint32_t[send_limit * iteration_cnt]; + send_handles = new uint32_t[send_limit * iteration_cnt]; recv_handles = new uint32_t[recv_limit * iteration_cnt]; } - if ((process_id != dst_rank_id) && (process_id >= min_source_id && process_id <= max_source_id) ) // do not send messages to self { - for(uint32_t iter=0; iter < iteration_cnt; iter++) { @@ -108,6 +110,12 @@ AllToOneSWMUserCode::call() //uint32_t send_handle[send_limit]; uint32_t send_count = 0; + if(debug) + { + std::cout << "process_id: " << process_id << " sening message to destination: " << dst_rank_id << ", tag: " << this_tag << ", iter: " << iter << std::endl; + } + + if(!blocking_comm) { @@ -118,7 +126,7 @@ AllToOneSWMUserCode::call() -1, -1, NO_BUFFER, - 0, + msg_size, 0, &(send_handles[send_count]), 0, @@ -134,7 +142,7 @@ AllToOneSWMUserCode::call() -1,// req-vc -1, //resp-vc NO_BUFFER, - 0, //req-bytes + msg_size, //req-bytes 0, //resp-bytes 0,//routing type 0 //routing type diff --git a/swm/src/incast/all_to_one_swm_user_code.h b/swm/src/incast/all_to_one_swm_user_code.h index 877fb1f..99df237 100644 --- a/swm/src/incast/all_to_one_swm_user_code.h +++ b/swm/src/incast/all_to_one_swm_user_code.h @@ -56,6 +56,8 @@ class AllToOneSWMUserCode uint32_t msg_rsp_bytes; uint32_t pkt_rsp_bytes; uint32_t dst_rank_id; + uint32_t msg_size; // MM addition + uint32_t process_id; uint32_t process_cnt; diff --git a/swm/src/incast/incast.json b/swm/src/incast/incast.json index d80c9db..53b08fb 100644 --- a/swm/src/incast/incast.json +++ b/swm/src/incast/incast.json @@ -1,22 +1,22 @@ { "jobs" : { "dll_path": "${FABSIM_APPS_PATH}/dll/incast.so", - "size": 9, + "size": 100, "cfg": { "app": "incast", - "iteration_cnt": 150, + "iteration_cnt": 10, "compute_delay": 0, "noop_cnt": 0, - "msg_size": 13107200, + "msg_size": 1310720, "start_delay_max" : 0, "scattered_start" : false, "synchronous": true, - "dst_rank_id":8, + "dst_rank_id":99, 
"randomize_communication_order": false, "blocking_comm" : false, "debug" : true, - "src_rank_id_interval": [0,7], + "src_rank_id_interval": [0,98], "cpu_freq" : 4e9 - } + } } } diff --git a/swm/src/incast/incast2.json b/swm/src/incast/incast2.json index ee49c90..7cf75be 100644 --- a/swm/src/incast/incast2.json +++ b/swm/src/incast/incast2.json @@ -1,22 +1,24 @@ { "jobs" : { "dll_path": "${FABSIM_APPS_PATH}/dll/incast.so", - "size": 9, + "size": 30, "cfg": { "app": "incast", "iteration_cnt": 10, "compute_delay": 0, "noop_cnt": 0, - "msg_size": 13107200, + "msg_size": 1310720, "start_delay_max" : 0, "scattered_start" : false, "synchronous": true, - "dst_rank_id":8, + "dst_rank_id":29, "randomize_communication_order": false, - "blocking_comm" : false, - "debug" : true, - "src_rank_id_interval": [0,7], + "blocking_comm" : false, + "debug" : true, + "src_rank_id_interval": [0,28], "cpu_freq" : 4e9 - } + } } } + + \ No newline at end of file diff --git a/swm/src/incast/incast4.json b/swm/src/incast/incast4.json new file mode 100644 index 0000000..40fa8ae --- /dev/null +++ b/swm/src/incast/incast4.json @@ -0,0 +1,22 @@ +{ +"jobs" : { + "dll_path": "${FABSIM_APPS_PATH}/dll/incast.so", + "size": 100, + "cfg": { + "app": "incast", + "iteration_cnt": 150, + "compute_delay": 0, + "noop_cnt": 0, + "msg_size": 13107200, + "start_delay_max" : 0, + "scattered_start" : false, + "synchronous": true, + "dst_rank_id":99, + "randomize_communication_order": false, + "blocking_comm" : false, + "debug" : true, + "src_rank_id_interval": [0,98], + "cpu_freq" : 4e9 + } + } +} diff --git a/swm/src/lammps/lammps.cpp b/swm/src/lammps/lammps.cpp index e4b68fa..b9200c2 100644 --- a/swm/src/lammps/lammps.cpp +++ b/swm/src/lammps/lammps.cpp @@ -255,6 +255,8 @@ LAMMPS_SWM::call() for(ts = 0; ts < num_timesteps; ts++) { + if(process_id == 0) + printf("LAMMPS Starting Timestep %d / %d\n",ts,num_timesteps); // initial integration SWM_Compute(start_cyc); SWM_Allreduce(48, rsp_bytes, SWM_COMM_WORLD, 
req_vc, resp_vc, NO_BUFFER, NO_BUFFER); // temperature diff --git a/swm/src/lammps/lammps_workload.json b/swm/src/lammps/lammps_workload.json index aca57fe..5c59671 100644 --- a/swm/src/lammps/lammps_workload.json +++ b/swm/src/lammps/lammps_workload.json @@ -10,7 +10,7 @@ "num_x_replicas": 3, "num_y_replicas": 3, "num_z_replicas": 3, - "num_time_steps": 30, + "num_time_steps": 1, "req_vc" : 0, "resp_vc" : 1, "router_freq" : 800e6, From 6514f77576014a83326554333247c76776cd9e66 Mon Sep 17 00:00:00 2001 From: Neil McGlohon Date: Thu, 25 Mar 2021 11:50:21 -0400 Subject: [PATCH 03/14] Neil Latest epsilon 2 run versions --- swm/src/incast/all_to_one_swm_user_code.cpp | 423 ++++++++++---------- swm/src/incast/incast.json | 38 +- swm/src/incast/incast1.json | 40 +- swm/src/lammps/lammps_workload.json | 2 +- 4 files changed, 261 insertions(+), 242 deletions(-) diff --git a/swm/src/incast/all_to_one_swm_user_code.cpp b/swm/src/incast/all_to_one_swm_user_code.cpp index 7c614d8..b17e0e9 100644 --- a/swm/src/incast/all_to_one_swm_user_code.cpp +++ b/swm/src/incast/all_to_one_swm_user_code.cpp @@ -1,220 +1,235 @@ #include "all_to_one_swm_user_code.h" AllToOneSWMUserCode::AllToOneSWMUserCode( - boost::property_tree::ptree cfg, - void**& generic_ptrs - ) : - process_cnt(cfg.get("jobs.size", 1)), - dst_rank_id(cfg.get("jobs.cfg.dst_rank_id",0)), - iteration_cnt(cfg.get("jobs.cfg.iteration_cnt", 1)), - msg_req_bytes(cfg.get("jobs.cfg.msg_req_bytes", 0)), - msg_rsp_bytes(cfg.get("jobs.cfg.msg_rsp_bytes", 0)), - compute_delay(cfg.get("jobs.cfg.compute_delay", 0)), - use_any_src(cfg.get("jobs.cfg.use_any_src", false)), - blocking_comm(cfg.get("jobs.cfg.blocking_comm", false)), - scattered_start(cfg.get("jobs.cfg.scattered_start", false)), - start_delay_max(cfg.get("jobs.cfg.start_delay_max", 0)), - randomize_comm_order(cfg.get("jobs.cfg.randomize_communication_order", false)), - show_iterations(cfg.get("jobs.cfg.show_iterations", false)), - debug(cfg.get("jobs.cfg.debug", false)) + 
boost::property_tree::ptree cfg, + void**& generic_ptrs +) : + process_cnt(cfg.get("jobs.size", 1)), + iteration_cnt(cfg.get("jobs.cfg.iteration_cnt", 1)), + msg_size(cfg.get("jobs.cfg.msg_size", 0)), + dst_rank_id(cfg.get("jobs.cfg.dst_rank_id",0)), + scattered_start(cfg.get("jobs.cfg.scattered_start", false)), + start_delay_max(cfg.get("jobs.cfg.start_delay_max", 0)), + synchronous(cfg.get("jobs.cfg.synchronous", 0)), + use_any_src(cfg.get("jobs.cfg.use_any_src", 0)), + blocking_comm(cfg.get("jobs.cfg.blocking_comm", 0)), + debug(cfg.get("jobs.cfg.debug", false)) { - // extract the src/dst rank id intervals - int num = 0; - BOOST_FOREACH(const boost::property_tree::ptree::value_type &v, cfg.get_child("jobs.cfg.src_rank_id_interval")) - { - std::string value = v.second.data(); + process_id = *((int*)generic_ptrs[0]); - if(num == 0) min_source_id = atoi(value.c_str()); - if(num == 1) max_source_id = atoi(value.c_str()); + // extract the src/dst rank id intervals + int num = 0; + BOOST_FOREACH(const boost::property_tree::ptree::value_type &v, cfg.get_child("jobs.cfg.src_rank_id_interval")) + { + std::string value = v.second.data(); - num++; - } - assert(num == 2); + if(num == 0) min_source_id = atoi(value.c_str()); + if(num == 1) max_source_id = atoi(value.c_str()); - assert(dst_rank_id < process_cnt); - process_id = *((int*)generic_ptrs[0]); + num++; + } + assert(num == 2); + + assert(dst_rank_id < process_cnt); } void AllToOneSWMUserCode::call() { - if(process_id == 0) - { - std::cout << std::endl << "JOB: Incast | size: " << process_cnt; - std::cout << " | interation_cnt: " << iteration_cnt; - std::cout << " | msg_req_bytes: " << msg_req_bytes; - std::cout << " | msg_rsp_bytes: " << msg_rsp_bytes; - std::cout << " | dst_rank_id: " << dst_rank_id; - std::cout << " | src_rank_id_interval: " << min_source_id << "-" << max_source_id; - std::cout << " | scattered_start: " << scattered_start; - std::cout << " | compute_delay: " << compute_delay << std::endl; - } - 
uint32_t *send_handles = NULL; - uint32_t *recv_handles = NULL; - - uint32_t send_limit = 1; - uint32_t recv_limit = (max_source_id - min_source_id) + 1; - - //SWMPiggybackBase* dummy_piggyback = nullptr; - - if(!blocking_comm) - { - send_handles = new uint32_t[send_limit * iteration_cnt]; - recv_handles = new uint32_t[recv_limit * iteration_cnt]; - } - - - if ((process_id != dst_rank_id) && (process_id >= min_source_id && process_id <= max_source_id) ) // do not send messages to self - { - // if we want to scatter the start time, we mimic this delay with a compute delay - if(scattered_start) - { - assert(start_delay_max > 0); - /* TODO: Use a better random number generator here. */ - uint32_t start_delay = rand() % start_delay_max; - std::cout << std::endl << "process_id: " << process_id << " delay start by " << start_delay << " cycles"; - SWM_Compute(start_delay); - } - uint32_t marker = 0; - for(uint32_t iter=0; iter < iteration_cnt; iter++) - { - if (compute_delay) - SWM_Compute(compute_delay); - - - if(show_iterations){ - SWM_Mark_Iteration(marker); - marker++; - } - - //uint32_t process_id_offset = ( (process_id + 1) << 32); - //uint32_t iter_offset = ( (iter + 1) << 8); - //SWM_TAG this_tag = SWM_APP_TAG_BASE + process_id_offset + iter_offset; - uint32_t iter_offset = (process_cnt * (iter) ); - SWM_TAG this_tag = SWM_APP_TAG_BASE + (sizeof(SWM_TAG) * ( (process_id + 1) + iter_offset) ); //(iter+1) ); - //uint32_t send_handle[send_limit]; - uint32_t send_count = 0; - - if(!blocking_comm) - { - - SWM_Isend( - dst_rank_id, - SWM_COMM_WORLD, - this_tag, - -1, - -1, - NO_BUFFER, - msg_req_bytes, - msg_rsp_bytes, - &(send_handles[send_count]), - 0, - 0 - ); - } - else - { - SWM_Send( - dst_rank_id, - SWM_COMM_WORLD, - this_tag, - -1,// req-vc - -1, //resp-vc - NO_BUFFER, - msg_req_bytes, //req-bytes - msg_rsp_bytes, //resp-bytes - 0,//routing type - 0 //routing type - ); - } - - if(!blocking_comm) - { - SWM_Waitall(send_limit, send_handles); - } - - if(debug) - { 
- std::cout << std::endl << "process_id: " << process_id << " sent message to destination: " << dst_rank_id << ", tag: " << this_tag << ", iter: " << iter ; - } - - if(show_iterations){ - SWM_Mark_Iteration(marker); - marker++; - } - } // end-for(iteration_cnt) -} -else if(process_id == dst_rank_id) -{ - - // need to receive from everybody every iteration... - for(uint32_t iter = 0; iter < iteration_cnt; iter++) - { - - uint32_t count = 0; - - for(uint32_t index = min_source_id; index <= max_source_id; index++, count++) - { - - uint32_t iter_offset = (process_cnt * (iter) ); - //SWM_TAG this_tag = SWM_APP_TAG_BASE + (sizeof(SWM_TAG) * (index + 1) * (iter+1) ); - SWM_TAG this_tag = SWM_APP_TAG_BASE + (sizeof(SWM_TAG) * ( (index + 1) + iter_offset) ); - - uint32_t receive_from_proc = (!use_any_src) ? index : -1; - - if(debug) - { - std::cout << std::endl << "process_id: " << process_id << " expecting to recv data from: " << receive_from_proc << " with recv tag: " << this_tag << " | iter_" << iter; - } - - - if(!blocking_comm) - { - SWM_Irecv( - receive_from_proc, - SWM_COMM_WORLD, - this_tag, - NO_BUFFER, - &(recv_handles[count]) - ); - } - else - { - SWM_Recv( - receive_from_proc, - SWM_COMM_WORLD, - this_tag, - NO_BUFFER - ); - } - - if(debug) - { - std::cout << std::endl << "process_id: " << process_id << " received data from src: " << index << ", iteration: " << iter ; - } - - } // end of for-loop(all_sources) - - if(!blocking_comm) - { - SWM_Waitall(recv_limit, recv_handles); - } - - //SWM_Mark_Iteration(iter); - } // end for-loop(iteration_cnt) - -} - -SWM_Finalize(); + uint32_t *send_handles = NULL; + uint32_t *recv_handles = NULL; + + uint32_t send_limit = 1; + uint32_t recv_limit = (max_source_id - min_source_id) + 1; + + //SWMPiggybackBase* dummy_piggyback = nullptr; + + if(synchronous) + { + send_handles = new uint32_t[send_limit * iteration_cnt]; + recv_handles = new uint32_t[recv_limit * iteration_cnt]; + } + + if ((process_id != dst_rank_id) && 
(process_id >= min_source_id && process_id <= max_source_id) ) // do not send messages to self + { + for(uint32_t iter=0; iter < iteration_cnt; iter++) + { + + //msg_traffic_desc msg_desc; + + //GetMsgDetails(&msg_desc); + + // if we want to scatter the start time, we mimic this delay with a compute delay + if(scattered_start) + { + assert(start_delay_max > 0); + /* TODO: Use a better random number generator here. */ + uint32_t start_delay = rand() % start_delay_max; + std::cout << "process_id: " << process_id << " delay start by " << start_delay << " cycles" << std::endl; + SWM_Compute(start_delay); + } + + /*if(!synchronous) + { + + SWM_Synthetic( + dst_rank_id, //dst + msg_desc.msg_req_vc, + msg_desc.msg_rsp_vc, + msg_desc.pkt_rsp_vc, + msg_desc.msg_req_bytes, + msg_desc.msg_rsp_bytes, + msg_desc.pkt_rsp_bytes, + msg_desc.msg_req_routing_type, + msg_desc.msg_rsp_routing_type, + msg_desc.pkt_rsp_routing_type, + dummy_piggyback, //NULL, + msg_desc.attribute +#ifdef FABSIM_EMULATION + , msg_desc.l2_encoding +#endif + ); + + + if(debug) + { + std::cout << "process_id: " << process_id << " sent synthetic message to destination: " << dst_rank_id << ", iter: " << iter << " @ " << SWM_Clock() << std::endl; + } + + } + else + {*/ + + //uint32_t process_id_offset = ( (process_id + 1) << 32); + //uint32_t iter_offset = ( (iter + 1) << 8); + //SWM_TAG this_tag = SWM_APP_TAG_BASE + process_id_offset + iter_offset; + uint32_t iter_offset = (process_cnt * (iter) ); + SWM_TAG this_tag = SWM_APP_TAG_BASE + (sizeof(SWM_TAG) * ( (process_id + 1) + iter_offset) ); //(iter+1) ); + //uint32_t send_handle[send_limit]; + uint32_t send_count = 0; + + if(debug) + { + std::cout << "process_id: " << process_id << " sening message to destination: " << dst_rank_id << ", tag: " << this_tag << ", iter: " << iter << std::endl; + } + + + if(!blocking_comm) + { + + SWM_Isend( + dst_rank_id, + SWM_COMM_WORLD, + this_tag, + -1, + -1, + NO_BUFFER, + msg_size, + 0, + &(send_handles[send_count]), + 0, 
+ 0 + ); + } + else + { + SWM_Send( + dst_rank_id, + SWM_COMM_WORLD, + this_tag, + -1,// req-vc + -1, //resp-vc + NO_BUFFER, + msg_size, //req-bytes + 0, //resp-bytes + 0,//routing type + 0 //routing type + ); + } + + if(!blocking_comm) + { + SWM_Waitall(send_limit, send_handles); + } + + if(debug) + { + std::cout << "process_id: " << process_id << " sent message to destination: " << dst_rank_id << ", tag: " << this_tag << ", iter: " << iter << std::endl; + } + + //} // else(synchronous) + //MM comment: no def for SWM_Noop in codes + /*for(uint32_t noop=0; noop Date: Wed, 7 Apr 2021 14:08:48 -0400 Subject: [PATCH 04/14] Nearest Neighbor: Add nondeterminism warnings - safer irecv/isend order - cleanup --- .../nearest_neighbor_swm_user_code.cpp | 90 ++++++++----------- 1 file changed, 38 insertions(+), 52 deletions(-) diff --git a/swm/src/nearest_neighbor/nearest_neighbor_swm_user_code.cpp b/swm/src/nearest_neighbor/nearest_neighbor_swm_user_code.cpp index 85e5279..3fc9d3f 100644 --- a/swm/src/nearest_neighbor/nearest_neighbor_swm_user_code.cpp +++ b/swm/src/nearest_neighbor/nearest_neighbor_swm_user_code.cpp @@ -29,7 +29,7 @@ NearestNeighborSWMUserCode::NearestNeighborSWMUserCode( msg_size(cfg.get("jobs.cfg.msg_size", 0)), dimension_sizes(boost_ptree_array_to_std_vector(cfg,"jobs.cfg.dimension_sizes", {0})), max_dimension_distance(cfg.get("jobs.cfg.max_dimension_distance",0)), - synchronous(cfg.get("jobs.cfg.synchronous",false)), + synchronous(cfg.get("jobs.cfg.synchronous",true)), iterations_per_sync(cfg.get("jobs.cfg.iterations_per_sync",1)), randomize_communication_order(cfg.get("jobs.cfg.randomize_communication_order",false)) { @@ -47,6 +47,13 @@ NearestNeighborSWMUserCode::NearestNeighborSWMUserCode( req_rt = AUTOMATIC; rsp_rt = AUTOMATIC; + if (synchronous == false) + printf("SWM Nearest Neighbor - Warning: configuring 'synchronous == false' currently generates no traffic\n"); + + if (randomize_communication_order && process_id == 0) + { + printf("SWM Nearest 
Neighbor - Warning: configuring 'randomize_communication_order == true' will generate nondeterminstic results.\n"); + } } void @@ -280,13 +287,17 @@ NearestNeighborSWMUserCode::call() } */ - uint32_t* send_handles = NULL; - uint32_t* recv_handles = NULL; + // uint32_t* send_handles = NULL; + // uint32_t* recv_handles = NULL; + uint32_t* all_handles = NULL; + size_t num_handles_per_sync = 0; if(synchronous) { - send_handles = new uint32_t[neighbors.size()*iterations_per_sync]; - recv_handles = new uint32_t[neighbors.size()*iterations_per_sync]; + // send_handles = new uint32_t[neighbors.size()*iterations_per_sync]; + // recv_handles = new uint32_t[neighbors.size()*iterations_per_sync]; + all_handles = new uint32_t[neighbors.size()*iterations_per_sync * 2]; + num_handles_per_sync = neighbors.size() * iterations_per_sync * 2; } uint32_t iter_before_sync = 0; @@ -295,26 +306,29 @@ NearestNeighborSWMUserCode::call() for(uint32_t iter=0; iter(neighbors[neighbor_idx])); - - if(synchronous) - { - + if (synchronous) { //send/recv pair that we'll later wait on + SWM_Irecv( + std::get<0>(neighbors[neighbor_idx]), + SWM_COMM_WORLD, + std::get<0>(neighbors[neighbor_idx]), + NO_BUFFER, + &(all_handles[neighbor_idx+iter_before_sync*neighbors_size+(neighbors_size*iterations_per_sync*0)]) + ); SWM_Isend( std::get<0>(neighbors[neighbor_idx]), @@ -325,32 +339,22 @@ NearestNeighborSWMUserCode::call() NO_BUFFER, msg_size, //msg_desc.msg_req_bytes, pkt_rsp_bytes, //msg_desc.pkt_rsp_bytes, - &(send_handles[neighbor_idx+iter_before_sync*neighbors_size]), + &(all_handles[neighbor_idx+iter_before_sync*neighbors_size+(neighbors_size*iterations_per_sync*1)]), 0, 0 ); - - SWM_Irecv( - std::get<0>(neighbors[neighbor_idx]), - SWM_COMM_WORLD, - std::get<0>(neighbors[neighbor_idx]), - NO_BUFFER, - &(recv_handles[neighbor_idx+iter_before_sync*neighbors_size]) - ); for(uint32_t noop=0; noop(neighbors[neighbor_idx]), //dst 0, @@ -372,48 +376,30 @@ NearestNeighborSWMUserCode::call() { SWM_Noop(); } - 
- */ - + */ } - } - - if(synchronous) - { - - + if (synchronous) { iter_before_sync++; if(iter_before_sync == iterations_per_sync || iter == iteration_cnt-1 ) { //std::cout << "begin wait at time: " << global_cycle << std::endl; SWM_Waitall( - neighbors.size()*iter_before_sync, - send_handles - ); - - SWM_Waitall( - neighbors.size()*iter_before_sync, - recv_handles + num_handles_per_sync, + all_handles ); iter_before_sync = 0; //std::cout << "end wait at time: " << global_cycle << std::endl; } - - } else { - if (compute_delay) { SWM_Compute(compute_delay); } - } - } - SWM_Finalize(); } From bdc5a058665afd4313d8a4eefb30b5452ff6ead8 Mon Sep 17 00:00:00 2001 From: Neil McGlohon Date: Thu, 8 Apr 2021 22:12:08 -0400 Subject: [PATCH 05/14] NEKbone: Add log output option --- swm/src/nekbone/nekbone_swm_user_code.cpp | 24 ++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/swm/src/nekbone/nekbone_swm_user_code.cpp b/swm/src/nekbone/nekbone_swm_user_code.cpp index fcb2fd5..8d11923 100644 --- a/swm/src/nekbone/nekbone_swm_user_code.cpp +++ b/swm/src/nekbone/nekbone_swm_user_code.cpp @@ -4,15 +4,30 @@ #include #include #include //memset +#include //log printf wrapper #include "cubiclattice.h" using namespace std; #define CHECKERR if(err) break; +#define STDOUT_LOG 0 #define MPI_SUCCESS (1); typedef BUF_TYPE SWM_Request; + + +void print_log(const char *format, ...) +{ + va_list args; + va_start(args, format); + + if(STDOUT_LOG) + vprintf(format, args); + + va_end(args); +} + NEKBONESWMUserCode::NEKBONESWMUserCode( boost::property_tree::ptree cfg, void**& generic_ptrs @@ -172,23 +187,28 @@ Err_t NEKBONESWMUserCode::run() for(unsigned polyO=Pbegin; polyO polyO=%d\n", __LINE__, polyO); + if(mpiRank==0) print_log("NEKbone: poly=%d/%d\n", polyO, Pend); //NEKbone loop over element/rank removed-->for(G->nelt = G->iel0; G->nelt <= G->ielN; G->nelt += G->ielD){ //Use sizedata.h::E(x|y|z) to change the element distribution within 1 rank. 
+ if(mpiRank==0) print_log("\tMake Mesh 1\n"); err = makeMesh(polyO); assert(err == 0); + if(mpiRank==0) print_log("\tMake Mesh 2\n"); err = makeMesh(polyO); assert(err == 0); + if(mpiRank==0) print_log("\tGSOP 1\n"); err = nek_gsop("on c"); assert(err == 0); + if(mpiRank==0) print_log("\tGSOP 2\n"); err = nek_gsop("on f"); assert(err == 0); + if(mpiRank==0) print_log("\tGrad Conjugation 1\n"); err = conjugateGradient(); assert(err == 0); @@ -204,6 +224,7 @@ Err_t NEKBONESWMUserCode::run() rsp_rt ); + if(mpiRank==0) print_log("\tGrad Conjugation 2\n"); err = conjugateGradient(); assert(err == 0); @@ -691,6 +712,7 @@ Err_t NEKBONESWMUserCode::conjugateGradient() for(unsigned iter = 0; iter Date: Thu, 22 Apr 2021 14:46:35 -0400 Subject: [PATCH 06/14] neil edits --- swm/src/lammps/lammps_workload.json | 4 ++-- swm/src/milc/milc_skeleton.json | 2 +- swm/src/milc/milc_swm_user_code.cpp | 1 + swm/src/nearest_neighbor/skeleton.json | 4 ++-- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/swm/src/lammps/lammps_workload.json b/swm/src/lammps/lammps_workload.json index 71cf613..aca57fe 100644 --- a/swm/src/lammps/lammps_workload.json +++ b/swm/src/lammps/lammps_workload.json @@ -3,14 +3,14 @@ "name": "StandaloneSWM", "app": "dll", "dll_path": "apps/dll/lammps.so", - "size": 1024, + "size": 2048, "time": 0, "cfg": { "num_x_replicas": 3, "num_y_replicas": 3, "num_z_replicas": 3, - "num_time_steps": 1, + "num_time_steps": 30, "req_vc" : 0, "resp_vc" : 1, "router_freq" : 800e6, diff --git a/swm/src/milc/milc_skeleton.json b/swm/src/milc/milc_skeleton.json index 0358282..c0763ab 100644 --- a/swm/src/milc/milc_skeleton.json +++ b/swm/src/milc/milc_skeleton.json @@ -4,7 +4,7 @@ "size": 4096, "cfg": { "app": "milc", - "iteration_cnt": 50, + "iteration_cnt": 1, "compute_delay": 100, "dimension_cnt": 4, "dimension_sizes": [8,8,8,8], diff --git a/swm/src/milc/milc_swm_user_code.cpp b/swm/src/milc/milc_swm_user_code.cpp index 153feb3..d5e8c23 100644 --- 
a/swm/src/milc/milc_swm_user_code.cpp +++ b/swm/src/milc/milc_swm_user_code.cpp @@ -231,6 +231,7 @@ MilcSWMUserCode::call() uint32_t pkt_rsp_bytes = 0; for(uint32_t iter=0; iter Date: Thu, 22 Apr 2021 14:59:30 -0400 Subject: [PATCH 07/14] Bring back in kevin updates to incast --- swm/src/incast/all_to_one_swm_user_code.cpp | 423 ++++++++++---------- swm/src/incast/incast.json | 26 +- swm/src/incast/incast1.json | 26 +- swm/src/incast/incast2.json | 40 +- 4 files changed, 247 insertions(+), 268 deletions(-) diff --git a/swm/src/incast/all_to_one_swm_user_code.cpp b/swm/src/incast/all_to_one_swm_user_code.cpp index b17e0e9..7c614d8 100644 --- a/swm/src/incast/all_to_one_swm_user_code.cpp +++ b/swm/src/incast/all_to_one_swm_user_code.cpp @@ -1,235 +1,220 @@ #include "all_to_one_swm_user_code.h" AllToOneSWMUserCode::AllToOneSWMUserCode( - boost::property_tree::ptree cfg, - void**& generic_ptrs -) : - process_cnt(cfg.get("jobs.size", 1)), - iteration_cnt(cfg.get("jobs.cfg.iteration_cnt", 1)), - msg_size(cfg.get("jobs.cfg.msg_size", 0)), - dst_rank_id(cfg.get("jobs.cfg.dst_rank_id",0)), - scattered_start(cfg.get("jobs.cfg.scattered_start", false)), - start_delay_max(cfg.get("jobs.cfg.start_delay_max", 0)), - synchronous(cfg.get("jobs.cfg.synchronous", 0)), - use_any_src(cfg.get("jobs.cfg.use_any_src", 0)), - blocking_comm(cfg.get("jobs.cfg.blocking_comm", 0)), - debug(cfg.get("jobs.cfg.debug", false)) + boost::property_tree::ptree cfg, + void**& generic_ptrs + ) : + process_cnt(cfg.get("jobs.size", 1)), + dst_rank_id(cfg.get("jobs.cfg.dst_rank_id",0)), + iteration_cnt(cfg.get("jobs.cfg.iteration_cnt", 1)), + msg_req_bytes(cfg.get("jobs.cfg.msg_req_bytes", 0)), + msg_rsp_bytes(cfg.get("jobs.cfg.msg_rsp_bytes", 0)), + compute_delay(cfg.get("jobs.cfg.compute_delay", 0)), + use_any_src(cfg.get("jobs.cfg.use_any_src", false)), + blocking_comm(cfg.get("jobs.cfg.blocking_comm", false)), + scattered_start(cfg.get("jobs.cfg.scattered_start", false)), + 
start_delay_max(cfg.get("jobs.cfg.start_delay_max", 0)), + randomize_comm_order(cfg.get("jobs.cfg.randomize_communication_order", false)), + show_iterations(cfg.get("jobs.cfg.show_iterations", false)), + debug(cfg.get("jobs.cfg.debug", false)) { - process_id = *((int*)generic_ptrs[0]); + // extract the src/dst rank id intervals + int num = 0; + BOOST_FOREACH(const boost::property_tree::ptree::value_type &v, cfg.get_child("jobs.cfg.src_rank_id_interval")) + { + std::string value = v.second.data(); - // extract the src/dst rank id intervals - int num = 0; - BOOST_FOREACH(const boost::property_tree::ptree::value_type &v, cfg.get_child("jobs.cfg.src_rank_id_interval")) - { - std::string value = v.second.data(); + if(num == 0) min_source_id = atoi(value.c_str()); + if(num == 1) max_source_id = atoi(value.c_str()); - if(num == 0) min_source_id = atoi(value.c_str()); - if(num == 1) max_source_id = atoi(value.c_str()); + num++; + } + assert(num == 2); - num++; - } - assert(num == 2); - - assert(dst_rank_id < process_cnt); + assert(dst_rank_id < process_cnt); + process_id = *((int*)generic_ptrs[0]); } void AllToOneSWMUserCode::call() { - uint32_t *send_handles = NULL; - uint32_t *recv_handles = NULL; - - uint32_t send_limit = 1; - uint32_t recv_limit = (max_source_id - min_source_id) + 1; - - //SWMPiggybackBase* dummy_piggyback = nullptr; - - if(synchronous) - { - send_handles = new uint32_t[send_limit * iteration_cnt]; - recv_handles = new uint32_t[recv_limit * iteration_cnt]; - } - - if ((process_id != dst_rank_id) && (process_id >= min_source_id && process_id <= max_source_id) ) // do not send messages to self - { - for(uint32_t iter=0; iter < iteration_cnt; iter++) - { - - //msg_traffic_desc msg_desc; - - //GetMsgDetails(&msg_desc); - - // if we want to scatter the start time, we mimic this delay with a compute delay - if(scattered_start) - { - assert(start_delay_max > 0); - /* TODO: Use a better random number generator here. 
*/ - uint32_t start_delay = rand() % start_delay_max; - std::cout << "process_id: " << process_id << " delay start by " << start_delay << " cycles" << std::endl; - SWM_Compute(start_delay); - } - - /*if(!synchronous) - { - - SWM_Synthetic( - dst_rank_id, //dst - msg_desc.msg_req_vc, - msg_desc.msg_rsp_vc, - msg_desc.pkt_rsp_vc, - msg_desc.msg_req_bytes, - msg_desc.msg_rsp_bytes, - msg_desc.pkt_rsp_bytes, - msg_desc.msg_req_routing_type, - msg_desc.msg_rsp_routing_type, - msg_desc.pkt_rsp_routing_type, - dummy_piggyback, //NULL, - msg_desc.attribute -#ifdef FABSIM_EMULATION - , msg_desc.l2_encoding -#endif - ); - - - if(debug) - { - std::cout << "process_id: " << process_id << " sent synthetic message to destination: " << dst_rank_id << ", iter: " << iter << " @ " << SWM_Clock() << std::endl; - } - - } - else - {*/ - - //uint32_t process_id_offset = ( (process_id + 1) << 32); - //uint32_t iter_offset = ( (iter + 1) << 8); - //SWM_TAG this_tag = SWM_APP_TAG_BASE + process_id_offset + iter_offset; - uint32_t iter_offset = (process_cnt * (iter) ); - SWM_TAG this_tag = SWM_APP_TAG_BASE + (sizeof(SWM_TAG) * ( (process_id + 1) + iter_offset) ); //(iter+1) ); - //uint32_t send_handle[send_limit]; - uint32_t send_count = 0; - - if(debug) - { - std::cout << "process_id: " << process_id << " sening message to destination: " << dst_rank_id << ", tag: " << this_tag << ", iter: " << iter << std::endl; - } - - - if(!blocking_comm) - { - - SWM_Isend( - dst_rank_id, - SWM_COMM_WORLD, - this_tag, - -1, - -1, - NO_BUFFER, - msg_size, - 0, - &(send_handles[send_count]), - 0, - 0 - ); - } - else - { - SWM_Send( - dst_rank_id, - SWM_COMM_WORLD, - this_tag, - -1,// req-vc - -1, //resp-vc - NO_BUFFER, - msg_size, //req-bytes - 0, //resp-bytes - 0,//routing type - 0 //routing type - ); - } - - if(!blocking_comm) - { - SWM_Waitall(send_limit, send_handles); - } - - if(debug) - { - std::cout << "process_id: " << process_id << " sent message to destination: " << dst_rank_id << ", tag: " << 
this_tag << ", iter: " << iter << std::endl; - } - - //} // else(synchronous) - //MM comment: no def for SWM_Noop in codes - /*for(uint32_t noop=0; noop= min_source_id && process_id <= max_source_id) ) // do not send messages to self + { + // if we want to scatter the start time, we mimic this delay with a compute delay + if(scattered_start) + { + assert(start_delay_max > 0); + /* TODO: Use a better random number generator here. */ + uint32_t start_delay = rand() % start_delay_max; + std::cout << std::endl << "process_id: " << process_id << " delay start by " << start_delay << " cycles"; + SWM_Compute(start_delay); + } + uint32_t marker = 0; + for(uint32_t iter=0; iter < iteration_cnt; iter++) + { + if (compute_delay) + SWM_Compute(compute_delay); + + + if(show_iterations){ + SWM_Mark_Iteration(marker); + marker++; + } + + //uint32_t process_id_offset = ( (process_id + 1) << 32); + //uint32_t iter_offset = ( (iter + 1) << 8); + //SWM_TAG this_tag = SWM_APP_TAG_BASE + process_id_offset + iter_offset; + uint32_t iter_offset = (process_cnt * (iter) ); + SWM_TAG this_tag = SWM_APP_TAG_BASE + (sizeof(SWM_TAG) * ( (process_id + 1) + iter_offset) ); //(iter+1) ); + //uint32_t send_handle[send_limit]; + uint32_t send_count = 0; + + if(!blocking_comm) + { + + SWM_Isend( + dst_rank_id, + SWM_COMM_WORLD, + this_tag, + -1, + -1, + NO_BUFFER, + msg_req_bytes, + msg_rsp_bytes, + &(send_handles[send_count]), + 0, + 0 + ); + } + else + { + SWM_Send( + dst_rank_id, + SWM_COMM_WORLD, + this_tag, + -1,// req-vc + -1, //resp-vc + NO_BUFFER, + msg_req_bytes, //req-bytes + msg_rsp_bytes, //resp-bytes + 0,//routing type + 0 //routing type + ); + } + + if(!blocking_comm) + { + SWM_Waitall(send_limit, send_handles); + } + + if(debug) + { + std::cout << std::endl << "process_id: " << process_id << " sent message to destination: " << dst_rank_id << ", tag: " << this_tag << ", iter: " << iter ; + } + + if(show_iterations){ + SWM_Mark_Iteration(marker); + marker++; + } + } // 
end-for(iteration_cnt) +} +else if(process_id == dst_rank_id) +{ + + // need to receive from everybody every iteration... + for(uint32_t iter = 0; iter < iteration_cnt; iter++) + { + + uint32_t count = 0; + + for(uint32_t index = min_source_id; index <= max_source_id; index++, count++) + { + + uint32_t iter_offset = (process_cnt * (iter) ); + //SWM_TAG this_tag = SWM_APP_TAG_BASE + (sizeof(SWM_TAG) * (index + 1) * (iter+1) ); + SWM_TAG this_tag = SWM_APP_TAG_BASE + (sizeof(SWM_TAG) * ( (index + 1) + iter_offset) ); + + uint32_t receive_from_proc = (!use_any_src) ? index : -1; + + if(debug) + { + std::cout << std::endl << "process_id: " << process_id << " expecting to recv data from: " << receive_from_proc << " with recv tag: " << this_tag << " | iter_" << iter; + } + + + if(!blocking_comm) + { + SWM_Irecv( + receive_from_proc, + SWM_COMM_WORLD, + this_tag, + NO_BUFFER, + &(recv_handles[count]) + ); + } + else + { + SWM_Recv( + receive_from_proc, + SWM_COMM_WORLD, + this_tag, + NO_BUFFER + ); + } + + if(debug) + { + std::cout << std::endl << "process_id: " << process_id << " received data from src: " << index << ", iteration: " << iter ; + } + + } // end of for-loop(all_sources) + + if(!blocking_comm) + { + SWM_Waitall(recv_limit, recv_handles); + } + + //SWM_Mark_Iteration(iter); + } // end for-loop(iteration_cnt) + +} + +SWM_Finalize(); } +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ft=c ts=8 sts=4 sw=4 expandtab + */ diff --git a/swm/src/incast/incast.json b/swm/src/incast/incast.json index 327e529..78c98ac 100644 --- a/swm/src/incast/incast.json +++ b/swm/src/incast/incast.json @@ -1,23 +1,21 @@ { "jobs" : { "dll_path": "${FABSIM_APPS_PATH}/dll/incast.so", - "size": 100, + "size": 4, "cfg": { "app": "incast", - "iteration_cnt": 10, - "compute_delay": 0, - "noop_cnt": 0, - "msg_size": 1310720, - "start_delay_max" : 0, + "iteration_cnt": 1, + "compute_delay": 40000, + "msg_req_bytes": 16, + "msg_rsp_bytes": 0, + 
"start_delay_max" : 10000, "scattered_start" : false, - "synchronous": true, - "dst_rank_id":99, + "dst_rank_id":3, "randomize_communication_order": false, - "blocking_comm" : false, - "debug" : true, - "src_rank_id_interval": [0,98], + "blocking_comm" : true, + "debug" : false, + "src_rank_id_interval": [0,2], "cpu_freq" : 4e9 - } + } } - } - \ No newline at end of file + } \ No newline at end of file diff --git a/swm/src/incast/incast1.json b/swm/src/incast/incast1.json index 38c8413..7785a21 100644 --- a/swm/src/incast/incast1.json +++ b/swm/src/incast/incast1.json @@ -1,23 +1,21 @@ { "jobs" : { "dll_path": "${FABSIM_APPS_PATH}/dll/incast.so", - "size": 40, + "size": 512, "cfg": { "app": "incast", - "iteration_cnt": 30, - "compute_delay": 0, - "noop_cnt": 0, - "msg_size": 65536, - "start_delay_max" : 0, + "iteration_cnt": 500, + "compute_delay": 80000, + "msg_req_bytes": 160, + "msg_rsp_bytes": 0, + "start_delay_max" : 10000, "scattered_start" : false, - "synchronous": true, - "dst_rank_id":39, + "dst_rank_id":511, "randomize_communication_order": false, - "blocking_comm" : false, - "debug" : true, - "src_rank_id_interval": [0,38], + "blocking_comm" : true, + "debug" : false, + "src_rank_id_interval": [0,510], "cpu_freq" : 4e9 - } + } } - } - \ No newline at end of file + } \ No newline at end of file diff --git a/swm/src/incast/incast2.json b/swm/src/incast/incast2.json index 91fe0e7..7cbf85f 100644 --- a/swm/src/incast/incast2.json +++ b/swm/src/incast/incast2.json @@ -1,23 +1,21 @@ { -"jobs" : { - "dll_path": "${FABSIM_APPS_PATH}/dll/incast.so", - "size": 8, - "cfg": { - "app": "incast", - "iteration_cnt": 2, - "compute_delay": 0, - "msg_req_bytes": 160, - "msg_rsp_bytes": 0, - "start_delay_max" : 10, - "scattered_start" : false, - "dst_rank_id":7, - "randomize_communication_order": false, - "blocking_comm" : false, - "debug" : true, - "src_rank_id_interval": [0,6], - "cpu_freq" : 4e9 + "jobs" : { + "dll_path": "${FABSIM_APPS_PATH}/dll/incast.so", + "size": 
8, + "cfg": { + "app": "incast", + "iteration_cnt": 2, + "compute_delay": 0, + "msg_req_bytes": 160, + "msg_rsp_bytes": 0, + "start_delay_max" : 10, + "scattered_start" : false, + "dst_rank_id":7, + "randomize_communication_order": false, + "blocking_comm" : false, + "debug" : true, + "src_rank_id_interval": [0,6], + "cpu_freq" : 4e9 + } } - } -} - - \ No newline at end of file + } \ No newline at end of file From 0421120cd7e947efe5302419b1864d6c9f28a911 Mon Sep 17 00:00:00 2001 From: Neil McGlohon Date: Tue, 27 Apr 2021 11:54:10 -0400 Subject: [PATCH 08/14] Incast default change --- swm/src/incast/all_to_one_swm_user_code.cpp | 2 +- swm/src/incast/incast.json | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/swm/src/incast/all_to_one_swm_user_code.cpp b/swm/src/incast/all_to_one_swm_user_code.cpp index 7c614d8..6d8c8e1 100644 --- a/swm/src/incast/all_to_one_swm_user_code.cpp +++ b/swm/src/incast/all_to_one_swm_user_code.cpp @@ -108,7 +108,7 @@ AllToOneSWMUserCode::call() -1, NO_BUFFER, msg_req_bytes, - msg_rsp_bytes, + 0, &(send_handles[send_count]), 0, 0 diff --git a/swm/src/incast/incast.json b/swm/src/incast/incast.json index 78c98ac..62a8119 100644 --- a/swm/src/incast/incast.json +++ b/swm/src/incast/incast.json @@ -1,20 +1,20 @@ { "jobs" : { "dll_path": "${FABSIM_APPS_PATH}/dll/incast.so", - "size": 4, + "size": 100, "cfg": { "app": "incast", - "iteration_cnt": 1, - "compute_delay": 40000, - "msg_req_bytes": 16, + "iteration_cnt": 10, + "compute_delay": 0, + "msg_req_bytes": 1310720, "msg_rsp_bytes": 0, - "start_delay_max" : 10000, + "start_delay_max" : 0, "scattered_start" : false, - "dst_rank_id":3, + "dst_rank_id":99, "randomize_communication_order": false, - "blocking_comm" : true, - "debug" : false, - "src_rank_id_interval": [0,2], + "blocking_comm" : false, + "debug" : true, + "src_rank_id_interval": [0,98], "cpu_freq" : 4e9 } } From 3bcb97bcf8c5765b3e50bf1a4a73233aa04572b9 Mon Sep 17 00:00:00 2001 From: Neil 
McGlohon Date: Tue, 27 Apr 2021 12:00:20 -0400 Subject: [PATCH 09/14] Add Periodic Aggressor SWM --- swm/src/Makefile.subdir | 9 +- swm/src/periodic_aggressor/pa_lammps_model.h | 47 + .../periodic_aggressor/periodic_aggressor.cpp | 1606 +++++++++++++++++ .../periodic_aggressor/periodic_aggressor.h | 218 +++ .../periodic_aggressor.json | 27 + 5 files changed, 1904 insertions(+), 3 deletions(-) create mode 100644 swm/src/periodic_aggressor/pa_lammps_model.h create mode 100644 swm/src/periodic_aggressor/periodic_aggressor.cpp create mode 100644 swm/src/periodic_aggressor/periodic_aggressor.h create mode 100644 swm/src/periodic_aggressor/periodic_aggressor.json diff --git a/swm/src/Makefile.subdir b/swm/src/Makefile.subdir index 0eaa9bf..81f09b8 100644 --- a/swm/src/Makefile.subdir +++ b/swm/src/Makefile.subdir @@ -11,7 +11,8 @@ include_HEADERS = \ src/spread/one_to_many_swm_user_code.h \ src/many_to_many/many_to_many_swm_user_code.h \ src/allreduce/allreduce.h \ - src/milc/milc_swm_user_code.h + src/milc/milc_swm_user_code.h \ + src/periodic_aggressor/periodic_aggressor.h src_libswm_la_SOURCES = src/lammps/lammps.cpp \ src/nekbone/cubiclattice.cpp \ @@ -21,7 +22,8 @@ src_libswm_la_SOURCES = src/lammps/lammps.cpp \ src/spread/one_to_many_swm_user_code.cpp \ src/many_to_many/many_to_many_swm_user_code.cpp \ src/allreduce/allreduce.cpp \ - src/milc/milc_swm_user_code.cpp + src/milc/milc_swm_user_code.cpp \ + src/periodic_aggressor/periodic_aggressor.cpp dist_data_DATA = src/lammps/lammps_workload.json \ src/lammps/lammps_workload1.json \ @@ -38,7 +40,8 @@ dist_data_DATA = src/lammps/lammps_workload.json \ src/allreduce/allreduce_workload.json \ src/allreduce/allreduce32_workload.json \ src/allreduce/allreduce256_workload.json \ - src/milc/milc_skeleton.json + src/milc/milc_skeleton.json \ + src/periodic_aggressor/periodic_aggressor.json diff --git a/swm/src/periodic_aggressor/pa_lammps_model.h b/swm/src/periodic_aggressor/pa_lammps_model.h new file mode 100644 index 
0000000..ecb72b4 --- /dev/null +++ b/swm/src/periodic_aggressor/pa_lammps_model.h @@ -0,0 +1,47 @@ +double msg_ghost_fw = 2.48839990371; +double msg_ghost_rw = 2.48841071356; +double msg_k_pre = 8.0; +double msg_k_post = 24.0; +double msg_fix = 2.48841071356; +double msg_neigh_exch = 3.08673789851; +double msg_neigh_border = 6.63563071593; +double ins_start_a[1] = {8.51937488057}; +double ins_start_b[1] = {1544.46231029}; +double ins_start_cpi = 0.843141163755; +double ins_neigh_check_a[1] = {89.6202085326}; +double ins_neigh_check_b[1] = {195042.694781}; +double ins_neigh_check_cpi = 0.951841661097; +double ins_neigh_exch_sr_a[3] = {11.5746361748, 1.3778877165, 1.34223584427}; +double ins_neigh_exch_sr_b[3] = {438096.47233, 4800.95420873, 8838.30958016}; +double ins_neigh_exch_sr_cpi = 1.58963777201; +double ins_neigh_border_sr_a[6] = {1.8243979135, 2.03810250649, 3.06679631198, 3.0870981696, 3.58608401984, 2.7521157202}; +double ins_neigh_border_sr_b[6] = {32382.7816726, 51218.9714454, 83557.2150064, 99920.5231836, 248049.508775, 357653.369027}; +double ins_neigh_border_sr_cpi = 1.75604132297; +double ins_neigh_end_a[5] = {1.21665755465, 6595.30712353, 29.6655250587, 58.3229990241, 1.69059035676}; +double ins_neigh_end_b[5] = {139153.690154, 11183101.9944, 44150.0262654, 91071.0968296, 2057.50606924}; +double ins_neigh_end_cpi = 0.784053776222; +double ins_k_pre_a[6] = {43360.7612799, 0.780443563075, 0.999500801383, 1.23253340415, 1.11044737418, 0.813347233046}; +double ins_k_pre_b[6] = {-8151826.36712, 12652.2538632, 10478.6380748, 5362.54935036, 4946.43943567, 2809.82745824}; +double ins_k_pre_cpi = 0.897392796161; +double ins_k_fft_a[13] = {12.7660165971, 49.5132610315, 36.7957959, 48.6517835605, 102.611869648, 36.7771213175, 48.6352484315, 71.1465535394, 36.7760281598, 48.624983362, 71.1461101858, 36.7661712493, 48.6196591605}; +double ins_k_fft_b[13] = {2755.32405875, -14031.7206559, -268.944769389, -34509.6094468, -15768.811004, 468.975498509, 
-34540.223738, -23736.2087919, -119.26574367, -34526.5937504, -23716.4687588, 130.351010748, -34514.7315393}; +double ins_k_fft_cpi = 0.700575655531; +double ins_k_post_a[6] = {15.7888010275, 1.98489719387, 2.63496119567, 3.00664450319, 2.41731560611, 1.9085386988}; +double ins_k_post_b[6] = {390.234582372, 6358.92071557, 11170.1400931, 12661.0465342, 24102.3020575, 30456.6591775}; +double ins_k_post_cpi = 1.72457235374; +double ins_k_energy_a[1] = {5476.95439615}; +double ins_k_energy_b[1] = {-1073884.00556}; +double ins_k_energy_cpi = 0.475585305054; +double ins_ghost_fw_a[6] = {43.2730897193, 0.0, 9.23745386168e-09, 2.35234627328e-08, 2.00592476871e-08, 1.33019109126e-07}; +double ins_ghost_fw_b[6] = {96380.315439, 313.368687371, 313.346132312, 313.327974814, 313.346666244, 313.3420572}; +double ins_ghost_fw_cpi = 0.924608655408; +double ins_ghost_rw_a[6] = {0.0, 0.31849027582, 0.316224042474, 0.513934022608, 0.501449013036, 0.298131697301}; +double ins_ghost_rw_b[6] = {566.574947244, 81855.4482201, 82072.2144336, 18124.8399648, 22463.1938402, 12639.7445334}; +double ins_ghost_rw_cpi = 1.91339519762; +double ins_fix_a[6] = {6.31029520441, 0.519042043438, 0.699422916624, 0.862615710189, 0.708782029108, 0.552896821411}; +double ins_fix_b[6] = {25475.6674291, 22357.989938, 30827.99377, 38715.8802264, 89865.0435751, 140845.450888}; +double ins_fix_cpi = 1.96053897728; +double ins_final_a[1] = {43.3348975221}; +double ins_final_b[1] = {141841.44285}; +double ins_final_cpi = 1.56863134534; +double neigh_check_avg = 0.196428571429; diff --git a/swm/src/periodic_aggressor/periodic_aggressor.cpp b/swm/src/periodic_aggressor/periodic_aggressor.cpp new file mode 100644 index 0000000..9b563b6 --- /dev/null +++ b/swm/src/periodic_aggressor/periodic_aggressor.cpp @@ -0,0 +1,1606 @@ +#include "periodic_aggressor.h" + +PeriodicAggressor::PeriodicAggressor( + boost::property_tree::ptree cfg, + void**& generic_ptrs + ) : + // general config + 
iteration_cnt(cfg.get("jobs.cfg.iteration_cnt", 1)), + process_cnt(cfg.get("jobs.size", 1)), + compute_delay(cfg.get("jobs.cfg.compute_delay", 0)), + show_iterations(cfg.get("jobs.cfg.show_iterations", false)), + router_freq(cfg.get("jobs.cfg.router_freq", 800e6)), + cpu_freq(cfg.get("jobs.cfg.cpu_freq", 1.2e9)), + cpu_sim_speedup(cfg.get("jobs.cfg.cpu_sim_speedup", 1.0)), + debug(cfg.get("jobs.cfg.debug", false)), + show_progress(cfg.get("jobs.cfg.show_progress",true)), + + //lammps config + lammps_iters_per_iter(cfg.get("jobs.cfg.lammps_iters_per_iter", 1)), + x_rep(cfg.get("jobs.cfg.num_x_replicas", 1)), + y_rep(cfg.get("jobs.cfg.num_y_replicas", 1)), + z_rep(cfg.get("jobs.cfg.num_z_replicas", 1)), + + //incast config + incast_process_cnt(cfg.get("jobs.cfg.incast_size", 1)), + incast_iters_per_iter(cfg.get("jobs.cfg.incast_iters_per_iter",5)), + incast_dest_rank_id(cfg.get("jobs.cfg.incast_dest_rank_id",0)), + incast_msg_req_bytes(cfg.get("jobs.cfg.incast_msg_req_bytes", 0)), + incast_msg_rsp_bytes(cfg.get("jobs.cfg.incast_msg_rsp_bytes", 0)) +{ + process_id = *((int*)generic_ptrs[0]); + + //incast setup + // extract the src/dst rank id intervals + int num = 0; + BOOST_FOREACH(const boost::property_tree::ptree::value_type &v, cfg.get_child("jobs.cfg.incast_src_rank_id_interval")) + { + std::string value = v.second.data(); + + if(num == 0) incast_min_source_id = atoi(value.c_str()); + if(num == 1) incast_max_source_id = atoi(value.c_str()); + + num++; + } + assert(num == 2); + + assert(incast_dest_rank_id < incast_process_cnt); + + + //lammps setup + req_vc = 0; + resp_vc = 1; + rsp_bytes = 0; + + int i = 0; + + for( i = 0; i < NUM_TRANSPOSE; i++) + { + k_r_targets[i] = nullptr; + k_s_targets[i] = nullptr; + k_s_sizes[i] = nullptr; + k_cyc[i] = 0; + k_len[i] = 0; + } + + gh_fw_len = gh_rw_len = k_pre_len = k_post_len = fix_len = neigh_e_len = neigh_b_len = 0; + gh_fw_r_targets = gh_fw_s_targets = gh_fw_s_sizes = nullptr; + gh_rw_r_targets = gh_rw_s_targets = 
gh_rw_s_sizes = nullptr; + k_pre_r_targets = k_pre_s_targets = k_pre_s_sizes = nullptr; + k_post_r_targets = k_post_s_targets = k_post_s_sizes = nullptr; + fix_r_targets = fix_s_targets = fix_s_sizes = nullptr; + neigh_e_r_targets = neigh_e_s_targets = neigh_e_s_sizes = nullptr; + neigh_b_r_targets = neigh_b_s_targets = neigh_b_s_sizes = nullptr; + gh_fw_cyc = nullptr; + gh_rw_cyc = nullptr; + k_pre_cyc = nullptr; + k_post_cyc = nullptr; + fix_cyc = nullptr; + neigh_e_cyc = nullptr; + neigh_b_cyc = nullptr; + + + // size of the problem in (x,y,z) + prd[0] = x_rep * (XHI_BASE - XLO_BASE); + prd[1] = y_rep * (YHI_BASE - YLO_BASE); + prd[2] = z_rep * (ZHI_BASE - ZLO_BASE); + + // Decompose domain to processes, store result in procNums[3] + proc_decomposition(process_cnt, prd, procNums); + + pppm_decomposition(N_ATOMS_BASE*x_rep*y_rep*z_rep, prd, pppmGrid); + +} + +PeriodicAggressor::~PeriodicAggressor() +{ + int i = 0; + + for( i = 0; i < NUM_TRANSPOSE; i++) + { + if(k_r_targets[i]) delete k_r_targets[i]; + if(k_s_targets[i]) delete k_s_targets[i]; + if(k_s_sizes[i]) delete k_s_sizes[i]; + } + + if(gh_fw_r_targets) delete gh_fw_r_targets; + if(gh_fw_s_targets) delete gh_fw_s_targets; + if(gh_fw_s_sizes) delete gh_fw_s_sizes; + if(gh_fw_cyc) delete gh_fw_cyc; + if(gh_rw_r_targets) delete gh_rw_r_targets; + if(gh_rw_s_targets) delete gh_rw_s_targets; + if(gh_rw_s_sizes) delete gh_rw_s_sizes; + if(gh_rw_cyc) delete gh_rw_cyc; + if(k_pre_r_targets) delete k_pre_r_targets; + if(k_pre_s_targets) delete k_pre_s_targets; + if(k_pre_s_sizes) delete k_pre_s_sizes; + if(k_pre_cyc) delete k_pre_cyc; + if(k_post_r_targets) delete k_post_r_targets; + if(k_post_s_targets) delete k_post_s_targets; + if(k_post_s_sizes) delete k_post_s_sizes; + if(k_post_cyc) delete k_post_cyc; + if(fix_r_targets) delete fix_r_targets; + if(fix_s_targets) delete fix_s_targets; + if(fix_s_sizes) delete fix_s_sizes; + if(fix_cyc) delete fix_cyc; + if(neigh_e_r_targets) delete neigh_e_r_targets; + 
if(neigh_e_s_targets) delete neigh_e_s_targets; + if(neigh_e_s_sizes) delete neigh_e_s_sizes; + if(neigh_e_cyc) delete neigh_e_cyc; + if(neigh_b_r_targets) delete neigh_b_r_targets; + if(neigh_b_s_targets) delete neigh_b_s_targets; + if(neigh_b_s_sizes) delete neigh_b_s_sizes; + if(neigh_b_cyc) delete neigh_b_cyc; +} + +static double round(double x, int p) +{ + return floor(x*pow(10,p) + 0.5)/pow(10,p); +} + +void +PeriodicAggressor::call() +{ + lammps_model_init(); + + for(uint32_t iter = 0; iter < iteration_cnt; iter++) + { + if ((debug || show_progress) && process_id == 0) + printf("Periodic Aggressor Iteration %d/%d\n",iter,iteration_cnt); + if ((debug || show_progress) && process_id == 0) + printf("Periodic Aggressor: Starting Lammps Phase 1\n"); + do_lammps_phase(); + if ((debug || show_progress) && process_id == 0) + printf("Periodic Aggressor: Starting Incast Phase\n"); + SWM_Mark_Iteration(0); //entering incast + do_incast_phase(); + if ((debug || show_progress) && process_id == 0) + printf("Periodic Aggressor: Starting Lammps Phase 2\n"); + SWM_Mark_Iteration(1); //exiting incast + do_lammps_phase(); + } + SWM_Finalize(); +} + +/* TODO (MM): Eliminate process id from the call method. 
*/ +void +PeriodicAggressor::do_lammps_phase() +{ + unsigned int ts = 0; + + for(ts = 0; ts < lammps_iters_per_iter; ts++) + { + if(debug && process_id == 0) + printf("PA: LAMMPS Starting Timestep %d / %d\n",ts,lammps_iters_per_iter); + // initial integration + SWM_Compute(start_cyc); + SWM_Allreduce(48, rsp_bytes, SWM_COMM_WORLD, req_vc, resp_vc, NO_BUFFER, NO_BUFFER); // temperature + SWM_Allreduce(48, rsp_bytes, SWM_COMM_WORLD, req_vc, resp_vc, NO_BUFFER, NO_BUFFER); // pressure + + // check if neighbors need to be exchanged + if(neigh_check()) + { + // do neighbor exchange + doNeighExch(); + } + else + { + // ghost forward exchange + doP2P(gh_fw_len, gh_fw_r_targets, gh_fw_s_targets, gh_fw_s_sizes, gh_fw_cyc); + } + + // k-space pre exchange + doP2P(k_pre_len, k_pre_r_targets, k_pre_s_targets, k_pre_s_sizes, k_pre_cyc); + + // do FFT + doFFT(); + + // k-space post exchange + doP2P(k_post_len, k_post_r_targets, k_post_s_targets, k_post_s_sizes, k_post_cyc); + + // energy calculation + SWM_Compute(k_energy_cyc); + SWM_Allreduce(48, rsp_bytes, SWM_COMM_WORLD, req_vc, resp_vc, NO_BUFFER, NO_BUFFER); + + // ghost reverse exchange + doP2P(gh_rw_len, gh_rw_r_targets, gh_rw_s_targets, gh_rw_s_sizes, gh_rw_cyc); + + // ghost fixed values exchange + doP2P(fix_len, fix_r_targets, fix_s_targets, fix_s_sizes, fix_cyc); + + // final integration + SWM_Compute(final_cyc); + SWM_Allreduce(8, rsp_bytes, SWM_COMM_WORLD, req_vc, resp_vc, NO_BUFFER, NO_BUFFER); // temperature + SWM_Allreduce(48, rsp_bytes, SWM_COMM_WORLD, req_vc, resp_vc, NO_BUFFER, NO_BUFFER); // pressure + } + //MM: comment assert(0); +} +// Incast phase: each rank in [incast_min_source_id, incast_max_source_id] other than incast_dest_rank_id sends incast_iters_per_iter messages of incast_msg_req_bytes to incast_dest_rank_id; the destination posts one SWM_Irecv per source in every iteration and waits for all of them. +void +PeriodicAggressor::do_incast_phase() +{ + // rank 0 prints the incast parameters on every call (not gated by debug) + if(process_id == 0) + { + std::cout << std::endl << "PA: Incast | size: " << incast_process_cnt; + std::cout << " | incast_iters_per_iter: " << incast_iters_per_iter; + std::cout << " | incast_msg_req_bytes: " << incast_msg_req_bytes; + std::cout << " | incast_msg_rsp_bytes: " << incast_msg_rsp_bytes; + std::cout << " | 
incast_dest_rank_id: " << incast_dest_rank_id; + std::cout << " | src_rank_id_interval: " << incast_min_source_id << "-" << incast_max_source_id; + } + // request-handle scratch arrays: allocated on every call and released before returning, since call() runs this phase once per outer iteration + uint32_t send_limit = 1; + uint32_t recv_limit = (incast_max_source_id - incast_min_source_id) + 1; + + uint32_t *send_handles = new uint32_t[send_limit * incast_iters_per_iter]; + uint32_t *recv_handles = new uint32_t[recv_limit * incast_iters_per_iter]; + + if ((process_id != incast_dest_rank_id) && (process_id >= incast_min_source_id && process_id <= incast_max_source_id) ) // do not send messages to self + { + for(uint32_t iter=0; iter < incast_iters_per_iter; iter++) + { + + uint32_t iter_offset = (incast_process_cnt * (iter) ); + SWM_TAG this_tag = SWM_APP_TAG_BASE + (sizeof(SWM_TAG) * ( (process_id + 1) + iter_offset) ); //(iter+1) ); + uint32_t send_count = 0; + + SWM_Isend( + incast_dest_rank_id, + SWM_COMM_WORLD, + this_tag, + -1, + -1, + NO_BUFFER, + incast_msg_req_bytes, + 0, + &(send_handles[send_count]), + 0, + 0 + ); + + SWM_Waitall(send_limit, send_handles); + + if(debug) + { + std::cout << std::endl << "process_id: " << process_id << " sent message to destination: " << incast_dest_rank_id << ", tag: " << this_tag << ", iter: " << iter ; + } + } + } + // NOTE(review): the receiver posts a receive for every rank in the source interval, including its own id if the interval contains incast_dest_rank_id, while senders skip that id; confirm configs keep the destination outside the interval. + else if(process_id == incast_dest_rank_id) + { + // need to receive from everybody every iteration... 
+ for(uint32_t iter = 0; iter < incast_iters_per_iter; iter++) + { + uint32_t count = 0; + for(uint32_t index = incast_min_source_id; index <= incast_max_source_id; index++, count++) + { + uint32_t iter_offset = (incast_process_cnt * (iter) ); + SWM_TAG this_tag = SWM_APP_TAG_BASE + (sizeof(SWM_TAG) * ( (index + 1) + iter_offset) ); + uint32_t receive_from_proc = index; + + if(debug) + { + std::cout << std::endl << "process_id: " << process_id << " expecting to recv data from: " << receive_from_proc << " with recv tag: " << this_tag << " | iter_" << iter; + } + + SWM_Irecv( + receive_from_proc, + SWM_COMM_WORLD, + this_tag, + NO_BUFFER, + &(recv_handles[count]) + ); + } + + SWM_Waitall(recv_limit, recv_handles); + if(debug) + { + std::cout << std::endl << "process_id: " << process_id << " received data from all srcs"; + } + } + } + delete[] send_handles; + delete[] recv_handles; +} + + + +void +PeriodicAggressor::doP2P(int len, int *r_targets, int *s_targets, int *s_sizes, long *cyc_cnt) +{ + int i = 0; + uint32_t h = 0; + for(i = 0; i < len; i++) + { + SWM_Compute(cyc_cnt[i]); + SWM_Irecv(r_targets[i], SWM_COMM_WORLD, 0, NO_BUFFER, &h); + SWM_Send(s_targets[i], SWM_COMM_WORLD, 0, req_vc, resp_vc, NO_BUFFER, s_sizes[i]); + //printf("S @ %d: i %d: %d -> %d\n", process_id, i, process_id, s_targets[i]); + //printf("W @ %d: i %d: %d -> %d\n", process_id, i, r_targets[i], process_id); + SWM_Wait(h); + //printf("D @ %d: i %d: %d -> %d\n", process_id, i, r_targets[i], process_id); + } +} + +void +PeriodicAggressor::doNeighExch() +{ + int i = 0; + uint32_t h = 0; + + // neighbor exchange + while(i < neigh_e_len) + { + SWM_Compute(neigh_e_cyc[i]); + SWM_Sendrecv(SWM_COMM_WORLD, neigh_e_r_targets[i], 0, req_vc, resp_vc, NO_BUFFER, 4, rsp_bytes, neigh_e_s_targets[i], 0, NO_BUFFER); + if(neigh_e_r_targets[i] != neigh_e_s_targets[i]) + { + SWM_Sendrecv(SWM_COMM_WORLD, neigh_e_s_targets[i], 0, req_vc, resp_vc, NO_BUFFER, 4, rsp_bytes, neigh_e_r_targets[i], 0, NO_BUFFER); + } + SWM_Irecv(neigh_e_r_targets[i], SWM_COMM_WORLD, 0, 
NO_BUFFER, &h); + SWM_Send(neigh_e_s_targets[i], SWM_COMM_WORLD, 0, req_vc, resp_vc, NO_BUFFER, neigh_e_s_sizes[i]); + SWM_Wait(h); + i++; + if((i < neigh_e_len) && (neigh_e_r_targets[i-1] != neigh_e_s_targets[i-1])) + { + SWM_Irecv(neigh_e_r_targets[i], SWM_COMM_WORLD, 0, NO_BUFFER, &h); + SWM_Send(neigh_e_s_targets[i], SWM_COMM_WORLD, 0, req_vc, resp_vc, NO_BUFFER, neigh_e_s_sizes[i]); + SWM_Wait(h); + i++; + } + } + + // neighbor borders + for(i = 0; i < neigh_b_len; i++) + { + SWM_Compute(neigh_b_cyc[i]); + SWM_Sendrecv(SWM_COMM_WORLD, neigh_b_r_targets[i], 0, req_vc, resp_vc, NO_BUFFER, 4, rsp_bytes, neigh_b_s_targets[i], 0, NO_BUFFER); + SWM_Irecv(neigh_b_r_targets[i], SWM_COMM_WORLD, 0, NO_BUFFER, &h); + SWM_Send(neigh_b_s_targets[i], SWM_COMM_WORLD, 0, req_vc, resp_vc, NO_BUFFER, neigh_b_s_sizes[i]); + SWM_Wait(h); + } + + // allreduces + for(i = 0; i < NUM_NEIGH_ALLREDUCE; i++) + { + SWM_Compute(neigh_end_cyc[i]); + SWM_Allreduce(4, rsp_bytes, SWM_COMM_WORLD, req_vc, resp_vc, NO_BUFFER, NO_BUFFER); + } +} +// doFFT: for each of the NUM_TRANSPOSE stages, call SWM_Compute(k_cyc[idx]), post k_len[idx] non-blocking receives, issue k_len[idx] blocking sends, then wait for all of the receives. +void +PeriodicAggressor::doFFT() +{ + uint32_t *h; + int i = 0, idx = 0; + + for(idx = 0; idx < NUM_TRANSPOSE; idx++) + { + // one request handle per receive posted in this stage + h = new uint32_t[k_len[idx]]; + + SWM_Compute(k_cyc[idx]); + for(i = 0; i < k_len[idx]; i++) + { + SWM_Irecv(k_r_targets[idx][i], SWM_COMM_WORLD, 0, NO_BUFFER, &h[i]); + } + for(i = 0; i < k_len[idx]; i++) + { + SWM_Send(k_s_targets[idx][i], SWM_COMM_WORLD, 0, req_vc, resp_vc, NO_BUFFER, k_s_sizes[idx][i]); + } + SWM_Waitall(k_len[idx], h); + // h was allocated with new[], so it must be released with delete[] + delete[] h; + } +} + +bool +PeriodicAggressor::neigh_check() +{ + if(neigh_check_count < NEIGH_DELAY) + { + neigh_check_count++; + return false; + } + else + { + + if( (neigh_check_count - NEIGH_DELAY) % NEIGH_EVERY ) + { + neigh_check_count++; + return false; + } + else + { + SWM_Compute(neigh_check_cyc); + SWM_Allreduce(4, rsp_bytes, SWM_COMM_WORLD, req_vc, resp_vc, NO_BUFFER, NO_BUFFER); + + neigh_check_cumulative += neigh_check_average; + + if(neigh_check_cumulative > 1.0) + { + 
neigh_check_cumulative -= 1.0; + neigh_check_count = 0; + return true; + } + } + } + + neigh_check_count++; + return false; +} + +#pragma GCC diagnostic push +void +PeriodicAggressor::lammps_model_init() +{ +#include "pa_lammps_model.h" + double t_vol, f_vol; + int i = 0, j = 0; + + t_vol = prd[0]/procNums[0] * prd[1]/procNums[1] * prd[2]/procNums[2]; + f_vol = pppmGrid[0]/procNums[0] * pppmGrid[1]/procNums[1] * pppmGrid[2]/procNums[2]; + + + get_k_params(process_id, f_vol); + ghost_setup(GHOST_SKIN_CUTOFF, process_id, t_vol); + k_pre_setup(FFT_SKIN_CUTOFF, process_id, f_vol); + k_post_setup(FFT_SKIN_CUTOFF, process_id, f_vol); + neigh_e_setup(GHOST_SKIN_CUTOFF, process_id, t_vol); + + neigh_check_average = neigh_check_avg; + neigh_check_cyc = std::max((long)0, (long)((t_vol * ins_neigh_check_a[0] + ins_neigh_check_b[0]) * ins_neigh_check_cpi * router_freq / cpu_freq / cpu_sim_speedup + 0.5)); + for(i = 0; i < NUM_NEIGH_ALLREDUCE; i++) + { + neigh_end_cyc[i] = std::max((long)0, (long)((t_vol * ins_neigh_end_a[i] + ins_neigh_end_b[i]) * ins_neigh_end_cpi * router_freq / cpu_freq / cpu_sim_speedup + 0.5)); + } + + start_cyc = std::max((long)0, (long)((t_vol * ins_start_a[0] + ins_start_b[0]) * ins_start_cpi * router_freq / cpu_freq / cpu_sim_speedup + 0.5)); + k_energy_cyc = std::max((long)0, (long)((f_vol * ins_k_energy_a[0] + ins_k_energy_b[0]) * ins_k_energy_cpi * router_freq / cpu_freq / cpu_sim_speedup + 0.5)); + final_cyc = std::max((long)0, (long)((t_vol * ins_final_a[0] + ins_final_b[0]) * ins_final_cpi * router_freq / cpu_freq / cpu_sim_speedup + 0.5)); + + + neigh_check_count = 0; + neigh_check_cumulative = 0.; + +// for(i = 0; i < 3; i++){ +// printf("procs: %d, %d, %d\n pppm: %lf, %lf, %lf\n", procNums[0], procNums[1], procNums[2], pppmGrid[0], pppmGrid[1], pppmGrid[2]); +// } + +// for(j = 0; j < gh_fw_len; j++){ +// printf("%d: (%d->%d, %d->%d)\n", process_id, gh_fw_r_targets[j], process_id, process_id, gh_fw_s_targets[j]); +// } + +// for(j = 0; j < 
neigh_e_len; j++){ +// printf("%d: (%d->%d, %d->%d)\n", process_id, neigh_e_r_targets[j], process_id, process_id, neigh_e_s_targets[j]); +// } + +// if(process_id == 11){ +// for(i = 0; i < NUM_TRANSPOSE; i++){ +// printf("transpose: %d\n", i); +// printf("len: %d, (recv proc, send_proc, send size): ", k_len[i]); +// for(j = 0; j < k_len[i]; j++){ +// printf("(%d, %d, %d) ", k_r_targets[i][j], k_s_targets[i][j], k_s_sizes[i][j]); +// } +// printf("\n\n"); +// } +// } + + +// if(process_id == 11){ + +// printf("gh_fw:\n"); +// printf("len: %d, (recv proc, send_proc, send size): ", gh_fw_len); +// for(j = 0; j < gh_fw_len; j++){ +// printf("(%d, %d, %d) ", gh_fw_r_targets[j], gh_fw_s_targets[j], gh_fw_s_sizes[j]); +// } +// printf("\n\n"); + +// printf("gh_rw:\n"); +// printf("len: %d, (recv proc, send_proc, send size): ", gh_rw_len); +// for(j = 0; j < gh_rw_len; j++){ +// printf("(%d, %d, %d) ", gh_rw_r_targets[j], gh_rw_s_targets[j], gh_rw_s_sizes[j]); +// } +// printf("\n\n"); + +// printf("fix:\n"); +// printf("len: %d, (recv proc, send_proc, send size): ", fix_len); +// for(j = 0; j < fix_len; j++){ +// printf("(%d, %d, %d) ", fix_r_targets[j], fix_s_targets[j], fix_s_sizes[j]); +// } +// printf("\n\n"); + +// printf("neigh_e:\n"); +// printf("len: %d, (recv proc, send_proc, send size): ", neigh_e_len); +// for(j = 0; j < neigh_e_len; j++){ +// printf("(%d, %d, %d) ", neigh_e_r_targets[j], neigh_e_s_targets[j], neigh_e_s_sizes[j]); +// } +// printf("\n\n"); + +// printf("neigh_b:\n"); +// printf("len: %d, (recv proc, send_proc, send size): ", neigh_b_len); +// for(j = 0; j < neigh_b_len; j++){ +// printf("(%d, %d, %d) ", neigh_b_r_targets[j], neigh_b_s_targets[j], neigh_b_s_sizes[j]); +// } +// printf("\n\n"); + +// printf("k_pre:\n"); +// printf("len: %d, (recv proc, send_proc, send size): ", k_pre_len); +// for(j = 0; j < k_pre_len; j++){ +// printf("(%d, %d, %d) ", k_pre_r_targets[j], k_pre_s_targets[j], k_pre_s_sizes[j]); +// } +// printf("\n\n"); + +// 
printf("k_post:\n"); +// printf("len: %d, (recv proc, send_proc, send size): ", k_post_len); +// for(j = 0; j < k_post_len; j++){ +// printf("(%d, %d, %d) ", k_post_r_targets[j], k_post_s_targets[j], k_post_s_sizes[j]); +// } +// printf("\n\n"); + +// } +} +#pragma GCC diagnostic pop + +void +PeriodicAggressor::proc_decomposition(int n, double prd[], int procNums[]) +{ + double area[3]; + double bestArea; + double tmpArea; + int i = 0, j = 0; + + procNums[0]= procNums[1]= procNums[2]=0; + + area[0]=prd[0]*prd[1]; + area[1]=prd[0]*prd[2]; + area[2]=prd[1]*prd[2]; + bestArea = 2*(area[0]+area[1]+area[2]); + + for(i = 1; i <= n; i++) + { + if(n%i == 0) + { + for(j = 1; j <= n/i; j++) + { + if(n/i%j == 0) + { + tmpArea = area[0]/i/j + area[1]/i/(n/i/j) + area[2]/j/(n/i/j); + if(tmpArea < bestArea) + { + bestArea = tmpArea; + procNums[0]=i; + procNums[1]=j; + procNums[2]=n/i/j; + } + } + } + } + } +} +void +PeriodicAggressor::pppm_decomposition(int n, double prd[], double pppmGrid[]) +{ + double h[3]; + double err; + int i; + + h[0] = h[1] = h[2] = 1./GEWALD; + + + for(i = 0; i < 3; i++) pppmGrid[i] = int(prd[i]/h[i]) + 1; + + for(i = 0; i < 3; i++) + { + err = pppm_estimate_ik_error(h[i], prd[i], n, prd); + while(err > FFT_ACCURACY) + { + err = pppm_estimate_ik_error(h[i], prd[i], n, prd); + pppmGrid[i]++; + h[i] = prd[i]/pppmGrid[i]; + } + } + + + for(i = 0; i < 3; i++) while(pppm_factorable(pppmGrid[i]) == 0) pppmGrid[i]++; + +} + +#pragma GCC diagnostic push +void +PeriodicAggressor::ghost_setup(double cutoff, int rank, double t_vol) +{ +#include "pa_lammps_model.h" + int nc[3], neigh[6]; + int i = 0, ni = 0; + double tmp_vol, max_vol; + + gh_fw_len = 0; + for(i = 0; i < 3; i++) nc[i] = int(cutoff / (prd[i] / procNums[i]) + 1); + for(i = 0; i < 3; i++) gh_fw_len+=2*nc[i]; + + gh_fw_r_targets = new int[gh_fw_len]; + gh_fw_s_targets = new int[gh_fw_len]; + gh_fw_s_sizes = new int[gh_fw_len]; + gh_fw_cyc = new long[gh_fw_len]; + +// printf("\n Rank id %d gh_fw_len %d 
", rank, gh_fw_len); + // receive targets + rank_to_neigh(rank, neigh); + ni = 0; + for(i = 0; i < nc[0]; i++) + { + gh_fw_r_targets[ni++] = neigh[0]; + gh_fw_r_targets[ni++] = neigh[1]; + } + for(i = 0; i < nc[1]; i++) + { + gh_fw_r_targets[ni++] = neigh[2]; + gh_fw_r_targets[ni++] = neigh[3]; + } + for(i = 0; i < nc[2]; i++) + { + gh_fw_r_targets[ni++] = neigh[4]; + gh_fw_r_targets[ni++] = neigh[5]; + } + + + // send targets and sizes + ni = 0; + tmp_vol = 0; + max_vol = (prd[1]/procNums[1]+0*cutoff)*(prd[2]/procNums[2]+0*cutoff)*cutoff; + for(i = 0; i < nc[0]; i++) + { + gh_fw_s_targets[ni] = neigh[1]; + if(i < nc[0]-1) + { + gh_fw_s_sizes[ni] = (prd[1]/procNums[1]+0*cutoff)*(prd[2]/procNums[2]+0*cutoff)*prd[0]/procNums[0]; + tmp_vol = tmp_vol + gh_fw_s_sizes[ni]; + } + else + { + gh_fw_s_sizes[ni] = max_vol - tmp_vol; + } + ni++; + + gh_fw_s_targets[ni] = neigh[0]; + gh_fw_s_sizes[ni] = gh_fw_s_sizes[ni-1]; + ni++; + } + + tmp_vol = 0; + max_vol = (prd[0]/procNums[0]+2*cutoff)*(prd[2]/procNums[2]+0*cutoff)*cutoff; + for(i = 0; i < nc[1]; i++) + { + gh_fw_s_targets[ni] = neigh[3]; + if(i < nc[1]-1) + { + gh_fw_s_sizes[ni] = (prd[0]/procNums[0]+2*cutoff)*(prd[2]/procNums[2]+0*cutoff)*prd[1]/procNums[1]; + tmp_vol = tmp_vol + gh_fw_s_sizes[ni]; + } + else + { + gh_fw_s_sizes[ni] = max_vol - tmp_vol; + } + ni++; + + gh_fw_s_targets[ni] = neigh[2]; + gh_fw_s_sizes[ni] = gh_fw_s_sizes[ni-1]; + ni++; + } + + tmp_vol = 0; + max_vol = (prd[0]/procNums[0]+2*cutoff)*(prd[1]/procNums[1]+2*cutoff)*cutoff; + for(i = 0; i < nc[2]; i++) + { + gh_fw_s_targets[ni] = neigh[5]; + if(i < nc[2]-1) + { + gh_fw_s_sizes[ni] = (prd[0]/procNums[0]+2*cutoff)*(prd[1]/procNums[1]+2*cutoff)*prd[2]/procNums[2]; + tmp_vol = tmp_vol + gh_fw_s_sizes[ni]; + } + else + { + gh_fw_s_sizes[ni] = max_vol - tmp_vol; + } + ni++; + + gh_fw_s_targets[ni] = neigh[4]; + gh_fw_s_sizes[ni] = gh_fw_s_sizes[ni-1]; + ni++; + } + + // reverse + gh_rw_len = gh_fw_len; + gh_rw_r_targets = new int[gh_rw_len]; + 
gh_rw_s_targets = new int[gh_rw_len]; + gh_rw_s_sizes = new int[gh_rw_len]; + gh_rw_cyc = new long[gh_rw_len]; + + ni = 0; + + for(i = gh_fw_len-2; i >= 0; i=i-2) + { + gh_rw_r_targets[ni] = gh_fw_r_targets[i]; + gh_rw_s_targets[ni] = gh_fw_s_targets[i]; + gh_rw_s_sizes[ni] = gh_fw_s_sizes[i]; + ni++; + + gh_rw_r_targets[ni] = gh_fw_r_targets[i+1]; + gh_rw_s_targets[ni] = gh_fw_s_targets[i+1]; + gh_rw_s_sizes[ni] = gh_fw_s_sizes[i+1]; + ni++; + } + + + // fix setup + fix_len = gh_fw_len; + fix_r_targets = new int[fix_len]; + fix_s_targets = new int[fix_len]; + fix_s_sizes = new int[fix_len]; + fix_cyc = new long[fix_len]; + + for(i = 0; i < gh_fw_len; i++) + { + fix_r_targets[i] = gh_fw_r_targets[i]; + fix_s_targets[i] = gh_fw_s_targets[i]; + fix_s_sizes[i] = gh_fw_s_sizes[i]; + } + + + // neigh_borders setup + neigh_b_len = gh_fw_len; + neigh_b_r_targets = new int[neigh_b_len]; + neigh_b_s_targets = new int[neigh_b_len]; + neigh_b_s_sizes = new int[neigh_b_len]; + neigh_b_cyc = new long[neigh_b_len]; + + for(i = 0; i < gh_fw_len; i++) + { + neigh_b_r_targets[i] = gh_fw_r_targets[i]; + neigh_b_s_targets[i] = gh_fw_s_targets[i]; + neigh_b_s_sizes[i] = gh_fw_s_sizes[i]; + } + + + // scale the sizes + for(i = 0; i < gh_fw_len; i++) gh_fw_s_sizes[i] = (int)(gh_fw_s_sizes[i] * msg_ghost_fw + 0.5); + for(i = 0; i < gh_rw_len; i++) gh_rw_s_sizes[i] = (int)(gh_rw_s_sizes[i] * msg_ghost_rw + 0.5); + for(i = 0; i < fix_len; i++) fix_s_sizes[i] = (int)(fix_s_sizes[i] * msg_fix + 0.5); + for(i = 0; i < neigh_b_len; i++) neigh_b_s_sizes[i] = (int)(neigh_b_s_sizes[i] * msg_neigh_border + 0.5); + + + // instruction counts + ni = 0; + for(i = 0; i < nc[0]; i++) + { + gh_fw_cyc[ni] = t_vol * ins_ghost_fw_a[0] + ins_ghost_fw_b[0]; + fix_cyc[ni] = t_vol * ins_fix_a[0] + ins_fix_b[0]; + neigh_b_cyc[ni] = t_vol * ins_neigh_border_sr_a[0] + ins_neigh_border_sr_b[0]; + ni++; + gh_fw_cyc[ni] = t_vol * ins_ghost_fw_a[1] + ins_ghost_fw_b[1]; + fix_cyc[ni] = t_vol * ins_fix_a[1] + 
ins_fix_b[1]; + neigh_b_cyc[ni] = t_vol * ins_neigh_border_sr_a[1] + ins_neigh_border_sr_b[1]; + ni++; + } + for(i = 0; i < nc[1]; i++) + { + gh_fw_cyc[ni] = t_vol * ins_ghost_fw_a[2] + ins_ghost_fw_b[2]; + fix_cyc[ni] = t_vol * ins_fix_a[2] + ins_fix_b[2]; + neigh_b_cyc[ni] = t_vol * ins_neigh_border_sr_a[2] + ins_neigh_border_sr_b[2]; + gh_fw_cyc[ni] = t_vol * ins_ghost_fw_a[2] + ins_ghost_fw_b[2]; + ni++; + gh_fw_cyc[ni] = t_vol * ins_ghost_fw_a[3] + ins_ghost_fw_b[3]; + fix_cyc[ni] = t_vol * ins_fix_a[3] + ins_fix_b[3]; + neigh_b_cyc[ni] = t_vol * ins_neigh_border_sr_a[3] + ins_neigh_border_sr_b[3]; + ni++; + } + for(i = 0; i < nc[2]; i++) + { + gh_fw_cyc[ni] = t_vol * ins_ghost_fw_a[4] + ins_ghost_fw_b[4]; + fix_cyc[ni] = t_vol * ins_fix_a[4] + ins_fix_b[4]; + neigh_b_cyc[ni] = t_vol * ins_neigh_border_sr_a[4] + ins_neigh_border_sr_b[4]; + ni++; + gh_fw_cyc[ni] = t_vol * ins_ghost_fw_a[5] + ins_ghost_fw_b[5]; + fix_cyc[ni] = t_vol * ins_fix_a[5] + ins_fix_b[5]; + neigh_b_cyc[ni] = t_vol * ins_neigh_border_sr_a[5] + ins_neigh_border_sr_b[5]; + ni++; + } + ni = 0; + for(i = 0; i < nc[2]; i++) + { + gh_rw_cyc[ni++] = t_vol * ins_ghost_rw_a[4] + ins_ghost_rw_b[4]; + gh_rw_cyc[ni++] = t_vol * ins_ghost_rw_a[5] + ins_ghost_rw_b[5]; + } + for(i = 0; i < nc[1]; i++) + { + gh_rw_cyc[ni++] = t_vol * ins_ghost_rw_a[2] + ins_ghost_rw_b[2]; + gh_rw_cyc[ni++] = t_vol * ins_ghost_rw_a[3] + ins_ghost_rw_b[3]; + } + for(i = 0; i < nc[0]; i++) + { + gh_rw_cyc[ni++] = t_vol * ins_ghost_rw_a[0] + ins_ghost_rw_b[0]; + gh_rw_cyc[ni++] = t_vol * ins_ghost_rw_a[1] + ins_ghost_rw_b[1]; + } + + + // convert instructions to cycles + for(i = 0; i < gh_fw_len; i++) + gh_fw_cyc[i] = std::max((long)0, (long)(gh_fw_cyc[i] * ins_ghost_fw_cpi * router_freq / cpu_freq / cpu_sim_speedup + 0.5)); + for(i = 0; i < gh_rw_len; i++) + gh_rw_cyc[i] = std::max((long)0, (long)(gh_rw_cyc[i] * ins_ghost_rw_cpi * router_freq / cpu_freq / cpu_sim_speedup + 0.5)); + for(i = 0; i < fix_len; i++) + fix_cyc[i] 
= std::max((long)0, (long)(fix_cyc[i] * ins_fix_cpi * router_freq / cpu_freq / cpu_sim_speedup + 0.5)); + for(i = 0; i < neigh_b_len; i++) + neigh_b_cyc[i] = std::max((long)0, (long)(neigh_b_cyc[i] * ins_neigh_border_sr_cpi * router_freq / cpu_freq / cpu_sim_speedup + 0.5)); + +} + +void +PeriodicAggressor::k_pre_setup(double cutoff, int rank, double f_vol) +{ +#include "pa_lammps_model.h" + int neigh[6]; + int i = 0, ni = 0; + int hi_in, hi_out, lo_in, lo_out; + int rs[3]; + int coord[3]; + + //printf("\n proc[0] %d proc[1] %d proc[2] %d ", procNums[0], procNums[1], procNums[2]); + //printf("\n proc[0] %f proc[1] %f proc[2] %f cutoff %f ", prd[0], prd[1], prd[2], cutoff); + for(i = 0; i < 3; i++) + { +// printf("\n cutoff %f prd[%d] %f procNums[%d] %d ", cutoff, i, prd[i], i, procNums[i]); + assert(int(cutoff / (prd[i] / procNums[i]) + 1) == 1); + } + k_pre_len = 6; + + k_pre_r_targets = new int[k_pre_len]; + k_pre_s_targets = new int[k_pre_len]; + k_pre_s_sizes = new int[k_pre_len]; + k_pre_cyc = new long[k_pre_len]; + + // receive targets + rank_to_neigh(rank, neigh); + ni = 0; + k_pre_r_targets[ni++] = neigh[4]; + k_pre_r_targets[ni++] = neigh[5]; + k_pre_r_targets[ni++] = neigh[2]; + k_pre_r_targets[ni++] = neigh[3]; + k_pre_r_targets[ni++] = neigh[0]; + k_pre_r_targets[ni++] = neigh[1]; + + // send targets + ni = 0; + k_pre_s_targets[ni++] = neigh[5]; + k_pre_s_targets[ni++] = neigh[4]; + k_pre_s_targets[ni++] = neigh[3]; + k_pre_s_targets[ni++] = neigh[2]; + k_pre_s_targets[ni++] = neigh[1]; + k_pre_s_targets[ni++] = neigh[0]; + + + // send sizes + rank_to_xyz(rank, coord); + ni = 0; + + // receive sizes + hi_out = (int)(((coord[0]+1)*prd[0]/procNums[0] + cutoff/2.0) * pppmGrid[0]/prd[0] + 0.5); + hi_in = (int)((coord[0]+1)*prd[0]/procNums[0] * pppmGrid[0]/prd[0]) - 1; + lo_out = (int)((coord[0]*prd[0]/procNums[0] - cutoff/2.0) * pppmGrid[0]/prd[0] + 0.5); + lo_in = (int)(coord[0]*prd[0]/procNums[0] * pppmGrid[0]/prd[0]); + rs[0] = abs(lo_out - lo_in) + 2 + 
abs(hi_out - hi_in) + 2; + hi_out = (int)(((coord[1]+1)*prd[1]/procNums[1] + cutoff/2.0) * pppmGrid[1]/prd[1] + 0.5); + hi_in = (int)((coord[1]+1)*prd[1]/procNums[1] * pppmGrid[1]/prd[1]) - 1; + lo_out = (int)((coord[1]*prd[1]/procNums[1] - cutoff/2.0) * pppmGrid[1]/prd[1] + 0.5); + lo_in = (int)(coord[1]*prd[1]/procNums[1] * pppmGrid[1]/prd[1]); + rs[1] = abs(lo_out - lo_in) + 2 + abs(hi_out - hi_in) + 2; + hi_out = (int)(((coord[2]+1)*prd[2]/procNums[2] + cutoff/2.0) * pppmGrid[2]/prd[2] + 0.5); + hi_in = (int)((coord[2]+1)*prd[2]/procNums[2] * pppmGrid[2]/prd[2]) - 1; + lo_out = (int)((coord[2]*prd[2]/procNums[2] - cutoff/2.0) * pppmGrid[2]/prd[2] + 0.5); + lo_in = (int)(coord[2]*prd[2]/procNums[2] * pppmGrid[2]/prd[2]); + rs[2] = abs(lo_out - lo_in) + 2 + abs(hi_out - hi_in) + 2; + + // send sizes + lo_out = (int)round(((coord[2]+1)*prd[2]/procNums[2] + cutoff/2.0) * pppmGrid[2]/prd[2] + 0.5, 10); + lo_in = (int)round((coord[2]+1)*prd[2]/procNums[2] * pppmGrid[2]/prd[2], 10) - 1; + hi_out = (int)round((coord[2]*prd[2]/procNums[2] - cutoff/2.0) * pppmGrid[2]/prd[2] + 0.5, 10); + hi_in = (int)round(coord[2]*prd[2]/procNums[2] * pppmGrid[2]/prd[2], 10); + k_pre_s_sizes[ni++] = (int)((abs(hi_out - hi_in) + 2) * + (((int)(pppmGrid[0]/procNums[0]*(coord[0]+1))-(int)(pppmGrid[0]/procNums[0]*coord[0])) + rs[0]) * + (((int)(pppmGrid[1]/procNums[1]*(coord[1]+1))-(int)(pppmGrid[1]/procNums[1]*coord[1])) + rs[1])); + k_pre_s_sizes[ni++] = (int)((abs(lo_out - lo_in) + 2) * + (((int)(pppmGrid[0]/procNums[0]*(coord[0]+1))-(int)(pppmGrid[0]/procNums[0]*coord[0])) + rs[0]) * + (((int)(pppmGrid[1]/procNums[1]*(coord[1]+1))-(int)(pppmGrid[1]/procNums[1]*coord[1])) + rs[1])); + + assert( (int)(abs(hi_out - hi_in) + 2) <= + ((int)pppmGrid[2]/procNums[2] + (int)(double)((int)pppmGrid[2]%procNums[2])/procNums[2]*(coord[2]+1)) ); + assert( (int)(abs(lo_out - lo_in) + 2) <= + ((int)pppmGrid[2]/procNums[2] + (int)(double)((int)pppmGrid[2]%procNums[2])/procNums[2]*(coord[2]+1)) ); + + 
lo_out = (int)round(((coord[1]+1)*prd[1]/procNums[1] + cutoff/2.0) * pppmGrid[1]/prd[1] + 0.5, 10); + lo_in = (int)round((coord[1]+1)*prd[1]/procNums[1] * pppmGrid[1]/prd[1], 10) - 1; + hi_out = (int)round((coord[1]*prd[1]/procNums[1] - cutoff/2.0) * pppmGrid[1]/prd[1] + 0.5, 10); + hi_in = (int)round(coord[1]*prd[1]/procNums[1] * pppmGrid[1]/prd[1], 10); + k_pre_s_sizes[ni++] = (int)((abs(hi_out - hi_in) + 2) * + (((int)(pppmGrid[0]/procNums[0]*(coord[0]+1))-(int)(pppmGrid[0]/procNums[0]*coord[0])) + rs[0]) * + ((int)(pppmGrid[2]/procNums[2]*(coord[2]+1))-(int)(pppmGrid[2]/procNums[2]*coord[2]))); + k_pre_s_sizes[ni++] = (int)((abs(lo_out - lo_in) + 2) * + (((int)(pppmGrid[0]/procNums[0]*(coord[0]+1))-(int)(pppmGrid[0]/procNums[0]*coord[0])) + rs[0]) * + ((int)(pppmGrid[2]/procNums[2]*(coord[2]+1))-(int)(pppmGrid[2]/procNums[2]*coord[2]))); + + assert( (int)(abs(hi_out - hi_in) + 2) <= + ((int)pppmGrid[1]/procNums[1] + (int)(double)((int)pppmGrid[1]%procNums[1])/procNums[1]*(coord[1]+1)) ); + assert( (int)(abs(lo_out - lo_in) + 2) <= + ((int)pppmGrid[1]/procNums[1] + (int)(double)((int)pppmGrid[1]%procNums[1])/procNums[1]*(coord[1]+1)) ); + + lo_out = (int)round(((coord[0]+1)*prd[0]/procNums[0] + cutoff/2.0) * pppmGrid[0]/prd[0] + 0.5, 10); + lo_in = (int)round((coord[0]+1)*prd[0]/procNums[0] * pppmGrid[0]/prd[0], 10) - 1; + hi_out = (int)round((coord[0]*prd[0]/procNums[0] - cutoff/2.0) * pppmGrid[0]/prd[0] + 0.5, 10); + hi_in = (int)round(coord[0]*prd[0]/procNums[0] * pppmGrid[0]/prd[0], 10); + k_pre_s_sizes[ni++] = (int)((abs(hi_out - hi_in) + 2) * + ((int)(pppmGrid[1]/procNums[1]*(coord[1]+1))-(int)(pppmGrid[1]/procNums[1]*coord[1])) * + ((int)(pppmGrid[2]/procNums[2]*(coord[2]+1))-(int)(pppmGrid[2]/procNums[2]*coord[2]))); + k_pre_s_sizes[ni++] = (int)((abs(lo_out - lo_in) + 2) * + ((int)(pppmGrid[1]/procNums[1]*(coord[1]+1))-(int)(pppmGrid[1]/procNums[1]*coord[1])) * + ((int)(pppmGrid[2]/procNums[2]*(coord[2]+1))-(int)(pppmGrid[2]/procNums[2]*coord[2]))); + + 
assert( (int)(abs(hi_out - hi_in) + 2) <= + ((int)pppmGrid[0]/procNums[0] + (int)(double)((int)pppmGrid[0]%procNums[0])/procNums[0]*(coord[0]+1)) ); + assert( (int)(abs(lo_out - lo_in) + 2) <= + ((int)pppmGrid[0]/procNums[0] + (int)(double)((int)pppmGrid[0]%procNums[0])/procNums[0]*(coord[0]+1)) ); + +// printf("\n lo_out-lo_in %d down-val %d ", lo_out - lo_in, (int)pppmGrid[0]/procNums[0] + (int)(double)((int)pppmGrid[0]%procNums[0])/procNums[0]*(coord[0]+1)); + + for(i = 0; i < k_pre_len; i++) k_pre_s_sizes[i] = int(k_pre_s_sizes[i] * msg_k_pre + 0.5); + + // cycle counts + for(i = 0; i < k_pre_len; i++) + { + k_pre_cyc[i] = std::max((long)0, (long)((f_vol * ins_k_pre_a[i] + ins_k_pre_b[i]) * ins_k_pre_cpi * router_freq / cpu_freq / cpu_sim_speedup + 0.5)); + } + +} + +void +PeriodicAggressor::k_post_setup(double cutoff, int rank, double f_vol) +{ +#include "pa_lammps_model.h" + int neigh[6]; + int i, ni; + int hi_in, hi_out, lo_in, lo_out; + int rs[3]; + int coord[3]; + + for(i = 0; i < 3; i++) assert(int(cutoff / (prd[i] / procNums[i]) + 1) == 1); + k_post_len = 6; + + k_post_r_targets = new int[k_post_len]; + k_post_s_targets = new int[k_post_len]; + k_post_s_sizes = new int[k_post_len]; + k_post_cyc = new long[k_post_len]; + + // receive targets + rank_to_neigh(rank, neigh); + ni = 0; + k_post_r_targets[ni++] = neigh[0]; + k_post_r_targets[ni++] = neigh[1]; + k_post_r_targets[ni++] = neigh[2]; + k_post_r_targets[ni++] = neigh[3]; + k_post_r_targets[ni++] = neigh[4]; + k_post_r_targets[ni++] = neigh[5]; + + // send targets + ni = 0; + k_post_s_targets[ni++] = neigh[1]; + k_post_s_targets[ni++] = neigh[0]; + k_post_s_targets[ni++] = neigh[3]; + k_post_s_targets[ni++] = neigh[2]; + k_post_s_targets[ni++] = neigh[5]; + k_post_s_targets[ni++] = neigh[4]; + + + // send sizes + rank_to_xyz(rank, coord); + ni = 0; + + + // receive sizes + hi_out = (int)(((coord[0]+1)*prd[0]/procNums[0] + cutoff/2.0) * pppmGrid[0]/prd[0] + 0.5); + hi_in = 
(int)((coord[0]+1)*prd[0]/procNums[0] * pppmGrid[0]/prd[0]) - 1; + lo_out = (int)((coord[0]*prd[0]/procNums[0] - cutoff/2.0) * pppmGrid[0]/prd[0] + 0.5); + lo_in = (int)(coord[0]*prd[0]/procNums[0] * pppmGrid[0]/prd[0]); + rs[0] = (abs(lo_out - lo_in) + 2 + abs(hi_out - hi_in) + 2); + hi_out = (int)(((coord[1]+1)*prd[1]/procNums[1] + cutoff/2.0) * pppmGrid[1]/prd[1] + 0.5); + hi_in = (int)((coord[1]+1)*prd[1]/procNums[1] * pppmGrid[1]/prd[1]) - 1; + lo_out = (int)((coord[1]*prd[1]/procNums[1] - cutoff/2.0) * pppmGrid[1]/prd[1] + 0.5); + lo_in = (int)(coord[1]*prd[1]/procNums[1] * pppmGrid[1]/prd[1]); + rs[1] = (abs(lo_out - lo_in) + 2 + abs(hi_out - hi_in) + 2); + hi_out = (int)(((coord[2]+1)*prd[2]/procNums[2] + cutoff/2.0) * pppmGrid[2]/prd[2] + 0.5); + hi_in = (int)((coord[2]+1)*prd[2]/procNums[2] * pppmGrid[2]/prd[2]) - 1; + lo_out = (int)((coord[2]*prd[2]/procNums[2] - cutoff/2.0) * pppmGrid[2]/prd[2] + 0.5); + lo_in = (int)(coord[2]*prd[2]/procNums[2] * pppmGrid[2]/prd[2]); + rs[2] = (abs(lo_out - lo_in) + 2 + abs(hi_out - hi_in) + 2); + + // send sizes + lo_out = (int)round((((coord[0]-1)%procNums[0]+1)*prd[0]/procNums[0] + cutoff/2.0) * pppmGrid[0]/prd[0] + 0.5, 10); + lo_in = (int)round(((coord[0]-1)%procNums[0]+1)*prd[0]/procNums[0] * pppmGrid[0]/prd[0], 10) - 1; + hi_out = (int)round(((coord[0]+1)%procNums[0]*prd[0]/procNums[0] - cutoff/2.0) * pppmGrid[0]/prd[0] + 0.5, 10); + hi_in = (int)round((coord[0]+1)%procNums[0]*prd[0]/procNums[0] * pppmGrid[0]/prd[0], 10); + k_post_s_sizes[ni++] = (int)((abs(lo_out - lo_in) + 2) * + ((int)(pppmGrid[1]/procNums[1]*(coord[1]+1))-(int)(pppmGrid[1]/procNums[1]*coord[1])) * + ((int)(pppmGrid[2]/procNums[2]*(coord[2]+1))-(int)(pppmGrid[2]/procNums[2]*coord[2]))); + k_post_s_sizes[ni++] = (int)((abs(hi_out - hi_in) + 2) * + ((int)(pppmGrid[1]/procNums[1]*(coord[1]+1))-(int)(pppmGrid[1]/procNums[1]*coord[1])) * + ((int)(pppmGrid[2]/procNums[2]*(coord[2]+1))-(int)(pppmGrid[2]/procNums[2]*coord[2]))); + + assert( 
(int)(abs(hi_out - hi_in) + 2) <= + (pppmGrid[0]/procNums[0] + (int)(double)((int)pppmGrid[0]%procNums[0])/procNums[0]*(coord[0]+1))); + assert( (int)(abs(lo_out - lo_in) + 2) <= + (pppmGrid[0]/procNums[0] + (int)(double)((int)pppmGrid[0]%procNums[0])/procNums[0]*(coord[0]+1))); + + lo_out = (int)round((((coord[1]-1)%procNums[1]+1)*prd[1]/procNums[1] + cutoff/2.0) * pppmGrid[1]/prd[1] + 0.5, 10); + lo_in = (int)round(((coord[1]-1)%procNums[1]+1)*prd[1]/procNums[1] * pppmGrid[1]/prd[1], 10) - 1; + hi_out = (int)round(((coord[1]+1)%procNums[1]*prd[1]/procNums[1] - cutoff/2.0) * pppmGrid[1]/prd[1] + 0.5, 10); + hi_in = (int)round((coord[1]+1)%procNums[1]*prd[1]/procNums[1] * pppmGrid[1]/prd[1], 10); + k_post_s_sizes[ni++] = (int)((abs(lo_out - lo_in) + 2) * + (((int)(pppmGrid[0]/procNums[0]*(coord[0]+1))-(int)(pppmGrid[0]/procNums[0]*coord[0])) + rs[0]) * + ((int)(pppmGrid[2]/procNums[2]*(coord[2]+1))-(int)(pppmGrid[2]/procNums[2]*coord[2]))); + k_post_s_sizes[ni++] = (int)((abs(hi_out - hi_in) + 2) * + (((int)(pppmGrid[0]/procNums[0]*(coord[0]+1))-(int)(pppmGrid[0]/procNums[0]*coord[0])) + rs[0]) * + ((int)(pppmGrid[2]/procNums[2]*(coord[2]+1))-(int)(pppmGrid[2]/procNums[2]*coord[2]))); + + assert( (int)(abs(hi_out - hi_in) + 2) <= + (pppmGrid[1]/procNums[1] + (int)(double)((int)pppmGrid[1]%procNums[1])/procNums[1]*(coord[1]+1))); + assert( (int)(abs(lo_out - lo_in) + 2) <= + (pppmGrid[1]/procNums[1] + (int)(double)((int)pppmGrid[1]%procNums[1])/procNums[1]*(coord[1]+1))); + + lo_out = (int)round((((coord[2]-1)%procNums[2]+1)*prd[2]/procNums[2] + cutoff/2.0) * pppmGrid[2]/prd[2] + 0.5, 10); + lo_in = (int)round(((coord[2]-1)%procNums[2]+1)*prd[2]/procNums[2] * pppmGrid[2]/prd[2], 10) - 1; + hi_out = (int)round(((coord[2]+1)%procNums[2]*prd[2]/procNums[2] - cutoff/2.0) * pppmGrid[2]/prd[2] + 0.5, 10); + hi_in = (int)round((coord[2]+1)%procNums[2]*prd[2]/procNums[2] * pppmGrid[2]/prd[2], 10); + k_post_s_sizes[ni++] = (int)((abs(lo_out - lo_in) + 2) * + 
(((int)(pppmGrid[0]/procNums[0]*(coord[0]+1))-(int)(pppmGrid[0]/procNums[0]*coord[0])) + rs[0]) * + (((int)(pppmGrid[1]/procNums[1]*(coord[1]+1))-(int)(pppmGrid[1]/procNums[1]*coord[1])) + rs[1])); + k_post_s_sizes[ni++] = (int)((abs(hi_out - hi_in) + 2) * + (((int)(pppmGrid[0]/procNums[0]*(coord[0]+1))-(int)(pppmGrid[0]/procNums[0]*coord[0])) + rs[0]) * + (((int)(pppmGrid[1]/procNums[1]*(coord[1]+1))-(int)(pppmGrid[1]/procNums[1]*coord[1])) + rs[1])); + + assert( (int)(abs(hi_out - hi_in) + 2) <= + (pppmGrid[2]/procNums[2] + (int)(double)((int)pppmGrid[2]%procNums[2])/procNums[2]*(coord[2]+1))); + assert( (int)(abs(lo_out - lo_in) + 2) <= + (pppmGrid[2]/procNums[2] + (int)(double)((int)pppmGrid[2]%procNums[2])/procNums[2]*(coord[2]+1))); + + + for(i = 0; i < k_post_len; i++) k_post_s_sizes[i] = int(k_post_s_sizes[i] * msg_k_post + 0.5); + + // cycle counts + for(i = 0; i < k_post_len; i++) + { + k_post_cyc[i] = std::max((long)0, (long)((f_vol * ins_k_post_a[i] + ins_k_post_b[i]) * ins_k_post_cpi * router_freq / cpu_freq / cpu_sim_speedup + 0.5)); + } + +} + + +void +PeriodicAggressor::neigh_e_setup(double cutoff, int rank, double t_vol) +{ +#include "pa_lammps_model.h" + int neigh[6]; + int i = 0, ni = 0; + + neigh_e_len = 0; + rank_to_neigh(rank, neigh); + + for(i = 0; i < 6; i=i+2) + { + neigh_e_len++; + if(neigh[i] != neigh[i+1]) neigh_e_len++; + } + + neigh_e_r_targets = new int[neigh_e_len]; + neigh_e_s_targets = new int[neigh_e_len]; + neigh_e_s_sizes = new int[neigh_e_len]; + neigh_e_cyc = new long[neigh_e_len]; + + // receive targets + ni = 0; + neigh_e_r_targets[ni++] = neigh[0]; + if(neigh[0] != neigh[1]) neigh_e_r_targets[ni++] = neigh[1]; + neigh_e_r_targets[ni++] = neigh[2]; + if(neigh[2] != neigh[3]) neigh_e_r_targets[ni++] = neigh[3]; + neigh_e_r_targets[ni++] = neigh[4]; + if(neigh[4] != neigh[5]) neigh_e_r_targets[ni++] = neigh[5]; + + // send targets + ni = 0; + neigh_e_s_targets[ni++] = neigh[1]; + if(neigh[0] != neigh[1]) 
neigh_e_s_targets[ni++] = neigh[0]; + neigh_e_s_targets[ni++] = neigh[3]; + if(neigh[2] != neigh[3]) neigh_e_s_targets[ni++] = neigh[2]; + neigh_e_s_targets[ni++] = neigh[5]; + if(neigh[4] != neigh[5]) neigh_e_s_targets[ni++] = neigh[4]; + + // send sizes + ni = 0; + neigh_e_s_sizes[ni++] = (int)(prd[1]/procNums[1])*(prd[2]/procNums[2]); + if(neigh[0] != neigh[1]) neigh_e_s_sizes[ni++] = (int)(prd[1]/procNums[1])*(prd[2]/procNums[2]); + neigh_e_s_sizes[ni++] = (int)(prd[0]/procNums[0])*(prd[2]/procNums[2]); + if(neigh[2] != neigh[3]) neigh_e_s_sizes[ni++] = (int)(prd[0]/procNums[0])*(prd[2]/procNums[2]); + neigh_e_s_sizes[ni++] = (int)(prd[0]/procNums[0])*(prd[1]/procNums[1]); + if(neigh[4] != neigh[5]) neigh_e_s_sizes[ni++] = (int)(prd[0]/procNums[0])*(prd[1]/procNums[1]); + + for(i = 0; i < neigh_e_len; i++) neigh_e_s_sizes[i] = int(neigh_e_s_sizes[i] * msg_neigh_exch + 0.5); + + // setup cycle counts + ni = 0; + neigh_e_cyc[ni++] = std::max((long)0, (long)((t_vol * ins_neigh_exch_sr_a[0] + ins_neigh_exch_sr_b[0]) * ins_neigh_exch_sr_cpi * router_freq / cpu_freq / cpu_sim_speedup + 0.5)); + if(neigh[0] != neigh[1]) neigh_e_cyc[ni++] = 0; + neigh_e_cyc[ni++] = std::max((long)0, (long)((t_vol * ins_neigh_exch_sr_a[1] + ins_neigh_exch_sr_b[1]) * ins_neigh_exch_sr_cpi * router_freq / cpu_freq / cpu_sim_speedup + 0.5)); + if(neigh[2] != neigh[3]) neigh_e_cyc[ni++] = 0; + neigh_e_cyc[ni++] = std::max((long)0, (long)((t_vol * ins_neigh_exch_sr_a[2] + ins_neigh_exch_sr_b[2]) * ins_neigh_exch_sr_cpi * router_freq / cpu_freq / cpu_sim_speedup + 0.5)); + if(neigh[4] != neigh[5]) neigh_e_cyc[ni++] = 0; + +} + + +double +PeriodicAggressor::pppm_estimate_ik_error(double h, double p, int n, double prd[]) +{ + double q2; + double acons[5] = {1.0/23232.0, 7601.0/13628160.0, 143.0/69120.0, 517231.0/106536960.0, 106640677.0/11737571328.0}; + double sum = 0; + int i; + + q2 = 19.426017 * sqrt(n*prd[0]*prd[1]*prd[2]); + + for(i = 0; i < 5; i++) sum += acons[i] * pow(h*GEWALD, 2.0*i); 
+ + return q2*pow(h*GEWALD,5)*sqrt(GEWALD*p*sqrt(2*PI)*sum/n)/(p*p); + +} + + +int +PeriodicAggressor::pppm_factorable(int n) +{ + int factors[3] = {2, 3, 5}; + int i = 0, flag = 0; + + while(n > 1) + { + flag = 1; + for(i = 0; i < 3; i++) + { + if(n%factors[i] == 0) + { + n = n / factors[i]; + flag = 0; + break; + } + } + if(flag == 1) return 0; + } + return 1; +} + +void +PeriodicAggressor::get_k_params(int rank, double f_vol) +{ +#include "pa_lammps_model.h" + int *r_r, *s_r, *s_rs; + int r_len, s_len; + int *nx_in, *nx_fft, *nx_mid1, *nx_mid2; + int i = 0, n_tr = 0, j = 0; + + r_r = new int[process_cnt]; + s_r = new int[process_cnt]; + s_rs = new int[process_cnt]; + nx_in = new int[10*process_cnt]; + nx_fft = new int[10*process_cnt]; + nx_mid1 = new int[10*process_cnt]; + nx_mid2 = new int[10*process_cnt]; + + for(i = 0; i < (int)process_cnt; i++) + { + get_nx_in(i, &nx_in[i*10]); + get_nx_fft(i, &nx_fft[i*10]); + get_nx_mid1(i, &nx_mid1[i*10]); + get_nx_mid2(i, &nx_mid2[i*10]); + } + + n_tr = 0; + find_overlap(nx_in, 0, nx_fft, 0, rank, r_r, &r_len, s_r, s_rs, &s_len); +// printf("\n r_len %d s_len %d rank %d ", r_len, s_len, rank); + assert(r_len == s_len); + assert(n_tr < NUM_TRANSPOSE); + k_r_targets[n_tr] = new int[r_len]; + k_s_targets[n_tr] = new int[r_len]; + k_s_sizes[n_tr] = new int[r_len]; + for(i = 0; i < r_len; i++) + { + k_r_targets[n_tr][i] = r_r[i]; + k_s_targets[n_tr][i] = s_r[i]; + k_s_sizes[n_tr][i] = 8*s_rs[i]; + } + k_len[n_tr] = r_len; + + n_tr++; + find_overlap(nx_fft, 0, nx_mid1, 0, rank, r_r, &r_len, s_r, s_rs, &s_len); + assert(r_len == s_len); + assert(n_tr < NUM_TRANSPOSE); + k_r_targets[n_tr] = new int[r_len]; + k_s_targets[n_tr] = new int[r_len]; + k_s_sizes[n_tr] = new int[r_len]; + for(i = 0; i < r_len; i++) + { + k_r_targets[n_tr][i] = r_r[i]; + k_s_targets[n_tr][i] = s_r[i]; + k_s_sizes[n_tr][i] = 16*s_rs[i]; + } + k_len[n_tr] = r_len; + + n_tr++; + find_overlap(nx_mid1, 2, nx_mid2, 2, rank, r_r, &r_len, s_r, s_rs, &s_len); + 
assert(r_len == s_len); + assert(n_tr < NUM_TRANSPOSE); + k_r_targets[n_tr] = new int[r_len]; + k_s_targets[n_tr] = new int[r_len]; + k_s_sizes[n_tr] = new int[r_len]; + for(i = 0; i < r_len; i++) + { + k_r_targets[n_tr][i] = r_r[i]; + k_s_targets[n_tr][i] = s_r[i]; + k_s_sizes[n_tr][i] = 16*s_rs[i]; + } + k_len[n_tr] = r_len; + + n_tr++; + find_overlap(nx_mid2, 4, nx_fft, 4, rank, r_r, &r_len, s_r, s_rs, &s_len); + assert(r_len == s_len); + assert(n_tr < NUM_TRANSPOSE); + k_r_targets[n_tr] = new int[r_len]; + k_s_targets[n_tr] = new int[r_len]; + k_s_sizes[n_tr] = new int[r_len]; + for(i = 0; i < r_len; i++) + { + k_r_targets[n_tr][i] = r_r[i]; + k_s_targets[n_tr][i] = s_r[i]; + k_s_sizes[n_tr][i] = 16*s_rs[i]; + } + k_len[n_tr] = r_len; + + + for(j = 0; j < 3; j++) + { + n_tr++; + find_overlap(nx_fft, 0, nx_mid1, 0, rank, r_r, &r_len, s_r, s_rs, &s_len); + assert(r_len == s_len); + assert(n_tr < NUM_TRANSPOSE); + k_r_targets[n_tr] = new int[r_len]; + k_s_targets[n_tr] = new int[r_len]; + k_s_sizes[n_tr] = new int[r_len]; + for(i = 0; i < r_len; i++) + { + k_r_targets[n_tr][i] = r_r[i]; + k_s_targets[n_tr][i] = s_r[i]; + k_s_sizes[n_tr][i] = 16*s_rs[i]; + } + k_len[n_tr] = r_len; + + n_tr++; + find_overlap(nx_mid1, 2, nx_mid2, 2, rank, r_r, &r_len, s_r, s_rs, &s_len); + assert(r_len == s_len); + assert(n_tr < NUM_TRANSPOSE); + k_r_targets[n_tr] = new int[r_len]; + k_s_targets[n_tr] = new int[r_len]; + k_s_sizes[n_tr] = new int[r_len]; + for(i = 0; i < r_len; i++) + { + k_r_targets[n_tr][i] = r_r[i]; + k_s_targets[n_tr][i] = s_r[i]; + k_s_sizes[n_tr][i] = 16*s_rs[i]; + } + k_len[n_tr] = r_len; + + n_tr++; + find_overlap(nx_mid2, 4, nx_in, 4, rank, r_r, &r_len, s_r, s_rs, &s_len); + assert(r_len == s_len); + assert(n_tr < NUM_TRANSPOSE); + k_r_targets[n_tr] = new int[r_len]; + k_s_targets[n_tr] = new int[r_len]; + k_s_sizes[n_tr] = new int[r_len]; + for(i = 0; i < r_len; i++) + { + k_r_targets[n_tr][i] = r_r[i]; + k_s_targets[n_tr][i] = s_r[i]; + k_s_sizes[n_tr][i] 
= 16*s_rs[i]; + } + k_len[n_tr] = r_len; + } + + + delete[] r_r; + delete[] s_r; + delete[] s_rs; + delete[] nx_in; + delete[] nx_fft; + delete[] nx_mid1; + delete[] nx_mid2; + + // cycle counts + for(i = 0; i < NUM_TRANSPOSE; i++) + { + k_cyc[i] = std::max((long)0, (long)((f_vol * ins_k_fft_a[i] + ins_k_fft_b[i]) * ins_k_fft_cpi * router_freq / cpu_freq / cpu_sim_speedup + 0.5)); + } +} + +#pragma GCC diagnostic pop + +int +PeriodicAggressor::find_one_overlap(int a[6], int b[6], int s[3]) +{ + int r[6]; + + r[0] = std::max(a[0], b[0]); + r[1] = std::min(a[1], b[1]); + r[2] = std::max(a[2], b[2]); + r[3] = std::min(a[3], b[3]); + r[4] = std::max(a[4], b[4]); + r[5] = std::min(a[5], b[5]); + + s[0] = s[1] = s[2] = 0; + + if( (r[0] > r[1]) || (r[2] > r[3]) || (r[4] > r[5]) ) return 0; + + s[0] = r[1] - r[0] + 1; + s[1] = r[3] - r[2] + 1; + s[2] = r[5] - r[4] + 1; + + return 1; +} + +void +PeriodicAggressor::find_overlap(int all_in[], int in_shift, int all_out[], int out_shift, int rank, int r_r[], int *r_len, int s_r[], int s_rs[], int *s_len) +{ + int i, r; + int s[3]; + + *r_len = 0; + *s_len = 0; + + for(i = 1; i < (int)process_cnt; i++) + { + r = (rank + i) % process_cnt; + + if(find_one_overlap(&all_in[rank*10+in_shift], &all_out[r*10+out_shift], s)) + { + //printf("\n s_len is %d ", *s_len); + s_r[*s_len] = r; + s_rs[*s_len] = s[0] * s[1] *s[2]; + (*s_len)++; + } + + //printf("\n r is %d ", r); + if(find_one_overlap(&all_in[r*10+in_shift], &all_out[rank*10+out_shift], s)) + { + //printf("\n r_len is %d ", r*10+in_shift); + r_r[*r_len] = r; + (*r_len)++; + } + } +} + +void +PeriodicAggressor::get_nx_in(int rank, int nx[10]) +{ + int coord[3]; + + rank_to_xyz(rank, coord); + + nx[0]=int(double(coord[0]) / procNums[0] * pppmGrid[0]); + nx[1]=int(double(coord[0]+1) / procNums[0] * pppmGrid[0]) - 1; + nx[2]=int(double(coord[1]) / procNums[1] * pppmGrid[1]); + nx[3]=int(double(coord[1]+1) / procNums[1] * pppmGrid[1]) - 1; + nx[4]=int(double(coord[2]) / procNums[2] * pppmGrid[2]);
+ nx[5]=int(double(coord[2]+1) / procNums[2] * pppmGrid[2]) - 1; + nx[6]=nx[0]; + nx[7]=nx[1]; + nx[8]=nx[2]; + nx[9]=nx[3]; +} + +void +PeriodicAggressor::get_nx_fft(int rank, int nx[10]) +{ + int py, pz, me_y, me_z; + + if(pppmGrid[2] > process_cnt) + { + py = 1; + pz = process_cnt; + } + else + { + best_2d_mapping(&py, &pz, int(pppmGrid[1]), int(pppmGrid[2])); + } + + me_y = rank % py; + me_z = rank / py; + + nx[0] = 0; + nx[1] = pppmGrid[0] - 1; + nx[2] = me_y * pppmGrid[1] / py; + nx[3] = (me_y+1) * pppmGrid[1] / py - 1; + nx[4] = me_z * pppmGrid[2] / pz; + nx[5] = (me_z+1) * pppmGrid[2] / pz - 1; + nx[6]=nx[0]; + nx[7]=nx[1]; + nx[8]=nx[2]; + nx[9]=nx[3]; +} + +void +PeriodicAggressor::get_nx_mid1(int rank, int nx[10]) +{ + int f1, f2; + int ip1, ip2; + + bifactor(process_cnt, &f1, &f2); + + ip1 = rank % f1; + ip2 = rank / f1; + + nx[0]=ip1*(int)pppmGrid[0]/f1; + nx[1]=(ip1+1)*(int)pppmGrid[0]/f1-1; + nx[2]=0; + nx[3]=(int)pppmGrid[1]-1; + nx[4]=ip2*(int)pppmGrid[2]/f2; + nx[5]=(ip2+1)*(int)pppmGrid[2]/f2-1; + nx[6]=nx[0]; + nx[7]=nx[1]; + nx[8]=nx[2]; + nx[9]=nx[3]; +} + +void +PeriodicAggressor::get_nx_mid2(int rank, int nx[10]) +{ + int f1, f2; + int ip1, ip2; + + bifactor(process_cnt, &f1, &f2); + + ip1 = rank % f1; + ip2 = rank / f1; + + nx[0]=ip1*(int)pppmGrid[0]/f1; + nx[1]=(ip1+1)*(int)pppmGrid[0]/f1-1; + nx[2]=ip2*(int)pppmGrid[1]/f2; + nx[3]=(ip2+1)*(int)pppmGrid[1]/f2-1; + nx[4]=0; + nx[5]=(int)pppmGrid[2]-1; + nx[6]=nx[0]; + nx[7]=nx[1]; + + nx[8]=nx[2]; + nx[9]=nx[3]; +} + + +void +PeriodicAggressor::best_2d_mapping(int *px, int *py, int nx, int ny) +{ + int bestsurf, bestboxx, bestboxy; + int boxx, boxy, surf; + int ipx, ipy; + + bestsurf = 2 * (nx + ny); + bestboxx = 0; + bestboxy = 0; + + ipx = 1; + + while(ipx <= (int)process_cnt) + { + if(process_cnt % ipx == 0) + { + ipy = process_cnt / ipx; + boxx = nx / ipx; + if(nx % ipx) boxx++; + boxy = ny / ipy; + if(ny % ipy) boxy++; + surf = boxx + boxy; + if( (surf < bestsurf) || + ( (surf == 
bestsurf) && (boxx*boxy > bestboxx*bestboxy) )) + { + + bestsurf = surf; + bestboxx = boxx; + bestboxy = boxy; + *px = ipx; + *py = ipy; + } + } + ipx++; + } +} + +void +PeriodicAggressor::bifactor(int n, int *f1, int *f2) +{ + *f1 = int(sqrt(n)); + while(*f1 > 0) + { + *f2 = n / *f1; + if((*f1) * (*f2) == n) return; + (*f1)--; + } +} + +void +PeriodicAggressor::rank_to_xyz(int rank, int coord[3]) +{ + coord[0] = rank / procNums[2] / procNums[1] % procNums[0]; + coord[1] = rank / procNums[2] % procNums[1]; + coord[2] = rank % procNums[2]; +} + +int +PeriodicAggressor::xyz_to_rank(int coord[3]) +{ + int mods[3]; + int i = 0; + for(i = 0; i < 3; i++) + { + mods[i] = coord[i]%procNums[i]; + while(mods[i] < 0) mods[i] += procNums[i]; + } + return mods[0]*procNums[1]*procNums[2] + mods[1]*procNums[2] + mods[2]; +} + +void +PeriodicAggressor::rank_to_neigh(int rank, int neighs[6]) +{ + int coord[3]; + int tmp_coord[3]; + rank_to_xyz(rank, coord); + + tmp_coord[0]=coord[0]+1; + tmp_coord[1]=coord[1]; + tmp_coord[2]=coord[2]; + neighs[0] = xyz_to_rank(tmp_coord); + tmp_coord[0]=coord[0]-1; + tmp_coord[1]=coord[1]; + tmp_coord[2]=coord[2]; + neighs[1] = xyz_to_rank(tmp_coord); + tmp_coord[0]=coord[0]; + tmp_coord[1]=coord[1]+1; + tmp_coord[2]=coord[2]; + neighs[2] = xyz_to_rank(tmp_coord); + tmp_coord[0]=coord[0]; + tmp_coord[1]=coord[1]-1; + tmp_coord[2]=coord[2]; + neighs[3] = xyz_to_rank(tmp_coord); + tmp_coord[0]=coord[0]; + tmp_coord[1]=coord[1]; + tmp_coord[2]=coord[2]+1; + neighs[4] = xyz_to_rank(tmp_coord); + tmp_coord[0]=coord[0]; + tmp_coord[1]=coord[1]; + tmp_coord[2]=coord[2]-1; + neighs[5] = xyz_to_rank(tmp_coord); +} + + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ft=c ts=8 sts=4 sw=4 expandtab + */ diff --git a/swm/src/periodic_aggressor/periodic_aggressor.h b/swm/src/periodic_aggressor/periodic_aggressor.h new file mode 100644 index 0000000..e6ae962 --- /dev/null +++ 
b/swm/src/periodic_aggressor/periodic_aggressor.h @@ -0,0 +1,218 @@ +/* + * ===================================================================================== + * + * Filename: periodic_aggressor.h + * + * Description: + * + * Version: 1.0 + * Created: 12/3/2013 01:05:02 PM + * Revision: none + * Compiler: gcc + * + * Author: Nate Andrysco, nathan.r.andrysco@intel.com + * Company: Intel + * + * ===================================================================================== + */ + +#ifndef _PERIODIC_AGGRESSOR_H +#define _PERIODIC_AGGRESSOR_H + +#define SWM_APP_TAG_BASE 0 + +// Internal LAMMPS parameters +// Skin cutoff for ghost neighbor exchange (on comm) +#define GHOST_SKIN_CUTOFF 12.0 +// Skin cutoff for fft neighbor exchange (on commgrid) +#define FFT_SKIN_CUTOFF 2.0 +// Number of atoms in a basic block +#define N_ATOMS_BASE 32000 +// Neighbor check after NEIGH_DELAY, then every NEIGH_EVERY +#define NEIGH_DELAY 5 +#define NEIGH_EVERY 1 +// Dimensions of the basic block +#define XLO_BASE (-27.5) +#define XHI_BASE (27.5) +#define YLO_BASE (-38.5) +#define YHI_BASE (38.5) +#define ZLO_BASE (-36.3646) +#define ZHI_BASE (36.3615) +// lammps factors for determining required decomposition +#define GEWALD 0.243177 +#define FFT_ACCURACY 0.033206 +// number of transposes in fft +#define NUM_TRANSPOSE 13 +// number of allreduces at the end of neighbor exchange +#define NUM_NEIGH_ALLREDUCE 5 + +#define PI 3.14159265358979323846 + + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "swm-include.h" +using namespace std; + +class PeriodicAggressor +{ + +public: + + PeriodicAggressor( +// SWMUserIF* user_if, + boost::property_tree::ptree cfg, + void**& generic_ptrs + ); + ~PeriodicAggressor(); + + void call(); + void do_lammps_phase(); + void do_incast_phase(); + +protected: + //general config + uint32_t process_cnt; + uint32_t iteration_cnt; + uint32_t compute_delay; + uint32_t process_id; + double
router_freq; // router frequency in Hz + double cpu_freq; // CPU frequency in Hz + double cpu_sim_speedup; // simulation speedup factor (makes CPU faster) + + bool show_iterations; + bool debug; + bool show_progress; + + //lammps phase config + uint32_t lammps_iters_per_iter; // number of time steps to simulate + uint32_t x_rep; // number of replicas in X dimension + uint32_t y_rep; // number of replicas in Y dimension + uint32_t z_rep; // number of replicas in Z dimension + uint32_t req_vc; //not configurable + uint32_t resp_vc; //not configurable + uint32_t rsp_bytes; //not configurable + + //incast phase config + uint32_t incast_process_cnt; + uint32_t incast_iters_per_iter; + uint32_t incast_msg_req_bytes; + uint32_t incast_msg_rsp_bytes; + uint32_t incast_min_source_id; + uint32_t incast_max_source_id; + uint32_t incast_dest_rank_id; + + + +private: + double prd[3]; + double pppmGrid[3]; + int procNums[3]; + + int *k_r_targets[NUM_TRANSPOSE]; + int *k_s_targets[NUM_TRANSPOSE]; + int *k_s_sizes[NUM_TRANSPOSE]; + long k_cyc[NUM_TRANSPOSE]; + int k_len[NUM_TRANSPOSE]; + + int *gh_fw_r_targets; + int *gh_fw_s_targets; + int *gh_fw_s_sizes; + long *gh_fw_cyc; + int gh_fw_len; + + int *gh_rw_r_targets; + int *gh_rw_s_targets; + int *gh_rw_s_sizes; + long *gh_rw_cyc; + int gh_rw_len; + + int *k_pre_r_targets; + int *k_pre_s_targets; + int *k_pre_s_sizes; + long *k_pre_cyc; + int k_pre_len; + + int *k_post_r_targets; + int *k_post_s_targets; + int *k_post_s_sizes; + long *k_post_cyc; + int k_post_len; + + int *fix_r_targets; + int *fix_s_targets; + int *fix_s_sizes; + long *fix_cyc; + int fix_len; + + int *neigh_e_r_targets; + int *neigh_e_s_targets; + int *neigh_e_s_sizes; + long *neigh_e_cyc; + int neigh_e_len; + + int *neigh_b_r_targets; + int *neigh_b_s_targets; + int *neigh_b_s_sizes; + long *neigh_b_cyc; + int neigh_b_len; + + long neigh_check_cyc; + double neigh_check_average; + double neigh_check_cumulative; + int neigh_check_count; + long 
neigh_end_cyc[NUM_NEIGH_ALLREDUCE]; + + long start_cyc; + long k_energy_cyc; + long final_cyc; + + void lammps_model_init(); + void doP2P(int len, int *r_targets, int *s_targets, int *s_sizes, long *cyc_cnt); + void doNeighExch(); + void doFFT(); + bool neigh_check(); + + // process decomposition + void proc_decomposition(int n, double prd[], int procNums[]); + + // PPPM decomposition + void pppm_decomposition(int n, double prd[], double pppmGrid[]); + double pppm_estimate_ik_error(double h, double prd, int n, double all_prd[]); + int pppm_factorable(int n); + + // neighbor comm setup + void ghost_setup(double cutoff, int rank, double t_vol); + void k_pre_setup(double cutoff, int rank, double f_vol); + void k_post_setup(double cutoff, int rank, double f_vol); + void neigh_e_setup(double cutoff, int rank, double t_vol); + + // k space parameters + void get_k_params(int rank, double f_vol); + void get_nx_in(int rank, int nx[10]); + void get_nx_fft(int rank, int nx[10]); + void get_nx_mid1(int rank, int nx[10]); + void get_nx_mid2(int rank, int nx[10]); + + int find_one_overlap(int a[6], int b[6], int s[3]); + void find_overlap(int all_in[], int in_shift, int all_out[], int out_shift, int rank, int r_r[], int *r_len, int s_r[], int s_rs[], int *s_len); + + void best_2d_mapping(int *px, int *py, int nx, int ny); + void bifactor(int n, int *f1, int *f2); + + void rank_to_xyz(int rank, int coord[3]); + int xyz_to_rank(int coord[3]); + void rank_to_neigh(int rank, int neighs[6]); + +}; + +#endif diff --git a/swm/src/periodic_aggressor/periodic_aggressor.json b/swm/src/periodic_aggressor/periodic_aggressor.json new file mode 100644 index 0000000..bc9dfbd --- /dev/null +++ b/swm/src/periodic_aggressor/periodic_aggressor.json @@ -0,0 +1,27 @@ +{ + "jobs": { + "name": "StandaloneSWM", + "app": "periodic_aggressor", + "size": 2048, + "time": 0, + "cfg": + { + "router_freq" : 800e6, + "cpu_freq" : 4e9, + "cpu_sim_speedup" : 1e6, + "num_x_replicas": 3, + "num_y_replicas": 3, +
"num_z_replicas": 3, + "lammps_iters_per_iter": 1, + "req_vc" : 0, + "resp_vc" : 1, + "incast_size" : 100, + "incast_iters_per_iter" : 5, + "incast_dest_rank_id" : 99, + "incast_src_rank_id_interval": [0,98], + "incast_msg_req_bytes" : 1310720, + "incast_msg_rsp_bytes" : 0, + "debug" : true + } + } +} \ No newline at end of file From 94c22aac08ef069aeef925f4114ee48d33a49e82 Mon Sep 17 00:00:00 2001 From: Neil McGlohon Date: Tue, 27 Apr 2021 19:37:36 -0400 Subject: [PATCH 10/14] Unify SWM jobs.cfg.app for workload name --- swm/src/hacc/hacc_coral.json | 2 +- swm/src/hacc/hacc_small.json | 2 +- swm/src/hacc/workload.json | 2 +- swm/src/lammps/lammps_workload.json | 2 +- swm/src/lammps/lammps_workload1.json | 4 ++-- swm/src/nekbone/workload.json | 2 +- swm/src/nekbone/workload1.json | 2 +- swm/src/periodic_aggressor/periodic_aggressor.json | 2 +- swm/src/point_to_point/example.json | 2 +- 9 files changed, 10 insertions(+), 10 deletions(-) diff --git a/swm/src/hacc/hacc_coral.json b/swm/src/hacc/hacc_coral.json index 67ce155..15e1618 100644 --- a/swm/src/hacc/hacc_coral.json +++ b/swm/src/hacc/hacc_coral.json @@ -2,7 +2,6 @@ "jobs": [ { "name": "StandaloneSWM", - "app": "hacc", "size": 786432, "time": 0, "placement": { @@ -12,6 +11,7 @@ "weight": 1 }, "cfg": { + "app": "hacc", "request_vc": 0, "response_vc": 1, "iteration_cnt": 1, diff --git a/swm/src/hacc/hacc_small.json b/swm/src/hacc/hacc_small.json index eab0e78..f89b1f0 100644 --- a/swm/src/hacc/hacc_small.json +++ b/swm/src/hacc/hacc_small.json @@ -2,7 +2,6 @@ "jobs": [ { "name": "StandaloneSWM", - "app": "hacc", "size": 128, "time": 0, "placement": { @@ -12,6 +11,7 @@ "weight": 1 }, "cfg": { + "app": "hacc", "request_vc": 0, "response_vc": 1, "iteration_cnt": 1, diff --git a/swm/src/hacc/workload.json b/swm/src/hacc/workload.json index fc8dbb1..b9617d4 100644 --- a/swm/src/hacc/workload.json +++ b/swm/src/hacc/workload.json @@ -2,10 +2,10 @@ "jobs": [ { "name": "StandaloneSWM", - "app": "hacc", "size": 128, 
"time": 0, "cfg": { + "app": "hacc", "request_vc": 0, "response_vc": 1, "iteration_cnt": 1, diff --git a/swm/src/lammps/lammps_workload.json b/swm/src/lammps/lammps_workload.json index aca57fe..d799bcc 100644 --- a/swm/src/lammps/lammps_workload.json +++ b/swm/src/lammps/lammps_workload.json @@ -1,12 +1,12 @@ { "jobs": { "name": "StandaloneSWM", - "app": "dll", "dll_path": "apps/dll/lammps.so", "size": 2048, "time": 0, "cfg": { + "app": "lammps", "num_x_replicas": 3, "num_y_replicas": 3, "num_z_replicas": 3, diff --git a/swm/src/lammps/lammps_workload1.json b/swm/src/lammps/lammps_workload1.json index 3f0a596..ffcec12 100644 --- a/swm/src/lammps/lammps_workload1.json +++ b/swm/src/lammps/lammps_workload1.json @@ -1,12 +1,12 @@ { "jobs": { "name": "StandaloneSWM", - "app": "dll", "dll_path": "apps/dll/lammps.so", "size": 2048, "time": 0, "cfg": - { + { + "app": "lammps", "num_x_replicas": 3, "num_y_replicas": 3, "num_z_replicas": 3, diff --git a/swm/src/nekbone/workload.json b/swm/src/nekbone/workload.json index bc9c61d..d4bde0f 100644 --- a/swm/src/nekbone/workload.json +++ b/swm/src/nekbone/workload.json @@ -3,10 +3,10 @@ "jobs": { "name": "StandaloneSWM", - "app": "nekbone", "size": 2197 , "time": 0, "cfg": { + "app": "nekbone", "request_vc": 0, "response_vc": 1, "iteration_cnt": 1, diff --git a/swm/src/nekbone/workload1.json b/swm/src/nekbone/workload1.json index 9b10b07..fe18340 100644 --- a/swm/src/nekbone/workload1.json +++ b/swm/src/nekbone/workload1.json @@ -3,10 +3,10 @@ "jobs": { "name": "StandaloneSWM", - "app": "nekbone", "size": 729 , "time": 0, "cfg": { + "app": "nekbone", "request_vc": 0, "response_vc": 1, "iteration_cnt": 1, diff --git a/swm/src/periodic_aggressor/periodic_aggressor.json b/swm/src/periodic_aggressor/periodic_aggressor.json index bc9dfbd..c699b50 100644 --- a/swm/src/periodic_aggressor/periodic_aggressor.json +++ b/swm/src/periodic_aggressor/periodic_aggressor.json @@ -1,11 +1,11 @@ { "jobs": { "name": "StandaloneSWM", - "app": 
"periodic_aggressor", "size": 2048, "time": 0, "cfg": { + "app": "periodic_aggressor", "router_freq" : 800e6, "cpu_freq" : 4e9, "cpu_sim_speedup" : 1e6, diff --git a/swm/src/point_to_point/example.json b/swm/src/point_to_point/example.json index 57682b6..3d552b2 100644 --- a/swm/src/point_to_point/example.json +++ b/swm/src/point_to_point/example.json @@ -2,10 +2,10 @@ "jobs": [ { "name": "job1", - "app": "point_to_point", "size": 128, "time": 0, "cfg": { + "app": "point_to_point", "iteration_cnt": 1, "num_vcs": 8, "msg_req_bytes": 2, From 40eb83d483f4d87ce75983e5c0ffadee3223cd51 Mon Sep 17 00:00:00 2001 From: "Kevin A. Brown" Date: Fri, 30 Apr 2021 18:32:44 +0000 Subject: [PATCH 11/14] Add iteration markers and fixed LAMMPS emulation of alltoallv in doFFT by changing SWM_Send to SWM_Isend. --- swm/src/lammps/lammps.cpp | 32 ++++++++++++++++++++++- swm/src/lammps/lammps.h | 1 + swm/src/milc/milc_swm_user_code.cpp | 2 ++ swm/src/milc/milc_swm_user_code.h | 1 + swm/src/nekbone/nekbone_swm_user_code.cpp | 4 +++ swm/src/nekbone/nekbone_swm_user_code.h | 1 + 6 files changed, 40 insertions(+), 1 deletion(-) diff --git a/swm/src/lammps/lammps.cpp b/swm/src/lammps/lammps.cpp index e4b68fa..a8c4d80 100644 --- a/swm/src/lammps/lammps.cpp +++ b/swm/src/lammps/lammps.cpp @@ -188,12 +188,14 @@ void LAMMPS_SWM::doFFT() { uint32_t *h; + uint32_t *h2; int i = 0, idx = 0; for(idx = 0; idx < NUM_TRANSPOSE; idx++) { h = new uint32_t[k_len[idx]]; + h2 = new uint32_t[k_len[idx]]; SWM_Compute(k_cyc[idx]); for(i = 0; i < k_len[idx]; i++) @@ -202,12 +204,24 @@ LAMMPS_SWM::doFFT() } for(i = 0; i < k_len[idx]; i++) { - SWM_Send(k_s_targets[idx][i], SWM_COMM_WORLD, 0, req_vc, resp_vc, NO_BUFFER, k_s_sizes[idx][i]); + //SWM_Send(k_s_targets[idx][i], SWM_COMM_WORLD, 0, req_vc, resp_vc, NO_BUFFER, k_s_sizes[idx][i]); + SWM_Isend(k_s_targets[idx][i], SWM_COMM_WORLD, 0, req_vc, resp_vc, NO_BUFFER, k_s_sizes[idx][i], 0, &h2[i]); } + SWM_Waitall(k_len[idx], h2); SWM_Waitall(k_len[idx], h); delete 
h; + delete[] h2; /* h2 is allocated with new[] earlier in this hunk, so it must be released with delete[]; the unchanged context line that deletes h has the same mismatch but cannot be edited here without breaking the hunk */ } + // Below for instrumentation +// fprintf(stderr, "\nSWMMSGS %d", process_id); +// for(idx = 0; idx < NUM_TRANSPOSE; idx++) +// { +// for(i = 0; i < k_len[idx]; i++) +// { +// fprintf(stderr, " %d", k_s_sizes[idx][i]); +// } +// } } bool @@ -264,37 +278,53 @@ LAMMPS_SWM::call() if(neigh_check()) { // do neighbor exchange +//SWM_Mark_Iteration(0); // Instrumentation doNeighExch(); +//SWM_Mark_Iteration(10); // Instrumentation } else { // ghost forward exchange +//SWM_Mark_Iteration(0); // Instrumentation doP2P(gh_fw_len, gh_fw_r_targets, gh_fw_s_targets, gh_fw_s_sizes, gh_fw_cyc); +//SWM_Mark_Iteration(20); // Instrumentation } // k-space pre exchange +//SWM_Mark_Iteration(0); // Instrumentation doP2P(k_pre_len, k_pre_r_targets, k_pre_s_targets, k_pre_s_sizes, k_pre_cyc); +//SWM_Mark_Iteration(21); // Instrumentation // do FFT +//SWM_Mark_Iteration(0); // Instrumentation doFFT(); +//SWM_Mark_Iteration(30); // Instrumentation // k-space post exchange +//SWM_Mark_Iteration(0); // Instrumentation doP2P(k_post_len, k_post_r_targets, k_post_s_targets, k_post_s_sizes, k_post_cyc); +//SWM_Mark_Iteration(22); // Instrumentation // energy calculation SWM_Compute(k_energy_cyc); SWM_Allreduce(48, rsp_bytes, SWM_COMM_WORLD, req_vc, resp_vc, NO_BUFFER, NO_BUFFER); // ghost reverse exchange +//SWM_Mark_Iteration(0); // Instrumentation doP2P(gh_rw_len, gh_rw_r_targets, gh_rw_s_targets, gh_rw_s_sizes, gh_rw_cyc); +//SWM_Mark_Iteration(23); // Instrumentation // ghost fixed values exchange +//SWM_Mark_Iteration(0); // Instrumentation doP2P(fix_len, fix_r_targets, fix_s_targets, fix_s_sizes, fix_cyc); +//SWM_Mark_Iteration(24); // Instrumentation // final integration SWM_Compute(final_cyc); SWM_Allreduce(8, rsp_bytes, SWM_COMM_WORLD, req_vc, resp_vc, NO_BUFFER, NO_BUFFER); // temperature SWM_Allreduce(48, rsp_bytes, SWM_COMM_WORLD, req_vc, resp_vc, NO_BUFFER, NO_BUFFER); // pressure + + //SWM_Mark_Iteration(ts); // removed for instrumentation test } SWM_Finalize(); 
//MM: comment assert(0); diff --git a/swm/src/lammps/lammps.h b/swm/src/lammps/lammps.h index f1053ee..9a49817 100644 --- a/swm/src/lammps/lammps.h +++ b/swm/src/lammps/lammps.h @@ -3,6 +3,7 @@ #include #include +#include // For printing message sizes #include "swm-include.h" // Internal LAMMPS paramenters // Skin cutoff for ghost neighbor exchange (on comm) diff --git a/swm/src/milc/milc_swm_user_code.cpp b/swm/src/milc/milc_swm_user_code.cpp index 153feb3..a831c86 100644 --- a/swm/src/milc/milc_swm_user_code.cpp +++ b/swm/src/milc/milc_swm_user_code.cpp @@ -284,6 +284,8 @@ MilcSWMUserCode::call() ); } + SWM_Mark_Iteration(iter); + } SWM_Finalize(); } diff --git a/swm/src/milc/milc_swm_user_code.h b/swm/src/milc/milc_swm_user_code.h index 7e7bd71..dcf5359 100644 --- a/swm/src/milc/milc_swm_user_code.h +++ b/swm/src/milc/milc_swm_user_code.h @@ -33,6 +33,7 @@ #include #include #include +#include // for debugging output #include "swm-include.h" diff --git a/swm/src/nekbone/nekbone_swm_user_code.cpp b/swm/src/nekbone/nekbone_swm_user_code.cpp index fcb2fd5..6cfbe4e 100644 --- a/swm/src/nekbone/nekbone_swm_user_code.cpp +++ b/swm/src/nekbone/nekbone_swm_user_code.cpp @@ -170,6 +170,7 @@ Err_t NEKBONESWMUserCode::run() { Err_t err=0; + uint32_t iter = 0; for(unsigned polyO=Pbegin; polyO polyO=%d\n", __LINE__, polyO); @@ -212,6 +213,8 @@ Err_t NEKBONESWMUserCode::run() //NEKbone loop over element/rank removed-->} + SWM_Mark_Iteration(iter); //KB + iter++; } assert(err == 0); @@ -695,6 +698,7 @@ Err_t NEKBONESWMUserCode::conjugateGradient() nek_gsop("on w"); nek_glsc3(); nek_glsc3(); + } return err; diff --git a/swm/src/nekbone/nekbone_swm_user_code.h b/swm/src/nekbone/nekbone_swm_user_code.h index 1c83a69..bdd8aec 100644 --- a/swm/src/nekbone/nekbone_swm_user_code.h +++ b/swm/src/nekbone/nekbone_swm_user_code.h @@ -22,6 +22,7 @@ #include #include #include +#include // For debugging output #include "swm-include.h" From 4ad0fbb648e66546e15eb4b3afcb4f74eb8892b3 Mon Sep 17 
00:00:00 2001 From: Neil McGlohon Date: Mon, 3 May 2021 14:05:46 -0400 Subject: [PATCH 12/14] Patch Kevins lammps doFFT optimization --- swm/src/lammps/lammps.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/swm/src/lammps/lammps.cpp b/swm/src/lammps/lammps.cpp index b9200c2..8069ce9 100644 --- a/swm/src/lammps/lammps.cpp +++ b/swm/src/lammps/lammps.cpp @@ -188,12 +188,14 @@ void LAMMPS_SWM::doFFT() { uint32_t *h; + uint32_t *h2; /* request handles for the non-blocking sends posted below, one per SWM_Isend */ int i = 0, idx = 0; for(idx = 0; idx < NUM_TRANSPOSE; idx++) { h = new uint32_t[k_len[idx]]; + h2 = new uint32_t[k_len[idx]]; SWM_Compute(k_cyc[idx]); for(i = 0; i < k_len[idx]; i++) @@ -202,11 +204,14 @@ LAMMPS_SWM::doFFT() } for(i = 0; i < k_len[idx]; i++) { - SWM_Send(k_s_targets[idx][i], SWM_COMM_WORLD, 0, req_vc, resp_vc, NO_BUFFER, k_s_sizes[idx][i]); + // SWM_Send(k_s_targets[idx][i], SWM_COMM_WORLD, 0, req_vc, resp_vc, NO_BUFFER, k_s_sizes[idx][i]); + SWM_Isend(k_s_targets[idx][i], SWM_COMM_WORLD, 0, req_vc, resp_vc, NO_BUFFER, k_s_sizes[idx][i], 0, &h2[i]); } + SWM_Waitall(k_len[idx], h2); SWM_Waitall(k_len[idx], h); delete h; + delete[] h2; /* h2 comes from new[] above, so it must be released with delete[]; the unchanged context line that deletes h has the same mismatch but cannot be edited here without breaking the hunk */ } } From aa181d197c2c7c14789e5e4c201b02070dc7c333 Mon Sep 17 00:00:00 2001 From: "Kevin A. Brown" Date: Mon, 7 Jun 2021 16:11:40 +0000 Subject: [PATCH 13/14] Updated some json configs and debug outputs. 
--- swm/src/lammps/lammps.cpp | 2 +- .../many_to_many/many_to_many_swm_user_code.cpp | 2 +- swm/src/many_to_many/many_to_many_workload.json | 15 ++++++++------- swm/src/spread/spread_workload.json | 7 ++++--- 4 files changed, 14 insertions(+), 12 deletions(-) diff --git a/swm/src/lammps/lammps.cpp b/swm/src/lammps/lammps.cpp index a8c4d80..e8a09e2 100644 --- a/swm/src/lammps/lammps.cpp +++ b/swm/src/lammps/lammps.cpp @@ -324,7 +324,7 @@ LAMMPS_SWM::call() SWM_Allreduce(8, rsp_bytes, SWM_COMM_WORLD, req_vc, resp_vc, NO_BUFFER, NO_BUFFER); // temperature SWM_Allreduce(48, rsp_bytes, SWM_COMM_WORLD, req_vc, resp_vc, NO_BUFFER, NO_BUFFER); // pressure - //SWM_Mark_Iteration(ts); // removed for instrumentation test + SWM_Mark_Iteration(ts); // remove for instrumentation test } SWM_Finalize(); //MM: comment assert(0); diff --git a/swm/src/many_to_many/many_to_many_swm_user_code.cpp b/swm/src/many_to_many/many_to_many_swm_user_code.cpp index b141f4c..19e3fd6 100644 --- a/swm/src/many_to_many/many_to_many_swm_user_code.cpp +++ b/swm/src/many_to_many/many_to_many_swm_user_code.cpp @@ -57,7 +57,7 @@ ManyToManySWMUserCode::call() /* Print job description */ if(process_id == 0) { - std::cout << std::endl << "JOB: Bulk_data | size: " << process_cnt; + std::cout << std::endl << "JOB: Many-to-Many (needs a better name) | size: " << process_cnt; std::cout << " | interation_cnt: " << iteration_cnt; std::cout << " | msg_req_bytes: " << msg_req_bytes; std::cout << " | msg_rsp_bytes: " << msg_rsp_bytes; diff --git a/swm/src/many_to_many/many_to_many_workload.json b/swm/src/many_to_many/many_to_many_workload.json index 82a89c4..8fd635d 100644 --- a/swm/src/many_to_many/many_to_many_workload.json +++ b/swm/src/many_to_many/many_to_many_workload.json @@ -1,20 +1,21 @@ { "jobs" : { "dll_path": "${FABSIM_APPS_PATH}/dll/many_to_many.so", - "size": 1152, + "size": 256, "cfg": { "app": "many_to_many", - "iteration_cnt": 1, + "iteration_cnt": 2, "compute_delay": 0, - "msg_req_bytes": 1048576, 
- "msg_rsp_bytes": 0, + "msg_req_bytes": 4194304, + "msg_rsp_bytes": 8, "start_delay_max" : 10000, "scattered_start" : false, "randomize_communication_order": false, - "fixed_pairs" : false, + "fixed_pairs" : true, "debug" : false, - "src_rank_id_interval": [128,1151], - "dst_rank_id_interval": [0,63], + "src_rank_id_interval": [128,255], + "dst_rank_id_interval": [0,127], + "show_iterations": true, "cpu_freq" : 4e9 } } diff --git a/swm/src/spread/spread_workload.json b/swm/src/spread/spread_workload.json index 387f242..0ed7eac 100644 --- a/swm/src/spread/spread_workload.json +++ b/swm/src/spread/spread_workload.json @@ -4,10 +4,10 @@ "size": 64, "cfg": { "app": "spread", - "iteration_cnt": 10, + "iteration_cnt": 50, "compute_delay": 10000, - "msg_req_bytes": 19922944, - "msg_rsp_bytes": 0, + "msg_req_bytes": 8, + "msg_rsp_bytes": 8, "start_delay_max" : 10000, "scattered_start" : false, "src_rank_id":63, @@ -15,6 +15,7 @@ "blocking_comm" : false, "debug" : false, "dst_rank_id_interval": [0,62], + "show_iterations": true, "cpu_freq" : 4e9 } } From 3018dfdb32814069e02e38b716b298c5a88ec6df Mon Sep 17 00:00:00 2001 From: Neil McGlohon Date: Wed, 30 Jun 2021 16:22:54 -0400 Subject: [PATCH 14/14] Add Layered AllBroadcast SWM --- swm/src/Makefile.subdir | 9 +- .../layered_allbcast.json | 15 ++ .../layered_allbroadcast.cpp | 148 ++++++++++++++++++ .../layered_allbroadcast.h | 75 +++++++++ 4 files changed, 244 insertions(+), 3 deletions(-) create mode 100644 swm/src/layered_allbroadcast/layered_allbcast.json create mode 100644 swm/src/layered_allbroadcast/layered_allbroadcast.cpp create mode 100644 swm/src/layered_allbroadcast/layered_allbroadcast.h diff --git a/swm/src/Makefile.subdir b/swm/src/Makefile.subdir index 81f09b8..ee6d4e1 100644 --- a/swm/src/Makefile.subdir +++ b/swm/src/Makefile.subdir @@ -12,7 +12,8 @@ include_HEADERS = \ src/many_to_many/many_to_many_swm_user_code.h \ src/allreduce/allreduce.h \ src/milc/milc_swm_user_code.h \ - 
src/periodic_aggressor/periodic_aggressor.h + src/periodic_aggressor/periodic_aggressor.h \ + src/layered_allbroadcast/layered_allbroadcast.h src_libswm_la_SOURCES = src/lammps/lammps.cpp \ src/nekbone/cubiclattice.cpp \ @@ -23,7 +24,8 @@ src_libswm_la_SOURCES = src/lammps/lammps.cpp \ src/many_to_many/many_to_many_swm_user_code.cpp \ src/allreduce/allreduce.cpp \ src/milc/milc_swm_user_code.cpp \ - src/periodic_aggressor/periodic_aggressor.cpp + src/periodic_aggressor/periodic_aggressor.cpp \ + src/layered_allbroadcast/layered_allbroadcast.cpp dist_data_DATA = src/lammps/lammps_workload.json \ src/lammps/lammps_workload1.json \ @@ -41,7 +43,8 @@ dist_data_DATA = src/lammps/lammps_workload.json \ src/allreduce/allreduce32_workload.json \ src/allreduce/allreduce256_workload.json \ src/milc/milc_skeleton.json \ - src/periodic_aggressor/periodic_aggressor.json + src/periodic_aggressor/periodic_aggressor.json \ + src/layered_allbroadcast/layered_allbcast.json diff --git a/swm/src/layered_allbroadcast/layered_allbcast.json b/swm/src/layered_allbroadcast/layered_allbcast.json new file mode 100644 index 0000000..cf86028 --- /dev/null +++ b/swm/src/layered_allbroadcast/layered_allbcast.json @@ -0,0 +1,15 @@ +{ + "jobs" : { + "size": 32, + "cfg": { + "app": "layered_allbcast", + "iteration_cnt": 10, + "total_layers": 50, + "initial_layer_size": 8192, + "layer_growth_rate": 1.12, + "grad_compression_rate": 32, + "debug" : true, + "cpu_freq" : 4e9 + } + } + } \ No newline at end of file diff --git a/swm/src/layered_allbroadcast/layered_allbroadcast.cpp b/swm/src/layered_allbroadcast/layered_allbroadcast.cpp new file mode 100644 index 0000000..9bf5ecb --- /dev/null +++ b/swm/src/layered_allbroadcast/layered_allbroadcast.cpp @@ -0,0 +1,148 @@ +#include "layered_allbroadcast.h" +#include "math.h" +#include //log printf wrapper + +bool stdout_log = false; + +static void print_log(const char *format, ...) 
+{ + va_list args; + va_start(args, format); + + if(stdout_log) + vprintf(format, args); + + va_end(args); +} + + +LayeredAllBroadcast::LayeredAllBroadcast( /* Reads the job parameters from cfg, using the default given in each get call when a key is absent, and takes the process id from generic_ptrs[0], which is read as an int */ + boost::property_tree::ptree cfg, + void**& generic_ptrs + ) : + process_cnt(cfg.get("jobs.size", 1)), + iteration_cnt(cfg.get("jobs.cfg.iteration_cnt", 1)), + total_layers(cfg.get("jobs.cfg.total_layers",50)), + initial_layer_size(cfg.get("jobs.cfg.initial_layer_size",8192)), + layer_growth_rate(cfg.get("jobs.cfg.layer_growth_rate",1.12)), + grad_compression_rate(cfg.get<double>("jobs.cfg.grad_compression_rate", cfg.get<double>("jobs.cfg.first_comm_compression_rate", 32.0))), /* key now matches the member name and the shipped layered_allbcast.json; the former key first_comm_compression_rate is still honoured as a fallback */ + blocking_comm(cfg.get("jobs.cfg.blocking_comm", false)), + show_iterations(cfg.get("jobs.cfg.show_iterations", false)), + debug(cfg.get("jobs.cfg.debug", false)) +{ + stdout_log = debug; + process_id = *((int*)generic_ptrs[0]); +} + +void LayeredAllBroadcast::call() /* Runs iteration_cnt iterations: for every layer a compressed-gradient exchange followed by a weights exchange, then one SWM_Allreduce; when show_iterations is set, SWM_Mark_Iteration is called before and after each iteration with an increasing marker; finishes with SWM_Finalize */ +{ + + int iter_marker = 0; + for (int iter = 0; iter < iteration_cnt; iter++) + { + if (show_iterations) { + SWM_Mark_Iteration(iter_marker); + iter_marker++; + } + + for (int i = 0; i < total_layers; i++) + { + if (process_id == 0) print_log("LayeredAllBcast Layer %d Comp Grad\n",i); + execute_comp_gradient_comm(i); + if (process_id == 0) print_log("LayeredAllBcast Layer %d Weights\n",i); + execute_weights_comm(i); + } + SWM_Allreduce(32, 0, SWM_COMM_WORLD, -1, -1, NO_BUFFER, NO_BUFFER); + // SWM_Barrier(SWM_COMM_WORLD, -1, -1, NO_BUFFER, 0, 0, 0, 0); + if (process_id == 0) print_log("LayeredAllBcast Iteration %d/%d Completed\n",iter+1,iteration_cnt); + + if (show_iterations) { + SWM_Mark_Iteration(iter_marker); + iter_marker++; + } + } + + + SWM_Finalize(); +} + + +void LayeredAllBroadcast::execute_comp_gradient_comm(int current_layer) +{ + double grad_size = (initial_layer_size * (pow(layer_growth_rate,current_layer)))/grad_compression_rate; + double piece_size = grad_size / process_cnt; + + uint32_t *h = (uint32_t*)calloc(process_cnt-1, sizeof(uint32_t)); + uint32_t *h2 = (uint32_t*)calloc(process_cnt-1, sizeof(uint32_t)); + int 
send_count = 0; /* tail of execute_comp_gradient_comm: number of sends posted so far */ + int recv_count = 0; + + for(int i = 0; i < process_cnt; i++) /* post one receive per peer rank, skipping this rank */ + { + if (i != process_id) + { + SWM_Irecv(i,SWM_COMM_WORLD, 0, NO_BUFFER, &(h2[recv_count])); + recv_count++; + } + } + + for(int i = 0; i < process_cnt; i++) /* post one send per peer rank, skipping this rank */ + { + if (i != process_id) + { + SWM_Isend(i,SWM_COMM_WORLD, 0, -1, -1, NO_BUFFER, (int)piece_size, 0, &(h[send_count]),0,0); /* the cast drops the fractional part of piece_size */ + send_count++; + } + } + + + + SWM_Waitall(send_count, h); + SWM_Waitall(recv_count, h2); + free(h); + free(h2); +} + +void LayeredAllBroadcast::execute_weights_comm(int current_layer) /* Exchanges the weights of layer current_layer with every other rank: the layer size is initial_layer_size times layer_growth_rate raised to current_layer, divided evenly by process_cnt; one SWM_Irecv and one SWM_Isend (third argument 1, where the gradient exchange passes 0) are posted per peer and all of them are waited on before the handle arrays are freed */ +{ + double weights_size = (initial_layer_size * (pow(layer_growth_rate,current_layer))); + double piece_size = weights_size / process_cnt; + + uint32_t *h = (uint32_t*)calloc(process_cnt-1, sizeof(uint32_t)); /* send handles, one slot per peer */ + uint32_t *h2 = (uint32_t*)calloc(process_cnt-1, sizeof(uint32_t)); /* receive handles, one slot per peer; NOTE(review): neither calloc result is checked for NULL */ + int send_count = 0; + int recv_count = 0; + for(int i = 0; i < process_cnt; i++) + { + if (i != process_id) + { + SWM_Irecv(i,SWM_COMM_WORLD, 1, NO_BUFFER, &(h2[recv_count])); + recv_count++; + } + } + + for(int i = 0; i < process_cnt; i++) + { + if (i != process_id) + { + SWM_Isend(i,SWM_COMM_WORLD, 1, -1, -1, NO_BUFFER, (int)piece_size, 0, &(h[send_count]),0,0); /* the cast drops the fractional part of piece_size */ + send_count++; + } + } + + SWM_Waitall(send_count, h); + SWM_Waitall(recv_count, h2); + free(h); + free(h2); +} + + + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * End: + * + * vim: ft=c ts=8 sts=4 sw=4 expandtab + */ diff --git a/swm/src/layered_allbroadcast/layered_allbroadcast.h b/swm/src/layered_allbroadcast/layered_allbroadcast.h new file mode 100644 index 0000000..986a426 --- /dev/null +++ b/swm/src/layered_allbroadcast/layered_allbroadcast.h @@ -0,0 +1,75 @@ +/* + * ===================================================================================== + * + * Filename: layered_allbroadcast.h + * + * Description: + * + * Version: 1.0 + * Created: 6/22/2021 + * Revision: none + * Compiler: gcc + * + * Author: Neil McGlohon + * Company: Rensselaer Polytechnic 
Institute + * + * ===================================================================================== + */ + +#ifndef _LAYERED_ALL_BROADCAST_TEMPLATE_USER_CODE_ +#define _LAYERED_ALL_BROADCAST_TEMPLATE_USER_CODE_ + +#define SWM_APP_TAG_BASE 0 + +#include + +#include +#include +#include +#include +#include +#include + +#include "swm-include.h" +using namespace std; /* NOTE(review): a using-directive at header scope is imposed on every file that includes this header */ + +class LayeredAllBroadcast /* SWM workload in which, for every iteration and every layer, each rank exchanges compressed gradients and then weights with all other ranks */ +{ + +public: + + LayeredAllBroadcast( +// SWMUserIF* user_if, + boost::property_tree::ptree cfg, + void**& generic_ptrs + ); + + void call(); /* runs the whole workload and ends with SWM_Finalize */ + +protected: + + + uint32_t process_id; /* set in the constructor body from generic_ptrs[0] */ + uint32_t process_cnt; /* cfg key jobs.size */ + uint32_t iteration_cnt; /* cfg key jobs.cfg.iteration_cnt */ + + uint32_t total_layers; /* number of layers exchanged per iteration */ + uint32_t initial_layer_size; /* size of layer 0; layer k is this value times layer_growth_rate to the power k */ + double layer_growth_rate; + double grad_compression_rate; /* divisor applied to the layer size for the gradient exchange */ + + // use blocking (Send/Recv) + bool blocking_comm; /* NOTE(review): read from cfg but not used by any method visible in this patch */ + + // for debugging + bool show_iterations; /* when true, call() brackets each iteration with SWM_Mark_Iteration */ + bool debug; /* copied into the file-level stdout_log flag by the constructor */ + + +private: + + void execute_comp_gradient_comm(int current_layer); /* all-to-all exchange of the compressed gradient of one layer */ + void execute_weights_comm(int current_layer); /* all-to-all exchange of the weights of one layer */ +}; + +#endif