Skip to content

Commit

Permalink
Add the scripts to execute TPCC in parallel across multiple clients.
Browse files Browse the repository at this point in the history
Reviewers:
Mihnea
  • Loading branch information
Sudheer Ponnemkunnath committed Nov 17, 2020
1 parent eddd12e commit 79e7e47
Show file tree
Hide file tree
Showing 9 changed files with 511 additions and 0 deletions.
46 changes: 46 additions & 0 deletions parallel_execute_scripts/Readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
## Step 1. Create the Loader and execute scripts.
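First create two text files, `clients.txt` and `yb_nodes.txt`, listing the client IPs and the YugabyteDB node IPs. The helper program `output_ips.cpp` generates both files from two IP lists given on the command line (client IPs first, then YugabyteDB node IPs). Each line of `clients.txt` has the form `<index>:<ip>`; `yb_nodes.txt` has one IP per line.
Then compile and run `output_scripts.cpp` to generate `loader$i.sh` and `execute$i.sh`. This program uses the `clients.txt` and `yb_nodes.txt` generated by the first program.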

## Step 2. Upload TPCC binaries and scripts to the client nodes.
This can be done as follows. Make sure that the environment has the ssh user
exported to the variable `SSH_USER` and the additional SSH AND SCP arguments
like the pem file or the port exported as `SSH_ARGS` and `SCP_ARGS`.
```sh
for n in $(cat clients.txt);
do
./upload_scripts.sh $(echo $n | cut -d ":" -f 2) $(echo $n | cut -d ":" -f 1);
done
```

This step expects the tpcc.tar.gz file to be present in the same directory.

## Step 3. Create the TPCC tables.
This can be done from one of the clients as:
```sh
./tpccbenchmark --nodes=<ip> --create=true
```

## Step 4. Load the data.
This can be done by:
```sh
for n in $(cat clients.txt);
do
./run_loader.sh $(echo $n | cut -d ":" -f 2) $(echo $n | cut -d ":" -f 1);
done
```

## Step 5. Enable the foreign keys.
This can be done as:
```sh
./tpccbenchmark --nodes=<ip> --enable-foreign-keys=true --create-sql-procedures=true
```

## Step 6. Execute the program.
This can be done as:
```sh
for n in $(cat clients.txt);
do
./run_execute.sh $(echo $n | cut -d ":" -f 2) $(echo $n | cut -d ":" -f 1);
done
```
216 changes: 216 additions & 0 deletions parallel_execute_scripts/create_tables.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
# This script creates the required tables on a cluster.
# Arguments (each passed as "--<name> <value>"; defaults in parentheses):
# 1. --warehouses: Number of warehouses (100).
# 2. --ip: IP of the master leader (127.0.0.1).
# 3. --splits: Number of splits (2). This assumes we have split the cluster
# into (N + 1) logical regions with the first one dedicated to the yb-master.
# The 'item' table is pinned here as well.
# 4. --tablets: Number of tablets per sub-table (24).
#
# The first sub-table is pinned to $cloud.$region.$zone2, the second to
# $cloud.$region.$zone3 as the first zone is reserved for the yb-master.

# Tunables. Each default can be overridden through an environment variable of
# the same name or with a "--<name> <value>" pair on the command line, e.g.
# "--warehouses 1000 --ip 10.0.0.5".
warehouses=${warehouses:-100}
ip=${ip:-127.0.0.1}
splits=${splits:-2}
tablets=${tablets:-24}
while [ $# -gt 0 ]; do
  # Only words that START with "--" are option names; a value that merely
  # contains "--" must not be mistaken for one.
  if [[ $1 == --?* ]]; then
    if [ $# -lt 2 ]; then
      echo "Missing value for option $1" >&2
      exit 1
    fi
    declare "${1#--}=$2"
    # Consume the value too so it is never re-examined as an option.
    shift
  fi
  shift
done

# Fail before any table is dropped if the sizing parameters cannot work:
# a zero or non-numeric value would break the division and the generated SQL.
for numeric in warehouses splits tablets; do
  if ! [[ ${!numeric} =~ ^[1-9][0-9]*$ ]]; then
    echo "--$numeric must be a positive integer, got '${!numeric}'" >&2
    exit 1
  fi
done
if (( warehouses < splits )); then
  # Fewer warehouses than splits would produce empty partition ranges.
  echo "--warehouses ($warehouses) must be at least --splits ($splits)" >&2
  exit 1
fi

# Number of warehouses covered by each range partition (integer division).
wh_per_split=$(( warehouses / splits ))

# Tool locations. $ysqlsh is expanded unquoted by its users so that
# "-h $ip" is passed as separate words.
ysqlsh="/mnt/d0/repositories/yugabyte-db3/bin/ysqlsh -h $ip"
ybadmin="/mnt/d0/repositories/yugabyte-db3/build/debug-clang-dynamic/bin/yb-admin"

# Placement prefix; the zone name gets a numeric suffix appended per table.
cloud=aws
region=us-west-2
zone=us-west-2a

# Creates table $1 in the 'yugabyte' database and pins it to a placement zone.
# Any existing table of that name is dropped first.
#
# With exactly 3 arguments the table is created unpartitioned, split into
# 3 tablets and placed in "${zone}1":
#   $1: table name
#   $2: column definitions
#   $3: PRIMARY KEY column list
#
# Otherwise the table is range-partitioned into $splits sub-tables named
# <table>1 .. <table>$splits, each split into $tablets tablets; sub-table i is
# placed in "${zone}<i+1>":
#   $1: table name
#   $2: column definitions
#   $3: partition column
#   $4: column list without types
#   $5: PRIMARY KEY column list (the 4-argument form creates the sub-tables
#       without a primary key)
#
# Reads the globals ysqlsh, ybadmin, ip, cloud, region, zone, warehouses,
# splits, tablets and wh_per_split.
create_table() {
  local table=$1
  local i start end tablezone pk_clause

  $ysqlsh -d yugabyte -c "DROP TABLE IF EXISTS $table"

  if [[ $# == '3' ]]; then
    $ysqlsh -d yugabyte -c "CREATE TABLE $table ($2, PRIMARY KEY($3)) SPLIT INTO 3 TABLETS"
    tablezone=$zone$(( 1 ))
    $ybadmin --master_addresses $ip:7100 modify_table_placement_info ysql.yugabyte $table $cloud.$region.$tablezone 3
    return
  fi

  $ysqlsh -d yugabyte -c "CREATE TABLE $table ($2) PARTITION BY RANGE($3) SPLIT INTO 1 TABLETS"

  # Only the 4-argument form leaves the primary key out.
  pk_clause=", PRIMARY KEY($5)"
  if [[ $# == '4' ]]; then
    pk_clause=""
  fi

  for i in $(seq 1 "$splits"); do
    # Range partition bounds are [start, end): the upper bound is exclusive.
    start=$(( (i - 1) * wh_per_split + 1 ))
    if (( i == splits )); then
      # The last partition absorbs the remainder when $warehouses is not a
      # multiple of $splits, so every warehouse id 1..$warehouses is covered.
      end=$(( warehouses + 1 ))
    else
      end=$(( i * wh_per_split + 1 ))
    fi
    $ysqlsh -d yugabyte -c "CREATE TABLE $table$i PARTITION OF $table($4$pk_clause) FOR VALUES FROM ($start) TO ($end) SPLIT INTO $tablets TABLETS"
  done

  # Zone suffix 1 is used by the unpartitioned form above, so sub-table i
  # goes to suffix i+1.
  for i in $(seq 1 "$splits"); do
    tablezone=$zone$(( i + 1 ))
    $ybadmin --master_addresses $ip:7100 modify_table_placement_info ysql.yugabyte $table$i $cloud.$region.$tablezone 3
  done
}

# Creates the secondary indexes on the 'customer' and 'oorder' tables by
# running each statement through $ysqlsh against the 'yugabyte' database.
create_indexes() {
  local statement
  for statement in \
    'CREATE INDEX idx_customer_name ON customer ((c_w_id,c_d_id) HASH,c_last,c_first)' \
    'CREATE UNIQUE INDEX idx_order ON oorder ((o_w_id,o_d_id) HASH,o_c_id,o_id DESC)'
  do
    $ysqlsh -d yugabyte -c "$statement"
  done
}

# item: created through the 3-argument form of create_table, i.e. a single
# unpartitioned table whose primary key is i_id.
create_table 'item' \
'i_id int NOT NULL,
i_name varchar(24) NOT NULL,
i_price decimal(5,2) NOT NULL,
i_data varchar(50) NOT NULL,
i_im_id int NOT NULL' \
'i_id'

# warehouse: range-partitioned on w_id; each sub-table has primary key w_id.
# Every argument is followed by " \" (with a space) so that consecutive
# quoted words are never glued into one argument by the line continuation.
create_table 'warehouse' \
'w_id int NOT NULL,
w_ytd decimal(12,2) NOT NULL,
w_tax decimal(4,4) NOT NULL,
w_name varchar(10) NOT NULL,
w_street_1 varchar(20) NOT NULL,
w_street_2 varchar(20) NOT NULL,
w_city varchar(20) NOT NULL,
w_state char(2) NOT NULL,
w_zip char(9) NOT NULL' \
'w_id' \
'w_id, w_ytd, w_tax, w_name, w_street_1, w_street_2, w_city, w_state, w_zip' \
'w_id'

# district: range-partitioned on d_w_id; each sub-table has primary key
# ((d_w_id,d_id) HASH).
create_table 'district' \
'd_w_id int NOT NULL,
d_id int NOT NULL,
d_ytd decimal(12,2) NOT NULL,
d_tax decimal(4,4) NOT NULL,
d_next_o_id int NOT NULL,
d_name varchar(10) NOT NULL,
d_street_1 varchar(20) NOT NULL,
d_street_2 varchar(20) NOT NULL,
d_city varchar(20) NOT NULL,
d_state char(2) NOT NULL,
d_zip char(9) NOT NULL' \
'd_w_id' \
'd_w_id, d_id, d_ytd, d_tax, d_next_o_id, d_name, d_street_1, d_street_2, d_city, d_state, d_zip' \
'(d_w_id,d_id) HASH'

# customer: range-partitioned on c_w_id; each sub-table has primary key
# ((c_w_id,c_d_id) HASH, c_id). The last argument carries no trailing
# backslash, so the command ends here regardless of what follows.
create_table 'customer' \
'c_w_id int NOT NULL,
c_d_id int NOT NULL,
c_id int NOT NULL,
c_discount decimal(4,4) NOT NULL,
c_credit char(2) NOT NULL,
c_last varchar(16) NOT NULL,
c_first varchar(16) NOT NULL,
c_credit_lim decimal(12,2) NOT NULL,
c_balance decimal(12,2) NOT NULL,
c_ytd_payment float NOT NULL,
c_payment_cnt int NOT NULL,
c_delivery_cnt int NOT NULL,
c_street_1 varchar(20) NOT NULL,
c_street_2 varchar(20) NOT NULL,
c_city varchar(20) NOT NULL,
c_state char(2) NOT NULL,
c_zip char(9) NOT NULL,
c_phone char(16) NOT NULL,
c_since timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
c_middle char(2) NOT NULL,
c_data varchar(500) NOT NULL' \
'c_w_id' \
'c_w_id, c_d_id, c_id, c_discount, c_credit, c_last, c_first, c_credit_lim, c_balance, c_ytd_payment, c_payment_cnt,
c_delivery_cnt, c_street_1, c_street_2, c_city, c_state, c_zip, c_phone, c_since, c_middle, c_data' \
'(c_w_id,c_d_id) HASH,c_id'

# history: range-partitioned on h_w_id. Called with 4 arguments, so the
# sub-tables are created without a primary key.
create_table 'history' \
'h_c_id int NOT NULL,
h_c_d_id int NOT NULL,
h_c_w_id int NOT NULL,
h_d_id int NOT NULL,
h_w_id int NOT NULL,
h_date timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
h_amount decimal(6,2) NOT NULL,
h_data varchar(24) NOT NULL' \
'h_w_id' \
'h_c_id, h_c_d_id, h_c_w_id, h_d_id, h_w_id, h_date, h_amount, h_data'

# oorder: range-partitioned on o_w_id; each sub-table has primary key
# ((o_w_id,o_d_id) HASH, o_id).
create_table 'oorder' \
'o_w_id int NOT NULL,
o_d_id int NOT NULL,
o_id int NOT NULL,
o_c_id int NOT NULL,
o_carrier_id int DEFAULT NULL,
o_ol_cnt decimal(2,0) NOT NULL,
o_all_local decimal(1,0) NOT NULL,
o_entry_d timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP' \
'o_w_id' \
'o_w_id, o_d_id, o_id, o_c_id, o_carrier_id, o_ol_cnt, o_all_local, o_entry_d' \
'(o_w_id,o_d_id) HASH,o_id'

# stock: range-partitioned on s_w_id; each sub-table has primary key
# ((s_w_id,s_i_id)HASH). Every argument but the last is followed by " \"
# (with a space) so adjacent quoted words stay separate arguments, and the
# last argument has no trailing backslash.
create_table 'stock' \
's_w_id int NOT NULL,
s_i_id int NOT NULL,
s_quantity decimal(4,0) NOT NULL,
s_ytd decimal(8,2) NOT NULL,
s_order_cnt int NOT NULL,
s_remote_cnt int NOT NULL,
s_data varchar(50) NOT NULL,
s_dist_01 char(24) NOT NULL,
s_dist_02 char(24) NOT NULL,
s_dist_03 char(24) NOT NULL,
s_dist_04 char(24) NOT NULL,
s_dist_05 char(24) NOT NULL,
s_dist_06 char(24) NOT NULL,
s_dist_07 char(24) NOT NULL,
s_dist_08 char(24) NOT NULL,
s_dist_09 char(24) NOT NULL,
s_dist_10 char(24) NOT NULL' \
's_w_id' \
's_w_id, s_i_id, s_quantity, s_ytd, s_order_cnt, s_remote_cnt, s_data, s_dist_01, s_dist_02, s_dist_03, s_dist_04, s_dist_05, s_dist_06, s_dist_07, s_dist_08, s_dist_09, s_dist_10' \
'(s_w_id,s_i_id)HASH'

# new_order: range-partitioned on no_w_id; each sub-table has primary key
# ((no_w_id,no_d_id) HASH, no_o_id).
create_table 'new_order' \
'no_w_id int NOT NULL,
no_d_id int NOT NULL,
no_o_id int NOT NULL' \
'no_w_id' \
'no_w_id, no_d_id, no_o_id' \
'(no_w_id,no_d_id) HASH,no_o_id'

# order_line: range-partitioned on ol_w_id; each sub-table has primary key
# ((ol_w_id,ol_d_id) HASH, ol_o_id, ol_number).
create_table 'order_line' \
'ol_w_id int NOT NULL,
ol_d_id int NOT NULL,
ol_o_id int NOT NULL,
ol_number int NOT NULL,
ol_i_id int NOT NULL,
ol_delivery_d timestamp NULL DEFAULT NULL,
ol_amount decimal(6,2) NOT NULL,
ol_supply_w_id int NOT NULL,
ol_quantity decimal(2,0) NOT NULL,
ol_dist_info char(24) NOT NULL' \
'ol_w_id' \
'ol_w_id, ol_d_id, ol_o_id, ol_number, ol_i_id, ol_delivery_d, ol_amount, ol_supply_w_id, ol_quantity, ol_dist_info' \
'(ol_w_id,ol_d_id) HASH,ol_o_id,ol_number'

# Runs last: the indexes are defined on 'customer' and 'oorder', which are
# created by the create_table calls above.
create_indexes

1 change: 1 addition & 0 deletions parallel_execute_scripts/kill_java.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Runs `pkill java` on a remote host over ssh.
# Usage: kill_java.sh <host>
# Reads SSH_USER (remote login name) and SSH_ARGS (extra ssh options) from
# the environment.
if [ -z "$1" ]; then
  echo "Usage: $0 <host>" >&2
  exit 1
fi
# SSH_ARGS stays unquoted on purpose so it can expand to several options.
sudo ssh $SSH_ARGS -ostricthostkeychecking=no "$SSH_USER@$1" 'pkill java'
64 changes: 64 additions & 0 deletions parallel_execute_scripts/limits.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# /etc/security/limits.conf
#
#This file sets the resource limits for the users logged in via PAM.
#It does not affect resource limits of the system services.
#
#Also note that configuration files in /etc/security/limits.d directory,
#which are read in alphabetical order, override the settings in this
#file in case the domain is the same or more specific.
#That means, for example, that setting a limit for the wildcard domain here
#can be overridden with a wildcard setting in a config file in the
#subdirectory, but a user specific setting here can be overridden only
#with a user specific setting in the subdirectory.
#
#Each line describes a limit for a user in the form:
#
#<domain> <type> <item> <value>
#
#Where:
#<domain> can be:
# - a user name
# - a group name, with @group syntax
# - the wildcard *, for default entry
# - the wildcard %, can be also used with %group syntax,
# for maxlogin limit
#
#<type> can have the two values:
# - "soft" for enforcing the soft limits
# - "hard" for enforcing hard limits
#
#<item> can be one of the following:
# - core - limits the core file size (KB)
# - data - max data size (KB)
# - fsize - maximum filesize (KB)
# - memlock - max locked-in-memory address space (KB)
# - nofile - max number of open file descriptors
# - rss - max resident set size (KB)
# - stack - max stack size (KB)
# - cpu - max CPU time (MIN)
# - nproc - max number of processes
# - as - address space limit (KB)
# - maxlogins - max number of logins for this user
# - maxsyslogins - max number of logins on the system
# - priority - the priority to run user process with
# - locks - max number of file locks the user can hold
# - sigpending - max number of pending signals
# - msgqueue - max memory used by POSIX message queues (bytes)
# - nice - max nice priority allowed to raise to values: [-20, 19]
# - rtprio - max realtime priority
#
#<domain> <type> <item> <value>
#

#* soft core 0
#* hard rss 10000
#@student hard nproc 20
#@faculty soft nproc 20
#@faculty hard nproc 50
#ftp hard nproc 0
#@student - maxlogins 4

# End of file
* - core unlimited
* - nofile 1048576
* - nproc 30000
58 changes: 58 additions & 0 deletions parallel_execute_scripts/output_ips.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#include <fstream>
#include <iostream>
#include <numeric>
#include <string>
#include <vector>

using namespace std;

// Splits `ip_string` on ',' and ':' and appends every token that contains
// exactly three '.' characters to `ips`; all other tokens (for example port
// numbers or empty tokens) are skipped. Afterwards the complete contents of
// `ips` are printed to stdout on one line, each entry followed by a space.
void GetIps(const char *ip_string, vector<string>& ips) {
  const string text(ip_string);
  size_t token_begin = 0;
  // `<=` so that the (possibly empty) token after a trailing separator is
  // visited as well; substr() at position size() yields an empty string.
  while (token_begin <= text.size()) {
    size_t token_end = text.find_first_of(",:", token_begin);
    if (token_end == string::npos) {
      token_end = text.size();
    }
    const string token = text.substr(token_begin, token_end - token_begin);

    int dots = 0;
    for (const char ch : token) {
      if (ch == '.') {
        ++dots;
      }
    }
    if (dots == 3) {
      ips.emplace_back(token);
    }
    token_begin = token_end + 1;
  }

  for (const auto& entry : ips) {
    cout << entry << " ";
  }
  cout << endl;
}

// Writes `ips` to the file `filename`, one entry per line, replacing any
// existing content. When `should_number` is true each line is prefixed with
// its zero-based index and a ':' ("0:10.0.0.1"). If the file cannot be
// opened or written, a message is printed to stderr.
void OutputToFile(const string& filename, const vector<string>& ips, bool should_number) {
  ofstream out_file(filename.c_str());
  if (!out_file) {
    cerr << "Failed to open " << filename << " for writing" << endl;
    return;
  }
  int index = 0;
  for (const auto& ip : ips) {
    if (should_number) {
      out_file << index++ << ':';
    }
    out_file << ip << '\n';
  }
  // Flush explicitly so a write error is detected here rather than lost in
  // the destructor.
  out_file.flush();
  if (!out_file) {
    cerr << "Failed to write " << filename << endl;
  }
}

// Entry point. argv[1] is the list of client addresses and argv[2] the list
// of YugabyteDB node addresses; both are parsed with GetIps. The client IPs
// are written to clients.txt as "<index>:<ip>" lines and the node IPs to
// yb_nodes.txt as one IP per line.
int main(int argc, char **argv) {
  if (argc < 3) {
    // Diagnostics go to stderr, newline-terminated so the shell prompt does
    // not end up on the same line.
    cerr << "Usage : ./binary client_ips yb_nodes_ips\n";
    return 1;
  }
  vector<string> client_ips;
  vector<string> yb_node_ips;
  GetIps(argv[1], client_ips);
  GetIps(argv[2], yb_node_ips);

  OutputToFile("clients.txt", client_ips, true);
  OutputToFile("yb_nodes.txt", yb_node_ips, false);
  return 0;
}
Loading

0 comments on commit 79e7e47

Please sign in to comment.