Skip to content

Commit

Permalink
Add the scripts to execute TPCC parallely across multiple clients.
Browse files Browse the repository at this point in the history
Reviewers:
Mihnea
  • Loading branch information
Sudheer Ponnemkunnath committed Dec 11, 2020
1 parent eddd12e commit d1e5395
Show file tree
Hide file tree
Showing 8 changed files with 692 additions and 0 deletions.
46 changes: 46 additions & 0 deletions parallel_execute_scripts/Readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
## Step 1. Create the loader and execute scripts.
First create two text files listing the client and YugabyteDB node IPs (`clients.txt` and `yb_nodes.txt`). The helper program `output_ips.cpp` can generate these two files for us.
Then compile and run `output_scripts.cpp` to generate `loader$i.sh` and `execute$i.sh`; this program reads the `clients.txt` and `yb_nodes.txt` generated by the first program.

## Step 2. Upload TPCC binaries and scripts to the client nodes.
This can be done as follows. Make sure that the SSH user is exported in the
environment variable `SSH_USER`, and that any additional SSH and SCP arguments
(such as the .pem file or the port) are exported as `SSH_ARGS` and `SCP_ARGS`.
```sh
for n in $(cat clients.txt);
do
./upload_scripts.sh $(echo $n | cut -d ":" -f 2) $(echo $n | cut -d ":" -f 1);
done
```

This step expects the tpcc.tar.gz file to be present in the same directory.

## Step 3. Create the TPCC tables.
This can be done from one of the clients as:
```sh
./tpccbenchmark --nodes=<ip> --create=true
```

## Step 4. Load the data.
This can be done by:
```sh
for n in $(cat clients.txt);
do
./run_loader.sh $(echo $n | cut -d ":" -f 2) $(echo $n | cut -d ":" -f 1);
done
```

## Step 5. Enable the foreign keys.
This can be done as:
```sh
./tpccbenchmark --nodes=<ip> --enable-foreign-keys=true --create-sql-procedures=true
```

## Step 6. Execute the program.
This can be done as:
```sh
for n in $(cat clients.txt);
do
./run_execute.sh $(echo $n | cut -d ":" -f 2) $(echo $n | cut -d ":" -f 1);
done
```
256 changes: 256 additions & 0 deletions parallel_execute_scripts/create_tables.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,256 @@
#!/usr/bin/env bash
# This script creates the required tables on a cluster.
# Arguments (defaults below; override via environment or "--name value" flags):
# 1. Number of warehouses
# 2. IP of the master leader
# 3. Number of splits. This assumes we have split the cluster into (N + 1)
#    logical regions with the first one dedicated for the yb-master. The 'item'
#    table is pinned here as well.
# 4. Number of tablets per sub-table.
#
# The first sub-table is pinned to $cloud.$region.$zone2, the second to
# $cloud.$region.$zone3 as the first zone is reserved for the yb-master.


warehouses=${warehouses:-15000}
splits=${splits:-20}
tablets=${tablets:-24}
cloud=aws
region=us-west-2
zone=us-west-2c

# Allow "--name value" pairs on the command line to override any of the above.
while [ $# -gt 0 ]; do
    if [[ $1 == *"--"* ]]; then
        param="${1/--/}"
        declare $param="$2"
    fi
    shift
done

# BUG FIX: compute this AFTER argument parsing so --warehouses/--splits
# overrides take effect (the old code derived it from the defaults only).
# $(( )) also avoids spawning an external 'expr' process.
wh_per_split=$(( warehouses / splits ))

# Build the comma-separated master address list from the first three nodes.
# BUG FIX: the separator check must look at the accumulator, not at $n ($n is
# never empty inside the loop), otherwise every entry - including the first -
# was preceded by a comma, yielding a malformed ",ip:7100,..." list.
master_addrs=""
for n in $(head -n3 yb_nodes.txt); do
    if [ ! -z "$master_addrs" ]; then
        master_addrs+=","
    fi
    master_addrs+="$n:7100"
done
ip=$(head -1 yb_nodes.txt)

ysqlsh="/mnt/d0/repositories/yugabyte-db/bin/ysqlsh -h $ip"
ybadmin="/mnt/d0/repositories/yugabyte-db/build/debug-gcc-dynamic/bin/yb-admin --master_addresses $master_addrs"

# Create a small, non-partitioned table pinned to the zone reserved for the
# yb-master (zone suffix 0), replicated 3 ways and pre-split into 3 tablets.
# $1: table name
# $2: column list (with types)
# $3: primary key column list
create_simple_table() {
    local table=$1 columns=$2 pkey=$3
    local pinned_zone="${zone}0"

    # Route newly created tablets to the reserved zone, then (re)create the
    # table and pin it there explicitly.
    $ybadmin modify_placement_info $cloud.$region.$pinned_zone 3
    $ysqlsh -d yugabyte -c "DROP TABLE IF EXISTS $table"
    $ysqlsh -d yugabyte -c "CREATE TABLE $table ($columns, PRIMARY KEY($pkey)) SPLIT INTO 3 TABLETS"
    $ybadmin modify_table_placement_info ysql.yugabyte $table $cloud.$region.$pinned_zone 3
}

# Create a range-partitioned table with one partition per logical region.
# Partition i is pinned to zone $zone$i, split into $tablets tablets, and
# covers warehouse ids [(i-1)*wh_per_split+1, i*wh_per_split+1).
# $1: table name
# $2: column list (with types)
# $3: partition (RANGE) column argument
# $4: column list without types (repeated on each PARTITION OF clause)
# $5: primary key list - optional; only the history table omits it
create_partitioned_table() {
    # (Re)create the parent table. Its own tablet count is irrelevant since
    # all rows land in the partitions below.
    $ysqlsh -d yugabyte -c "DROP TABLE IF EXISTS $1"
    $ysqlsh -d yugabyte -c "CREATE TABLE $1 ($2) PARTITION BY RANGE($3) SPLIT INTO 1 TABLETS"

    # Only the history table does not have a pkey (it is called with 4 args).
    local pkey=""
    if (( $# == 5 )); then
        pkey=", PRIMARY KEY($5)"
    fi

    # Create the partitions, pinning each one to its own zone.
    local i start end tablezone
    for (( i = 1; i <= splits; i++ )); do
        tablezone=$zone$i
        $ybadmin modify_placement_info $cloud.$region.$tablezone 3

        start=$(( (i-1)*wh_per_split+1 ))
        end=$(( (i*wh_per_split)+1 ))
        $ysqlsh -d yugabyte -c "CREATE TABLE $1$i PARTITION OF $1($4${pkey}) FOR VALUES FROM ($start) TO ($end) SPLIT INTO $tablets TABLETS"

        $ybadmin modify_table_placement_info ysql.yugabyte $1$i $cloud.$region.$tablezone 3
    done
}


# Create a secondary index on every partition of a partitioned table, pinning
# each per-partition index to the same zone as its partition.
# $1: index name prefix (partition number is appended)
# $2: table name prefix (partition number is appended)
# $3: indexed columns
# $4: is_unique (0 = plain index, anything else = UNIQUE index)
create_index() {
    local p
    for p in $(seq 1 $splits); do
        tablezone=$zone$p
        $ybadmin modify_placement_info $cloud.$region.$tablezone 3

        # Build the statement once; inject UNIQUE only when requested.
        local unique_kw=""
        [[ $4 == 0 ]] || unique_kw="UNIQUE "
        $ysqlsh -d yugabyte -c "CREATE ${unique_kw}INDEX $1$p ON $2$p ($3)"

        $ybadmin modify_table_placement_info ysql.yugabyte $1$p $cloud.$region.$tablezone 3
    done
}

set -x

# Keep the load balancer off while tables are created and explicitly pinned,
# so background rebalancing does not fight the placement calls below.
$ybadmin set_load_balancer_enabled 0

create_simple_table 'item' \
'i_id int NOT NULL,
i_name varchar(24) NOT NULL,
i_price decimal(5,2) NOT NULL,
i_data varchar(50) NOT NULL,
i_im_id int NOT NULL' \
'i_id'


# BUG FIX: the original had 'w_id'\ with no space before the backslash, so the
# backslash-newline joined 'w_id' with the following quoted string into ONE
# argument. That shifted the arguments: create_partitioned_table received only
# 4 args (no PRIMARY KEY) and a garbled RANGE column for 'warehouse'.
create_partitioned_table 'warehouse' \
'w_id int NOT NULL,
w_ytd decimal(12,2) NOT NULL,
w_tax decimal(4,4) NOT NULL,
w_name varchar(10) NOT NULL,
w_street_1 varchar(20) NOT NULL,
w_street_2 varchar(20) NOT NULL,
w_city varchar(20) NOT NULL,
w_state char(2) NOT NULL,
w_zip char(9) NOT NULL' \
'w_id' \
'w_id, w_ytd, w_tax, w_name, w_street_1, w_street_2, w_city, w_state, w_zip' \
'w_id'

create_partitioned_table 'district' \
'd_w_id int NOT NULL,
d_id int NOT NULL,
d_ytd decimal(12,2) NOT NULL,
d_tax decimal(4,4) NOT NULL,
d_next_o_id int NOT NULL,
d_name varchar(10) NOT NULL,
d_street_1 varchar(20) NOT NULL,
d_street_2 varchar(20) NOT NULL,
d_city varchar(20) NOT NULL,
d_state char(2) NOT NULL,
d_zip char(9) NOT NULL' \
'd_w_id' \
'd_w_id, d_id, d_ytd, d_tax, d_next_o_id, d_name, d_street_1, d_street_2, d_city, d_state, d_zip' \
'(d_w_id,d_id) HASH'

# BUG FIX: removed the stray trailing backslash after the last argument (it
# continued the command onto the following blank line).
create_partitioned_table 'customer' \
'c_w_id int NOT NULL,
c_d_id int NOT NULL,
c_id int NOT NULL,
c_discount decimal(4,4) NOT NULL,
c_credit char(2) NOT NULL,
c_last varchar(16) NOT NULL,
c_first varchar(16) NOT NULL,
c_credit_lim decimal(12,2) NOT NULL,
c_balance decimal(12,2) NOT NULL,
c_ytd_payment float NOT NULL,
c_payment_cnt int NOT NULL,
c_delivery_cnt int NOT NULL,
c_street_1 varchar(20) NOT NULL,
c_street_2 varchar(20) NOT NULL,
c_city varchar(20) NOT NULL,
c_state char(2) NOT NULL,
c_zip char(9) NOT NULL,
c_phone char(16) NOT NULL,
c_since timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
c_middle char(2) NOT NULL,
c_data varchar(500) NOT NULL' \
'c_w_id' \
'c_w_id, c_d_id, c_id, c_discount, c_credit, c_last, c_first, c_credit_lim, c_balance, c_ytd_payment, c_payment_cnt,
c_delivery_cnt, c_street_1, c_street_2, c_city, c_state, c_zip, c_phone, c_since, c_middle, c_data' \
'(c_w_id,c_d_id) HASH,c_id'

# history is intentionally called with only 4 arguments: it has no pkey.
create_partitioned_table 'history' \
'h_c_id int NOT NULL,
h_c_d_id int NOT NULL,
h_c_w_id int NOT NULL,
h_d_id int NOT NULL,
h_w_id int NOT NULL,
h_date timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP,
h_amount decimal(6,2) NOT NULL,
h_data varchar(24) NOT NULL' \
'h_w_id' \
'h_c_id, h_c_d_id, h_c_w_id, h_d_id, h_w_id, h_date, h_amount, h_data'

create_partitioned_table 'oorder' \
'o_w_id int NOT NULL,
o_d_id int NOT NULL,
o_id int NOT NULL,
o_c_id int NOT NULL,
o_carrier_id int DEFAULT NULL,
o_ol_cnt decimal(2,0) NOT NULL,
o_all_local decimal(1,0) NOT NULL,
o_entry_d timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP' \
'o_w_id' \
'o_w_id, o_d_id, o_id, o_c_id, o_carrier_id, o_ol_cnt, o_all_local, o_entry_d' \
'(o_w_id,o_d_id) HASH,o_id'

# BUG FIX: same missing-space-before-backslash bug as 'warehouse' (the column
# list merged with 's_w_id'), plus a stray trailing backslash after the pkey.
create_partitioned_table 'stock' \
's_w_id int NOT NULL,
s_i_id int NOT NULL,
s_quantity decimal(4,0) NOT NULL,
s_ytd decimal(8,2) NOT NULL,
s_order_cnt int NOT NULL,
s_remote_cnt int NOT NULL,
s_data varchar(50) NOT NULL,
s_dist_01 char(24) NOT NULL,
s_dist_02 char(24) NOT NULL,
s_dist_03 char(24) NOT NULL,
s_dist_04 char(24) NOT NULL,
s_dist_05 char(24) NOT NULL,
s_dist_06 char(24) NOT NULL,
s_dist_07 char(24) NOT NULL,
s_dist_08 char(24) NOT NULL,
s_dist_09 char(24) NOT NULL,
s_dist_10 char(24) NOT NULL' \
's_w_id' \
's_w_id, s_i_id, s_quantity, s_ytd, s_order_cnt, s_remote_cnt, s_data, s_dist_01, s_dist_02, s_dist_03, s_dist_04, s_dist_05, s_dist_06, s_dist_07, s_dist_08, s_dist_09, s_dist_10' \
'(s_w_id,s_i_id)HASH'

create_partitioned_table 'new_order' \
'no_w_id int NOT NULL,
no_d_id int NOT NULL,
no_o_id int NOT NULL' \
'no_w_id' \
'no_w_id, no_d_id, no_o_id' \
'(no_w_id,no_d_id) HASH,no_o_id'

create_partitioned_table 'order_line' \
'ol_w_id int NOT NULL,
ol_d_id int NOT NULL,
ol_o_id int NOT NULL,
ol_number int NOT NULL,
ol_i_id int NOT NULL,
ol_delivery_d timestamp NULL DEFAULT NULL,
ol_amount decimal(6,2) NOT NULL,
ol_supply_w_id int NOT NULL,
ol_quantity decimal(2,0) NOT NULL,
ol_dist_info char(24) NOT NULL' \
'ol_w_id' \
'ol_w_id, ol_d_id, ol_o_id, ol_number, ol_i_id, ol_delivery_d, ol_amount, ol_supply_w_id, ol_quantity, ol_dist_info' \
'(ol_w_id,ol_d_id) HASH,ol_o_id,ol_number'

create_index 'idx_customer_name' 'customer' '(c_w_id,c_d_id) HASH,c_last,c_first' 0

create_index 'idx_order' 'oorder' '(o_w_id,o_d_id) HASH,o_c_id,o_id DESC' 1

# Done pinning: drop the cluster-wide override and re-enable the balancer.
$ybadmin clear_placement_info
$ybadmin set_load_balancer_enabled 1
1 change: 1 addition & 0 deletions parallel_execute_scripts/kill_java.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Kill all java processes (i.e. any running TPCC benchmark/loader JVM) on the
# remote client host $1. SSH_USER and SSH_ARGS must be exported by the caller
# (see Readme.md); host key checking is disabled for freshly provisioned nodes.
sudo ssh $SSH_ARGS -ostricthostkeychecking=no $SSH_USER@$1 'pkill java'
64 changes: 64 additions & 0 deletions parallel_execute_scripts/limits.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# /etc/security/limits.conf
#
#This file sets the resource limits for the users logged in via PAM.
#It does not affect resource limits of the system services.
#
#Also note that configuration files in /etc/security/limits.d directory,
#which are read in alphabetical order, override the settings in this
#file in case the domain is the same or more specific.
#That means for example that setting a limit for wildcard domain here
#can be overridden with a wildcard setting in a config file in the
#subdirectory, but a user specific setting here can be overridden only
#with a user specific setting in the subdirectory.
#
#Each line describes a limit for a user in the form:
#
#<domain> <type> <item> <value>
#
#Where:
#<domain> can be:
# - a user name
# - a group name, with @group syntax
# - the wildcard *, for default entry
# - the wildcard %, can be also used with %group syntax,
# for maxlogin limit
#
#<type> can have the two values:
# - "soft" for enforcing the soft limits
# - "hard" for enforcing hard limits
#
#<item> can be one of the following:
# - core - limits the core file size (KB)
# - data - max data size (KB)
# - fsize - maximum filesize (KB)
# - memlock - max locked-in-memory address space (KB)
# - nofile - max number of open file descriptors
# - rss - max resident set size (KB)
# - stack - max stack size (KB)
# - cpu - max CPU time (MIN)
# - nproc - max number of processes
# - as - address space limit (KB)
# - maxlogins - max number of logins for this user
# - maxsyslogins - max number of logins on the system
# - priority - the priority to run user process with
# - locks - max number of file locks the user can hold
# - sigpending - max number of pending signals
# - msgqueue - max memory used by POSIX message queues (bytes)
# - nice - max nice priority allowed to raise to values: [-20, 19]
# - rtprio - max realtime priority
#
#<domain> <type> <item> <value>
#

#* soft core 0
#* hard rss 10000
#@student hard nproc 20
#@faculty soft nproc 20
#@faculty hard nproc 50
#ftp hard nproc 0
#@student - maxlogins 4

# End of file
* - core unlimited
* - nofile 1048576
* - nproc 30000
Loading

0 comments on commit d1e5395

Please sign in to comment.