From 30fbf7def728e7f18916f92f0a034cc71ddddddb Mon Sep 17 00:00:00 2001 From: Bugra Eryilmaz Date: Mon, 20 May 2024 20:15:26 +0200 Subject: [PATCH 1/7] Enable row cache, change GC to G1, and add debugging parameters --- .../data-serving/client/setup_tables.txt | 2 ++ .../data-serving/server/docker-entrypoint.py | 32 +++++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/benchmarks/data-serving/client/setup_tables.txt b/benchmarks/data-serving/client/setup_tables.txt index 4a23ce053..57f092c5f 100644 --- a/benchmarks/data-serving/client/setup_tables.txt +++ b/benchmarks/data-serving/client/setup_tables.txt @@ -13,4 +13,6 @@ create table if not exists usertable ( field7 varchar, field8 varchar, field9 varchar); +ALTER TABLE ycsb.usertable WITH caching = {'keys' : 'ALL', 'rows_per_partition' : '120'}; +ALTER TABLE ycsb.usertable WITH default_time_to_live = 1024; exit; diff --git a/benchmarks/data-serving/server/docker-entrypoint.py b/benchmarks/data-serving/server/docker-entrypoint.py index 2fb420c71..63c6204bb 100755 --- a/benchmarks/data-serving/server/docker-entrypoint.py +++ b/benchmarks/data-serving/server/docker-entrypoint.py @@ -24,6 +24,7 @@ def get_ip(): parser.add_argument("--heap-size", type=int, help="The size of JVM heap in GB. Default is max(min(1/2 ram, 1GB), min(1/4 ram, 8GB)).") parser.add_argument("--seed-server-ip", help="The IP address of the seed server. This option is only for multiple-node deployment.") parser.add_argument("--affinity", help="The CPU ids (separated by comma) given to Cassandra to set JVM affinity. By default, Cassandra would use all CPU cores.") +parser.add_argument("--row-cache", help="The size of row cache, example 16GiB. 
By default, the row cache is disabled.", default="0") args = parser.parse_args() @@ -56,6 +57,8 @@ def get_ip(): config["concurrent_reads"] = args.reader_count config["concurrent_counter_writes"] = args.reader_count config["concurrent_writes"] = args.writer_count +config["row_cache_size"] = args.row_cache +config["row_cache_save_period"] = "1h" if args.seed_server_ip: config["seed_provider"][0]["parameters"][0]["seeds"] = f"{args.seed_server_ip}:7000" @@ -78,11 +81,40 @@ def get_ip(): jvm_options[idx] = "" if l.startswith("-Xmx"): jvm_options[idx] = "" + # Disable CMS Garbage Collection + if l.startswith("-XX:+UseConcMarkSweepGC"): + jvm_options[idx] = "" + if l.startswith("-XX:+CMSParallelRemarkEnabled"): + jvm_options[idx] = "" + if l.startswith("-XX:SurvivorRatio"): + jvm_options[idx] = "" + if l.startswith("-XX:MaxTenuringThreshold"): + jvm_options[idx] = "" + if l.startswith("-XX:CMSInitiatingOccupancyFraction"): + jvm_options[idx] = "" + if l.startswith("-XX:+UseCMSInitiatingOccupancyOnly"): + jvm_options[idx] = "" + if l.startswith("-XX:CMSWaitDuration"): + jvm_options[idx] = "" + if l.startswith("-XX:+CMSParallelInitialMarkEnabled"): + jvm_options[idx] = "" + if l.startswith("-XX:+CMSEdenChunksRecordAlways"): + jvm_options[idx] = "" + if l.startswith("-XX:+CMSClassUnloadingEnabled"): + jvm_options[idx] = "" + # Add heap size jvm_options.append(f"-Xms{args.heap_size}G\n") jvm_options.append(f"-Xmx{args.heap_size}G\n") + # Add G1 Garbage Collection + jvm_options.append("-XX:+UseG1GC\n") + jvm_options.append("-XX:+ParallelRefProcEnabled\n") + + # Add PreserveFramePointer for flamegraph + jvm_options.append("-XX:+PreserveFramePointer\n") + if args.affinity: found = False for idx, l in enumerate(jvm_options): From 0481a12f71984ec022cc28243bf3ceb8c2c54a51 Mon Sep 17 00:00:00 2001 From: Bugra Eryilmaz Date: Mon, 20 May 2024 20:32:15 +0200 Subject: [PATCH 2/7] fix java options --- .../data-serving/server/docker-entrypoint.py | 68 +++++++++++-------- 1 file changed, 
39 insertions(+), 29 deletions(-) diff --git a/benchmarks/data-serving/server/docker-entrypoint.py b/benchmarks/data-serving/server/docker-entrypoint.py index 63c6204bb..b6f535965 100755 --- a/benchmarks/data-serving/server/docker-entrypoint.py +++ b/benchmarks/data-serving/server/docker-entrypoint.py @@ -43,6 +43,9 @@ def get_ip(): if not path.exists(f"{CASSANDRA_CONFIG}/jvm-server.options.bak"): shutil.copy(f"{CASSANDRA_CONFIG}/jvm-server.options", f"{CASSANDRA_CONFIG}/jvm-server.options.bak") +if not path.exists(f"{CASSANDRA_CONFIG}/jvm11-server.options.bak"): + shutil.copy(f"{CASSANDRA_CONFIG}/jvm11-server.options", f"{CASSANDRA_CONFIG}/jvm11-server.options.bak") + # Now, modify the cassandra.yaml with open(f"{CASSANDRA_CONFIG}/cassandra.yaml") as f: @@ -81,40 +84,11 @@ def get_ip(): jvm_options[idx] = "" if l.startswith("-Xmx"): jvm_options[idx] = "" - # Disable CMS Garbage Collection - if l.startswith("-XX:+UseConcMarkSweepGC"): - jvm_options[idx] = "" - if l.startswith("-XX:+CMSParallelRemarkEnabled"): - jvm_options[idx] = "" - if l.startswith("-XX:SurvivorRatio"): - jvm_options[idx] = "" - if l.startswith("-XX:MaxTenuringThreshold"): - jvm_options[idx] = "" - if l.startswith("-XX:CMSInitiatingOccupancyFraction"): - jvm_options[idx] = "" - if l.startswith("-XX:+UseCMSInitiatingOccupancyOnly"): - jvm_options[idx] = "" - if l.startswith("-XX:CMSWaitDuration"): - jvm_options[idx] = "" - if l.startswith("-XX:+CMSParallelInitialMarkEnabled"): - jvm_options[idx] = "" - if l.startswith("-XX:+CMSEdenChunksRecordAlways"): - jvm_options[idx] = "" - if l.startswith("-XX:+CMSClassUnloadingEnabled"): - jvm_options[idx] = "" - # Add heap size jvm_options.append(f"-Xms{args.heap_size}G\n") jvm_options.append(f"-Xmx{args.heap_size}G\n") - # Add G1 Garbage Collection - jvm_options.append("-XX:+UseG1GC\n") - jvm_options.append("-XX:+ParallelRefProcEnabled\n") - - # Add PreserveFramePointer for flamegraph - jvm_options.append("-XX:+PreserveFramePointer\n") - if args.affinity: 
found = False for idx, l in enumerate(jvm_options): @@ -128,5 +102,41 @@ def get_ip(): with open(f"{CASSANDRA_CONFIG}/jvm-server.options", "w") as f: f.writelines(jvm_options) +# Then, process the jvm11.options +with open(f"{CASSANDRA_CONFIG}/jvm11-server.options") as f: + jvm11_options = f.readlines() + +for idx, l in enumerate(jvm11_options): + # Disable CMS Garbage Collection + if l.startswith("-XX:+UseConcMarkSweepGC"): + jvm11_options[idx] = "" + if l.startswith("-XX:+CMSParallelRemarkEnabled"): + jvm11_options[idx] = "" + if l.startswith("-XX:SurvivorRatio"): + jvm11_options[idx] = "" + if l.startswith("-XX:MaxTenuringThreshold"): + jvm11_options[idx] = "" + if l.startswith("-XX:CMSInitiatingOccupancyFraction"): + jvm11_options[idx] = "" + if l.startswith("-XX:+UseCMSInitiatingOccupancyOnly"): + jvm11_options[idx] = "" + if l.startswith("-XX:CMSWaitDuration"): + jvm11_options[idx] = "" + if l.startswith("-XX:+CMSParallelInitialMarkEnabled"): + jvm11_options[idx] = "" + if l.startswith("-XX:+CMSEdenChunksRecordAlways"): + jvm11_options[idx] = "" + if l.startswith("-XX:+CMSClassUnloadingEnabled"): + jvm11_options[idx] = "" + # Add G1 Garbage Collection + jvm11_options.append("-XX:+UseG1GC\n") + jvm11_options.append("-XX:+ParallelRefProcEnabled\n") + # Add PreserveFramePointer for flamegraph + jvm11_options.append("-XX:+PreserveFramePointer\n") + +# Write it back +with open(f"{CASSANDRA_CONFIG}/jvm11-server.options", "w") as f: + f.writelines(jvm11_options) + os.execvp("cassandra", ["cassandra", "-R", "-f"]) From 1ba83e715b9ca80dbbb4afd1c54d24ed7cf1ebc8 Mon Sep 17 00:00:00 2001 From: Bugra Eryilmaz Date: Mon, 20 May 2024 21:26:55 +0200 Subject: [PATCH 3/7] jvm11 options fix --- benchmarks/data-serving/server/Dockerfile | 1 + .../data-serving/server/docker-entrypoint.py | 40 ------- .../data-serving/server/jvm11-server.options | 108 ++++++++++++++++++ 3 files changed, 109 insertions(+), 40 deletions(-) create mode 100644 
benchmarks/data-serving/server/jvm11-server.options diff --git a/benchmarks/data-serving/server/Dockerfile b/benchmarks/data-serving/server/Dockerfile index 2b8af1aad..d4b47a3d8 100644 --- a/benchmarks/data-serving/server/Dockerfile +++ b/benchmarks/data-serving/server/Dockerfile @@ -3,6 +3,7 @@ FROM cloudsuite/cassandra:4.1.0 RUN apt update && apt install -y --no-install-recommends python3-yaml && rm -rf /var/lib/apt/lists/* COPY docker-entrypoint.py / +COPY jvm11-server.options /etc/cassandra/jvm11-server.options ENTRYPOINT ["/docker-entrypoint.py"] diff --git a/benchmarks/data-serving/server/docker-entrypoint.py b/benchmarks/data-serving/server/docker-entrypoint.py index b6f535965..1d60f7252 100755 --- a/benchmarks/data-serving/server/docker-entrypoint.py +++ b/benchmarks/data-serving/server/docker-entrypoint.py @@ -43,10 +43,6 @@ def get_ip(): if not path.exists(f"{CASSANDRA_CONFIG}/jvm-server.options.bak"): shutil.copy(f"{CASSANDRA_CONFIG}/jvm-server.options", f"{CASSANDRA_CONFIG}/jvm-server.options.bak") -if not path.exists(f"{CASSANDRA_CONFIG}/jvm11-server.options.bak"): - shutil.copy(f"{CASSANDRA_CONFIG}/jvm11-server.options", f"{CASSANDRA_CONFIG}/jvm11-server.options.bak") - - # Now, modify the cassandra.yaml with open(f"{CASSANDRA_CONFIG}/cassandra.yaml") as f: config = yaml.safe_load(f) @@ -102,41 +98,5 @@ def get_ip(): with open(f"{CASSANDRA_CONFIG}/jvm-server.options", "w") as f: f.writelines(jvm_options) -# Then, process the jvm11.options -with open(f"{CASSANDRA_CONFIG}/jvm11-server.options") as f: - jvm11_options = f.readlines() - -for idx, l in enumerate(jvm11_options): - # Disable CMS Garbage Collection - if l.startswith("-XX:+UseConcMarkSweepGC"): - jvm11_options[idx] = "" - if l.startswith("-XX:+CMSParallelRemarkEnabled"): - jvm11_options[idx] = "" - if l.startswith("-XX:SurvivorRatio"): - jvm11_options[idx] = "" - if l.startswith("-XX:MaxTenuringThreshold"): - jvm11_options[idx] = "" - if l.startswith("-XX:CMSInitiatingOccupancyFraction"): - 
jvm11_options[idx] = "" - if l.startswith("-XX:+UseCMSInitiatingOccupancyOnly"): - jvm11_options[idx] = "" - if l.startswith("-XX:CMSWaitDuration"): - jvm11_options[idx] = "" - if l.startswith("-XX:+CMSParallelInitialMarkEnabled"): - jvm11_options[idx] = "" - if l.startswith("-XX:+CMSEdenChunksRecordAlways"): - jvm11_options[idx] = "" - if l.startswith("-XX:+CMSClassUnloadingEnabled"): - jvm11_options[idx] = "" - # Add G1 Garbage Collection - jvm11_options.append("-XX:+UseG1GC\n") - jvm11_options.append("-XX:+ParallelRefProcEnabled\n") - # Add PreserveFramePointer for flamegraph - jvm11_options.append("-XX:+PreserveFramePointer\n") - -# Write it back -with open(f"{CASSANDRA_CONFIG}/jvm11-server.options", "w") as f: - f.writelines(jvm11_options) - os.execvp("cassandra", ["cassandra", "-R", "-f"]) diff --git a/benchmarks/data-serving/server/jvm11-server.options b/benchmarks/data-serving/server/jvm11-server.options new file mode 100644 index 000000000..31ecc14bb --- /dev/null +++ b/benchmarks/data-serving/server/jvm11-server.options @@ -0,0 +1,108 @@ +########################################################################### +# jvm11-server.options # +# # +# See jvm-server.options. This file is specific for Java 11 and newer. # +########################################################################### + +################# +# GC SETTINGS # +################# + + + +### CMS Settings +#-XX:+UseConcMarkSweepGC +#-XX:+CMSParallelRemarkEnabled +#-XX:SurvivorRatio=8 +#-XX:MaxTenuringThreshold=1 +#-XX:CMSInitiatingOccupancyFraction=75 +#-XX:+UseCMSInitiatingOccupancyOnly +#-XX:CMSWaitDuration=10000 +#-XX:+CMSParallelInitialMarkEnabled +#-XX:+CMSEdenChunksRecordAlways +## some JVMs will fill up their heap when accessed via JMX, see CASSANDRA-6541 +#-XX:+CMSClassUnloadingEnabled + + + +### G1 Settings +## Use the Hotspot garbage-first collector. 
+-XX:+UseG1GC +-XX:+ParallelRefProcEnabled + +# +## Have the JVM do less remembered set work during STW, instead +## preferring concurrent GC. Reduces p99.9 latency. +#-XX:G1RSetUpdatingPauseTimePercent=5 +# +## Main G1GC tunable: lowering the pause target will lower throughput and vice versa. +## 200ms is the JVM default and lowest viable setting +## 1000ms increases throughput. Keep it smaller than the timeouts in cassandra.yaml. +#-XX:MaxGCPauseMillis=500 + +## Optional G1 Settings +# Save CPU time on large (>= 16GB) heaps by delaying region scanning +# until the heap is 70% full. The default in Hotspot 8u40 is 40%. +#-XX:InitiatingHeapOccupancyPercent=70 + +# For systems with > 8 cores, the default ParallelGCThreads is 5/8 the number of logical cores. +# Otherwise equal to the number of cores when 8 or less. +# Machines with > 10 cores should try setting these to <= full cores. +#-XX:ParallelGCThreads=16 +# By default, ConcGCThreads is 1/4 of ParallelGCThreads. +# Setting both to the same value can reduce STW durations. 
+#-XX:ConcGCThreads=16 + + +### JPMS + +-Djdk.attach.allowAttachSelf=true +--add-exports java.base/jdk.internal.misc=ALL-UNNAMED +--add-exports java.base/jdk.internal.ref=ALL-UNNAMED +--add-exports java.base/sun.nio.ch=ALL-UNNAMED +--add-exports java.management.rmi/com.sun.jmx.remote.internal.rmi=ALL-UNNAMED +--add-exports java.rmi/sun.rmi.registry=ALL-UNNAMED +--add-exports java.rmi/sun.rmi.server=ALL-UNNAMED +--add-exports java.sql/java.sql=ALL-UNNAMED + +--add-opens java.base/java.lang.module=ALL-UNNAMED +--add-opens java.base/jdk.internal.loader=ALL-UNNAMED +--add-opens java.base/jdk.internal.ref=ALL-UNNAMED +--add-opens java.base/jdk.internal.reflect=ALL-UNNAMED +--add-opens java.base/jdk.internal.math=ALL-UNNAMED +--add-opens java.base/jdk.internal.module=ALL-UNNAMED +--add-opens java.base/jdk.internal.util.jar=ALL-UNNAMED +--add-opens jdk.management/com.sun.management.internal=ALL-UNNAMED + + +### GC logging options -- uncomment to enable + +# Java 11 (and newer) GC logging options: +# See description of https://bugs.openjdk.java.net/browse/JDK-8046148 for details about the syntax +# The following is the equivalent to -XX:+PrintGCDetails -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=10 -XX:GCLogFileSize=10M +#-Xlog:gc=info,heap*=trace,age*=debug,safepoint=info,promotion*=trace:file=/var/log/cassandra/gc.log:time,uptime,pid,tid,level:filecount=10,filesize=10485760 + +# Notes for Java 8 migration: +# +# -XX:+PrintGCDetails maps to -Xlog:gc*:... - i.e. 
add a '*' after "gc" +# -XX:+PrintGCDateStamps maps to decorator 'time' +# +# -XX:+PrintHeapAtGC maps to 'heap' with level 'trace' +# -XX:+PrintTenuringDistribution maps to 'age' with level 'debug' +# -XX:+PrintGCApplicationStoppedTime maps to 'safepoint' with level 'info' +# -XX:+PrintPromotionFailure maps to 'promotion' with level 'trace' +# -XX:PrintFLSStatistics=1 maps to 'freelist' with level 'trace' + +### Netty Options + +# On Java >= 9 Netty requires the io.netty.tryReflectionSetAccessible system property to be set to true to enable +# creation of direct buffers using Unsafe. Without it, this falls back to ByteBuffer.allocateDirect which has +# inferior performance and risks exceeding MaxDirectMemory +-Dio.netty.tryReflectionSetAccessible=true + +### Preserve Frame pointer for flamegraph +-XX:+PreserveFramePointer + +# The newline in the end of file is intentional + + From 2cd6acf3b884f258971a4a5423503617d16ae278 Mon Sep 17 00:00:00 2001 From: Bugra Eryilmaz Date: Mon, 20 May 2024 21:29:12 +0200 Subject: [PATCH 4/7] added doc --- docs/benchmarks/data-serving.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/benchmarks/data-serving.md b/docs/benchmarks/data-serving.md index 48151820c..a3e951ae4 100644 --- a/docs/benchmarks/data-serving.md +++ b/docs/benchmarks/data-serving.md @@ -23,6 +23,8 @@ The following options can modify the settings of the server: - `--writer-count=`: The number of writer threads Cassandra uses. Cassandra recommends 8 threads per CPU core. The default value is 32. - `--heap-size=`: JVM heap size. Its unit is GB, and by default, JVM uses `max(min(1/2 ram, 1GB), min(1/4 ram, 8GB))`. It is good to increase the value when the server has enough DRAM for better performance or lower the value for explicit resource restriction. - `--affinity=`: The CPUs Cassandra works on. This setting let Cassandra be aware of its CPU affinity explicitly. 
It should be used together with the container's resource management option (e.g., `--cpuset-cpus`). +- `--row-cache=`: The size of the row cache, example 16GiB. By default, the row cache is disabled. + ### Multiple Server Containers From 8894bb95bfdbb0cddd64cf013abbf33b4762a19c Mon Sep 17 00:00:00 2001 From: Bugra Eryilmaz Date: Wed, 22 May 2024 11:58:29 +0200 Subject: [PATCH 5/7] added more explanations --- .wordlist.txt | 2 ++ benchmarks/data-serving/client/setup_tables.txt | 2 +- benchmarks/data-serving/server/docker-entrypoint.py | 2 +- docs/benchmarks/data-serving.md | 7 +++++-- 4 files changed, 9 insertions(+), 4 deletions(-) diff --git a/.wordlist.txt b/.wordlist.txt index 1cf1dae01..a4a9296ae 100644 --- a/.wordlist.txt +++ b/.wordlist.txt @@ -148,3 +148,5 @@ pre latencies TLS Elgg's +GiB +MiB \ No newline at end of file diff --git a/benchmarks/data-serving/client/setup_tables.txt b/benchmarks/data-serving/client/setup_tables.txt index 57f092c5f..296dba4f0 100644 --- a/benchmarks/data-serving/client/setup_tables.txt +++ b/benchmarks/data-serving/client/setup_tables.txt @@ -13,6 +13,6 @@ create table if not exists usertable ( field7 varchar, field8 varchar, field9 varchar); -ALTER TABLE ycsb.usertable WITH caching = {'keys' : 'ALL', 'rows_per_partition' : '120'}; +ALTER TABLE ycsb.usertable WITH caching = {'keys' : 'ALL', 'rows_per_partition' : 'ALL'}; ALTER TABLE ycsb.usertable WITH default_time_to_live = 1024; exit; diff --git a/benchmarks/data-serving/server/docker-entrypoint.py b/benchmarks/data-serving/server/docker-entrypoint.py index 1d60f7252..363772782 100755 --- a/benchmarks/data-serving/server/docker-entrypoint.py +++ b/benchmarks/data-serving/server/docker-entrypoint.py @@ -24,7 +24,7 @@ def get_ip(): parser.add_argument("--heap-size", type=int, help="The size of JVM heap in GB. Default is max(min(1/2 ram, 1GB), min(1/4 ram, 8GB)).") parser.add_argument("--seed-server-ip", help="The IP address of the seed server. 
This option is only for multiple-node deployment.") parser.add_argument("--affinity", help="The CPU ids (separated by comma) given to Cassandra to set JVM affinity. By default, Cassandra would use all CPU cores.") -parser.add_argument("--row-cache", help="The size of row cache, example 16GiB. By default, the row cache is disabled.", default="0") +parser.add_argument("--row-cache", help="The size of the row cache. Also specify the unit, example 16GiB or 256MiB. By default, the row cache is disabled.", default="0") args = parser.parse_args() diff --git a/docs/benchmarks/data-serving.md b/docs/benchmarks/data-serving.md index a3e951ae4..da626522b 100644 --- a/docs/benchmarks/data-serving.md +++ b/docs/benchmarks/data-serving.md @@ -23,7 +23,7 @@ The following options can modify the settings of the server: - `--writer-count=`: The number of writer threads Cassandra uses. Cassandra recommends 8 threads per CPU core. The default value is 32. - `--heap-size=`: JVM heap size. Its unit is GB, and by default, JVM uses `max(min(1/2 ram, 1GB), min(1/4 ram, 8GB))`. It is good to increase the value when the server has enough DRAM for better performance or lower the value for explicit resource restriction. - `--affinity=`: The CPUs Cassandra works on. This setting let Cassandra be aware of its CPU affinity explicitly. It should be used together with the container's resource management option (e.g., `--cpuset-cpus`). -- `--row-cache=`: The size of the row cache, example 16GiB. By default, the row cache is disabled. +- `--row-cache=`: The size of the row cache. Also specify the unit, for example `16GiB` or `256MiB`. By default, the row cache is disabled. ### Multiple Server Containers @@ -70,6 +70,8 @@ You can give your expected load, and YCSB will try to meet the requirement. The More detailed instructions on generating the dataset and load can be found in Step 5 at [this](http://github.com/brianfrankcooper/YCSB/wiki/Running-a-Workload) link. 
Although Step 5 in the link describes the data loading procedure, other steps (e.g., 1, 2, 3, 4) are useful for understanding the YCSB settings. In this case, our scripts (`warmup.sh` and `load.sh`) are good templates for further customization. +There are a couple of pre-defined workloads from YCSB. For example, Workload C has 100% read operations with no write operations. The default workload is Workload A (50% read + 50% write). You can change the workload by modifying the `load.sh` script. The other workloads can be found at [this](https://github.com/brianfrankcooper/YCSB/wiki/Core-Workloads) link. + A rule of thumb on the dataset size ----------------------------------- If you are only profiling CPU microarchitectures, you should ensure that the hot data part (3% ~ 5% of the dataset) cannot be buffered on-chip to mimic a realistic situation. Usually, a 10GB dataset is enough for a typical CPU with less than 50MB LLC. @@ -80,7 +82,8 @@ Tuning the server performance 2. The server settings are under the $CASSANDRA_PATH/conf folder. The main file is cassandra.yaml. The file has comments about all parameters. These parameters can also be found here: http://wiki.apache.org/cassandra/StorageConfiguration 3. Make sure that half of the main memory is free for the operating system file buffers and caching. 4. As a workload based on JVM, you need to load the server to warm up the JIT cache. You can keep monitoring the throughput and tail latency and take measurement when it becomes relatively stable. As a reference, it takes around 2 minutes for a modern x86 machine (Skylake) to attain stable throughput (5000 RPS, 50% read and 50% update). -5. The following links are useful pointers for performance tuning: +5. The server has row cache disabled by default. It is used to cache the data rows in memory. It is useful for read-intensive workloads (e.g., Workload B and Workload C) and can improve throughput. 
However, it is not recommended for write-intensive workloads as it thrashes the cache frequently. Tune it according to your workload. +6. The following links are useful pointers for performance tuning: a. http://spyced.blogspot.com/2010/01/linux-performance-basics.html From a3a7d8619062646b4758c3c1ed4e7d374c31516f Mon Sep 17 00:00:00 2001 From: Bugra Eryilmaz <70061581+BugraEryilmaz@users.noreply.github.com> Date: Thu, 23 May 2024 00:15:19 +0200 Subject: [PATCH 6/7] Update setup_tables.txt --- benchmarks/data-serving/client/setup_tables.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/data-serving/client/setup_tables.txt b/benchmarks/data-serving/client/setup_tables.txt index 296dba4f0..596a97f81 100644 --- a/benchmarks/data-serving/client/setup_tables.txt +++ b/benchmarks/data-serving/client/setup_tables.txt @@ -14,5 +14,5 @@ create table if not exists usertable ( field8 varchar, field9 varchar); ALTER TABLE ycsb.usertable WITH caching = {'keys' : 'ALL', 'rows_per_partition' : 'ALL'}; -ALTER TABLE ycsb.usertable WITH default_time_to_live = 1024; +ALTER TABLE ycsb.usertable WITH default_time_to_live = 1024000; exit; From 431081f1e29c9aff865bacd58cd6470d1651f741 Mon Sep 17 00:00:00 2001 From: Bugra Eryilmaz <70061581+BugraEryilmaz@users.noreply.github.com> Date: Thu, 23 May 2024 00:16:09 +0200 Subject: [PATCH 7/7] Update docker-entrypoint.py --- benchmarks/data-serving/server/docker-entrypoint.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmarks/data-serving/server/docker-entrypoint.py b/benchmarks/data-serving/server/docker-entrypoint.py index 363772782..f48c03f43 100755 --- a/benchmarks/data-serving/server/docker-entrypoint.py +++ b/benchmarks/data-serving/server/docker-entrypoint.py @@ -24,7 +24,7 @@ def get_ip(): parser.add_argument("--heap-size", type=int, help="The size of JVM heap in GB. 
Default is max(min(1/2 ram, 1GB), min(1/4 ram, 8GB)).") parser.add_argument("--seed-server-ip", help="The IP address of the seed server. This option is only for multiple-node deployment.") parser.add_argument("--affinity", help="The CPU ids (separated by comma) given to Cassandra to set JVM affinity. By default, Cassandra would use all CPU cores.") -parser.add_argument("--row-cache", help="The size of the row cache. Also specify the unit, example 16GiB or 256MiB. By default, the row cache is disabled.", default="0") +parser.add_argument("--row-cache", help="The size of the row cache. Also specify the unit, example 16GiB or 256MiB. By default, the row cache is disabled.", default="0MiB") args = parser.parse_args()