Skip to content

Commit

Permalink
Adding sysbench benchmark for unmanaged postgresql database
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 698090076
  • Loading branch information
Arushi-07 authored and copybara-github committed Nov 19, 2024
1 parent 960d867 commit 8dff6b7
Show file tree
Hide file tree
Showing 7 changed files with 700 additions and 10 deletions.
1 change: 1 addition & 0 deletions CHANGES.next.md
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,7 @@
- Add `--vm_log_bucket` flag, offering users the option to upload the
logs captured via the `--capture_vm_logs` flag to a GCS bucket.
- Add chromium_compile_benchmark.
- Add unmanaged_postgresql_sysbench benchmark.

### Enhancements:

Expand Down
14 changes: 14 additions & 0 deletions perfkitbenchmarker/data/postgresql/database_setup_queries.sql.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
-- Database bootstrap script (Jinja2 template; passwords are template variables).
-- The backslash-c lines are psql meta-commands that switch the connected database.

-- Role with the REPLICATION attribute.
CREATE USER repl WITH REPLICATION ENCRYPTED PASSWORD '{{ repl_user_password }}';

-- Benchmark login role; it owns both the sysbench and sysbencht databases.
CREATE USER sysbench WITH LOGIN ENCRYPTED PASSWORD '{{ sysbench_user_password }}';
CREATE DATABASE sysbench WITH OWNER=sysbench;
CREATE DATABASE sysbencht WITH OWNER=sysbench;
-- Create the pg_stat_statements extension in the postgres, sysbench and
-- sysbencht databases in turn.
\c postgres
CREATE EXTENSION pg_stat_statements;
\c sysbench
CREATE EXTENSION pg_stat_statements;
\c sysbencht
CREATE EXTENSION pg_stat_statements;
-- Two physical replication slots, slot0 and slot1.
SELECT pg_create_physical_replication_slot('slot0');
SELECT pg_create_physical_replication_slot('slot1');
-- Superuser role; presumably used by PMM monitoring (TODO confirm).
CREATE USER pmmuser WITH SUPERUSER ENCRYPTED PASSWORD '{{ pmm_user_password }}';
15 changes: 15 additions & 0 deletions perfkitbenchmarker/data/postgresql/pg_hba.conf.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Client authentication rules.
# TYPE DATABASE USER ADDRESS METHOD
# "local" is for Unix domain socket connections only
local all all trust
# IPv4 local connections:
host all all 127.0.0.1/32 trust
# Password (md5) access to the sysbench and sysbencht databases for the
# sysbench user from any IPv4 address.
host sysbench sysbench 0.0.0.0/0 md5
host sysbencht sysbench 0.0.0.0/0 md5
# Allow replication connections from localhost, by a user with the
# replication privilege.
local replication all trust
host replication all 127.0.0.1/32 trust
# Replication connections for the repl user from any IPv4 address (md5).
host replication repl 0.0.0.0/0 md5
# Catch-all: md5 password authentication for any database and user from any
# IPv4 or IPv6 address.
# NOTE(review): these rules also cover the sysbench-specific rules above;
# confirm whether both sets are intended.
host all all 0.0.0.0/0 md5
host all all ::/0 md5
25 changes: 25 additions & 0 deletions perfkitbenchmarker/data/postgresql/postgresql-custom.conf.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Custom PostgreSQL server settings (Jinja2 template).
# listen_address, effective_cache_size, shared_buffers and data_directory are
# template variables filled in when the file is rendered.
listen_addresses = '{{ listen_address }}'
# WAL level and sender count used together with the replication setup.
wal_level = 'replica'
ssl = off
max_connections = 3000
maintenance_work_mem = '2GB'
work_mem = '64MB'
# WAL size bounds and checkpoint pacing.
max_wal_size = '100GB'
min_wal_size = '10GB'
checkpoint_timeout = '1h'
checkpoint_completion_target = '0.9'
bgwriter_lru_maxpages = 800
autovacuum_vacuum_cost_limit = 800
jit = off
random_page_cost = 1
max_wal_senders = 5
full_page_writes = ON
# Memory sizing comes from the template variables.
effective_cache_size = {{ effective_cache_size }}
shared_buffers = {{ shared_buffers }}
# NOTE(review): presumably requires huge pages to be reserved on the host
# before the server starts; confirm against the system setup code.
huge_pages = on
shared_preload_libraries = 'pg_stat_statements'
track_activity_query_size = 2048 # Increase tracked query string size
pg_stat_statements.track = all # Track all statements including nested
track_io_timing = on # Capture read/write stats
# NOTE(review): boolean written as 'True'; confirm the target PostgreSQL
# version accepts this spelling for wal_compression.
wal_compression = 'True'
data_directory = '{{ data_directory }}'
Original file line number Diff line number Diff line change
@@ -0,0 +1,288 @@
# Copyright 2024 PerfKitBenchmarker Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Sysbench Benchmark for unmanaged PostgreSQL db on a VM.
This benchmark measures the performance of Sysbench workloads on an unmanaged
PostgreSQL database.
"""

import copy
import logging

from absl import flags
from perfkitbenchmarker import background_tasks
from perfkitbenchmarker import benchmark_spec as bm_spec
from perfkitbenchmarker import configs
from perfkitbenchmarker import errors
from perfkitbenchmarker import os_types
from perfkitbenchmarker import sample
from perfkitbenchmarker.linux_packages import postgresql16
from perfkitbenchmarker.linux_packages import sysbench


# Global absl flag registry; the functions in this module read their settings
# from it.
FLAGS = flags.FLAGS


# Benchmark name; it is also the top-level key of BENCHMARK_CONFIG.
BENCHMARK_NAME = 'unmanaged_postgresql_sysbench'
# Default benchmark config (YAML). It declares a `client` VM group and a
# `server` VM group with a disk spec, each with per-cloud settings for GCP,
# AWS and Azure, plus default flag values. GetConfig() loads it and applies
# flag-driven overrides.
BENCHMARK_CONFIG = """
unmanaged_postgresql_sysbench:
description: PostgreSQL on a VM benchmarked using Sysbench.
vm_groups:
client:
vm_spec:
GCP:
machine_type: c3-standard-22
zone: us-east1-b
AWS:
machine_type: m7i.4xlarge
zone: us-east-1a
Azure:
machine_type: Standard_D16s_v5
zone: eastus
server:
vm_spec:
GCP:
machine_type: c3-standard-22
zone: us-east1-b
AWS:
machine_type: r7i.4xlarge
zone: us-east-1a
Azure:
machine_type: Standard_E20s_v5
zone: eastus
disk_spec:
GCP:
disk_size: 500
disk_type: pd-ssd
provisioned_iops: 160000
provisioned_throughput: 2400
num_striped_disks: 1
AWS:
disk_size: 500
disk_type: gp3
provisioned_iops: 16000
provisioned_throughput: 1000
num_striped_disks: 5
Azure:
disk_size: 200
disk_type: Premium_LRS_V2
provisioned_iops: 40000
provisioned_throughput: 800
num_striped_disks: 2
flags:
sysbench_version: df89d34c410a2277e19f77e47e535d0890b2029b
disk_fs_type: xfs
"""

# Sysbench database driver; also passed as `db_driver` to the sysbench
# installers.
_DATABASE_TYPE = 'pgsql'
# Used as both the database name and the database user in the sysbench
# parameters.
_DATABASE_NAME = 'sysbench'

# Values of --sysbench_testname accepted by _GetSysbenchParameters.
_TPCC = 'percona_tpcc'
_OLTP_READ_WRITE = 'oltp_read_write'
_OLTP_READ_ONLY = 'oltp_read_only'
_OLTP_WRITE_ONLY = 'oltp_write_only'
# Tests that run from the Lua scripts under sysbench.LUA_SCRIPT_PATH.
_OLTP = [_OLTP_READ_WRITE, _OLTP_READ_ONLY, _OLTP_WRITE_ONLY]

# Within this file the value is only recorded in sample metadata (see Run).
SHARED_BUFFER_SIZE = flags.DEFINE_integer(
'postgresql_shared_buffer_size',
10,
'Size of the shared buffer in the postgresql cluster (in Gb).',
)


def GetConfig(user_config):
  """Builds the benchmark config with user and flag overrides applied.

  On top of merging `user_config` into BENCHMARK_CONFIG, this:
    * points the server disk mount point at the PostgreSQL default for the
      current --os_type,
    * applies --db_machine_type / --client_vm_machine_type when they are set,
    * adds one `replica_<index>` VM group per zone in --db_replica_zones when
      --db_high_availability is set.

  Args:
    user_config: User-supplied config overrides, passed to configs.LoadConfig.

  Returns:
    The loaded and adjusted benchmark config.
  """
  config = configs.LoadConfig(BENCHMARK_CONFIG, user_config, BENCHMARK_NAME)
  groups = config['vm_groups']

  # Rather than changing the database's default data dir in (multiple)
  # configs, mount the scratch disk at the database's default dir.
  for per_cloud_disk in groups['server']['disk_spec'].values():
    os_defaults = postgresql16.GetOSDependentDefaults(FLAGS.os_type)
    per_cloud_disk['mount_point'] = os_defaults['disk_mount_point']

  # Optional machine type overrides for the server and client groups.
  machine_type_overrides = (
      ('server', FLAGS.db_machine_type),
      ('client', FLAGS.client_vm_machine_type),
  )
  for group_name, machine_type in machine_type_overrides:
    if not machine_type:
      continue
    for per_cloud_vm in groups[group_name]['vm_spec'].values():
      per_cloud_vm['machine_type'] = machine_type

  # Each replica is a copy of the (already adjusted) server group, pinned to
  # its own zone on every cloud.
  if FLAGS.db_high_availability:
    for replica_index, replica_zone in enumerate(FLAGS.db_replica_zones):
      replica_group = copy.deepcopy(groups['server'])
      for per_cloud_vm in replica_group['vm_spec'].values():
        per_cloud_vm['zone'] = replica_zone
      groups[f'replica_{replica_index}'] = replica_group

  return config


def Prepare(benchmark_spec: bm_spec.BenchmarkSpec):
  """Sets up the database servers and clients, then loads the test data.

  Every VM gets the system settings and the `postgresql16` package. The first
  `server` VM is initialized as the primary, and every VM in a group whose name
  starts with `replica` is set up as a replica of it. Each client gets git and
  sysbench (plus the sysbench-tpcc checkout for the TPC-C test), and the first
  client runs the sysbench load command against the primary.

  Args:
    benchmark_spec: The benchmark specification holding the provisioned VMs.
  """

  def _InstallPostgres(vm):
    vm.Install('postgresql16')

  all_vms = benchmark_spec.vms
  vm_groups = benchmark_spec.vm_groups
  replica_servers = [
      replica_vm
      for group_name, group_vms in vm_groups.items()
      if group_name.startswith('replica')
      for replica_vm in group_vms
  ]

  background_tasks.RunThreaded(postgresql16.ConfigureSystemSettings, all_vms)
  background_tasks.RunThreaded(_InstallPostgres, all_vms)

  primary = vm_groups['server'][0]
  postgresql16.InitializeDatabase(primary)
  postgresql16.ConfigureAndRestart(primary, FLAGS.run_uri)
  for replica_index, replica_vm in enumerate(replica_servers):
    postgresql16.SetupReplica(
        primary, replica_vm, replica_index, FLAGS.run_uri
    )

  client_vms = vm_groups['client']
  for client_vm in client_vms:
    client_vm.InstallPackages('git')
    InstallSysbench(client_vm)
    if FLAGS.sysbench_testname != _TPCC:
      continue
    # The TPC-C test runs from a fresh checkout under /opt.
    client_vm.RemoteCommand(
        'cd /opt && sudo rm -fr sysbench-tpcc && '
        f'sudo git clone {sysbench.SYSBENCH_TPCC_REPRO}'
    )

  # Only the first client loads the data set.
  loader_vm = client_vms[0]
  load_parameters = _GetSysbenchParameters(
      primary.internal_ip,
      postgresql16.GetPsqlUserPassword(FLAGS.run_uri),
  )
  load_cmd = sysbench.BuildLoadCommand(load_parameters)
  logging.info('%s load command: %s', FLAGS.sysbench_testname, load_cmd)
  loader_vm.RemoteCommand(load_cmd)


def InstallSysbench(vm):
  """Installs sysbench with the PostgreSQL driver on `vm`.

  Amazon Linux and CentOS VMs use the yum installer; all other OS types use
  the apt installer.

  Args:
    vm: The VM to install sysbench on.
  """
  install_args = {'db_driver': _DATABASE_TYPE}
  yum_os_types = os_types.AMAZONLINUX_TYPES + os_types.CENTOS_TYPES
  installer = (
      sysbench.YumInstall if vm.OS_TYPE in yum_os_types else sysbench.AptInstall
  )
  installer(vm, args=install_args)


def _GetSysbenchParameters(primary_server_ip: str | None, password: str):
  """Builds the sysbench input parameters for the test selected by flags.

  Args:
    primary_server_ip: IP address of the primary database server.
    password: Password of the sysbench database user.

  Returns:
    A sysbench.SysbenchInputParameters populated for --sysbench_testname.

  Raises:
    errors.Setup.InvalidConfigurationError: If --sysbench_testname is neither
      one of the OLTP tests nor the TPC-C test.
  """
  params = sysbench.SysbenchInputParameters(
      db_driver=_DATABASE_TYPE,
      tables=FLAGS.sysbench_tables,
      threads=FLAGS.sysbench_load_threads,
      report_interval=FLAGS.sysbench_report_interval,
      db_user=_DATABASE_NAME,
      db_password=password,
      db_name=_DATABASE_NAME,
      host_ip=primary_server_ip,
  )
  params.port = 5432
  test_name = FLAGS.sysbench_testname

  if test_name in _OLTP:
    # OLTP tests run from the Lua scripts under sysbench.LUA_SCRIPT_PATH.
    params.built_in_test = True
    params.test = f'{sysbench.LUA_SCRIPT_PATH}{test_name}.lua'
    params.db_ps_mode = 'disable'
    params.skip_trx = True
    params.table_size = FLAGS.sysbench_table_size
    return params

  if test_name == _TPCC:
    # TPC-C runs from the sysbench-tpcc checkout under /opt.
    params.custom_lua_packages_path = '/opt/sysbench-tpcc/?.lua'
    params.built_in_test = False
    params.test = '/opt/sysbench-tpcc/tpcc.lua'
    params.scale = FLAGS.sysbench_scale
    params.use_fk = FLAGS.sysbench_use_fk
    params.trx_level = FLAGS.sysbench_txn_isolation_level
    return params

  raise errors.Setup.InvalidConfigurationError(
      f'Test --sysbench_testname={FLAGS.sysbench_testname} is not supported.'
  )


def Run(benchmark_spec: bm_spec.BenchmarkSpec) -> list[sample.Sample]:
  """Runs sysbench once per configured thread count and collects samples.

  A thread count whose remote command fails is logged and skipped. For every
  transaction metric, the sample with the highest value across all thread
  counts is reported again under the name `max_<metric>`.

  Args:
    benchmark_spec: The benchmark specification. Contains all data that is
      required to run the benchmark.

  Returns:
    All time series, latency and transaction samples, followed by the
    `max_<metric>` samples.

  Raises:
    errors.Benchmarks.RunError: If no run produced any sample.
  """
  server_vm = benchmark_spec.vm_groups['server'][0]
  client_vm = benchmark_spec.vm_groups['client'][0]
  params = _GetSysbenchParameters(
      server_vm.internal_ip,
      postgresql16.GetPsqlUserPassword(FLAGS.run_uri),
  )

  samples = []
  # Transaction metric name (tps/qps) -> sample holding the highest value so
  # far.
  best_by_metric = {}
  for num_threads in FLAGS.sysbench_run_threads:
    params.threads = num_threads
    run_cmd = sysbench.BuildRunCommand(params)
    logging.info('%s run command: %s', FLAGS.sysbench_testname, run_cmd)
    try:
      stdout, _ = client_vm.RemoteCommand(
          run_cmd, timeout=2 * FLAGS.sysbench_run_seconds
      )
    except errors.VirtualMachine.RemoteCommandError as e:
      # Keep going so the remaining thread counts can still report results.
      logging.exception('Failed to run sysbench command: %s', e)
      continue

    run_metadata = sysbench.GetMetadata(params)
    run_metadata['shared_buffer_size'] = f'{SHARED_BUFFER_SIZE.value}GB'

    samples.extend(sysbench.ParseSysbenchTimeSeries(stdout, run_metadata))
    samples.extend(sysbench.ParseSysbenchLatency([stdout], run_metadata))
    transaction_samples = sysbench.ParseSysbenchTransactions(
        stdout, run_metadata
    )
    samples.extend(transaction_samples)

    # Track the best tps/qps sample seen across all thread counts.
    for candidate in transaction_samples:
      best_so_far = best_by_metric.get(candidate.metric, None)
      if not best_so_far or best_so_far.value < candidate.value:
        best_by_metric[candidate.metric] = candidate

  if not samples:
    raise errors.Benchmarks.RunError(
        'None of the sysbench tests were successful.'
    )

  # Report the max tps/qps as new metrics.
  for best in best_by_metric.values():
    max_metadata = copy.deepcopy(best.metadata)
    max_metadata['searched_thread_counts'] = FLAGS.sysbench_run_threads
    samples.append(
        sample.Sample(
            'max_' + best.metric, best.value, best.unit, metadata=max_metadata
        )
    )
  return samples


def Cleanup(benchmark_spec: bm_spec.BenchmarkSpec):
  """Performs no cleanup; the benchmark spec is accepted and discarded.

  Args:
    benchmark_spec: The benchmark specification (unused).
  """
  del benchmark_spec  # Unused.
Loading

0 comments on commit 8dff6b7

Please sign in to comment.