postgresql_tune.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright © 2014-2018 Alexey Vasiliev (original pgtune project on
# https://github.com/le0pard/pgtune/, licensed under MIT license)
# Copyright © 2017-2018 Mohamed El Morabity (adaptation for Python and Ansible)
#
# This program is free software: you can redistribute it and/or modify it under the terms of the GNU
# General Public License as published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
# even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with this program. If not,
# see <http://www.gnu.org/licenses/>.
import datetime
from distutils.version import LooseVersion
import math
import os
import re
import tempfile
from ansible.module_utils.basic import AnsibleModule
from ansible.module_utils.parsing.convert_bool import BOOLEANS
DOCUMENTATION = '''
---
module: postgresql_tune
author: Mohamed El Morabity
short_description: Tune PostgreSQL configuration according to hardware.
description:
- Optimize PostgreSQL configuration by setting parameters in the postgresql.conf file according to the hardware configuration and database usage. Tuning rules are based on https://github.com/le0pard/pgtune/, developed by Alexey Vasiliev and released under the MIT license.
options:
path:
description:
- Path to the PostgreSQL postgresql.conf to modify.
type: path
required: True
aliases:
- dest
- destfile
- name
db_version:
description:
- PostgreSQL version.
type: string
required: False
default: 9.6
os_type:
description:
- Operating system type on which PostgreSQL runs.
type: string
required: False
default: linux
choices:
- linux
- windows
db_type:
description:
- Database usage type.
- Available types are web applications (web), online transaction processing systems (oltp), data warehouses (dw), desktop applications (desktop), mixed types of applications (mixed).
type: string
required: False
default: mixed
choices:
- desktop
- dw
- mixed
- oltp
- web
total_memory:
description:
- Total memory on the target PostgreSQL server, in megabytes.
type: int
required: True
connections:
description:
- Maximum number of connections for PostgreSQL clients (minimum value is 10).
- Will be computed automatically, depending on the database type, if not specified.
type: int
required: False
harddrive_type:
description:
- Hard drive type that PostgreSQL uses as storage for data.
type: string
required: False
default: hdd
choices:
- hdd
- ssd
- san
cpus:
description:
- Number of CPUs that PostgreSQL can use.
type: int
required: False
default: 1
backup:
description:
- Create a backup file including the timestamp information so you can get the original file back if you somehow clobbered it incorrectly.
type: bool
required: False
default: False
follow:
description:
- This flag indicates that filesystem links, if they exist, should be followed.
type: bool
required: False
default: False
others:
description:
- All arguments accepted by the file module also work here.
required: false
'''
EXAMPLES = '''
# Optimize PostgreSQL configuration
- local_action:
module: postgresql_tune
db_version: 9.2
os_type: linux
db_type: web
total_memory: 8192
path: /var/lib/pgsql/data/postgresql.conf
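# Additional illustration (not part of the original examples): this assumes an SSD-backed,
# 4-CPU OLTP server with 16 GB of RAM, and keeps a timestamped backup of postgresql.conf.
- local_action:
    module: postgresql_tune
    db_version: 10
    db_type: oltp
    harddrive_type: ssd
    cpus: 4
    total_memory: 16384
    backup: true
    path: /var/lib/pgsql/data/postgresql.conf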
'''
def format_size(size):
"""Pretty-format a size in kB."""
if size % (1 << 20) == 0:
value = size >> 20
unit = 'GB'
elif size % (1 << 10) == 0:
value = size >> 10
unit = 'MB'
else:
value = size
unit = 'kB'
return '{}{}'.format(int(value), unit)
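# Illustrative behaviour (sample values, not from the original source): format_size() expects
# a size in kB, e.g. format_size(2097152) -> '2GB', format_size(3072) -> '3MB',
# format_size(100) -> '100kB'.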
def get_pgtune_config(module):
"""Compute PostgreSQL optimizations."""
db_version = module.params.get('db_version')
os_type = module.params.get('os_type')
db_type = module.params.get('db_type')
total_memory = module.params.get('total_memory')
connections = module.params.get('connections')
harddrive_type = module.params.get('harddrive_type')
cpus = module.params.get('cpus')
config = {}
warnings = []
# Allow overriding the maximum connections
if connections is None:
if db_type == 'desktop':
config['max_connections'] = 10
elif db_type == 'dw':
config['max_connections'] = 20
elif db_type == 'mixed':
config['max_connections'] = 100
elif db_type == 'oltp':
config['max_connections'] = 300
elif db_type == 'web':
config['max_connections'] = 200
else:
if connections < 10:
module.fail_json(msg='connections must be an integer greater than or equal to 10')
config['max_connections'] = connections
if cpus <= 0 or cpus > 9999:
module.fail_json(msg='cpus must be an integer between 1 and 9999')
total_memory_kb = total_memory << 10
# This tool is not optimal for low-memory systems
if total_memory < 256:
warnings.append('Not optimal for low memory systems')
# This tool is not optimal for very high-memory systems
if total_memory > 100 << 10:
warnings.append('Not optimal for very high memory systems')
if db_type == 'desktop':
config['shared_buffers'] = total_memory_kb // 16
else:
config['shared_buffers'] = total_memory_kb // 4
# Limit shared_buffers to 512MB on Windows
if os_type == 'windows' and config['shared_buffers'] > 512 << 10:
config['shared_buffers'] = 512 << 10
# Effective_cache_size
if db_type == 'desktop':
config['effective_cache_size'] = total_memory_kb // 4
else:
config['effective_cache_size'] = 3 * total_memory_kb // 4
# work_mem is assigned any time a query calls for a sort, or a hash, or any other structure
# that needs a space allocation, which can happen multiple times per query. So you're better
# off assuming max_connections * 2 or max_connections * 3 is the amount of RAM that will
# actually be used in reality. At the very least, you need to subtract shared_buffers from the
# amount you're distributing to connections in work_mem.
# The other thing to consider is that there's no reason to run on the edge of available
# memory. If you do that, there's a very high risk the out-of-memory killer will come along
# and start killing PostgreSQL backends. Always leave a buffer of some kind in case of
# spikes in memory usage. So your maximum amount of memory available in work_mem should be
# ((RAM - shared_buffers) / (max_connections * 3) / max_parallel_workers_per_gather).
max_parallel_workers_per_gather = int(math.ceil(0.5 * cpus))
work_mem = (total_memory_kb - config['shared_buffers']) / \
(3. * config['max_connections']) / max_parallel_workers_per_gather
if db_type == 'desktop':
config['work_mem'] = work_mem / 6.
elif db_type == 'dw' or db_type == 'mixed':
config['work_mem'] = work_mem / 2.
else:
config['work_mem'] = work_mem
config['work_mem'] = int(math.floor(config['work_mem']))
if config['work_mem'] < 64:
config['work_mem'] = 64
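# Worked example (illustrative values, not from the original source): with total_memory=8192 MB,
# db_type='mixed', cpus=1 and the default 100 connections, the base work_mem is
# (8388608 - 2097152) / 300 / 1 = 20971.52 kB, which is halved for the mixed profile and
# floored to 10485 kB (roughly 10 MB).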
# maintenance_work_mem
if db_type == 'dw':
config['maintenance_work_mem'] = total_memory_kb // 8
else:
config['maintenance_work_mem'] = total_memory_kb // 16
# Cap maintenance RAM at 2GB on servers with lots of memory
if config['maintenance_work_mem'] > 2 << 20:
config['maintenance_work_mem'] = 2 << 20
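# Worked example (illustrative): a 'dw' instance with 32 GB of RAM would get
# 33554432 / 8 = 4194304 kB, which exceeds the 2 << 20 kB (2 GB) cap and is clamped to 2 GB.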
if LooseVersion(db_version) < LooseVersion('9.5'):
# checkpoint_segments
if db_type == 'desktop':
config['checkpoint_segments'] = 3
elif db_type == 'dw':
config['checkpoint_segments'] = 128
elif db_type == 'oltp':
config['checkpoint_segments'] = 64
else:
config['checkpoint_segments'] = 32
else:
if db_type == 'desktop':
config['min_wal_size'] = 100 << 10
config['max_wal_size'] = 1024 << 10
elif db_type == 'dw':
config['min_wal_size'] = 4 << 20
config['max_wal_size'] = 8 << 20
elif db_type == 'oltp':
config['min_wal_size'] = 2 << 20
config['max_wal_size'] = 4 << 20
else:
config['min_wal_size'] = 1 << 20
config['max_wal_size'] = 2 << 20
# checkpoint_completion_target
if db_type == 'desktop':
config['checkpoint_completion_target'] = 0.5
elif db_type == 'web':
config['checkpoint_completion_target'] = 0.7
else:
config['checkpoint_completion_target'] = 0.9
# wal_buffers
# Follow auto-tuning guideline for wal_buffers added in 9.1, where it's set to 3% of
# shared_buffers up to a maximum of 16MB
if 'shared_buffers' in config:
config['wal_buffers'] = 3 * config['shared_buffers'] // 100
if config['wal_buffers'] > 16 << 10:
config['wal_buffers'] = 16 << 10
# It's nice if wal_buffers is an even 16MB if it's near that number. Since that is a common
# case on Windows, where shared_buffers is clipped to 512MB, round upwards in that situation
if 14 << 10 < config['wal_buffers'] < 16 << 10:
config['wal_buffers'] = 16 << 10
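# Worked example (illustrative): on Windows, with shared_buffers clipped to 524288 kB (512 MB),
# wal_buffers starts at 3 * 524288 // 100 = 15728 kB, which falls between 14 MB and 16 MB and
# is therefore rounded up to 16384 kB (16 MB).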
# default_statistics_target
if db_type == 'dw':
config['default_statistics_target'] = 500
else:
config['default_statistics_target'] = 100
# hard drive type
if harddrive_type == 'hdd':
config['random_page_cost'] = 4
else:
config['random_page_cost'] = 1.1
if os_type != 'windows':
if harddrive_type == 'hdd':
config['effective_io_concurrency'] = 2
elif harddrive_type == 'ssd':
config['effective_io_concurrency'] = 200
elif harddrive_type == 'san':
config['effective_io_concurrency'] = 300
# CPU
if LooseVersion(db_version) >= LooseVersion('9.5'):
if cpus > 1:
config['max_worker_processes'] = cpus
if LooseVersion(db_version) >= LooseVersion('9.6'):
config['max_parallel_workers_per_gather'] = max_parallel_workers_per_gather
if LooseVersion(db_version) >= LooseVersion('10'):
config['max_parallel_workers'] = cpus
# Format configuration
for key, value in config.items():
if key not in ['max_connections', 'checkpoint_segments', 'checkpoint_completion_target',
'default_statistics_target', 'random_page_cost',
'effective_io_concurrency', 'max_worker_processes',
'max_parallel_workers_per_gather', 'max_parallel_workers']:
config[key] = format_size(value)
return (config, warnings)
def write_changes(module, contents, path):
"""Write a string to a file, using a temporary file before to ensure changes are atomic."""
tmp_fd, tmp_file = tempfile.mkstemp()
tmp_object = os.fdopen(tmp_fd, 'wb')
tmp_object.write(contents)
tmp_object.close()
module.atomic_move(tmp_file, path, unsafe_writes=module.params['unsafe_writes'])
def check_file_attrs(module, changed, message):
"""Check file attribute changes."""
file_args = module.load_file_common_arguments(module.params)
if module.set_file_attributes_if_different(file_args, False):
if changed:
message += ' and '
changed = True
message += 'ownership, perms or SE linux context changed'
return (message, changed)
def write_optimizations(module):
"""Update input file with optimized PostgreSQL parameters."""
path = module.params.get('path')
backup = module.params.get('backup')
follow = module.params.get('follow')
if os.path.isdir(path):
module.fail_json(rc=256, msg='Path {} is a directory!'.format(path))
if not os.path.exists(path):
module.fail_json(rc=257, msg='Path {} does not exist!'.format(path))
pg_conf_object = open(path, 'rb')
pg_conf_lines = pg_conf_object.readlines()
pg_conf_object.close()
pgtune_config, warnings = get_pgtune_config(module)
results = {
'pgtune': pgtune_config,
'warnings': warnings
}
msg = ''
if module._diff:
results['diff'] = {'before_header': path, 'before': ''.join(pg_conf_lines),
'after_header': path}
changed = False
pgtune_config_to_append = dict(pgtune_config)
updated_parameters = []
added_parameters = []
today = datetime.date.today()
# Parse the file to update parameters in pgtune configuration
for lineno, line in enumerate(pg_conf_lines):
match = re.match(r'(?P<setup>[\s#]*(?P<key>[^\s#]+)\s*=\s*(?P<value>[^\s#]+)).*$', line)
if match is None:
continue
key = match.group('key')
value = match.group('value')
if key not in pgtune_config:
continue
if str(pgtune_config[key]) == value:
pgtune_config_to_append.pop(key)
continue
# Uncomment lines but keep in-line comments
ansible_comment = '#Ansible: updated by postgresql_tune on {:%Y-%m-%d} ' \
'(previous value: {})'.format(today, value)
pg_conf_lines[lineno] = '{}\n{}'.format(
ansible_comment,
line.replace(match.group('setup'), '{} = {}'.format(key, pgtune_config[key]))
)
changed = True
pgtune_config_to_append.pop(key)
updated_parameters.append(key)
# Append parameters in pgtune configuration not present in the file
for key, value in pgtune_config_to_append.items():
ansible_comment = '#Ansible: added by postgresql_tune on {:%Y-%m-%d}'.format(today)
pg_conf_lines.append('{}\n{} = {}\n'.format(ansible_comment, key, value))
changed = True
added_parameters.append(key)
if updated_parameters:
msg = 'parameters {} updated'.format(', '.join(updated_parameters))
if added_parameters:
msg += '{}parameters {} added'.format(', ' if msg else '', ', '.join(added_parameters))
contents = ''.join(pg_conf_lines)
if module._diff:
results['diff']['after'] = contents
if changed:
if backup and os.path.exists(path):
results['backup_file'] = module.backup_local(path)
if follow and os.path.islink(path):
path = os.path.realpath(path)
write_changes(module, contents, path)
results['msg'], results['changed'] = check_file_attrs(module, changed, msg)
return results
def main():
"""Main execution path."""
module = AnsibleModule(
argument_spec={
'db_version': {'type': 'str', 'default': '9.6'},
'os_type': {'type': 'str', 'choices': ['linux', 'windows'], 'default': 'linux'},
'db_type': {'type': 'str', 'choices': ['desktop', 'dw', 'mixed', 'oltp', 'web'],
'default': 'mixed'},
'total_memory': {'required': True, 'type': 'int'},
'connections': {'type': 'int'},
'harddrive_type': {'type': 'str', 'choices': ['hdd', 'ssd', 'san'], 'default': 'hdd'},
'cpus': {'type': 'int', 'default': 1},
'path': {'required': True, 'aliases': ['dest', 'destfile', 'name'], 'type': 'path'},
'backup': {'type': 'bool', 'choices': BOOLEANS, 'default': False}
},
add_file_common_args=True,
supports_check_mode=True
)
results = write_optimizations(module)
module.exit_json(**results)
if __name__ == '__main__':
main()