PostgreSQL DBA : Daily checklist of a PostgreSQL DBA

daily checklist of a PostgreSQL DBA

The daily checklist of a PostgreSQL DBA

We often get this question: what are the most important things a PostgreSQL DBA should do to guarantee optimal performance and reliability, and is there a checklist for PostgreSQL DBAs to follow daily? Since we get this question so often, we decided to write it up as a blog post and share it with the PostgreSQL community. The only objective of this post is to share information; please don’t consider it a run-book or a recommendation from MinervaDB PostgreSQL support. We at MinervaDB are not accountable for any negative impact on your PostgreSQL performance caused by running these scripts in your production database infrastructure. The following is a simple daily checklist for a PostgreSQL DBA:

Task 1: Check that all the PostgreSQL instances are up and operational:

=====================================================================

# List processes owned by user "postgres" whose full command line contains "-D", printing each match with its command line.
pgrep --euid postgres --full --list-full -- -D

If several PostgreSQL instances are running on the same host:

# List every process whose full command line contains "-D", then keep only the lines that mention postgres.
pgrep --full --list-full -- -D | grep postgres

Task 2: Monitoring PostgreSQL logs. Record PostgreSQL error logs: open the postgresql.conf configuration file and, under the ERROR REPORTING AND LOGGING section of the file, use the following config parameters:

====================================

# Logging settings for postgresql.conf (ERROR REPORTING AND LOGGING section).

# Send server log messages to stderr.
log_destination = 'stderr'

# Start the background collector that captures stderr output and writes it to log files.
logging_collector = on

# Directory for the log files; a relative path is resolved against the data directory.
log_directory = 'pg_log'

# Log file name pattern; the %-escapes expand to the date and time the file is created.
log_filename = 'postgresql-%Y-%m-%d_%H%M%S.log'

# On rotation, append to an existing file of the same name instead of overwriting it.
log_truncate_on_rotation = off

# Start a new log file once the current one is one day old.
log_rotation_age = 1d

# Log every completed statement together with its duration (0 = no minimum duration).
# NOTE(review): this can produce very large logs on a busy server - confirm it is intended.
log_min_duration_statement = 0

# Log each connection to the server.
log_connections = on

# Log the duration of every completed statement.
# NOTE(review): overlaps with log_min_duration_statement = 0 above - confirm both are wanted.
log_duration = on

# Log the client host name in addition to the IP address.
# NOTE(review): name resolution may add overhead per connection - verify for your environment.
log_hostname = on

# Time zone used for timestamps written to the server log.
log_timezone = 'UTC'

Save the postgresql.conf file and restart the PostgreSQL server:

# Restart the PostgreSQL service so the edited postgresql.conf is read at startup.
sudo service postgresql restart

Task 3: Confirm PostgreSQL backup completed successfully

Use backup logs (possible only with PostgreSQL logical backup) to audit backup quality:

# stdout carries the dump itself; stderr (2>) carries errors and warnings for the audit log.
# With two plain ">" redirections the dump would land in the log file and the .sql file would stay empty.
$ pg_dumpall > /backup-path/pg-backup-dump.sql 2> /var/log/postgres/pg-backup-dump.log

Task 4: Monitoring PostgreSQL Database Size:

-- Human-readable size of every database in the cluster, smallest first.
SELECT
    db.datname,
    pg_size_pretty(pg_database_size(db.datname))
FROM pg_database AS db
ORDER BY pg_database_size(db.datname);

Task 5: Monitor all PostgreSQL queries running (please repeat this task every 90 minutes during business / peak hours):

-- Sessions that are not idle, most recently started query first.
-- The session state lives in the "state" column on releases that expose "pid";
-- the query text is never the literal '<IDLE>' there, so filter on state instead.
-- Sessions in 'idle in transaction' are kept on purpose: they can hold locks.
SELECT
    pid,
    age(clock_timestamp(), query_start),
    usename,
    query
FROM pg_stat_activity
WHERE state <> 'idle'
  -- hide this monitoring query itself
  AND query NOT ILIKE '%pg_stat_activity%'
ORDER BY query_start DESC;

Task 6: Inventory of indexes in PostgreSQL database:

-- One row per index on an ordinary table: table name, index name and the
-- comma-separated list of indexed columns in index key order.
SELECT
    t.relname AS table_name,
    i.relname AS index_name,
    -- order the column list by position inside the index key so the output is deterministic
    string_agg(a.attname, ',' ORDER BY array_position(ix.indkey::int2[], a.attnum)) AS column_name
FROM pg_index AS ix
INNER JOIN pg_class AS t
    ON t.oid = ix.indrelid
INNER JOIN pg_class AS i
    ON i.oid = ix.indexrelid
INNER JOIN pg_attribute AS a
    ON a.attrelid = t.oid
   AND a.attnum = ANY (ix.indkey)
WHERE t.relkind = 'r'
  -- "_" is a LIKE wildcard; escape it so only names starting with the literal
  -- prefix "pg_" are skipped (an unescaped 'pg_%' also hides e.g. pgbench_* tables)
  AND t.relname NOT LIKE 'pg\_%'
GROUP BY
    t.relname,
    i.relname
ORDER BY
    t.relname,
    i.relname;

Task 7: Finding the largest databases in your PostgreSQL cluster:

-- The 20 largest databases in the cluster with their owner and size.
-- Databases the current role cannot CONNECT to are reported as 'No Access'.
SELECT
    d.datname AS Name,
    pg_catalog.pg_get_userbyid(d.datdba) AS Owner,
    CASE
        WHEN pg_catalog.has_database_privilege(d.datname, 'CONNECT')
            THEN pg_catalog.pg_size_pretty(pg_catalog.pg_database_size(d.datname))
        ELSE 'No Access'
    END AS Size
FROM pg_catalog.pg_database AS d
ORDER BY
    CASE
        WHEN pg_catalog.has_database_privilege(d.datname, 'CONNECT')
            THEN pg_catalog.pg_database_size(d.datname)
        ELSE NULL
    END DESC NULLS LAST  -- DESC alone sorts NULLs first, which could push real sizes out of the LIMIT
LIMIT 20;

Task 8: When you suspect a serious performance bottleneck in PostgreSQL, especially transactions blocking each other:

-- Lock dependency tree: each root is a session that blocks others without
-- waiting itself; the sessions waiting on it are listed below it, indented by depth.
WITH RECURSIVE l AS (
    -- every lock, with the locked object folded into one comparable row value
    SELECT pid, locktype, mode, granted,
           ROW(locktype, database, relation, page, tuple, virtualxid, transactionid, classid, objid, objsubid) obj
    FROM pg_locks
), pairs AS (
    -- (waiter, locker) pairs: an ungranted lock and a granted lock by another pid on the same object
    SELECT w.pid waiter, l.pid locker, l.obj, l.mode
    FROM l w
    JOIN l ON l.obj IS NOT DISTINCT FROM w.obj AND l.locktype = w.locktype AND NOT l.pid = w.pid AND l.granted
    WHERE NOT w.granted
), tree AS (
    -- roots: lockers that are not waiting on anybody
    SELECT l.locker pid, l.locker root, NULL::record obj, NULL AS mode, 0 lvl, locker::text path, array_agg(l.locker) OVER () all_pids
    FROM ( SELECT DISTINCT locker FROM pairs l WHERE NOT EXISTS (SELECT 1 FROM pairs WHERE waiter = l.locker) ) l
    UNION ALL
    -- children: sessions waiting on a pid already in the tree; all_pids stops cycles
    SELECT w.waiter pid, tree.root, w.obj, w.mode, tree.lvl + 1, tree.path || '.' || w.waiter, all_pids || array_agg(w.waiter) OVER ()
    FROM tree JOIN pairs w ON tree.pid = w.locker AND NOT w.waiter = ANY ( all_pids )
)
SELECT (clock_timestamp() - a.xact_start)::interval(3) AS ts_age,
       replace(a.state, 'idle in transaction', 'idletx') state,
       (clock_timestamp() - a.state_change)::interval(3) AS change_age,
       a.datname, tree.pid, a.usename, a.client_addr, tree.lvl,
       -- descendants are the paths that extend this one by ".<pid>...". A path holds only
       -- digits and dots, so LIKE is exact here, whereas a regex prefix would treat "."
       -- as "any character" and let 12.34 match 12.345
       (SELECT count(*) FROM tree p WHERE p.path LIKE tree.path || '.%') blocked,
       -- collapse whitespace runs (the pattern needs the backslash: \s+, not the letter s)
       repeat(' .', tree.lvl) || ' ' || left(regexp_replace(a.query, E'\\s+', ' ', 'g'), 100) query
FROM tree
JOIN pg_stat_activity a USING (pid)
ORDER BY tree.path;

Task 9: Identify bloated Tables in PostgreSQL :

-- Table bloat estimate: compares each table's on-disk size with the size expected
-- from planner statistics and lists the tables above the thresholds at the bottom.
-- The figures are estimates derived from pg_stats, not exact measurements.
WITH constants AS (
    -- define some constants for sizes of things
    -- for reference down the query and easy maintenance
    SELECT current_setting('block_size')::numeric AS bs, 23 AS hdr, 8 AS ma
),
no_stats AS (
    -- screen out tables which have attributes
    -- that don't have stats, such as JSON
    SELECT table_schema, table_name,
        n_live_tup::numeric AS est_rows,
        pg_table_size(relid)::numeric AS table_size
    FROM information_schema.columns
        JOIN pg_stat_user_tables AS psut
            ON table_schema = psut.schemaname
            AND table_name = psut.relname
        LEFT OUTER JOIN pg_stats
            ON table_schema = pg_stats.schemaname
            AND table_name = pg_stats.tablename
            AND column_name = attname
    WHERE attname IS NULL
        AND table_schema NOT IN ('pg_catalog', 'information_schema')
    GROUP BY table_schema, table_name, relid, n_live_tup
),
null_headers AS (
    -- calculate null header sizes
    -- omitting tables which don't have complete stats
    -- and attributes which aren't visible
    SELECT
        hdr+1+(sum(CASE WHEN null_frac <> 0 THEN 1 ELSE 0 END)/8) AS nullhdr,
        SUM((1-null_frac)*avg_width) AS datawidth,
        MAX(null_frac) AS maxfracsum,
        schemaname,
        tablename,
        hdr, ma, bs
    FROM pg_stats CROSS JOIN constants
        LEFT OUTER JOIN no_stats
            ON schemaname = no_stats.table_schema
            AND tablename = no_stats.table_name
    WHERE schemaname NOT IN ('pg_catalog', 'information_schema')
        AND no_stats.table_name IS NULL
        AND EXISTS ( SELECT 1
            FROM information_schema.columns
            WHERE schemaname = columns.table_schema
                AND tablename = columns.table_name )
    GROUP BY schemaname, tablename, hdr, ma, bs
),
data_headers AS (
    -- estimate header and row size
    SELECT
        ma, bs, hdr, schemaname, tablename,
        (datawidth+(hdr+ma-(CASE WHEN hdr%ma=0 THEN ma ELSE hdr%ma END)))::numeric AS datahdr,
        (maxfracsum*(nullhdr+ma-(CASE WHEN nullhdr%ma=0 THEN ma ELSE nullhdr%ma END))) AS nullhdr2
    FROM null_headers
),
table_estimates AS (
    -- make estimates of how large the table should be
    -- based on row and page size
    SELECT schemaname, tablename, bs,
        reltuples::numeric AS est_rows, relpages * bs AS table_bytes,
        CEIL((reltuples*
            (datahdr + nullhdr2 + 4 + ma -
                (CASE WHEN datahdr%ma=0
                    THEN ma ELSE datahdr%ma END)
                )/(bs-20))) * bs AS expected_bytes,
        reltoastrelid
    FROM data_headers
        JOIN pg_class ON tablename = relname
        JOIN pg_namespace ON relnamespace = pg_namespace.oid
            AND schemaname = nspname
    WHERE pg_class.relkind = 'r'
),
estimates_with_toast AS (
    -- add in estimated TOAST table sizes
    -- estimate based on 4 toast tuples per page because we don't have
    -- anything better
    SELECT schemaname, tablename,
        TRUE AS can_estimate,
        est_rows,
        table_bytes + ( coalesce(toast.relpages, 0) * bs ) AS table_bytes,
        expected_bytes + ( ceil( coalesce(toast.reltuples, 0) / 4 ) * bs ) AS expected_bytes
    FROM table_estimates LEFT OUTER JOIN pg_class AS toast
        ON table_estimates.reltoastrelid = toast.oid
            AND toast.relkind = 't'
),
table_estimates_plus AS (
    -- add some extra metadata to the table data
    -- and calculations to be reused
    -- including whether we can't estimate it
    -- or whether we think it might be compressed;
    -- the tables from no_stats are appended with can_estimate = FALSE
    SELECT current_database() AS databasename,
        schemaname, tablename, can_estimate,
        est_rows,
        CASE WHEN table_bytes > 0
            THEN table_bytes::NUMERIC
            ELSE NULL::NUMERIC END
            AS table_bytes,
        CASE WHEN expected_bytes > 0
            THEN expected_bytes::NUMERIC
            ELSE NULL::NUMERIC END
            AS expected_bytes,
        CASE WHEN expected_bytes > 0 AND table_bytes > 0
            AND expected_bytes <= table_bytes
            THEN (table_bytes - expected_bytes)::NUMERIC
            ELSE 0::NUMERIC END AS bloat_bytes
    FROM estimates_with_toast
    UNION ALL
    SELECT current_database() AS databasename,
        table_schema, table_name, FALSE,
        est_rows, table_size,
        NULL::NUMERIC, NULL::NUMERIC
    FROM no_stats
),
bloat_data AS (
    -- do final math calculations and formatting
    -- (table_bytes is NULL rather than 0 here, so the percentage cannot divide by zero)
    SELECT current_database() AS databasename,
        schemaname, tablename, can_estimate,
        table_bytes, round(table_bytes/(1024^2)::NUMERIC,3) AS table_mb,
        expected_bytes, round(expected_bytes/(1024^2)::NUMERIC,3) AS expected_mb,
        round(bloat_bytes*100/table_bytes) AS pct_bloat,
        round(bloat_bytes/(1024::NUMERIC^2),2) AS mb_bloat,
        est_rows
    FROM table_estimates_plus
)
-- filter output for bloated tables
SELECT databasename, schemaname, tablename,
    can_estimate,
    est_rows,
    pct_bloat, mb_bloat,
    table_mb
FROM bloat_data
-- this where clause defines which tables actually appear
-- in the bloat chart
-- the example below keeps tables that are either at least 50%
-- bloated with at least 10 MB of bloat, or at least 25%
-- bloated with at least 1000 MB of bloat
WHERE ( pct_bloat >= 50 AND mb_bloat >= 10 )
    OR ( pct_bloat >= 25 AND mb_bloat >= 1000 )
ORDER BY pct_bloat DESC;

Task 10: Identify bloated indexes in PostgreSQL :

-- btree index stats query
-- estimates bloat for btree indexes by comparing the pages each index occupies
-- with the pages expected from planner statistics (an estimate, not a measurement)
WITH btree_index_atts AS (
    -- one row per (btree index, indexed attribute number) outside the system schemas
    SELECT nspname,
        indexclass.relname AS index_name,
        indexclass.reltuples,
        indexclass.relpages,
        indrelid, indexrelid,
        indexclass.relam,
        tableclass.relname AS tablename,
        regexp_split_to_table(indkey::text, ' ')::smallint AS attnum,
        indexrelid AS index_oid
    FROM pg_index
    JOIN pg_class AS indexclass ON pg_index.indexrelid = indexclass.oid
    JOIN pg_class AS tableclass ON pg_index.indrelid = tableclass.oid
    JOIN pg_namespace ON pg_namespace.oid = indexclass.relnamespace
    JOIN pg_am ON indexclass.relam = pg_am.oid
    WHERE pg_am.amname = 'btree' AND indexclass.relpages > 0
        AND nspname NOT IN ('pg_catalog','information_schema')
),
index_item_sizes AS (
    -- per index: size constants plus the summed average data width of its columns
    SELECT
        ind_atts.nspname, ind_atts.index_name,
        ind_atts.reltuples, ind_atts.relpages, ind_atts.relam,
        ind_atts.indrelid AS table_oid, ind_atts.index_oid,
        current_setting('block_size')::numeric AS bs,
        8 AS maxalign,
        24 AS pagehdr,
        CASE WHEN max(coalesce(pg_stats.null_frac,0)) = 0
            THEN 2
            ELSE 6
        END AS index_tuple_hdr,
        sum( (1-coalesce(pg_stats.null_frac, 0)) * coalesce(pg_stats.avg_width, 1024) ) AS nulldatawidth
    FROM pg_attribute
    JOIN btree_index_atts AS ind_atts ON pg_attribute.attrelid = ind_atts.indexrelid AND pg_attribute.attnum = ind_atts.attnum
    JOIN pg_stats ON pg_stats.schemaname = ind_atts.nspname
        -- stats for regular index columns
        AND ( (pg_stats.tablename = ind_atts.tablename AND pg_stats.attname = pg_catalog.pg_get_indexdef(pg_attribute.attrelid, pg_attribute.attnum, TRUE))
        -- stats for functional indexes
        OR (pg_stats.tablename = ind_atts.index_name AND pg_stats.attname = pg_attribute.attname))
    WHERE pg_attribute.attnum > 0
    -- group on the per-index columns by name; bs and maxalign are constants and need no grouping
    GROUP BY ind_atts.nspname, ind_atts.index_name,
        ind_atts.reltuples, ind_atts.relpages, ind_atts.relam,
        ind_atts.indrelid, ind_atts.index_oid
),
index_aligned_est AS (
    -- expected number of pages once header and data widths are padded to maxalign
    SELECT maxalign, bs, nspname, index_name, reltuples,
        relpages, relam, table_oid, index_oid,
        coalesce (
            ceil (
                reltuples * ( 6
                    + maxalign
                    - CASE
                        WHEN index_tuple_hdr%maxalign = 0 THEN maxalign
                        ELSE index_tuple_hdr%maxalign
                      END
                    + nulldatawidth
                    + maxalign
                    - CASE /* Add padding to the data to align on MAXALIGN */
                        WHEN nulldatawidth::integer%maxalign = 0 THEN maxalign
                        ELSE nulldatawidth::integer%maxalign
                      END
                )::numeric
              / ( bs - pagehdr::NUMERIC )
              +1 )
          , 0 )
          AS expected
    FROM index_item_sizes
),
raw_bloat AS (
    -- actual versus expected size in bytes, wasted bytes and bloat percentage
    SELECT current_database() AS dbname, nspname, pg_class.relname AS table_name, index_name,
        bs*(index_aligned_est.relpages)::bigint AS totalbytes, expected,
        CASE
            WHEN index_aligned_est.relpages <= expected
                THEN 0
            ELSE bs*(index_aligned_est.relpages-expected)::bigint
        END AS wastedbytes,
        CASE
            WHEN index_aligned_est.relpages <= expected
                THEN 0
            ELSE bs*(index_aligned_est.relpages-expected)::bigint * 100 / (bs*(index_aligned_est.relpages)::bigint)
        END AS realbloat,
        pg_relation_size(index_aligned_est.table_oid) AS table_bytes,
        stat.idx_scan AS index_scans
    FROM index_aligned_est
    JOIN pg_class ON pg_class.oid = index_aligned_est.table_oid
    JOIN pg_stat_user_indexes AS stat ON index_aligned_est.index_oid = stat.indexrelid
),
format_bloat AS (
    -- round and convert to megabytes for display
    SELECT dbname AS database_name, nspname AS schema_name, table_name, index_name,
        round(realbloat) AS bloat_pct, round(wastedbytes/(1024^2)::NUMERIC) AS bloat_mb,
        round(totalbytes/(1024^2)::NUMERIC,3) AS index_mb,
        round(table_bytes/(1024^2)::NUMERIC,3) AS table_mb,
        index_scans
    FROM raw_bloat
)
-- final query outputting the bloated indexes
-- change the where and order by to change
-- what shows up as bloated
SELECT *
FROM format_bloat
WHERE ( bloat_pct > 50 AND bloat_mb > 10 )
ORDER BY bloat_mb DESC;

Task 11: Monitor blocked and blocking activities in PostgreSQL:

-- Pairs each session waiting on a lock with every other session that has a
-- pg_locks entry for the same lock target, showing user and statement for both.
SELECT
    waiting.pid       AS blocked_pid,
    waiting_act.usename AS blocked_user,
    holder.pid        AS blocking_pid,
    holder_act.usename  AS blocking_user,
    waiting_act.query   AS blocked_statement,
    holder_act.query    AS current_statement_in_blocking_process
FROM pg_catalog.pg_locks AS waiting
JOIN pg_catalog.pg_stat_activity AS waiting_act
    ON waiting_act.pid = waiting.pid
JOIN pg_catalog.pg_locks AS holder
    -- same lock target: every identifying column must match, NULLs included
    ON  holder.locktype = waiting.locktype
    AND holder.database      IS NOT DISTINCT FROM waiting.database
    AND holder.relation      IS NOT DISTINCT FROM waiting.relation
    AND holder.page          IS NOT DISTINCT FROM waiting.page
    AND holder.tuple         IS NOT DISTINCT FROM waiting.tuple
    AND holder.virtualxid    IS NOT DISTINCT FROM waiting.virtualxid
    AND holder.transactionid IS NOT DISTINCT FROM waiting.transactionid
    AND holder.classid       IS NOT DISTINCT FROM waiting.classid
    AND holder.objid         IS NOT DISTINCT FROM waiting.objid
    AND holder.objsubid      IS NOT DISTINCT FROM waiting.objsubid
    -- a session never blocks itself
    AND holder.pid <> waiting.pid
JOIN pg_catalog.pg_stat_activity AS holder_act
    ON holder_act.pid = holder.pid
-- only lock requests that have not been granted yet
WHERE NOT waiting.granted;

Task 12: Monitoring PostgreSQL Disk I/O performance:

-- Block reads from disk versus from the buffer cache, as a cluster-wide total
-- ('all') followed by one row per table, heaviest disk readers first.
-- perform a "select pg_stat_reset();" when you want to reset counter statistics
WITH
all_tables AS
(
    -- single summary row over every table
    SELECT *
    FROM (
        SELECT 'all'::text AS table_name,
            sum( (coalesce(heap_blks_read,0) + coalesce(idx_blks_read,0) + coalesce(toast_blks_read,0) + coalesce(tidx_blks_read,0)) ) AS from_disk,
            sum( (coalesce(heap_blks_hit,0) + coalesce(idx_blks_hit,0) + coalesce(toast_blks_hit,0) + coalesce(tidx_blks_hit,0)) ) AS from_cache
        FROM pg_statio_all_tables -- change to pg_statio_user_tables if you want to check only user tables (excluding postgres's own tables)
    ) a
    WHERE (from_disk + from_cache) > 0 -- discard tables without hits
),
tables AS
(
    -- one row per table (heap, index, TOAST and TOAST-index blocks added together)
    SELECT *
    FROM (
        SELECT relname AS table_name,
            ( (coalesce(heap_blks_read,0) + coalesce(idx_blks_read,0) + coalesce(toast_blks_read,0) + coalesce(tidx_blks_read,0)) ) AS from_disk,
            ( (coalesce(heap_blks_hit,0) + coalesce(idx_blks_hit,0) + coalesce(toast_blks_hit,0) + coalesce(tidx_blks_hit,0)) ) AS from_cache
        FROM pg_statio_all_tables -- change to pg_statio_user_tables if you want to check only user tables (excluding postgres's own tables)
    ) a
    WHERE (from_disk + from_cache) > 0 -- discard tables without hits
)
SELECT table_name AS "table name",
    from_disk AS "disk hits",
    -- the CTE filters guarantee from_disk + from_cache > 0, so the divisions are safe
    round((from_disk::numeric / (from_disk + from_cache)::numeric)*100.0,2) AS "% disk hits",
    round((from_cache::numeric / (from_disk + from_cache)::numeric)*100.0,2) AS "% cache hits",
    (from_disk + from_cache) AS "total hits"
FROM (
    SELECT table_name, from_disk, from_cache FROM all_tables
    UNION ALL
    SELECT table_name, from_disk, from_cache FROM tables
) a
-- keep the 'all' summary row on top
ORDER BY (CASE WHEN table_name = 'all' THEN 0 ELSE 1 END), from_disk DESC;

References

https://www.postgresql.org/developer/related-projects/

https://www.postgresql.org/community/

https://github.com/pgexperts

PostgreSQL DBA

Wednesday, 14 August 2024

Daily checklist of a PostgreSQL DBA

daily checklist of a PostgreSQL DBA

No comments:

Post a Comment

Master and Slave - Sync check - PostgreSQL