From d8f468d089469f87185a1ef7a81b19a5d5778d12 Mon Sep 17 00:00:00 2001
From: Jan Mach <jan.mach@cesnet.cz>
Date: Wed, 27 Nov 2019 17:44:16 +0100
Subject: [PATCH] Updated documentation on monitoring, maintenance and
 upgrading procedures.

(Redmine issue: #6096)
---
 doc/sphinx/_doclib/administration.rst | 311 ++++++++++++++++----------
 doc/sphinx/_doclib/upgrading.rst      |   9 +
 etc/default/mentat                    |   3 +
 scripts/sqldb-maintenance.sh          |  90 ++++++++
 4 files changed, 295 insertions(+), 118 deletions(-)
 create mode 100755 scripts/sqldb-maintenance.sh

diff --git a/doc/sphinx/_doclib/administration.rst b/doc/sphinx/_doclib/administration.rst
index 54a35131e..9dddccc6b 100644
--- a/doc/sphinx/_doclib/administration.rst
+++ b/doc/sphinx/_doclib/administration.rst
@@ -25,31 +25,33 @@ If you choose to implement this solution you may try to follow this simple proce
 
 .. code-block:: shell
 
-	# 1. Stop your receiving Warden client.
+    # 1. Stop your receiving Warden client.
 
-	# 2. Wait a moment for your Mentat daemons to process all remaining messages.
+    # 2. Wait a moment for your Mentat daemons to process all remaining messages.
 
-	# 3. Stop all Mentat daemons:
-	mentat-controller.py --command stop
+    # 3. Stop all Mentat daemons:
+    mentat-controller.py --command stop
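+    # Additionally disable the Mentat cronjob modules, so that they do not
+    # interfere with the maintenance: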
+    mentat-controller.py --command disable
 
-	# 4. Delete current content of your message processing queues:
-	rm -rf /var/mentat/spool/mentat-*
+    # 4. Delete current content of your message processing queues:
+    rm -rf /var/mentat/spool/mentat-*
 
-	# 5. Add following line to your /etc/fstab file (adjust the size of the RAM disk as necessary):
-	tmpfs  /var/mentat/spool  tmpfs  nodev,nosuid,noexec,nodiratime,size=2048M 0 0
+    # 5. Add the following line to your /etc/fstab file (adjust the size of the RAM disk as necessary):
+    tmpfs  /var/mentat/spool  tmpfs  nodev,nosuid,noexec,nodiratime,size=2048M 0 0
 
-	# 6. Mount the newly added filesystem and check:
-	/bin/mount -a
-	mount | grep mentat
-	df -h | grep mentat
+    # 6. Mount the newly added filesystem and check:
+    /bin/mount -a
+    mount | grep mentat
+    df -h | grep mentat
 
-	# 7. Start all Mentat daemons:
-	mentat-controller.py --command start
+    # 7. Start all Mentat daemons:
+    mentat-controller.py --command enable
+    mentat-controller.py --command start
 
-	# 8. Start your receiving Warden client.
+    # 8. Start your receiving Warden client.
 
-	# 9. Check, that the IDEA messages are passing through the processing chain:
-	tail -f /var/mentat/log/mentat-storage.py.log
+    # 9. Check that the IDEA messages are passing through the processing chain:
+    tail -f /var/mentat/log/mentat-storage.py.log
 
 Please adjust the variables (like queue folder location and RAM filesystem size)
 in the procedure above according to your setup and preferences. In this example
@@ -76,16 +78,16 @@ In each of the directories above you should aim for following files:
 
 The workflow for customizing the templates is as follows::
 
-	# Step 1: Modify the appropriate '.j2' file
+    # Step 1: Modify the appropriate '.j2' file
 
-	# Step 2: Update the message catalogs:
-	hawat-cli repintl update
+    # Step 2: Update the message catalogs:
+    hawat-cli repintl update
 
-	# Step 3: Translate newly added strings in appropriate '*.po' file(s)
+    # Step 3: Translate newly added strings in appropriate '*.po' file(s)
 
-	# Step 4: Compile the message catalogs:
-	hawat-cli repintl compile
-	hawat-cli repintl clean
+    # Step 4: Compile the message catalogs:
+    hawat-cli repintl compile
+    hawat-cli repintl clean
 
 
 .. _section-administration-monitoring:
@@ -102,8 +104,9 @@ your liking.
 
 You may consider monitoring following features of Mentat system:
 
-#. Monitoring system state
-#. Monitoring database state
+#. Monitoring database (low level)
+#. Monitoring Mentat database
+#. Monitoring Mentat system
 #. Monitoring message queues
 #. Monitoring log files
 
@@ -113,58 +116,37 @@ capable of configuring the `Nagios <https://www.nagios.org/>`__ monitoring for y
 Or you may use its appropriate tasks as a model for your custom configuration.
 
 
-Monitoring system state
+Monitoring database (low level)
 ````````````````````````````````````````````````````````````````````````````````
 
-For overall system state monitoring there is a feature built into the :ref:`section-bin-mentat-controller`
-utility. You may use the ``status`` command to detect the current overall state
-of Mentat modules:
+Currently there is no built-in mechanism for low-level monitoring of the database
+status. We are using the `Nagios <https://www.nagios.org/>`__ plugins ``check_procs``
+and `check_postgres <https://exchange.nagios.org/directory/Plugins/Databases/PostgresQL/check_postgres/details>`__
+to monitor the database. You may use something like the following as your
+NRPE configuration:
 
 .. code-block:: shell
 
-	root@mentat:~# mentat-controller.py
-	2018-09-26 13:31:17,752 INFO: Executing script command 'status'
-	2018-09-26 13:31:17,981 INFO: Status of configured Mentat real-time modules:
-	2018-09-26 13:31:17,981 INFO: Real-time module 'mentat-storage.py': 'Process is running or service is OK (1)'
-	2018-09-26 13:31:17,981 INFO: Real-time module 'mentat-enricher.py': 'Process is running or service is OK (1)'
-	2018-09-26 13:31:17,982 INFO: Real-time module 'mentat-inspector-b.py': 'Process is running or service is OK (1)'
-	2018-09-26 13:31:17,982 INFO: Real-time module 'mentat-inspector.py': 'Process is running or service is OK (1)'
-	2018-09-26 13:31:17,982 INFO: Overall real-time module status: 'All modules are running OK'
-	2018-09-26 13:31:17,982 INFO: Status of configured Mentat cronjob modules:
-	2018-09-26 13:31:17,982 INFO: Cronjob module 'mentat-backup-py': 'Cronjob is enabled'
-	2018-09-26 13:31:17,982 INFO: Cronjob module 'mentat-cleanup-py': 'Cronjob is enabled'
-	2018-09-26 13:31:17,982 INFO: Cronjob module 'fetch-geoipdb-sh': 'Cronjob is enabled'
-	2018-09-26 13:31:17,982 INFO: Cronjob module 'mentat-informant-py': 'Cronjob is enabled'
-	2018-09-26 13:31:17,983 INFO: Cronjob module 'mentat-precache-py': 'Cronjob is enabled'
-	2018-09-26 13:31:17,983 INFO: Cronjob module 'mentat-reporter-py': 'Cronjob is enabled'
-	2018-09-26 13:31:17,983 INFO: Cronjob module 'mentat-statistician-py': 'Cronjob is enabled'
-	2018-09-26 13:31:17,983 INFO: Cronjob module 'mentat-watchdog-events-py': 'Cronjob is enabled'
-	2018-09-26 13:31:17,983 INFO: Overall cronjob module status: 'All cronjobs are enabled'
-	2018-09-26 13:31:17,983 INFO: Overall Mentat system status: 'All modules are running OK and all cronjobs are enabled'
-	2018-09-26 13:31:17,984 INFO: Application runtime: '0:00:00.329097' (effectivity  70.49 %)
-	2018-09-26 13:31:17,985 INFO: Application persistent state saved to file '/var/mentat/run/mentat-controller.py.pstate'
-	2018-09-26 13:31:17,985 INFO: Application runlog saved to file '/var/mentat/run/mentat-controller.py/201809261331.runlog'
-
-You may use the built-in command line option ``nagios-plugin`` to force the output
-and return code to be according to the `Nagios plugin API <https://assets.nagios.com/downloads/nagioscore/docs/nagioscore/3/en/pluginapi.html>`__.
-In that case you may use something like the following as your NRPE configuration:
+    command[check_postgresql]=/usr/lib/nagios/plugins/check_procs -c 1:100 -C postgres
 
-.. code-block:: shell
+    command[check_pg_con_mentat_events]=/usr/lib/nagios/plugins/check_postgres_connection --dbname=mentat_events --dbuser=watchdog
+    command[check_pg_blt_mentat_events]=/usr/lib/nagios/plugins/check_postgres_bloat --dbname=mentat_events --dbuser=watchdog --warning='1G' --critical='5G'
+    command[check_pg_anl_mentat_events]=/usr/lib/nagios/plugins/check_postgres_last_analyze --dbname=mentat_events --dbuser=watchdog --warning='3d' --critical='7d' --exclude='pg_catalog.' --exclude='alembic_version'
+    command[check_pg_vac_mentat_events]=/usr/lib/nagios/plugins/check_postgres_last_vacuum --dbname=mentat_events --dbuser=watchdog --warning='3d' --critical='7d' --exclude='pg_catalog.' --exclude='alembic_version'
+    command[check_pg_aan_mentat_events]=/usr/lib/nagios/plugins/check_postgres_last_autoanalyze --dbname=mentat_events --dbuser=watchdog --warning='3d' --critical='7d' --exclude='pg_catalog.' --exclude='alembic_version'
+    command[check_pg_ava_mentat_events]=/usr/lib/nagios/plugins/check_postgres_last_autovacuum --dbname=mentat_events --dbuser=watchdog --warning='3d' --critical='7d' --exclude='pg_catalog.' --exclude='alembic_version'
 
-	command[check_mentat]=/usr/local/bin/mentat-controller.py --command status --nagios-plugin --log-level warning --shell
+    command[check_pg_con_mentat_main]=/usr/lib/nagios/plugins/check_postgres_connection --dbname=mentat_main --dbuser=watchdog
+    command[check_pg_blt_mentat_main]=/usr/lib/nagios/plugins/check_postgres_bloat --dbname=mentat_main --dbuser=watchdog --warning='1G' --critical='5G'
+    command[check_pg_anl_mentat_main]=/usr/lib/nagios/plugins/check_postgres_last_analyze --dbname=mentat_main --dbuser=watchdog --warning='3d' --critical='7d' --exclude='pg_catalog.' --exclude='alembic_version'
+    command[check_pg_vac_mentat_main]=/usr/lib/nagios/plugins/check_postgres_last_vacuum --dbname=mentat_main --dbuser=watchdog --warning='3d' --critical='7d' --exclude='pg_catalog.' --exclude='alembic_version'
+    command[check_pg_aan_mentat_main]=/usr/lib/nagios/plugins/check_postgres_last_autoanalyze --dbname=mentat_main --dbuser=watchdog --warning='3d' --critical='7d' --exclude='pg_catalog.' --exclude='alembic_version'
+    command[check_pg_ava_mentat_main]=/usr/lib/nagios/plugins/check_postgres_last_autovacuum --dbname=mentat_main --dbuser=watchdog --warning='3d' --critical='7d' --exclude='pg_catalog.' --exclude='alembic_version'
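+
+The checks above assume a dedicated monitoring role named ``watchdog`` in the
+database. The following is only a minimal sketch of how such a role might be
+created; the password is a placeholder and the required authentication setup
+(e.g. ``pg_hba.conf``) is left to your environment:
+
+.. code-block:: shell
+
+    # Create a low-privileged monitoring role (hypothetical password):
+    sudo -u postgres psql -c "CREATE ROLE watchdog LOGIN PASSWORD 'changeme';"
+
+    # Allow it to connect to both Mentat databases:
+    sudo -u postgres psql -c "GRANT CONNECT ON DATABASE mentat_events TO watchdog;"
+    sudo -u postgres psql -c "GRANT CONNECT ON DATABASE mentat_main TO watchdog;"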
 
 
-Monitoring database state
+Monitoring Mentat database
 ````````````````````````````````````````````````````````````````````````````````
 
-First of all you may wish to use the ``check_procs`` `Nagios <https://www.nagios.org/>`__
-plugin to check that the database is indeed running:
-
-.. code-block:: shell
-
-	command[check_postgresql]=/usr/lib/nagios/plugins/check_procs -c 5:100 -C postgres
-
-Next very usefull thing to monitor is the health of the message processing chain
+A very useful thing to monitor is the health of the message processing chain
 and verifying that new messages are constantly being added to the database. For
 this there is a built-in feature in the :ref:`section-bin-mentat-dbmngr` utility.
 It contains the ``watchdog-events`` command, which can be executed periodically
@@ -173,7 +155,7 @@ option to be incorporated into your monitoring infrastructure:
 
 .. code-block:: shell
 
-	command[check_mentat_edb]=/usr/local/bin/mentat-dbmngr.py --command watchdog-events --nagios-plugin --log-level warning --shell --user nagios --group nagios
+    command[check_mentat_edb]=/usr/local/bin/mentat-dbmngr.py --command watchdog-events --nagios-plugin --log-level warning --shell --user nagios --group nagios
 
 Additionally, there is a bundle of useful check scripts in the ``/etc/mentat/scripts``
 directory, which can be used to help with keeping the data quality on the sane
@@ -186,60 +168,60 @@ To correctly correctly configure these scripts please pay attention to following
 configurations ``/etc/default/mentat``:
 
 ``MENTAT_IS_ENABLED``
-	Master switch. Unless value is set to ``yes`` no checks will be performed.
+    Master switch. Unless value is set to ``yes`` no checks will be performed.
 ``MENTAT_CHECKS_MAIL_TO``
-	List of recipients of check reports (must be array).
+    List of recipients of check reports (must be array).
 ``MENTAT_HAWAT_URL``
-	Base URL to the Mentat`s web interface. It will be used to generate URLs to
-	example events.
+    Base URL of Mentat's web interface. It will be used to generate URLs to
+    example events.
 
 To enable these scripts please configure them to be launched periodically via
 ``cron``.
 
 ``/etc/mentat/scripts/mentat-check-alive.sh``
-	Query the IDEA event database and find a list of event detectors, that stopped
-	sending new events. This can be used to detect possible problems with detectors
-	going suddenly offline.
+    Query the IDEA event database and find a list of event detectors that stopped
+    sending new events. This can be used to detect possible problems with detectors
+    suddenly going offline.
 ``/etc/mentat/scripts/mentat-check-inspectionerrors.sh``
-	Query the IDEA event database and detect list of all inspection errors along
-	with example events. One of the :ref:`section-bin-mentat-inspector` modules
-	is by default configured to perform event sanity inspection and logs errors
-	it finds directly into the event. This script can provide summary of all
-	current inspection errors, so you can go and fix malfunctioning detectors.
+    Query the IDEA event database and compile a list of all inspection errors
+    along with example events. One of the :ref:`section-bin-mentat-inspector`
+    modules is by default configured to perform event sanity inspection and logs
+    the errors it finds directly into the event. This script can provide a summary
+    of all current inspection errors, so you can go and fix malfunctioning detectors.
 ``/etc/mentat/scripts/mentat-check-noeventclass.sh``
-	Query the IDEA event database and detect list of events without assigned
-	internal classification. The event classification is an internal mechanism
-	for aggregating events possibly from different detectors and representing
-	similar event classess (e.g. SSH bruteforce attacks detected by different
-	detectors may by described by slightly different IDEA events. In a best case
-	scenario any IDEA event should be assigned exactly one event class and there
-	should not be any events without an event class.
+    Query the IDEA event database and detect events without an assigned
+    internal classification. The event classification is an internal mechanism
+    for aggregating events that represent similar event classes but possibly
+    come from different detectors (e.g. SSH brute-force attacks detected by
+    different detectors may be described by slightly different IDEA events).
+    In the best-case scenario every IDEA event should be assigned exactly one
+    event class and there should not be any events without an event class.
 ``/etc/mentat/scripts/mentat-check-volatiledescription.sh``
-	Query the IDEA event database and detect list of detectors that are putting
-	variable data into ``Description`` key within the event. The description
-	should contain only constant data, things like IP addressess, timestamps and
-	so on should be placed into the ``Note`` key.
+    Query the IDEA event database and detect detectors that are putting
+    variable data into the ``Description`` key within the event. The description
+    should contain only constant data; things like IP addresses, timestamps and
+    so on should be placed into the ``Note`` key.
 ``/etc/mentat/scripts/mentat-check-test.sh``
-	Query the IDEA event database and detect list of detectors that are sending
-	events with ``Test`` category for "longer than normal" time. Ussually when
-	new detector is added to the system, it is smart to assess the quality of the
-	data provided before letting the messages be handled in full. However detectors
-	should not use this feature permanently, instead the data source should eiher
-	move to production level by starting to omit the ``Test`` category, or stop
-	sending those messages altogether.
+    Query the IDEA event database and detect detectors that have been sending
+    events with the ``Test`` category for a "longer than normal" time. Usually
+    when a new detector is added to the system, it is smart to assess the quality
+    of the data provided before letting the messages be handled in full. However,
+    detectors should not use this feature permanently; instead the data source
+    should either move to production level by starting to omit the ``Test``
+    category, or stop sending those messages altogether.
 
 Following is an example ``cron`` configuration to enable all these checks.
 
 .. code-block:: shell
 
-	# root@host$ crontab -e
-	10 0 * * mon /etc/mentat/scripts/mentat-check-alive.sh 7
-	11 0 * * mon /etc/mentat/scripts/mentat-check-inspectionerrors.sh 7
-	12 0 * * mon /etc/mentat/scripts/mentat-check-noeventclass.sh 7
-	# As an example use 14 days as check interval here instead of 7 days
-	13 0 * * mon /etc/mentat/scripts/mentat-check-volatiledescription.sh 14
-	# As an example send these reports to some different people
-	14 0 * * mon /etc/mentat/scripts/mentat-check-test.sh 7 admin@domain.org another-admin@domain.org
+    # root@host$ crontab -e
+    10 0 * * mon /etc/mentat/scripts/mentat-check-alive.sh 7
+    11 0 * * mon /etc/mentat/scripts/mentat-check-inspectionerrors.sh 7
+    12 0 * * mon /etc/mentat/scripts/mentat-check-noeventclass.sh 7
+    # As an example use 14 days as check interval here instead of 7 days
+    13 0 * * mon /etc/mentat/scripts/mentat-check-volatiledescription.sh 14
+    # As an example send these reports to some different people
+    14 0 * * mon /etc/mentat/scripts/mentat-check-test.sh 7 admin@domain.org another-admin@domain.org
 
 All these scripts send their reports via email with following headers, that you
 may use for automated email processing:
@@ -249,8 +231,50 @@ may use for automated email processing:
 * ``X-Mentat-Report-Type: check-[xxx]``
 
 
+Monitoring Mentat system
+````````````````````````````````````````````````````````````````````````````````
+
+For overall system state monitoring there is a feature built into the :ref:`section-bin-mentat-controller`
+utility. You may use the ``status`` command to detect the current overall state
+of Mentat modules:
+
+.. code-block:: shell
+
+    root@mentat:~# mentat-controller.py
+    2018-09-26 13:31:17,752 INFO: Executing script command 'status'
+    2018-09-26 13:31:17,981 INFO: Status of configured Mentat real-time modules:
+    2018-09-26 13:31:17,981 INFO: Real-time module 'mentat-storage.py': 'Process is running or service is OK (1)'
+    2018-09-26 13:31:17,981 INFO: Real-time module 'mentat-enricher.py': 'Process is running or service is OK (1)'
+    2018-09-26 13:31:17,982 INFO: Real-time module 'mentat-inspector-b.py': 'Process is running or service is OK (1)'
+    2018-09-26 13:31:17,982 INFO: Real-time module 'mentat-inspector.py': 'Process is running or service is OK (1)'
+    2018-09-26 13:31:17,982 INFO: Overall real-time module status: 'All modules are running OK'
+    2018-09-26 13:31:17,982 INFO: Status of configured Mentat cronjob modules:
+    2018-09-26 13:31:17,982 INFO: Cronjob module 'mentat-backup-py': 'Cronjob is enabled'
+    2018-09-26 13:31:17,982 INFO: Cronjob module 'mentat-cleanup-py': 'Cronjob is enabled'
+    2018-09-26 13:31:17,982 INFO: Cronjob module 'fetch-geoipdb-sh': 'Cronjob is enabled'
+    2018-09-26 13:31:17,982 INFO: Cronjob module 'mentat-informant-py': 'Cronjob is enabled'
+    2018-09-26 13:31:17,983 INFO: Cronjob module 'mentat-precache-py': 'Cronjob is enabled'
+    2018-09-26 13:31:17,983 INFO: Cronjob module 'mentat-reporter-py': 'Cronjob is enabled'
+    2018-09-26 13:31:17,983 INFO: Cronjob module 'mentat-statistician-py': 'Cronjob is enabled'
+    2018-09-26 13:31:17,983 INFO: Cronjob module 'mentat-watchdog-events-py': 'Cronjob is enabled'
+    2018-09-26 13:31:17,983 INFO: Overall cronjob module status: 'All cronjobs are enabled'
+    2018-09-26 13:31:17,983 INFO: Overall Mentat system status: 'All modules are running OK and all cronjobs are enabled'
+    2018-09-26 13:31:17,984 INFO: Application runtime: '0:00:00.329097' (effectivity  70.49 %)
+    2018-09-26 13:31:17,985 INFO: Application persistent state saved to file '/var/mentat/run/mentat-controller.py.pstate'
+    2018-09-26 13:31:17,985 INFO: Application runlog saved to file '/var/mentat/run/mentat-controller.py/201809261331.runlog'
+
+You may use the built-in command line option ``nagios-plugin`` to force the output
+and return code to conform to the `Nagios plugin API <https://assets.nagios.com/downloads/nagioscore/docs/nagioscore/3/en/pluginapi.html>`__.
+In that case you may use something like the following as your NRPE configuration:
+
+.. code-block:: shell
+
+    command[check_mentat]=/usr/local/bin/mentat-controller.py --command status --nagios-plugin --log-level warning --shell
+
+
 Monitoring message queues
 ````````````````````````````````````````````````````````````````````````````````
+
 Currently there is no built-in mechanism for monitoring number of messages in the
 message queues. We are using the `Nagios <https://www.nagios.org/>`__ plugin
 `check_file_count <https://exchange.nagios.org/directory/Plugins/System-Metrics/File-System/check_file_count/details>`__
@@ -259,18 +283,21 @@ the following as your NRPE configuration:
 
 .. code-block:: shell
 
-	command[check_mentat_inspector_a_errors_dir]=/usr/lib/nagios/plugins/check_file_count -d /var/mentat/spool/mentat-inspector.py/errors -w 100 -c 1000
-	command[check_mentat_inspector_a_pending_dir]=/usr/lib/nagios/plugins/check_file_count -d /var/mentat/spool/mentat-inspector.py/pending -w 100 -c 1000
-	command[check_mentat_inspector_a_incoming_dir]=/usr/lib/nagios/plugins/check_file_count -d /var/mentat/spool/mentat-inspector.py/incoming -w 5000 -c 10000
-	command[check_mentat_inspector_b_errors_dir]=/usr/lib/nagios/plugins/check_file_count -d /var/mentat/spool/mentat-inspector-b.py/errors -w 100 -c 1000
-	command[check_mentat_inspector_b_pending_dir]=/usr/lib/nagios/plugins/check_file_count -d /var/mentat/spool/mentat-inspector-b.py/pending -w 100 -c 1000
-	command[check_mentat_inspector_b_incoming_dir]=/usr/lib/nagios/plugins/check_file_count -d /var/mentat/spool/mentat-inspector-b.py/incoming -w 5000 -c 10000
-	command[check_mentat_enricher_errors_dir]=/usr/lib/nagios/plugins/check_file_count -d /var/mentat/spool/mentat-enricher.py/errors -w 100 -c 1000
-	command[check_mentat_enricher_pending_dir]=/usr/lib/nagios/plugins/check_file_count -d /var/mentat/spool/mentat-enricher.py/pending -w 100 -c 1000
-	command[check_mentat_enricher_incoming_dir]=/usr/lib/nagios/plugins/check_file_count -d /var/mentat/spool/mentat-enricher.py/incoming -w 5000 -c 10000
-	command[check_mentat_storage_errors_dir]=/usr/lib/nagios/plugins/check_file_count -d /var/mentat/spool/mentat-storage.py/errors -w 100 -c 1000
-	command[check_mentat_storage_pending_dir]=/usr/lib/nagios/plugins/check_file_count -d /var/mentat/spool/mentat-storage.py/pending -w 100 -c 1000
-	command[check_mentat_storage_incoming_dir]=/usr/lib/nagios/plugins/check_file_count -d /var/mentat/spool/mentat-storage.py/incoming -w 5000 -c 10000
+    command[check_mentat_inspector_a_errors_dir]=/usr/lib/nagios/plugins/check_file_count -d /var/mentat/spool/mentat-inspector.py/errors -w 100 -c 1000
+    command[check_mentat_inspector_a_pending_dir]=/usr/lib/nagios/plugins/check_file_count -d /var/mentat/spool/mentat-inspector.py/pending -w 100 -c 1000
+    command[check_mentat_inspector_a_incoming_dir]=/usr/lib/nagios/plugins/check_file_count -d /var/mentat/spool/mentat-inspector.py/incoming -w 5000 -c 10000
+
+    command[check_mentat_inspector_b_errors_dir]=/usr/lib/nagios/plugins/check_file_count -d /var/mentat/spool/mentat-inspector-b.py/errors -w 100 -c 1000
+    command[check_mentat_inspector_b_pending_dir]=/usr/lib/nagios/plugins/check_file_count -d /var/mentat/spool/mentat-inspector-b.py/pending -w 100 -c 1000
+    command[check_mentat_inspector_b_incoming_dir]=/usr/lib/nagios/plugins/check_file_count -d /var/mentat/spool/mentat-inspector-b.py/incoming -w 5000 -c 10000
+
+    command[check_mentat_enricher_errors_dir]=/usr/lib/nagios/plugins/check_file_count -d /var/mentat/spool/mentat-enricher.py/errors -w 100 -c 1000
+    command[check_mentat_enricher_pending_dir]=/usr/lib/nagios/plugins/check_file_count -d /var/mentat/spool/mentat-enricher.py/pending -w 100 -c 1000
+    command[check_mentat_enricher_incoming_dir]=/usr/lib/nagios/plugins/check_file_count -d /var/mentat/spool/mentat-enricher.py/incoming -w 5000 -c 10000
+
+    command[check_mentat_storage_errors_dir]=/usr/lib/nagios/plugins/check_file_count -d /var/mentat/spool/mentat-storage.py/errors -w 100 -c 1000
+    command[check_mentat_storage_pending_dir]=/usr/lib/nagios/plugins/check_file_count -d /var/mentat/spool/mentat-storage.py/pending -w 100 -c 1000
+    command[check_mentat_storage_incoming_dir]=/usr/lib/nagios/plugins/check_file_count -d /var/mentat/spool/mentat-storage.py/incoming -w 5000 -c 10000
 
 
 Monitoring log files
@@ -279,3 +306,51 @@ Monitoring log files
 You may consider using tools like ``logwatch``, ``logcheck``, ``Kibana`` or
 ``Graylog`` to monitor the log files in ``/var/mentat/log``. No such solutions are
 currently part of the package, so you have to implement your own.
+
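+A very simple home-grown check can be built from standard tools. The sketch
+below is only an illustration: it assumes the default log location and the
+``ERROR`` log level keyword in the Python logging format shown earlier, and
+mails the report to ``root``:
+
+.. code-block:: shell
+
+    # Hypothetical daily check: mail a report if any ERROR records were logged today.
+    TODAY=`date '+%F'`
+    REPORT=`grep -H " ERROR: " /var/mentat/log/*.log | grep "$TODAY"`
+    if [ -n "$REPORT" ]
+    then
+        echo "$REPORT" | mail -s "Mentat: errors found in log files" root
+    fi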
+
+.. _section-administration-maintenance:
+
+Maintenance
+--------------------------------------------------------------------------------
+
+Database
+````````````````````````````````````````````````````````````````````````````````
+
+References:
+
+* `Introduction to VACUUM, ANALYZE, EXPLAIN, and COUNT <https://wiki.postgresql.org/wiki/Introduction_to_VACUUM,_ANALYZE,_EXPLAIN,_and_COUNT#Using_ANALYZE_to_optimize_PostgreSQL_queries>`__
+
+.. code-block:: shell
+
+    # Launch tmux or screen.
+    tmux
+
+    # Stop Mentat system.
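+    # Put the web interface into maintenance mode: fill in the displayed
+    # maintenance time window (here from now until four hours from now) and
+    # switch Apache over to the maintenance site.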
+    printf 'SetOutputFilter SUBSTITUTE;DEFLATE\nSubstitute "s/__MAINTENANCE_START__/%b/n"\nSubstitute "s/__MAINTENANCE_END__/%b/n"\n' "`date '+%F %R'`" "`date -d '+4 hour' '+%F %R'`" > /etc/mentat/apache/maintenance/.htaccess
+    a2enmod substitute
+    a2dissite site_mentat-ng.conf
+    a2ensite site_maintenance.conf
+    systemctl restart apache2
+    mentat-controller.py --command disable
+    mentat-controller.py --command stop
+    systemctl restart postgresql
+
+    # Perform database maintenance tasks.
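+    # VACUUM FULL reclaims disk space from dead rows, CLUSTER physically
+    # reorders the tables according to their clustering indexes and ANALYZE
+    # refreshes the statistics used by the query planner.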
+    time psql mentat_events -c 'VACUUM FULL VERBOSE;'
+    time psql mentat_events -c 'CLUSTER VERBOSE;'
+    time psql mentat_events -c 'ANALYZE VERBOSE;'
+    time psql mentat_main -c 'VACUUM FULL VERBOSE;'
+    time psql mentat_main -c 'CLUSTER VERBOSE;'
+    time psql mentat_main -c 'ANALYZE VERBOSE;'
+
+    # Start Mentat system.
+    systemctl restart postgresql
+    mentat-controller.py --command start
+    mentat-controller.py --command enable
+    a2dismod substitute
+    a2dissite site_maintenance.conf
+    a2ensite site_mentat-ng.conf
+    systemctl restart apache2
+
+For your convenience there is a script ``/etc/mentat/scripts/sqldb-maintenance.sh``, which can be used
+to perform all of the above tasks for you in a single command. We recommend executing it in a ``tmux``
+or ``screen`` session, so that it does not depend on your current terminal session.
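+
+For example (the recipient address below is illustrative only; when no addresses
+are given, the report is sent to the addresses configured in
+``MENTAT_ADMINS_MAIL_TO`` in ``/etc/default/mentat``):
+
+.. code-block:: shell
+
+    tmux
+    /etc/mentat/scripts/sqldb-maintenance.sh admin@domain.org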
diff --git a/doc/sphinx/_doclib/upgrading.rst b/doc/sphinx/_doclib/upgrading.rst
index 089fba245..c0a0b1020 100644
--- a/doc/sphinx/_doclib/upgrading.rst
+++ b/doc/sphinx/_doclib/upgrading.rst
@@ -23,6 +23,9 @@ to latest version:
 
 .. code-block:: shell
 
+    # Launch tmux or screen.
+    tmux
+
     # Step 0: Activate maintenance mode in case the downtime will be noticable for users:
     # First update timestamps of maintenance start and maintenance end:
     $ vim /etc/mentat/apache/maintenance/.htaccess
@@ -94,6 +97,9 @@ from version ``10.x`` to ``11.x``.
 
 .. code-block:: shell
 
+    # Launch tmux or screen.
+    tmux
+
     # Step 0: Activate maintenance mode:
     # First update timestamps of maintenance start and maintenance end:
     $ vim /etc/mentat/apache/maintenance/.htaccess
@@ -188,6 +194,9 @@ from version ``11.x`` to ``12.x``.
 
 .. code-block:: shell
 
+    # Launch tmux or screen.
+    tmux
+
     # Step 0: Activate maintenance mode:
     # First update timestamps of maintenance start and maintenance end:
     $ vim /etc/mentat/apache/maintenance/.htaccess
diff --git a/etc/default/mentat b/etc/default/mentat
index f7e08b87d..cbd000b42 100644
--- a/etc/default/mentat
+++ b/etc/default/mentat
@@ -21,5 +21,8 @@ MENTAT_GROUP=mentat
 # aware, that this variable should be an array.
 MENTAT_CHECKS_MAIL_TO=(root)
 
+# List of email addresses of Mentat system administrators, used as recipients
+# of maintenance reports. Please be aware that this variable should be an array.
+MENTAT_ADMINS_MAIL_TO=(root)
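+# Example with multiple recipients (addresses are illustrative only):
+#MENTAT_ADMINS_MAIL_TO=(root admin@domain.org)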
+
 # Base URL for Mentat`s web interface Hawat (with trailing slash).
 MENTAT_HAWAT_URL=https://mentat.domain.org/mentat/
diff --git a/scripts/sqldb-maintenance.sh b/scripts/sqldb-maintenance.sh
new file mode 100755
index 000000000..15ee0ba06
--- /dev/null
+++ b/scripts/sqldb-maintenance.sh
@@ -0,0 +1,90 @@
+#!/bin/bash
+#-------------------------------------------------------------------------------
+# This file is part of Mentat system (https://mentat.cesnet.cz/).
+#
+# Copyright (C) since 2011 CESNET, z.s.p.o (http://www.ces.net/)
+# Use of this source is governed by the MIT license, see LICENSE file.
+#-------------------------------------------------------------------------------
+
+. /etc/default/mentat
+cd /
+
+# Recipients of the final report may be given as command line arguments.
+ADDRS=("${@}")
+
+# In case the list of report recipients is not given as command line arguments,
+# use the default list from the /etc/default/mentat configuration file.
+if [ ${#ADDRS[@]} -eq 0 ]
+then
+    ADDRS=("${MENTAT_ADMINS_MAIL_TO[@]}")
+fi
+
+MARK_BEGIN=$SECONDS
+
+echo ""
+echo "Starting database maintenance at: `date --rfc-3339=second`"
+echo ""
+echo "#==============================================================================#"
+echo "| Stopping the Mentat system:                                                  |"
+echo "#==============================================================================#"
+echo "  Current time: `date --rfc-3339=second`"
+echo ""
+printf 'SetOutputFilter SUBSTITUTE;DEFLATE\nSubstitute "s/__MAINTENANCE_START__/%b/n"\nSubstitute "s/__MAINTENANCE_END__/%b/n"\n' "`date '+%F %R'`" "`date -d '+4 hour' '+%F %R'`" > /etc/mentat/apache/maintenance/.htaccess
+a2enmod substitute
+a2dissite site_mentat-ng.conf
+a2ensite site_maintenance.conf
+systemctl restart apache2
+mentat-controller.py --command disable
+mentat-controller.py --command stop
+systemctl restart postgresql
+
+
+echo ""
+echo "#==============================================================================#"
+echo "| Performing database maintenance:                                             |"
+echo "#==============================================================================#"
+echo "  Current time: `date --rfc-3339=second`"
+for dbname in mentat_events mentat_main; do
+	echo ""
+	echo "+------------------------------------------------------------------------------+"
+	echo "  Database '$dbname'"
+	echo "+------------------------------------------------------------------------------+"
+	echo ""
+	echo " * VACUUM:"
+	echo ""
+	time psql "$dbname" -c 'VACUUM FULL VERBOSE;'
+	echo ""
+	echo " * CLUSTER:"
+	echo ""
+	time psql "$dbname" -c 'CLUSTER VERBOSE;'
+	echo ""
+	echo " * ANALYZE:"
+	echo ""
+	time psql "$dbname" -c 'ANALYZE VERBOSE;'
+done
+
+
+echo ""
+echo "#==============================================================================#"
+echo "| Starting the Mentat system:                                                  |"
+echo "#==============================================================================#"
+echo "  Current time: `date --rfc-3339=second`"
+echo ""
+systemctl restart postgresql
+mentat-controller.py --command start
+mentat-controller.py --command enable
+a2dismod substitute
+a2dissite site_maintenance.conf
+a2ensite site_mentat-ng.conf
+systemctl restart apache2
+
+MARK_END=$SECONDS
+DURATION=`expr $MARK_END - $MARK_BEGIN`
+
+{ printf "Dear administrator,\n\ndatabase maintenance has just finished. The whole process took %qh %qm %qs. Please login and make sure everything is in order.\n\n---\nWith regards\n\nMentat maintenance script\n\n##### SYSTEM STATUS #####\n\n" "$(($DURATION / 3600))" "$(($DURATION / 60))" "$(($DURATION % 60))" && system-status; } | mail -s "Mentat: Database maintenance finished" ${ADDRS[@]}
+
+echo ""
+echo "#==============================================================================#"
+echo ""
+echo "  Finished database maintenance at: `date --rfc-3339=second`"
+echo "  Time elapsed: $(($DURATION / 3600))h $(($DURATION / 60))m $(($DURATION % 60))s"
+echo ""
\ No newline at end of file
-- 
GitLab