Simple CPU usage tracking in Linux using SQLite and Python

I figured that over night one of my PCs is spending time on “something”. System logs don’t show anything, app logs neither. So, I spent like 30min and made this tiny script which might be useful for you as well. I connect to my host using SSH, and I let this run over night in a tmux session. The day after I can analyze all the stored data in the SQLite database and find the problematic app.

Requirements:

  • Python 3

  • screen / tmux to keep the session going even when you are not logged in

  • (optional) SQLite3 system package so that you can read the recorded data on the system

Source code (activity-tracker.py)

#!/usr/bin/env python3
import sys
import time

import datetime
import logging
import os
import sqlite3
import subprocess


class ActivityTracker:
    def __init__(self):
        script_directory = os.path.dirname(os.path.abspath(sys.argv[0]))
        db = os.path.join(script_directory, 'activity-tracker.db')
        conn = sqlite3.connect(db)
        cursor = conn.cursor()
        cursor.execute('''CREATE TABLE IF NOT EXISTS activity_log
                     (date text, app text, pid int, user text, usage real)''')
        conn.commit()
        self._db = conn

    def sync(self):
        while True:
            command = ["bash", "-c", "ps -eo pcpu,pid,user,args | grep -v '[p]s -eo' | grep -v '[a]ctivity-tracker.py' | tail -n +2 | sort -k1 -r -n | head -10"]
            logging.info("Running command %s" % " ".join(command))

            result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, check=True)
            for line in result.stdout.split("\n"):
                components = list(filter(lambda x: x != "", line.split(" ")))
                if len(components) == 0:
                    continue
                if len(components) < 4:
                    logging.warning("Line has wrong format: %s" % components)
                    continue
                usage = float(components[0])
                pid = int(components[1])
                user = components[2]
                app = " ".join(components[3:])
                moment = datetime.datetime.now().isoformat()

                # store in db
                cursor = self._db.cursor()
                sql = "INSERT INTO activity_log (date, app, pid, user, usage) VALUES (?, ?, ?, ?, ?)"
                cursor.execute(sql, (moment, app, pid, user, usage))
                self._db.commit()
            try:
                time.sleep(10)
            except KeyboardInterrupt:
                logging.info("Exiting")
                self._db.close()
                break


def run():
    logging.getLogger().setLevel(logging.INFO)
    ActivityTracker().sync()


if __name__ == '__main__':
    run()

Finally, you can track the biggest users using simple watch command:

watch -n 30 "sqlite3 activity-tracker.db -table \
  'select * from activity_log order by usage desc limit 50'"

Every 30.0s: sqlite3 activity-tracker.db -table   'select * from activity_log order by usage desc limit...

+----------------------------+------------------------------------------------------------------------+-------+--------+-------+
|            date            |                             app                                        |  pid  |  user  | usage |
+----------------------------+------------------------------------------------------------------------+-------+--------+-------+
| 2024-11-30T08:02:10.361848 | /opt/nomad/nomad agent -config=/opt/nomad/nomad.conf                   | 86884 | root   | 24.2  |
| 2024-11-30T08:01:59.840202 | /opt/consul/consul agent -config-file=/opt/consul/consul.json -rejoin  | 86428 | consul | 6.6   |
| 2024-11-30T08:02:10.383667 | /opt/consul/consul agent -config-file=/opt/consul/consul.json -rejoin  | 86428 | consul | 3.6   |
...
+----------------------------+------------------------------------------------------------------------+-------+--------+-------+