Set up comprehensive Linux system monitoring using Prometheus and Grafana. Monitor CPU, memory, disk, network, and application metrics with beautiful dashboards.
Comprehensive system monitoring is essential for maintaining healthy Linux servers. This guide shows you how to set up Prometheus and Grafana for production-grade monitoring.
┌─────────────┐     ┌──────────────┐     ┌─────────────┐
│    Node     │────▶│  Prometheus  │────▶│   Grafana   │
│  Exporter   │     │  (Metrics)   │     │ (Dashboards)│
└─────────────┘     └──────────────┘     └─────────────┘
# Download Prometheus
wget https://github.com/prometheus/prometheus/releases/download/v2.48.0/prometheus-2.48.0.linux-amd64.tar.gz
tar xvfz prometheus-2.48.0.linux-amd64.tar.gz
# Use the exact directory name: a "prometheus-*" glob also matches the tarball,
# and cd refuses more than one argument.
cd prometheus-2.48.0.linux-amd64

# Create the unprivileged account the unit below runs as (skip if it exists)
id -u prometheus >/dev/null 2>&1 || sudo useradd --system --no-create-home --shell /bin/false prometheus

# Install the binaries and assets to the paths referenced by ExecStart
sudo install -m 0755 prometheus promtool /usr/local/bin/
sudo mkdir -p /etc/prometheus /var/lib/prometheus
sudo cp -r consoles console_libraries /etc/prometheus/
# Seed the bundled default config without overwriting an existing one
sudo cp -n prometheus.yml /etc/prometheus/prometheus.yml
sudo chown -R prometheus:prometheus /etc/prometheus /var/lib/prometheus

# Create systemd service
# The quoted 'EOF' keeps the trailing backslashes literal so systemd, not the
# shell, handles the ExecStart line continuations.
sudo tee /etc/systemd/system/prometheus.service <<'EOF'
[Unit]
Description=Prometheus
After=network.target

[Service]
Type=simple
User=prometheus
Group=prometheus
ExecStart=/usr/local/bin/prometheus \
  --config.file=/etc/prometheus/prometheus.yml \
  --storage.tsdb.path=/var/lib/prometheus/ \
  --web.console.templates=/etc/prometheus/consoles \
  --web.console.libraries=/etc/prometheus/console_libraries
Restart=on-failure

[Install]
WantedBy=multi-user.target
EOF

sudo systemctl daemon-reload
sudo systemctl enable prometheus
sudo systemctl start prometheus
# Download Node Exporter
wget https://github.com/prometheus/node_exporter/releases/download/v1.7.0/node_exporter-1.7.0.linux-amd64.tar.gz
tar xvfz node_exporter-1.7.0.linux-amd64.tar.gz
sudo install -m 0755 node_exporter-1.7.0.linux-amd64/node_exporter /usr/local/bin/

# Create the unprivileged account the unit below runs as (skip if it exists);
# without it systemd cannot start a service declared with User=node_exporter.
id -u node_exporter >/dev/null 2>&1 || sudo useradd --system --no-create-home --shell /bin/false node_exporter

# Create systemd service
sudo tee /etc/systemd/system/node_exporter.service <<'EOF'
[Unit]
Description=Node Exporter
After=network.target

[Service]
Type=simple
User=node_exporter
Group=node_exporter
ExecStart=/usr/local/bin/node_exporter
Restart=on-failure

[Install]
WantedBy=multi-user.target
EOF

sudo systemctl daemon-reload
sudo systemctl enable node_exporter
sudo systemctl start node_exporter
# /etc/prometheus/prometheus.yml
# Main Prometheus configuration: global timing, alert rule files, scrape targets.
global:
  # How often targets are scraped
  scrape_interval: 15s
  # How often alerting/recording rules are evaluated
  evaluation_interval: 15s

# Alerting rules to load; without this entry the rules file is never evaluated
rule_files:
  - /etc/prometheus/alerts.yml

scrape_configs:
  # Prometheus scraping its own metrics endpoint
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']

  # Host metrics exposed by Node Exporter
  - job_name: 'node'
    static_configs:
      - targets: ['localhost:9100']
        # Static labels attached to every series from this target;
        # 'instance' replaces the default host:port value.
        labels:
          instance: 'server-01'
          environment: 'production'
# Install prerequisites
sudo apt-get install -y apt-transport-https software-properties-common wget gnupg

# Import the Grafana signing key into a dedicated keyring
# (apt-key is deprecated; the key must be in place before the repo is used)
sudo mkdir -p /etc/apt/keyrings
wget -q -O - https://apt.grafana.com/gpg.key | gpg --dearmor | sudo tee /etc/apt/keyrings/grafana.gpg > /dev/null

# Add Grafana repository, bound to that keyring
echo "deb [signed-by=/etc/apt/keyrings/grafana.gpg] https://apt.grafana.com stable main" | sudo tee /etc/apt/sources.list.d/grafana.list

# Install Grafana
sudo apt-get update
sudo apt-get install -y grafana

# Start Grafana
sudo systemctl daemon-reload
sudo systemctl enable grafana-server
sudo systemctl start grafana-server
node_cpu_seconds_total
node_memory_MemTotal_bytes, node_memory_MemAvailable_bytes
node_disk_io_time_seconds_total
node_network_receive_bytes_total, node_network_transmit_bytes_total
node_load1, node_load5, node_load15
http_requests_total
http_requests_total{status=~"5.."}
http_request_duration_seconds
http_connections_active
100 - (avg by (instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100)
(node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100
100 - ((node_filesystem_avail_bytes{mountpoint="/"} * 100) / node_filesystem_size_bytes{mountpoint="/"})
Prometheus data source URL: http://localhost:9090
Grafana dashboard ID: 1860 (Node Exporter Full)
# /etc/prometheus/alerts.yml
# Alerting rules for host CPU, memory and root-filesystem usage.
groups:
  - name: system_alerts
    # Evaluation interval for this group
    interval: 30s
    rules:
      # CPU busy % = 100 - idle %, averaged per instance over 5 minutes
      - alert: HighCPUUsage
        expr: 100 - (avg by (instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 80
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High CPU usage on {{ $labels.instance }}"
          description: "CPU usage is above 80% for 5 minutes"

      # Used memory % = (total - available) / total
      - alert: HighMemoryUsage
        expr: (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100 > 90
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "High memory usage on {{ $labels.instance }}"
          description: "Memory usage is above 90%"

      # Free space % on the root mountpoint
      - alert: DiskSpaceLow
        expr: (node_filesystem_avail_bytes{mountpoint="/"} / node_filesystem_size_bytes{mountpoint="/"}) * 100 < 10
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Low disk space on {{ $labels.instance }}"
          description: "Disk space is below 10%"
# Docker Compose stack: Prometheus, Grafana and Node Exporter.
version: '3.8'

services:
  prometheus:
    # Pinned to the release used for the native install
    image: prom/prometheus:v2.48.0
    ports:
      - "9090:9090"
    volumes:
      - ./prometheus.yml:/etc/prometheus/prometheus.yml
      - prometheus-data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'

  grafana:
    image: grafana/grafana:latest
    ports:
      - "3000:3000"
    volumes:
      - grafana-data:/var/lib/grafana
    environment:
      # Set GF_SECURITY_ADMIN_PASSWORD in the shell or an .env file;
      # "admin" is only the fallback and should not be used in production.
      - GF_SECURITY_ADMIN_PASSWORD=${GF_SECURITY_ADMIN_PASSWORD:-admin}
    depends_on:
      - prometheus

  node-exporter:
    # Pinned to the release used for the native install
    image: prom/node-exporter:v1.7.0
    ports:
      - "9100:9100"
    volumes:
      - /proc:/host/proc:ro
      - /sys:/host/sys:ro
      - /:/rootfs:ro
    command:
      - '--path.procfs=/host/proc'
      - '--path.sysfs=/host/sys'
      # Point the filesystem collector at the host root mounted above;
      # without it the exporter reports the container's own filesystems.
      - '--path.rootfs=/rootfs'
      # "$$" is Compose escaping for a literal "$" in the regex
      - '--collector.filesystem.mount-points-exclude=^/(sys|proc|dev|host|etc)($$|/)'

volumes:
  prometheus-data:
  grafana-data:
Prometheus and Grafana provide a powerful, open-source solution for Linux system monitoring. With proper configuration, you can gain deep insights into your infrastructure and applications, enabling proactive problem resolution.
Get the latest tutorials, guides, and insights on AI, DevOps, Cloud, and Infrastructure delivered directly to your inbox.
Discover proven strategies to reduce AWS costs by up to 50%. Learn about Reserved Instances, Spot Instances, right-sizing, and automated cost management.
Compare Terraform, Pulumi, and Ansible for Infrastructure as Code. Learn when to use each tool and how they complement each other in modern DevOps workflows.
Explore more articles in this category
We migrated most scheduled jobs from cron to systemd timers. The wins, the gotchas, and the cases we kept on cron anyway.
A curated list of shell one-liners that earn their place in real ops work — the ones I reach for weekly, not the trick-shot variety.
Generate an SSH key, set up passwordless login, and configure aliases for the servers you use daily — all without copy-pasting yet another long command.