Server Monitoring Tools - Complete Guide
Published: September 25, 2024 | Reading time: 20 minutes
Server Monitoring Overview
Effective server monitoring helps ensure system reliability and performance by surfacing problems before users notice them.
Monitoring Benefits
# Key Benefits
- Proactive issue detection
- Performance optimization
- Capacity planning
- SLA compliance
- Security monitoring
- Cost optimization
- User experience insights
Prometheus Setup
Prometheus Installation
# Install Prometheus
wget https://github.com/prometheus/prometheus/releases/download/v2.45.0/prometheus-2.45.0.linux-amd64.tar.gz
tar xvfz prometheus-2.45.0.linux-amd64.tar.gz
cd prometheus-2.45.0.linux-amd64
# Create a systemd service (a minimal example unit follows; adjust paths and user to your install)
sudo tee /etc/systemd/system/prometheus.service > /dev/null <<'EOF'
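# NOTE: the unit body below is a sketch to complete the truncated heredoc above.
# It assumes the prometheus binary was copied to /usr/local/bin, the config lives
# at /etc/prometheus/prometheus.yml, and a dedicated "prometheus" user owns
# /var/lib/prometheus. Adjust these to match your installation.
[Unit]
Description=Prometheus
Wants=network-online.target
After=network-online.target

[Service]
User=prometheus
Group=prometheus
Type=simple
ExecStart=/usr/local/bin/prometheus --config.file=/etc/prometheus/prometheus.yml --storage.tsdb.path=/var/lib/prometheus/
Restart=on-failure

[Install]
WantedBy=multi-user.target
EOF

# Reload systemd and start Prometheus
sudo systemctl daemon-reload
sudo systemctl enable --now prometheus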
Prometheus Configuration
prometheus.yml
# /etc/prometheus/prometheus.yml
global:
  scrape_interval: 15s
  evaluation_interval: 15s

rule_files:
  - "rules/*.yml"

alerting:
  alertmanagers:
    - static_configs:
        - targets:
            - localhost:9093

scrape_configs:
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']
  - job_name: 'node-exporter'
    static_configs:
      - targets: ['localhost:9100']
  - job_name: 'nginx-exporter'
    static_configs:
      - targets: ['localhost:9113']
  - job_name: 'mysql-exporter'
    static_configs:
      - targets: ['localhost:9104']
  - job_name: 'redis-exporter'
    static_configs:
      - targets: ['localhost:9121']
  - job_name: 'docker-exporter'
    static_configs:
      - targets: ['localhost:9323']
  - job_name: 'blackbox-exporter'
    static_configs:
      - targets: ['localhost:9115']
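Each scrape job above assumes the corresponding exporter is installed and listening on the default port shown; remove the jobs you do not use. After editing, the file can be validated with promtool (shipped in the Prometheus tarball extracted above) before restarting:

# Validate the configuration, then apply it
./promtool check config /etc/prometheus/prometheus.yml
sudo systemctl restart prometheus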
# Alert rules
# /etc/prometheus/rules/alerts.yml
groups:
  - name: system_alerts
    rules:
      - alert: HighCPUUsage
        expr: 100 - (avg by(instance) (irate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 80
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "High CPU usage detected"
          description: "CPU usage is above 80% for more than 5 minutes"
      - alert: HighMemoryUsage
        expr: (node_memory_MemTotal_bytes - node_memory_MemAvailable_bytes) / node_memory_MemTotal_bytes * 100 > 90
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "High memory usage detected"
          description: "Memory usage is above 90% for more than 5 minutes"
      - alert: DiskSpaceLow
        expr: (node_filesystem_avail_bytes / node_filesystem_size_bytes) * 100 < 10
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "Low disk space"
          description: "Disk space is below 10%"
      - alert: ServiceDown
        expr: up == 0
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: "Service is down"
          description: "Service {{ $labels.instance }} is down"
Grafana Setup
Grafana Installation
Grafana Configuration
# Install Grafana
sudo apt-get install -y software-properties-common
wget -q -O - https://packages.grafana.com/gpg.key | sudo apt-key add -
sudo add-apt-repository "deb https://packages.grafana.com/oss/deb stable main"
sudo apt-get update
sudo apt-get install grafana
# Start Grafana
sudo systemctl start grafana-server
sudo systemctl enable grafana-server
# Configure Grafana
sudo nano /etc/grafana/grafana.ini
# Key settings:
[server]
http_port = 3000
domain = localhost
root_url = http://localhost:3000/
[security]
admin_user = admin
admin_password = your_secure_password
[users]
allow_sign_up = false
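Grafana only picks up changes to grafana.ini after a restart:

# Apply the configuration changes
sudo systemctl restart grafana-server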
# Add Prometheus data source
curl -X POST \
  http://admin:your_secure_password@localhost:3000/api/datasources \
  -H 'Content-Type: application/json' \
  -d '{
    "name": "Prometheus",
    "type": "prometheus",
    "url": "http://localhost:9090",
    "access": "proxy",
    "isDefault": true
  }'
# Import dashboard
curl -X POST \
  http://admin:your_secure_password@localhost:3000/api/dashboards/db \
  -H 'Content-Type: application/json' \
  -d @node-exporter-dashboard.json
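Here node-exporter-dashboard.json is assumed to be a dashboard definition you have downloaded locally (for example, an exported community Node Exporter dashboard). To confirm the data source was registered, the Grafana HTTP API can be queried with the same credentials:

# List configured data sources
curl -s http://admin:your_secure_password@localhost:3000/api/datasources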
Node Exporter
System Metrics Collection
Node Exporter Setup
# Install Node Exporter
wget https://github.com/prometheus/node_exporter/releases/download/v1.6.1/node_exporter-1.6.1.linux-amd64.tar.gz
tar xvfz node_exporter-1.6.1.linux-amd64.tar.gz
cd node_exporter-1.6.1.linux-amd64
# Create a systemd service (a minimal example unit follows; adjust paths and user to your install)
sudo tee /etc/systemd/system/node_exporter.service > /dev/null <<'EOF'
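# NOTE: the unit body below is a sketch to complete the truncated heredoc above.
# It assumes the node_exporter binary was copied to /usr/local/bin and that a
# dedicated "node_exporter" user exists.
[Unit]
Description=Node Exporter
Wants=network-online.target
After=network-online.target

[Service]
User=node_exporter
Group=node_exporter
Type=simple
ExecStart=/usr/local/bin/node_exporter
Restart=on-failure

[Install]
WantedBy=multi-user.target
EOF

# Reload systemd, start Node Exporter, and verify it is serving metrics
sudo systemctl daemon-reload
sudo systemctl enable --now node_exporter
curl -s http://localhost:9100/metrics | head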
ELK Stack
Elasticsearch Setup
Elasticsearch Configuration
# Install Elasticsearch
wget -qO - https://artifacts.elastic.co/GPG-KEY-elasticsearch | sudo apt-key add -
echo "deb https://artifacts.elastic.co/packages/8.x/apt stable main" | sudo tee /etc/apt/sources.list.d/elastic-8.x.list
sudo apt-get update
sudo apt-get install elasticsearch
# Configure Elasticsearch
sudo nano /etc/elasticsearch/elasticsearch.yml
# Key settings:
cluster.name: my-cluster
node.name: node-1
path.data: /var/lib/elasticsearch
path.logs: /var/log/elasticsearch
network.host: 0.0.0.0
http.port: 9200
discovery.type: single-node
xpack.security.enabled: false
# Start Elasticsearch
sudo systemctl start elasticsearch
sudo systemctl enable elasticsearch
# Test Elasticsearch
curl -X GET "localhost:9200/"
# Install Logstash
sudo apt-get install logstash
# Configure Logstash
sudo nano /etc/logstash/conf.d/logstash.conf
# Logstash configuration:
input {
  beats {
    port => 5044
  }
}

filter {
  # Filebeat (configured below) sets log_type at the event root via fields_under_root: true
  if [log_type] == "nginx" {
    grok {
      # %{NGINXACCESS} assumes an nginx access-log pattern is available;
      # the default combined format can also be parsed with %{HTTPD_COMBINEDLOG}
      match => { "message" => "%{NGINXACCESS}" }
    }
    date {
      match => [ "timestamp", "dd/MMM/yyyy:HH:mm:ss Z" ]
    }
  }
  if [log_type] == "application" {
    json {
      source => "message"
    }
  }
}

output {
  elasticsearch {
    hosts => ["localhost:9200"]
    index => "logs-%{+YYYY.MM.dd}"
  }
}
# Start Logstash
sudo systemctl start logstash
sudo systemctl enable logstash
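The pipeline syntax can be checked at any time before restarting, assuming the stock package layout (binary under /usr/share/logstash, settings under /etc/logstash):

# Test the pipeline configuration and exit
sudo /usr/share/logstash/bin/logstash --path.settings /etc/logstash --config.test_and_exit -f /etc/logstash/conf.d/logstash.conf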
Kibana Setup
Kibana Configuration
# Install Kibana
sudo apt-get install kibana
# Configure Kibana
sudo nano /etc/kibana/kibana.yml
# Key settings:
server.port: 5601
server.host: "0.0.0.0"
elasticsearch.hosts: ["http://localhost:9200"]
logging.appenders.file.type: file
logging.appenders.file.fileName: /var/log/kibana/kibana.log
logging.appenders.file.layout.type: json
logging.root.appenders: [default, file]
# Start Kibana
sudo systemctl start kibana
sudo systemctl enable kibana
# Install Filebeat
sudo apt-get install filebeat
# Configure Filebeat
sudo nano /etc/filebeat/filebeat.yml
# Filebeat configuration:
filebeat.inputs:
  - type: log
    enabled: true
    paths:
      - /var/log/nginx/*.log
    fields:
      log_type: nginx
    fields_under_root: true
  - type: log
    enabled: true
    paths:
      - /var/log/myapp/*.log
    fields:
      log_type: application
    fields_under_root: true

output.logstash:
  hosts: ["localhost:5044"]
# Start Filebeat
sudo systemctl start filebeat
sudo systemctl enable filebeat
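Filebeat ships with built-in checks that are worth running after any configuration change:

# Validate the configuration file and test connectivity to Logstash
sudo filebeat test config
sudo filebeat test output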
Application Monitoring
Custom Metrics
Node.js Application Metrics
# Install prom-client
npm install prom-client
# metrics.js
const client = require('prom-client');

// Create a Registry
const register = new client.Registry();

// Add default metrics (CPU, memory, event loop lag, etc.)
client.collectDefaultMetrics({ register });

// Create custom metrics
const httpRequestDuration = new client.Histogram({
  name: 'http_request_duration_seconds',
  help: 'Duration of HTTP requests in seconds',
  labelNames: ['method', 'route', 'status_code'],
  buckets: [0.1, 0.3, 0.5, 0.7, 1, 3, 5, 7, 10]
});

const httpRequestTotal = new client.Counter({
  name: 'http_requests_total',
  help: 'Total number of HTTP requests',
  labelNames: ['method', 'route', 'status_code']
});

const activeConnections = new client.Gauge({
  name: 'active_connections',
  help: 'Number of active connections'
});

const databaseConnections = new client.Gauge({
  name: 'database_connections',
  help: 'Number of database connections',
  labelNames: ['state']
});

// Register metrics
register.registerMetric(httpRequestDuration);
register.registerMetric(httpRequestTotal);
register.registerMetric(activeConnections);
register.registerMetric(databaseConnections);

// Express app
const express = require('express');
const app = express();

// Metrics middleware: record duration and count for every finished response
app.use((req, res, next) => {
  const start = Date.now();
  res.on('finish', () => {
    const duration = (Date.now() - start) / 1000;
    const labels = {
      method: req.method,
      route: req.route ? req.route.path : req.path,
      status_code: res.statusCode
    };
    httpRequestDuration.observe(labels, duration);
    httpRequestTotal.inc(labels);
  });
  next();
});

// Metrics endpoint scraped by Prometheus
app.get('/metrics', async (req, res) => {
  res.set('Content-Type', register.contentType);
  res.end(await register.metrics());
});

// Health check endpoint
app.get('/health', (req, res) => {
  res.json({
    status: 'healthy',
    timestamp: new Date().toISOString(),
    uptime: process.uptime(),
    memory: process.memoryUsage(),
    version: process.version
  });
});

module.exports = { app, register, httpRequestDuration, httpRequestTotal, activeConnections, databaseConnections };
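To expose these metrics, the exported app has to listen on a port and Prometheus has to scrape it. A minimal sketch follows; the port 3000 and the 'nodejs-app' job name are illustrative assumptions, not part of the setup above:

# server.js
const { app } = require('./metrics');
app.listen(3000, () => console.log('app with /metrics and /health listening on :3000'));

# Added to /etc/prometheus/prometheus.yml under scrape_configs:
  - job_name: 'nodejs-app'
    static_configs:
      - targets: ['localhost:3000']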
Alerting
Alertmanager Setup
Alertmanager Configuration
# Install Alertmanager
wget https://github.com/prometheus/alertmanager/releases/download/v0.25.0/alertmanager-0.25.0.linux-amd64.tar.gz
tar xvfz alertmanager-0.25.0.linux-amd64.tar.gz
cd alertmanager-0.25.0.linux-amd64
# Create a systemd service (a minimal example unit follows; adjust paths and user to your install)
sudo tee /etc/systemd/system/alertmanager.service > /dev/null <<'EOF'
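# NOTE: the unit body below is a sketch to complete the truncated heredoc above.
# It assumes the alertmanager binary was copied to /usr/local/bin, its config
# lives at /etc/alertmanager/alertmanager.yml, and an "alertmanager" user owns
# /var/lib/alertmanager.
[Unit]
Description=Alertmanager
Wants=network-online.target
After=network-online.target

[Service]
User=alertmanager
Group=alertmanager
Type=simple
ExecStart=/usr/local/bin/alertmanager --config.file=/etc/alertmanager/alertmanager.yml --storage.path=/var/lib/alertmanager/
Restart=on-failure

[Install]
WantedBy=multi-user.target
EOF

sudo systemctl daemon-reload
sudo systemctl enable --now alertmanager

The referenced alertmanager.yml is not shown above; a minimal example that routes all alerts to a single Slack receiver looks like this (the webhook URL and channel are placeholders to replace):

# /etc/alertmanager/alertmanager.yml
route:
  receiver: 'team-notifications'
  group_by: ['alertname', 'instance']
  group_wait: 30s
  group_interval: 5m
  repeat_interval: 4h

receivers:
  - name: 'team-notifications'
    slack_configs:
      - api_url: 'https://hooks.slack.com/services/REPLACE/ME'
        channel: '#alerts'
        send_resolved: true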
Monitoring Best Practices
Monitoring Strategy
Key Metrics
- CPU utilization
- Memory usage
- Disk I/O
- Network traffic
- Response times
- Error rates
- Throughput
Alerting Rules
- Set appropriate thresholds
- Use multiple alert levels (see the sketch after this list)
- Avoid alert fatigue
- Test alerting regularly
- Document alert procedures
- Use runbooks
- Define escalation policies
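A common way to implement multiple alert levels is to define the same expression twice with different thresholds and severities, so warnings page less aggressively than critical alerts. A minimal sketch, reusing the HTTP latency histogram from the application section (file name and thresholds are illustrative):

# /etc/prometheus/rules/latency.yml
groups:
  - name: latency_alerts
    rules:
      - alert: HighRequestLatencyWarning
        expr: histogram_quantile(0.95, sum by (le) (rate(http_request_duration_seconds_bucket[5m]))) > 0.5
        for: 10m
        labels:
          severity: warning
        annotations:
          summary: "p95 request latency above 500ms"
      - alert: HighRequestLatencyCritical
        expr: histogram_quantile(0.95, sum by (le) (rate(http_request_duration_seconds_bucket[5m]))) > 2
        for: 5m
        labels:
          severity: critical
        annotations:
          summary: "p95 request latency above 2s"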
Summary
Server monitoring involves several key components:
- Metrics Collection: Prometheus, Node Exporter
- Visualization: Grafana dashboards
- Logging: ELK stack, centralized logging
- Alerting: Alertmanager, notification channels
- Application Metrics: Custom metrics, health checks
- Best Practices: Key metrics, alerting rules
Need More Help?
Struggling with server monitoring setup or need help implementing comprehensive monitoring solutions? Our DevOps experts can help you set up robust monitoring systems.
Get Monitoring Help