Graphite

From Indie IT Wiki

Introduction

Graphite is an enterprise scale monitoring tool that stores numeric time-series data and can render graphs of this data on demand.

Graphite consists of 3 software components:

  1. carbon - a Twisted daemon that listens for time-series data
  2. whisper - a simple database library for storing time-series data (similar in design to RRD)
  3. graphite webapp - A Django webapp that renders graphs on-demand using Cairo

http://graphite.readthedocs.io/en/latest/overview.html

Tools

http://graphite.readthedocs.io/en/latest/tools.html

BASH

# carbon / graphite
*/5 * * * * ~/Bin/erx_snmp.sh &>/dev/null
#!/bin/sh

# expects the following env vars
# METRIC_PREFIX - At least the hostname to record these stats under in graphite - collectd.someserver
# SNMP_HOST - the host to pull snmp stats from
# CARBON_HOST - The carbon host to send the stats
# CARBON_PORT - The carbon port to send the stats

METRIC_PREFIX="ubnt"
SNMP_HOST="192.168.0.1"
CARBON_HOST="123.456.789.0"
CARBON_PORT="2003"

NOW="$(date +%s)"

snmpId() {
    echo "$1" | awk '{print $1}'
}

snmpVal() {
    echo "$1" | awk '{print $3}' | tr -d '"'
}

sendStat() {
    echo "$METRIC_PREFIX.$1 $2 $NOW" | nc "$CARBON_HOST" "$CARBON_PORT"
    # echo "$METRIC_PREFIX.$1 $2 $NOW"
}

IFACE_NAMES="$(snmpwalk -OQtn -v 2c -c "public" "$SNMP_HOST" "1.3.6.1.2.1.2.2.1.2")"
findName() {
    id="$(snmpId "$1" | rev | cut -d'.' -f1 | rev)"
    echo "$IFACE_NAMES" | grep "$id =" | awk '{print $3}' | tr -d '"'
}

# Memory info
snmpwalk -OQtn -v 2c -c "public" "$SNMP_HOST" "1.3.6.1.4.1.2021.4" | while read line; do
    field=""
    id="$(snmpId "$line")"
    val="$(snmpVal "$line")"
    case "$id" in
        ".1.3.6.1.4.1.2021.4.5.0") field="memory.memory.total";;
        ".1.3.6.1.4.1.2021.4.6.0") field="memory.memory.free";;
        ".1.3.6.1.4.1.2021.4.13.0") field="memory.memory.shared";;
        ".1.3.6.1.4.1.2021.4.14.0") field="memory.memory.buffered";;
        ".1.3.6.1.4.1.2021.4.15.0") field="memory.memory.cached";;
    esac
    if [ "$field" != "" ]; then
        sendStat "$field" "$val"
    fi
done

# System info
snmpwalk -OQtn -v 2c -c "public" "$SNMP_HOST" "1.3.6.1.2.1.25.1" | while read line; do
    field=""
    id="$(snmpId "$line")"
    val="$(snmpVal "$line")"
    case "$id" in
        ".1.3.6.1.2.1.25.1.1.0") field="uptime.uptime"; val="$(expr "$val" / 100)";;
        ".1.3.6.1.2.1.25.1.5.0") field="users.users";;
        ".1.3.6.1.2.1.25.1.6.0") field="processes.processes";;
    esac
    if [ "$field" != "" ]; then
        sendStat "$field" "$val"
    fi
done

# load
snmpwalk -OQtn -v 2c -c "public" "$SNMP_HOST" "1.3.6.1.4.1.2021.10.1.3" | while read line; do
    field=""
    id="$(snmpId "$line")"
    val="$(snmpVal "$line")"
    case "$id" in
        ".1.3.6.1.4.1.2021.10.1.3.1") field="load.load.shortterm";;
        ".1.3.6.1.4.1.2021.10.1.3.2") field="load.load.midterm";;
        ".1.3.6.1.4.1.2021.10.1.3.3") field="load.load.longterm";;
    esac
    if [ "$field" != "" ]; then
        sendStat "$field" "$val"
    fi
done

# cpu info
snmpwalk -OQtn -v 2c -c "public" "$SNMP_HOST" "1.3.6.1.4.1.2021.11" | while read line; do
    field=""
    id="$(snmpId "$line")"
    val="$(snmpVal "$line")"
    case "$id" in
        ".1.3.6.1.4.1.2021.11.9.0") field="cpu.0.percent.user";;
        ".1.3.6.1.4.1.2021.11.10.0") field="cpu.0.percent.system";;
        ".1.3.6.1.4.1.2021.11.11.0") field="cpu.0.percent.idle";;
    esac
    if [ "$field" != "" ]; then
        sendStat "$field" "$val"
    fi
done

# 32 bit interface stats
snmpwalk -OQtn -v 2c -c "public" "$SNMP_HOST" "1.3.6.1.2.1.2.2.1" | while read line; do
    field=""
    id="$(snmpId "$line")"
    val="$(snmpVal "$line")"
    name="$(findName "$line")"
    case "$id" in
        ".1.3.6.1.2.1.2.2.1.13."*) field="interface.$name.if_discards.rx";;
        ".1.3.6.1.2.1.2.2.1.14."*) field="interface.$name.if_errors.rx";;
        ".1.3.6.1.2.1.2.2.1.15."*) field="interface.$name.if_unknown_protocols.rx";;
        ".1.3.6.1.2.1.2.2.1.19."*) field="interface.$name.if_discards.tx";;
        ".1.3.6.1.2.1.2.2.1.20."*) field="interface.$name.if_errors.tx";;
        ".1.3.6.1.2.1.2.2.1.21."*) field="interface.$name.if_queue.tx";;
    esac
    if [ "$field" != "" ]; then
        sendStat "$field" "$val"
    fi
done

# 64 bit interface stats
snmpwalk -OQtn -v 2c -c "public" "$SNMP_HOST" "1.3.6.1.2.1.31.1.1.1" | while read line; do
    field=""
    id="$(snmpId "$line")"
    val="$(snmpVal "$line")"
    name="$(findName "$line")"
    case "$id" in
        ".1.3.6.1.2.1.31.1.1.1.6."*) field="interface.$name.if_octets.rx";;
        ".1.3.6.1.2.1.31.1.1.1.7."*) field="interface.$name.if_unicast_packets.rx";;
        ".1.3.6.1.2.1.31.1.1.1.8."*) field="interface.$name.if_multicast_packets.rx";;
        ".1.3.6.1.2.1.31.1.1.1.9."*) field="interface.$name.if_broadcast_packets.rx";;
        ".1.3.6.1.2.1.31.1.1.1.10."*) field="interface.$name.if_octets.tx";;
        ".1.3.6.1.2.1.31.1.1.1.11."*) field="interface.$name.if_unicast_packets.tx";;
        ".1.3.6.1.2.1.31.1.1.1.12."*) field="interface.$name.if_multicast_packets.tx";;
        ".1.3.6.1.2.1.31.1.1.1.13."*) field="interface.$name.if_broadcast_packets.tx";;
    esac
    if [ "$field" != "" ]; then
        sendStat "$field" "$val"
    fi
done
# graphite carbon
* * * * * /root/bin/graphite_carbon_all.sh
#!/bin/bash
echo "nas.cpu.cpu.load" `cat /proc/loadavg | awk '{ print $1 }' 2>/dev/null` `date +%s` | nc 123.456.789.0 2003;
exit;

https://github.com/iFixit/pipe-to-graphite

https://github.com/jbehrends/monitoring_scripts/blob/master/graphite/edgerouter_metrics.sh

MRTG

https://github.com/oetiker/mrtg/search?q=graphite

PlexPy

https://github.com/JeordyR/plexpyInflux

old...

https://github.com/Drewster727/plexpy-influxdb-export

Plex

https://github.com/barrycarey/Plex-Data-Collector-For-InfluxDB

Metrics

Naming

Properly naming your metrics is critical to avoid conflicts, confusing data and potentially wrong interpretation later on. I like to organize metrics using the following schema:

<namespace>.<instrumented section>.<target (noun)>.<action (past tense verb)>

For example...

accounts.authentication.password.attempted
accounts.authentication.password.succeeded
accounts.authentication.password.failed

I use nouns to define the target and past tense verbs to define the action. This becomes a useful convention when you need to nest metrics. In the above example, let’s say I want to monitor the reasons for the failed password authentications. Here is how I would organize the extra stats...

accounts.authentication.password.failure.no_email_found
accounts.authentication.password.failure.password_check_failed
accounts.authentication.password.failure.password_reset_required

Graphing

http://docs.grafana.org/features/datasources/graphite/

https://grafana.com