summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--docs/images/models-k8s.pngbin0 -> 53813 bytes
-rw-r--r--tools/README.md16
-rw-r--r--tools/docker/demo_deploy.sh60
-rw-r--r--tools/docker/docker-cluster.sh221
-rw-r--r--tools/docker/nginx.json67
-rw-r--r--tools/kubernetes/README.md17
-rw-r--r--tools/kubernetes/demo_deploy.sh74
-rw-r--r--tools/kubernetes/k8s-cluster.sh438
-rw-r--r--tools/maas/deploy.sh75
-rw-r--r--tools/prometheus/README.md10
-rw-r--r--tools/prometheus/dashboards/Docker_Dashboard-1503539375161.json712
-rw-r--r--tools/prometheus/dashboards/Docker_Host_and_Container_Overview-1503539411705.json1618
-rw-r--r--tools/prometheus/dashboards/Node_Exporter_Server_Metrics-1503539692670.json1632
-rw-r--r--tools/prometheus/dashboards/Node_exporter_single_server-1503539807236.json792
-rw-r--r--tools/prometheus/prometheus-tools.sh228
-rw-r--r--tools/rancher/demo_deploy.sh65
-rw-r--r--tools/rancher/rancher-cluster.sh529
-rw-r--r--tools/traffic.sh31
18 files changed, 6585 insertions, 0 deletions
diff --git a/docs/images/models-k8s.png b/docs/images/models-k8s.png
new file mode 100644
index 0000000..c54bcdb
--- /dev/null
+++ b/docs/images/models-k8s.png
Binary files differ
diff --git a/tools/README.md b/tools/README.md
new file mode 100644
index 0000000..16c5b79
--- /dev/null
+++ b/tools/README.md
@@ -0,0 +1,16 @@
+This repo contains experimental scripts etc for setting up cloud-native stacks for application deployment and management on bare-metal servers. A lot of cloud-native focus so far has been on public cloud providers (AWS, GCE, Azure) but there aren't many tools and even fewer full-stack open source platforms for setting up bare metal servers with the same types of cloud-native stack features. This repo is thus a collection of tools in development toward that goal, useful in experimentation, demonstration, and further investigation into characteristics of cloud-native platforms in bare-metal environments, e.g. efficiency, performance, security, and resilience.
+
+The toolset will eventually include these elements of one or more full-stack platform solutions:
+* hardware prerequisite/options guidance
+* container-focused application runtime environment, e.g.
+ * kubernetes
+ * docker-ce
+ * rancher
+* software-defined storage backends, e.g.
+ * ceph
+* runtime-native networking ("out of the box" networking features, vs some special add-on networking software)
+* app orchestration, e.g. via
+ * cloudify
+ * ONAP
+ * Helm
+* applications useful for platform characterization \ No newline at end of file
diff --git a/tools/docker/demo_deploy.sh b/tools/docker/demo_deploy.sh
new file mode 100644
index 0000000..cbfe949
--- /dev/null
+++ b/tools/docker/demo_deploy.sh
@@ -0,0 +1,60 @@
+#!/bin/bash
+# Copyright 2017 AT&T Intellectual Property, Inc
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#. What this is: Complete scripted deployment of an experimental Docker-based
+#. cloud-native application platform. When complete, Docker-CE and the following
+#. will be installed:
+#. - nginx as demo application
+#. - prometheus + grafana for cluster monitoring/stats
+#. Prometheus dashboard: http://<master_public_ip>:9090
+#. Grafana dashboard: http://<master_public_ip>:3000
+#.
+#. Prerequisites:
+#. - Ubuntu server for cluster nodes (admin/master and worker nodes)
+#. - MAAS server as cluster admin for Docker master/worker nodes
+#. - Password-less ssh key provided for node setup
+#. Usage: on the MAAS server
+#. $ git clone https://gerrit.opnfv.org/gerrit/models ~/models
+#. $ bash ~/models/tools/docker/demo_deploy.sh <key> "<hosts>" <master_ip>
+#. "<worker_ips>" [<extras>]
+#. <key>: name of private key for cluster node ssh (in current folder)
+#. <hosts>: space separated list of hostnames managed by MAAS
+#. <master_ip>: IP of master node
+#. <worker_ips>: space separated list of worker node IPs
+#. <extras>: optional name of script for extra setup functions as needed
+
+# Positional arguments, in the order given in the usage text above
+key=$1
+nodes="$2"
+master=$3
+workers="$4"
+extras=$5
+
+# deploy.sh is sourced (not executed), so it runs in this shell and sees the
+# variables assigned above
+source ~/models/tools/maas/deploy.sh $1 "$2" $5
+# Start an ssh-agent for this shell and load the cluster key, so the ssh/scp
+# calls made below and by docker-cluster.sh can authenticate
+eval `ssh-agent`
+ssh-add $key
+echo "Setting up Docker..."
+bash ~/models/tools/docker/docker-cluster.sh all $master "$workers"
+# TODO: Figure this out... Have to break the setup into two steps as something
+# causes the ssh session to end before the prometheus setup, if both scripts
+# (k8s-cluster and prometheus-tools) are in the same ssh session
+# NOTE(review): the TODO above names k8s-cluster, but this script runs
+# docker-cluster.sh locally; the wording looks carried over from the
+# kubernetes variant - confirm
+echo "Setting up Prometheus..."
+# Copy the private key to the master; the remote session below adds it to its
+# own ssh-agent before running prometheus-tools.sh
+scp -o StrictHostKeyChecking=no $key ubuntu@$master:/home/ubuntu/$key
+# The heredoc delimiter is unquoted, so $key, $master and $workers are expanded
+# locally before the text is sent to the remote shell
+ssh -x -o StrictHostKeyChecking=no ubuntu@$master <<EOF
+git clone https://gerrit.opnfv.org/gerrit/models
+exec ssh-agent bash
+ssh-add $key
+bash models/tools/prometheus/prometheus-tools.sh all "$master $workers"
+EOF
+echo "All done!"
diff --git a/tools/docker/docker-cluster.sh b/tools/docker/docker-cluster.sh
new file mode 100644
index 0000000..8c0aa69
--- /dev/null
+++ b/tools/docker/docker-cluster.sh
@@ -0,0 +1,221 @@
+#!/bin/bash
+# Copyright 2017 AT&T Intellectual Property, Inc
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#. What this is: Deployment script for a multi-node docker-ce cluster.
+#. Prerequisites:
+#. - Ubuntu server for master and worker nodes
+#. Usage:
+#. $ git clone https://gerrit.opnfv.org/gerrit/models ~/models
+#. $ cd ~/models/tools/docker
+#.
+#. Usage:
+#. $ bash docker-cluster.sh all <master> "<workers>"
+#. Automate setup and start demo services.
+#. <master>: master node IP
+#. <workers>: space-separated list of worker node IPs
+#. $ bash docker-cluster.sh setup <master> "<workers>"
+#. Installs and starts master and worker nodes.
+#. $ bash docker-cluster.sh create <service>
+#. <service>: Demo service name to start.
+#. Currently supported: nginx
+#. $ bash docker-cluster.sh delete <service>
+#. <service>: Service name to delete.
+#. $ bash docker-cluster.sh clean [<node>]
+#. <node>: optional IP address of node to clean.
+#. By default, cleans the entire cluster.
+#.
+
+# Setup master and worker hosts
+# Arguments:
+#  $1 - master node IP
+#  $2 - space-separated list of worker node IPs
+# Saves both values to /tmp/env.sh (sourced by the other functions in this
+# script), installs docker-ce on every node over ssh as user "ubuntu", exposes
+# the docker API on the master at tcp port 4243, initializes swarm mode, joins
+# the workers, and finally starts the nginx demo service.
+# Exits 1 if the docker API on the master does not answer.
+function setup() {
+ # Per https://docs.docker.com/engine/swarm/swarm-tutorial/
+ # Unquoted delimiter: $1 and $2 are expanded now and stored as literal values
+ cat >/tmp/env.sh <<EOF
+master=$1
+workers="$2"
+EOF
+ source /tmp/env.sh
+ # Quoted delimiter: the script text is written literally, so $(uname -r) and
+ # $(lsb_release -cs) are evaluated on the node that later runs prereqs.sh
+ cat >/tmp/prereqs.sh <<'EOF'
+#!/bin/bash
+# Per https://docs.docker.com/engine/installation/linux/docker-ce/ubuntu/
+sudo apt-get remove -y docker docker-engine docker.io docker-ce
+sudo apt-get update
+sudo apt-get install -y \
+ linux-image-extra-$(uname -r) \
+ linux-image-extra-virtual
+sudo apt-get install -y \
+ apt-transport-https \
+ ca-certificates \
+ curl \
+ software-properties-common
+curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
+sudo add-apt-repository \
+ "deb [arch=amd64] https://download.docker.com/linux/ubuntu \
+ $(lsb_release -cs) \
+ stable"
+sudo apt-get update
+sudo apt-get install -y docker-ce
+EOF
+
+ # jq is used for parsing API responses
+ sudo apt-get install -y jq
+ scp -o StrictHostKeyChecking=no /tmp/prereqs.sh ubuntu@$master:/home/ubuntu/prereqs.sh
+ ssh -x -o StrictHostKeyChecking=no ubuntu@$master bash /home/ubuntu/prereqs.sh
+ # activate docker API
+ # Per https://www.ivankrizsan.se/2016/05/18/enabling-docker-remote-api-on-ubuntu-16-04/
+ # Unquoted delimiter: $master is expanded locally before the text is sent
+ ssh -x -o StrictHostKeyChecking=no ubuntu@$master <<EOF
+sudo sed -i -- 's~fd://~fd:// -H tcp://0.0.0.0:4243~' /lib/systemd/system/docker.service
+sudo systemctl daemon-reload
+sudo service docker restart
+# Activate swarm mode
+# Per https://docs.docker.com/engine/swarm/swarm-tutorial/create-swarm/
+sudo docker swarm init --advertise-addr $master
+EOF
+
+ # Verify that the API is reachable from this host before going on
+ if ! curl http://$master:4243/version ; then
+ echo "${FUNCNAME[0]}: docker API failed to initialize"
+ exit 1
+ fi
+
+ # Per https://docs.docker.com/engine/swarm/swarm-tutorial/add-nodes/
+ # Keep only the line(s) of the join-token output that contain "docker";
+ # presumably this is the ready-made "docker swarm join ..." command - confirm
+ token=$(ssh -o StrictHostKeyChecking=no -x ubuntu@$master sudo docker swarm join-token worker | grep docker)
+ for worker in $workers; do
+ echo "${FUNCNAME[0]}: setting up worker at $worker"
+ scp -o StrictHostKeyChecking=no /tmp/prereqs.sh ubuntu@$worker:/home/ubuntu/.
+ ssh -x -o StrictHostKeyChecking=no ubuntu@$worker bash /home/ubuntu/prereqs.sh
+ # Run the captured join command on the worker
+ ssh -x -o StrictHostKeyChecking=no ubuntu@$worker sudo $token
+ done
+
+ echo "${FUNCNAME[0]}: testing service creation"
+ # reps = 1 (for the master) plus one per worker
+ reps=1; for a in $workers; do ((reps++)); done
+ create_service nginx $reps
+}
+
+
+# Create a demo service on the swarm through the docker remote API, then
+# verify it with check_service.
+# Arguments:
+#  $1 - service name; the service spec is read from <script dir>/$1.json
+#  $2 - replica count; only used in the log message here
+# Reads master/workers from /tmp/env.sh.
+# Exits 1 if the create request cannot be sent.
+function create_service() {
+ echo "${FUNCNAME[0]}: creating service $1 with $2 replicas"
+ # sudo docker service create -p 80:80 --replicas $reps --name nginx nginx
+ # per https://docs.docker.com/engine/api/v1.27/
+ source /tmp/env.sh
+ local spec_dir
+ case "$1" in
+ nginx)
+ match="Welcome to nginx!"
+ ;;
+ *)
+ echo "${FUNCNAME[0]}: service $1 not setup for use with this script"
+ esac
+
+ # Resolve the spec file relative to this script instead of the caller's
+ # working directory, so the script also works when invoked by absolute path
+ spec_dir=$(dirname "${BASH_SOURCE[0]}")
+ if ! curl -X POST "http://$master:4243/services/create" -d "@$spec_dir/$1.json" ; then
+ echo "${FUNCNAME[0]}: service creation failed"
+ exit 1
+ fi
+
+ # The match string contains spaces; quote it so it arrives as one argument
+ check_service "$1" "$match"
+}
+
+# Check that a swarm service answers on every node with the expected content.
+# Arguments:
+#  $1 - service name, compared with Spec.Name in the docker API service list
+#  $2 - string the HTTP response must contain for the service to count as active
+# Reads master/workers from /tmp/env.sh. For each node, polls the service's
+# first published port every 10 seconds until curl succeeds, then reports
+# whether the response contains the match string. Prints nothing further if
+# no service with the given name is listed.
+function check_service() {
+ echo "${FUNCNAME[0]}: checking service state for $1 with match string $2"
+ source /tmp/env.sh
+ local service=$1
+ local match="$2"
+ local services n id port nodes node not
+ services=$(curl http://$master:4243/services)
+ # Quote the JSON when piping it to jq so it is not word-split or glob-expanded
+ n=$(echo "$services" | jq '. | length')
+ ((n--))
+ while [[ $n -ge 0 ]]; do
+ # Quote the right-hand side so the name is compared literally, not as a glob
+ if [[ "$(echo "$services" | jq -r ".[$n].Spec.Name")" == "$service" ]]; then
+ id=$(echo "$services" | jq -r ".[$n].ID")
+ port=$(echo "$services" | jq -r ".[$n].Endpoint.Ports[0].PublishedPort")
+ nodes="$master $workers"
+ for node in $nodes; do
+ not=""
+ while ! curl -s -o /tmp/resp http://$node:$port ; do
+ echo "${FUNCNAME[0]}: service is not yet active, waiting 10 seconds"
+ sleep 10
+ done
+ curl -s -o /tmp/resp http://$node:$port
+ if [[ $(grep -c "$match" /tmp/resp) == 0 ]]; then
+ not="NOT"
+ fi
+ echo "$service service is $not active at address http://$node:$port"
+ done
+ break
+ fi
+ ((n--))
+ done
+}
+
+# Delete a swarm service through the docker remote API.
+# Arguments:
+#  $1 - service name, compared with Spec.Name in the docker API service list
+# Reads master from /tmp/env.sh. Only the first matching service (searching
+# from the end of the list) is deleted; prints nothing further if no service
+# with the given name is listed.
+function delete_service() {
+ echo "${FUNCNAME[0]}: deleting service $1"
+ source /tmp/env.sh
+ local service=$1
+ local services n id
+ services=$(curl http://$master:4243/services)
+ # Quote the JSON when piping it to jq so it is not word-split or glob-expanded
+ n=$(echo "$services" | jq '. | length')
+ ((n--))
+ while [[ $n -ge 0 ]]; do
+ # Quote the right-hand side so the name is compared literally, not as a glob
+ if [[ "$(echo "$services" | jq -r ".[$n].Spec.Name")" == "$service" ]]; then
+ id=$(echo "$services" | jq -r ".[$n].ID")
+ # NOTE(review): curl is run without -f, so an HTTP error status from the API
+ # is presumably still reported as success here - confirm
+ if ! curl -X DELETE "http://$master:4243/services/$id" ; then
+ echo "${FUNCNAME[0]}: failed to delete service $1"
+ else
+ echo "${FUNCNAME[0]}: deleted service $1"
+ fi
+ break
+ fi
+ ((n--))
+ done
+}
+
+# Clean the installation
+# Arguments:
+#  $1 - optional IP address of a single node to clean; when omitted, the
+#       master and all workers recorded in /tmp/env.sh are cleaned
+# On each node: leaves the swarm, stops docker and removes the docker-ce package.
+function clean() {
+ source /tmp/env.sh
+ # Honor the optional <node> argument described in the usage text
+ local nodes="${1:-$master $workers}"
+ local node
+ for node in $nodes; do
+ ssh -o StrictHostKeyChecking=no -x ubuntu@$node <<EOF
+sudo docker swarm leave --force
+sudo systemctl stop docker
+sudo apt-get remove -y docker-ce
+EOF
+ done
+}
+
+# Command dispatcher: $1 selects the action and the remaining arguments are
+# passed on to the matching function. For any other (or missing) action the
+# usage lines (those containing '#. ') are printed, but only when this file is
+# executed directly rather than sourced.
+export WORK_DIR=$(pwd)
+case "$1" in
+ setup)
+ setup $2 "$3"
+ ;;
+ ceph)
+ # TODO Ceph support for docker, e.g. re
+ # http://docker.com/docs/docker/latest/en/docker-services/storage-service/
+ # https://github.com/docker/docker/issues/8722
+ # setup_ceph "$2" $3 $4 $5
+ ;;
+ all)
+ # Same as "setup", with the elapsed wall-clock time reported in whole minutes
+ start=`date +%s`
+ setup $2 "$3"
+ end=`date +%s`
+ runtime=$((end-start))
+ runtime=$((runtime/60))
+ # NOTE(review): FUNCNAME is normally only set while a function is executing,
+ # so the prefix is probably empty at this level - confirm
+ echo "${FUNCNAME[0]}: Demo duration = $runtime minutes"
+ ;;
+ create)
+ create_service "$2" $3
+ ;;
+ delete)
+ delete_service "$2"
+ ;;
+ clean)
+ clean $2
+ ;;
+ *)
+ if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then grep '#. ' $0; fi
+esac
diff --git a/tools/docker/nginx.json b/tools/docker/nginx.json
new file mode 100644
index 0000000..a74681f
--- /dev/null
+++ b/tools/docker/nginx.json
@@ -0,0 +1,67 @@
+{
+ "Name": "nginx",
+ "TaskTemplate": {
+ "ContainerSpec": {
+ "Image": "nginx",
+ "Mounts": [
+ {
+ "ReadOnly": true,
+ "Source": "web-data",
+ "Target": "/usr/share/nginx/html",
+ "Type": "volume",
+ "VolumeOptions": {
+ "DriverConfig": { },
+ "Labels": { "com.example.something": "something-value" }
+ }
+ }
+ ],
+ "DNSConfig": {
+ "Nameservers": [ "8.8.8.8" ],
+ "Search": [ "example.org" ],
+ "Options": [ "timeout:3" ]
+ }
+ },
+ "LogDriver": {
+ "Name": "json-file",
+ "Options": {
+ "max-file": "3",
+ "max-size": "10M"
+ }
+ },
+ "Placement": { },
+ "Resources": {
+ "Limits": {
+ "MemoryBytes": 104857600
+ },
+ "Reservations": { }
+ },
+ "RestartPolicy": {
+ "Condition": "on-failure",
+ "Delay": 10000000000,
+ "MaxAttempts": 10
+ }
+ },
+ "Mode": {
+ "Replicated": {
+ "Replicas": 3
+ }
+ },
+ "UpdateConfig": {
+ "Delay": 30000000000,
+ "Parallelism": 2,
+ "FailureAction": "pause"
+ },
+ "EndpointSpec": {
+ "Ports": [
+ {
+ "Protocol": "tcp",
+ "PublishedPort": 8080,
+ "TargetPort": 80
+ }
+ ]
+ },
+ "Labels": {
+ "foo": "bar"
+ }
+}
+
diff --git a/tools/kubernetes/README.md b/tools/kubernetes/README.md
new file mode 100644
index 0000000..b8c81f2
--- /dev/null
+++ b/tools/kubernetes/README.md
@@ -0,0 +1,17 @@
+This folder contains scripts etc. to set up a kubernetes cluster with the following type of environment and components:
+* hardware
+ * 2 or more bare metal servers
+ * two connected networks (public and private): may work if just a single network
+ * one or more disks on each server: ceph-osd can be setup on an unused disk, or a folder (/ceph) on the host OS disk
+* kubernetes
+ * single master (admin) node
+ * other cluster nodes
+* ceph: ceph-mon on admin, ceph-osd on other nodes
+* helm on admin node
+* demo helm charts, cloned from https://github.com/kubernetes/charts and modified/tested to work on this cluster
+
+See comments in [setup script](k8s-cluster.sh) for more info.
+
+This is a work in progress!
+
+![Resulting Cluster](/docs/images/models-k8s.png?raw=true "Resulting Cluster")
diff --git a/tools/kubernetes/demo_deploy.sh b/tools/kubernetes/demo_deploy.sh
new file mode 100644
index 0000000..b3d165b
--- /dev/null
+++ b/tools/kubernetes/demo_deploy.sh
@@ -0,0 +1,74 @@
+#!/bin/bash
+# Copyright 2017 AT&T Intellectual Property, Inc
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#. What this is: Complete scripted deployment of an experimental kubernetes-based
+#. cloud-native application platform. When complete, kubernetes and the following
+#. will be installed:
+#. - helm and dokuwiki as a demo helm chart based application
+#. - prometheus + grafana for cluster monitoring/stats
+#. - cloudify + kubernetes plugin and a demo hello world (nginx) app installed
+#. will be setup with:
+#. Prometheus dashboard: http://<admin_public_ip>:9090
+#. Grafana dashboard: http://<admin_public_ip>:3000
+#.
+#. Prerequisites:
+#. - Ubuntu server for kubernetes cluster nodes (admin/master and agent nodes)
+#. - MAAS server as cluster admin for kubernetes master/agent nodes
+#. - Password-less ssh key provided for node setup
+#. Usage: on the MAAS server
+#. $ git clone https://gerrit.opnfv.org/gerrit/models ~/models
+#. $ bash ~/models/tools/kubernetes/demo_deploy.sh <key> "<hosts>" <admin ip>
+#. "<agent ips>" <pub-net> <priv-net> [<extras>]
+#. <key>: name of private key for cluster node ssh (in current folder)
+#. <hosts>: space separated list of hostnames managed by MAAS
+#. <admin ip>: IP of cluster admin node
+#. <agent_ips>: space separated list of agent node IPs
+#. <pub-net>: CIDR formatted public network
+#. <priv-net>: CIDR formatted private network (may be same as pub-net)
+#. <extras>: optional name of script for extra setup functions as needed
+
+# Positional arguments, in the order given in the usage text above:
+# <key> "<hosts>" <admin ip> "<agent ips>" <pub-net> <priv-net> [<extras>]
+key=$1
+nodes="$2"
+admin_ip=$3
+agent_ips="$4"
+# pub_net/priv_net are passed to k8s-cluster.sh below and must be assigned
+# here; extras is therefore the seventh argument, as documented
+pub_net=$5
+priv_net=$6
+extras=$7
+
+# deploy.sh is sourced (not executed), so it runs in this shell
+source ~/models/tools/maas/deploy.sh $key "$nodes" $extras
+# Start an ssh-agent for this shell and load the cluster key for the ssh/scp
+# calls below
+eval `ssh-agent`
+ssh-add $key
+if [[ "x$extras" != "x" ]]; then source $extras; fi
+# Copy the private key to the admin node; the remote sessions below add it to
+# their own ssh-agent so the admin node can reach the agent nodes
+scp -o StrictHostKeyChecking=no $key ubuntu@$admin_ip:/home/ubuntu/$key
+echo "Setting up kubernetes..."
+# The heredoc delimiters below are unquoted, so $key, $agent_ips, $priv_net and
+# $pub_net are expanded locally before the text is sent to the remote shell
+ssh -x ubuntu@$admin_ip <<EOF
+exec ssh-agent bash
+ssh-add $key
+git clone https://gerrit.opnfv.org/gerrit/models
+bash models/tools/kubernetes/k8s-cluster.sh all "$agent_ips" $priv_net $pub_net
+EOF
+# TODO: Figure this out... Have to break the setup into two steps as something
+# causes the ssh session to end before the prometheus setup, if both scripts
+# (k8s-cluster and prometheus-tools) are in the same ssh session
+echo "Setting up prometheus..."
+ssh -x ubuntu@$admin_ip <<EOF
+exec ssh-agent bash
+ssh-add $key
+bash models/tools/prometheus/prometheus-tools.sh all "$agent_ips"
+EOF
+echo "Setting up cloudify..."
+# Use the ~/models clone (same location as deploy.sh above) so the copy does
+# not depend on the current working directory
+scp ~/models/tools/cloudify/k8s-cloudify.sh ubuntu@$admin_ip:/home/ubuntu/.
+ssh -x ubuntu@$admin_ip bash k8s-cloudify.sh prereqs
+ssh -x ubuntu@$admin_ip bash k8s-cloudify.sh setup
+ssh -x ubuntu@$admin_ip bash k8s-cloudify.sh demo
+echo "All done!"
diff --git a/tools/kubernetes/k8s-cluster.sh b/tools/kubernetes/k8s-cluster.sh
new file mode 100644
index 0000000..6a91cdb
--- /dev/null
+++ b/tools/kubernetes/k8s-cluster.sh
@@ -0,0 +1,438 @@
+#!/bin/bash
+# Copyright 2017 AT&T Intellectual Property, Inc
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#. What this is: script to setup a kubernetes cluster with calico as CNI
+#. Prerequisites:
+#. - Ubuntu xenial server for master and agent nodes
+#. - key-based auth setup for ssh/scp between master and agent nodes
+#. - 192.168.0.0/16 should not be used on your server network interface subnets
+#. Usage:
+#. $ git clone https://gerrit.opnfv.org/gerrit/models ~/models
+#. $ cd ~/models/tools/kubernetes
+#. $ bash k8s-cluster.sh master
+#. $ bash k8s-cluster.sh agents "<nodes>"
+#. nodes: space-separated list of agent node IPs
+#. $ bash k8s-cluster.sh ceph "<nodes>" <cluster-net> <public-net> [ceph_dev]
+#. nodes: space-separated list of ceph node IPs
+#. cluster-net: CIDR of ceph cluster network e.g. 10.0.0.1/24
+#. public-net: CIDR of public network
+#. ceph_dev: disk to use for ceph. ***MUST NOT BE USED FOR ANY OTHER PURPOSE***
+#. if not provided, ceph data will be stored on osd nodes in /ceph
+#. $ bash k8s-cluster.sh helm
+#. Setup helm as app kubernetes orchestration tool
+#. $ bash k8s-cluster.sh demo
+#. Install helm charts for mediawiki and dokuwiki
+#. $ bash k8s-cluster.sh all "<nodes>" <cluster-net> <public-net> [ceph_dev]
+#. Runs all the steps above
+#.
+#. Status: work in progress, incomplete
+#
+
+# Writes /tmp/prereqs.sh, the node preparation script that the callers in this
+# file run locally with argument "master" and on each agent with argument
+# "agent". The heredoc delimiter is quoted ('EOG'), so everything up to EOG is
+# written literally and is only expanded when the generated script runs.
+function setup_prereqs() {
+ echo "${FUNCNAME[0]}: Create prerequisite setup script"
+ cat <<'EOG' >/tmp/prereqs.sh
+#!/bin/bash
+# Basic server pre-reqs
+sudo apt-get -y remove kubectl kubelet kubeadm
+sudo apt-get update
+sudo apt-get upgrade -y
+# Set hostname on agent nodes
+if [[ "$1" == "agent" ]]; then
+ echo $(ip route get 8.8.8.8 | awk '{print $NF; exit}') $HOSTNAME | sudo tee -a /etc/hosts
+fi
+# Install docker 1.12 (default for xenial is 1.12.6)
+sudo apt-get install -y docker.io
+sudo service docker start
+export KUBE_VERSION=1.7.5
+# per https://kubernetes.io/docs/setup/independent/create-cluster-kubeadm/
+# Install kubelet, kubeadm, kubectl per https://kubernetes.io/docs/setup/independent/install-kubeadm/
+sudo apt-get update && sudo apt-get install -y apt-transport-https
+curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add -
+cat <<EOF | sudo tee /etc/apt/sources.list.d/kubernetes.list
+deb http://apt.kubernetes.io/ kubernetes-xenial main
+EOF
+sudo apt-get update
+# Next command is to workaround bug resulting in "PersistentVolumeClaim is not bound" for pod startup (remain in Pending)
+# TODO: reverify if this is still an issue in the final working script
+sudo apt-get -y install ceph-common
+sudo apt-get -y install --allow-downgrades kubectl=${KUBE_VERSION}-00 kubelet=${KUBE_VERSION}-00 kubeadm=${KUBE_VERSION}-00
+EOG
+}
+
+# Set up the kubernetes master on the local host: install the prerequisites,
+# run "kubeadm init", copy the admin kubeconfig to $HOME/.kube/config and
+# deploy calico as the pod network.
+# The kubeadm output is kept in /tmp/kubeadm.out; setup_k8s_agents reads the
+# "kubeadm join" command from that file. The command is also exported as
+# k8s_joincmd.
+function setup_k8s_master() {
+ echo "${FUNCNAME[0]}: Setting up kubernetes master"
+ setup_prereqs
+
+ # Install master
+ bash /tmp/prereqs.sh master
+ # per https://kubernetes.io/docs/setup/independent/create-cluster-kubeadm/
+ # If the following command fails, run "kubeadm reset" before trying again
+ # --pod-network-cidr=192.168.0.0/16 is required for calico; this should not conflict with your server network interface subnets
+ # Overwrite (not append to) the output file, so a re-run does not leave a
+ # stale join command from an earlier attempt in front of the current one
+ sudo kubeadm init --pod-network-cidr=192.168.0.0/16 >/tmp/kubeadm.out
+ cat /tmp/kubeadm.out
+ export k8s_joincmd=$(grep "kubeadm join" /tmp/kubeadm.out)
+ echo "${FUNCNAME[0]}: Cluster join command for manual use if needed: $k8s_joincmd"
+
+ # Start cluster
+ echo "${FUNCNAME[0]}: Start the cluster"
+ mkdir -p $HOME/.kube
+ sudo cp -f /etc/kubernetes/admin.conf $HOME/.kube/config
+ sudo chown $(id -u):$(id -g) $HOME/.kube/config
+ # Deploy pod network
+ echo "${FUNCNAME[0]}: Deploy calico as CNI"
+ # Run kubectl as the current user: the kubeconfig copied above is owned by
+ # this user, and running kubectl under sudo can leave root-owned files in
+ # ~/.kube
+ kubectl apply -f http://docs.projectcalico.org/v2.4/getting-started/kubernetes/installation/hosted/kubeadm/1.6/calico.yaml
+}
+
+# Install kubernetes on the agent nodes and join them to the cluster.
+# Arguments:
+#  $1 - space-separated list of agent node IPs
+# Reads the "kubeadm join" command from /tmp/kubeadm.out (written by
+# setup_k8s_master), waits until kube-dns reports "Running", then prepares and
+# joins each agent over ssh as user "ubuntu".
+function setup_k8s_agents() {
+ agents="$1"
+ export k8s_joincmd=$(grep "kubeadm join" /tmp/kubeadm.out)
+ echo "${FUNCNAME[0]}: Installing agents at $1 with joincmd: $k8s_joincmd"
+
+ setup_prereqs
+
+ # Field 4 of "kubectl get pods --all-namespaces" is taken as the pod status
+ kubedns=$(kubectl get pods --all-namespaces | grep kube-dns | awk '{print $4}')
+ while [[ "$kubedns" != "Running" ]]; do
+ echo "${FUNCNAME[0]}: kube-dns status is $kubedns. Waiting 60 seconds for it to be 'Running'"
+ sleep 60
+ kubedns=$(kubectl get pods --all-namespaces | grep kube-dns | awk '{print $4}')
+ done
+ echo "${FUNCNAME[0]}: kube-dns status is $kubedns"
+
+ for agent in $agents; do
+ echo "${FUNCNAME[0]}: Install agent at $agent"
+ # UserKnownHostsFile=/dev/null keeps the agents' host keys out of known_hosts
+ scp -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no /tmp/prereqs.sh ubuntu@$agent:/tmp/prereqs.sh
+ ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no ubuntu@$agent bash /tmp/prereqs.sh agent
+ # Workaround for "[preflight] Some fatal errors occurred: /var/lib/kubelet is not empty" per https://github.com/kubernetes/kubeadm/issues/1
+ ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no ubuntu@$agent sudo kubeadm reset
+ ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no ubuntu@$agent sudo $k8s_joincmd
+ done
+
+ echo "${FUNCNAME[0]}: Cluster is ready when all nodes in the output of 'kubectl get nodes' show as 'Ready'."
+}
+
+# Deploy ceph (ceph-mon on the local host, ceph-osd on the given nodes) and
+# wire it into kubernetes as the rbd-backed storageClass "slow".
+# Arguments:
+#  $1 - space-separated list of ceph-osd node IPs
+#  $2 - CIDR of the ceph cluster network
+#  $3 - CIDR of the public network
+#  $4 - optional disk to use for the OSDs; when empty, OSD data is stored in
+#       the folder /ceph on each node
+# Exits 1 if the ceph config folder cannot be entered or if the
+# kube-controller-manager manifest cannot be patched.
+function setup_ceph() {
+ node_ips=$1
+ cluster_net=$2
+ public_net=$3
+ ceph_dev=$4
+ echo "${FUNCNAME[0]}: Deploying ceph-mon on localhost $HOSTNAME"
+ echo "${FUNCNAME[0]}: Deploying ceph-osd on nodes $node_ips"
+ echo "${FUNCNAME[0]}: Setting cluster-network=$cluster_net and public-network=$public_net"
+ # Address of the interface used to reach 8.8.8.8 (last field of the route line)
+ mon_ip=$(ip route get 8.8.8.8 | awk '{print $NF; exit}')
+ all_nodes="$mon_ip $node_ips"
+ # Also caches the server fingerprints so ceph-deploy does not prompt the user
+ # Note this loop may be partially redundant with the ceph-deploy steps below
+ for node_ip in $all_nodes; do
+ echo "${FUNCNAME[0]}: Install ntp and ceph on $node_ip"
+ # NOTE(review): the heredoc delimiter is unquoted, so $(lsb_release -sc) is
+ # evaluated on this host, not on $node_ip - confirm that is intended
+ ssh -x -o StrictHostKeyChecking=no ubuntu@$node_ip <<EOF
+sudo timedatectl set-ntp no
+wget -q -O- 'https://download.ceph.com/keys/release.asc' | sudo apt-key add -
+echo deb https://download.ceph.com/debian/ $(lsb_release -sc) main | sudo tee /etc/apt/sources.list.d/ceph.list
+sudo apt update
+sudo apt-get install -y ntp ceph ceph-deploy
+EOF
+ done
+
+ # per http://docs.ceph.com/docs/master/start/quick-ceph-deploy/
+ # also https://upcommons.upc.edu/bitstream/handle/2117/101816/Degree_Thesis_Nabil_El_Alami.pdf#vote +1
+ echo "${FUNCNAME[0]}: Create ceph config folder ~/ceph-cluster"
+ # -p so a re-run does not fail on the existing folder; stop if the folder
+ # cannot be entered, as ceph-deploy writes its files to the current directory
+ mkdir -p ~/ceph-cluster
+ cd ~/ceph-cluster || exit 1
+
+ echo "${FUNCNAME[0]}: Create new cluster with $HOSTNAME as initial ceph-mon node"
+ ceph-deploy new --cluster-network $cluster_net --public-network $public_net --no-ssh-copykey $HOSTNAME
+ # Update conf per recommendations of http://docs.ceph.com/docs/jewel/rados/configuration/filesystem-recommendations/
+ cat <<EOF >>ceph.conf
+osd max object name len = 256
+osd max object namespace len = 64
+EOF
+ cat ceph.conf
+
+ echo "${FUNCNAME[0]}: Deploy ceph packages on other nodes"
+ ceph-deploy install $mon_ip $node_ips
+
+ echo "${FUNCNAME[0]}: Deploy the initial monitor and gather the keys"
+ ceph-deploy mon create-initial
+
+ if [[ "x$ceph_dev" == "x" ]]; then
+ n=1
+ for node_ip in $node_ips; do
+ echo "${FUNCNAME[0]}: Prepare ceph OSD on node $node_ip"
+ echo "$node_ip ceph-osd$n" | sudo tee -a /etc/hosts
+ # Using ceph-osd$n here avoids need for manual acceptance of the new server hash
+ # -p so the chown still runs when /ceph already exists
+ ssh -x -o StrictHostKeyChecking=no ubuntu@ceph-osd$n <<EOF
+echo "$node_ip ceph-osd$n" | sudo tee -a /etc/hosts
+sudo mkdir -p /ceph && sudo chown -R ceph:ceph /ceph
+EOF
+ ceph-deploy osd prepare ceph-osd$n:/ceph
+ ceph-deploy osd activate ceph-osd$n:/ceph
+ ((n++))
+ done
+ else
+ echo "${FUNCNAME[0]}: Deploy OSDs"
+ for node_ip in $node_ips; do
+ echo "${FUNCNAME[0]}: Create ceph osd on $node_ip using $ceph_dev"
+ ceph-deploy osd create $node_ip:$ceph_dev
+ done
+ fi
+
+ echo "${FUNCNAME[0]}: Copy the config file and admin key to the admin node and OSD nodes"
+ ceph-deploy admin $mon_ip $node_ips
+
+ echo "${FUNCNAME[0]}: Check the cluster health"
+ sudo ceph health
+ sudo ceph -s
+
+ # per https://crondev.com/kubernetes-persistent-storage-ceph/ and https://github.com/kubernetes/kubernetes/issues/38923
+ # rbd is not included in default kube-controller-manager... use attcomdev version
+ sudo sed -i -- 's~gcr.io/google_containers/kube-controller-manager-amd64:.*~quay.io/attcomdev/kube-controller-manager:v1.7.3~' /etc/kubernetes/manifests/kube-controller-manager.yaml
+ if [[ $(sudo grep -c attcomdev/kube-controller-manager /etc/kubernetes/manifests/kube-controller-manager.yaml) == 0 ]]; then
+ echo "${FUNCNAME[0]}: Problem patching /etc/kubernetes/manifests/kube-controller-manager.yaml... script update needed"
+ exit 1
+ fi
+ # Wait for the controller manager to report "Running" after the manifest change
+ mgr=$(kubectl get pods --all-namespaces | grep kube-controller-manager | awk '{print $4}')
+ while [[ "$mgr" != "Running" ]]; do
+ echo "${FUNCNAME[0]}: kube-controller-manager status is $mgr. Waiting 60 seconds for it to be 'Running'"
+ sleep 60
+ mgr=$(kubectl get pods --all-namespaces | grep kube-controller-manager | awk '{print $4}')
+ done
+ echo "${FUNCNAME[0]}: kube-controller-manager status is $mgr"
+
+ echo "${FUNCNAME[0]}: Create Ceph admin secret"
+ admin_key=$(sudo ceph auth get-key client.admin)
+ kubectl create secret generic ceph-secret-admin --from-literal=key="$admin_key" --namespace=kube-system --type=kubernetes.io/rbd
+
+ echo "${FUNCNAME[0]}: Create rbd storageClass 'slow'"
+ cat <<EOF >/tmp/ceph-sc.yaml
+apiVersion: storage.k8s.io/v1
+kind: StorageClass
+metadata:
+ name: slow
+provisioner: kubernetes.io/rbd
+parameters:
+ monitors: $mon_ip:6789
+ adminId: admin
+ adminSecretName: ceph-secret-admin
+ adminSecretNamespace: "kube-system"
+ pool: kube
+ userId: kube
+ userSecretName: ceph-secret-user
+EOF
+ # TODO: find out where in the above ~/.kube folders became owned by root
+ sudo chown -R ubuntu:ubuntu ~/.kube/*
+ kubectl create -f /tmp/ceph-sc.yaml
+
+ echo "${FUNCNAME[0]}: Create storage pool 'kube'"
+ # https://github.com/kubernetes/examples/blob/master/staging/persistent-volume-provisioning/README.md method
+ sudo ceph osd pool create kube 32 32
+
+ echo "${FUNCNAME[0]}: Authorize client 'kube' access to pool 'kube'"
+ sudo ceph auth get-or-create client.kube mon 'allow r' osd 'allow rwx pool=kube'
+
+ echo "${FUNCNAME[0]}: Create ceph-secret-user secret in namespace 'default'"
+ kube_key=$(sudo ceph auth get-key client.kube)
+ kubectl create secret generic ceph-secret-user --from-literal=key="$kube_key" --namespace=default --type=kubernetes.io/rbd
+ # A similar secret must be created in other namespaces that intend to access the ceph pool
+
+ # Per https://github.com/kubernetes/examples/blob/master/staging/persistent-volume-provisioning/README.md
+
+ echo "${FUNCNAME[0]}: Create and test a persistentVolumeClaim"
+ cat <<EOF >/tmp/ceph-pvc.yaml
+{
+ "kind": "PersistentVolumeClaim",
+ "apiVersion": "v1",
+ "metadata": {
+ "name": "claim1",
+ "annotations": {
+ "volume.beta.kubernetes.io/storage-class": "slow"
+ }
+ },
+ "spec": {
+ "accessModes": [
+ "ReadWriteOnce"
+ ],
+ "resources": {
+ "requests": {
+ "storage": "3Gi"
+ }
+ }
+ }
+}
+EOF
+ kubectl create -f /tmp/ceph-pvc.yaml
+ # Poll every 10 seconds until the test claim is bound, then remove it again
+ while [[ "x$(kubectl get pvc -o jsonpath='{.status.phase}' claim1)" != "xBound" ]]; do
+ echo "${FUNCNAME[0]}: Waiting for pvc claim1 to be 'Bound'"
+ kubectl describe pvc
+ sleep 10
+ done
+ echo "${FUNCNAME[0]}: pvc claim1 successfully bound to $(kubectl get pvc -o jsonpath='{.spec.volumeName}' claim1)"
+ kubectl get pvc
+ kubectl delete pvc claim1
+ kubectl describe pods
+}
+
+# Wait until a service in namespace "default" is ready and answers over HTTP.
+# Arguments:
+#  $1 - service name; also used as an awk pattern to find the pod by name
+# Polls every 10 seconds, first until the pod's first container reports ready,
+# then until curl succeeds against the pod's host IP and the service's first
+# nodePort.
+function wait_for_service() {
+ echo "${FUNCNAME[0]}: Waiting for service $1 to be available"
+ # $1 is expanded by the shell into the awk pattern; \$1 is awk's first field
+ # (the pod name column)
+ pod=$(kubectl get pods --namespace default | awk "/$1/ { print \$1 }")
+ echo "${FUNCNAME[0]}: Service $1 is at pod $pod"
+ ready=$(kubectl get pods --namespace default -o jsonpath='{.status.containerStatuses[0].ready}' $pod)
+ while [[ "$ready" != "true" ]]; do
+ echo "${FUNCNAME[0]}: $1 container is not yet ready... waiting 10 seconds"
+ sleep 10
+ # TODO: figure out why transient pods sometimes mess up this logic, thus need to re-get the pods
+ pod=$(kubectl get pods --namespace default | awk "/$1/ { print \$1 }")
+ ready=$(kubectl get pods --namespace default -o jsonpath='{.status.containerStatuses[0].ready}' $pod)
+ done
+ echo "${FUNCNAME[0]}: pod $pod container status is $ready"
+ host_ip=$(kubectl get pods --namespace default -o jsonpath='{.status.hostIP}' $pod)
+ port=$(kubectl get --namespace default -o jsonpath="{.spec.ports[0].nodePort}" services $1)
+ echo "${FUNCNAME[0]}: pod $pod container is at host $host_ip and port $port"
+ while ! curl http://$host_ip:$port ; do
+ echo "${FUNCNAME[0]}: $1 service is not yet responding... waiting 10 seconds"
+ sleep 10
+ done
+ echo "${FUNCNAME[0]}: $1 is available at http://$host_ip:$port"
+}
+
+# Deploy one sample chart from a fresh clone of the kubernetes/charts repo via
+# helm, then wait for its service to respond.
+# Arguments: $1 - demo to run: mediawiki, dokuwiki, wordpress, redmine or owncloud
+# Side effects: replaces ~/charts with a new clone and leaves the working
+# directory at ~/charts/stable.
+# Returns: 1 when the charts repo cannot be cloned or entered (so the later
+# steps never run against the wrong directory); otherwise the status of the
+# last command run.
+function demo_chart() {
+  cd ~ || return 1
+  rm -rf charts
+  git clone https://github.com/kubernetes/charts.git || return 1
+  cd charts/stable || return 1
+  case "$1" in
+    mediawiki)
+      # NOT YET WORKING
+      # mariadb: Readiness probe failed: mysqladmin: connect to server at 'localhost' failed
+      mkdir -p ./mediawiki/charts
+      cp -r ./mariadb ./mediawiki/charts
+      # LoadBalancer is N/A for baremetal (public cloud only) - use NodePort
+      sed -i -- 's/LoadBalancer/NodePort/g' ./mediawiki/values.yaml
+      # Select the storageClass created in the ceph setup step
+      sed -i -- 's/# storageClass:/storageClass: "slow"/g' ./mediawiki/values.yaml
+      sed -i -- 's/# storageClass: "-"/storageClass: "slow"/g' ./mediawiki/charts/mariadb/values.yaml
+      helm install --name mw -f ./mediawiki/values.yaml ./mediawiki
+      wait_for_service mw-mediawiki
+      ;;
+    dokuwiki)
+      sed -i -- 's/# storageClass:/storageClass: "slow"/g' ./dokuwiki/values.yaml
+      sed -i -- 's/LoadBalancer/NodePort/g' ./dokuwiki/values.yaml
+      helm install --name dw -f ./dokuwiki/values.yaml ./dokuwiki
+      wait_for_service dw-dokuwiki
+      ;;
+    wordpress)
+      # NOT YET WORKING
+      # mariadb: Readiness probe failed: mysqladmin: connect to server at 'localhost' failed
+      mkdir -p ./wordpress/charts
+      cp -r ./mariadb ./wordpress/charts
+      sed -i -- 's/LoadBalancer/NodePort/g' ./wordpress/values.yaml
+      sed -i -- 's/# storageClass: "-"/storageClass: "slow"/g' ./wordpress/values.yaml
+      sed -i -- 's/# storageClass: "-"/storageClass: "slow"/g' ./wordpress/charts/mariadb/values.yaml
+      helm install --name wp -f ./wordpress/values.yaml ./wordpress
+      wait_for_service wp-wordpress
+      ;;
+    redmine)
+      # NOT YET WORKING
+      # mariadb: Readiness probe failed: mysqladmin: connect to server at 'localhost' failed
+      mkdir -p ./redmine/charts
+      cp -r ./mariadb ./redmine/charts
+      cp -r ./postgresql ./redmine/charts
+      sed -i -- 's/LoadBalancer/NodePort/g' ./redmine/values.yaml
+      sed -i -- 's/# storageClass: "-"/storageClass: "slow"/g' ./redmine/values.yaml
+      sed -i -- 's/# storageClass: "-"/storageClass: "slow"/g' ./redmine/charts/mariadb/values.yaml
+      sed -i -- 's/# storageClass: "-"/storageClass: "slow"/g' ./redmine/charts/postgresql/values.yaml
+      helm install --name rdm -f ./redmine/values.yaml ./redmine
+      wait_for_service rdm-redmine
+      ;;
+    owncloud)
+      # NOT YET WORKING: needs resolvable hostname for service
+      mkdir -p ./owncloud/charts
+      cp -r ./mariadb ./owncloud/charts
+      sed -i -- 's/LoadBalancer/NodePort/g' ./owncloud/values.yaml
+      sed -i -- 's/# storageClass: "-"/storageClass: "slow"/g' ./owncloud/values.yaml
+      sed -i -- 's/# storageClass: "-"/storageClass: "slow"/g' ./owncloud/charts/mariadb/values.yaml
+      helm install --name oc -f ./owncloud/values.yaml ./owncloud
+      wait_for_service oc-owncloud
+      ;;
+    *)
+      echo "${FUNCNAME[0]}: demo not implemented for $1"
+      ;;
+  esac
+# extra useful commands
+# kubectl describe pvc
+# kubectl get pvc
+# kubectl describe pods
+# kubectl get pods --namespace default
+# kubectl get pods --all-namespaces
+# kubectl get svc --namespace default dw-dokuwiki
+# kubectl describe svc --namespace default dw-dokuwiki
+# kubectl describe pods --namespace default dw-dokuwiki
+}
+
+# Download and run the helm installer script, run "helm init", and wait until
+# the tiller-deploy pod reports Running.
+# Side effects: changes directory to ~, writes ~/get_helm.sh, and creates a
+# permissive cluster-admin role binding (testing only, see below).
+# Returns: 1 when ~ cannot be entered or the installer cannot be downloaded.
+function setup_helm() {
+  echo "${FUNCNAME[0]}: Setup helm"
+  # Install Helm
+  cd ~ || return 1
+  # -f makes curl fail on an HTTP error rather than saving the error page, which
+  # would otherwise be executed below as if it were the installer; -L follows redirects
+  curl -fL -o get_helm.sh https://raw.githubusercontent.com/kubernetes/helm/master/scripts/get || return 1
+  chmod 700 get_helm.sh
+  ./get_helm.sh
+  helm init
+  helm repo update
+  # TODO: Workaround for bug https://github.com/kubernetes/helm/issues/2224
+  # For testing use only!
+  kubectl create clusterrolebinding permissive-binding --clusterrole=cluster-admin --user=admin --user=kubelet --group=system:serviceaccounts
+  # TODO: workaround for tiller FailedScheduling (No nodes are available that match all of the following predicates:: PodToleratesNodeTaints (1).)
+  # kubectl taint nodes $HOSTNAME node-role.kubernetes.io/master:NoSchedule-
+  # Wait till tiller is running (column 4 of the all-namespaces listing is STATUS)
+  tiller_deploy=$(kubectl get pods --all-namespaces | awk '/tiller-deploy/ {print $4}')
+  while [[ "$tiller_deploy" != "Running" ]]; do
+    echo "${FUNCNAME[0]}: tiller-deploy status is $tiller_deploy. Waiting 60 seconds for it to be 'Running'"
+    sleep 60
+    tiller_deploy=$(kubectl get pods --all-namespaces | awk '/tiller-deploy/ {print $4}')
+  done
+  echo "${FUNCNAME[0]}: tiller-deploy status is $tiller_deploy"
+
+  # Install services via helm charts from https://kubeapps.com/charts
+  # e.g. helm install stable/dokuwiki
+}
+
+# Entry point: remember the starting directory, then run the setup step named
+# by the first argument. Any other value prints the usage header of this file,
+# but only when the file is executed rather than sourced.
+export WORK_DIR=$(pwd)
+case "$1" in
+  master) setup_k8s_master ;;
+  agents) setup_k8s_agents "$2" ;;
+  ceph)   setup_ceph "$2" $3 $4 $5 ;;
+  helm)   setup_helm ;;
+  demo)   demo_chart $2 ;;
+  all)
+    # Full bring-up, ending with the dokuwiki demo chart
+    setup_k8s_master
+    setup_k8s_agents "$2"
+    setup_ceph "$2" $3 $4 $5
+    setup_helm
+    demo_chart dokuwiki
+    ;;
+  clean)
+    # TODO
+    ;;
+  *)
+    if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
+      grep '#. ' $0
+    fi
+esac
diff --git a/tools/maas/deploy.sh b/tools/maas/deploy.sh
new file mode 100644
index 0000000..ae89893
--- /dev/null
+++ b/tools/maas/deploy.sh
@@ -0,0 +1,75 @@
+#!/bin/bash
+# Copyright 2017 AT&T Intellectual Property, Inc
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#. What this is: Scripted deployment of servers using MAAS. Currently it deploys
+#. the default host OS as configured in MAAS.
+#.
+#. Prerequisites:
+#. - MAAS server configured to admin a set of servers
+#. - Password-less ssh key provided for node setup
+#. Usage: on the MAAS server
+#. $ git clone https://gerrit.opnfv.org/gerrit/models ~/models
+#. $ source ~/models/tools/maas/deploy.sh <key> "<hosts>" [<extras>]
+#. <key>: name of private key for cluster node ssh (in current folder)
+#. <hosts>: space separated list of hostnames managed by MAAS
+#. <extras>: optional name of script for extra setup functions as needed
+
+# Poll MAAS every 30 seconds until a machine reports the wanted status.
+# Arguments: $1 - hostname of the machine; $2 - status_name value to wait for
+function wait_node_status() {
+  local status
+  status=$(maas opnfv machines read hostname="$1" | jq -r ".[0].status_name")
+  while [[ "$status" != "$2" ]]; do
+    echo "$1 status is $status ... waiting for it to be $2"
+    sleep 30
+    status=$(maas opnfv machines read hostname="$1" | jq -r ".[0].status_name")
+  done
+  echo "$1 status is $status"
+}
+
+# Ask MAAS to release every machine in the list.
+# Arguments: $1 - space separated list of hostnames
+function release_nodes() {
+  local node id
+  # $1 is left unquoted on purpose: word splitting yields one hostname per pass
+  for node in $1; do
+    echo "Releasing node $node"
+    # Look up the MAAS system_id that belongs to this hostname
+    id=$(maas opnfv machines read hostname="$node" | jq -r '.[0].system_id')
+    maas opnfv machines release machines="$id"
+  done
+}
+
+# Allocate and then deploy every machine in the list through MAAS.
+# Arguments: $1 - space separated list of hostnames
+function deploy_nodes() {
+  local node id
+  # $1 is left unquoted on purpose: word splitting yields one hostname per pass
+  for node in $1; do
+    echo "Deploying node $node"
+    # Look up the MAAS system_id that belongs to this hostname
+    id=$(maas opnfv machines read hostname="$node" | jq -r '.[0].system_id')
+    maas opnfv machines allocate system_id="$id"
+    maas opnfv machine deploy "$id"
+  done
+}
+
+# Wait, one machine after another, until each listed machine has the status.
+# Arguments: $1 - space separated list of hostnames; $2 - status to wait for
+function wait_nodes_status() {
+  local node
+  # $1 is left unquoted on purpose: word splitting yields one hostname per pass
+  for node in $1; do
+    wait_node_status "$node" "$2"
+  done
+}
+
+# Main flow: release the nodes, wait until they are Ready, deploy them, wait
+# until they are Deployed, then load the ssh key and run the optional extras.
+key=$1
+nodes="$2"
+extras=$3
+
+release_nodes "$nodes"
+wait_nodes_status "$nodes" Ready
+deploy_nodes "$nodes"
+wait_nodes_status "$nodes" Deployed
+# Start an ssh-agent for this shell and add the cluster key to it
+eval "$(ssh-agent)"
+ssh-add "$key"
+# The extras script is sourced, so it runs in this shell and sees key and nodes
+if [[ -n "$extras" ]]; then source "$extras"; fi
diff --git a/tools/prometheus/README.md b/tools/prometheus/README.md
new file mode 100644
index 0000000..a3dfcc5
--- /dev/null
+++ b/tools/prometheus/README.md
@@ -0,0 +1,10 @@
+This folder contains scripts etc. to set up [prometheus](https://github.com/prometheus/prometheus) on a server cluster. It installs:
+* a prometheus server (on the host OS) and [grafana](https://grafana.com/) (in docker)
+* prometheus exporters on a set of other nodes, to be monitored
+ * [node exporter](https://github.com/prometheus/node_exporter) for node basic analytics
+ * [haproxy exporter](https://github.com/prometheus/haproxy_exporter) for load-balancer stats from haproxy e.g. as used by Rancher
+* several sample grafana dashboards... for more see [grafana dashboards for prometheus](https://grafana.com/dashboards?dataSource=prometheus)
+
+See comments in [prometheus-tools.sh](prometheus-tools.sh) for more info.
+
+This is a work in progress!
diff --git a/tools/prometheus/dashboards/Docker_Dashboard-1503539375161.json b/tools/prometheus/dashboards/Docker_Dashboard-1503539375161.json
new file mode 100644
index 0000000..afc69a2
--- /dev/null
+++ b/tools/prometheus/dashboards/Docker_Dashboard-1503539375161.json
@@ -0,0 +1,712 @@
+{
+"dashboard": {
+ "__inputs": [
+ {
+ "name": "Prometheus",
+ "label": "Prometheus",
+ "description": "",
+ "type": "datasource",
+ "pluginId": "prometheus",
+ "pluginName": "Prometheus"
+ }
+ ],
+ "__requires": [
+ {
+ "type": "grafana",
+ "id": "grafana",
+ "name": "Grafana",
+ "version": "4.4.3"
+ },
+ {
+ "type": "panel",
+ "id": "graph",
+ "name": "Graph",
+ "version": ""
+ },
+ {
+ "type": "datasource",
+ "id": "prometheus",
+ "name": "Prometheus",
+ "version": "1.0.0"
+ },
+ {
+ "type": "panel",
+ "id": "singlestat",
+ "name": "Singlestat",
+ "version": ""
+ }
+ ],
+ "annotations": {
+ "list": []
+ },
+ "description": "Docker Monitoring Template",
+ "editable": true,
+ "gnetId": 179,
+ "graphTooltip": 1,
+ "hideControls": false,
+ "id": null,
+ "links": [],
+ "refresh": "10s",
+ "rows": [
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "Prometheus",
+ "editable": true,
+ "error": false,
+ "format": "percent",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": true,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 4,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 4,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "(sum(node_memory_MemTotal) - sum(node_memory_MemFree+node_memory_Buffers+node_memory_Cached) ) / sum(node_memory_MemTotal) * 100",
+ "interval": "10s",
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "thresholds": "65, 90",
+ "title": "Memory usage",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "Prometheus",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "format": "percent",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": true,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 6,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 4,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum(sum by (container_name)( rate(container_cpu_usage_seconds_total{image!=\"\"}[1m] ) )) / count(node_cpu{mode=\"system\"}) * 100",
+ "interval": "10s",
+ "intervalFactor": 1,
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "thresholds": "65, 90",
+ "title": "CPU usage",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(50, 172, 45, 0.97)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(245, 54, 54, 0.9)"
+ ],
+ "datasource": "Prometheus",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "format": "percent",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": true,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 7,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 4,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "sum (container_fs_limit_bytes - container_fs_usage_bytes) / sum(container_fs_limit_bytes)",
+ "interval": "10s",
+ "intervalFactor": 1,
+ "metric": "",
+ "refId": "A",
+ "step": 30
+ }
+ ],
+ "thresholds": "65, 90",
+ "title": "Filesystem usage",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Row",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "Prometheus",
+ "decimals": 3,
+ "editable": true,
+ "error": false,
+ "fill": 0,
+ "grid": {},
+ "id": 3,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sort_desc(sum(rate(container_cpu_user_seconds_total{image!=\"\"}[1m])) by (name))",
+ "interval": "10s",
+ "intervalFactor": 1,
+ "legendFormat": "{{ name }}",
+ "metric": "container_cpu_user_seconds_total",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Container CPU usage",
+ "tooltip": {
+ "msResolution": true,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "New row",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "Prometheus",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "fill": 0,
+ "grid": {},
+ "id": 2,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": 200,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sort_desc(sum(container_memory_usage_bytes{image!=\"\"}) by (name))",
+ "interval": "10s",
+ "intervalFactor": 1,
+ "legendFormat": "{{ name }}",
+ "metric": "container_memory_usage:sort_desc",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Container Memory Usage",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "Prometheus",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "fill": 0,
+ "grid": {},
+ "id": 8,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": 200,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sort_desc(sum by (name) (rate(container_network_receive_bytes_total{image!=\"\"}[1m] ) ))",
+ "interval": "10s",
+ "intervalFactor": 1,
+ "legendFormat": "{{ name }}",
+ "metric": "container_network_receive_bytes_total",
+ "refId": "A",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Container Network Input",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "Prometheus",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "fill": 0,
+ "grid": {},
+ "id": 9,
+ "legend": {
+ "alignAsTable": true,
+ "avg": true,
+ "current": true,
+ "max": false,
+ "min": false,
+ "rightSide": true,
+ "show": true,
+ "sideWidth": 200,
+ "sort": "current",
+ "sortDesc": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sort_desc(sum by (name) (rate(container_network_transmit_bytes_total{image!=\"\"}[1m] ) ))",
+ "intervalFactor": 2,
+ "legendFormat": "{{ name }}",
+ "metric": "container_network_transmit_bytes_total",
+ "refId": "B",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Container Network Output",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "New row",
+ "titleSize": "h6"
+ }
+ ],
+ "schemaVersion": 14,
+ "style": "dark",
+ "tags": [
+ "docker"
+ ],
+ "templating": {
+ "list": []
+ },
+ "time": {
+ "from": "now-1h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "browser",
+ "title": "Docker Dashboard",
+ "version": 1
+}
+}
diff --git a/tools/prometheus/dashboards/Docker_Host_and_Container_Overview-1503539411705.json b/tools/prometheus/dashboards/Docker_Host_and_Container_Overview-1503539411705.json
new file mode 100644
index 0000000..6db3532
--- /dev/null
+++ b/tools/prometheus/dashboards/Docker_Host_and_Container_Overview-1503539411705.json
@@ -0,0 +1,1618 @@
+{
+"dashboard": {
+ "__inputs": [
+ {
+ "name": "Prometheus",
+ "label": "Prometheus",
+ "description": "",
+ "type": "datasource",
+ "pluginId": "prometheus",
+ "pluginName": "Prometheus"
+ }
+ ],
+ "__requires": [
+ {
+ "type": "grafana",
+ "id": "grafana",
+ "name": "Grafana",
+ "version": "4.4.3"
+ },
+ {
+ "type": "panel",
+ "id": "graph",
+ "name": "Graph",
+ "version": ""
+ },
+ {
+ "type": "datasource",
+ "id": "prometheus",
+ "name": "Prometheus",
+ "version": "1.0.0"
+ },
+ {
+ "type": "panel",
+ "id": "table",
+ "name": "Table",
+ "version": ""
+ }
+ ],
+ "annotations": {
+ "list": []
+ },
+ "description": "A simple overview of the most important Docker host and container metrics. (cAdvisor/Prometheus)",
+ "editable": true,
+ "gnetId": 395,
+ "graphTooltip": 1,
+ "hideControls": false,
+ "id": null,
+ "links": [],
+ "refresh": "10s",
+ "rows": [
+ {
+ "collapse": false,
+ "height": 143.625,
+ "panels": [
+ {
+ "aliasColors": {
+ "SENT": "#BF1B00"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "Prometheus",
+ "editable": true,
+ "error": false,
+ "fill": 5,
+ "grid": {},
+ "id": 19,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 1,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 2,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rate(container_network_receive_bytes_total{id=\"/\"}[$interval])) by (id)",
+ "intervalFactor": 2,
+ "legendFormat": "RECEIVED",
+ "refId": "A",
+ "step": 10
+ },
+ {
+ "expr": "- sum(rate(container_network_transmit_bytes_total{id=\"/\"}[$interval])) by (id)",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "SENT",
+ "refId": "B",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Network Traffic on Node",
+ "tooltip": {
+ "msResolution": true,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "transparent": false,
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": false,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+ "Ops-Infrastructure": "#447EBC",
+ "{}": "#DEDAF7"
+ },
+ "bars": true,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "Prometheus",
+ "decimals": 0,
+ "editable": true,
+ "error": false,
+ "fill": 3,
+ "grid": {},
+ "id": 7,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": false,
+ "linewidth": 3,
+ "links": [],
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 10,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 1.9899973849372385,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "count(rate(container_last_seen{name=~\".+\",container_group=\"monitoring\"}[$interval]))",
+ "intervalFactor": 2,
+ "legendFormat": "Monitoring",
+ "metric": "container_last_seen",
+ "refId": "A",
+ "step": 10
+ },
+ {
+ "expr": "count(rate(container_last_seen{name=~\".+\",container_group=\"ops-infrastructure\"}[$interval]))",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "Backend-Infrastructure",
+ "refId": "B",
+ "step": 10
+ },
+ {
+ "expr": "count(rate(container_last_seen{name=~\".+\",container_group=\"backend-infrastructure\"}[$interval]))",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Backend-Workers",
+ "refId": "C",
+ "step": 10
+ },
+ {
+ "expr": "count(rate(container_last_seen{name=~\".+\",container_group=\"backend-workers\"}[$interval]))",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "Ops-Infrastructure",
+ "refId": "D",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Running Containers (by Container Group)",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": false,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "none",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+ "{id=\"/\",instance=\"cadvisor:8080\",job=\"prometheus\"}": "#BA43A9"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "Prometheus",
+ "editable": true,
+ "error": false,
+ "fill": 3,
+ "grid": {},
+ "id": 5,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 2.0707047594142263,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rate(container_cpu_system_seconds_total[1m]))",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "a",
+ "refId": "B",
+ "step": 120
+ },
+ {
+ "expr": "sum(rate(container_cpu_system_seconds_total{name=~\".+\"}[1m]))",
+ "hide": true,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "nur container",
+ "refId": "F",
+ "step": 10
+ },
+ {
+ "expr": "sum(rate(container_cpu_system_seconds_total{id=\"/\"}[1m]))",
+ "hide": true,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "nur docker host",
+ "metric": "",
+ "refId": "A",
+ "step": 20
+ },
+ {
+ "expr": "sum(rate(process_cpu_seconds_total[$interval])) * 100",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "host",
+ "metric": "",
+ "refId": "C",
+ "step": 10
+ },
+ {
+ "expr": "sum(rate(container_cpu_system_seconds_total{name=~\".+\"}[1m])) + sum(rate(container_cpu_system_seconds_total{id=\"/\"}[1m])) + sum(rate(process_cpu_seconds_total[1m]))",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "D",
+ "step": 120
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "CPU Usage on Node",
+ "tooltip": {
+ "msResolution": true,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": false,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "percent",
+ "label": "",
+ "logBase": 1,
+ "max": 120,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+ "Belegete Festplatte": "#BF1B00",
+ "Free Disk Space": "#7EB26D",
+ "Used Disk Space": "#BF1B00",
+ "{}": "#BF1B00"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "Prometheus",
+ "editable": true,
+ "error": false,
+ "fill": 4,
+ "grid": {},
+ "id": 13,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 3,
+ "links": [],
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 2,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_filesystem_free{fstype=\"aufs\"}",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "Free Disk Space",
+ "refId": "A",
+ "step": 10
+ },
+ {
+ "expr": "node_filesystem_size{fstype=\"aufs\"} - node_filesystem_free{fstype=\"aufs\"}",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "Used Disk Space",
+ "refId": "B",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Free and Used Disk Space on Node",
+ "tooltip": {
+ "msResolution": true,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": false,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+ "Available Memory": "#7EB26D",
+ "Unavailable Memory": "#BF1B00"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "Prometheus",
+ "editable": true,
+ "error": false,
+ "fill": 4,
+ "grid": {},
+ "id": 20,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 3,
+ "links": [],
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 2,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "container_memory_rss{name=~\".+\"}",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "{{__name__}}",
+ "refId": "D",
+ "step": 30
+ },
+ {
+ "expr": "sum(container_memory_rss{name=~\".+\"})",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "{{__name__}}",
+ "refId": "A",
+ "step": 20
+ },
+ {
+ "expr": "container_memory_usage_bytes{name=~\".+\"}",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "{{name}}",
+ "refId": "B",
+ "step": 20
+ },
+ {
+ "expr": "container_memory_rss{id=\"/\"}",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "{{__name__}}",
+ "refId": "C",
+ "step": 30
+ },
+ {
+ "expr": "sum(container_memory_rss)",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "{{__name__}}",
+ "refId": "E",
+ "step": 30
+ },
+ {
+ "expr": "node_memory_Buffers",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "node_memory_Dirty",
+ "refId": "N",
+ "step": 30
+ },
+ {
+ "expr": "node_memory_MemFree",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "{{__name__}}",
+ "refId": "F",
+ "step": 30
+ },
+ {
+ "expr": "node_memory_MemAvailable",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "Available Memory",
+ "refId": "H",
+ "step": 10
+ },
+ {
+ "expr": "node_memory_MemTotal - node_memory_MemAvailable",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "Unavailable Memory",
+ "refId": "G",
+ "step": 10
+ },
+ {
+ "expr": "node_memory_Inactive",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "{{__name__}}",
+ "refId": "I",
+ "step": 30
+ },
+ {
+ "expr": "node_memory_KernelStack",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "{{__name__}}",
+ "refId": "J",
+ "step": 30
+ },
+ {
+ "expr": "node_memory_Active",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "{{__name__}}",
+ "refId": "K",
+ "step": 30
+ },
+ {
+ "expr": "node_memory_MemTotal - (node_memory_Active + node_memory_MemFree + node_memory_Inactive)",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "Unknown",
+ "refId": "L",
+ "step": 40
+ },
+ {
+ "expr": "node_memory_MemFree + node_memory_Inactive ",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "{{__name__}}",
+ "refId": "M",
+ "step": 30
+ },
+ {
+ "expr": "container_memory_rss{name=~\".+\"}",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "{{__name__}}",
+ "refId": "O",
+ "step": 30
+ },
+ {
+ "expr": "node_memory_Inactive + node_memory_MemFree + node_memory_MemAvailable",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "P",
+ "step": 40
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Available Memory on Node",
+ "tooltip": {
+ "msResolution": true,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": false,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "",
+ "logBase": 1,
+ "max": 4200000000,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "Prometheus",
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "id": 3,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": false,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 1.939297855648535,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rate(node_disk_bytes_read[$interval])) by (device)",
+ "intervalFactor": 2,
+ "legendFormat": "OUT on /{{device}}",
+ "metric": "node_disk_bytes_read",
+ "refId": "A",
+ "step": 10
+ },
+ {
+ "expr": "sum(rate(node_disk_bytes_written[$interval])) by (device)",
+ "intervalFactor": 2,
+ "legendFormat": "IN on /{{device}}",
+ "metric": "",
+ "refId": "B",
+ "step": 10
+ },
+ {
+ "expr": "",
+ "intervalFactor": 2,
+ "refId": "C"
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Disk I/O on Node",
+ "tooltip": {
+ "msResolution": true,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": false,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "New row",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": 284.609375,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "Prometheus",
+ "editable": true,
+ "error": false,
+ "fill": 5,
+ "grid": {},
+ "id": 1,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6.0790694124949285,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rate(container_cpu_usage_seconds_total{name=~\".+\"}[$interval])) by (name) * 100",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{name}}",
+ "metric": "container_cpu_usage_seconds_total",
+ "refId": "F",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "CPU Usage per Container (Stacked)",
+ "tooltip": {
+ "msResolution": true,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "percent",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ },
+ {
+ "aliasColors": {
+ "node_load15": "#CCA300"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "Prometheus",
+ "editable": true,
+ "error": false,
+ "fill": 0,
+ "grid": {},
+ "id": 4,
+ "legend": {
+ "avg": false,
+ "current": true,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": true
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 5.920930587505071,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "{__name__=~\"^node_load.*\"}",
+ "intervalFactor": 2,
+ "legendFormat": "{{__name__}}",
+ "metric": "node",
+ "refId": "A",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "System Load on Node",
+ "tooltip": {
+ "msResolution": true,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Row",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": 203.515625,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "Prometheus",
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "id": 9,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rate(container_network_transmit_bytes_total{name=~\".+\"}[$interval])) by (name)",
+ "intervalFactor": 2,
+ "legendFormat": "{{name}}",
+ "refId": "A",
+ "step": 2
+ },
+ {
+ "expr": "rate(container_network_transmit_bytes_total{id=\"/\"}[$interval])",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "refId": "B",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Sent Network Traffic per Container",
+ "tooltip": {
+ "msResolution": true,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "transparent": false,
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": "",
+ "logBase": 10,
+ "max": 8,
+ "min": 0,
+ "show": false
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "Prometheus",
+ "editable": true,
+ "error": false,
+ "fill": 3,
+ "grid": {},
+ "id": 10,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(container_memory_rss{name=~\".+\"}) by (name)",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "{{name}}",
+ "refId": "A",
+ "step": 2
+ },
+ {
+ "expr": "container_memory_usage_bytes{name=~\".+\"}",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "{{name}}",
+ "refId": "B",
+ "step": 240
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Memory Usage per Container (Stacked)",
+ "tooltip": {
+ "msResolution": true,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "New row",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": 222.703125,
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "Prometheus",
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "id": 8,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum(rate(container_network_receive_bytes_total{name=~\".+\"}[$interval])) by (name)",
+ "intervalFactor": 2,
+ "legendFormat": "{{name}}",
+ "refId": "A",
+ "step": 2
+ },
+ {
+ "expr": "- rate(container_network_transmit_bytes_total{name=~\".+\"}[$interval])",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "{{name}}",
+ "refId": "B",
+ "step": 10
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Received Network Traffic per Container",
+ "tooltip": {
+ "msResolution": true,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "transparent": false,
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "Bps",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "Prometheus",
+ "editable": true,
+ "error": false,
+ "fill": 3,
+ "grid": {},
+ "id": 11,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "container_memory_rss{name=~\".+\"}",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "{{name}}",
+ "refId": "A",
+ "step": 20
+ },
+ {
+ "expr": "container_memory_usage_bytes{name=~\".+\"}",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "{{name}}",
+ "refId": "B",
+ "step": 20
+ },
+ {
+ "expr": "sum(container_memory_cache{name=~\".+\"}) by (name)",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "{{name}}",
+ "refId": "C",
+ "step": 2
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Cached Memory per Container (Stacked)",
+ "tooltip": {
+ "msResolution": true,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": false
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "New row",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
+ {
+ "columns": [
+ {
+ "text": "Avg",
+ "value": "avg"
+ }
+ ],
+ "datasource": "Prometheus",
+ "editable": true,
+ "error": false,
+ "fontSize": "100%",
+ "hideTimeOverride": false,
+ "id": 18,
+ "links": [],
+ "pageSize": 100,
+ "scroll": true,
+ "showHeader": true,
+ "sort": {
+ "col": 0,
+ "desc": true
+ },
+ "span": 6,
+ "styles": [
+ {
+ "dateFormat": "YYYY-MM-DD HH:mm:ss",
+ "pattern": "Time",
+ "type": "date"
+ },
+ {
+ "colorMode": null,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "decimals": 2,
+ "pattern": "/.*/",
+ "thresholds": [],
+ "type": "number",
+ "unit": "short"
+ }
+ ],
+ "targets": [
+ {
+ "expr": "cadvisor_version_info",
+ "intervalFactor": 2,
+ "legendFormat": "cAdvisor Version: {{cadvisorVersion}}",
+ "refId": "A",
+ "step": 2
+ },
+ {
+ "expr": "prometheus_build_info",
+ "intervalFactor": 2,
+ "legendFormat": "Prometheus Version: {{version}}",
+ "refId": "B",
+ "step": 2
+ },
+ {
+ "expr": "node_exporter_build_info",
+ "intervalFactor": 2,
+ "legendFormat": "Node-Exporter Version: {{version}}",
+ "refId": "C",
+ "step": 2
+ },
+ {
+ "expr": "cadvisor_version_info",
+ "intervalFactor": 2,
+ "legendFormat": "Docker Version: {{dockerVersion}}",
+ "refId": "D",
+ "step": 2
+ },
+ {
+ "expr": "cadvisor_version_info",
+ "intervalFactor": 2,
+ "legendFormat": "Host OS Version: {{osVersion}}",
+ "refId": "E",
+ "step": 2
+ },
+ {
+ "expr": "cadvisor_version_info",
+ "intervalFactor": 2,
+ "legendFormat": "Host Kernel Version: {{kernelVersion}}",
+ "refId": "F",
+ "step": 2
+ }
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "",
+ "transform": "timeseries_aggregations",
+ "type": "table"
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Check this out",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": 290.98582985381427,
+ "panels": [],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "New row",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": 127,
+ "panels": [],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "New row",
+ "titleSize": "h6"
+ }
+ ],
+ "schemaVersion": 14,
+ "style": "dark",
+ "tags": [],
+ "templating": {
+ "list": [
+ {
+ "allValue": ".+",
+ "current": {},
+ "datasource": "Prometheus",
+ "hide": 0,
+ "includeAll": true,
+ "label": "Container Group",
+ "multi": true,
+ "name": "containergroup",
+ "options": [],
+ "query": "label_values(container_group)",
+ "refresh": 1,
+ "regex": "",
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ },
+ {
+ "auto": true,
+ "auto_count": 50,
+ "auto_min": "50s",
+ "current": {
+ "text": "auto",
+ "value": "$__auto_interval"
+ },
+ "datasource": null,
+ "hide": 0,
+ "includeAll": false,
+ "label": "Interval",
+ "multi": false,
+ "name": "interval",
+ "options": [
+ {
+ "selected": true,
+ "text": "auto",
+ "value": "$__auto_interval"
+ },
+ {
+ "selected": false,
+ "text": "30s",
+ "value": "30s"
+ },
+ {
+ "selected": false,
+ "text": "1m",
+ "value": "1m"
+ },
+ {
+ "selected": false,
+ "text": "2m",
+ "value": "2m"
+ },
+ {
+ "selected": false,
+ "text": "3m",
+ "value": "3m"
+ },
+ {
+ "selected": false,
+ "text": "5m",
+ "value": "5m"
+ },
+ {
+ "selected": false,
+ "text": "7m",
+ "value": "7m"
+ },
+ {
+ "selected": false,
+ "text": "10m",
+ "value": "10m"
+ },
+ {
+ "selected": false,
+ "text": "30m",
+ "value": "30m"
+ },
+ {
+ "selected": false,
+ "text": "1h",
+ "value": "1h"
+ },
+ {
+ "selected": false,
+ "text": "6h",
+ "value": "6h"
+ },
+ {
+ "selected": false,
+ "text": "12h",
+ "value": "12h"
+ },
+ {
+ "selected": false,
+ "text": "1d",
+ "value": "1d"
+ },
+ {
+ "selected": false,
+ "text": "7d",
+ "value": "7d"
+ },
+ {
+ "selected": false,
+ "text": "14d",
+ "value": "14d"
+ },
+ {
+ "selected": false,
+ "text": "30d",
+ "value": "30d"
+ }
+ ],
+ "query": "30s,1m,2m,3m,5m,7m,10m,30m,1h,6h,12h,1d,7d,14d,30d",
+ "refresh": 2,
+ "type": "interval"
+ }
+ ]
+ },
+ "time": {
+ "from": "now-15m",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "browser",
+ "title": "Docker Host & Container Overview",
+ "version": 1
+}
+}
diff --git a/tools/prometheus/dashboards/Node_Exporter_Server_Metrics-1503539692670.json b/tools/prometheus/dashboards/Node_Exporter_Server_Metrics-1503539692670.json
new file mode 100644
index 0000000..da65d4a
--- /dev/null
+++ b/tools/prometheus/dashboards/Node_Exporter_Server_Metrics-1503539692670.json
@@ -0,0 +1,1632 @@
+{
+"dashboard": {
+ "__inputs": [
+ {
+ "name": "Prometheus",
+ "label": "Prometheus",
+ "description": "",
+ "type": "datasource",
+ "pluginId": "prometheus",
+ "pluginName": "Prometheus"
+ }
+ ],
+ "__requires": [
+ {
+ "type": "grafana",
+ "id": "grafana",
+ "name": "Grafana",
+ "version": "4.4.3"
+ },
+ {
+ "type": "panel",
+ "id": "graph",
+ "name": "Graph",
+ "version": ""
+ },
+ {
+ "type": "datasource",
+ "id": "prometheus",
+ "name": "Prometheus",
+ "version": "1.0.0"
+ },
+ {
+ "type": "panel",
+ "id": "singlestat",
+ "name": "Singlestat",
+ "version": ""
+ },
+ {
+ "type": "panel",
+ "id": "text",
+ "name": "Text",
+ "version": ""
+ }
+ ],
+ "annotations": {
+ "list": []
+ },
+ "description": "Dashboard to view multiple servers",
+ "editable": true,
+ "gnetId": 405,
+ "graphTooltip": 0,
+ "hideControls": false,
+ "id": null,
+ "links": [],
+ "rows": [
+ {
+ "collapse": false,
+ "height": "25px",
+ "panels": [
+ {
+ "content": "",
+ "editable": true,
+ "error": false,
+ "id": 11,
+ "minSpan": 2,
+ "mode": "html",
+ "repeat": "node",
+ "span": 12,
+ "style": {},
+ "title": "$node",
+ "type": "text"
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Title",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": "25px",
+ "panels": [
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "datasource": "Prometheus",
+ "editable": true,
+ "error": false,
+ "format": "none",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 20,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "minSpan": 2,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "repeat": "node",
+ "span": 12,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "count(node_cpu{instance=~\"$node\", mode=\"system\"})",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 14400,
+ "target": ""
+ }
+ ],
+ "thresholds": "",
+ "title": "CPU Cores",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "avg"
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "New row",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "Prometheus",
+ "decimals": 3,
+ "editable": true,
+ "error": false,
+ "fill": 10,
+ "grid": {},
+ "id": 7,
+ "legend": {
+ "alignAsTable": false,
+ "avg": false,
+ "current": false,
+ "hideEmpty": false,
+ "max": false,
+ "min": false,
+ "rightSide": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 0,
+ "links": [],
+ "minSpan": 2,
+ "nullPointMode": "connected",
+ "percentage": true,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": "node",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": true,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum by (mode)(irate(node_cpu{mode=\"system\",instance=~'$node'}[5m]))",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{mode}}",
+ "metric": "",
+ "refId": "A",
+ "step": 1200,
+ "target": ""
+ },
+ {
+ "expr": "sum by (mode)(irate(node_cpu{mode='user',instance=~'$node'}[5m]))",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "user",
+ "refId": "B",
+ "step": 1200
+ },
+ {
+ "expr": "sum by (mode)(irate(node_cpu{mode='nice',instance=~'$node'}[5m]))",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "nice",
+ "refId": "C",
+ "step": 1200
+ },
+ {
+ "expr": "sum by (mode)(irate(node_cpu{mode='iowait',instance=~'$node'}[5m]))",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "iowait",
+ "refId": "E",
+ "step": 1200
+ },
+ {
+ "expr": "sum by (mode)(irate(node_cpu{mode='steal',instance=~'$node'}[5m]))",
+ "intervalFactor": 2,
+ "legendFormat": "steal",
+ "refId": "H",
+ "step": 1200
+ },
+ {
+ "expr": "sum by (mode)(irate(node_cpu{mode='idle',instance=~'$node'}[5m]))",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "idle",
+ "refId": "D",
+ "step": 1200
+ },
+ {
+ "expr": "sum by (mode)(irate(node_cpu{mode='irq',instance=~'$node'}[5m]))",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "irq",
+ "refId": "F",
+ "step": 1200
+ },
+ {
+ "expr": "sum by (mode)(irate(node_cpu{mode='softirq',instance=~'$node'}[5m]))",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "softirq",
+ "refId": "G",
+ "step": 1200
+ },
+ {
+ "expr": "sum by (mode)(irate(node_cpu{mode='guest',instance=~'$node'}[5m]))",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "guest",
+ "refId": "I",
+ "step": 1200
+ }
+ ],
+ "thresholds": [
+ {
+ "colorMode": "custom",
+ "fill": true,
+ "fillColor": "rgba(216, 200, 27, 0.27)",
+ "op": "gt",
+ "value": 0
+ }
+ ],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "CPU",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "%",
+ "logBase": 1,
+ "max": 100,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "CPU",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {
+ "Slab": "#E5A8E2",
+ "Swap": "#E24D42"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "Prometheus",
+ "decimals": 2,
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "id": 17,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "minSpan": 2,
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": "node",
+ "seriesOverrides": [
+ {
+ "alias": "/Apps|Buffers|Cached|Free|Slab|SwapCached|PageTables|VmallocUsed/",
+ "fill": 5,
+ "stack": true
+ },
+ {
+ "alias": "Swap",
+ "fill": 5,
+ "stack": true
+ }
+ ],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "( node_memory_MemTotal{instance=~'$node'} - node_memory_MemFree{instance=~'$node'} - node_memory_Buffers{instance=~'$node'} - node_memory_Cached{instance=~'$node'} - node_memory_SwapCached{instance=~'$node'} - node_memory_Slab{instance=~'$node'} - node_memory_PageTables{instance=~'$node'} - node_memory_VmallocUsed{instance=~'$node'} )",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Apps",
+ "metric": "",
+ "refId": "A",
+ "step": 1200,
+ "target": ""
+ },
+ {
+ "expr": "node_memory_Buffers{instance=~'$node'}",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Buffers",
+ "refId": "B",
+ "step": 1200
+ },
+ {
+ "expr": "node_memory_Cached{instance=~'$node'}",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Cached",
+ "refId": "D",
+ "step": 1200
+ },
+ {
+ "expr": "node_memory_MemFree{instance=~'$node'}",
+ "hide": false,
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Free",
+ "refId": "E",
+ "step": 1200
+ },
+ {
+ "expr": "node_memory_Slab{instance=~'$node'}",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Slab",
+ "refId": "F",
+ "step": 1200
+ },
+ {
+ "expr": "node_memory_SwapCached{instance=~'$node'}",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "SwapCached",
+ "refId": "G",
+ "step": 1200
+ },
+ {
+ "expr": "node_memory_PageTables{instance=~'$node'}",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "PageTables",
+ "refId": "H",
+ "step": 1200
+ },
+ {
+ "expr": "node_memory_VmallocUsed{instance=~'$node'}",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "VmallocUsed",
+ "metric": "",
+ "refId": "I",
+ "step": 1200
+ },
+ {
+ "expr": "(node_memory_SwapTotal{instance=~'$node'} - node_memory_SwapFree{instance=~'$node'})",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Swap",
+ "metric": "",
+ "refId": "C",
+ "step": 1200
+ },
+ {
+ "expr": "node_memory_Committed_AS{instance=~'$node'}",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Committed",
+ "metric": "",
+ "refId": "J",
+ "step": 1200
+ },
+ {
+ "expr": "node_memory_Mapped{instance=~'$node'}",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Mapped",
+ "refId": "K",
+ "step": 1200
+ },
+ {
+ "expr": "node_memory_Active{instance=~'$node'}",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Active",
+ "metric": "",
+ "refId": "L",
+ "step": 1200
+ },
+ {
+ "expr": "node_memory_Inactive{instance=~'$node'}",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Inactive",
+ "metric": "",
+ "refId": "M",
+ "step": 1200
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Memory",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "GB",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Memory",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "Prometheus",
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "id": 13,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "minSpan": 2,
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": "node",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_load1{instance=~\"$node\"}",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "load",
+ "metric": "",
+ "refId": "A",
+ "step": 1200,
+ "target": ""
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Load",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Load",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "Prometheus",
+ "decimals": 3,
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "id": 9,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "minSpan": 2,
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": "node",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "100.0 - 100 * (node_filesystem_avail{instance=~'$node',device !~'tmpfs',device!~'by-uuid'} / node_filesystem_size{instance=~'$node',device !~'tmpfs',device!~'by-uuid'})",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{mountpoint}}",
+ "metric": "",
+ "refId": "A",
+ "step": 1200,
+ "target": ""
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Disk Space Used",
+ "tooltip": {
+ "msResolution": true,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "percent",
+ "logBase": 1,
+ "max": 100,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Disk Used",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "Prometheus",
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "id": 19,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "minSpan": 2,
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": "node",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_disk_io_time_ms{instance=~\"$node\"}[5m])/10",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{device}}",
+ "metric": "",
+ "refId": "A",
+ "step": 1200,
+ "target": ""
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Disk Utilization per Device",
+ "tooltip": {
+ "msResolution": false,
+ "shared": false,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "percent",
+ "logBase": 1,
+ "max": 100,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Disk Utilization",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "Prometheus",
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "id": 14,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "minSpan": 2,
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": "node",
+ "seriesOverrides": [
+ {
+ "alias": "/.*_read$/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_disk_reads_completed{instance=~'$node'}[5m])",
+ "interval": "",
+ "intervalFactor": 4,
+ "legendFormat": "{{device}}_read",
+ "metric": "",
+ "refId": "A",
+ "step": 2400,
+ "target": ""
+ },
+ {
+ "expr": "irate(node_disk_writes_completed{instance=~'$node'}[5m])",
+ "intervalFactor": 2,
+ "legendFormat": "{{device}}_write",
+ "metric": "",
+ "refId": "B",
+ "step": 1200
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Disk IOs per Device",
+ "tooltip": {
+ "msResolution": false,
+ "shared": false,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "label": "IO/second read (-) / write (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Disk IOs per device",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "Prometheus",
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "id": 18,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "minSpan": 2,
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": "node",
+ "seriesOverrides": [
+ {
+ "alias": "/.*_read/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_disk_sectors_read{instance=~'$node'}[5m]) * 512",
+ "interval": "",
+ "intervalFactor": 4,
+ "legendFormat": "{{device}}_read",
+ "refId": "B",
+ "step": 2400
+ },
+ {
+ "expr": "irate(node_disk_sectors_written{instance=~'$node'}[5m]) * 512",
+ "interval": "",
+ "intervalFactor": 4,
+ "legendFormat": "{{device}}_write",
+ "metric": "",
+ "refId": "A",
+ "step": 2400,
+ "target": ""
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Disk Throughput per Device",
+ "tooltip": {
+ "msResolution": false,
+ "shared": false,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": "Bytes/second read (-) / write (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Disk Throughput per device",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "Prometheus",
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "id": 22,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "minSpan": 2,
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": "node",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_context_switches{instance=~\"$node\"}[5m])",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "context switches",
+ "metric": "",
+ "refId": "A",
+ "step": 1200,
+ "target": ""
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Context Switches",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "Context Switches",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "Prometheus",
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "id": 12,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "minSpan": 2,
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": "node",
+ "seriesOverrides": [
+ {
+ "alias": "/.*_in/",
+ "transform": "negative-Y"
+ }
+ ],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_network_receive_bytes{instance=~'$node'}[5m])*8",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{device}}_in",
+ "metric": "",
+ "refId": "A",
+ "step": 1200,
+ "target": ""
+ },
+ {
+ "expr": "irate(node_network_transmit_bytes{instance=~'$node'}[5m])*8",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "{{device}}_out",
+ "refId": "B",
+ "step": 1200
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Network Traffic",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bits",
+ "label": "bits in (-) / bits out (+)",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "New row",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "Prometheus",
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "id": 21,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "minSpan": 2,
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": "node",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_netstat_Tcp_CurrEstab{instance=~'$node'}",
+ "intervalFactor": 2,
+ "legendFormat": "established",
+ "refId": "A",
+ "step": 1200,
+ "target": ""
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Netstat",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "New row",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "Prometheus",
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "id": 23,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "minSpan": 2,
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": "node",
+ "seriesOverrides": [
+ {
+ "alias": "/.*Out.*/",
+ "transform": "negative-Y"
+ },
+ {
+ "alias": "Udp_NoPorts",
+ "yaxis": 2
+ }
+ ],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_netstat_Udp_InDatagrams{instance=~\"$node\"}[5m])",
+ "intervalFactor": 2,
+ "legendFormat": "Udp_InDatagrams",
+ "refId": "A",
+ "step": 1200,
+ "target": ""
+ },
+ {
+ "expr": "irate(node_netstat_Udp_InErrors{instance=~\"$node\"}[5m])",
+ "intervalFactor": 2,
+ "legendFormat": "Udp_InErrors",
+ "refId": "B",
+ "step": 1200
+ },
+ {
+ "expr": "irate(node_netstat_Udp_OutDatagrams{instance=~\"$node\"}[5m])",
+ "interval": "",
+ "intervalFactor": 2,
+ "legendFormat": "Udp_OutDatagrams",
+ "refId": "C",
+ "step": 1200
+ },
+ {
+ "expr": "irate(node_netstat_Udp_NoPorts{instance=~\"$node\"}[5m])",
+ "intervalFactor": 2,
+ "legendFormat": "Udp_NoPorts",
+ "refId": "D",
+ "step": 1200
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "UDP Stats",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "New row",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "Prometheus",
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "id": 24,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "minSpan": 2,
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "repeat": "node",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_nf_conntrack_entries_limit{instance=~\"$node\"} - node_nf_conntrack_entries{instance=~\"$node\"}",
+ "intervalFactor": 2,
+ "legendFormat": "free",
+ "refId": "A",
+ "step": 1200,
+ "target": ""
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Conntrack",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "New row",
+ "titleSize": "h6"
+ }
+ ],
+ "schemaVersion": 14,
+ "style": "dark",
+ "tags": [
+ "prometheus"
+ ],
+ "templating": {
+ "list": [
+ {
+ "allFormat": "glob",
+ "allValue": null,
+ "current": {},
+ "datasource": "Prometheus",
+ "hide": 0,
+ "includeAll": false,
+ "label": "",
+ "multi": true,
+ "multiFormat": "regex values",
+ "name": "node",
+ "options": [],
+ "query": "label_values(node_boot_time, instance)",
+ "refresh": 1,
+ "regex": "",
+ "sort": 1,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-7d",
+ "to": "now"
+ },
+ "timepicker": {
+ "now": true,
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "browser",
+ "title": "Node Exporter Server Metrics",
+ "version": 1
+}
+}
diff --git a/tools/prometheus/dashboards/Node_exporter_single_server-1503539807236.json b/tools/prometheus/dashboards/Node_exporter_single_server-1503539807236.json
new file mode 100644
index 0000000..5dee4b9
--- /dev/null
+++ b/tools/prometheus/dashboards/Node_exporter_single_server-1503539807236.json
@@ -0,0 +1,792 @@
+{
+"dashboard": {
+ "__inputs": [
+ {
+ "name": "Prometheus",
+ "label": "Prometheus",
+ "description": "",
+ "type": "datasource",
+ "pluginId": "prometheus",
+ "pluginName": "Prometheus"
+ }
+ ],
+ "__requires": [
+ {
+ "type": "grafana",
+ "id": "grafana",
+ "name": "Grafana",
+ "version": "4.4.3"
+ },
+ {
+ "type": "panel",
+ "id": "graph",
+ "name": "Graph",
+ "version": ""
+ },
+ {
+ "type": "datasource",
+ "id": "prometheus",
+ "name": "Prometheus",
+ "version": "1.0.0"
+ },
+ {
+ "type": "panel",
+ "id": "singlestat",
+ "name": "Singlestat",
+ "version": ""
+ }
+ ],
+ "annotations": {
+ "list": []
+ },
+ "description": "Dashboard to get an overview of one server",
+ "editable": true,
+ "gnetId": 22,
+ "graphTooltip": 0,
+ "hideControls": false,
+ "id": null,
+ "links": [],
+ "refresh": false,
+ "rows": [
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "Prometheus",
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "id": 3,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "100 - (avg by (cpu) (irate(node_cpu{mode=\"idle\", instance=~\"$server\"}[5m])) * 100)",
+ "hide": false,
+ "intervalFactor": 10,
+ "legendFormat": "{{cpu}}",
+ "refId": "A",
+ "step": 50
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "CPU usage",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "percent",
+ "label": "cpu usage",
+ "logBase": 1,
+ "max": 100,
+ "min": 0,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "Prometheus",
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "id": 9,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 6,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_load1{instance=~\"$server\"}",
+ "intervalFactor": 4,
+ "legendFormat": "load 1m",
+ "refId": "A",
+ "step": 20,
+ "target": ""
+ },
+ {
+ "expr": "node_load5{instance=~\"$server\"}",
+ "intervalFactor": 4,
+ "legendFormat": "load 5m",
+ "refId": "B",
+ "step": 20,
+ "target": ""
+ },
+ {
+ "expr": "node_load15{instance=~\"$server\"}",
+ "intervalFactor": 4,
+ "legendFormat": "load 15m",
+ "refId": "C",
+ "step": 20,
+ "target": ""
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "System load",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "percentunit",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "New row",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "Prometheus",
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "id": 4,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "node_memory_SwapFree{instance=\"172.17.0.1:9100\",job=\"prometheus\"}",
+ "yaxis": 2
+ }
+ ],
+ "spaceLength": 10,
+ "span": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "node_memory_MemTotal{instance=~\"$server\"} - node_memory_MemFree{instance=~\"$server\"}",
+ "intervalFactor": 2,
+ "legendFormat": "used memory",
+ "metric": "memo",
+ "refId": "A",
+ "step": 10,
+ "target": ""
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Used memory",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "datasource": "Prometheus",
+ "editable": true,
+ "error": false,
+ "format": "percent",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": true,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 5,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "(node_memory_MemFree{instance=~\"$server\"} / node_memory_MemTotal{instance=~\"$server\"}) * 100",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 60,
+ "target": ""
+ }
+ ],
+ "thresholds": "10, 20",
+ "title": "Free memory",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "avg"
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "New row",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "Prometheus",
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "id": 6,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "read",
+ "yaxis": 1
+ },
+ {
+ "alias": "{instance=\"172.17.0.1:9100\"}",
+ "yaxis": 2
+ },
+ {
+ "alias": "io time",
+ "yaxis": 2
+ }
+ ],
+ "spaceLength": 10,
+ "span": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "sum by (instance) (irate(node_disk_bytes_read{instance=~\"$server\"}[5m]))",
+ "hide": false,
+ "intervalFactor": 4,
+ "legendFormat": "read",
+ "refId": "A",
+ "step": 20,
+ "target": ""
+ },
+ {
+ "expr": "sum by (instance) (irate(node_disk_bytes_written{instance=~\"$server\"}[5m]))",
+ "intervalFactor": 4,
+ "legendFormat": "written",
+ "refId": "B",
+ "step": 20
+ },
+ {
+ "expr": "sum by (instance) (irate(node_disk_io_time_ms{instance=~\"$server\"}[5m]))",
+ "intervalFactor": 4,
+ "legendFormat": "io time",
+ "refId": "C",
+ "step": 20
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Disk usage",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "ms",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "datasource": "Prometheus",
+ "editable": true,
+ "error": false,
+ "format": "percentunit",
+ "gauge": {
+ "maxValue": 1,
+ "minValue": 0,
+ "show": true,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 7,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "min(node_filesystem_free{device!=\"rootfs\",instance=~\"$server\"} / node_filesystem_size{device!=\"rootfs\",instance=~\"$server\"})",
+ "intervalFactor": 2,
+ "refId": "A",
+ "step": 60,
+ "target": ""
+ }
+ ],
+ "thresholds": "0.10, 0.25",
+ "title": "Free disk space (lowest mountpoint)",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "current"
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "New row",
+ "titleSize": "h6"
+ },
+ {
+ "collapse": false,
+ "height": "250px",
+ "panels": [
+ {
+ "aliasColors": {},
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "Prometheus",
+ "editable": true,
+ "error": false,
+ "fill": 1,
+ "grid": {},
+ "id": 8,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 2,
+ "links": [],
+ "nullPointMode": "connected",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [
+ {
+ "alias": "transmitted ",
+ "yaxis": 2
+ }
+ ],
+ "spaceLength": 10,
+ "span": 12,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "irate(node_network_receive_bytes{instance=~\"$server\",device!~\"lo\"}[5m])",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "received",
+ "refId": "A",
+ "step": 4,
+ "target": ""
+ },
+ {
+ "expr": "irate(node_network_transmit_bytes{instance=~\"$server\",device!~\"lo\"}[5m])",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "transmitted ",
+ "refId": "B",
+ "step": 4,
+ "target": ""
+ },
+ {
+ "expr": "node_network_transmit_bytes{instance=~\"$server\",device!~\"lo\"}",
+ "hide": true,
+ "intervalFactor": 2,
+ "legendFormat": "transmitted ",
+ "refId": "C",
+ "step": 2,
+ "target": ""
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Data transfer",
+ "tooltip": {
+ "msResolution": false,
+ "shared": true,
+ "sort": 0,
+ "value_type": "cumulative"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "bytes",
+ "label": null,
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
+ }
+ ],
+ "repeat": null,
+ "repeatIteration": null,
+ "repeatRowId": null,
+ "showTitle": false,
+ "title": "New row",
+ "titleSize": "h6"
+ }
+ ],
+ "schemaVersion": 14,
+ "style": "dark",
+ "tags": [
+ "prometheus"
+ ],
+ "templating": {
+ "list": [
+ {
+ "allValue": null,
+ "current": {},
+ "datasource": "Prometheus",
+ "hide": 0,
+ "includeAll": false,
+ "label": null,
+ "multi": false,
+ "name": "server",
+ "options": [],
+ "query": "label_values(node_boot_time, instance)",
+ "refresh": 1,
+ "regex": "",
+ "sort": 0,
+ "tagValuesQuery": "",
+ "tags": [],
+ "tagsQuery": "",
+ "type": "query",
+ "useTags": false
+ }
+ ]
+ },
+ "time": {
+ "from": "now-1h",
+ "to": "now"
+ },
+ "timepicker": {
+ "refresh_intervals": [
+ "5s",
+ "10s",
+ "30s",
+ "1m",
+ "5m",
+ "15m",
+ "30m",
+ "1h",
+ "2h",
+ "1d"
+ ],
+ "time_options": [
+ "5m",
+ "15m",
+ "1h",
+ "6h",
+ "12h",
+ "24h",
+ "2d",
+ "7d",
+ "30d"
+ ]
+ },
+ "timezone": "browser",
+ "title": "Node exporter single server",
+ "version": 1
+}
+}
diff --git a/tools/prometheus/prometheus-tools.sh b/tools/prometheus/prometheus-tools.sh
new file mode 100644
index 0000000..ed6eb22
--- /dev/null
+++ b/tools/prometheus/prometheus-tools.sh
@@ -0,0 +1,228 @@
+#!/bin/bash
+# Copyright 2017 AT&T Intellectual Property, Inc
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#. What this is: Functions for testing with Prometheus and Grafana. Sets up
+#. Prometheus and Grafana on a master node (e.g. for kubernetes, docker,
+#. rancher, openstack) and agent nodes (where applications run).
+#. Prerequisites:
+#. - Ubuntu server for master and agent nodes
+#. - Docker installed
+#. Usage:
+#. $ git clone https://gerrit.opnfv.org/gerrit/models ~/models
+#. $ cd ~/models/tools/prometheus
+#. $ bash prometheus-tools.sh setup "<list of agent nodes>"
+#. <list of agent nodes>: space separated IP of agent nodes
+#. $ bash prometheus-tools.sh grafana
+#. Runs grafana in a docker container and connects to prometheus as datasource
+#. $ bash prometheus-tools.sh all "<list of agent nodes>"
+#. Does all of the above
+#. $ bash prometheus-tools.sh clean "<list of agent nodes>"
+#
+
+# Prometheus links
+# https://prometheus.io/download/
+# https://prometheus.io/docs/introduction/getting_started/
+# https://github.com/prometheus/prometheus
+# https://prometheus.io/docs/instrumenting/exporters/
+# https://github.com/prometheus/node_exporter
+# https://github.com/prometheus/haproxy_exporter
+# https://github.com/prometheus/collectd_exporter
+
+# Use this to trigger fail() at the right places
+# if [ "$RESULT" == "Test Failed!" ]; then fail "message"; fi
+function fail() {
+ echo "$1"
+ exit 1
+}
+
+# Install and start the Prometheus server on this host, then copy and start
+# node_exporter and haproxy_exporter on every agent node.
+# Globals:
+#   nodes   (read)    space separated agent IPs; set by the caller, $1 is unused
+#   host_ip (written) source address this host uses to reach 8.8.8.8
+# Outputs: progress on stdout, /tmp/up (result of the "up" query) and
+#   /tmp/summary (overwritten with the dashboard URL).
+# Assumes the current directory holds the "dashboards" folder and that
+# key-based ssh/scp access as user "ubuntu" works for each node.
+function setup_prometheus() {
+  # Prerequisites
+  echo "${FUNCNAME[0]}: Setting up prometheus master and agents"
+  sudo apt install -y golang-go jq
+
+  # Install Prometheus server
+  echo "${FUNCNAME[0]}: Setting up prometheus master"
+  if [[ -d ~/prometheus ]]; then rm -rf ~/prometheus; fi
+  mkdir ~/prometheus
+  mkdir ~/prometheus/dashboards
+  cp -r dashboards/* ~/prometheus/dashboards
+  cd ~/prometheus
+  wget https://github.com/prometheus/prometheus/releases/download/v2.0.0-beta.2/prometheus-2.0.0-beta.2.linux-amd64.tar.gz
+  tar xvfz prometheus-*.tar.gz
+  # The trailing slash limits the glob to the unpacked directory; without it
+  # the tarball matches too and newer bash rejects cd with two arguments.
+  cd prometheus-*/
+  # Customize prometheus.yml below for your server IPs
+  # This example assumes the node_exporter and haproxy_exporter will be installed on each node
+  cat <<'EOF' >prometheus.yml
+global:
+ scrape_interval: 15s # By default, scrape targets every 15 seconds.
+
+ # Attach these labels to any time series or alerts when communicating with
+ # external systems (federation, remote storage, Alertmanager).
+ external_labels:
+ monitor: 'codelab-monitor'
+
+# A scrape configuration containing exactly one endpoint to scrape:
+# Here it's Prometheus itself.
+scrape_configs:
+ # The job name is added as a label `job=<job_name>` to any timeseries scraped from this config.
+ - job_name: 'prometheus'
+
+ # Override the global default and scrape targets from this job every 5 seconds.
+ scrape_interval: 5s
+
+ static_configs:
+EOF
+
+  # One target entry per exporter port on every agent node
+  for node in $nodes; do
+    echo " - targets: ['${node}:9100']" >>prometheus.yml
+    echo " - targets: ['${node}:9101']" >>prometheus.yml
+  done
+
+  # Start Prometheus
+  nohup ./prometheus --config.file=prometheus.yml > /dev/null 2>&1 &
+  # Browse to http://host_ip:9090
+
+  echo "${FUNCNAME[0]}: Installing exporters"
+  # Install exporters
+  # https://github.com/prometheus/node_exporter
+  cd ~/prometheus
+  wget https://github.com/prometheus/node_exporter/releases/download/v0.14.0/node_exporter-0.14.0.linux-amd64.tar.gz
+  tar xvfz node*.tar.gz
+  # https://github.com/prometheus/haproxy_exporter
+  wget https://github.com/prometheus/haproxy_exporter/releases/download/v0.7.1/haproxy_exporter-0.7.1.linux-amd64.tar.gz
+  tar xvfz haproxy*.tar.gz
+
+  # The scp and ssh actions below assume you have key-based access enabled to the nodes
+  for node in $nodes; do
+    echo "${FUNCNAME[0]}: Setup agent at $node"
+    scp -r -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no \
+      node_exporter-0.14.0.linux-amd64/node_exporter ubuntu@$node:/home/ubuntu/node_exporter
+    ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no \
+      ubuntu@$node "nohup ./node_exporter > /dev/null 2>&1 &"
+    scp -r -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no \
+      haproxy_exporter-0.7.1.linux-amd64/haproxy_exporter ubuntu@$node:/home/ubuntu/haproxy_exporter
+    ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no \
+      ubuntu@$node "nohup ./haproxy_exporter > /dev/null 2>&1 &"
+  done
+
+  # Pick the value after "src" rather than the last field: newer iproute2
+  # appends "uid <n>" to the line, which would otherwise be taken as the IP.
+  host_ip=$(ip route get 8.8.8.8 | awk '{for (i = 1; i < NF; i++) if ($i == "src") {print $(i + 1); exit}}')
+  # The URL is quoted so the "?" cannot be treated as a glob character
+  while ! curl -o /tmp/up "http://$host_ip:9090/api/v1/query?query=up" ; do
+    echo "${FUNCNAME[0]}: Prometheus API is not yet responding... waiting 10 seconds"
+    sleep 10
+  done
+
+  exp=$(jq '.data.result|length' /tmp/up)
+  echo "${FUNCNAME[0]}: $exp exporters are up"
+  # Walk the result array from the last entry down to index 0
+  while (( exp > 0 )); do
+    exp=$((exp - 1))
+    eip=$(jq -r ".data.result[$exp].metric.instance" /tmp/up)
+    job=$(jq -r ".data.result[$exp].metric.job" /tmp/up)
+    echo "${FUNCNAME[0]}: $job at $eip"
+  done
+  echo "${FUNCNAME[0]}: Prometheus dashboard is available at http://$host_ip:9090"
+  echo "Prometheus dashboard is available at http://$host_ip:9090" >/tmp/summary
+}
+
+# Register Prometheus as a Grafana datasource and import every dashboard file
+# found in ~/prometheus/dashboards.
+# Arguments:
+#   $1 - IP address of the Prometheus server (port 9090 is assumed)
+#   $2 - IP address of the Grafana server (port 3000, login admin/admin)
+# Outputs: progress on stdout; appends the Grafana URLs to /tmp/summary;
+#   writes ~/prometheus/datasources.json and /tmp/json.
+# Exits through fail() when Grafana does not answer "Datasource added".
+function connect_grafana() {
+  echo "${FUNCNAME[0]}: Setup Grafana datasources and dashboards"
+  prometheus_ip=$1
+  grafana_ip=$2
+
+  while ! curl -X POST http://admin:admin@$grafana_ip:3000/api/login/ping ; do
+    echo "${FUNCNAME[0]}: Grafana API is not yet responding... waiting 10 seconds"
+    sleep 10
+  done
+
+  echo "${FUNCNAME[0]}: Setup Prometheus datasource for Grafana"
+  cd ~/prometheus/
+  cat >datasources.json <<EOF
+{"name":"Prometheus", "type":"prometheus", "access":"proxy", \
+"url":"http://$prometheus_ip:9090/", "basicAuth":false,"isDefault":true, \
+"user":"", "password":"" }
+EOF
+  curl -X POST -o /tmp/json -u admin:admin -H "Accept: application/json" \
+    -H "Content-type: application/json" \
+    -d @datasources.json http://admin:admin@$grafana_ip:3000/api/datasources
+
+  if [[ "$(jq -r '.message' /tmp/json)" != "Datasource added" ]]; then
+    fail "Datasource creation failed"
+  fi
+  echo "${FUNCNAME[0]}: Prometheus datasource for Grafana added"
+
+  echo "${FUNCNAME[0]}: Import Grafana dashboards"
+  # Setup Prometheus dashboards
+  # https://grafana.com/dashboards?dataSource=prometheus
+  # To add additional dashboards, browse the URL above and import the dashboard via the id displayed for the dashboard
+  # Select the home icon (upper left), Dashboards / Import, enter the id, select load, and select the Prometheus datasource
+
+  cd ~/prometheus/dashboards
+  # Glob instead of parsing ls; the -e test skips the literal "*" that is left
+  # behind when the folder is empty.
+  for board in *; do
+    [[ -e "$board" ]] || continue
+    curl -X POST -u admin:admin -H "Accept: application/json" -H "Content-type: application/json" -d "@${board}" http://$grafana_ip:3000/api/dashboards/db
+  done
+  # Report the address passed in as $2: Grafana may run on a different host
+  # than the one whose address a caller stored in host_ip.
+  echo "${FUNCNAME[0]}: Grafana dashboards are available at http://$grafana_ip:3000 (login as admin/admin)"
+  echo "Grafana dashboards are available at http://$grafana_ip:3000 (login as admin/admin)" >>/tmp/summary
+  echo "${FUNCNAME[0]}: Grafana API is available at http://admin:admin@$grafana_ip:3000/api/v1/query?query=<string>"
+  echo "Grafana API is available at http://admin:admin@$grafana_ip:3000/api/v1/query?query=<string>" >>/tmp/summary
+}
+
+# Start Grafana in a docker container named "grafana" on this host (port
+# 3000), wait until docker reports it running, then hand over to
+# connect_grafana with this host's address as both Prometheus and Grafana IP.
+# Globals: host_ip (written)
+function run_and_connect_grafana() {
+  # Per http://docs.grafana.org/installation/docker/
+  # Take the value after "src" instead of the last field, because newer
+  # iproute2 ends the line with "uid <n>".
+  host_ip=$(ip route get 8.8.8.8 | awk '{for (i = 1; i < NF; i++) if ($i == "src") {print $(i + 1); exit}}')
+  sudo docker run -d -p 3000:3000 --name grafana grafana/grafana
+  status=$(sudo docker inspect grafana | jq -r '.[0].State.Status')
+  while [[ "$status" != "running" ]]; do
+    echo "${FUNCNAME[0]}: Grafana container state is ($status)"
+    sleep 10
+    status=$(sudo docker inspect grafana | jq -r '.[0].State.Status')
+  done
+  echo "${FUNCNAME[0]}: Grafana container state is $status"
+
+  connect_grafana "$host_ip" "$host_ip"
+  echo "${FUNCNAME[0]}: connect_grafana complete"
+}
+
+# Command dispatch: $1 selects the action and $2 is the space separated list
+# of agent node IPs, which setup_prometheus and the clean arm read via $nodes.
+nodes=$2
+case "$1" in
+  setup)
+    setup_prometheus "$2"
+    ;;
+  grafana)
+    run_and_connect_grafana
+    ;;
+  all)
+    setup_prometheus "$2"
+    run_and_connect_grafana
+    ;;
+  clean)
+    sudo kill $(ps -ef | grep "\./prometheus" | grep prometheus.yml | awk '{print $2}')
+    rm -rf ~/prometheus
+    sudo docker stop grafana
+    sudo docker rm grafana
+    for node in $nodes; do
+      # The process lookup must happen on the agent node. A $(ps ...) inside
+      # the double-quoted ssh argument would be expanded on this host and send
+      # local PIDs to the remote kill. The [.] bracket keeps the pattern from
+      # matching the command line of the remote shell that carries it.
+      ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no \
+        ubuntu@$node "sudo pkill -f '[.]/node_exporter'"
+      ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no \
+        ubuntu@$node "rm -rf /home/ubuntu/node_exporter"
+      ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no \
+        ubuntu@$node "sudo pkill -f '[.]/haproxy_exporter'"
+      ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no \
+        ubuntu@$node "rm -rf /home/ubuntu/haproxy_exporter"
+    done
+    ;;
+  *)
+    # Print the usage text, i.e. every header line marked with "#. "
+    grep '#. ' "$0"
+    ;;
+esac
+# The summary only exists once a setup step has written it
+if [[ -f /tmp/summary ]]; then cat /tmp/summary; fi
diff --git a/tools/rancher/demo_deploy.sh b/tools/rancher/demo_deploy.sh
new file mode 100644
index 0000000..981b421
--- /dev/null
+++ b/tools/rancher/demo_deploy.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+# Copyright 2017 AT&T Intellectual Property, Inc
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#. What this is: Complete scripted deployment of an experimental Rancher-based
+#. cloud-native application platform. When complete, Rancher and the following
+#. will be installed:
+#. - nginx and dokuwiki as demo applications
+#. - prometheus + grafana for cluster monitoring/stats
+#. Prometheus dashboard: http://<master_public_ip>:9090
+#. Grafana dashboard: http://<master_public_ip>:3000
+#.
+#. Prerequisites:
+#. - Ubuntu server for Rancher cluster nodes (admin/master and agent nodes)
+#. - MAAS server as cluster admin for Rancher master/agent nodes
+#. - Password-less ssh key provided for node setup
+#. Usage: on the MAAS server
+#. $ git clone https://gerrit.opnfv.org/gerrit/models ~/models
+#. $ bash ~/models/tools/rancher/demo_deploy.sh <key> "<hosts>" <master_ip>
+#. "<agent_ips>" [<extras>]
+#. <key>: name of private key for cluster node ssh (in current folder)
+#. <hosts>: space separated list of hostnames managed by MAAS
+#. <master_ip>: IP of cluster admin node
+#. <agent_ips>: space separated list of agent node IPs
+#. <extras>: optional name of script for extra setup functions as needed
+
+# Positional parameters, in the order given in the usage text above
+key=$1
+nodes="$2"
+admin_ip=$3
+agent_ips="$4"
+extras=$5
+
+# Run the MAAS deploy script in this shell with the key, host list and extras
+# (NOTE(review): presumably this provisions the hosts; confirm in deploy.sh)
+source ~/models/tools/maas/deploy.sh $1 "$2" $5
+# Start a local ssh-agent and load the cluster key into it
+eval `ssh-agent`
+ssh-add $key
+# Optional extra setup functions supplied by the caller
+if [[ "x$extras" != "x" ]]; then source $extras; fi
+# Copy the private key to the admin node; the remote sessions below add it to
+# their own ssh-agent
+scp -o StrictHostKeyChecking=no $key ubuntu@$admin_ip:/home/ubuntu/$key
+echo "Setting up Rancher..."
+# The heredoc is unquoted, so $key and $agent_ips are expanded locally before
+# the commands are sent to the admin node
+ssh -x ubuntu@$admin_ip <<EOF
+exec ssh-agent bash
+ssh-add $key
+git clone https://gerrit.opnfv.org/gerrit/models
+bash models/tools/rancher/rancher-cluster.sh all "$agent_ips"
+EOF
+# TODO: Figure this out... Have to break the setup into two steps as something
+# causes the ssh session to end before the prometheus setup, if both scripts
+# (rancher-cluster and prometheus-tools) are in the same ssh session
+echo "Setting up Prometheus..."
+ssh -x ubuntu@$admin_ip <<EOF
+exec ssh-agent bash
+ssh-add $key
+bash models/tools/prometheus/prometheus-tools.sh all "$agent_ips"
+EOF
+echo "All done!"
diff --git a/tools/rancher/rancher-cluster.sh b/tools/rancher/rancher-cluster.sh
new file mode 100644
index 0000000..42b3c58
--- /dev/null
+++ b/tools/rancher/rancher-cluster.sh
@@ -0,0 +1,529 @@
+#!/bin/bash
+# Copyright 2017 AT&T Intellectual Property, Inc
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#. What this is: Functions for testing with rancher.
+#. Prerequisites:
+#. - Ubuntu server for master and agent nodes
+#. Usage:
+#. $ git clone https://gerrit.opnfv.org/gerrit/models ~/models
+#. $ cd ~/models/tools/rancher
+#.
+#. Usage:
+#. $ bash rancher-cluster.sh all "<agents>"
+#. Automate setup and start demo blueprints.
+#. <agents>: space-separated list of agent node IPs
+#. $ bash rancher-cluster.sh setup "<agents>"
+#. Installs and starts master and agent nodes.
+#. $ bash rancher-cluster.sh master
+#. Sets up the Rancher master node.
+#. $ bash rancher-cluster.sh agents "<agents>"
+#. Installs and starts agent nodes.
+#. $ bash rancher-cluster.sh demo
+#. Starts demo blueprints.
+#. $ bash rancher-cluster.sh clean "<agents>"
+#. Removes Rancher and installed blueprints from the master and agent nodes.
+#.
+#. To call the procedures, directly, e.g. public_endpoint nginx/lb
+#. $ source rancher-cluster.sh
+#. See below for function-specific usage
+#.
+
+# Install master
+# Installs docker-ce when dpkg does not report it as installed, installs jq,
+# starts the rancher/server container and waits until the API lists a project.
+# Arguments: $1 - IP address at which the Rancher API (port 8080) is polled
+# Globals written: docker_installed, delay, id
+# Side effects: recreates ~/rancher as an empty folder
+function setup_master() {
+  docker_installed=$(dpkg-query -W --showformat='${Status}\n' docker-ce | grep -c "install ok")
+  if [[ $docker_installed == 0 ]]; then
+    echo "${FUNCNAME[0]}: installing and starting docker"
+    # Per https://docs.docker.com/engine/installation/linux/docker-ce/ubuntu/
+    sudo apt-get remove -y docker docker-engine docker.io
+    sudo apt-get update
+    sudo apt-get install -y \
+      linux-image-extra-$(uname -r) \
+      linux-image-extra-virtual
+    sudo apt-get install -y \
+      apt-transport-https \
+      ca-certificates \
+      curl \
+      software-properties-common
+    curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
+    sudo add-apt-repository \
+      "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
+    sudo apt-get update
+    sudo apt-get install -y docker-ce
+  fi
+
+  # jq is used by the wait loop below, so install it even when docker was
+  # already present
+  echo "${FUNCNAME[0]}: installing jq"
+  sudo apt-get install -y jq
+
+  echo "${FUNCNAME[0]}: installing rancher server (master)"
+  sudo docker run -d --restart=unless-stopped -p 8080:8080 --name rancher rancher/server
+
+  echo "${FUNCNAME[0]}: wait until server is up at http://$1:8080"
+  delay=0
+  id=$(wget -qO- http://$1:8080/v2-beta/projects/ | jq -r '.data[0].id')
+  # Empty means the API did not answer; "null" means it answered without
+  # listing a project yet
+  while [[ -z "$id" || "$id" == "null" ]]; do
+    echo "${FUNCNAME[0]}: rancher server is not yet up, checking again in 10 seconds"
+    sleep 10
+    delay=$((delay + 10))
+    id=$(wget -qO- http://$1:8080/v2-beta/projects/ | jq -r '.data[0].id')
+  done
+  echo "${FUNCNAME[0]}: rancher server is up after $delay seconds"
+
+  rm -rf ~/rancher
+  mkdir ~/rancher
+}
+
+# Install rancher CLI tools
+# Downloads the rancher and rancher-compose binaries into /usr/bin, creates an
+# API key pair and a registration token through the Rancher API, and writes
+# the CLI configuration via "rancher config".
+# Arguments: $1 - IP address of the Rancher master (API on port 8080)
+# Exports: RANCHER_URL, RANCHER_ENVIRONMENT, RANCHER_ACCESS_KEY,
+#   RANCHER_SECRET_KEY, RANCHER_REGISTER_COMMAND
+# Side effects: writes /tmp/keys and /tmp/token; ends in ~/rancher
+# Usage example: install_cli_tools 172.16.0.2
+function install_cli_tools() {
+ echo "${FUNCNAME[0]}: installing rancher CLI tools for master $1"
+ cd ~
+ echo "${FUNCNAME[0]}: install Rancher CLI"
+ rm -rf rancher-v0.6.3
+ wget -q https://releases.rancher.com/cli/v0.6.3/rancher-linux-amd64-v0.6.3.tar.gz
+ gzip -d -f rancher-linux-amd64-v0.6.3.tar.gz
+ tar -xvf rancher-linux-amd64-v0.6.3.tar
+ sudo mv rancher-v0.6.3/rancher /usr/bin/rancher
+ echo "${FUNCNAME[0]}: install Rancher Compose"
+ rm -rf rancher-compose-v0.12.5
+ wget -q https://releases.rancher.com/compose/v0.12.5/rancher-compose-linux-amd64-v0.12.5.tar.gz
+ gzip -d -f rancher-compose-linux-amd64-v0.12.5.tar.gz
+ tar -xvf rancher-compose-linux-amd64-v0.12.5.tar
+ sudo mv rancher-compose-v0.12.5/rancher-compose /usr/bin/rancher-compose
+ echo "${FUNCNAME[0]}: setup Rancher CLI environment"
+ # CLI setup http://rancher.com/docs/rancher/v1.6/en/cli/
+ # Under the UI "API" select "Add account API key" and name it. Export the keys:
+ # The following scripted approach assumes you have 1 project/environment (Default)
+ # Set the url that Rancher is on
+ export RANCHER_URL=http://$1:8080/v1
+ # The first project returned by the API is used as the environment
+ id=$(wget -qO- http://$1:8080/v2-beta/projects/ | jq -r '.data[0].id')
+ export RANCHER_ENVIRONMENT=$id
+ # Create an API key for that project; the response is stored in /tmp/keys
+ curl -s -o /tmp/keys -X POST -H 'Accept: application/json' -H 'Content-Type: application/json' -d '{"accountId":"reference[account]", "description":"string", "name":"string", "publicValue":"string", "secretValue":"password"}' http://$1:8080/v2-beta/projects/$id/apikeys
+# curl -s -o /tmp/keys -X POST -H 'Accept: application/json' -H 'Content-Type: application/json' -d {"type":"apikey","accountId":"1a1","name":"admin","description":null,"created":null,"kind":null,"removed":null,"uuid":null} http://$1:8080/v2-beta/projects/$id/apikey
+ export RANCHER_ACCESS_KEY=$(jq -r '.publicValue' /tmp/keys)
+ export RANCHER_SECRET_KEY=$(jq -r '.secretValue' /tmp/keys)
+ # create the env file ~/.rancher/cli.json
+ rancher config <<EOF
+$RANCHER_URL
+$RANCHER_ACCESS_KEY
+$RANCHER_SECRET_KEY
+EOF
+
+ # host:port part of the configured URL (third "/"-separated field)
+ master=$(rancher config --print | jq -r '.url' | cut -d '/' -f 3)
+ echo "${FUNCNAME[0]}: Create registration token"
+ # added sleep to allow server time to be ready to create registration tokens (otherwise error is returned)
+ sleep 5
+ curl -s -o /tmp/token -X POST -u "${RANCHER_ACCESS_KEY}:${RANCHER_SECRET_KEY}" -H 'Accept: application/json' -H 'Content-Type: application/json' -d '{"name":"master"}' http://$master/v1/registrationtokens
+ # Repeat the request every 5 seconds until the reply is a registrationToken
+ while [[ $(jq -r ".type" /tmp/token) != "registrationToken" ]]; do
+ sleep 5
+ curl -s -o /tmp/token -X POST -u "${RANCHER_ACCESS_KEY}:${RANCHER_SECRET_KEY}" -H 'Accept: application/json' -H 'Content-Type: application/json' -d '{"name":"master"}' http://$master/v1/registrationtokens
+ done
+ id=$(jq -r ".id" /tmp/token)
+ echo "${FUNCNAME[0]}: registration token id=$id"
+
+ echo "${FUNCNAME[0]}: wait until registration command is created"
+ command=$(curl -s -u "${RANCHER_ACCESS_KEY}:${RANCHER_SECRET_KEY}" -H 'Accept: application/json' http://$master/v1/registrationtokens/$id | jq -r '.command')
+ # jq prints "null" while the token has no command field yet
+ while [[ "$command" == "null" ]]; do
+ echo "${FUNCNAME[0]}: registration command is not yet created, checking again in 10 seconds"
+ sleep 10
+ command=$(curl -s -u "${RANCHER_ACCESS_KEY}:${RANCHER_SECRET_KEY}" -H 'Accept: application/json' http://$master/v1/registrationtokens/$id | jq -r '.command')
+ done
+
+ # setup_agent runs this command on each agent host over ssh
+ export RANCHER_REGISTER_COMMAND="$command"
+
+# echo "${FUNCNAME[0]}: activate rancher debug"
+# export RANCHER_CLIENT_DEBUG=true
+
+ echo "${FUNCNAME[0]}: Install docker-compose for syntax checks"
+ sudo apt install -y docker-compose
+
+ cd ~/rancher
+}
+
+# Register an agent host with the Rancher master and wait until it is active
+# Arguments: $1 - environment name (only echoed), $2 - IP of the agent host
+# Globals: RANCHER_REGISTER_COMMAND (read); delay, id, state (written)
+# Usage example: setup_agent Default 172.16.0.7
+setup_agent() {
+  echo "${FUNCNAME[0]}: SSH to host $2 in env $1 and execute registration command"
+
+  # Install and start docker on the agent, then run the registration command
+  ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no ubuntu@$2 \
+    "sudo apt-get install -y docker.io; sudo service docker start"
+  ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no ubuntu@$2 \
+    $RANCHER_REGISTER_COMMAND
+
+  echo "${FUNCNAME[0]}: wait until agent $2 is active"
+  delay=0
+  # First column of the "rancher hosts" row that mentions the agent address
+  id=$(rancher hosts | awk "/$2/{print \$1}")
+  until [[ "$id" != "" ]]; do
+    echo "${FUNCNAME[0]}: agent $2 is not yet created, checking again in 10 seconds"
+    sleep 10
+    delay=$((delay + 10))
+    id=$(rancher hosts | awk "/$2/{print \$1}")
+  done
+
+  echo "${FUNCNAME[0]}: agent $2 id=$id"
+  state=$(rancher inspect $id | jq -r '.state')
+  until [[ "$state" == "active" ]]; do
+    echo "${FUNCNAME[0]}: host $2 state is $state, checking again in 10 seconds"
+    sleep 10
+    delay=$((delay + 10))
+    state=$(rancher inspect $id | jq -r '.state')
+  done
+  echo "${FUNCNAME[0]}: agent $2 state is $state after $delay seconds"
+}
+
+# Delete an agent host
+# Usage example: delete_host 172.16.0.7
+function stop_agent() {
+ echo "${FUNCNAME[0]}: deleting host $1"
+ rancher rm --stop $(rancher hosts | awk "/$1/{print \$1}")
+}
+
+# Test service at access points
+# Polls every public endpoint of the service until its response contains the
+# match string.
+# Arguments:
+#   $1 - service name as listed by "rancher ps" (e.g. nginx/nginx)
+#   $2 - URL scheme; defaults to http when omitted
+#   $3 - string the response must contain; with an empty string any non-empty
+#        response counts
+# Globals written: service, scheme, match, id, n, ip, port
+# Usage example: check_service nginx/nginx http "Welcome to nginx!"
+function check_service() {
+  echo "${FUNCNAME[0]}: checking service state for $1 over $2 with match string $3"
+  service=$1
+  # Without a scheme the URL would be "://ip:port" and the wait would never end
+  scheme=${2:-http}
+  match="$3"
+  id=$(rancher ps | grep " $service " | awk "{print \$1}")
+  n=0
+  ip=$(rancher inspect $id | jq -r ".publicEndpoints[$n].ipAddress")
+  # "null" marks the end of the endpoint list; an empty value means the
+  # inspect call itself produced nothing
+  while [[ -n "$ip" && "$ip" != "null" ]]; do
+    port=$(rancher inspect $id | jq -r ".publicEndpoints[$n].port")
+    while [[ $(wget -qO- $scheme://$ip:$port | grep -c "$match") == 0 ]]; do
+      echo "$service service is NOT active at address $scheme://$ip:$port, waiting 10 seconds"
+      sleep 10
+    done
+    echo "$service service is active at address $scheme://$ip:$port"
+    n=$((n + 1))
+    ip=$(rancher inspect $id | jq -r ".publicEndpoints[$n].ipAddress")
+  done
+}
+
+# Wait n 10-second tries for service to be active
+# Arguments:
+#   $1 - service name as listed by "rancher ps" (e.g. nginx/nginx)
+#   $2 - maximum number of 10-second waits; defaults to 6 when omitted
+# Globals written: service, tries, delay, id, health
+# Returns after the service reports healthState "healthy" or the tries are
+# used up, whichever comes first; the final state is echoed either way.
+# Usage example: wait_till_healthy nginx/nginx 6
+function wait_till_healthy() {
+  service=$1
+  tries=${2:-6}
+
+  delay=$((tries * 10))
+  echo "${FUNCNAME[0]}: waiting for service $service to be ready in $delay seconds"
+  id=$(rancher ps | grep " $service " | awk "{print \$1}")
+  health=$(rancher inspect $id | jq -r ".healthState")
+  # Count the tries down so the wait is bounded even if the service never
+  # becomes healthy
+  while (( tries > 0 )) && [[ "$health" != "healthy" ]]; do
+    echo $service is $health
+    sleep 10
+    tries=$((tries - 1))
+    health=$(rancher inspect $id | jq -r ".healthState")
+  done
+  echo $service state is $(rancher inspect $id | jq -r ".state")
+}
+
+# Start service based upon docker image and simple templates
+# Writes docker-compose.yml and rancher-compose.yml into ~/rancher/<service>,
+# starts the stack with "rancher up" and waits for it via wait_till_healthy.
+# Arguments: $1 - service (and stack) name, $2 - docker image, $3 - ports,
+#   $4 - scale
+# Globals written: service, image, ports, scale
+# Usage example: start_simple_service nginx nginx:latest 8081:80 3
+# Usage example: start_simple_service dokuwiki ununseptium/dokuwiki-docker 8082:80 2
+function start_simple_service() {
+ echo "${FUNCNAME[0]}: starting service $1 with image $2, ports $3, and scale $4"
+ service=$1
+ image=$2
+ # port is either a single (unexposed) port, or a source:target pair (source
+ # is the external port)
+ ports=$3
+ scale=$4
+
+ echo "${FUNCNAME[0]}: creating service folder ~/rancher/$service"
+ mkdir ~/rancher/$service
+ cd ~/rancher/$service
+ echo "${FUNCNAME[0]}: creating docker-compose.yml"
+ # Define service via docker-compose.yml
+ cat <<EOF >docker-compose.yml
+version: '2'
+services:
+ $service:
+ image: $image
+ ports:
+ - "$ports"
+EOF
+
+ echo "${FUNCNAME[0]}: syntax checking docker-compose.yml"
+ docker-compose -f docker-compose.yml config
+
+ echo "${FUNCNAME[0]}: creating rancher-compose.yml"
+ cat <<EOF >rancher-compose.yml
+version: '2'
+services:
+ # Reference the service that you want to extend
+ $service:
+ scale: $scale
+EOF
+
+ echo "${FUNCNAME[0]}: starting service $service"
+ # -s names the stack, -d starts it detached
+ rancher up -s $service -d
+
+ # Up to 6 ten-second waits for <stack>/<service>
+ wait_till_healthy "$service/$service" 6
+ cd ~/rancher
+}
+
+# Add load balancer to a service
+# Writes docker-compose-lb.yml and rancher-compose-lb.yml into the existing
+# ~/rancher/<service> folder and starts an "lb" service in the same stack.
+# Arguments: $1 - service (and stack) name, $2 - load balancer source port,
+#   $3 - target port on the service
+# Globals written: service, lbport, port
+# Usage example: lb_service nginx 8000 8081
+# Usage example: lb_service dokuwiki 8001 8082
+function lb_service() {
+ echo "${FUNCNAME[0]}: adding load balancer port $2 to service $1, port $3"
+ service=$1
+ lbport=$2
+ port=$3
+
+ cd ~/rancher/$service
+ echo "${FUNCNAME[0]}: creating docker-compose-lb.yml"
+ # Define lb service via docker-compose.yml
+ cat <<EOF >docker-compose-lb.yml
+version: '2'
+services:
+ lb:
+ ports:
+ - $lbport
+ image: rancher/lb-service-haproxy:latest
+EOF
+
+ echo "${FUNCNAME[0]}: syntax checking docker-compose-lb.yml"
+ docker-compose -f docker-compose-lb.yml config
+
+ echo "${FUNCNAME[0]}: creating rancher-compose-lb.yml"
+ cat <<EOF >rancher-compose-lb.yml
+version: '2'
+services:
+ lb:
+ scale: 1
+ lb_config:
+ port_rules:
+ - source_port: $lbport
+ target_port: $port
+ service: $service/$service
+ health_check:
+ port: 42
+ interval: 2000
+ unhealthy_threshold: 3
+ healthy_threshold: 2
+ response_timeout: 2000
+EOF
+
+ echo "${FUNCNAME[0]}: starting service lb"
+ # Same stack name as the service, but with the lb-specific compose files
+ rancher up -s $service -d --file docker-compose-lb.yml --rancher-file rancher-compose-lb.yml
+
+ wait_till_healthy "$service/lb" 6
+ cd ~/rancher
+}
+
+# Change scale of a service
+# Requests the new scale and then polls every 10 seconds until the service
+# reports that scale and healthState "healthy".
+# Arguments: $1 - service name as listed by "rancher ps", $2 - instance count
+# Globals written: id, scale, health
+# Usage example: scale_service nginx 1
+function scale_service() {
+  # The progress messages name the service passed in, not whatever another
+  # function last stored in the global "service"
+  local service=$1
+  echo "${FUNCNAME[0]}: scaling service $1 to $2 instances"
+  id=$(rancher ps | grep " $1 " | awk '{print $1}')
+  rancher scale $id=$2
+
+  scale=$(rancher inspect $id | jq -r '.currentScale')
+  health=$(rancher inspect $id | jq -r '.healthState')
+  while [[ "$scale" != "$2" || "$health" != "healthy" ]]; do
+    echo "$service is scaled at $scale and is $health"
+    # Wait first, then refresh, so the loop test sees current values
+    sleep 10
+    scale=$(rancher inspect $id | jq -r '.currentScale')
+    health=$(rancher inspect $id | jq -r '.healthState')
+  done
+  echo "$service is scaled at $scale and is $health"
+}
+
+# Get public endpoint for a service
+# Usage example public_endpoint nginx/lb
+function public_endpoint() {
+ id=$(rancher ps | grep " $1 " | awk "{print \$1}")
+ ip=$(rancher inspect $id | jq -r ".publicEndpoints[0].ipAddress")
+ port=$(rancher inspect $id | jq -r ".publicEndpoints[0].port")
+ echo "${FUNCNAME[0]}: $1 is accessible at http://$ip:$port"
+}
+
+# Stop a stack
+# Usage example: stop_stack nginx
+function stop_stack() {
+ echo "${FUNCNAME[0]}: stopping stack $1"
+ rancher stop $(rancher stacks | awk "/$1/{print \$1}")
+}
+
+# Start a stopped stack
+# Usage example: start_stack nginx
+function start_stack() {
+ echo "${FUNCNAME[0]}: starting stack $1"
+ rancher start $(rancher stacks | awk "/$1/{print \$1}")
+ wait_till_healthy $1 6
+}
+
+# Delete a stack
+# Usage example: delete_stack dokuwiki
+function delete_stack() {
+ id=$(rancher stacks | grep "$1" | awk "{print \$1}")
+ echo "${FUNCNAME[0]}: deleting stack $1 with id $id"
+ rancher rm --stop $id
+}
+
+# Delete a service
+# Usage example: delete_service nginx/lb
+function delete_service() {
+ id=$(rancher ps | grep "$1" | awk "{print \$1}")
+ echo "${FUNCNAME[0]}: deleting service $1 with id $id"
+ rancher rm --stop $id
+}
+
+# Start a complex service, i.e. with yaml file customizations
+# Writes a service-specific docker-compose.yml into ~/rancher/<service> (only
+# "grafana" has a template), starts the stack and waits for it to be healthy.
+# Arguments: $1 - service (and stack) name, $2 - ports, $3 - scale
+# Globals written: service, ports, scale
+# Usage example: start_complex_service grafana 3000:3000 1
+function start_complex_service() {
+ echo "${FUNCNAME[0]}: starting service $1 at ports $2, and scale $3"
+ service=$1
+ # port is either a single (unexposed) port, or a source:target pair (source
+ # is the external port)
+ ports=$2
+ # NOTE(review): scale is stored but not written to any compose file here
+ scale=$3
+
+ echo "${FUNCNAME[0]}: creating service folder ~/rancher/$service"
+ mkdir ~/rancher/$service
+ cd ~/rancher/$service
+ echo "${FUNCNAME[0]}: creating docker-compose.yml"
+ # Define service via docker-compose.yml
+ case "$service" in
+ grafana)
+ cat <<EOF >docker-compose.yml
+grafana:
+ image: grafana/grafana:latest
+ ports:
+ - $ports
+ environment:
+ GF_SECURITY_ADMIN_USER: "admin"
+ GF_SECURITY_ADMIN_PASSWORD: "password"
+ GF_SECURITY_SECRET_KEY: $(uuidgen)
+EOF
+ ;;
+
+ *)
+ # Any other service name: no compose file is generated
+ esac
+
+ echo "${FUNCNAME[0]}: starting service $service"
+ rancher up -s $service -d
+
+ wait_till_healthy "$service/$service" 6
+ cd ~/rancher
+}
+
+# Automated demo
+# Runs setup for the given agents, deploys nginx and dokuwiki (each with a
+# load balancer) and grafana, sets up Prometheus through prometheus-tools.sh,
+# connects Grafana to it and prints the public endpoints and the duration.
+# Arguments: $1 - space separated list of agent node IPs (passed to setup)
+# Usage example: demo "172.16.0.7 172.16.0.8 172.16.0.9"
+function demo() {
+ # Deploy apps
+ # Nginx web server, accessible on each machine port 8081, and via load
+ # balancer port 8001
+ start=`date +%s`
+ setup "$1"
+ start_simple_service nginx nginx:latest 8081:80 3
+ check_service nginx/nginx http "Welcome to nginx!"
+ lb_service nginx 8001 80
+ check_service nginx/lb http "Welcome to nginx!"
+ # Dokuwiki server, accessible on each machine port 8082, and via load
+ # balancer port 8002
+ start_simple_service dokuwiki ununseptium/dokuwiki-docker 8082:80 2
+ check_service dokuwiki/dokuwiki http "This topic does not exist yet"
+ lb_service dokuwiki 8002 80
+ check_service dokuwiki/lb http "This topic does not exist yet"
+ # Grafana server, accessible on one machine at port 3000
+ start_complex_service grafana 3000:3000 1
+ id=$(rancher ps | grep " grafana/grafana " | awk "{print \$1}")
+ # Sourcing with "setup" runs that script's own dispatch in this shell and
+ # also makes connect_grafana available; $agents was set by setup above
+ source ~/models/tools/prometheus/prometheus-tools.sh setup "$agents"
+ grafana_ip=$(rancher inspect $id | jq -r ".publicEndpoints[0].ipAddress")
+ prometheus_ip=$(ip route get 8.8.8.8 | awk '{print $NF; exit}')
+ connect_grafana $prometheus_ip $grafana_ip
+ public_endpoint nginx/lb
+ public_endpoint dokuwiki/lb
+ public_endpoint grafana/grafana
+
+ # Elapsed wall-clock time, rounded down to whole minutes
+ end=`date +%s`
+ runtime=$((end-start))
+ runtime=$((runtime/60))
+ echo "${FUNCNAME[0]}: Demo duration = $runtime minutes"
+}
+
+# Automate the installation
+# Installs the Rancher master on this host, installs and configures the CLI
+# tools, then registers every agent host in the Default environment.
+# Arguments: $1 - space separated list of agent node IPs
+# Globals written: ip, agents
+function setup() {
+  # Installation: http://rancher.com/docs/rancher/v1.6/en/
+  # Install rancher server (master) at primary interface of host
+  # Account control is disabled (open access to API), and Default env created
+  # Take the value after "src" instead of the last field, because newer
+  # iproute2 ends the line with "uid <n>".
+  ip=$(ip route get 1 | awk '{for (i = 1; i < NF; i++) if ($i == "src") {print $(i + 1); exit}}')
+  setup_master $ip
+  # Install rancher CLI tools (rancher, rancher-compose), register with master
+  # and setup CLI environment (e.g. API access/secret keys)
+  install_cli_tools $ip
+
+  # Add agent hosts per http://rancher.com/docs/rancher/v1.6/en/hosts/custom/
+  agents="$1"
+  for agent in $agents; do
+    setup_agent Default $agent
+  done
+}
+
+# Clean the installation
+function clean() {
+ delete_service nginx/lb
+ delete_stack nginx
+ delete_service dokuwiki/lb
+ delete_stack dokuwiki
+ agents="$1"
+ for agent in $agents; do
+ stop_agent $agent
+ done
+ sudo docker stop rancher
+ sudo docker rm -v rancher
+ sudo apt-get remove -y docker-ce
+}
+
+# Command dispatch: $1 selects the action and $2 is the space separated list
+# of agent node IPs where the action takes one. When the file is sourced with
+# no matching action, only the functions are defined.
+export WORK_DIR=$(pwd)
+case "$1" in
+  master)
+    # Take the value after "src" instead of the last field, because newer
+    # iproute2 ends the line with "uid <n>".
+    ip=$(ip route get 1 | awk '{for (i = 1; i < NF; i++) if ($i == "src") {print $(i + 1); exit}}')
+    setup_master $ip
+    ;;
+  agents)
+    agents="$2"
+    for agent in $agents; do
+      setup_agent Default $agent
+    done
+    ;;
+  ceph)
+    # TODO Ceph support for rancher, e.g. re
+    # http://rancher.com/docs/rancher/latest/en/rancher-services/storage-service/
+    # https://github.com/rancher/rancher/issues/8722
+    # setup_ceph "$2" $3 $4 $5
+    ;;
+  demo)
+    demo "$2"
+    ;;
+  setup)
+    setup "$2"
+    ;;
+  all)
+    # demo() calls setup() itself, so calling setup here as well would install
+    # the master and register the agents twice.
+    demo "$2"
+    # check_service needs a scheme and a match string; without them it polls
+    # "://ip:port" forever. An empty match accepts any non-empty response.
+    check_service nginx/lb http "Welcome to nginx!"
+    check_service dokuwiki/lb http "This topic does not exist yet"
+    check_service grafana/grafana http ""
+    ;;
+  clean)
+    clean "$2"
+    ;;
+  *)
+    # Show the usage text only when executed, not when sourced
+    if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then grep '#. ' "$0"; fi
+    ;;
+esac
diff --git a/tools/traffic.sh b/tools/traffic.sh
new file mode 100644
index 0000000..c020b6c
--- /dev/null
+++ b/tools/traffic.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+# Copyright 2017 AT&T Intellectual Property, Inc
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#. What this is: semi-random request generator for a web service
+#.
+#. How to use:
+#. $ git clone https://gerrit.opnfv.org/gerrit/models
+#. $ bash models/tools/traffic.sh <url>
+#. <url>: address of the web service
+
+echo "$0: $(date) Generate some traffic, somewhat randomly"
+ns="0 00 000"
+while true
+do
+ for n in $ns; do
+ sleep .$n$[ ( $RANDOM % 10 ) + 1 ]s
+ curl -s $1 > /dev/null
+ done
+done