From b2c252cd8913ef15a00d63a391da1c8a8a17d739 Mon Sep 17 00:00:00 2001 From: Bryan Sullivan Date: Tue, 17 Oct 2017 09:55:26 -0700 Subject: Merge AT&T WIP on modeled cloud-native stacks into Models Change-Id: I646825bf7d1a9c1be9c00475028084f920c9d399 Signed-off-by: Bryan Sullivan --- docs/images/models-k8s.png | Bin 0 -> 53813 bytes tools/README.md | 16 + tools/docker/demo_deploy.sh | 60 + tools/docker/docker-cluster.sh | 221 +++ tools/docker/nginx.json | 67 + tools/kubernetes/README.md | 17 + tools/kubernetes/demo_deploy.sh | 74 + tools/kubernetes/k8s-cluster.sh | 438 ++++++ tools/maas/deploy.sh | 75 + tools/prometheus/README.md | 10 + .../dashboards/Docker_Dashboard-1503539375161.json | 712 +++++++++ ..._Host_and_Container_Overview-1503539411705.json | 1618 +++++++++++++++++++ ...Node_Exporter_Server_Metrics-1503539692670.json | 1632 ++++++++++++++++++++ .../Node_exporter_single_server-1503539807236.json | 792 ++++++++++ tools/prometheus/prometheus-tools.sh | 228 +++ tools/rancher/demo_deploy.sh | 65 + tools/rancher/rancher-cluster.sh | 529 +++++++ tools/traffic.sh | 31 + 18 files changed, 6585 insertions(+) create mode 100644 docs/images/models-k8s.png create mode 100644 tools/README.md create mode 100644 tools/docker/demo_deploy.sh create mode 100644 tools/docker/docker-cluster.sh create mode 100644 tools/docker/nginx.json create mode 100644 tools/kubernetes/README.md create mode 100644 tools/kubernetes/demo_deploy.sh create mode 100644 tools/kubernetes/k8s-cluster.sh create mode 100644 tools/maas/deploy.sh create mode 100644 tools/prometheus/README.md create mode 100644 tools/prometheus/dashboards/Docker_Dashboard-1503539375161.json create mode 100644 tools/prometheus/dashboards/Docker_Host_and_Container_Overview-1503539411705.json create mode 100644 tools/prometheus/dashboards/Node_Exporter_Server_Metrics-1503539692670.json create mode 100644 tools/prometheus/dashboards/Node_exporter_single_server-1503539807236.json create mode 100644 
tools/prometheus/prometheus-tools.sh create mode 100644 tools/rancher/demo_deploy.sh create mode 100644 tools/rancher/rancher-cluster.sh create mode 100644 tools/traffic.sh diff --git a/docs/images/models-k8s.png b/docs/images/models-k8s.png new file mode 100644 index 0000000..c54bcdb Binary files /dev/null and b/docs/images/models-k8s.png differ diff --git a/tools/README.md b/tools/README.md new file mode 100644 index 0000000..16c5b79 --- /dev/null +++ b/tools/README.md @@ -0,0 +1,16 @@ +This repo contains experimental scripts etc for setting up cloud-native stacks for application deployment and management on bare-metal servers. A lot of cloud-native focus so far has been on public cloud providers (AWS, GCE, Azure) but there aren't many tools and even fewer full-stack open source platforms for setting up bare metal servers with the same types of cloud-native stack features. This repo is thus a collection of tools in development toward that goal, useful in experimentation, demonstration, and further investigation into characteristics of cloud-native platforms in bare-metal environments, e.g. efficiency, performance, security, and resilience. + +The toolset will eventually include these elements of one or more full-stack platform solutions: +* hardware prerequisite/options guidance +* container-focused application runtime environment, e.g. + * kubernetes + * docker-ce + * rancher +* software-defined storage backends, e.g. + * ceph +* runtime-native networking ("out of the box" networking features, vs some special add-on networking software) +* app orchestration, e.g. 
via + * cloudify + * ONAP + * Helm +* applications useful for platform characterization \ No newline at end of file diff --git a/tools/docker/demo_deploy.sh b/tools/docker/demo_deploy.sh new file mode 100644 index 0000000..cbfe949 --- /dev/null +++ b/tools/docker/demo_deploy.sh @@ -0,0 +1,60 @@ +#!/bin/bash +# Copyright 2017 AT&T Intellectual Property, Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +#. What this is: Complete scripted deployment of an experimental Docker-based +#. cloud-native application platform. When complete, Docker-CE and the following +#. will be installed: +#. - nginx as demo application +#. - prometheus + grafana for cluster monitoring/stats +#. Prometheus dashboard: http://:9090 +#. Grafana dashboard: http://:3000 +#. +#. Prerequisites: +#. - Ubuntu server for cluster nodes (admin/master and worker nodes) +#. - MAAS server as cluster admin for Rancher master/worker nodes +#. - Password-less ssh key provided for node setup +#. Usage: on the MAAS server +#. $ git clone https://gerrit.opnfv.org/gerrit/models ~/models +#. $ bash ~/models/tools/docker/demo_deploy.sh "" +#. "" [] +#. : name of private key for cluster node ssh (in current folder) +#. : space separated list of hostnames managed by MAAS +#. : IP of master node +#. : space separated list of worker node IPs +#. 
: optional name of script for extra setup functions as needed + +key=$1 +nodes="$2" +master=$3 +workers="$4" +extras=$5 + +source ~/models/tools/maas/deploy.sh $1 "$2" $5 +eval `ssh-agent` +ssh-add $key +echo "Setting up Docker..." +bash ~/models/tools/docker/docker-cluster.sh all $master "$workers" +# TODO: Figure this out... Have to break the setup into two steps as something +# causes the ssh session to end before the prometheus setup, if both scripts +# (docker-cluster and prometheus-tools) are in the same ssh session +echo "Setting up Prometheus..." +scp -o StrictHostKeyChecking=no $key ubuntu@$master:/home/ubuntu/$key +ssh -x -o StrictHostKeyChecking=no ubuntu@$master < "" +#. Automate setup and start demo services. +#. : master node IPs +#. : space-separated list of worker node IPs +#. $ bash docker-cluster.sh setup "" +#. Installs and starts master and worker nodes. +#. $ bash docker-cluster.sh create +#. : Demo service name to start. +#. Currently supported: nginx +#. $ bash docker-cluster.sh delete +#. : Service name to delete. +#. $ bash docker-cluster.sh clean [] +#. : optional IP address of node to clean. +#. By default, cleans the entire cluster. +#. 
+ +# Setup master and worker hosts +function setup() { + # Per https://docs.docker.com/engine/swarm/swarm-tutorial/ + cat >/tmp/env.sh </tmp/prereqs.sh <<'EOF' +#!/bin/bash +# Per https://docs.docker.com/engine/installation/linux/docker-ce/ubuntu/ +sudo apt-get remove -y docker docker-engine docker.io docker-ce +sudo apt-get update +sudo apt-get install -y \ + linux-image-extra-$(uname -r) \ + linux-image-extra-virtual +sudo apt-get install -y \ + apt-transport-https \ + ca-certificates \ + curl \ + software-properties-common +curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add - +sudo add-apt-repository \ + "deb [arch=amd64] https://download.docker.com/linux/ubuntu \ + $(lsb_release -cs) \ + stable" +sudo apt-get update +sudo apt-get install -y docker-ce +EOF + + # jq is used for parsing API responses + sudo apt-get install -y jq + scp -o StrictHostKeyChecking=no /tmp/prereqs.sh ubuntu@$master:/home/ubuntu/prereqs.sh + ssh -x -o StrictHostKeyChecking=no ubuntu@$master bash /home/ubuntu/prereqs.sh + # activate docker API + # Per https://www.ivankrizsan.se/2016/05/18/enabling-docker-remote-api-on-ubuntu-16-04/ + ssh -x -o StrictHostKeyChecking=no ubuntu@$master <:9090 +#. Grafana dashboard: http://:3000 +#. +#. Prerequisites: +#. - Ubuntu server for kubernetes cluster nodes (admin/master and agent nodes) +#. - MAAS server as cluster admin for kubernetes master/agent nodes +#. - Password-less ssh key provided for node setup +#. Usage: on the MAAS server +#. $ git clone https://gerrit.opnfv.org/gerrit/models ~/models +#. $ bash ~/models/tools/kubernetes/demo_deploy.sh "" +#. "" [] +#. : name of private key for cluster node ssh (in current folder) +#. : space separated list of hostnames managed by MAAS +#. : IP of cluster admin node +#. : space separated list of agent node IPs +#. : CIDR formatted public network +#. : CIDR formatted private network (may be same as pub-net) +#. 
: optional name of script for extra setup functions as needed + +key=$1 +nodes="$2" +admin_ip=$3 +agent_ips="$4" +extras=$5 + +source ~/models/tools/maas/deploy.sh $1 "$2" $5 +eval `ssh-agent` +ssh-add $key +if [[ "x$extras" != "x" ]]; then source $extras; fi +scp -o StrictHostKeyChecking=no $key ubuntu@$admin_ip:/home/ubuntu/$key +echo "Setting up kubernetes..." +ssh -x ubuntu@$admin_ip <" +#. nodes: space-separated list of ceph node IPs +#. $ bash k8s-cluster.sh ceph "" [ceph_dev] +#. nodes: space-separated list of ceph node IPs +#. cluster-net: CIDR of ceph cluster network e.g. 10.0.0.1/24 +#. public-net: CIDR of public network +#. ceph_dev: disk to use for ceph. ***MUST NOT BE USED FOR ANY OTHER PURPOSE*** +#. if not provided, ceph data will be stored on osd nodes in /ceph +#. $ bash k8s-cluster.sh helm +#. Setup helm as app kubernetes orchestration tool +#. $ bash k8s-cluster.sh demo +#. Install helm charts for mediawiki and dokuwiki +#. $ bash k8s-cluster.sh all "" [ceph_dev] +#. Runs all the steps above +#. +#. 
Status: work in progress, incomplete +# + +function setup_prereqs() { + echo "${FUNCNAME[0]}: Create prerequisite setup script" + cat <<'EOG' >/tmp/prereqs.sh +#!/bin/bash +# Basic server pre-reqs +sudo apt-get -y remove kubectl kubelet kubeadm +sudo apt-get update +sudo apt-get upgrade -y +# Set hostname on agent nodes +if [[ "$1" == "agent" ]]; then + echo $(ip route get 8.8.8.8 | awk '{print $NF; exit}') $HOSTNAME | sudo tee -a /etc/hosts +fi +# Install docker 1.12 (default for xenial is 1.12.6) +sudo apt-get install -y docker.io +sudo service docker start +export KUBE_VERSION=1.7.5 +# per https://kubernetes.io/docs/setup/independent/create-cluster-kubeadm/ +# Install kubelet, kubeadm, kubectl per https://kubernetes.io/docs/setup/independent/install-kubeadm/ +sudo apt-get update && sudo apt-get install -y apt-transport-https +curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add - +cat <>/tmp/kubeadm.out + cat /tmp/kubeadm.out + export k8s_joincmd=$(grep "kubeadm join" /tmp/kubeadm.out) + echo "${FUNCNAME[0]}: Cluster join command for manual use if needed: $k8s_joincmd" + + # Start cluster + echo "${FUNCNAME[0]}: Start the cluster" + mkdir -p $HOME/.kube + sudo cp -f /etc/kubernetes/admin.conf $HOME/.kube/config + sudo chown $(id -u):$(id -g) $HOME/.kube/config + # Deploy pod network + echo "${FUNCNAME[0]}: Deploy calico as CNI" + sudo kubectl apply -f http://docs.projectcalico.org/v2.4/getting-started/kubernetes/installation/hosted/kubeadm/1.6/calico.yaml +} + +function setup_k8s_agents() { + agents="$1" + export k8s_joincmd=$(grep "kubeadm join" /tmp/kubeadm.out) + echo "${FUNCNAME[0]}: Installing agents at $1 with joincmd: $k8s_joincmd" + + setup_prereqs + + kubedns=$(kubectl get pods --all-namespaces | grep kube-dns | awk '{print $4}') + while [[ "$kubedns" != "Running" ]]; do + echo "${FUNCNAME[0]}: kube-dns status is $kubedns. 
Waiting 60 seconds for it to be 'Running'" + sleep 60 + kubedns=$(kubectl get pods --all-namespaces | grep kube-dns | awk '{print $4}') + done + echo "${FUNCNAME[0]}: kube-dns status is $kubedns" + + for agent in $agents; do + echo "${FUNCNAME[0]}: Install agent at $agent" + scp -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no /tmp/prereqs.sh ubuntu@$agent:/tmp/prereqs.sh + ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no ubuntu@$agent bash /tmp/prereqs.sh agent + # Workaround for "[preflight] Some fatal errors occurred: /var/lib/kubelet is not empty" per https://github.com/kubernetes/kubeadm/issues/1 + ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no ubuntu@$agent sudo kubeadm reset + ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no ubuntu@$agent sudo $k8s_joincmd + done + + echo "${FUNCNAME[0]}: Cluster is ready when all nodes in the output of 'kubectl get nodes' show as 'Ready'." +} + +function setup_ceph() { + node_ips=$1 + cluster_net=$2 + public_net=$3 + ceph_dev=$4 + echo "${FUNCNAME[0]}: Deploying ceph-mon on localhost $HOSTNAME" + echo "${FUNCNAME[0]}: Deploying ceph-osd on nodes $node_ips" + echo "${FUNCNAME[0]}: Setting cluster-network=$cluster_net and public-network=$public_net" + mon_ip=$(ip route get 8.8.8.8 | awk '{print $NF; exit}') + all_nodes="$mon_ip $node_ips" + # Also caches the server fingerprints so ceph-deploy does not prompt the user + # Note this loop may be partially redundant with the ceph-deploy steps below + for node_ip in $all_nodes; do + echo "${FUNCNAME[0]}: Install ntp and ceph on $node_ip" + ssh -x -o StrictHostKeyChecking=no ubuntu@$node_ip <>ceph.conf +osd max object name len = 256 +osd max object namespace len = 64 +EOF + cat ceph.conf + + echo "${FUNCNAME[0]}: Deploy ceph packages on other nodes" + ceph-deploy install $mon_ip $node_ips + + echo "${FUNCNAME[0]}: Deploy the initial monitor and gather the keys" + ceph-deploy mon create-initial + + if [[ "x$ceph_dev" 
== "x" ]]; then + n=1 + for node_ip in $node_ips; do + echo "${FUNCNAME[0]}: Prepare ceph OSD on node $node_ip" + echo "$node_ip ceph-osd$n" | sudo tee -a /etc/hosts + # Using ceph-osd$n here avoids need for manual acceptance of the new server hash + ssh -x -o StrictHostKeyChecking=no ubuntu@ceph-osd$n </tmp/ceph-sc.yaml +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: slow +provisioner: kubernetes.io/rbd +parameters: + monitors: $mon_ip:6789 + adminId: admin + adminSecretName: ceph-secret-admin + adminSecretNamespace: "kube-system" + pool: kube + userId: kube + userSecretName: ceph-secret-user +EOF + # TODO: find out where in the above ~/.kube folders became owned by root + sudo chown -R ubuntu:ubuntu ~/.kube/* + kubectl create -f /tmp/ceph-sc.yaml + + echo "${FUNCNAME[0]}: Create storage pool 'kube'" + # https://github.com/kubernetes/examples/blob/master/staging/persistent-volume-provisioning/README.md method + sudo ceph osd pool create kube 32 32 + + echo "${FUNCNAME[0]}: Authorize client 'kube' access to pool 'kube'" + sudo ceph auth get-or-create client.kube mon 'allow r' osd 'allow rwx pool=kube' + + echo "${FUNCNAME[0]}: Create ceph-secret-user secret in namespace 'default'" + kube_key=$(sudo ceph auth get-key client.kube) + kubectl create secret generic ceph-secret-user --from-literal=key="$kube_key" --namespace=default --type=kubernetes.io/rbd + # A similar secret must be created in other namespaces that intend to access the ceph pool + + # Per https://github.com/kubernetes/examples/blob/master/staging/persistent-volume-provisioning/README.md + + echo "${FUNCNAME[0]}: Create andtest a persistentVolumeClaim" + cat </tmp/ceph-pvc.yaml +{ + "kind": "PersistentVolumeClaim", + "apiVersion": "v1", + "metadata": { + "name": "claim1", + "annotations": { + "volume.beta.kubernetes.io/storage-class": "slow" + } + }, + "spec": { + "accessModes": [ + "ReadWriteOnce" + ], + "resources": { + "requests": { + "storage": "3Gi" + } + } + } +} +EOF + 
kubectl create -f /tmp/ceph-pvc.yaml + while [[ "x$(kubectl get pvc -o jsonpath='{.status.phase}' claim1)" != "xBound" ]]; do + echo "${FUNCNAME[0]}: Waiting for pvc claim1 to be 'Bound'" + kubectl describe pvc + sleep 10 + done + echo "${FUNCNAME[0]}: pvc claim1 successfully bound to $(kubectl get pvc -o jsonpath='{.spec.volumeName}' claim1)" + kubectl get pvc + kubectl delete pvc claim1 + kubectl describe pods +} + +function wait_for_service() { + echo "${FUNCNAME[0]}: Waiting for service $1 to be available" + pod=$(kubectl get pods --namespace default | awk "/$1/ { print \$1 }") + echo "${FUNCNAME[0]}: Service $1 is at pod $pod" + ready=$(kubectl get pods --namespace default -o jsonpath='{.status.containerStatuses[0].ready}' $pod) + while [[ "$ready" != "true" ]]; do + echo "${FUNCNAME[0]}: $1 container is not yet ready... waiting 10 seconds" + sleep 10 + # TODO: figure out why transient pods sometimes mess up this logic, thus need to re-get the pods + pod=$(kubectl get pods --namespace default | awk "/$1/ { print \$1 }") + ready=$(kubectl get pods --namespace default -o jsonpath='{.status.containerStatuses[0].ready}' $pod) + done + echo "${FUNCNAME[0]}: pod $pod container status is $ready" + host_ip=$(kubectl get pods --namespace default -o jsonpath='{.status.hostIP}' $pod) + port=$(kubectl get --namespace default -o jsonpath="{.spec.ports[0].nodePort}" services $1) + echo "${FUNCNAME[0]}: pod $pod container is at host $host_ip and port $port" + while ! curl http://$host_ip:$port ; do + echo "${FUNCNAME[0]}: $1 service is not yet responding... 
waiting 10 seconds" + sleep 10 + done + echo "${FUNCNAME[0]}: $1 is available at http://$host_ip:$port" +} + +function demo_chart() { + cd ~ + rm -rf charts + git clone https://github.com/kubernetes/charts.git + cd charts/stable + case "$1" in + mediawiki) + # NOT YET WORKING + # mariadb: Readiness probe failed: mysqladmin: connect to server at 'localhost' failed + mkdir ./mediawiki/charts + cp -r ./mariadb ./mediawiki/charts + # LoadBalancer is N/A for baremetal (public cloud only) - use NodePort + sed -i -- 's/LoadBalancer/NodePort/g' ./mediawiki/values.yaml + # Select the storageClass created in the ceph setup step + sed -i -- 's/# storageClass:/storageClass: "slow"/g' ./mediawiki/values.yaml + sed -i -- 's/# storageClass: "-"/storageClass: "slow"/g' ./mediawiki/charts/mariadb/values.yaml + helm install --name mw -f ./mediawiki/values.yaml ./mediawiki + wait_for_service mw-mediawiki + ;; + dokuwiki) + sed -i -- 's/# storageClass:/storageClass: "slow"/g' ./dokuwiki/values.yaml + sed -i -- 's/LoadBalancer/NodePort/g' ./dokuwiki/values.yaml + helm install --name dw -f ./dokuwiki/values.yaml ./dokuwiki + wait_for_service dw-dokuwiki + ;; + wordpress) + # NOT YET WORKING + # mariadb: Readiness probe failed: mysqladmin: connect to server at 'localhost' failed + mkdir ./wordpress/charts + cp -r ./mariadb ./wordpress/charts + sed -i -- 's/LoadBalancer/NodePort/g' ./wordpress/values.yaml + sed -i -- 's/# storageClass: "-"/storageClass: "slow"/g' ./wordpress/values.yaml + sed -i -- 's/# storageClass: "-"/storageClass: "slow"/g' ./wordpress/charts/mariadb/values.yaml + helm install --name wp -f ./wordpress/values.yaml ./wordpress + wait_for_service wp-wordpress + ;; + redmine) + # NOT YET WORKING + # mariadb: Readiness probe failed: mysqladmin: connect to server at 'localhost' failed + mkdir ./redmine/charts + cp -r ./mariadb ./redmine/charts + cp -r ./postgresql ./redmine/charts + sed -i -- 's/LoadBalancer/NodePort/g' ./redmine/values.yaml + sed -i -- 's/# storageClass: 
"-"/storageClass: "slow"/g' ./redmine/values.yaml + sed -i -- 's/# storageClass: "-"/storageClass: "slow"/g' ./redmine/charts/mariadb/values.yaml + sed -i -- 's/# storageClass: "-"/storageClass: "slow"/g' ./redmine/charts/postgresql/values.yaml + helm install --name rdm -f ./redmine/values.yaml ./redmine + wait_for_service rdm-redmine + ;; + owncloud) + # NOT YET WORKING: needs resolvable hostname for service + mkdir ./owncloud/charts + cp -r ./mariadb ./owncloud/charts + sed -i -- 's/LoadBalancer/NodePort/g' ./owncloud/values.yaml + sed -i -- 's/# storageClass: "-"/storageClass: "slow"/g' ./owncloud/values.yaml + sed -i -- 's/# storageClass: "-"/storageClass: "slow"/g' ./owncloud/charts/mariadb/values.yaml + helm install --name oc -f ./owncloud/values.yaml ./owncloud + wait_for_service oc-owncloud + ;; + *) + echo "${FUNCNAME[0]}: demo not implemented for $1" + esac +# extra useful commands +# kubectl describe pvc +# kubectl get pvc +# kubectl describe pods +# kubectl get pods --namespace default +# kubectl get pods --all-namespaces +# kubectl get svc --namespace default dw-dokuwiki +# kubectl describe svc --namespace default dw-dokuwiki +# kubectl describe pods --namespace default dw-dokuwiki +} + +function setup_helm() { + echo "${FUNCNAME[0]}: Setup helm" + # Install Helm + cd ~ + curl https://raw.githubusercontent.com/kubernetes/helm/master/scripts/get > get_helm.sh + chmod 700 get_helm.sh + ./get_helm.sh + helm init + helm repo update + # TODO: Workaround for bug https://github.com/kubernetes/helm/issues/2224 + # For testing use only! + kubectl create clusterrolebinding permissive-binding --clusterrole=cluster-admin --user=admin --user=kubelet --group=system:serviceaccounts; + # TODO: workaround for tiller FailedScheduling (No nodes are available that match all of the following predicates:: PodToleratesNodeTaints (1).) 
+ # kubectl taint nodes $HOSTNAME node-role.kubernetes.io/master:NoSchedule- + # Wait till tiller is running + tiller_deploy=$(kubectl get pods --all-namespaces | grep tiller-deploy | awk '{print $4}') + while [[ "$tiller_deploy" != "Running" ]]; do + echo "${FUNCNAME[0]}: tiller-deploy status is $tiller_deploy. Waiting 60 seconds for it to be 'Running'" + sleep 60 + tiller_deploy=$(kubectl get pods --all-namespaces | grep tiller-deploy | awk '{print $4}') + done + echo "${FUNCNAME[0]}: tiller-deploy status is $tiller_deploy" + + # Install services via helm charts from https://kubeapps.com/charts + # e.g. helm install stable/dokuwiki +} + +export WORK_DIR=$(pwd) +case "$1" in + master) + setup_k8s_master + ;; + agents) + setup_k8s_agents "$2" + ;; + ceph) + setup_ceph "$2" $3 $4 $5 + ;; + helm) + setup_helm + ;; + demo) + demo_chart $2 + ;; + all) + setup_k8s_master + setup_k8s_agents "$2" + setup_ceph "$2" $3 $4 $5 + setup_helm + demo_chart dokuwiki + ;; + clean) + # TODO + ;; + *) + if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then grep '#. ' $0; fi +esac diff --git a/tools/maas/deploy.sh b/tools/maas/deploy.sh new file mode 100644 index 0000000..ae89893 --- /dev/null +++ b/tools/maas/deploy.sh @@ -0,0 +1,75 @@ +#!/bin/bash +# Copyright 2017 AT&T Intellectual Property, Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +#. What this is: Scripted deployment of servers using MAAS. Currently it deploys +#. the default host OS as configured in MAAS. +#. +#. 
Prerequisites: +#. - MAAS server configured to admin a set of servers +#. - Password-less ssh key provided for node setup +#. Usage: on the MAAS server +#. $ git clone https://gerrit.opnfv.org/gerrit/models ~/models +#. $ source ~/models/tools/maas/deploy.sh <key> "<nodes>" [<extras>] +#. <key>: name of private key for cluster node ssh (in current folder) +#. <nodes>: space separated list of hostnames managed by MAAS +#. <extras>: optional name of script for extra setup functions as needed + +function wait_node_status() { + status=$(maas opnfv machines read hostname=$1 | jq -r ".[0].status_name") + while [[ "x$status" != "x$2" ]]; do + echo "$1 status is $status ... waiting for it to be $2" + sleep 30 + status=$(maas opnfv machines read hostname=$1 | jq -r ".[0].status_name") + done + echo "$1 status is $status" +} + +function release_nodes() { + nodes=$1 + for node in $nodes; do + echo "Releasing node $node" + id=$(maas opnfv machines read hostname=$node | jq -r '.[0].system_id') + maas opnfv machines release machines=$id + done +} + +function deploy_nodes() { + nodes=$1 + for node in $nodes; do + echo "Deploying node $node" + id=$(maas opnfv machines read hostname=$node | jq -r '.[0].system_id') + maas opnfv machines allocate system_id=$id + maas opnfv machine deploy $id + done +} + +function wait_nodes_status() { + nodes=$1 + for node in $nodes; do + wait_node_status $node $2 + done +} + +key=$1 +nodes="$2" +extras=$3 + +release_nodes "$nodes" +wait_nodes_status "$nodes" Ready +deploy_nodes "$nodes" +wait_nodes_status "$nodes" Deployed +eval `ssh-agent` +ssh-add $key +if [[ "x$extras" != "x" ]]; then source $extras; fi diff --git a/tools/prometheus/README.md b/tools/prometheus/README.md new file mode 100644 index 0000000..a3dfcc5 --- /dev/null +++ b/tools/prometheus/README.md @@ -0,0 +1,10 @@ +This folder contains scripts etc. to set up [prometheus](https://github.com/prometheus/prometheus) on a server cluster. 
It installs: +* a prometheus server (on the host OS) and [grafana](https://grafana.com/) (in docker) +* prometheus exporters on a set of other nodes, to be monitored + * [node exporter](https://github.com/prometheus/node_exporter) for node basic analytics + * [haproxy exporter](https://github.com/prometheus/haproxy_exporter) for load-balancer stats from haproxy e.g. as used by Rancher +* several sample grafana dashboards... for more see [grafana dashboards for prometheus](https://grafana.com/dashboards?dataSource=prometheus) + +See comments in [prometheus-tools.sh](prometheus-tools.sh) for more info. + +This is a work in progress! diff --git a/tools/prometheus/dashboards/Docker_Dashboard-1503539375161.json b/tools/prometheus/dashboards/Docker_Dashboard-1503539375161.json new file mode 100644 index 0000000..afc69a2 --- /dev/null +++ b/tools/prometheus/dashboards/Docker_Dashboard-1503539375161.json @@ -0,0 +1,712 @@ +{ +"dashboard": { + "__inputs": [ + { + "name": "Prometheus", + "label": "Prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "4.4.3" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "" + } + ], + "annotations": { + "list": [] + }, + "description": "Docker Monitoring Template", + "editable": true, + "gnetId": 179, + "graphTooltip": 1, + "hideControls": false, + "id": null, + "links": [], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": 
"Prometheus", + "editable": true, + "error": false, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 4, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 4, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "(sum(node_memory_MemTotal) - sum(node_memory_MemFree+node_memory_Buffers+node_memory_Cached) ) / sum(node_memory_MemTotal) * 100", + "interval": "10s", + "intervalFactor": 1, + "refId": "A", + "step": 30 + } + ], + "thresholds": "65, 90", + "title": "Memory usage", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "Prometheus", + "decimals": 2, + "editable": true, + "error": false, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 6, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + 
"prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 4, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum(sum by (container_name)( rate(container_cpu_usage_seconds_total{image!=\"\"}[1m] ) )) / count(node_cpu{mode=\"system\"}) * 100", + "interval": "10s", + "intervalFactor": 1, + "refId": "A", + "step": 30 + } + ], + "thresholds": "65, 90", + "title": "CPU usage", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(50, 172, 45, 0.97)", + "rgba(237, 129, 40, 0.89)", + "rgba(245, 54, 54, 0.9)" + ], + "datasource": "Prometheus", + "decimals": 2, + "editable": true, + "error": false, + "format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 7, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 4, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "sum (container_fs_limit_bytes - container_fs_usage_bytes) / sum(container_fs_limit_bytes)", + "interval": "10s", + "intervalFactor": 1, + "metric": "", + "refId": "A", + "step": 30 + } + ], + "thresholds": "65, 90", + "title": 
"Filesystem usage", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Row", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 3, + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "id": 3, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum(rate(container_cpu_user_seconds_total{image!=\"\"}[1m])) by (name))", + "interval": "10s", + "intervalFactor": 1, + "legendFormat": "{{ name }}", + "metric": "container_cpu_user_seconds_total", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Container CPU usage", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": 
false, + "title": "New row", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 2, + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "id": 2, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 200, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum(container_memory_usage_bytes{image!=\"\"}) by (name))", + "interval": "10s", + "intervalFactor": 1, + "legendFormat": "{{ name }}", + "metric": "container_memory_usage:sort_desc", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Container Memory Usage", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 2, + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "id": 8, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + 
"sideWidth": 200, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum by (name) (rate(container_network_receive_bytes_total{image!=\"\"}[1m] ) ))", + "interval": "10s", + "intervalFactor": 1, + "legendFormat": "{{ name }}", + "metric": "container_network_receive_bytes_total", + "refId": "A", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Container Network Input", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 2, + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "id": 9, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": false, + "min": false, + "rightSide": true, + "show": true, + "sideWidth": 200, + "sort": "current", + "sortDesc": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sort_desc(sum by (name) 
(rate(container_network_transmit_bytes_total{image!=\"\"}[1m] ) ))", + "intervalFactor": 2, + "legendFormat": "{{ name }}", + "metric": "container_network_transmit_bytes_total", + "refId": "B", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Container Network Output", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "New row", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "docker" + ], + "templating": { + "list": [] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Docker Dashboard", + "version": 1 +} +} diff --git a/tools/prometheus/dashboards/Docker_Host_and_Container_Overview-1503539411705.json b/tools/prometheus/dashboards/Docker_Host_and_Container_Overview-1503539411705.json new file mode 100644 index 0000000..6db3532 --- /dev/null +++ b/tools/prometheus/dashboards/Docker_Host_and_Container_Overview-1503539411705.json @@ -0,0 +1,1618 @@ +{ +"dashboard": { + "__inputs": [ + { + "name": "Prometheus", + "label": "Prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + 
"name": "Grafana", + "version": "4.4.3" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "table", + "name": "Table", + "version": "" + } + ], + "annotations": { + "list": [] + }, + "description": "A simple overview of the most important Docker host and container metrics. (cAdvisor/Prometheus)", + "editable": true, + "gnetId": 395, + "graphTooltip": 1, + "hideControls": false, + "id": null, + "links": [], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": 143.625, + "panels": [ + { + "aliasColors": { + "SENT": "#BF1B00" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "editable": true, + "error": false, + "fill": 5, + "grid": {}, + "id": 19, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(container_network_receive_bytes_total{id=\"/\"}[$interval])) by (id)", + "intervalFactor": 2, + "legendFormat": "RECEIVED", + "refId": "A", + "step": 10 + }, + { + "expr": "- sum(rate(container_network_transmit_bytes_total{id=\"/\"}[$interval])) by (id)", + "hide": false, + "intervalFactor": 2, + "legendFormat": "SENT", + "refId": "B", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Network Traffic on Node", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "transparent": false, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, 
+ "show": false, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + "Ops-Infrastructure": "#447EBC", + "{}": "#DEDAF7" + }, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 0, + "editable": true, + "error": false, + "fill": 3, + "grid": {}, + "id": 7, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 3, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 10, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 1.9899973849372385, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "count(rate(container_last_seen{name=~\".+\",container_group=\"monitoring\"}[$interval]))", + "intervalFactor": 2, + "legendFormat": "Monitoring", + "metric": "container_last_seen", + "refId": "A", + "step": 10 + }, + { + "expr": "count(rate(container_last_seen{name=~\".+\",container_group=\"ops-infrastructure\"}[$interval]))", + "hide": false, + "intervalFactor": 2, + "legendFormat": "Backend-Infrastructure", + "refId": "B", + "step": 10 + }, + { + "expr": "count(rate(container_last_seen{name=~\".+\",container_group=\"backend-infrastructure\"}[$interval]))", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "Backend-Workers", + "refId": "C", + "step": 10 + }, + { + "expr": "count(rate(container_last_seen{name=~\".+\",container_group=\"backend-workers\"}[$interval]))", + "hide": false, + "intervalFactor": 2, + "legendFormat": "Ops-Infrastructure", + "refId": "D", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Running 
Containers (by Container Group)", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": false, + "values": [] + }, + "yaxes": [ + { + "format": "none", + "label": "", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + "{id=\"/\",instance=\"cadvisor:8080\",job=\"prometheus\"}": "#BA43A9" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "editable": true, + "error": false, + "fill": 3, + "grid": {}, + "id": 5, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 2.0707047594142263, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(container_cpu_system_seconds_total[1m]))", + "hide": true, + "intervalFactor": 2, + "legendFormat": "a", + "refId": "B", + "step": 120 + }, + { + "expr": "sum(rate(container_cpu_system_seconds_total{name=~\".+\"}[1m]))", + "hide": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "nur container", + "refId": "F", + "step": 10 + }, + { + "expr": "sum(rate(container_cpu_system_seconds_total{id=\"/\"}[1m]))", + "hide": true, + "interval": "", + "intervalFactor": 2, + "legendFormat": "nur docker host", + "metric": "", + "refId": "A", + "step": 20 + }, + { + "expr": "sum(rate(process_cpu_seconds_total[$interval])) * 100", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "host", + "metric": "", + "refId": "C", + "step": 10 + }, + { + 
"expr": "sum(rate(container_cpu_system_seconds_total{name=~\".+\"}[1m])) + sum(rate(container_cpu_system_seconds_total{id=\"/\"}[1m])) + sum(rate(process_cpu_seconds_total[1m]))", + "hide": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "D", + "step": 120 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU Usage on Node", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": false, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": "", + "logBase": 1, + "max": 120, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + "Belegete Festplatte": "#BF1B00", + "Free Disk Space": "#7EB26D", + "Used Disk Space": "#BF1B00", + "{}": "#BF1B00" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "editable": true, + "error": false, + "fill": 4, + "grid": {}, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 3, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "node_filesystem_free{fstype=\"aufs\"}", + "hide": false, + "intervalFactor": 2, + "legendFormat": "Free Disk Space", + "refId": "A", + "step": 10 + }, + { + "expr": "node_filesystem_size{fstype=\"aufs\"} - node_filesystem_free{fstype=\"aufs\"}", + "hide": false, + "intervalFactor": 2, + "legendFormat": "Used Disk Space", + "refId": "B", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": 
null, + "title": "Free and Used Disk Space on Node", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": false, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "", + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + "Available Memory": "#7EB26D", + "Unavailable Memory": "#BF1B00" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "editable": true, + "error": false, + "fill": 4, + "grid": {}, + "id": 20, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 3, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 2, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "container_memory_rss{name=~\".+\"}", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{__name__}}", + "refId": "D", + "step": 30 + }, + { + "expr": "sum(container_memory_rss{name=~\".+\"})", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{__name__}}", + "refId": "A", + "step": 20 + }, + { + "expr": "container_memory_usage_bytes{name=~\".+\"}", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{name}}", + "refId": "B", + "step": 20 + }, + { + "expr": "container_memory_rss{id=\"/\"}", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{__name__}}", + "refId": "C", + "step": 30 + }, + { + "expr": "sum(container_memory_rss)", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{__name__}}", + "refId": "E", + "step": 30 + }, + { + "expr": 
"node_memory_Buffers", + "hide": true, + "intervalFactor": 2, + "legendFormat": "node_memory_Dirty", + "refId": "N", + "step": 30 + }, + { + "expr": "node_memory_MemFree", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{__name__}}", + "refId": "F", + "step": 30 + }, + { + "expr": "node_memory_MemAvailable", + "hide": false, + "intervalFactor": 2, + "legendFormat": "Available Memory", + "refId": "H", + "step": 10 + }, + { + "expr": "node_memory_MemTotal - node_memory_MemAvailable", + "hide": false, + "intervalFactor": 2, + "legendFormat": "Unavailable Memory", + "refId": "G", + "step": 10 + }, + { + "expr": "node_memory_Inactive", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{__name__}}", + "refId": "I", + "step": 30 + }, + { + "expr": "node_memory_KernelStack", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{__name__}}", + "refId": "J", + "step": 30 + }, + { + "expr": "node_memory_Active", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{__name__}}", + "refId": "K", + "step": 30 + }, + { + "expr": "node_memory_MemTotal - (node_memory_Active + node_memory_MemFree + node_memory_Inactive)", + "hide": true, + "intervalFactor": 2, + "legendFormat": "Unknown", + "refId": "L", + "step": 40 + }, + { + "expr": "node_memory_MemFree + node_memory_Inactive ", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{__name__}}", + "refId": "M", + "step": 30 + }, + { + "expr": "container_memory_rss{name=~\".+\"}", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{__name__}}", + "refId": "O", + "step": 30 + }, + { + "expr": "node_memory_Inactive + node_memory_MemFree + node_memory_MemAvailable", + "hide": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "P", + "step": 40 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Available Memory on Node", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + 
"xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": false, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "", + "logBase": 1, + "max": 4200000000, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 1.939297855648535, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(node_disk_bytes_read[$interval])) by (device)", + "intervalFactor": 2, + "legendFormat": "OUT on /{{device}}", + "metric": "node_disk_bytes_read", + "refId": "A", + "step": 10 + }, + { + "expr": "sum(rate(node_disk_bytes_written[$interval])) by (device)", + "intervalFactor": 2, + "legendFormat": "IN on /{{device}}", + "metric": "", + "refId": "B", + "step": 10 + }, + { + "expr": "", + "intervalFactor": 2, + "refId": "C" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Disk I/O on Node", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": false, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": 
null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "New row", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 284.609375, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "editable": true, + "error": false, + "fill": 5, + "grid": {}, + "id": 1, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6.0790694124949285, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(container_cpu_usage_seconds_total{name=~\".+\"}[$interval])) by (name) * 100", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{name}}", + "metric": "container_cp", + "refId": "F", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "CPU Usage per Container (Stacked)", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": "", + "logBase": 1, + "max": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, + { + "aliasColors": { + "node_load15": "#CCA300" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "editable": true, + "error": false, + "fill": 0, + "grid": {}, + "id": 4, + "legend": { + "avg": false, + "current": true, + "max": false, + "min": false, + "show": true, + "total": false, + "values": true + }, + "lines": true, 
+ "linewidth": 2, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 5.920930587505071, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "{__name__=~\"^node_load.*\"}", + "intervalFactor": 2, + "legendFormat": "{{__name__}}", + "metric": "node", + "refId": "A", + "step": 4 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "System Load on Node", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Row", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 203.515625, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 9, + "legend": { + "avg": false, + "current": false, + "hideEmpty": false, + "hideZero": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(container_network_transmit_bytes_total{name=~\".+\"}[$interval])) by (name)", + "intervalFactor": 2, + "legendFormat": 
"{{name}}", + "refId": "A", + "step": 2 + }, + { + "expr": "rate(container_network_transmit_bytes_total{id=\"/\"}[$interval])", + "hide": true, + "intervalFactor": 2, + "legendFormat": "", + "refId": "B", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Sent Network Traffic per Container", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "transparent": false, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": "", + "logBase": 10, + "max": 8, + "min": 0, + "show": false + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "editable": true, + "error": false, + "fill": 3, + "grid": {}, + "id": 10, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum(container_memory_rss{name=~\".+\"}) by (name)", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{name}}", + "refId": "A", + "step": 2 + }, + { + "expr": "container_memory_usage_bytes{name=~\".+\"}", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{name}}", + "refId": "B", + "step": 240 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Memory Usage per Container (Stacked)", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + 
"xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "New row", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 222.703125, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(container_network_receive_bytes_total{name=~\".+\"}[$interval])) by (name)", + "intervalFactor": 2, + "legendFormat": "{{name}}", + "refId": "A", + "step": 2 + }, + { + "expr": "- rate(container_network_transmit_bytes_total{name=~\".+\"}[$interval])", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{name}}", + "refId": "B", + "step": 10 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Received Network Traffic per Container", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "transparent": false, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "Bps", + "label": null, + "logBase": 1, + "max": null, + "min": null, + 
"show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "editable": true, + "error": false, + "fill": 3, + "grid": {}, + "id": 11, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "null as zero", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "container_memory_rss{name=~\".+\"}", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{name}}", + "refId": "A", + "step": 20 + }, + { + "expr": "container_memory_usage_bytes{name=~\".+\"}", + "hide": true, + "intervalFactor": 2, + "legendFormat": "{{name}}", + "refId": "B", + "step": 20 + }, + { + "expr": "sum(container_memory_cache{name=~\".+\"}) by (name)", + "hide": false, + "intervalFactor": 2, + "legendFormat": "{{name}}", + "refId": "C", + "step": 2 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Cached Memory per Container (Stacked)", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "New row", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 
"250px", + "panels": [ + { + "columns": [ + { + "text": "Avg", + "value": "avg" + } + ], + "datasource": "Prometheus", + "editable": true, + "error": false, + "fontSize": "100%", + "hideTimeOverride": false, + "id": 18, + "links": [], + "pageSize": 100, + "scroll": true, + "showHeader": true, + "sort": { + "col": 0, + "desc": true + }, + "span": 6, + "styles": [ + { + "dateFormat": "YYYY-MM-DD HH:mm:ss", + "pattern": "Time", + "type": "date" + }, + { + "colorMode": null, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "decimals": 2, + "pattern": "/.*/", + "thresholds": [], + "type": "number", + "unit": "short" + } + ], + "targets": [ + { + "expr": "cadvisor_version_info", + "intervalFactor": 2, + "legendFormat": "cAdvisor Version: {{cadvisorVersion}}", + "refId": "A", + "step": 2 + }, + { + "expr": "prometheus_build_info", + "intervalFactor": 2, + "legendFormat": "Prometheus Version: {{version}}", + "refId": "B", + "step": 2 + }, + { + "expr": "node_exporter_build_info", + "intervalFactor": 2, + "legendFormat": "Node-Exporter Version: {{version}}", + "refId": "C", + "step": 2 + }, + { + "expr": "cadvisor_version_info", + "intervalFactor": 2, + "legendFormat": "Docker Version: {{dockerVersion}}", + "refId": "D", + "step": 2 + }, + { + "expr": "cadvisor_version_info", + "intervalFactor": 2, + "legendFormat": "Host OS Version: {{osVersion}}", + "refId": "E", + "step": 2 + }, + { + "expr": "cadvisor_version_info", + "intervalFactor": 2, + "legendFormat": "Host Kernel Version: {{kernelVersion}}", + "refId": "F", + "step": 2 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "", + "transform": "timeseries_aggregations", + "type": "table" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Check this out", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 290.98582985381427, + "panels": [], + "repeat": null, + "repeatIteration": null, 
+ "repeatRowId": null, + "showTitle": false, + "title": "New row", + "titleSize": "h6" + }, + { + "collapse": false, + "height": 127, + "panels": [], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "New row", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [], + "templating": { + "list": [ + { + "allValue": ".+", + "current": {}, + "datasource": "Prometheus", + "hide": 0, + "includeAll": true, + "label": "Container Group", + "multi": true, + "name": "containergroup", + "options": [], + "query": "label_values(container_group)", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "auto": true, + "auto_count": 50, + "auto_min": "50s", + "current": { + "text": "auto", + "value": "$__auto_interval" + }, + "datasource": null, + "hide": 0, + "includeAll": false, + "label": "Interval", + "multi": false, + "name": "interval", + "options": [ + { + "selected": true, + "text": "auto", + "value": "$__auto_interval" + }, + { + "selected": false, + "text": "30s", + "value": "30s" + }, + { + "selected": false, + "text": "1m", + "value": "1m" + }, + { + "selected": false, + "text": "2m", + "value": "2m" + }, + { + "selected": false, + "text": "3m", + "value": "3m" + }, + { + "selected": false, + "text": "5m", + "value": "5m" + }, + { + "selected": false, + "text": "7m", + "value": "7m" + }, + { + "selected": false, + "text": "10m", + "value": "10m" + }, + { + "selected": false, + "text": "30m", + "value": "30m" + }, + { + "selected": false, + "text": "1h", + "value": "1h" + }, + { + "selected": false, + "text": "6h", + "value": "6h" + }, + { + "selected": false, + "text": "12h", + "value": "12h" + }, + { + "selected": false, + "text": "1d", + "value": "1d" + }, + { + "selected": false, + "text": "7d", + "value": "7d" + }, + { + "selected": false, + "text": "14d", + "value": "14d" + }, + { + 
"selected": false, + "text": "30d", + "value": "30d" + } + ], + "query": "30s,1m,2m,3m,5m,7m,10m,30m,1h,6h,12h,1d,7d,14d,30d", + "refresh": 2, + "type": "interval" + } + ] + }, + "time": { + "from": "now-15m", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Docker Host & Container Overview", + "version": 1 +} +} diff --git a/tools/prometheus/dashboards/Node_Exporter_Server_Metrics-1503539692670.json b/tools/prometheus/dashboards/Node_Exporter_Server_Metrics-1503539692670.json new file mode 100644 index 0000000..da65d4a --- /dev/null +++ b/tools/prometheus/dashboards/Node_Exporter_Server_Metrics-1503539692670.json @@ -0,0 +1,1632 @@ +{ +"dashboard": { + "__inputs": [ + { + "name": "Prometheus", + "label": "Prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "4.4.3" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "" + }, + { + "type": "panel", + "id": "text", + "name": "Text", + "version": "" + } + ], + "annotations": { + "list": [] + }, + "description": "Dashboard to view multiple servers", + "editable": true, + "gnetId": 405, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [], + "rows": [ + { + "collapse": false, + "height": "25px", + "panels": [ + { + "content": "", + "editable": true, + "error": false, + "id": 11, + "minSpan": 2, + "mode": "html", + "repeat": "node", + "span": 12, + "style": {}, + "title": "$node", + 
"type": "text" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Title", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "25px", + "panels": [ + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "Prometheus", + "editable": true, + "error": false, + "format": "none", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": false, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 20, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "minSpan": 2, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "repeat": "node", + "span": 12, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "count(node_cpu{instance=~\"$node\", mode=\"system\"})", + "interval": "", + "intervalFactor": 2, + "legendFormat": "", + "metric": "", + "refId": "A", + "step": 14400, + "target": "" + } + ], + "thresholds": "", + "title": "CPU Cores", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "New row", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 
3, + "editable": true, + "error": false, + "fill": 10, + "grid": {}, + "id": 7, + "legend": { + "alignAsTable": false, + "avg": false, + "current": false, + "hideEmpty": false, + "max": false, + "min": false, + "rightSide": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 0, + "links": [], + "minSpan": 2, + "nullPointMode": "connected", + "percentage": true, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "node", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": true, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (mode)(irate(node_cpu{mode=\"system\",instance=~'$node'}[5m]))", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{mode}}", + "metric": "", + "refId": "A", + "step": 1200, + "target": "" + }, + { + "expr": "sum by (mode)(irate(node_cpu{mode='user',instance=~'$node'}[5m]))", + "interval": "", + "intervalFactor": 2, + "legendFormat": "user", + "refId": "B", + "step": 1200 + }, + { + "expr": "sum by (mode)(irate(node_cpu{mode='nice',instance=~'$node'}[5m]))", + "interval": "", + "intervalFactor": 2, + "legendFormat": "nice", + "refId": "C", + "step": 1200 + }, + { + "expr": "sum by (mode)(irate(node_cpu{mode='iowait',instance=~'$node'}[5m]))", + "interval": "", + "intervalFactor": 2, + "legendFormat": "iowait", + "refId": "E", + "step": 1200 + }, + { + "expr": "sum by (mode)(irate(node_cpu{mode='steal',instance=~'$node'}[5m]))", + "intervalFactor": 2, + "legendFormat": "steal", + "refId": "H", + "step": 1200 + }, + { + "expr": "sum by (mode)(irate(node_cpu{mode='idle',instance=~'$node'}[5m]))", + "interval": "", + "intervalFactor": 2, + "legendFormat": "idle", + "refId": "D", + "step": 1200 + }, + { + "expr": "sum by (mode)(irate(node_cpu{mode='irq',instance=~'$node'}[5m]))", + "interval": "", + "intervalFactor": 2, + "legendFormat": "irq", + "refId": "F", + "step": 1200 + }, + { + "expr": "sum by 
(mode)(irate(node_cpu{mode='softirq',instance=~'$node'}[5m]))", + "interval": "", + "intervalFactor": 2, + "legendFormat": "softirq", + "refId": "G", + "step": 1200 + }, + { + "expr": "sum by (mode)(irate(node_cpu{mode='guest',instance=~'$node'}[5m]))", + "interval": "", + "intervalFactor": 2, + "legendFormat": "guest", + "refId": "I", + "step": 1200 + } + ], + "thresholds": [ + { + "colorMode": "custom", + "fill": true, + "fillColor": "rgba(216, 200, 27, 0.27)", + "op": "gt", + "value": 0 + } + ], + "timeFrom": null, + "timeShift": null, + "title": "CPU", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "%", + "logBase": 1, + "max": 100, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "CPU", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": { + "Slab": "#E5A8E2", + "Swap": "#E24D42" + }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 2, + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 17, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "minSpan": 2, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "node", + "seriesOverrides": [ + { + "alias": "/Apps|Buffers|Cached|Free|Slab|SwapCached|PageTables|VmallocUsed/", + "fill": 5, + "stack": true + }, + { + "alias": "Swap", + "fill": 5, + "stack": true + } + ], + 
"spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "( node_memory_MemTotal{instance=~'$node'} - node_memory_MemFree{instance=~'$node'} - node_memory_Buffers{instance=~'$node'} - node_memory_Cached{instance=~'$node'} - node_memory_SwapCached{instance=~'$node'} - node_memory_Slab{instance=~'$node'} - node_memory_PageTables{instance=~'$node'} - node_memory_VmallocUsed{instance=~'$node'} )", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Apps", + "metric": "", + "refId": "A", + "step": 1200, + "target": "" + }, + { + "expr": "node_memory_Buffers{instance=~'$node'}", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Buffers", + "refId": "B", + "step": 1200 + }, + { + "expr": "node_memory_Cached{instance=~'$node'}", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Cached", + "refId": "D", + "step": 1200 + }, + { + "expr": "node_memory_MemFree{instance=~'$node'}", + "hide": false, + "interval": "", + "intervalFactor": 2, + "legendFormat": "Free", + "refId": "E", + "step": 1200 + }, + { + "expr": "node_memory_Slab{instance=~'$node'}", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Slab", + "refId": "F", + "step": 1200 + }, + { + "expr": "node_memory_SwapCached{instance=~'$node'}", + "interval": "", + "intervalFactor": 2, + "legendFormat": "SwapCached", + "refId": "G", + "step": 1200 + }, + { + "expr": "node_memory_PageTables{instance=~'$node'}", + "interval": "", + "intervalFactor": 2, + "legendFormat": "PageTables", + "refId": "H", + "step": 1200 + }, + { + "expr": "node_memory_VmallocUsed{instance=~'$node'}", + "interval": "", + "intervalFactor": 2, + "legendFormat": "VmallocUsed", + "metric": "", + "refId": "I", + "step": 1200 + }, + { + "expr": "(node_memory_SwapTotal{instance=~'$node'} - node_memory_SwapFree{instance=~'$node'})", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Swap", + "metric": "", + "refId": "C", + "step": 1200 + }, + { + "expr": 
"node_memory_Committed_AS{instance=~'$node'}", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Committed", + "metric": "", + "refId": "J", + "step": 1200 + }, + { + "expr": "node_memory_Mapped{instance=~'$node'}", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Mapped", + "refId": "K", + "step": 1200 + }, + { + "expr": "node_memory_Active{instance=~'$node'}", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Active", + "metric": "", + "refId": "L", + "step": 1200 + }, + { + "expr": "node_memory_Inactive{instance=~'$node'}", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Inactive", + "metric": "", + "refId": "M", + "step": 1200 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Memory", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "GB", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Memory", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 13, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "minSpan": 2, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "node", + "seriesOverrides": [], + "spaceLength": 10, + 
"span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_load1{instance=~\"$node\"}", + "interval": "", + "intervalFactor": 2, + "legendFormat": "load", + "metric": "", + "refId": "A", + "step": 1200, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Load", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Load", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "decimals": 3, + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "minSpan": 2, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "node", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "100.0 - 100 * (node_filesystem_avail{instance=~'$node',device !~'tmpfs',device!~'by-uuid'} / node_filesystem_size{instance=~'$node',device !~'tmpfs',device!~'by-uuid'})", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{mountpoint}}", + "metric": "", + "refId": "A", + "step": 1200, + "target": "" + } + ], + "thresholds": [], + "timeFrom": 
null, + "timeShift": null, + "title": "Disk Space Used", + "tooltip": { + "msResolution": true, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "logBase": 1, + "max": 100, + "min": 0, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Disk Used", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 19, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "minSpan": 2, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "node", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_disk_io_time_ms{instance=~\"$node\"}[5m])/10", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{device}}", + "metric": "", + "refId": "A", + "step": 1200, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Disk Utilization per Device", + "tooltip": { + "msResolution": false, + "shared": false, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "logBase": 1, + "max": 100, + "min": null, + "show": true + }, + { + 
"format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Disk Utilization", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 14, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "minSpan": 2, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "node", + "seriesOverrides": [ + { + "alias": "/.*_read$/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_disk_reads_completed{instance=~'$node'}[5m])", + "interval": "", + "intervalFactor": 4, + "legendFormat": "{{device}}_read", + "metric": "", + "refId": "A", + "step": 2400, + "target": "" + }, + { + "expr": "irate(node_disk_writes_completed{instance=~'$node'}[5m])", + "intervalFactor": 2, + "legendFormat": "{{device}}_write", + "metric": "", + "refId": "B", + "step": 1200 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Disk IOs per Device", + "tooltip": { + "msResolution": false, + "shared": false, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": "IO/second read (-) / write (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + 
"repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Disk IOs per device", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 18, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "minSpan": 2, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "node", + "seriesOverrides": [ + { + "alias": "/.*_read/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_disk_sectors_read{instance=~'$node'}[5m]) * 512", + "interval": "", + "intervalFactor": 4, + "legendFormat": "{{device}}_read", + "refId": "B", + "step": 2400 + }, + { + "expr": "irate(node_disk_sectors_written{instance=~'$node'}[5m]) * 512", + "interval": "", + "intervalFactor": 4, + "legendFormat": "{{device}}_write", + "metric": "", + "refId": "A", + "step": 2400, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Disk Throughput per Device", + "tooltip": { + "msResolution": false, + "shared": false, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": "Bytes/second read (-) / write (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + 
"showTitle": false, + "title": "Disk Throughput per device", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 22, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "minSpan": 2, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "node", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_context_switches{instance=~\"$node\"}[5m])", + "interval": "", + "intervalFactor": 2, + "legendFormat": "context switches", + "metric": "", + "refId": "A", + "step": 1200, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Context Switches", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "Network Traffic", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 12, + "legend": { + "avg": false, + "current": false, + "max": false, + 
"min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "minSpan": 2, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "node", + "seriesOverrides": [ + { + "alias": "/.*_in/", + "transform": "negative-Y" + } + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_network_receive_bytes{instance=~'$node'}[5m])*8", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{device}}_in", + "metric": "", + "refId": "A", + "step": 1200, + "target": "" + }, + { + "expr": "irate(node_network_transmit_bytes{instance=~'$node'}[5m])*8", + "interval": "", + "intervalFactor": 2, + "legendFormat": "{{device}}_out", + "refId": "B", + "step": 1200 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Network Traffic", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bits", + "label": "bits in (-) / bits out (+)", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "New row", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 21, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": 
[], + "minSpan": 2, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "node", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_netstat_Tcp_CurrEstab{instance=~'$node'}", + "intervalFactor": 2, + "legendFormat": "established", + "refId": "A", + "step": 1200, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Netstat", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "New row", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 23, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "minSpan": 2, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "node", + "seriesOverrides": [ + { + "alias": "/.*Out.*/", + "transform": "negative-Y" + }, + { + "alias": "Udp_NoPorts", + "yaxis": 2 + } + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_netstat_Udp_InDatagrams{instance=~\"$node\"}[5m])", + 
"intervalFactor": 2, + "legendFormat": "Udp_InDatagrams", + "refId": "A", + "step": 1200, + "target": "" + }, + { + "expr": "irate(node_netstat_Udp_InErrors{instance=~\"$node\"}[5m])", + "intervalFactor": 2, + "legendFormat": "Udp_InErrors", + "refId": "B", + "step": 1200 + }, + { + "expr": "irate(node_netstat_Udp_OutDatagrams{instance=~\"$node\"}[5m])", + "interval": "", + "intervalFactor": 2, + "legendFormat": "Udp_OutDatagrams", + "refId": "C", + "step": 1200 + }, + { + "expr": "irate(node_netstat_Udp_NoPorts{instance=~\"$node\"}[5m])", + "intervalFactor": 2, + "legendFormat": "Udp_NoPorts", + "refId": "D", + "step": 1200 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "UDP Stats", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "New row", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 24, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "minSpan": 2, + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "repeat": "node", + "seriesOverrides": [], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + 
"targets": [ + { + "expr": "node_nf_conntrack_entries_limit{instance=~\"$node\"} - node_nf_conntrack_entries{instance=~\"$node\"}", + "intervalFactor": 2, + "legendFormat": "free", + "refId": "A", + "step": 1200, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Conntrack", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "New row", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "prometheus" + ], + "templating": { + "list": [ + { + "allFormat": "glob", + "allValue": null, + "current": {}, + "datasource": "Prometheus", + "hide": 0, + "includeAll": false, + "label": "", + "multi": true, + "multiFormat": "regex values", + "name": "node", + "options": [], + "query": "label_values(node_boot_time, instance)", + "refresh": 1, + "regex": "", + "sort": 1, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-7d", + "to": "now" + }, + "timepicker": { + "now": true, + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Node Exporter Server Metrics", + "version": 1 +} +} diff --git a/tools/prometheus/dashboards/Node_exporter_single_server-1503539807236.json b/tools/prometheus/dashboards/Node_exporter_single_server-1503539807236.json new file 
mode 100644 index 0000000..5dee4b9 --- /dev/null +++ b/tools/prometheus/dashboards/Node_exporter_single_server-1503539807236.json @@ -0,0 +1,792 @@ +{ +"dashboard": { + "__inputs": [ + { + "name": "Prometheus", + "label": "Prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "4.4.3" + }, + { + "type": "panel", + "id": "graph", + "name": "Graph", + "version": "" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "singlestat", + "name": "Singlestat", + "version": "" + } + ], + "annotations": { + "list": [] + }, + "description": "Dashboard to get an overview of one server", + "editable": true, + "gnetId": 22, + "graphTooltip": 0, + "hideControls": false, + "id": null, + "links": [], + "refresh": false, + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 3, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "100 - (avg by (cpu) (irate(node_cpu{mode=\"idle\", instance=~\"$server\"}[5m])) * 100)", + "hide": false, + "intervalFactor": 10, + "legendFormat": "{{cpu}}", + "refId": "A", + "step": 50 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Idle cpu", + "tooltip": { + "msResolution": false, + "shared": true, + 
"sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": "cpu usage", + "logBase": 1, + "max": 100, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 9, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "span": 6, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_load1{instance=~\"$server\"}", + "intervalFactor": 4, + "legendFormat": "load 1m", + "refId": "A", + "step": 20, + "target": "" + }, + { + "expr": "node_load5{instance=~\"$server\"}", + "intervalFactor": 4, + "legendFormat": "load 5m", + "refId": "B", + "step": 20, + "target": "" + }, + { + "expr": "node_load15{instance=~\"$server\"}", + "intervalFactor": 4, + "legendFormat": "load 15m", + "refId": "C", + "step": 20, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "System load", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percentunit", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + 
"min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "New row", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 4, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "node_memory_SwapFree{instance=\"172.17.0.1:9100\",job=\"prometheus\"}", + "yaxis": 2 + } + ], + "spaceLength": 10, + "span": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "node_memory_MemTotal{instance=~\"$server\"} - node_memory_MemFree{instance=~\"$server\"}", + "intervalFactor": 2, + "legendFormat": "free memory", + "metric": "memo", + "refId": "A", + "step": 10, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Free memory", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "Prometheus", + "editable": true, + "error": false, + 
"format": "percent", + "gauge": { + "maxValue": 100, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 5, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + "rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "(node_memory_MemFree{instance=~\"$server\"} / node_memory_MemTotal{instance=~\"$server\"}) * 100", + "intervalFactor": 2, + "refId": "A", + "step": 60, + "target": "" + } + ], + "thresholds": "10, 20", + "title": "Free memory", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "avg" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "New row", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 6, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "read", + "yaxis": 1 + }, + { + "alias": "{instance=\"172.17.0.1:9100\"}", + "yaxis": 2 + }, + { + "alias": 
"io time", + "yaxis": 2 + } + ], + "spaceLength": 10, + "span": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by (instance) (irate(node_disk_bytes_read{instance=~\"$server\"}[5m]))", + "hide": false, + "intervalFactor": 4, + "legendFormat": "read", + "refId": "A", + "step": 20, + "target": "" + }, + { + "expr": "sum by (instance) (irate(node_disk_bytes_written{instance=~\"$server\"}[5m]))", + "intervalFactor": 4, + "legendFormat": "written", + "refId": "B", + "step": 20 + }, + { + "expr": "sum by (instance) (irate(node_disk_io_time_ms{instance=~\"$server\"}[5m]))", + "intervalFactor": 4, + "legendFormat": "io time", + "refId": "C", + "step": 20 + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Disk usage", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + }, + { + "cacheTimeout": null, + "colorBackground": false, + "colorValue": false, + "colors": [ + "rgba(245, 54, 54, 0.9)", + "rgba(237, 129, 40, 0.89)", + "rgba(50, 172, 45, 0.97)" + ], + "datasource": "Prometheus", + "editable": true, + "error": false, + "format": "percentunit", + "gauge": { + "maxValue": 1, + "minValue": 0, + "show": true, + "thresholdLabels": false, + "thresholdMarkers": true + }, + "id": 7, + "interval": null, + "links": [], + "mappingType": 1, + "mappingTypes": [ + { + "name": "value to text", + "value": 1 + }, + { + "name": "range to text", + "value": 2 + } + ], + "maxDataPoints": 100, + "nullPointMode": "connected", + "nullText": null, + "postfix": "", + "postfixFontSize": "50%", + "prefix": "", + "prefixFontSize": "50%", + 
"rangeMaps": [ + { + "from": "null", + "text": "N/A", + "to": "null" + } + ], + "span": 2, + "sparkline": { + "fillColor": "rgba(31, 118, 189, 0.18)", + "full": false, + "lineColor": "rgb(31, 120, 193)", + "show": false + }, + "tableColumn": "", + "targets": [ + { + "expr": "min(node_filesystem_free{device!=\"rootfs\",instance=~\"$server\"} / node_filesystem_size{device!=\"rootfs\",instance=~\"$server\"})", + "intervalFactor": 2, + "refId": "A", + "step": 60, + "target": "" + } + ], + "thresholds": "0.10, 0.25", + "title": "Free disk space (lowest mountpoint)", + "type": "singlestat", + "valueFontSize": "80%", + "valueMaps": [ + { + "op": "=", + "text": "N/A", + "value": "null" + } + ], + "valueName": "current" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "New row", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "Prometheus", + "editable": true, + "error": false, + "fill": 1, + "grid": {}, + "id": 8, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": true, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 2, + "links": [], + "nullPointMode": "connected", + "percentage": false, + "pointradius": 5, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "transmitted ", + "yaxis": 2 + } + ], + "spaceLength": 10, + "span": 12, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "irate(node_network_receive_bytes{instance=~\"$server\",device!~\"lo\"}[5m])", + "hide": false, + "intervalFactor": 2, + "legendFormat": "received", + "refId": "A", + "step": 4, + "target": "" + }, + { + "expr": "irate(node_network_transmit_bytes{instance=~\"$server\",device!~\"lo\"}[5m])", + "hide": false, + "intervalFactor": 2, + "legendFormat": "transmitted ", + "refId": "B", + "step": 4, + 
"target": "" + }, + { + "expr": "node_network_transmit_bytes{instance=~\"$server\",device!~\"lo\"}", + "hide": true, + "intervalFactor": 2, + "legendFormat": "transmitted ", + "refId": "C", + "step": 2, + "target": "" + } + ], + "thresholds": [], + "timeFrom": null, + "timeShift": null, + "title": "Data transfer", + "tooltip": { + "msResolution": false, + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ] + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": false, + "title": "New row", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "prometheus" + ], + "templating": { + "list": [ + { + "allValue": null, + "current": {}, + "datasource": "Prometheus", + "hide": 0, + "includeAll": false, + "label": null, + "multi": false, + "name": "server", + "options": [], + "query": "label_values(node_boot_time, instance)", + "refresh": 1, + "regex": "", + "sort": 0, + "tagValuesQuery": "", + "tags": [], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "browser", + "title": "Node exporter single server", + "version": 1 +} +} diff --git a/tools/prometheus/prometheus-tools.sh b/tools/prometheus/prometheus-tools.sh new file mode 100644 index 0000000..ed6eb22 --- /dev/null +++ b/tools/prometheus/prometheus-tools.sh @@ -0,0 +1,228 @@ 
+#!/bin/bash +# Copyright 2017 AT&T Intellectual Property, Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +#. What this is: Functions for testing with Prometheus and Grafana. Sets up +#. Prometheus and Grafana on a master node (e.g. for kubernetes, docker, +#. rancher, openstack) and agent nodes (where applications run). +#. Prerequisites: +#. - Ubuntu server for master and agent nodes +#. - Docker installed +#. Usage: +#. $ git clone https://gerrit.opnfv.org/gerrit/models ~/models +#. $ cd ~/models/tools/prometheus +#. $ bash prometheus-tools.sh setup "<agents>" +#. <agents>: space separated IP of agent nodes +#. $ bash prometheus-tools.sh grafana +#. Runs grafana in a docker container and connects to prometheus as datasource +#. $ bash prometheus-tools.sh all "<agents>" +#. Does all of the above +#. $ bash prometheus-tools.sh clean "<agents>" +# + +# Prometheus links +# https://prometheus.io/download/ +# https://prometheus.io/docs/introduction/getting_started/ +# https://github.com/prometheus/prometheus +# https://prometheus.io/docs/instrumenting/exporters/ +# https://github.com/prometheus/node_exporter +# https://github.com/prometheus/haproxy_exporter +# https://github.com/prometheus/collectd_exporter + +# Use this to trigger fail() at the right places +# if [ "$RESULT" == "Test Failed!"
]; then fail "message"; fi +function fail() { + echo "$1" + exit 1 +} + +function setup_prometheus() { + # Prerequisites + echo "${FUNCNAME[0]}: Setting up prometheus master and agents" + sudo apt install -y golang-go jq + + # Install Prometheus server + echo "${FUNCNAME[0]}: Setting up prometheus master" + if [[ -d ~/prometheus ]]; then rm -rf ~/prometheus; fi + mkdir ~/prometheus + mkdir ~/prometheus/dashboards + cp -r dashboards/* ~/prometheus/dashboards + cd ~/prometheus + wget https://github.com/prometheus/prometheus/releases/download/v2.0.0-beta.2/prometheus-2.0.0-beta.2.linux-amd64.tar.gz + tar xvfz prometheus-*.tar.gz + cd prometheus-* + # Customize prometheus.yml below for your server IPs + # This example assumes the node_exporter and haproxy_exporter will be installed on each node + cat <<'EOF' >prometheus.yml +global: + scrape_interval: 15s # By default, scrape targets every 15 seconds. + + # Attach these labels to any time series or alerts when communicating with + # external systems (federation, remote storage, Alertmanager). + external_labels: + monitor: 'codelab-monitor' + +# A scrape configuration containing exactly one endpoint to scrape: +# Here it's Prometheus itself. +scrape_configs: + # The job name is added as a label `job=` to any timeseries scraped from this config. + - job_name: 'prometheus' + + # Override the global default and scrape targets from this job every 5 seconds. 
+ scrape_interval: 5s + + static_configs: +EOF + + for node in $nodes; do + echo " - targets: ['${node}:9100']" >>prometheus.yml + echo " - targets: ['${node}:9101']" >>prometheus.yml + done + + # Start Prometheus + nohup ./prometheus --config.file=prometheus.yml > /dev/null 2>&1 & + # Browse to http://host_ip:9090 + + echo "${FUNCNAME[0]}: Installing exporters" + # Install exporters + # https://github.com/prometheus/node_exporter + cd ~/prometheus + wget https://github.com/prometheus/node_exporter/releases/download/v0.14.0/node_exporter-0.14.0.linux-amd64.tar.gz + tar xvfz node*.tar.gz + # https://github.com/prometheus/haproxy_exporter + wget https://github.com/prometheus/haproxy_exporter/releases/download/v0.7.1/haproxy_exporter-0.7.1.linux-amd64.tar.gz + tar xvfz haproxy*.tar.gz + + # The scp and ssh actions below assume you have key-based access enabled to the nodes + for node in $nodes; do + echo "${FUNCNAME[0]}: Setup agent at $node" + scp -r -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no \ + node_exporter-0.14.0.linux-amd64/node_exporter ubuntu@$node:/home/ubuntu/node_exporter + ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no \ + ubuntu@$node "nohup ./node_exporter > /dev/null 2>&1 &" + scp -r -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no \ + haproxy_exporter-0.7.1.linux-amd64/haproxy_exporter ubuntu@$node:/home/ubuntu/haproxy_exporter + ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no \ + ubuntu@$node "nohup ./haproxy_exporter > /dev/null 2>&1 &" + done + + host_ip=$(ip route get 8.8.8.8 | awk '{print $NF; exit}') + while ! curl -o /tmp/up http://$host_ip:9090/api/v1/query?query=up ; do + echo "${FUNCNAME[0]}: Prometheus API is not yet responding... 
waiting 10 seconds" + sleep 10 + done + + exp=$(jq '.data.result|length' /tmp/up) + echo "${FUNCNAME[0]}: $exp exporters are up" + while [[ $exp > 0 ]]; do + ((exp--)) + eip=$(jq -r ".data.result[$exp].metric.instance" /tmp/up) + job=$(jq -r ".data.result[$exp].metric.job" /tmp/up) + echo "${FUNCNAME[0]}: $job at $eip" + done + echo "${FUNCNAME[0]}: Prometheus dashboard is available at http://$host_ip:9090" + echo "Prometheus dashboard is available at http://$host_ip:9090" auto>/tmp/summary +} + +function connect_grafana() { + echo "${FUNCNAME[0]}: Setup Grafana datasources and dashboards" + prometheus_ip=$1 + grafana_ip=$2 + + while ! curl -X POST http://admin:admin@$grafana_ip:3000/api/login/ping ; do + echo "${FUNCNAME[0]}: Grafana API is not yet responding... waiting 10 seconds" + sleep 10 + done + + echo "${FUNCNAME[0]}: Setup Prometheus datasource for Grafana" + cd ~/prometheus/ + cat >datasources.json <>/tmp/summary + echo "${FUNCNAME[0]}: Grafana API is available at http://admin:admin@$host_ip:3000/api/v1/query?query=" + echo "Grafana API is available at http://admin:admin@$host_ip:3000/api/v1/query?query=" >>/tmp/summary +} + +function run_and_connect_grafana() { + # Per http://docs.grafana.org/installation/docker/ + host_ip=$(ip route get 8.8.8.8 | awk '{print $NF; exit}') + sudo docker run -d -p 3000:3000 --name grafana grafana/grafana + status=$(sudo docker inspect grafana | jq -r '.[0].State.Status') + while [[ "x$status" != "xrunning" ]]; do + echo "${FUNCNAME[0]}: Grafana container state is ($status)" + sleep 10 + status=$(sudo docker inspect grafana | jq -r '.[0].State.Status') + done + echo "${FUNCNAME[0]}: Grafana container state is $status" + + connect_grafana $host_ip $host_ip + echo "${FUNCNAME[0]}: connect_grafana complete" +} + +nodes=$2 +case "$1" in + setup) + setup_prometheus "$2" + ;; + grafana) + run_and_connect_grafana + ;; + all) + setup_prometheus "$2" + run_and_connect_grafana + ;; + clean) + sudo kill $(ps -ef | grep "\./prometheus" 
| grep prometheus.yml | awk '{print $2}') + rm -rf ~/prometheus + sudo docker stop grafana + sudo docker rm grafana + for node in $nodes; do + ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no \ + ubuntu@$node "sudo kill $(ps -ef | grep ./node_exporter | awk '{print $2}')" + ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no \ + ubuntu@$node "rm -rf /home/ubuntu/node_exporter" + ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no \ + ubuntu@$node "sudo kill $(ps -ef | grep ./haproxy_exporter | awk '{print $2}')" + ssh -x -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no \ + ubuntu@$node "rm -rf /home/ubuntu/haproxy_exporter" + done + ;; + *) + grep '#. ' $0 +esac +cat /tmp/summary diff --git a/tools/rancher/demo_deploy.sh b/tools/rancher/demo_deploy.sh new file mode 100644 index 0000000..981b421 --- /dev/null +++ b/tools/rancher/demo_deploy.sh @@ -0,0 +1,65 @@ +#!/bin/bash +# Copyright 2017 AT&T Intellectual Property, Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +#. What this is: Complete scripted deployment of an experimental Rancher-based +#. cloud-native application platform. When complete, Rancher and the following +#. will be installed: +#. - nginx and dokuwiki as demo applications +#. - prometheus + grafana for cluster monitoring/stats +#. Prometheus dashboard: http://:9090 +#. Grafana dashboard: http://:3000 +#. +#. Prerequisites: +#. 
- Ubuntu server for Rancher cluster nodes (admin/master and agent nodes) +#. - MAAS server as cluster admin for Rancher master/agent nodes +#. - Password-less ssh key provided for node setup +#. Usage: on the MAAS server +#. $ git clone https://gerrit.opnfv.org/gerrit/models ~/models +#. $ bash ~/models/tools/rancher/demo_deploy.sh "" +#. "" [] +#. : name of private key for cluster node ssh (in current folder) +#. : space separated list of hostnames managed by MAAS +#. : IP of cluster admin node +#. : space separated list of agent node IPs +#. : optional name of script for extra setup functions as needed + +key=$1 +nodes="$2" +admin_ip=$3 +agent_ips="$4" +extras=$5 + +source ~/models/tools/maas/deploy.sh $1 "$2" $5 +eval `ssh-agent` +ssh-add $key +if [[ "x$extras" != "x" ]]; then source $extras; fi +scp -o StrictHostKeyChecking=no $key ubuntu@$admin_ip:/home/ubuntu/$key +echo "Setting up Rancher..." +ssh -x ubuntu@$admin_ip <" +#. Automate setup and start demo blueprints. +#. : space-separated list of agent node IPs +#. $ bash rancher_cluster.sh setup "" +#. Installs and starts master and agent nodes. +#. $ bash rancher_cluster.sh master +#. Setup the Rancher master node. +#. $ bash rancher_cluster.sh agents "" +#. Installs and starts agent nodes. +#. $ bash rancher_cluster.sh demo +#. Start demo blueprints. +#. $ bash rancher_cluster.sh clean "" +#. Removes Rancher and installed blueprints from the master and agent nodes. +#. +#. To call the procedures, directly, e.g. public_endpoint nginx/lb +#. $ source rancher-cluster.sh +#. See below for function-specific usage +#. 
+ +# Install master +function setup_master() { + docker_installed=$(dpkg-query -W --showformat='${Status}\n' docker-ce | grep -c "install ok") + if [[ $docker_installed == 0 ]]; then + echo "${FUNCNAME[0]}: installing and starting docker" + # Per https://docs.docker.com/engine/installation/linux/docker-ce/ubuntu/ + sudo apt-get remove -y docker docker-engine docker.io + sudo apt-get update + sudo apt-get install -y \ + linux-image-extra-$(uname -r) \ + linux-image-extra-virtual + sudo apt-get install -y \ + apt-transport-https \ + ca-certificates \ + curl \ + software-properties-common + curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add - + sudo add-apt-repository \ + "deb [arch=amd64] https://download.docker.com/linux/ubuntu \ + $(lsb_release -cs) \ + stable" + sudo apt-get update + sudo apt-get install -y docker-ce + + echo "${FUNCNAME[0]}: installing jq" + sudo apt-get install -y jq + fi + + echo "${FUNCNAME[0]}: installing rancher server (master)" + sudo docker run -d --restart=unless-stopped -p 8080:8080 --name rancher rancher/server + + echo "${FUNCNAME[0]}: wait until server is up at http://$1:8080" + delay=0 + id=$(wget -qO- http://$1:8080/v2-beta/projects/ | jq -r '.data[0].id') + while [[ "$id" == "" ]]; do + echo "${FUNCNAME[0]}: rancher server is not yet up, checking again in 10 seconds" + sleep 10 + let delay=$delay+10 + id=$(wget -qO- http://$1:8080/v2-beta/projects/ | jq -r '.data[0].id') + done + echo "${FUNCNAME[0]}: rancher server is up after $delay seconds" + + rm -rf ~/rancher + mkdir ~/rancher +} + +# Install rancher CLI tools +# Usage example: install_cli_tools 172.16.0.2 +function install_cli_tools() { + echo "${FUNCNAME[0]}: installing rancher CLI tools for master $1" + cd ~ + echo "${FUNCNAME[0]}: install Rancher CLI" + rm -rf rancher-v0.6.3 + wget -q https://releases.rancher.com/cli/v0.6.3/rancher-linux-amd64-v0.6.3.tar.gz + gzip -d -f rancher-linux-amd64-v0.6.3.tar.gz + tar -xvf rancher-linux-amd64-v0.6.3.tar + 
sudo mv rancher-v0.6.3/rancher /usr/bin/rancher + echo "${FUNCNAME[0]}: install Rancher Compose" + rm -rf rancher-compose-v0.12.5 + wget -q https://releases.rancher.com/compose/v0.12.5/rancher-compose-linux-amd64-v0.12.5.tar.gz + gzip -d -f rancher-compose-linux-amd64-v0.12.5.tar.gz + tar -xvf rancher-compose-linux-amd64-v0.12.5.tar + sudo mv rancher-compose-v0.12.5/rancher-compose /usr/bin/rancher-compose + echo "${FUNCNAME[0]}: setup Rancher CLI environment" + # CLI setup http://rancher.com/docs/rancher/v1.6/en/cli/ + # Under the UI "API" select "Add account API key" and name it. Export the keys: + # The following scripted approach assumes you have 1 project/environment (Default) + # Set the url that Rancher is on + export RANCHER_URL=http://$1:8080/v1 + id=$(wget -qO- http://$1:8080/v2-beta/projects/ | jq -r '.data[0].id') + export RANCHER_ENVIRONMENT=$id + curl -s -o /tmp/keys -X POST -H 'Accept: application/json' -H 'Content-Type: application/json' -d '{"accountId":"reference[account]", "description":"string", "name":"string", "publicValue":"string", "secretValue":"password"}' http://$1:8080/v2-beta/projects/$id/apikeys +# curl -s -o /tmp/keys -X POST -H 'Accept: application/json' -H 'Content-Type: application/json' -d {"type":"apikey","accountId":"1a1","name":"admin","description":null,"created":null,"kind":null,"removed":null,"uuid":null} http://$1:8080/v2-beta/projects/$id/apikey + export RANCHER_ACCESS_KEY=$(jq -r '.publicValue' /tmp/keys) + export RANCHER_SECRET_KEY=$(jq -r '.secretValue' /tmp/keys) + # create the env file ~/.rancher/cli.json + rancher config < 0 && "$health" != "healthy" ]]; do + health=$(rancher inspect $id | jq -r ".healthState") + echo $service is $health + sleep 10 + done + echo $service state is $(rancher inspect $id | jq -r ".state") +} + +# Start service based upon docker image and simple templates +# Usage example: start_simple_service nginx nginx:latest 8081:80 3 +# Usage example: start_simple_service dokuwiki 
ununseptium/dokuwiki-docker 8082:80 2 +function start_simple_service() { + echo "${FUNCNAME[0]}: starting service $1 with image $2, ports $3, and scale $4" + service=$1 + image=$2 + # port is either a single (unexposed) port, or an source:target pair (source + # is the external port) + ports=$3 + scale=$4 + + echo "${FUNCNAME[0]}: creating service folder ~/rancher/$service" + mkdir ~/rancher/$service + cd ~/rancher/$service + echo "${FUNCNAME[0]}: creating docker-compose.yml" + # Define service via docker-compose.yml + cat <docker-compose.yml +version: '2' +services: + $service: + image: $image + ports: + - "$ports" +EOF + + echo "${FUNCNAME[0]}: syntax checking docker-compose.yml" + docker-compose -f docker-compose.yml config + + echo "${FUNCNAME[0]}: creating rancher-compose.yml" + cat <rancher-compose.yml +version: '2' +services: + # Reference the service that you want to extend + $service: + scale: $scale +EOF + + echo "${FUNCNAME[0]}: starting service $service" + rancher up -s $service -d + + wait_till_healthy "$service/$service" 6 + cd ~/rancher +} + +# Add load balancer to a service +# Usage example: lb_service nginx 8000 8081 +# Usage example: lb_service dokuwiki 8001 8082 +function lb_service() { + echo "${FUNCNAME[0]}: adding load balancer port $2 to service $1, port $3" + service=$1 + lbport=$2 + port=$3 + + cd ~/rancher/$service + echo "${FUNCNAME[0]}: creating docker-compose-lb.yml" + # Define lb service via docker-compose.yml + cat <docker-compose-lb.yml +version: '2' +services: + lb: + ports: + - $lbport + image: rancher/lb-service-haproxy:latest +EOF + + echo "${FUNCNAME[0]}: syntax checking docker-compose-lb.yml" + docker-compose -f docker-compose-lb.yml config + + echo "${FUNCNAME[0]}: creating rancher-compose-lb.yml" + cat <rancher-compose-lb.yml +version: '2' +services: + lb: + scale: 1 + lb_config: + port_rules: + - source_port: $lbport + target_port: $port + service: $service/$service + health_check: + port: 42 + interval: 2000 + 
unhealthy_threshold: 3 + healthy_threshold: 2 + response_timeout: 2000 +EOF + + echo "${FUNCNAME[0]}: starting service lb" + rancher up -s $service -d --file docker-compose-lb.yml --rancher-file rancher-compose-lb.yml + + wait_till_healthy "$service/lb" 6 + cd ~/rancher +} + +# Change scale of a service +# Usage example: scale_service nginx 1 +function scale_service() { + echo "${FUNCNAME[0]}: scaling service $1 to $2 instances" + id=$(rancher ps | grep " $1 " | awk '{print $1}') + rancher scale $id=$2 + + scale=$(rancher inspect $id | jq -r '.currentScale') + health=$(rancher inspect $id | jq -r '.healthState') + while [[ $scale != $2 || "$health" != "healthy" ]]; do + echo $service is scaled at $scale and is $health + scale=$(rancher inspect $id | jq -r '.currentScale') + health=$(rancher inspect $id | jq -r '.healthState') + sleep 10 + done + echo $service is scaled at $scale and is $health +} + +# Get public endpoint for a service +# Usage example public_endpoint nginx/lb +function public_endpoint() { + id=$(rancher ps | grep " $1 " | awk "{print \$1}") + ip=$(rancher inspect $id | jq -r ".publicEndpoints[0].ipAddress") + port=$(rancher inspect $id | jq -r ".publicEndpoints[0].port") + echo "${FUNCNAME[0]}: $1 is accessible at http://$ip:$port" +} + +# Stop a stack +# Usage example: stop_stack nginx +function stop_stack() { + echo "${FUNCNAME[0]}: stopping stack $1" + rancher stop $(rancher stacks | awk "/$1/{print \$1}") +} + +# Start a stopped stack +# Usage example: start_stack nginx +function start_stack() { + echo "${FUNCNAME[0]}: starting stack $1" + rancher start $(rancher stacks | awk "/$1/{print \$1}") + wait_till_healthy $1 6 +} + +# Delete a stack +# Usage example: delete_stack dokuwiki +function delete_stack() { + id=$(rancher stacks | grep "$1" | awk "{print \$1}") + echo "${FUNCNAME[0]}: deleting stack $1 with id $id" + rancher rm --stop $id +} + +# Delete a service +# Usage example: delete_service nginx/lb +function delete_service() { + 
id=$(rancher ps | grep "$1" | awk "{print \$1}") + echo "${FUNCNAME[0]}: deleting service $1 with id $id" + rancher rm --stop $id +} + +# Start a complex service, i.e. with yaml file customizations +# Usage example: start_complex_service grafana 3000:3000 1 +function start_complex_service() { + echo "${FUNCNAME[0]}: starting service $1 at ports $2, and scale $3" + service=$1 + # port is either a single (unexposed) port, or an source:target pair (source + # is the external port) + ports=$2 + scale=$3 + + echo "${FUNCNAME[0]}: creating service folder ~/rancher/$service" + mkdir ~/rancher/$service + cd ~/rancher/$service + echo "${FUNCNAME[0]}: creating docker-compose.yml" + # Define service via docker-compose.yml + case "$service" in + grafana) + cat <docker-compose.yml +grafana: + image: grafana/grafana:latest + ports: + - $ports + environment: + GF_SECURITY_ADMIN_USER: "admin" + GF_SECURITY_ADMIN_PASSWORD: "password" + GF_SECURITY_SECRET_KEY: $(uuidgen) +EOF + ;; + + *) + esac + + echo "${FUNCNAME[0]}: starting service $service" + rancher up -s $service -d + + wait_till_healthy "$service/$service" 6 + cd ~/rancher +} + +# Automated demo +# Usage example: rancher_demo start "172.16.0.7 172.16.0.8 172.16.0.9" +# Usage example: rancher_demo clean "172.16.0.7 172.16.0.8 172.16.0.9" +function demo() { + # Deploy apps + # Nginx web server, accessible on each machine port 8081, and via load + # balancer port 8001 + start=`date +%s` + setup "$1" + start_simple_service nginx nginx:latest 8081:80 3 + check_service nginx/nginx http "Welcome to nginx!" + lb_service nginx 8001 80 + check_service nginx/lb http "Welcome to nginx!" 
+ # Dokuwiki server, accessible on each machine port 8082, and via load + # balancer port 8002 + start_simple_service dokuwiki ununseptium/dokuwiki-docker 8082:80 2 + check_service dokuwiki/dokuwiki http "This topic does not exist yet" + lb_service dokuwiki 8002 80 + check_service dokuwiki/lb http "This topic does not exist yet" + # Grafana server, accessible on one machine at port 3000 + start_complex_service grafana 3000:3000 1 + id=$(rancher ps | grep " grafana/grafana " | awk "{print \$1}") + source ~/models/tools/prometheus/prometheus-tools.sh setup "$agents" + grafana_ip=$(rancher inspect $id | jq -r ".publicEndpoints[0].ipAddress") + prometheus_ip=$(ip route get 8.8.8.8 | awk '{print $NF; exit}') + connect_grafana $prometheus_ip $grafana_ip + public_endpoint nginx/lb + public_endpoint dokuwiki/lb + public_endpoint grafana/grafana + + end=`date +%s` + runtime=$((end-start)) + runtime=$((runtime/60)) + echo "${FUNCNAME[0]}: Demo duration = $runtime minutes" +} + +# Automate the installation +function setup() { + # Installation: http://rancher.com/docs/rancher/v1.6/en/ + # Install rancher server (master) at primary interface of host + # Account control is disabled (open access to API), and Default env created + ip=$(ip route get 1 | awk '{print $NF;exit}') + setup_master $ip + # Install rancher CLI tools (rancher, rancher-compose), register with master + # and setup CLI environment (e.g. 
API access/secret keys) + install_cli_tools $ip + + # Add agent hosts per http://rancher.com/docs/rancher/v1.6/en/hosts/custom/ + agents="$1" + for agent in $agents; do + setup_agent Default $agent + done +} + +# Clean the installation +function clean() { + delete_service nginx/lb + delete_stack nginx + delete_service dokuwiki/lb + delete_stack dokuwiki + agents="$1" + for agent in $agents; do + stop_agent $agent + done + sudo docker stop rancher + sudo docker rm -v rancher + sudo apt-get remove -y docker-ce +} + +export WORK_DIR=$(pwd) +case "$1" in + master) + ip=$(ip route get 1 | awk '{print $NF;exit}') + setup_master $ip + ;; + agents) + agents="$2" + for agent in $agents; do + setup_agent Default $agent + done + ;; + ceph) + # TODO Ceph support for rancher, e.g. re + # http://rancher.com/docs/rancher/latest/en/rancher-services/storage-service/ + # https://github.com/rancher/rancher/issues/8722 + # setup_ceph "$2" $3 $4 $5 + ;; + demo) + demo "$2" + ;; + setup) + setup "$2" + ;; + all) + setup "$2" + demo "$2" + check_service nginx/lb + check_service dokuwiki/lb + check_service grafana/grafana + ;; + clean) + clean "$2" + ;; + *) + if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then grep '#. ' $0; fi +esac diff --git a/tools/traffic.sh b/tools/traffic.sh new file mode 100644 index 0000000..c020b6c --- /dev/null +++ b/tools/traffic.sh @@ -0,0 +1,31 @@ +#!/bin/bash +# Copyright 2017 AT&T Intellectual Property, Inc +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# What this is: semi-random request generator for a web service +#. +#. How to use: +#. $ git clone https://gerrit.opnfv.org/gerrit/models +# $ bash models/tools/traffic.sh <url> +# <url>: address of the web service + +echo "$0: $(date) Generate some traffic, somewhat randomly" +ns="0 00 000" +while true +do + for n in $ns; do + sleep .$n$[ ( $RANDOM % 10 ) + 1 ]s + curl -s $1 > /dev/null + done +done -- cgit 1.2.3-korg