-rw-r--r--  Pipfile                                     |  15
-rw-r--r--  Pipfile.lock                                | 182
-rw-r--r--  clover/logging/install/fluentd-istio.yaml   |  40
-rw-r--r--  clover/logging/install/logging-stack.yaml   | 205
-rw-r--r--  clover/logging/validate.py                  |  56
-rw-r--r--  clover/monitoring/monitoring.py             | 140
-rw-r--r--  clover/monitoring/validate.py               |  70
-rw-r--r--  clover/tracing/tracing.py                   | 201
-rw-r--r--  clover/tracing/tracing_sample.py            |  47
-rw-r--r--  clover/tracing/validate.py                  |  66
-rw-r--r--  docs/logging.rst                            |  28
-rw-r--r--  docs/monitoring.rst                         |  31
-rw-r--r--  docs/tracing.rst                            |  44
13 files changed, 1125 insertions(+), 0 deletions(-)
diff --git a/Pipfile b/Pipfile
new file mode 100644
--- /dev/null
+++ b/Pipfile
@@ -0,0 +1,15 @@
+[[source]]
+
+url = "https://pypi.python.org/simple"
+verify_ssl = true
+name = "pypi"
+
+
+[dev-packages]
+
+
+
+[packages]
+
+kubernetes = "*"
+sh = "*"
diff --git a/Pipfile.lock b/Pipfile.lock
new file mode 100644
index 0000000..d840d68
--- /dev/null
+++ b/Pipfile.lock
@@ -0,0 +1,182 @@
+{
+    "_meta": {
+        "hash": {
+            "sha256": "3e773b6eb42a9dba3e5cb71bcac1c832939ab3b069641084d9f5ecd0967ce7cf"
+        },
+        "pipfile-spec": 6,
+        "requires": {},
+        "sources": [
+            {
+                "name": "pypi",
+                "url": "https://pypi.python.org/simple",
+                "verify_ssl": true
+            }
+        ]
+    },
+    "default": {
+        "cachetools": {
+            "hashes": [
+                "sha256:4319bbb78172e7bcf99423e1ecd6914b32336ccfe97d2058ffe62e641a7f3abe",
+                "sha256:ede01f2d3cbd6ddc9e35e16c2b0ce011d8bb70ce0dbaf282f5b4df24b213bc5d"
+            ],
+            "version": "==2.0.1"
+        },
+        "certifi": {
+            "hashes": [
+                "sha256:14131608ad2fd56836d33a71ee60fa1c82bc9d2c8d98b7bdbc631fe1b3cd1296",
+                "sha256:edbc3f203427eef571f79a7692bb160a2b0f7ccaa31953e99bd17e307cf63f7d"
+            ],
+            "version": "==2018.1.18"
+        },
+        "chardet": {
+            "hashes": [
+                "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae",
+                "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691"
+            ],
+            "version": "==3.0.4"
+        },
+        "google-auth": {
+            "hashes": [
+                "sha256:34088434cb2a2409360b8f3cbc04195a465df1fb2aafad71ebbded77cbf08803",
+                "sha256:9051802d3dae256036cca9e34633a32c0ed1427730d4ebc513dff91ec8b6dd45"
+            ],
+            "version": "==1.4.1"
+        },
+        "idna": {
+            "hashes": [
+                "sha256:2c6a5de3089009e3da7c5dde64a141dbc8551d5b7f6cf4ed7c2568d0cc520a8f",
+                "sha256:8c7309c718f94b3a625cb648ace320157ad16ff131ae0af362c9f21b80ef6ec4"
+            ],
+            "version": "==2.6"
+        },
+        "ipaddress": {
+            "hashes": [
+                "sha256:200d8686011d470b5e4de207d803445deee427455cd0cb7c982b68cf82524f81"
+            ],
+            "version": "==1.0.19"
+        },
+        "kubernetes": {
+            "hashes": [
+                "sha256:2f1a05a9bb2549d6afb6d138b2767d61d8aeb735a7a12bf554440524205e2894",
+                "sha256:f81f145882471a1dd9d23360e99bd77027f07744729ef2728af4af7130cd19fd"
+            ],
+            "version": "==5.0.0"
+        },
+        "oauthlib": {
+            "hashes": [
+                "sha256:ce57b501e906ff4f614e71c36a3ab9eacbb96d35c24d1970d2539bbc3ec70ce1"
+            ],
+            "version": "==2.0.6"
+        },
+        "pyasn1": {
+            "hashes": [
+                "sha256:0d7f6e959fe53f3960a23d73f35e1fce61348b30915b6664309ca756de7c1f89",
+                "sha256:5a0db897b311d265cde49615cf783f1c78613138605cdd0f907ecfa5b2aba3ee",
+                "sha256:758cb50abddc03e4563fd9e7f03db56e3e87b58c0bd01247360326e5c0c7ffa5",
+                "sha256:7d626683e3d792cccc608da02498aff37ab4f3dafd8905d6bf755d11f9b26b43",
+                "sha256:a7efe807c4b83a859e2735c692b92ed7b567cfddc4163763412920041d876c2b",
+                "sha256:b5a9ca48055b9a20f6d1b3d68e38692e5431c86a0f99ea602e61294e891fee5b",
+                "sha256:c07d6e587b2f928366b1f67c09bda026a3e6fcc99e80a744dc67f8fca3895626",
+                "sha256:d258b0a71994f7770599835249cece1caef3c70def868c4915e6e5ca49b67d15",
+                "sha256:d5cd6ed995dba16fad0c521cfe31cd2d68400b53fcc2bce93326829be73ab6d1",
+                "sha256:d84c2aea3cf43780e9e6a19f4e4dddee9f6976519020e64e47c57e5c7a8c3dd2",
+                "sha256:e85895087905c65b5b594eb91f7522664c85545b147d5f4d4e7b1b07da8dcbdc",
+                "sha256:f81c96761fca60d64b1c9b79ec2e40cf9495a745cf570613079ef324aeb9672b"
+            ],
+            "version": "==0.4.2"
+        },
+        "pyasn1-modules": {
+            "hashes": [
+                "sha256:041e9fbafac548d095f5b6c3b328b80792f006196e15a232b731a83c93d59493",
+                "sha256:0cdca76a68dcb701fff58c397de0ef9922b472b1cb3ea9695ca19d03f1869787",
+                "sha256:0cea139045c38f84abaa803bcb4b5e8775ea12a42af10019d942f227acc426c3",
+                "sha256:0f2e50d20bc670be170966638fa0ae603f0bc9ed6ebe8e97a6d1d4cef30cc889",
+                "sha256:47fb6757ab78fe966e7c58b2030b546854f78416d653163f0ce9290cf2278e8b",
+                "sha256:598a6004ec26a8ab40a39ea955068cf2a3949ad9c0030da970f2e1ca4c9f1cc9",
+                "sha256:72fd8b0c11191da088147c6e4678ec53e573923ecf60b57eeac9e97433e09fc2",
+                "sha256:854700bbdd01394e2ada9c1bfbd0ed9f5d0c551350dbbd023e88b11d2771ae06",
+                "sha256:af00ea8f2022b6287dc375b2c70f31ab5af83989fc6fe9eacd4976ce26cd7ccc",
+                "sha256:b1f395cae2d669e0830cb023aa86f9f283b7a9aa32317d7f80d8e78aa2745812",
+                "sha256:c6747146e95d2b14cc2a8399b2b0bde3f93778f8f9ec704690d2b589c376c137",
+                "sha256:f53fe5bcebdf318f51399b250fe8325ef3a26d927f012cc0c8e0f9e9af7f9deb"
+            ],
+            "version": "==0.2.1"
+        },
+        "python-dateutil": {
+            "hashes": [
+                "sha256:07009062406cffd554a9b4135cd2ff167c9bf6b7aac61fe946c93e69fad1bbd8",
+                "sha256:8f95bb7e6edbb2456a51a1fb58c8dca942024b4f5844cae62c90aa88afe6e300"
+            ],
+            "version": "==2.7.0"
+        },
+        "pyyaml": {
+            "hashes": [
+                "sha256:0c507b7f74b3d2dd4d1322ec8a94794927305ab4cebbe89cc47fe5e81541e6e8",
+                "sha256:16b20e970597e051997d90dc2cddc713a2876c47e3d92d59ee198700c5427736",
+                "sha256:3262c96a1ca437e7e4763e2843746588a965426550f3797a79fca9c6199c431f",
+                "sha256:326420cbb492172dec84b0f65c80942de6cedb5233c413dd824483989c000608",
+                "sha256:4474f8ea030b5127225b8894d626bb66c01cda098d47a2b0d3429b6700af9fd8",
+                "sha256:592766c6303207a20efc445587778322d7f73b161bd994f227adaa341ba212ab",
+                "sha256:5ac82e411044fb129bae5cfbeb3ba626acb2af31a8d17d175004b70862a741a7",
+                "sha256:5f84523c076ad14ff5e6c037fe1c89a7f73a3e04cf0377cb4d017014976433f3",
+                "sha256:827dc04b8fa7d07c44de11fabbc888e627fa8293b695e0f99cb544fdfa1bf0d1",
+                "sha256:b4c423ab23291d3945ac61346feeb9a0dc4184999ede5e7c43e1ffb975130ae6",
+                "sha256:bc6bced57f826ca7cb5125a10b23fd0f2fff3b7c4701d64c439a300ce665fff8",
+                "sha256:c01b880ec30b5a6e6aa67b09a2fe3fb30473008c85cd6a67359a1b15ed6d83a4",
+                "sha256:ca233c64c6e40eaa6c66ef97058cdc80e8d0157a443655baa1b2966e812807ca",
+                "sha256:e863072cdf4c72eebf179342c94e6989c67185842d9997960b3e69290b2fa269"
+            ],
+            "version": "==3.12"
+        },
+        "requests": {
+            "hashes": [
+                "sha256:6a1b267aa90cac58ac3a765d067950e7dbbf75b1da07e895d1f594193a40a38b",
+                "sha256:9c443e7324ba5b85070c4a818ade28bfabedf16ea10206da1132edaa6dda237e"
+            ],
+            "version": "==2.18.4"
+        },
+        "requests-oauthlib": {
+            "hashes": [
+                "sha256:50a8ae2ce8273e384895972b56193c7409601a66d4975774c60c2aed869639ca",
+                "sha256:883ac416757eada6d3d07054ec7092ac21c7f35cb1d2cf82faf205637081f468"
+            ],
+            "version": "==0.8.0"
+        },
+        "rsa": {
+            "hashes": [
+                "sha256:25df4e10c263fb88b5ace923dd84bf9aa7f5019687b5e55382ffcdb8bede9db5",
+                "sha256:43f682fea81c452c98d09fc316aae12de6d30c4b5c84226642cf8f8fd1c93abd"
+            ],
+            "version": "==3.4.2"
+        },
+        "sh": {
+            "hashes": [
+                "sha256:ae3258c5249493cebe73cb4e18253a41ed69262484bad36fdb3efcb8ad8870bb",
+                "sha256:b52bf5833ed01c7b5c5fb73a7f71b3d98d48e9b9b8764236237bdc7ecae850fc"
+            ],
+            "version": "==1.12.14"
+        },
+        "six": {
+            "hashes": [
+                "sha256:70e8a77beed4562e7f14fe23a786b54f6296e34344c23bc42f07b15018ff98e9",
+                "sha256:832dc0e10feb1aa2c68dcc57dbb658f1c7e65b9b61af69048abc87a2db00a0eb"
+            ],
+            "version": "==1.11.0"
+        },
+        "urllib3": {
+            "hashes": [
+                "sha256:06330f386d6e4b195fbfc736b297f58c5a892e4440e54d294d7004e3a9bbea1b",
+                "sha256:cc44da8e1145637334317feebd728bd869a35285b93cbb4cca2577da7e62db4f"
+            ],
+            "version": "==1.22"
+        },
+        "websocket-client": {
+            "hashes": [
+                "sha256:188b68b14fdb2d8eb1a111f21b9ffd2dbf1dbc4e4c1d28cf2c37cdbf1dd1cae6",
+                "sha256:a453dc4dfa6e0db3d8fd7738a308a88effe6240c59f3226eb93e8f020c216149"
+            ],
+            "version": "==0.47.0"
+        }
+    },
+    "develop": {}
+}
diff --git a/clover/logging/install/fluentd-istio.yaml b/clover/logging/install/fluentd-istio.yaml
new file mode 100644
index 0000000..1853831
--- /dev/null
+++ b/clover/logging/install/fluentd-istio.yaml
@@ -0,0 +1,40 @@
+# Configuration for logentry instances
+apiVersion: "config.istio.io/v1alpha2"
+kind: logentry
+metadata:
+  name: newlog
+  namespace: istio-system
+spec:
+  severity: '"info"'
+  timestamp: request.time
+  variables:
+    source: source.labels["app"] | source.service | "unknown"
+    user: source.user | "unknown"
+    destination: destination.labels["app"] | destination.service | "unknown"
+    responseCode: response.code | 0
+    responseSize: response.size | 0
+    latency: response.duration | "0ms"
+  monitored_resource_type: '"UNSPECIFIED"'
+---
+# Configuration for a fluentd handler
+apiVersion: "config.istio.io/v1alpha2"
+kind: fluentd
+metadata:
+  name: handler
+  namespace: istio-system
+spec:
+  address: "fluentd-es.logging:24224"
+---
+# Rule to send logentry instances to the fluentd handler
+apiVersion: "config.istio.io/v1alpha2"
+kind: rule
+metadata:
+  name: newlogtofluentd
+  namespace: istio-system
+spec:
+  match: "true" # match for all requests
+  actions:
+  - handler: handler.fluentd
+    instances:
+    - newlog.logentry
+---
diff --git a/clover/logging/install/logging-stack.yaml b/clover/logging/install/logging-stack.yaml
new file mode 100644
index 0000000..9542496
--- /dev/null
+++ b/clover/logging/install/logging-stack.yaml
@@ -0,0 +1,205 @@
+# Logging Namespace. All below are a part of this namespace.
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: logging
+---
+# Elasticsearch Service
+apiVersion: v1
+kind: Service
+metadata:
+  name: elasticsearch
+  namespace: logging
+  labels:
+    app: elasticsearch
+spec:
+  ports:
+  - port: 9200
+    protocol: TCP
+    targetPort: db
+  selector:
+    app: elasticsearch
+  type: NodePort
+---
+# Elasticsearch Deployment
+apiVersion: extensions/v1beta1
+kind: Deployment
+metadata:
+  name: elasticsearch
+  namespace: logging
+  labels:
+    app: elasticsearch
+  annotations:
+    sidecar.istio.io/inject: "false"
+spec:
+  template:
+    metadata:
+      labels:
+        app: elasticsearch
+    spec:
+      containers:
+      - image: docker.elastic.co/elasticsearch/elasticsearch-oss:6.1.1
+        name: elasticsearch
+        resources:
+          # need more cpu upon initialization, therefore burstable class
+          limits:
+            cpu: 1000m
+          requests:
+            cpu: 100m
+        env:
+        - name: discovery.type
+          value: single-node
+        ports:
+        - containerPort: 9200
+          name: db
+          protocol: TCP
+        - containerPort: 9300
+          name: transport
+          protocol: TCP
+        volumeMounts:
+        - name: elasticsearch
+          mountPath: /data
+      volumes:
+      - name: elasticsearch
+        emptyDir: {}
+---
+# Fluentd Service
+apiVersion: v1
+kind: Service
+metadata:
+  name: fluentd-es
+  namespace: logging
+  labels:
+    app: fluentd-es
+spec:
+  ports:
+  - name: fluentd-tcp
+    port: 24224
+    protocol: TCP
+    targetPort: 24224
+  - name: fluentd-udp
+    port: 24224
+    protocol: UDP
+    targetPort: 24224
+  selector:
+    app: fluentd-es
+---
+# Fluentd Deployment
+apiVersion: extensions/v1beta1
+kind: Deployment
+metadata:
+  name: fluentd-es
+  namespace: logging
+  labels:
+    app: fluentd-es
+  annotations:
+    sidecar.istio.io/inject: "false"
+spec:
+  template:
+    metadata:
+      labels:
+        app: fluentd-es
+    spec:
+      containers:
+      - name: fluentd-es
+        image: gcr.io/google-containers/fluentd-elasticsearch:v2.0.1
+        env:
+        - name: FLUENTD_ARGS
+          value: --no-supervisor -q
+        resources:
+          limits:
+            memory: 500Mi
+          requests:
+            cpu: 100m
+            memory: 200Mi
+        volumeMounts:
+        - name: config-volume
+          mountPath: /etc/fluent/config.d
+      terminationGracePeriodSeconds: 30
+      volumes:
+      - name: config-volume
+        configMap:
+          name: fluentd-es-config
+---
+# Fluentd ConfigMap, contains config files.
+kind: ConfigMap
+apiVersion: v1
+data:
+  forward.input.conf: |-
+    # Takes the messages sent over TCP
+    <source>
+      type forward
+    </source>
+  output.conf: |-
+    <match **>
+       type elasticsearch
+       log_level info
+       include_tag_key true
+       host elasticsearch
+       port 9200
+       logstash_format true
+       # Set the chunk limits.
+       buffer_chunk_limit 2M
+       buffer_queue_limit 8
+       flush_interval 5s
+       # Never wait longer than 30 seconds between retries.
+       max_retry_wait 30
+       # Disable the limit on the number of retries (retry forever).
+       disable_retry_limit
+       # Use multiple threads for processing.
+       num_threads 2
+    </match>
+metadata:
+  name: fluentd-es-config
+  namespace: logging
+---
+# Kibana Service
+apiVersion: v1
+kind: Service
+metadata:
+  name: kibana
+  namespace: logging
+  labels:
+    app: kibana
+spec:
+  ports:
+  - port: 5601
+    protocol: TCP
+    targetPort: ui
+  selector:
+    app: kibana
+  type: NodePort
+---
+# Kibana Deployment
+apiVersion: extensions/v1beta1
+kind: Deployment
+metadata:
+  name: kibana
+  namespace: logging
+  labels:
+    app: kibana
+  annotations:
+    sidecar.istio.io/inject: "false"
+spec:
+  template:
+    metadata:
+      labels:
+        app: kibana
+    spec:
+      containers:
+      - name: kibana
+        image: docker.elastic.co/kibana/kibana-oss:6.1.1
+        resources:
+          # need more cpu upon initialization, therefore burstable class
+          limits:
+            cpu: 1000m
+          requests:
+            cpu: 100m
+        env:
+        - name: ELASTICSEARCH_URL
+          value: http://elasticsearch:9200
+        ports:
+        - containerPort: 5601
+          name: ui
+          protocol: TCP
+---
diff --git a/clover/logging/validate.py b/clover/logging/validate.py
new file mode 100644
index 0000000..821f912
--- /dev/null
+++ b/clover/logging/validate.py
@@ -0,0 +1,56 @@
+# Copyright (c) Authors of Clover
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+
+from kubernetes import client, config
+from kubernetes.stream import stream
+import sh
+import re
+
+FLUENTD_NAMESPACE = 'logging'
+FLUENTD_PATTERN = 'fluentd-.*'
+FLUENTD_LABELS = 'app=fluentd-es'
+FLUENTD_INPUT = """<source>
+  type forward
+</source>"""
+
+
+def main():
+    # Load config from default location.
+    config.load_kube_config()
+
+    v1 = client.CoreV1Api()
+
+    fluentd_pod_name = None
+
+    # find by name
+    print("Find fluentd pod by name '{}'".format(FLUENTD_PATTERN))
+    fluentd_regex = re.compile(FLUENTD_PATTERN)
+    resp = v1.list_namespaced_pod(FLUENTD_NAMESPACE)
+    for i in resp.items:
+        if fluentd_regex.search(i.metadata.name) is not None:
+            print(i.metadata.name)
+
+    # find by label selector
+    print("Find fluentd pod by label selector '{}'".format(FLUENTD_LABELS))
+    resp = v1.list_namespaced_pod(FLUENTD_NAMESPACE,
+                                  label_selector=FLUENTD_LABELS)
+    for i in resp.items:
+        print(i.metadata.name)
+        fluentd_pod_name = i.metadata.name
+
+    # check fluentd configuration
+    # NOTE: exec in the Python library does not work well, use a shell
+    # command as a workaround
+    # See https://github.com/kubernetes-client/python/issues/485
+    result = sh.kubectl((
+        'exec -n logging ' +
+        fluentd_pod_name +
+        ' cat /etc/fluent/config.d/forward.input.conf').split())
+    if FLUENTD_INPUT in result:
+        print("fluentd input configured correctly")
+    else:
+        print("fluentd input not configured\n{}".format(FLUENTD_INPUT))
+
+
+if __name__ == '__main__':
+    main()
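+
+# Optional follow-up check (sketch only, not run above): with logstash_format
+# enabled in logging-stack.yaml, Elasticsearch creates logstash-* indices once
+# Mixer forwards log entries to fluentd. Assuming a port-forward to the
+# elasticsearch service on 9200, the index listing confirms logs are arriving:
+#
+#   import requests
+#   print(requests.get('http://localhost:9200/_cat/indices?v').text)
+#
+# An empty listing means no log entries have reached Elasticsearch yet.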
diff --git a/clover/monitoring/monitoring.py b/clover/monitoring/monitoring.py
new file mode 100644
index 0000000..9726fd1
--- /dev/null
+++ b/clover/monitoring/monitoring.py
@@ -0,0 +1,140 @@
+# Copyright (c) Authors of Clover
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+
+from datetime import timedelta
+import pprint
+import requests
+import time
+
+PROMETHEUS_URL = "http://127.0.0.1:9090"
+
+
+class Monitoring(object):
+    PROMETHEUS_HEALTH_UP = "up"
+    PROMETHEUS_ISTIO_TARGETS = {"envoy",
+                                "istio-mesh",
+                                "kubernetes-apiservers",
+                                "kubernetes-cadvisor",
+                                "kubernetes-nodes",
+                                "kubernetes-service-endpoints",
+                                "mixer",
+                                "pilot"}
+    PROMETHEUS_API_TARGETS = "/api/v1/targets"
+    PROMETHEUS_API_QUERY = "/api/v1/query"
+    PROMETHEUS_API_QUERY_RANGE = "/api/v1/query_range"
+
+    def __init__(self, host):
+        self.host = host
+
+    def get_targets(self):
+        try:
+            # API reference: https://prometheus.io/docs/prometheus/latest/querying/api/#targets
+            response = requests.get('%s%s' % (self.host,
+                                              Monitoring.PROMETHEUS_API_TARGETS))
+            if response.status_code != 200:
+                print("ERROR: get targets status code: %r" % response.status_code)
+                return False
+        except Exception as e:
+            print("ERROR: Cannot connect to prometheus\n%s" % e)
+            return False
+
+        return response.json()
+
+    def is_targets_healthy(self):
+        targets = set()
+
+        raw_targets = self.get_targets()
+        if raw_targets is False:
+            return False
+
+        for target in raw_targets["data"]["activeTargets"]:
+            if target["health"] != Monitoring.PROMETHEUS_HEALTH_UP:
+                print("ERROR: unhealthy target job: %s, health: %s" %
+                      (target["labels"]["job"], target["health"]))
+                return False
+            targets.add(target["labels"]["job"])
+
+        diff = Monitoring.PROMETHEUS_ISTIO_TARGETS - targets
+        if len(diff):
+            print("ERROR: targets %r not found!" % diff)
+            return False
+
+        return True
+
+    # Reference links:
+    # - https://prometheus.io/docs/prometheus/latest/querying/api/#instant-queries
+    # - https://prometheus.io/docs/prometheus/latest/querying/api/#range-queries
+    # - https://github.com/prometheus/prombench/blob/master/apps/load-generator/main.py
+    def query(self, query_params):
+        try:
+            start = time.time()
+
+            query_type = query_params.get("type", "instant")
+            params = {"query": query_params["query"]}
+            if query_type == "instant":
+                url = "%s%s" % (self.host, Monitoring.PROMETHEUS_API_QUERY)
+            elif query_type == "range":
+                url = "%s%s" % (self.host, Monitoring.PROMETHEUS_API_QUERY_RANGE)
+                params["start"] = start - duration_seconds(query_params.get("start", "0h"))
+                params["end"] = start - duration_seconds(query_params.get("end", "0h"))
+                params["step"] = query_params.get("step", "15s")
+            else:
+                print("ERROR: invalid query type")
+                return
+
+            resp = requests.get(url, params)
+            dur = time.time() - start
+
+            print("query %s %s, status=%s, size=%d, dur=%.3f" %
+                  (self.host, query_params["query"], resp.status_code,
+                   len(resp.text), dur))
+            pp = pprint.PrettyPrinter(indent=2)
+            pp.pprint(resp.json())
+
+        except Exception as e:
+            print("ERROR: Could not query prometheus instance %s. \n %s" % (url, e))
+
+
+def duration_seconds(s):
+    num = int(s[:-1])
+
+    if s.endswith('s'):
+        return timedelta(seconds=num).total_seconds()
+    elif s.endswith('m'):
+        return timedelta(minutes=num).total_seconds()
+    elif s.endswith('h'):
+        return timedelta(hours=num).total_seconds()
+
+    raise ValueError("unknown duration %s" % s)
+
+
+def main():
+    m = Monitoring(PROMETHEUS_URL)
+    if not m.is_targets_healthy():
+        print("ERROR: Prometheus targets are unhealthy!")
+    else:
+        print("Prometheus targets are all healthy!")
+
+    print("\n### query instant")
+    query_params = {
+        "type": "instant",
+        "query": "istio_double_request_count{destination='details.default.svc.cluster.local'}"
+    }
+    m.query(query_params)
+
+    print("\n### query range")
+    query_range_param = {
+        "type": "range",
+        "query": "istio_double_request_count{destination='details.default.svc.cluster.local'}",
+        "start": "5m",
+        "end": "3m",
+        "step": "30s"
+    }
+    m.query(query_range_param)
+
+
+if __name__ == '__main__':
+    main()
+
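+# Worked example of the range query above: duration_seconds("5m") returns
+# 300.0, so query_range_param covers the window from five minutes ago
+# ("start") to three minutes ago ("end"), sampled every 30 seconds ("step").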
diff --git a/clover/monitoring/validate.py b/clover/monitoring/validate.py
new file mode 100644
index 0000000..fafe5df
--- /dev/null
+++ b/clover/monitoring/validate.py
@@ -0,0 +1,70 @@
+# Copyright (c) Authors of Clover
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+
+from monitoring import Monitoring
+from kubernetes import client, config
+
+PROMETHEUS_URL = "http://127.0.0.1:9090"
+PROMETHEUS_DEPLOYMENT = "prometheus"
+PROMETHEUS_LABELS = "app=prometheus"
+ISTIO_NAMESPACE = "istio-system"
+
+
+def validateDeploy():
+    config.load_kube_config()
+    appsv1 = client.AppsV1Api()
+    corev1 = client.CoreV1Api()
+    find_flag = False
+    prom_pod_name = None
+
+    # check prometheus deployment
+    ret = appsv1.list_deployment_for_all_namespaces(watch=False)
+    for i in ret.items:
+        if PROMETHEUS_DEPLOYMENT == i.metadata.name and \
+           ISTIO_NAMESPACE == i.metadata.namespace:
+            find_flag = True
+            break
+    if not find_flag:
+        print("ERROR: Deployment: {} not found in {} namespace".format(
+            PROMETHEUS_DEPLOYMENT, ISTIO_NAMESPACE))
+        return False
+
+    # find prometheus pod by label selector
+    ret = corev1.list_namespaced_pod(ISTIO_NAMESPACE,
+                                     label_selector=PROMETHEUS_LABELS)
+    for i in ret.items:
+        prom_pod_name = i.metadata.name
+    if prom_pod_name is None:
+        print("ERROR: prometheus pod not found")
+        return False
+
+    # check prometheus pod status
+    ret = corev1.read_namespaced_pod_status(prom_pod_name, ISTIO_NAMESPACE)
+    if ret.status.phase != "Running":
+        print("ERROR: prometheus pod %s is in %s state" %
+              (prom_pod_name, ret.status.phase))
+        return False
+
+    return True
+
+
+def validateService():
+    m = Monitoring(PROMETHEUS_URL)
+
+    return m.is_targets_healthy()
+
+
+def main():
+    if validateDeploy() and validateService():
+        print("Prometheus monitoring validation has passed")
+        return True
+    else:
+        print("ERROR: Prometheus monitoring validation has failed")
+        return False
+
+
+if __name__ == '__main__':
+    main()
+
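+# NOTE (usage assumption): PROMETHEUS_URL above expects the port-forward
+# described in docs/monitoring.rst to be active, e.g.:
+#   kubectl -n istio-system port-forward <prometheus-pod> 9090:9090 &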
diff --git a/clover/tracing/tracing.py b/clover/tracing/tracing.py
new file mode 100644
index 0000000..16b952c
--- /dev/null
+++ b/clover/tracing/tracing.py
@@ -0,0 +1,201 @@
+# Copyright (c) Authors of Clover
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+
+import requests
+import time
+import redis
+
+TRACING_IP = "localhost"
+TRACING_PORT = "30888"
+
+
+class Tracing(object):
+
+    def __init__(
+            self, tracing_ip, tracing_port, redis_ip='localhost',
+            use_redis=True):
+        self.tracing_ip = tracing_ip
+        self.tracing_port = tracing_port
+        self.testid = '0'
+        self.test_start_time = 0
+        self.use_redis = use_redis
+        if use_redis:
+            try:
+                self.r = redis.StrictRedis(host=redis_ip, port=6379, db=0)
+            except Exception:
+                print("Failed to connect to redis")
+
+    def setRedisSet(self, rkey, rvalue):
+        if self.use_redis:
+            self.r.sadd(rkey, rvalue)
+
+    def setRedisList(self, rkey, rvalue):
+        if self.use_redis:
+            self.r.lpush(rkey, rvalue)
+
+    def setRedisHash(self, rkey, rvalue):
+        if self.use_redis:
+            self.r.hmset(rkey, rvalue)
+
+    def getRedisTestid(self, index):
+        testid = self.r.lrange("testids", index, index)
+        return testid[0]
+
+    def getRedisTraceids(self, testid):
+        rkey = "traceids:" + str(testid)
+        traceids = self.r.smembers(rkey)
+        return traceids
+
+    def getRedisSpanids(self, traceid):
+        rkey = "spanids:" + str(traceid)
+        spanids = self.r.smembers(rkey)
+        return spanids
+
+    def getRedisSpan(self, spanid, traceid):
+        rkey = "spans:" + str(traceid) + ':' + str(spanid)
+        span = self.r.hgetall(rkey)
+        return span
+
+    def getRedisSpanValue(self, spanid, traceid, span_key):
+        rkey = "spans:" + str(traceid) + ':' + str(spanid)
+        span_value = self.r.hget(rkey, span_key)
+        return span_value
+
+    def getRedisTags(self, spanid, traceid):
+        rkey = "tags:" + str(spanid) + ':' + str(traceid)
+        tags = self.r.hgetall(rkey)
+        return tags
+
+    def getRedisTagsValue(self, spanid, traceid, tag_key):
+        rkey = "tags:" + str(spanid) + ':' + str(traceid)
+        tag_value = self.r.hget(rkey, tag_key)
+        return tag_value
+
+    def getRedisTestAll(self, testid):
+        traceids = self.getRedisTraceids(testid)
+        for trace in traceids:
+            spanids = self.getRedisSpanids(trace)
+            for span in spanids:
+                # print(self.getRedisSpan(span, trace))
+                print(self.getRedisSpanValue(span, trace, 'duration'))
+                # print(self.getRedisTags(span, trace))
+                print(self.getRedisTagsValue(span, trace, 'node_id'))
+
+    def setTest(self, testid):
+        self.testid = testid
+        self.setRedisList("testids", testid)
+        self.test_start_time = int(time.time())
+
+    def getServices(self):
+        req_url = 'http://' + self.tracing_ip + ':' + self.tracing_port + \
+            '/api/services'
+        try:
+            response = requests.get(req_url)
+            if response.status_code != 200:
+                print("ERROR: Cannot connect to tracing: {}".format(
+                    response.status_code))
+                return False
+        except Exception as e:
+            print("ERROR: Cannot connect to tracing")
+            print(e)
+            return False
+
+        data = response.json()
+        services = data['data']
+        return services
+
+    def getTraces(self, service, time_back=3600, limit='1000'):
+        ref_time = int(time.time())
+        pad_time = '757000'
+        end_time = 'end=' + str(ref_time) + pad_time + '&'
+        if time_back == 0:
+            delta = self.test_start_time
+        else:
+            delta = ref_time - time_back
+        start_time = 'start=' + str(delta) + pad_time
+        limit = 'limit=' + limit + '&'
+        lookback = 'lookback=1h&'
+        max_dur = 'maxDuration&'
+        min_dur = 'minDuration&'
+        service = 'service=' + service + '&'
+        url_prefix = 'http://' + self.tracing_ip + ':' + self.tracing_port + \
+            '/api/traces?'
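+        # Example of the assembled request URL (illustrative timestamps):
+        #   http://localhost:30888/api/traces?end=1521000000757000&limit=1000&
+        #   lookback=1h&maxDuration&minDuration&service=istio-ingress&
+        #   start=1520996400757000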
+        req_url = url_prefix + end_time + limit + lookback + max_dur + \
+            min_dur + service + start_time
+
+        try:
+            response = requests.get(req_url)
+            if response.status_code != 200:
+                print("ERROR: Cannot connect to tracing: {}".format(
+                    response.status_code))
+                return False
+        except Exception as e:
+            print("ERROR: Cannot connect to tracing")
+            print(e)
+            return False
+
+        traces = response.json()
+        return traces
+
+    def numTraces(self, trace):
+        num_traces = len(trace['data'])
+        return str(num_traces)
+
+    def outProcesses(self, trace):
+        processes = []
+        if trace['data']:
+            first_trace = trace['data'][0]
+            for process in first_trace['processes']:
+                processes.append(process)
+        print(processes)
+        return processes
+
+    def outTraces(self, trace):
+        for traces in trace['data']:
+            print("TraceID: {}".format(traces['traceID']))
+            self.setRedisSet(
+                "traceids:{}".format(str(self.testid)), traces['traceID'])
+            for spans in traces['spans']:
+                print("SpanID: {}".format(spans['spanID']))
+                self.setRedisSet(
+                    "spanids:{}".format(traces['traceID']), spans['spanID'])
+                print("Duration: {} usec".format(spans['duration']))
+                span = {}
+                span['spanID'] = spans['spanID']
+                span['duration'] = spans['duration']
+                span['startTime'] = spans['startTime']
+                span['operationName'] = spans['operationName']
+                # print("Tags:\n {} \n".format(spans['tags']))
+                self.setRedisHash(
+                    "spans:{}:{}".format(
+                        traces['traceID'], spans['spanID']), span)
+                tag = {}
+                for tags in spans['tags']:
+                    print("Tag key: {}, value: {}".format(
+                        tags['key'], tags['value']))
+                    tag[tags['key']] = tags['value']
+                self.setRedisHash("tags:{}:{}".format(
+                    spans['spanID'], traces['traceID']), tag)
+
+    def monitorTraces(self, sample_interval, service='istio-ingress'):
+        loop = True
+        while loop:
+            try:
+                t = self.getTraces(service, 10)
+                num_traces = self.numTraces(t)
+                print("Number of traces: " + num_traces)
+                self.outTraces(t)
+                time.sleep(sample_interval)
+            except KeyboardInterrupt:
+                print("Test Start: {}".format(self.test_start_time))
+                loop = False
+
+    def main(self):
+        self.monitorTraces(1)
+
+
+if __name__ == '__main__':
+    Tracing(TRACING_IP, TRACING_PORT).main()
diff --git a/clover/tracing/tracing_sample.py b/clover/tracing/tracing_sample.py
new file mode 100644
index 0000000..f0234bf
--- /dev/null
+++ b/clover/tracing/tracing_sample.py
@@ -0,0 +1,47 @@
+# Copyright (c) Authors of Clover
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+
+import uuid
+import time
+from tracing import Tracing
+
+t = Tracing('localhost', '30888')
+
+# Get top-level services stored in tracing
+services = t.getServices()
+print(services)
+
+# Get traces from the last hour for the istio-ingress service
+service = 'istio-ingress'
+traces = t.getTraces(service, 3600)
+# Get process names for the first trace service
+t.outProcesses(traces)
+
+# Turn off the redis tracing store and output basic trace info
+t.use_redis = False
+t.outTraces(traces)
+
+# Set up a basic test and store it in redis
+t.use_redis = True
+t.setTest(uuid.uuid4())
+time.sleep(20)
+# Get all traces from test start time when time_back=0
+traces = t.getTraces(service, 0)
+# Store traces in redis
+t.outTraces(traces)
+
+# Get the test id for some number of tests back
+testid = t.getRedisTestid('0')
+print(testid)
+traceids = t.getRedisTraceids(testid)
+print(traceids)
+
+# Print span and tag info for all traces in the test
+# Will continue to consider what to extract from hashes for e2e validation
+t.getRedisTestAll(testid)
+
+# t.monitorTraces(1)
diff --git a/clover/tracing/validate.py b/clover/tracing/validate.py
new file mode 100644
index 0000000..eed6f9a
--- /dev/null
+++ b/clover/tracing/validate.py
@@ -0,0 +1,66 @@
+# Copyright (c) Authors of Clover
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+
+from tracing import Tracing
+from kubernetes import client, config
+
+
+JAEGER_IP = "localhost"
+# JAEGER_IP = "1.1.1.1"
+JAEGER_PORT = "30888"
+JAEGER_DEPLOYMENT = "jaeger-deployment"
+ISTIO_NAMESPACE = "istio-system"
+ISTIO_SERVICES = ["istio-ingress", "istio-mixer"]
+
+
+def validateDeploy():
+    config.load_kube_config()
+    v1 = client.AppsV1Api()
+
+    deployments = []
+    namespaces = []
+    validate = False
+    ret = v1.list_deployment_for_all_namespaces(watch=False)
+    for i in ret.items:
+        deployments.append(i.metadata.name)
+        namespaces.append(i.metadata.namespace)
+    if JAEGER_DEPLOYMENT in deployments:
+        d_index = deployments.index(JAEGER_DEPLOYMENT)
+        if namespaces[d_index] == ISTIO_NAMESPACE:
+            print("Deployment: {} present in {} namespace".format(
+                JAEGER_DEPLOYMENT, ISTIO_NAMESPACE))
+            validate = True
+    return validate
+
+
+# Services will only be present in Jaeger when traffic passes through Istio.
+# Requires a deployment in the Istio service mesh with some traffic
+# targeting its nodes.
+def validateServices():
+    t = Tracing(JAEGER_IP, JAEGER_PORT)
+    services = t.getServices()
+    validate = True
+    if services:
+        for s in ISTIO_SERVICES:
+            if s in services:
+                print("Service in tracing: {} present".format(s))
+            else:
+                validate = False
+    else:
+        validate = False
+    return validate
+
+
+def main():
+    if validateDeploy() and validateServices():
+        print("Jaeger tracing validation has passed")
+        return True
+    else:
+        print("Jaeger tracing validation has failed")
+        return False
+
+
+if __name__ == '__main__':
+    main()
diff --git a/docs/logging.rst b/docs/logging.rst
new file mode 100644
index 0000000..196ba40
--- /dev/null
+++ b/docs/logging.rst
@@ -0,0 +1,28 @@
+#######
+Logging
+#######
+
+************
+Installation
+************
+
+Currently, we use the `sample configuration`_ in Istio to install fluentd::
+
+    cd clover/logging
+    kubectl apply -f install
+
+.. _sample configuration: https://istio.io/docs/tasks/telemetry/fluentd.html
+
+********
+Validate
+********
+
+The script in ``clover/logging`` validates the fluentd installation::
+
+    python clover/logging/validate.py
+
+It validates the installation with the following criteria:
+
+#. existence of the fluentd pod
+#. fluentd input is configured correctly
+#. TBD
diff --git a/docs/monitoring.rst b/docs/monitoring.rst
new file mode 100644
index 0000000..44b01e3
--- /dev/null
+++ b/docs/monitoring.rst
@@ -0,0 +1,31 @@
+##########
+Monitoring
+##########
+
+************
+Installation
+************
+
+Currently, we use the Istio built-in Prometheus addon to install Prometheus::
+
+    cd <istio-release-path>
+    kubectl apply -f install/kubernetes/addons/prometheus.yaml
+
+********
+Validate
+********
+
+Set up port-forwarding for Prometheus by executing the following command::
+
+    kubectl -n istio-system port-forward $(kubectl -n istio-system get pod -l app=prometheus -o jsonpath='{.items[0].metadata.name}') 9090:9090 &
+
+Run the script in ``clover/monitoring`` to validate the Prometheus
+installation::
+
+    python clover/monitoring/validate.py
+
+It validates the installation with the following criteria:
+
+#. [DONE] prometheus pod is in Running state
+#. [DONE] prometheus is connected to monitoring targets
+#. [TODO] test collecting telemetry data from istio
+#. [TODO] TBD
diff --git a/docs/tracing.rst b/docs/tracing.rst
new file mode 100644
index 0000000..79d686c
--- /dev/null
+++ b/docs/tracing.rst
@@ -0,0 +1,44 @@
+#######
+Tracing
+#######
+
+************
+Installation
+************
+
+Currently, we use the Jaeger tracing all-in-one Kubernetes template for
+development and testing, which uses in-memory storage. It can be deployed
+to the istio-system namespace with the following command::
+
+    kubectl apply -n istio-system -f https://raw.githubusercontent.com/jaegertracing/jaeger-kubernetes/master/all-in-one/jaeger-all-in-one-template.yml
+
+The standard Jaeger REST port is 16686. To make this service available
+outside of the Kubernetes cluster, use the following command::
+
+    kubectl expose -n istio-system deployment jaeger-deployment --port=16686 --type=NodePort
+
+Kubernetes will expose the Jaeger service on another port, which can be
+found with::
+
+    kubectl get svc -n istio-system
+
+An example listing from the command above is shown below, where the Jaeger
+service is exposed externally on port 30888::
+
+    istio-system   jaeger-deployment   NodePort   10.104.113.94   <none>   16686:30888/TCP
+
+Jaeger will be accessible using the host IP of the Kubernetes cluster and
+the port provided.
+
+********
+Validate
+********
+
+The script in ``clover/tracing`` validates the Jaeger installation::
+
+    python clover/tracing/validate.py
+
+It validates the installation with the following criteria:
+
+#. Existence of the Jaeger all-in-one deployment using Kubernetes
+#. Jaeger service is accessible using the IP address and port configured
+   in the installation steps
+#. Jaeger can retrieve the default service listing for default Istio
+   components
+#. TBD - consider installation of a production setup with Cassandra or
+   Elasticsearch
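+
+The ``Tracing`` class used by the validation script can also be driven
+directly; ``clover/tracing/tracing_sample.py`` walks through the available
+calls. A minimal sketch (run from ``clover/tracing``; the host IP and
+NodePort are placeholders for the values found in the steps above)::
+
+    from tracing import Tracing
+
+    t = Tracing('<k8s-host-ip>', '30888')
+    print(t.getServices())               # service names known to Jaeger
+    traces = t.getTraces('istio-ingress', 3600)
+    print(t.numTraces(traces))           # trace count for the last hour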