-rw-r--r--  clover/spark/build.sbt                                                             19
-rw-r--r--  clover/spark/docker/clover-spark/Dockerfile                                        61
-rwxr-xr-x  clover/spark/docker/clover-spark/build.sh                                          19
-rw-r--r--  clover/spark/docker/clover-spark/jars/clover-spark_2.11-1.0.jar                    bin (0 -> 30907 bytes)
-rw-r--r--  clover/spark/docker/clover-spark/jars/datastax_spark-cassandra-connector-2.3.0-s_2.11.jar  bin (0 -> 8538929 bytes)
-rw-r--r--  clover/spark/docker/clover-spark/jars/redisclient_2.11-3.7.jar                     bin (0 -> 839600 bytes)
-rw-r--r--  clover/spark/docker/spark-submit/Dockerfile                                        23
-rwxr-xr-x  clover/spark/docker/spark-submit/build.sh                                          17
-rwxr-xr-x  clover/spark/docker/spark-submit/runner.sh                                         31
-rwxr-xr-x  clover/spark/docker/spark-submit/runner_fast.sh                                    26
-rw-r--r--  clover/spark/src/main/scala/CloverFast.scala                                       55
-rw-r--r--  clover/spark/src/main/scala/CloverSlow.scala                                      232
-rw-r--r--  clover/spark/yaml/clover-spark.yaml                                                49
13 files changed, 532 insertions, 0 deletions
diff --git a/clover/spark/build.sbt b/clover/spark/build.sbt
new file mode 100644
index 0000000..ede00bb
--- /dev/null
+++ b/clover/spark/build.sbt
@@ -0,0 +1,19 @@
+// Copyright (c) Authors of Clover
+//
+// All rights reserved. This program and the accompanying materials
+// are made available under the terms of the Apache License, Version 2.0
+// which accompanies this distribution, and is available at
+// http://www.apache.org/licenses/LICENSE-2.0
+
+name := "Clover Spark"
+
+version := "1.0"
+
+scalaVersion := "2.11.6"
+
+libraryDependencies += "org.apache.spark" %% "spark-sql" % "2.3.2"
+libraryDependencies += "datastax" % "spark-cassandra-connector" % "2.3.0-s_2.11"
+
+libraryDependencies ++= Seq(
+  "net.debasishg" %% "redisclient" % "3.7"
+)
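Note on the dependency coordinates above: "datastax" % "spark-cassandra-connector" % "2.3.0-s_2.11" is a spark-packages-style coordinate (hence the -s_2.11 suffix) rather than a Maven Central one, so a stock sbt setup may not resolve it. If resolution fails, adding the spark-packages repository is the usual remedy; this is a sketch, and the repository URL is an assumption about where spark-packages artifacts are hosted:

    // Possible build.sbt addition, assuming the connector is served from the
    // spark-packages repository (not needed if the coordinate already resolves).
    resolvers += "spark-packages" at "https://repos.spark-packages.org/"

The jar that docker/clover-spark/build.sh later copies from target/scala-2.11/ is what a plain "sbt package" of this build definition produces.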
diff --git a/clover/spark/docker/clover-spark/Dockerfile b/clover/spark/docker/clover-spark/Dockerfile
new file mode 100644
index 0000000..d63c30a
--- /dev/null
+++ b/clover/spark/docker/clover-spark/Dockerfile
@@ -0,0 +1,61 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+FROM openjdk:8-alpine
+
+ARG spark_jars=jars
+ARG img_path=kubernetes/dockerfiles
+
+# Before building the docker image, first build and make a Spark distribution following
+# the instructions in http://spark.apache.org/docs/latest/building-spark.html.
+# If this docker file is being used in the context of building your images from a Spark
+# distribution, the docker build command should be invoked from the top level directory
+# of the Spark distribution. E.g.:
+# docker build -t spark:latest -f kubernetes/dockerfiles/spark/Dockerfile .
+
+RUN set -ex && \
+    apk upgrade --no-cache && \
+    apk add --no-cache bash tini libc6-compat && \
+    mkdir -p /opt/spark && \
+    mkdir -p /opt/spark/work-dir && \
+    touch /opt/spark/RELEASE && \
+    rm /bin/sh && \
+    ln -sv /bin/bash /bin/sh && \
+    chgrp root /etc/passwd && chmod ug+rw /etc/passwd
+
+RUN mkdir /spark
+WORKDIR /spark
+RUN wget https://archive.apache.org/dist/spark/spark-2.3.2/spark-2.3.2-bin-hadoop2.7.tgz
+RUN tar -xvzf spark-2.3.2-bin-hadoop2.7.tgz
+WORKDIR /spark/spark-2.3.2-bin-hadoop2.7
+
+RUN cp -R ${spark_jars} /opt/spark/jars
+COPY jars/clover-spark_2.11-1.0.jar /opt/spark/jars
+COPY jars/datastax_spark-cassandra-connector-2.3.0-s_2.11.jar /opt/spark/jars
+COPY jars/redisclient_2.11-3.7.jar /opt/spark/jars
+RUN cp -R bin /opt/spark/bin
+RUN cp -R sbin /opt/spark/sbin
+RUN cp -R conf /opt/spark/conf
+RUN cp -R ${img_path}/spark/entrypoint.sh /opt/
+
+
+ENV SPARK_HOME /opt/spark
+
+WORKDIR /opt/spark/work-dir
+RUN rm -rf /spark
+
+ENTRYPOINT [ "/opt/entrypoint.sh" ]
diff --git a/clover/spark/docker/clover-spark/build.sh b/clover/spark/docker/clover-spark/build.sh
new file mode 100755
index 0000000..a1a8788
--- /dev/null
+++ b/clover/spark/docker/clover-spark/build.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+#
+# Copyright (c) Authors of Clover
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+
+IMAGE_PATH=${IMAGE_PATH:-"kube1-node1:5000"}
+IMAGE_NAME=${IMAGE_NAME:-"clover-spark:latest"}
+
+# Copy clover-spark jar first
+cp ../../target/scala-2.11/clover-spark_2.11-1.0.jar jars/
+
+docker build -t $IMAGE_NAME -f Dockerfile .
+docker tag $IMAGE_NAME $IMAGE_PATH/$IMAGE_NAME
+docker push $IMAGE_PATH/$IMAGE_NAME
+
diff --git a/clover/spark/docker/clover-spark/jars/clover-spark_2.11-1.0.jar b/clover/spark/docker/clover-spark/jars/clover-spark_2.11-1.0.jar
new file mode 100644
index 0000000..f7cf82c
--- /dev/null
+++ b/clover/spark/docker/clover-spark/jars/clover-spark_2.11-1.0.jar
Binary files differ
diff --git a/clover/spark/docker/clover-spark/jars/datastax_spark-cassandra-connector-2.3.0-s_2.11.jar b/clover/spark/docker/clover-spark/jars/datastax_spark-cassandra-connector-2.3.0-s_2.11.jar
new file mode 100644
index 0000000..cce8205
--- /dev/null
+++ b/clover/spark/docker/clover-spark/jars/datastax_spark-cassandra-connector-2.3.0-s_2.11.jar
Binary files differ
diff --git a/clover/spark/docker/clover-spark/jars/redisclient_2.11-3.7.jar b/clover/spark/docker/clover-spark/jars/redisclient_2.11-3.7.jar
new file mode 100644
index 0000000..8e9f587
--- /dev/null
+++ b/clover/spark/docker/clover-spark/jars/redisclient_2.11-3.7.jar
Binary files differ
diff --git a/clover/spark/docker/spark-submit/Dockerfile b/clover/spark/docker/spark-submit/Dockerfile
new file mode 100644
index 0000000..898df1a
--- /dev/null
+++ b/clover/spark/docker/spark-submit/Dockerfile
@@ -0,0 +1,23 @@
+# Copyright (c) Authors of Clover
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+
+FROM java:8
+
+RUN mkdir /spark
+WORKDIR /spark
+RUN wget https://archive.apache.org/dist/spark/spark-2.3.2/spark-2.3.2-bin-hadoop2.7.tgz
+# COPY spark-2.3.2-bin-hadoop2.7.tgz /spark
+RUN tar -xvzf spark-2.3.2-bin-hadoop2.7.tgz
+
+COPY runner.sh /spark/spark-2.3.2-bin-hadoop2.7
+COPY runner_fast.sh /spark/spark-2.3.2-bin-hadoop2.7
+WORKDIR /spark/spark-2.3.2-bin-hadoop2.7
+
+RUN curl -LO https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/linux/amd64/kubectl
+RUN chmod +x ./kubectl
+
+CMD ./runner.sh
diff --git a/clover/spark/docker/spark-submit/build.sh b/clover/spark/docker/spark-submit/build.sh
new file mode 100755
index 0000000..1bcbbc2
--- /dev/null
+++ b/clover/spark/docker/spark-submit/build.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Copyright (c) Authors of Clover
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+
+IMAGE_PATH=${IMAGE_PATH:-"localhost:5000"}
+IMAGE_NAME=${IMAGE_NAME:-"clover-spark-submit:latest"}
+
+docker build -f Dockerfile -t $IMAGE_NAME .
+docker tag $IMAGE_NAME $IMAGE_PATH/$IMAGE_NAME
+docker push $IMAGE_PATH/$IMAGE_NAME
+
diff --git a/clover/spark/docker/spark-submit/runner.sh b/clover/spark/docker/spark-submit/runner.sh
new file mode 100755
index 0000000..b98ff32
--- /dev/null
+++ b/clover/spark/docker/spark-submit/runner.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+#
+# Copyright (c) Authors of Clover
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+
+# Fast refresh analytics
+./runner_fast.sh &
+
+IMAGE_NAME=${IMAGE_NAME:-"clover-spark:latest"}
+IMAGE_PATH=${IMAGE_PATH:-"localhost:5000"}
+CLASS_NAME=${CLASS_NAME:-"CloverSlow"}
+JAR_NAME=${JAR_NAME:-"clover-spark_2.11-1.0.jar"}
+
+bin/spark-submit \
+    --master k8s://https://kubernetes.default.svc \
+    --deploy-mode cluster \
+    --name "clover-spark" \
+    --class $CLASS_NAME \
+    --conf spark.executor.instances=2 \
+    --conf spark.kubernetes.container.image="$IMAGE_PATH/$IMAGE_NAME" \
+    --conf spark.kubernetes.authenticate.driver.serviceAccountName="clover-spark" \
+    --conf spark.kubernetes.namespace="clover-system" \
+    --jars local:///opt/spark/jars/redisclient_2.11-3.7.jar,local:///opt/spark/jars/datastax_spark-cassandra-connector-2.3.0-s_2.11.jar \
+    local:///opt/spark/jars/$JAR_NAME
+
+./kubectl -n clover-system delete pod,svc -l spark-role=driver
diff --git a/clover/spark/docker/spark-submit/runner_fast.sh b/clover/spark/docker/spark-submit/runner_fast.sh
new file mode 100755
index 0000000..2381351
--- /dev/null
+++ b/clover/spark/docker/spark-submit/runner_fast.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+#
+# Copyright (c) Authors of Clover
+#
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Apache License, Version 2.0
+# which accompanies this distribution, and is available at
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+
+IMAGE_NAME=${IMAGE_NAME:-"clover-spark:latest"}
+IMAGE_PATH=${IMAGE_PATH:-"localhost:5000"}
+CLASS_NAME=${CLASS_NAME:-"CloverFast"}
+JAR_NAME=${JAR_NAME:-"clover-spark_2.11-1.0.jar"}
+
+bin/spark-submit \
+    --master k8s://https://kubernetes.default.svc \
+    --deploy-mode cluster \
+    --name "clover-spark-fast" \
+    --class $CLASS_NAME \
+    --conf spark.executor.instances=2 \
+    --conf spark.kubernetes.container.image="$IMAGE_PATH/$IMAGE_NAME" \
+    --conf spark.kubernetes.authenticate.driver.serviceAccountName="clover-spark" \
+    --conf spark.kubernetes.namespace="clover-system" \
+    --jars local:///opt/spark/jars/redisclient_2.11-3.7.jar,local:///opt/spark/jars/datastax_spark-cassandra-connector-2.3.0-s_2.11.jar \
+    local:///opt/spark/jars/$JAR_NAME
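Both runner scripts wrap the spark-submit CLI. For reference, the same submission can be expressed with Spark's programmatic SparkLauncher API (the spark-launcher module); the sketch below mirrors the flags of runner.sh and is an illustrative assumption, not part of this patch. Like the scripts, it expects SPARK_HOME to point at the unpacked 2.3.2 distribution.

    import org.apache.spark.launcher.SparkLauncher

    // Programmatic equivalent of the runner.sh spark-submit invocation;
    // values are copied from the script defaults above.
    object Submit {
      def main(args: Array[String]) {
        val spark = new SparkLauncher()
          .setMaster("k8s://https://kubernetes.default.svc")
          .setDeployMode("cluster")
          .setAppName("clover-spark")
          .setMainClass("CloverSlow")
          .setAppResource("local:///opt/spark/jars/clover-spark_2.11-1.0.jar")
          .addJar("local:///opt/spark/jars/redisclient_2.11-3.7.jar")
          .addJar("local:///opt/spark/jars/datastax_spark-cassandra-connector-2.3.0-s_2.11.jar")
          .setConf("spark.executor.instances", "2")
          .setConf("spark.kubernetes.container.image",
                   "localhost:5000/clover-spark:latest")
          .setConf("spark.kubernetes.authenticate.driver.serviceAccountName",
                   "clover-spark")
          .setConf("spark.kubernetes.namespace", "clover-system")
          .launch()
        spark.waitFor()  // block until spark-submit exits
      }
    }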
diff --git a/clover/spark/src/main/scala/CloverFast.scala b/clover/spark/src/main/scala/CloverFast.scala
new file mode 100644
index 0000000..b746760
--- /dev/null
+++ b/clover/spark/src/main/scala/CloverFast.scala
@@ -0,0 +1,55 @@
+// Copyright (c) Authors of Clover
+//
+// All rights reserved. This program and the accompanying materials
+// are made available under the terms of the Apache License, Version 2.0
+// which accompanies this distribution, and is available at
+// http://www.apache.org/licenses/LICENSE-2.0
+
+import org.apache.spark.sql.SparkSession
+import com.datastax.spark.connector._
+import org.apache.spark.sql.cassandra._
+import org.apache.spark.SparkContext
+import org.apache.spark.SparkConf
+
+import com.redis._
+
+object CloverFast {
+  def main(args: Array[String]) {
+    val sp = SparkSession.builder.appName("Clover Fast")
+      .getOrCreate()
+    sp.stop()
+
+    val CassandraConnect = "cassandra.clover-system"
+    val RedisConnect = "redis.default"
+
+    // Cassandra, Redis, Spark Context
+    val scch = "spark.cassandra.connection.host"
+    val conf = new SparkConf(true).set(scch, CassandraConnect)
+    val redis = new RedisClient(RedisConnect, 6379)
+    val sc = new SparkContext(conf)
+
+    for( x <- 1 to 10000 ) {
+
+      try {
+        val spans = sc.cassandraTable("visibility", "spans")
+          .select("spanid").cassandraCount()
+        redis.set("span_count", spans)
+
+        val traces = sc.cassandraTable("visibility", "traces")
+          .select("traceid").cassandraCount()
+        redis.set("trace_count", traces)
+
+        val metrics = sc.cassandraTable("visibility", "metrics")
+          .select("monitor_time").cassandraCount()
+        redis.set("metric_count", metrics)
+
+      }
+      catch {
+        case unknown : Throwable => println("System counts exception: " +
+          unknown)
+      }
+
+    }
+
+  }
+}
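CloverFast's entire output is the three Redis string keys set above. A minimal consumer sketch, assuming the same scala-redis client and the redis.default endpoint the job itself uses (nothing below is defined by this patch):

    import com.redis._

    object CountReader {
      def main(args: Array[String]) {
        val redis = new RedisClient("redis.default", 6379)
        // Each key holds the latest cassandraCount() result for its table.
        for (key <- List("span_count", "trace_count", "metric_count")) {
          println(key + " = " + redis.get(key).getOrElse("<not set>"))
        }
      }
    }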
diff --git a/clover/spark/src/main/scala/CloverSlow.scala b/clover/spark/src/main/scala/CloverSlow.scala
new file mode 100644
index 0000000..1866d72
--- /dev/null
+++ b/clover/spark/src/main/scala/CloverSlow.scala
@@ -0,0 +1,232 @@
+// Copyright (c) Authors of Clover
+//
+// All rights reserved. This program and the accompanying materials
+// are made available under the terms of the Apache License, Version 2.0
+// which accompanies this distribution, and is available at
+// http://www.apache.org/licenses/LICENSE-2.0
+
+
+import org.apache.spark.sql.SparkSession
+import com.datastax.spark.connector._
+import org.apache.spark.sql.cassandra._
+
+
+import org.apache.spark.SparkContext
+import org.apache.spark.SparkConf
+
+import com.redis._
+
+object CloverSlow {
+  def main(args: Array[String]) {
+    val sp = SparkSession.builder.appName("Clover Slow").getOrCreate()
+    sp.stop()
+
+    val CassandraConnect = "cassandra.clover-system"
+    val RedisConnect = "redis.default"
+
+
+    // Enable/disable various analytics
+    val distinct_url_service = false
+    val response_times = true
+
+    // Cassandra, Redis, Spark Context
+    val scch = "spark.cassandra.connection.host"
+    val conf = new SparkConf(true).set(scch, CassandraConnect)
+    val redis = new RedisClient(RedisConnect, 6379)
+    val sc = new SparkContext(conf)
+
+    val spark = SparkSession
+      .builder()
+      .appName("Clover Visibility Stats")
+      .config("spark.cassandra.connection.host", CassandraConnect)
+      .config("spark.cassandra.connection.port", "9042")
+      .getOrCreate()
+
+    val services = redis.smembers("visibility_services")
+
+    spark
+      .read.cassandraFormat("spans", "visibility")
+      .load()
+      .createOrReplaceTempView("curspans")
+
+    if (distinct_url_service) {
+      // Get number of distinct URLs per service (node_id)
+      for (s <- services.get) {
+        val service = s.get
+        val perurl = spark.sql(
+          s"""
+          |SELECT node_id,count(distinct http_url)
+          |as urls,collect_set(http_url) as values
+          |FROM curspans
+          |WHERE node_id LIKE '%$service%'
+          |GROUP BY node_id
+          """.stripMargin)
+        for ((row) <- perurl.collect) {
+          println(row)
+          val node_id = row.get(0)
+          val url_count = row.get(1)
+          val url_distinct = row.getList(2).toString
+          redis.hmset(service, Map("node_id" -> node_id,
+            "url_count" -> url_count,
+            "url_distinct" -> url_distinct))
+        }
+      }
+    }
+
+    for( x <- 1 to 500 ) {
+
+      if (response_times) {
+        try {
+          for (s <- services.get) {
+            val service = s.get.replace('_', '-')
+            val service_rt = spark.sql(
+              s"""
+              |SELECT avg(duration),min(duration),max(duration)
+              |FROM curspans
+              |WHERE node_id LIKE '%$service%'
+              |AND upstream_cluster LIKE '%inbound%'
+              """.stripMargin)
+            if (service_rt.count > 0) {
+              val avg_rt = service_rt.first.getDouble(0) / 1000.0
+              val min_rt = service_rt.first.getInt(1) / 1000.0
+              val max_rt = service_rt.first.getInt(2) / 1000.0
+              redis.hmset(service, Map("avg_rt" -> f"$avg_rt%1.2f",
+                "min_rt" -> f"$min_rt%1.2f",
+                "max_rt" -> f"$max_rt%1.2f"))
+            } else {
+              redis.hmset(service, Map("avg_rt" -> "NA",
+                "min_rt" -> "NA",
+                "max_rt" -> "NA"))
+            }
+          }
+        } catch {
+          case unknown : Throwable => println("RT exception: " +
+            unknown)
+          //unknown.printStackTrace
+        }
+      }
+
+      // Per URL counts all nodes
+      val urlcount = spark.sql(
+        s"""
+        |SELECT http_url,count(http_url) as urls FROM curspans
+        |GROUP BY http_url
+        """.stripMargin)
+      redis.del("span_urls")
+      redis.del("span_urls_z")
+      for ((row) <- urlcount.collect) {
+        redis.sadd("span_urls", row.get(0))
+        redis.zadd("span_urls_z", row.getLong(1).toDouble, row.get(0))
+      }
+
+      // User-Agents all nodes
+      val uacount = spark.sql(
+        s"""
+        |SELECT user_agent,count(user_agent) as ua FROM curspans
+        |GROUP BY user_agent
+        """.stripMargin)
+      redis.del("span_user_agent")
+      redis.del("span_user_agent_z")
+      for ((row) <- uacount.collect) {
+        redis.sadd("span_user_agent", row.get(0))
+        redis.zadd("span_user_agent_z", row.getLong(1).toDouble,
+          row.get(0))
+      }
+
+      // Node ids all nodes
+      val nodecount = spark.sql(
+        s"""
+        |SELECT node_id,count(node_id) as node FROM curspans
+        |GROUP BY node_id
+        """.stripMargin)
+      redis.del("span_node_id")
+      redis.del("span_node_id_z")
+      for ((row) <- nodecount.collect) {
+        redis.sadd("span_node_id", row.get(0))
+        redis.zadd("span_node_id_z", row.getLong(1).toDouble, row.get(0))
+      }
+
+      // Per URL/status codes all nodes
+      val statuscount = spark.sql(
+        s"""
+        |SELECT http_url,status_code,count(status_code) as urls
+        |FROM curspans
+        |GROUP BY http_url,status_code
+        """.stripMargin)
+      redis.del("span_status_codes_z")
+      for ((row) <- statuscount.collect) {
+        val key_url_code = row.get(1) + ", " + row.get(0)
+        redis.zadd("span_status_codes_z", row.getLong(2).toDouble,
+          key_url_code)
+      }
+
+      // Per Service/URL counts
+      val node_url_count = spark.sql(
+        s"""
+        |SELECT node_id,http_url,count(http_url) as urls
+        |FROM curspans
+        |GROUP BY node_id,http_url
+        """.stripMargin)
+      redis.del("span_node_url_z")
+      for ((row) <- node_url_count.collect) {
+        val key_node_url = row.get(0) + ", " + row.get(1)
+        redis.zadd("span_node_url_z", row.getLong(2).toDouble,
+          key_node_url)
+      }
+
+      // Distinct span fields
+      val distinct_keys = List("operation_name", "upstream_cluster",
+        "status_code")
+      for (field <- distinct_keys) {
+        val distinct_span = spark.sql(
+          s"""
+          |SELECT $field FROM curspans
+          |GROUP BY $field
+          """.stripMargin)
+        val dk = "span_" + field
+        redis.del(dk)
+        for ((row) <- distinct_span.collect) {
+          redis.sadd(dk, row.get(0))
+        }
+      }
+
+      // Metrics, per service
+      spark
+        .read.cassandraFormat("metrics", "visibility")
+        .load()
+        .createOrReplaceTempView("curmetrics")
+
+      val metric_prefixes = redis.smembers("metric_prefixes")
+      val metric_suffixes = redis.smembers("metric_suffixes")
+
+      try {
+        for (s <- services.get) {
+          //val service = s.get.replace('_', '-')
+          val service = s.get
+          for (m_prefix <- metric_prefixes.get) {
+            val mp = m_prefix.get
+            for (m_suffix <- metric_suffixes.get) {
+              val ms = m_suffix.get
+              val metric_result = spark.sql(
+                s"""
+                |SELECT m_value FROM curmetrics
+                |WHERE m_name = '$mp$service$ms'
+                |ORDER BY m_time DESC LIMIT 100
+                """.stripMargin)
+              val metric_key = "metrics_" + mp + service + ms
+              redis.del(metric_key)
+              for ((row) <- metric_result.collect) {
+                redis.lpush(metric_key, row.get(0))
+              }
+            }
+          }
+        }
+      } catch {
+        case unknown : Throwable => println("Metrics exception: " +
+          unknown)
+        // unknown.printStackTrace
+      }
+    }
+
+  }
+}
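The Redis key schema CloverSlow maintains is effectively its public interface: a hash per service (avg_rt/min_rt/max_rt, plus URL fields when distinct_url_service is enabled), plain sets (span_urls, span_user_agent, span_node_id, and span_<field> for each distinct field), sorted sets scored by occurrence count (span_urls_z, span_user_agent_z, span_node_id_z, span_status_codes_z, span_node_url_z), and a list of up to 100 m_value samples per metric key. A consumer sketch under the same scala-redis assumptions; the service and metric key names below are hypothetical examples:

    import com.redis._

    object StatsReader {
      def main(args: Array[String]) {
        val redis = new RedisClient("redis.default", 6379)

        // Top 10 URLs by span count, highest score first.
        for ((url, hits) <- redis.zrangeWithScore("span_urls_z", 0, 9,
                                                  RedisClient.DESC)
                                 .getOrElse(Nil)) {
          println(f"$hits%8.0f  $url")
        }

        // Response-time hash for one (hypothetical) service name.
        println(redis.hgetall("proxy-access-control"))

        // Up to 10 samples from one (hypothetical) metric list.
        println(redis.lrange("metrics_example", 0, 9))
      }
    }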
redis.zadd("span_user_agent_z", row.getLong(1).toDouble, + row.get(0)) + } + + // Node ids all nodes + val nodecount = spark.sql( + s""" + |SELECT node_id,count(node_id) as node FROM curspans + |GROUP BY node_id + """.stripMargin) + redis.del("span_node_id") + redis.del("span_node_id_z") + for ((row) <- nodecount.collect) { + redis.sadd("span_node_id", row.get(0)) + redis.zadd("span_node_id_z", row.getLong(1).toDouble, row.get(0)) + } + + // Per URL/status codes all nodes + val statuscount = spark.sql( + s""" + |SELECT http_url,status_code,count(status_code) as urls + |FROM curspans + |GROUP BY http_url,status_code + """.stripMargin) + redis.del("span_status_codes_z") + for ((row) <- statuscount.collect) { + val key_url_code = row.get(1) + ", " + row.get(0) + redis.zadd("span_status_codes_z", row.getLong(2).toDouble, + key_url_code) + } + + // Per Service/URL counts + val node_url_count = spark.sql( + s""" + |SELECT node_id,http_url,count(http_url) as urls + |FROM curspans + |GROUP BY node_id,http_url + """.stripMargin) + redis.del("span_node_url_z") + for ((row) <- node_url_count.collect) { + val key_node_url = row.get(0) + ", " + row.get(1) + redis.zadd("span_node_url_z", row.getLong(2).toDouble, + key_node_url) + } + + // Distinct span fields + val distinct_keys = List("operation_name", "upstream_cluster", + "status_code") + for (field <- distinct_keys) { + val distinct_span = spark.sql( + s""" + |SELECT $field FROM curspans + |GROUP BY $field + """.stripMargin) + val dk = "span_" + field + redis.del(dk) + for ((row) <- distinct_span.collect) { + redis.sadd(dk, row.get(0)) + } + } + + // Metrics, per service + spark + .read.cassandraFormat("metrics", "visibility") + .load() + .createOrReplaceTempView("curmetrics") + + val metric_prefixes = redis.smembers("metric_prefixes") + val metric_suffixes = redis.smembers("metric_suffixes") + + try { + for (s <- services.get) { + //val service = s.get.replace('_', '-') + val service = s.get + for (m_prefix <- metric_prefixes.get) { + val mp = m_prefix.get + for (m_suffix <- metric_suffixes.get) { + val ms = m_suffix.get + val metric_result = spark.sql( + s""" + |SELECT m_value FROM curmetrics + |WHERE m_name = '$mp$service$ms' + |ORDER BY m_time DESC LIMIT 100 + """.stripMargin) + val metric_key = "metrics_" + mp + service + ms + redis.del(metric_key) + for ((row) <- metric_result.collect) { + redis.lpush(metric_key, row.get(0)) + } + } + } + } + } catch { + case unknown : Throwable => println("Metrics exception: " + + unknown) + // unknown.printStackTrace + } + } + + } +} diff --git a/clover/spark/yaml/clover-spark.yaml b/clover/spark/yaml/clover-spark.yaml new file mode 100644 index 0000000..f12a66c --- /dev/null +++ b/clover/spark/yaml/clover-spark.yaml @@ -0,0 +1,49 @@ +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: clover-spark + namespace: clover-system +--- +apiVersion: rbac.authorization.k8s.io/v1beta1 +kind: ClusterRoleBinding +metadata: + name: clover-spark-default +subjects: + - kind: ServiceAccount + name: default + namespace: clover-system +roleRef: + kind: ClusterRole + name: cluster-admin + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: rbac.authorization.k8s.io/v1beta1 +kind: ClusterRoleBinding +metadata: + name: clover-spark +subjects: + - kind: ServiceAccount + name: clover-spark + namespace: clover-system +roleRef: + kind: ClusterRole + name: cluster-admin + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: extensions/v1beta1 +kind: Deployment +metadata: + name: clover-spark-submit + namespace: 
+  namespace: clover-system
+  labels:
+    app: clover-spark
+spec:
+  template:
+    metadata:
+      labels:
+        app: clover-spark
+    spec:
+      containers:
+        - name: clover-spark
+          image: localhost:5000/clover-spark-submit:latest