summaryrefslogtreecommitdiffstats
path: root/src/ceph/qa/workunits/hadoop
diff options
context:
space:
mode:
authorQiaowei Ren <qiaowei.ren@intel.com>2018-01-04 13:43:33 +0800
committerQiaowei Ren <qiaowei.ren@intel.com>2018-01-05 11:59:39 +0800
commit812ff6ca9fcd3e629e49d4328905f33eee8ca3f5 (patch)
tree04ece7b4da00d9d2f98093774594f4057ae561d4 /src/ceph/qa/workunits/hadoop
parent15280273faafb77777eab341909a3f495cf248d9 (diff)
initial code repo
This patch creates initial code repo. For ceph, luminous stable release will be used for base code, and next changes and optimization for ceph will be added to it. For opensds, currently any changes can be upstreamed into original opensds repo (https://github.com/opensds/opensds), and so stor4nfv will directly clone opensds code to deploy stor4nfv environment. And the scripts for deployment based on ceph and opensds will be put into 'ci' directory. Change-Id: I46a32218884c75dda2936337604ff03c554648e4 Signed-off-by: Qiaowei Ren <qiaowei.ren@intel.com>
Diffstat (limited to 'src/ceph/qa/workunits/hadoop')
-rwxr-xr-xsrc/ceph/qa/workunits/hadoop/repl.sh42
-rwxr-xr-xsrc/ceph/qa/workunits/hadoop/terasort.sh76
-rwxr-xr-xsrc/ceph/qa/workunits/hadoop/wordcount.sh35
3 files changed, 153 insertions, 0 deletions
diff --git a/src/ceph/qa/workunits/hadoop/repl.sh b/src/ceph/qa/workunits/hadoop/repl.sh
new file mode 100755
index 0000000..f2e9fcc
--- /dev/null
+++ b/src/ceph/qa/workunits/hadoop/repl.sh
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+set -e
+set -x
+
+# bail if $TESTDIR is not set as this test will fail in that scenario
+[ -z $TESTDIR ] && { echo "\$TESTDIR needs to be set, but is not. Exiting."; exit 1; }
+
+# if HADOOP_PREFIX is not set, use default
+[ -z $HADOOP_PREFIX ] && { HADOOP_PREFIX=$TESTDIR/hadoop; }
+
+# create pools with different replication factors
+for repl in 2 3 7 8 9; do
+ name=hadoop.$repl
+ ceph osd pool create $name 8 8
+ ceph osd pool set $name size $repl
+
+ id=`ceph osd dump | sed -n "s/^pool \([0-9]*\) '$name'.*/\1/p"`
+ ceph mds add_data_pool $id
+done
+
+# create a file in each of the pools
+for repl in 2 3 7 8 9; do
+ name=hadoop.$repl
+ $HADOOP_PREFIX/bin/hadoop fs -rm -f /$name.dat
+ dd if=/dev/zero bs=1048576 count=1 | \
+ $HADOOP_PREFIX/bin/hadoop fs -Dceph.data.pools="$name" \
+ -put - /$name.dat
+done
+
+# check that hadoop reports replication matching
+# that of the pool the file was written into
+for repl in 2 3 7 8 9; do
+ name=hadoop.$repl
+ repl2=$($HADOOP_PREFIX/bin/hadoop fs -ls /$name.dat | awk '{print $2}')
+ if [ $repl -ne $repl2 ]; then
+ echo "replication factors didn't match!"
+ exit 1
+ fi
+done
+
+exit 0
diff --git a/src/ceph/qa/workunits/hadoop/terasort.sh b/src/ceph/qa/workunits/hadoop/terasort.sh
new file mode 100755
index 0000000..7996aec
--- /dev/null
+++ b/src/ceph/qa/workunits/hadoop/terasort.sh
@@ -0,0 +1,76 @@
+#!/bin/bash
+
+set -e
+set -x
+
+INPUT=/terasort-input
+OUTPUT=/terasort-output
+REPORT=/tersort-report
+
+num_records=100000
+[ ! -z $NUM_RECORDS ] && num_records=$NUM_RECORDS
+
+# bail if $TESTDIR is not set as this test will fail in that scenario
+[ -z $TESTDIR ] && { echo "\$TESTDIR needs to be set, but is not. Exiting."; exit 1; }
+
+# if HADOOP_PREFIX is not set, use default
+[ -z $HADOOP_PREFIX ] && { HADOOP_PREFIX=$TESTDIR/hadoop; }
+
+# Nuke hadoop directories
+$HADOOP_PREFIX/bin/hadoop fs -rm -r $INPUT $OUTPUT $REPORT || true
+
+# Generate terasort data
+#
+#-Ddfs.blocksize=512M \
+#-Dio.file.buffer.size=131072 \
+#-Dmapreduce.map.java.opts=-Xmx1536m \
+#-Dmapreduce.map.memory.mb=2048 \
+#-Dmapreduce.task.io.sort.mb=256 \
+#-Dyarn.app.mapreduce.am.resource.mb=1024 \
+#-Dmapred.map.tasks=64 \
+$HADOOP_PREFIX/bin/hadoop jar \
+ $HADOOP_PREFIX/share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar \
+ teragen \
+ -Dmapred.map.tasks=9 \
+ $num_records \
+ $INPUT
+
+# Run the sort job
+#
+#-Ddfs.blocksize=512M \
+#-Dio.file.buffer.size=131072 \
+#-Dmapreduce.map.java.opts=-Xmx1536m \
+#-Dmapreduce.map.memory.mb=2048 \
+#-Dmapreduce.map.output.compress=true \
+#-Dmapreduce.map.output.compress.codec=org.apache.hadoop.io.compress.Lz4Codec \
+#-Dmapreduce.reduce.java.opts=-Xmx1536m \
+#-Dmapreduce.reduce.memory.mb=2048 \
+#-Dmapreduce.task.io.sort.factor=100 \
+#-Dmapreduce.task.io.sort.mb=768 \
+#-Dyarn.app.mapreduce.am.resource.mb=1024 \
+#-Dmapred.reduce.tasks=100 \
+#-Dmapreduce.terasort.output.replication=1 \
+$HADOOP_PREFIX/bin/hadoop jar \
+ $HADOOP_PREFIX/share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar \
+ terasort \
+ -Dmapred.reduce.tasks=10 \
+ $INPUT $OUTPUT
+
+# Validate the sorted data
+#
+#-Ddfs.blocksize=512M \
+#-Dio.file.buffer.size=131072 \
+#-Dmapreduce.map.java.opts=-Xmx1536m \
+#-Dmapreduce.map.memory.mb=2048 \
+#-Dmapreduce.reduce.java.opts=-Xmx1536m \
+#-Dmapreduce.reduce.memory.mb=2048 \
+#-Dmapreduce.task.io.sort.mb=256 \
+#-Dyarn.app.mapreduce.am.resource.mb=1024 \
+#-Dmapred.reduce.tasks=1 \
+$HADOOP_PREFIX/bin/hadoop jar \
+ $HADOOP_PREFIX/share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar \
+ teravalidate \
+ -Dmapred.reduce.tasks=1 \
+ $OUTPUT $REPORT
+
+exit 0
diff --git a/src/ceph/qa/workunits/hadoop/wordcount.sh b/src/ceph/qa/workunits/hadoop/wordcount.sh
new file mode 100755
index 0000000..1ff057a
--- /dev/null
+++ b/src/ceph/qa/workunits/hadoop/wordcount.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+set -e
+set -x
+
+WC_INPUT=/wc_input
+WC_OUTPUT=/wc_output
+DATA_INPUT=$(mktemp -d)
+
+echo "starting hadoop-wordcount test"
+
+# bail if $TESTDIR is not set as this test will fail in that scenario
+[ -z $TESTDIR ] && { echo "\$TESTDIR needs to be set, but is not. Exiting."; exit 1; }
+
+# if HADOOP_PREFIX is not set, use default
+[ -z $HADOOP_PREFIX ] && { HADOOP_PREFIX=$TESTDIR/hadoop; }
+
+# Nuke hadoop directories
+$HADOOP_PREFIX/bin/hadoop fs -rm -r $WC_INPUT $WC_OUTPUT || true
+
+# Fetch and import testing data set
+curl http://download.ceph.com/qa/hadoop_input_files.tar | tar xf - -C $DATA_INPUT
+$HADOOP_PREFIX/bin/hadoop fs -copyFromLocal $DATA_INPUT $WC_INPUT
+rm -rf $DATA_INPUT
+
+# Run the job
+$HADOOP_PREFIX/bin/hadoop jar \
+ $HADOOP_PREFIX/share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar \
+ wordcount $WC_INPUT $WC_OUTPUT
+
+# Cleanup
+$HADOOP_PREFIX/bin/hadoop fs -rm -r $WC_INPUT $WC_OUTPUT || true
+
+echo "completed hadoop-wordcount test"
+exit 0