summaryrefslogtreecommitdiffstats
path: root/src/ceph/qa/workunits/hadoop/wordcount.sh
blob: 1ff057a7f4a885969a35dd00555953ed8dd6a4d5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
#!/bin/bash

set -e
set -x

WC_INPUT=/wc_input
WC_OUTPUT=/wc_output
DATA_INPUT=$(mktemp -d)

echo "starting hadoop-wordcount test"

# bail if $TESTDIR is not set as this test will fail in that scenario
[ -z $TESTDIR ] && { echo "\$TESTDIR needs to be set, but is not. Exiting."; exit 1; }

# if HADOOP_PREFIX is not set, use default
[ -z $HADOOP_PREFIX ] && { HADOOP_PREFIX=$TESTDIR/hadoop; }

# Nuke hadoop directories
$HADOOP_PREFIX/bin/hadoop fs -rm -r $WC_INPUT $WC_OUTPUT || true

# Fetch and import testing data set
curl http://download.ceph.com/qa/hadoop_input_files.tar | tar xf - -C $DATA_INPUT
$HADOOP_PREFIX/bin/hadoop fs -copyFromLocal $DATA_INPUT $WC_INPUT
rm -rf $DATA_INPUT

# Run the job
$HADOOP_PREFIX/bin/hadoop jar \
  $HADOOP_PREFIX/share/hadoop/mapreduce/hadoop-mapreduce-examples-*.jar \
  wordcount $WC_INPUT $WC_OUTPUT

# Cleanup
$HADOOP_PREFIX/bin/hadoop fs -rm -r $WC_INPUT $WC_OUTPUT || true

echo "completed hadoop-wordcount test"
exit 0