Skip to content
Snippets Groups Projects
Commit 496ef926 authored by František Dvořák
Browse files

Preprocess and convert internal table format

parent 16b627d1
No related branches found
No related tags found
No related merge requests found
......@@ -2,13 +2,20 @@
# --- Configuration -----------------------------------------------------------
# Hive database name is derived from the invoking user's login name; the JDBC
# URL targets that database and carries a principal= parameter.
DBNAME="$(id -un)_statistics"
JDBC_URL="jdbc:hive2://hador-c1.ics.muni.cz:10000/$DBNAME;principal=hive/hador-c1.ics.muni.cz@ICS.MUNI.CZ"

# YEAR may be preset by the caller; otherwise fall back to the current year.
# ${YEAR:-} keeps the check safe even when the script runs under `set -u`.
if [ -z "${YEAR:-}" ]; then
  YEAR=$(date '+%Y')
fi

# Directory holding this script (hive.sql.in is read from here). "$0" is
# quoted so a script path containing spaces is not split into several words.
BASE_PATH="$(dirname -- "$0")"
# Directory the script was started from; the <table>.csv inputs are read here.
LOCAL_PATH="$(pwd)"
# Per-user root directory on HDFS.
HDFS_PATH="/user/$(id -un)/statistics"

# Replacement text for the @OPTIONS@ placeholder when rendering the import
# tables: tells Hive to skip the first (header) line of each file.
OPTIONS_IMPORT='TBLPROPERTIES("skip.header.line.count"="1")'
# Not referenced in the visible part of the script; kept for compatibility.
OPTIONS_RCFILE=''

# Actions to run, taken from the command line; defaults to the full sequence.
# $* (not $@) is the correct form for flattening arguments into one string.
ACTIONS=${*:-'upload drop init process'}
TABLES='jobs subjobs nodes jobnodes counters jobcounters intervals'

# Reporting window as epoch seconds, evaluated with TZ=C:
# [Jan 1 of YEAR-1, Jan 1 of YEAR).
start_time=$(TZ=C date --date="$((YEAR - 1))-01-01" '+%s')
end_time=$(TZ=C date --date="${YEAR}-01-01" '+%s')

echo "Target HDFS path: ${HDFS_PATH}"
echo
......@@ -24,10 +31,22 @@ for action in ${ACTIONS}; do
upload)
# Upload action: wipe the HDFS target tree, recreate the directory layout and
# copy the local <table>.csv files into it.
# '|| :' makes a failing removal non-fatal.
hdfs dfs -rm -r ${HDFS_PATH} || :
# NOTE(review): the next two mkdir commands overlap (the second is a superset
# of the first); they look like the old and new revision of the same step --
# confirm which one should remain.
hdfs dfs -mkdir -p ${HDFS_PATH}/jobs/
hdfs dfs -mkdir -p ${HDFS_PATH}/jobs/ ${HDFS_PATH}/jobs-import/
for t in ${TABLES}; do
# NOTE(review): in 'cmd || : 2>/dev/null' the redirection is attached to the
# ':' no-op, not to hdfs, so hdfs error output is NOT silenced. Presumably
# 'hdfs dfs -mkdir ... 2>/dev/null || :' was intended -- confirm.
hdfs dfs -mkdir ${HDFS_PATH}/jobs/${t} || : 2>/dev/null
# NOTE(review): this puts the raw CSV into jobs/<table>/ while the put further
# down targets jobs-import/<table>/; likely old vs. new revision -- confirm
# whether both uploads are wanted.
hdfs dfs -put ${LOCAL_PATH}/${t}.csv ${HDFS_PATH}/jobs/${t}/
# Create the per-table directories in both trees (same redirection caveat as
# above), then upload the CSV into the import tree.
hdfs dfs -mkdir ${HDFS_PATH}/jobs/${t} ${HDFS_PATH}/jobs-import/${t} || : 2>/dev/null
hdfs dfs -put ${LOCAL_PATH}/${t}.csv ${HDFS_PATH}/jobs-import/${t}/
done
;;
process)
  # Process action: fill each final table from its <table>_import counterpart
  # via beeline. The order of the three groups matters: jobs must be loaded
  # before jobcounters/jobnodes, whose filter selects against the jobs table.
  # "$JDBC_URL" is quoted so the URL always reaches beeline as a single
  # argument, with no word splitting or globbing.

  # Copied without any filter.
  for t in counters intervals nodes; do
    beeline -u "$JDBC_URL" -e "INSERT OVERWRITE TABLE ${t} SELECT * FROM ${t}_import;"
  done

  # Restricted to rows overlapping the reporting window. start_time/end_time
  # are epoch seconds; the appended "000L" turns them into BIGINT literals
  # (presumably milliseconds -- confirm against the column units).
  for t in jobs subjobs; do
    beeline -u "$JDBC_URL" -e "INSERT OVERWRITE TABLE ${t} SELECT * FROM ${t}_import WHERE finish >= ${start_time}000L AND start < ${end_time}000L;"
  done

  # Restricted to rows whose jobid is present in the jobs table filled above.
  for t in jobcounters jobnodes; do
    beeline -u "$JDBC_URL" -e "INSERT OVERWRITE TABLE ${t} SELECT * FROM ${t}_import WHERE jobid IN (SELECT jobid FROM jobs);"
  done
  ;;
......@@ -36,9 +55,23 @@ for action in ${ACTIONS}; do
;;
init)
# Init action: render the hive.sql.in template into ./hive.sql.
# NOTE(review): the next three commands render the template with only
# @HDFS_PATH@ substituted and then run beeline, while the sed invocations
# further down rewrite hive.sql afterwards. This looks like the earlier
# revision of the step left next to its replacement -- confirm which one
# should stay.
sed -e "s,@HDFS_PATH@,$HDFS_PATH,g" "${BASE_PATH}/hive.sql.in" > hive.sql
beeline -u $JDBC_URL -e "CREATE DATABASE $DBNAME"
beeline -u $JDBC_URL -f hive.sql
# First pass (truncates hive.sql): render the template for the import tables:
#   @LOCAL_PATH@ -> jobs-import, @SUFFIX@ -> _import, @OPTIONS@ -> $OPTIONS_IMPORT
sed \
-e "s,@HDFS_PATH@,$HDFS_PATH,g" \
-e "s,@LOCAL_PATH@,jobs-import,g" \
-e "s,@SUFFIX@,_import,g" \
-e "s,@OPTIONS@,$OPTIONS_IMPORT,g" \
"${BASE_PATH}/hive.sql.in" > hive.sql
# Blank separator line between the two rendered copies.
echo >> hive.sql
# Second pass (appends): render the template again for the final tables:
#   @LOCAL_PATH@ -> jobs, @SUFFIX@ and @OPTIONS@ -> empty
sed \
-e "s,@HDFS_PATH@,$HDFS_PATH,g" \
-e "s,@LOCAL_PATH@,jobs,g" \
-e "s,@SUFFIX@,,g" \
-e "s,@OPTIONS@,,g" \
"${BASE_PATH}/hive.sql.in" >> hive.sql
# NOTE(review): the beeline calls that would follow the two-pass rendering are
# commented out, so the combined hive.sql is generated but not executed at
# this point -- confirm this is intentional and not a debugging leftover.
#beeline -u $JDBC_URL -e "CREATE DATABASE $DBNAME"
#beeline -u $JDBC_URL -f hive.sql
;;
esac
......
CREATE EXTERNAL TABLE jobs (
CREATE EXTERNAL TABLE jobs@SUFFIX@ (
id CHAR(80),
name CHAR(128),
user CHAR(20),
......@@ -16,8 +16,8 @@ CREATE EXTERNAL TABLE jobs (
changed TIMESTAMP
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE LOCATION '@HDFS_PATH@/jobs/jobs'
TBLPROPERTIES("skip.header.line.count"="1");
STORED AS TEXTFILE LOCATION '@HDFS_PATH@/@LOCAL_PATH@/jobs'
@OPTIONS@;
CREATE EXTERNAL TABLE subjobs (
......@@ -30,8 +30,8 @@ CREATE EXTERNAL TABLE subjobs (
finish BIGINT
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE LOCATION '@HDFS_PATH@/jobs/subjobs'
TBLPROPERTIES("skip.header.line.count"="1");
STORED AS TEXTFILE LOCATION '@HDFS_PATH@/@LOCAL_PATH@/subjobs'
@OPTIONS@;
CREATE EXTERNAL TABLE jobnodes (
......@@ -42,8 +42,8 @@ CREATE EXTERNAL TABLE jobnodes (
nreduce INT
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE LOCATION '@HDFS_PATH@/jobs/jobnodes'
TBLPROPERTIES("skip.header.line.count"="1");
STORED AS TEXTFILE LOCATION '@HDFS_PATH@/@LOCAL_PATH@/jobnodes'
@OPTIONS@;
CREATE EXTERNAL TABLE jobcounters (
......@@ -55,8 +55,8 @@ CREATE EXTERNAL TABLE jobcounters (
total BIGINT
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE LOCATION '@HDFS_PATH@/jobs/jobcounters'
TBLPROPERTIES("skip.header.line.count"="1");
STORED AS TEXTFILE LOCATION '@HDFS_PATH@/@LOCAL_PATH@/jobcounters'
@OPTIONS@;
CREATE EXTERNAL TABLE counters (
......@@ -65,8 +65,8 @@ CREATE EXTERNAL TABLE counters (
name CHAR(128)
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE LOCATION '@HDFS_PATH@/jobs/counters'
TBLPROPERTIES("skip.header.line.count"="1");
STORED AS TEXTFILE LOCATION '@HDFS_PATH@/@LOCAL_PATH@/counters'
@OPTIONS@;
CREATE EXTERNAL TABLE nodes (
......@@ -74,8 +74,8 @@ CREATE EXTERNAL TABLE nodes (
host VARCHAR(256)
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE LOCATION '@HDFS_PATH@/jobs/nodes'
TBLPROPERTIES("skip.header.line.count"="1");
STORED AS TEXTFILE LOCATION '@HDFS_PATH@/@LOCAL_PATH@/nodes'
@OPTIONS@;
CREATE EXTERNAL TABLE intervals (
......@@ -83,5 +83,6 @@ CREATE EXTERNAL TABLE intervals (
finish INT
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE LOCATION '@HDFS_PATH@/jobs/intervals/'
TBLPROPERTIES("skip.header.line.count"="1");
STORED AS TEXTFILE LOCATION '@HDFS_PATH@/@LOCAL_PATH@/intervals'
@OPTIONS@;
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment