Skip to content
Snippets Groups Projects
Commit 496ef926 authored by František Dvořák
Browse files

Preprocess and convert internal table format

parent 16b627d1
No related branches found
No related tags found
No related merge requests found
...@@ -2,13 +2,20 @@ ...@@ -2,13 +2,20 @@
DBNAME="`id -un`_statistics" DBNAME="`id -un`_statistics"
JDBC_URL="jdbc:hive2://hador-c1.ics.muni.cz:10000/$DBNAME;principal=hive/hador-c1.ics.muni.cz@ICS.MUNI.CZ" JDBC_URL="jdbc:hive2://hador-c1.ics.muni.cz:10000/$DBNAME;principal=hive/hador-c1.ics.muni.cz@ICS.MUNI.CZ"
if [ -z "$YEAR" ]; then
YEAR=`date '+%Y'`
fi
BASE_PATH="`dirname $0`" BASE_PATH="`dirname $0`"
LOCAL_PATH="`pwd`" LOCAL_PATH="`pwd`"
HDFS_PATH="/user/`id -un`/statistics" HDFS_PATH="/user/`id -un`/statistics"
OPTIONS_IMPORT='TBLPROPERTIES("skip.header.line.count"="1")'
OPTIONS_RCFILE=''
ACTIONS=${@:-'upload drop init'} ACTIONS=${@:-'upload drop init process'}
TABLES='jobs subjobs nodes jobnodes counters jobcounters intervals' TABLES='jobs subjobs nodes jobnodes counters jobcounters intervals'
start_time=`TZ=C date --date="$((YEAR-1))-01-01" '+%s'`
end_time=`TZ=C date --date="$YEAR-01-01" '+%s'`
echo "Target HDFS path: ${HDFS_PATH}" echo "Target HDFS path: ${HDFS_PATH}"
echo echo
...@@ -24,10 +31,22 @@ for action in ${ACTIONS}; do ...@@ -24,10 +31,22 @@ for action in ${ACTIONS}; do
upload) upload)
hdfs dfs -rm -r ${HDFS_PATH} || : hdfs dfs -rm -r ${HDFS_PATH} || :
hdfs dfs -mkdir -p ${HDFS_PATH}/jobs/ hdfs dfs -mkdir -p ${HDFS_PATH}/jobs/ ${HDFS_PATH}/jobs-import/
for t in ${TABLES}; do for t in ${TABLES}; do
hdfs dfs -mkdir ${HDFS_PATH}/jobs/${t} || : 2>/dev/null hdfs dfs -mkdir ${HDFS_PATH}/jobs/${t} ${HDFS_PATH}/jobs-import/${t} || : 2>/dev/null
hdfs dfs -put ${LOCAL_PATH}/${t}.csv ${HDFS_PATH}/jobs/${t}/ hdfs dfs -put ${LOCAL_PATH}/${t}.csv ${HDFS_PATH}/jobs-import/${t}/
done
;;
process)
for t in counters intervals nodes; do
beeline -u $JDBC_URL -e "INSERT OVERWRITE TABLE ${t} SELECT * FROM ${t}_import;"
done
for t in jobs subjobs; do
beeline -u $JDBC_URL -e "INSERT OVERWRITE TABLE ${t} SELECT * FROM ${t}_import WHERE finish >= ${start_time}000L AND start < ${end_time}000L;"
done
for t in jobcounters jobnodes; do
beeline -u $JDBC_URL -e "INSERT OVERWRITE TABLE ${t} SELECT * FROM ${t}_import WHERE jobid IN (SELECT jobid FROM jobs);"
done done
;; ;;
...@@ -36,9 +55,23 @@ for action in ${ACTIONS}; do ...@@ -36,9 +55,23 @@ for action in ${ACTIONS}; do
;; ;;
init) init)
sed -e "s,@HDFS_PATH@,$HDFS_PATH,g" "${BASE_PATH}/hive.sql.in" > hive.sql sed \
beeline -u $JDBC_URL -e "CREATE DATABASE $DBNAME" -e "s,@HDFS_PATH@,$HDFS_PATH,g" \
beeline -u $JDBC_URL -f hive.sql -e "s,@LOCAL_PATH@,jobs-import,g" \
-e "s,@SUFFIX@,_import,g" \
-e "s,@OPTIONS@,$OPTIONS_IMPORT,g" \
"${BASE_PATH}/hive.sql.in" > hive.sql
echo >> hive.sql
sed \
-e "s,@HDFS_PATH@,$HDFS_PATH,g" \
-e "s,@LOCAL_PATH@,jobs,g" \
-e "s,@SUFFIX@,,g" \
-e "s,@OPTIONS@,,g" \
"${BASE_PATH}/hive.sql.in" >> hive.sql
#beeline -u $JDBC_URL -e "CREATE DATABASE $DBNAME"
#beeline -u $JDBC_URL -f hive.sql
;; ;;
esac esac
......
CREATE EXTERNAL TABLE jobs ( CREATE EXTERNAL TABLE jobs@SUFFIX@ (
id CHAR(80), id CHAR(80),
name CHAR(128), name CHAR(128),
user CHAR(20), user CHAR(20),
...@@ -16,8 +16,8 @@ CREATE EXTERNAL TABLE jobs ( ...@@ -16,8 +16,8 @@ CREATE EXTERNAL TABLE jobs (
changed TIMESTAMP changed TIMESTAMP
) )
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE LOCATION '@HDFS_PATH@/jobs/jobs' STORED AS TEXTFILE LOCATION '@HDFS_PATH@/@LOCAL_PATH@/jobs'
TBLPROPERTIES("skip.header.line.count"="1"); @OPTIONS@;
CREATE EXTERNAL TABLE subjobs ( CREATE EXTERNAL TABLE subjobs (
...@@ -30,8 +30,8 @@ CREATE EXTERNAL TABLE subjobs ( ...@@ -30,8 +30,8 @@ CREATE EXTERNAL TABLE subjobs (
finish BIGINT finish BIGINT
) )
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE LOCATION '@HDFS_PATH@/jobs/subjobs' STORED AS TEXTFILE LOCATION '@HDFS_PATH@/@LOCAL_PATH@/subjobs'
TBLPROPERTIES("skip.header.line.count"="1"); @OPTIONS@;
CREATE EXTERNAL TABLE jobnodes ( CREATE EXTERNAL TABLE jobnodes (
...@@ -42,8 +42,8 @@ CREATE EXTERNAL TABLE jobnodes ( ...@@ -42,8 +42,8 @@ CREATE EXTERNAL TABLE jobnodes (
nreduce INT nreduce INT
) )
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE LOCATION '@HDFS_PATH@/jobs/jobnodes' STORED AS TEXTFILE LOCATION '@HDFS_PATH@/@LOCAL_PATH@/jobnodes'
TBLPROPERTIES("skip.header.line.count"="1"); @OPTIONS@;
CREATE EXTERNAL TABLE jobcounters ( CREATE EXTERNAL TABLE jobcounters (
...@@ -55,8 +55,8 @@ CREATE EXTERNAL TABLE jobcounters ( ...@@ -55,8 +55,8 @@ CREATE EXTERNAL TABLE jobcounters (
total BIGINT total BIGINT
) )
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE LOCATION '@HDFS_PATH@/jobs/jobcounters' STORED AS TEXTFILE LOCATION '@HDFS_PATH@/@LOCAL_PATH@/jobcounters'
TBLPROPERTIES("skip.header.line.count"="1"); @OPTIONS@;
CREATE EXTERNAL TABLE counters ( CREATE EXTERNAL TABLE counters (
...@@ -65,8 +65,8 @@ CREATE EXTERNAL TABLE counters ( ...@@ -65,8 +65,8 @@ CREATE EXTERNAL TABLE counters (
name CHAR(128) name CHAR(128)
) )
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE LOCATION '@HDFS_PATH@/jobs/counters' STORED AS TEXTFILE LOCATION '@HDFS_PATH@/@LOCAL_PATH@/counters'
TBLPROPERTIES("skip.header.line.count"="1"); @OPTIONS@;
CREATE EXTERNAL TABLE nodes ( CREATE EXTERNAL TABLE nodes (
...@@ -74,8 +74,8 @@ CREATE EXTERNAL TABLE nodes ( ...@@ -74,8 +74,8 @@ CREATE EXTERNAL TABLE nodes (
host VARCHAR(256) host VARCHAR(256)
) )
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE LOCATION '@HDFS_PATH@/jobs/nodes' STORED AS TEXTFILE LOCATION '@HDFS_PATH@/@LOCAL_PATH@/nodes'
TBLPROPERTIES("skip.header.line.count"="1"); @OPTIONS@;
CREATE EXTERNAL TABLE intervals ( CREATE EXTERNAL TABLE intervals (
...@@ -83,5 +83,6 @@ CREATE EXTERNAL TABLE intervals ( ...@@ -83,5 +83,6 @@ CREATE EXTERNAL TABLE intervals (
finish INT finish INT
) )
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE LOCATION '@HDFS_PATH@/jobs/intervals/' STORED AS TEXTFILE LOCATION '@HDFS_PATH@/@LOCAL_PATH@/intervals'
TBLPROPERTIES("skip.header.line.count"="1"); @OPTIONS@;
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment