Skip to content
Snippets Groups Projects
Commit 4ec32817 authored by František Dvořák's avatar František Dvořák
Browse files

Initial commit

parents
No related branches found
No related tags found
No related merge requests found
#! /bin/sh
# Dump the bookkeeping tables from MySQL, one <table>.csv file per table in
# the current directory. mysql runs in batch mode (-B), which prints
# tab-separated rows.
USER=bkread
DBNAME=bookkeeping

echo "Password for '${USER}': "
# Turn terminal echo back on however the prompt ends, so an interrupted run
# does not leave the terminal silent.
trap 'stty echo' EXIT
trap 'exit 1' HUP INT TERM
stty -echo
# IFS= and -r keep surrounding blanks and backslashes in the password intact.
IFS= read -r PASSWORD
stty echo
trap - EXIT HUP INT TERM

for t in jobs subjobs nodes jobnodes counters jobcounters; do
  # The password is handed over in the environment of the mysql process only,
  # not on its command line where it would show up in the process list.
  echo "SELECT * FROM ${t}" \
    | MYSQL_PWD="${PASSWORD}" mysql -B -u "${USER}" "${DBNAME}" > "${t}.csv"
done
1.1.2015 ... 1.1.2016
1.1.2016 ... 1.1.2017
1420070400 ... 1451606400
1451606400 ... 1483228800
-- Jobs that started at or after 1420070400 and finished before 1451606400
-- (epoch seconds, multiplied by 1000 before comparing with the columns).
SELECT id, name, user, submit, start, finish,
       memory_seconds, cpu_seconds, nmap, nreduce
FROM jobs
WHERE start >= 1420070400L*1000
  AND finish < 1451606400L*1000;
#! /bin/sh
# Upload the local <table>.csv dumps to HDFS and (re)create the Hive database
# that reads them.
#
# Arguments: any of the actions below, run in the order given.
#   upload  replace ${HDFS_PATH} with the local <table>.csv files
#   drop    drop the Hive database (a failure is ignored)
#   init    create the Hive database and run ./hive.sql
# Without arguments the actions are: upload drop init.

# The options are set here instead of on the #! line so that they also apply
# when the script is started as "sh <script>".
set -xe

DBNAME="$(id -un)_statistics"
JDBC_URL="jdbc:hive2://hador-c1.ics.muni.cz:10000/$DBNAME;principal=hive/hador-c1.ics.muni.cz@ICS.MUNI.CZ"
LOCAL_PATH="$(pwd)"
HDFS_PATH="/user/$(id -un)/statistics"
ACTIONS="${*:-upload drop init}"
TABLES='jobs subjobs nodes jobnodes counters jobcounters intervals'

echo "Target HDFS path: ${HDFS_PATH}"
echo
echo "Local data:"
# The glob must stay outside the quotes, otherwise it is never expanded.
ls -la "${LOCAL_PATH}"/*.csv || :
echo
echo "Continue? (CTRL-C for quit)"
read -r X

for action in ${ACTIONS}; do
  case "${action}" in
    upload)
      # The target may not exist yet, so a failed removal is not an error.
      hdfs dfs -rm -r "${HDFS_PATH}" || :
      hdfs dfs -mkdir -p "${HDFS_PATH}/jobs/"
      for t in ${TABLES}; do
        # One directory per table, below the path announced above.
        hdfs dfs -mkdir -p "${HDFS_PATH}/jobs/${t}"
        hdfs dfs -put "${LOCAL_PATH}/${t}.csv" "${HDFS_PATH}/jobs/${t}/"
      done
      ;;
    drop)
      # The database may not exist yet, so a failed drop is not an error.
      beeline -u "$JDBC_URL" -e "DROP DATABASE $DBNAME CASCADE" || :
      ;;
    init)
      beeline -u "$JDBC_URL" -e "CREATE DATABASE $DBNAME"
      # NOTE(review): if the table definitions in hive.sql use fixed LOCATION
      # paths, confirm that they match ${HDFS_PATH}.
      beeline -u "$JDBC_URL" -f ./hive.sql
      ;;
    *)
      echo "Unknown action '${action}' ignored" >&2
      ;;
  esac
done
-- jobs: external table over tab-separated text files, the first line of each
-- file is skipped as a header.
-- NOTE(review): submit, start and finish are presumably epoch milliseconds,
-- the queries compare them with epoch seconds times 1000 - confirm.
-- NOTE(review): user and start may be reserved words on newer Hive releases,
-- confirm against the target version.
CREATE EXTERNAL TABLE jobs (
  id             CHAR(80),
  name           CHAR(128),
  user           CHAR(20),
  status         CHAR(20),
  queue          CHAR(80),
  submit         BIGINT,
  start          BIGINT,
  finish         BIGINT,
  memory_seconds BIGINT,
  cpu_seconds    INT,
  nmap           INT,
  nreduce        INT,
  changed        TIMESTAMP
)
ROW FORMAT DELIMITED
  FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE
LOCATION '/user/valtri/statistics/jobs/jobs'
TBLPROPERTIES ("skip.header.line.count"="1");
-- subjobs: external table over tab-separated text files, the first line of
-- each file is skipped as a header.
-- NOTE(review): jobid and nodeid presumably refer to jobs.id and nodes.id -
-- confirm against the source schema.
CREATE EXTERNAL TABLE subjobs (
  id     CHAR(80),
  jobid  CHAR(80),
  nodeid INT,
  state  CHAR(20),
  type   CHAR(20),
  start  BIGINT,
  finish BIGINT
)
ROW FORMAT DELIMITED
  FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE
LOCATION '/user/valtri/statistics/jobs/subjobs'
TBLPROPERTIES ("skip.header.line.count"="1");
-- jobnodes: external table over tab-separated text files, the first line of
-- each file is skipped as a header.
CREATE EXTERNAL TABLE jobnodes (
  jobid   CHAR(80),
  nodeid  INT,
  elapsed INT,
  nmap    INT,
  nreduce INT
)
ROW FORMAT DELIMITED
  FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE
LOCATION '/user/valtri/statistics/jobs/jobnodes'
TBLPROPERTIES ("skip.header.line.count"="1");
-- jobcounters: external table over tab-separated text files, the first line
-- of each file is skipped as a header.
-- NOTE(review): counterid presumably refers to counters.id - confirm against
-- the source schema.
CREATE EXTERNAL TABLE jobcounters (
  jobid     CHAR(80),
  counterid INT,
  nreduce   BIGINT,
  nmap      BIGINT,
  total     BIGINT
)
ROW FORMAT DELIMITED
  FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE
LOCATION '/user/valtri/statistics/jobs/jobcounters'
TBLPROPERTIES ("skip.header.line.count"="1");
-- counters: external table over tab-separated text files, the first line of
-- each file is skipped as a header.
CREATE EXTERNAL TABLE counters (
  id        INT,
  groupName CHAR(128),
  name      CHAR(128)
)
ROW FORMAT DELIMITED
  FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE
LOCATION '/user/valtri/statistics/jobs/counters'
TBLPROPERTIES ("skip.header.line.count"="1");
-- nodes: external table over tab-separated text files, the first line of
-- each file is skipped as a header.
CREATE EXTERNAL TABLE nodes (
  id   INT,
  host VARCHAR(256)
)
ROW FORMAT DELIMITED
  FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE
LOCATION '/user/valtri/statistics/jobs/nodes'
TBLPROPERTIES ("skip.header.line.count"="1");
-- intervals: external table over tab-separated text files, the first line of
-- each file is skipped as a header.
-- NOTE(review): start and finish are INT here but BIGINT in jobs and subjobs,
-- presumably epoch seconds rather than milliseconds - confirm.
CREATE EXTERNAL TABLE intervals (
  start  INT,
  finish INT
)
ROW FORMAT DELIMITED
  FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE
LOCATION '/user/valtri/statistics/jobs/intervals/'
TBLPROPERTIES ("skip.header.line.count"="1");
start finish
1420070400 1451606400
1451606400 1483228800
-- Statistics over rows whose finish is at or after 1451606400 and whose start
-- is before 1483228800 (epoch seconds, multiplied by 1000 before comparing
-- with the columns).

-- Total CPU seconds of the matching jobs.
SELECT SUM(cpu_seconds) AS cpu_seconds
FROM jobs
WHERE finish >= 1451606400L*1000
  AND start < 1483228800L*1000;

-- CPU seconds and job count per user, largest CPU consumer first.
SELECT user,
       SUM(cpu_seconds) AS cpu_seconds,
       COUNT(*) AS jobs
FROM jobs
WHERE finish >= 1451606400L*1000
  AND start < 1483228800L*1000
GROUP BY user
ORDER BY cpu_seconds DESC;

-- Number of matching jobs.
SELECT COUNT(*) AS njobs
FROM jobs
WHERE finish >= 1451606400L*1000
  AND start < 1483228800L*1000;

-- Number of matching subjobs.
SELECT COUNT(*) AS njobs
FROM subjobs
WHERE finish >= 1451606400L*1000
  AND start < 1483228800L*1000;

-- Number of distinct users among the matching jobs.
SELECT COUNT(DISTINCT user)
FROM jobs
WHERE finish >= 1451606400L*1000
  AND start < 1483228800L*1000;
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment