
Monday, June 25, 2018

Building a virtual machine (CentOS 7.4) with Ambari (HDP 2.6)/HDFS/HBase using Vagrant

Apache HBase is a column-oriented distributed database.

〇 Ambari screen

Access http://192.168.1.117:8080/ in a browser. The username/password is admin/admin.

〇 How to build
The following Vagrantfile builds a virtual machine (CentOS 7.4) with Ambari/HDFS/HBase installed.

Vagrantfile
VAGRANTFILE_API_VERSION = "2"

Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
  config.vm.box = "bento/centos-7.4"
  config.vm.hostname = "co74hbase.vm.internal"
  config.vm.provider :virtualbox do |vbox|
     vbox.name = "co74hbase.vm.internal"
     vbox.cpus = 4
     vbox.memory = 10240 
     vbox.customize ["modifyvm", :id, "--nicpromisc2","allow-all"]
  end
  # private network
  config.vm.network "private_network", ip: "192.168.55.117", :netmask => "255.255.255.0"
  # bridge network
  config.vm.network "public_network", ip: "192.168.1.117", :netmask => "255.255.255.0"
  config.vm.provision "shell", inline: <<-SHELL
localectl set-locale LANG=ja_JP.UTF-8
echo "192.168.55.117 co74hbase" >> /etc/hosts

cd /root
mkdir ./.ssh
ssh-keygen -f ./.ssh/id_rsa -t rsa -N ''

# copy private key
cp -f ./.ssh/id_rsa /vagrant
cat ./.ssh/id_rsa.pub >> ./.ssh/authorized_keys
chmod 600 ./.ssh/authorized_keys

# install and configure ambari server
cd /etc/yum.repos.d/
wget http://public-repo-1.hortonworks.com/ambari/centos7/2.x/updates/2.6.2.0/ambari.repo
yum -y install ambari-server ambari-agent
ambari-server setup --silent
ambari-server start

# create hive database and hive user
echo "host    all         all         127.0.0.1/32          password" >> /var/lib/pgsql/data/pg_hba.conf
echo "host    all         all         192.168.1.0/24          password" >> /var/lib/pgsql/data/pg_hba.conf
echo "host    all         all         192.168.55.0/24          password" >> /var/lib/pgsql/data/pg_hba.conf
service postgresql restart

yum -y install postgresql-jdbc*
chmod 644 /usr/share/java/postgresql-jdbc.jar

ambari-server setup -s --database=postgres --databasehost=localhost --databaseport=5432 --databasename=hive --databaseusername=hive --databasepassword=hive --jdbc-db=postgres --jdbc-driver=/usr/share/java/postgresql-jdbc.jar
ambari-agent start


cat << EOF > /home/vagrant/cluster_configuration.json
{
  "configurations" : [
    {
      "hive-site": {
        "hive.support.concurrency": "true",
        "hive.txn.manager": "org.apache.hadoop.hive.ql.lockmgr.DbTxnManager",
        "hive.compactor.initiator.on": "true",
        "hive.compactor.worker.threads": "5",
        "javax.jdo.option.ConnectionDriverName": "org.postgresql.Driver",
        "javax.jdo.option.ConnectionPassword": "hive",
        "javax.jdo.option.ConnectionURL": "jdbc:postgresql://localhost/hive",
        "javax.jdo.option.ConnectionUserName": "hive"
      }
    },
    {
      "hive-env": {
        "hive_ambari_database": "PostgreSQL",
        "hive_database": "Existing PostgreSQL Database",
        "hive_database_type": "postgres",
        "hive_database_name": "hive"
      }
    },
    {
      "core-site": {
        "properties" : {
          "hadoop.proxyuser.root.groups" : "*",
          "hadoop.proxyuser.root.hosts" : "*"
        }
      }
    }
  ],
  "host_groups" : [
    {
      "name" : "host_group_1",
      "components" : [
        {
          "name" : "NAMENODE"
        },
        {
          "name" : "SECONDARY_NAMENODE"
        },
        {
          "name" : "DATANODE"
        },
        {
          "name" : "HDFS_CLIENT"
        },
        {
          "name" : "RESOURCEMANAGER"
        },
        {
          "name" : "NODEMANAGER"
        },
        {
          "name" : "YARN_CLIENT"
        },
        {
          "name" : "HISTORYSERVER"
        },
        {
          "name" : "APP_TIMELINE_SERVER"
        },
        {
          "name" : "ZOOKEEPER_SERVER"
        },
        {
          "name" : "ZOOKEEPER_CLIENT"
        },
        {
          "name" : "METRICS_MONITOR"
        },
        {
          "name" : "METRICS_COLLECTOR"
        },
        {
          "name" : "HBASE_CLIENT"
        },
        {
          "name" : "HBASE_MASTER"
        },
        {
          "name" : "HBASE_REGIONSERVER"
        }
      ],
      "cardinality" : "1"
    }
  ],
  "settings" : [{
     "recovery_settings" : [{
       "recovery_enabled" : "true"
    }]
  }],
  "Blueprints" : {
    "blueprint_name" : "hdp26-minimal-hbase",
    "stack_name" : "HDP",
    "stack_version" : "2.6"
  }
}
EOF

curl -H "X-Requested-By: ambari" -X POST -u admin:admin http://localhost:8080/api/v1/blueprints/hdp26-minimal-hbase -d @/home/vagrant/cluster_configuration.json

cat << EOF > /home/vagrant/hostmapping.json
{
  "blueprint" : "hdp26-minimal-hbase",
  "default_password" : "admin",
  "provision_action" : "INSTALL_AND_START",
  "host_groups" :[
    {
      "name" : "host_group_1",
      "hosts" : [
        {
          "fqdn" : "co74hbase.vm.internal"
        }
      ]
    }
  ]
}
EOF

curl -H "X-Requested-By: ambari" -X POST -u admin:admin http://localhost:8080/api/v1/clusters/hdp26-minimal-hbase -d @/home/vagrant/hostmapping.json
sleep 30


# wait until the cluster is ready.
ProgressPercent=`curl -s --user admin:admin -X GET http://localhost:8080/api/v1/clusters/hdp26-minimal-hbase/requests/1 | grep progress_percent | awk '{print $3}' | cut -d . -f 1`
while [[ `echo $ProgressPercent | grep -v 100` ]]; do
  ProgressPercent=`curl -s --user admin:admin -X GET http://localhost:8080/api/v1/clusters/hdp26-minimal-hbase/requests/1 | grep progress_percent | awk '{print $3}' | cut -d . -f 1`
  echo " Progress: $ProgressPercent %"
  sleep 10
done


cat << EOF > /home/vagrant/shutdown_components.sh
#!/bin/bash
#stop all services
curl -u admin:admin -i -H 'X-Requested-By: ambari' -X PUT \
   -d '{"RequestInfo":{"context":"_PARSE_.STOP.ALL_SERVICES","operation_level":{"level":"CLUSTER","cluster_name":"hdp26-minimal-hbase"}},"Body":{"ServiceInfo":{"state":"INSTALLED"}}}' \
   http://localhost:8080/api/v1/clusters/hdp26-minimal-hbase/services
EOF
chmod +x /home/vagrant/shutdown_components.sh


# execute commands for test...
cat << EOF > test.txt
create 'test', 'cf'
list 'test'
put 'test', 'row1', 'cf:message_id', '100'
put 'test', 'row1', 'cf:message', 'hello'
put 'test', 'row2', 'cf:message_id', '200'
put 'test', 'row2', 'cf:message', 'world'
scan 'test'
get 'test', 'row1'
get 'test', 'row2'
exit
EOF
hbase shell test.txt

echo 'access -> http://192.168.1.117:8080'
echo 'user/password -> admin/admin'
SHELL
end
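
For reference, a minimal sketch of bringing the machine up and checking the result from the host; the cluster name, address, and credentials are the ones used in the Vagrantfile above.

vagrant up

# list each service's state through the Ambari REST API (admin/admin)
curl -s -u admin:admin -H 'X-Requested-By: ambari' \
  'http://192.168.1.117:8080/api/v1/clusters/hdp26-minimal-hbase/services?fields=ServiceInfo/state'

# open an interactive HBase shell inside the VM
vagrant ssh -c 'hbase shell'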

○ Related information
・See here for other articles about Apache HBase.
・See here for other articles about Ambari.

Wednesday, June 20, 2018

Building a virtual machine (Ubuntu 16.04) with Ambari (HDP 2.6)/HDFS/HBase/Phoenix using Vagrant

Apache Phoenix is an RDB layer built on top of HBase.

〇 Ambari screen

Access http://192.168.1.117:8080/ in a browser. The username/password is admin/admin.

〇 How to build
The following Vagrantfile builds a virtual machine (Ubuntu 16.04) with Ambari/HDFS/HBase/Phoenix installed.

Vagrantfile
VAGRANTFILE_API_VERSION = "2"

Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
  config.vm.box = "bento/ubuntu-16.04"
  config.vm.hostname = "ub1604phoenix.vm.internal"
  config.vm.provider :virtualbox do |vbox|
     vbox.name = "ub1604phoenix.vm.internal"
     vbox.cpus = 4
     vbox.memory = 10240 
     vbox.customize ["modifyvm", :id, "--nicpromisc2","allow-all"]
  end
  # private network
  config.vm.network "private_network", ip: "192.168.55.117", :netmask => "255.255.255.0"
  # bridge network
  config.vm.network "public_network", ip: "192.168.1.117", :netmask => "255.255.255.0"
  config.vm.provision "shell", inline: <<-SHELL
echo "192.168.55.117 ub1604phoenix" >> /etc/hosts
apt-get -y install curl

cd /root
mkdir ./.ssh
ssh-keygen -f ./.ssh/id_rsa -t rsa -N ''

# copy private key
cp -f ./.ssh/id_rsa /vagrant
cat ./.ssh/id_rsa.pub >> ./.ssh/authorized_keys
chmod 600 ./.ssh/authorized_keys

# install and configure ambari server
wget -O /etc/apt/sources.list.d/ambari.list http://public-repo-1.hortonworks.com/ambari/ubuntu16/2.x/updates/2.6.2.0/ambari.list
apt-key adv --recv-keys --keyserver keyserver.ubuntu.com B9733A7A07513CAD
apt-get update

# install postgresql
apt-get -y install postgresql 
echo "listen_addresses='*'" >> /etc/postgresql/9.5/main/postgresql.conf

sed -i 's/host.*all.*all.*127.0.0.1/#host    all             all             127.0.0.1/g' /etc/postgresql/9.5/main/pg_hba.conf

echo "host    all         all         127.0.0.1/32          password" >> /etc/postgresql/9.5/main/pg_hba.conf
echo "host    all         all         192.168.1.0/24          password" >> /etc/postgresql/9.5/main/pg_hba.conf
echo "host    all         all         192.168.55.0/24          password" >> /etc/postgresql/9.5/main/pg_hba.conf
echo "host    all         hive         127.0.0.1/32          password" >> /etc/postgresql/9.5/main/pg_hba.conf

# create hive database and hive user
su - postgres << EOF
createdb -T template0 --encoding=UTF8 ambari
psql -c "
alter user postgres with password 'postgres';
create user ambari with password 'ambari';
grant all privileges on database ambari to ambari;
"
EOF
echo "postgres:postgres" | chpasswd
systemctl restart postgresql.service

# install jdbc driver for postgresql
wget https://jdbc.postgresql.org/download/postgresql-42.2.2.jar
mkdir -p /opt/jdbc
cp postgresql-42.2.2.jar /opt/jdbc/postgresql-jdbc.jar
chmod 644 /opt/jdbc/postgresql-jdbc.jar


# install ambari
apt-get -y install ambari-server ambari-agent ambari-metrics-assembly 

ambari-server setup -s --database=postgres --databasehost=localhost --databaseport=5432 --databasename=ambari --databaseusername=ambari --databasepassword=ambari --jdbc-db=postgres --jdbc-driver=/opt/jdbc/postgresql-jdbc.jar
ambari-server setup --silent
ambari-server start
ambari-agent start


cat << EOF > /home/vagrant/cluster_configuration.json
{
  "configurations" : [
    {
      "hbase-env" : {
        "properties_attributes" : { },
        "properties" : {
          "phoenix_sql_enabled" : "true"
        }
      }
    },
    {
      "core-site": {
        "properties" : {
          "hadoop.proxyuser.root.groups" : "*",
          "hadoop.proxyuser.root.hosts" : "*"
        }
      }
    }
  ],
  "host_groups" : [
    {
      "name" : "host_group_1",
      "components" : [
        {
          "name" : "NAMENODE"
        },
        {
          "name" : "SECONDARY_NAMENODE"
        },
        {
          "name" : "DATANODE"
        },
        {
          "name" : "HDFS_CLIENT"
        },
        {
          "name" : "RESOURCEMANAGER"
        },
        {
          "name" : "NODEMANAGER"
        },
        {
          "name" : "YARN_CLIENT"
        },
        {
          "name" : "HISTORYSERVER"
        },
        {
          "name" : "APP_TIMELINE_SERVER"
        },
        {
          "name" : "ZOOKEEPER_SERVER"
        },
        {
          "name" : "ZOOKEEPER_CLIENT"
        },
        {
          "name" : "METRICS_MONITOR"
        },
        {
          "name" : "METRICS_COLLECTOR"
        },
        {
          "name" : "HBASE_CLIENT"
        },
        {
          "name" : "HBASE_MASTER"
        },
        {
          "name" : "HBASE_REGIONSERVER"
        }
      ],
      "cardinality" : "1"
    }
  ],
  "settings" : [{
     "recovery_settings" : [{
       "recovery_enabled" : "true"
    }]
  }],
  "Blueprints" : {
    "blueprint_name" : "hdp26-minimal-phoenix",
    "stack_name" : "HDP",
    "stack_version" : "2.6"
  }
}
EOF

curl -H "X-Requested-By: ambari" -X POST -u admin:admin http://localhost:8080/api/v1/blueprints/hdp26-minimal-phoenix -d @/home/vagrant/cluster_configuration.json

cat << EOF > /home/vagrant/hostmapping.json
{
  "blueprint" : "hdp26-minimal-phoenix",
  "default_password" : "admin",
  "provision_action" : "INSTALL_AND_START",
  "host_groups" :[
    {
      "name" : "host_group_1",
      "hosts" : [
        {
          "fqdn" : "ub1604phoenix.vm.internal"
        }
      ]
    }
  ]
}
EOF

curl -H "X-Requested-By: ambari" -X POST -u admin:admin http://localhost:8080/api/v1/clusters/hdp26-minimal-phoenix -d @/home/vagrant/hostmapping.json
sleep 30


# wait until the cluster is ready.
ProgressPercent=`curl -s --user admin:admin -X GET http://localhost:8080/api/v1/clusters/hdp26-minimal-phoenix/requests/1 | grep progress_percent | awk '{print $3}' | cut -d . -f 1`
while [[ `echo $ProgressPercent | grep -v 100` ]]; do
  ProgressPercent=`curl -s --user admin:admin -X GET http://localhost:8080/api/v1/clusters/hdp26-minimal-phoenix/requests/1 | grep progress_percent | awk '{print $3}' | cut -d . -f 1`
  echo " Progress: $ProgressPercent %"
  sleep 10
done


cat << EOF > /home/vagrant/shutdown_components.sh
#!/bin/bash
#stop all services
curl -u admin:admin -i -H 'X-Requested-By: ambari' -X PUT \
   -d '{"RequestInfo":{"context":"_PARSE_.STOP.ALL_SERVICES","operation_level":{"level":"CLUSTER","cluster_name":"hdp26-minimal-phoenix"}},"Body":{"ServiceInfo":{"state":"INSTALLED"}}}' \
   http://localhost:8080/api/v1/clusters/hdp26-minimal-phoenix/services
EOF
chmod +x /home/vagrant/shutdown_components.sh


apt-get -y install openjdk-8-jdk
# execute sql commands for test...
cat << EOF > /home/vagrant/test1.txt
create table test1 (message_id integer not null primary key, message varchar(100));
upsert into test1 values (100, 'hello world.');
select * from test1;
!quit
EOF
/usr/hdp/2.6.5.0-292/phoenix/bin/sqlline.py localhost /home/vagrant/test1.txt


echo 'access -> http://192.168.1.117:8080'
echo 'user/password -> admin/admin'
SHELL
end
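
To run additional SQL against Phoenix afterwards, a minimal sketch of an interactive session, using the sqlline.py path from the script above:

vagrant ssh
# inside the VM: connect sqlline to the local ZooKeeper and query the test table
/usr/hdp/2.6.5.0-292/phoenix/bin/sqlline.py localhost
select * from test1;
!quit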

○ Related information
・See here for other articles about Apache HBase.
・See here for other articles about Ambari.

Tuesday, June 19, 2018

Building a virtual machine (Ubuntu 16.04) with Ambari (HDP 2.6)/HDFS/Hive using Vagrant

Hive lets you run queries against data stored on Hadoop.

〇 Ambari screen

Access http://192.168.1.117:8080/ in a browser. The username/password is admin/admin.

〇 How to build
The following Vagrantfile builds a virtual machine (Ubuntu 16.04) with Ambari/HDFS/Hive installed.

Vagrantfile
VAGRANTFILE_API_VERSION = "2"

Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
  config.vm.box = "bento/ubuntu-16.04"
  config.vm.hostname = "ub1604hive.vm.internal"
  config.vm.provider :virtualbox do |vbox|
     vbox.name = "ub1604hive.vm.internal"
     vbox.cpus = 4
     vbox.memory = 10240 
     vbox.customize ["modifyvm", :id, "--nicpromisc2","allow-all"]
  end
  # private network
  config.vm.network "private_network", ip: "192.168.55.117", :netmask => "255.255.255.0"
  # bridge network
  config.vm.network "public_network", ip: "192.168.1.117", :netmask => "255.255.255.0"
  config.vm.provision "shell", inline: <<-SHELL
echo "192.168.55.117 ub1604hive" >> /etc/hosts
apt-get -y install curl

cd /root
mkdir ./.ssh
ssh-keygen -f ./.ssh/id_rsa -t rsa -N ''

# copy private key
cp -f ./.ssh/id_rsa /vagrant
cat ./.ssh/id_rsa.pub >> ./.ssh/authorized_keys
chmod 600 ./.ssh/authorized_keys

# install and configure ambari server
wget -O /etc/apt/sources.list.d/ambari.list http://public-repo-1.hortonworks.com/ambari/ubuntu16/2.x/updates/2.6.2.0/ambari.list
apt-key adv --recv-keys --keyserver keyserver.ubuntu.com B9733A7A07513CAD
apt-get update

# install postgresql
apt-get -y install postgresql 
echo "listen_addresses='*'" >> /etc/postgresql/9.5/main/postgresql.conf

sed -i 's/host.*all.*all.*127.0.0.1/#host    all             all             127.0.0.1/g' /etc/postgresql/9.5/main/pg_hba.conf

echo "host    all         all         127.0.0.1/32          password" >> /etc/postgresql/9.5/main/pg_hba.conf
echo "host    all         all         192.168.1.0/24          password" >> /etc/postgresql/9.5/main/pg_hba.conf
echo "host    all         all         192.168.55.0/24          password" >> /etc/postgresql/9.5/main/pg_hba.conf
echo "host    all         hive         127.0.0.1/32          password" >> /etc/postgresql/9.5/main/pg_hba.conf

# create hive database and hive user
su - postgres << EOF
createdb -T template0 --encoding=UTF8 ambari
createdb -T template0 --encoding=UTF8 hive
createdb -T template0 --encoding=UTF8 oozie
psql -c "
alter user postgres with password 'postgres';
create user ambari with password 'ambari';
grant all privileges on database ambari to ambari;
create user hive with password 'hive';
grant all privileges on database hive to hive;
create user oozie with password 'oozie';
grant all privileges on database oozie to oozie;
"
EOF
echo "postgres:postgres" | chpasswd
systemctl restart postgresql.service

# install jdbc driver for postgresql
wget https://jdbc.postgresql.org/download/postgresql-42.2.2.jar
mkdir -p /opt/jdbc
cp postgresql-42.2.2.jar /opt/jdbc/postgresql-jdbc.jar
chmod 644 /opt/jdbc/postgresql-jdbc.jar


# install ambari
apt-get -y install ambari-server ambari-agent ambari-metrics-assembly 

ambari-server setup -s --database=postgres --databasehost=localhost --databaseport=5432 --databasename=ambari --databaseusername=ambari --databasepassword=ambari --jdbc-db=postgres --jdbc-driver=/opt/jdbc/postgresql-jdbc.jar
ambari-server setup --silent
ambari-server start
ambari-agent start


cat << EOF > /home/vagrant/cluster_configuration.json
{
  "configurations" : [
    {
      "hive-site": {
        "hive.support.concurrency": "true",
        "hive.txn.manager": "org.apache.hadoop.hive.ql.lockmgr.DbTxnManager",
        "hive.compactor.initiator.on": "true",
        "hive.compactor.worker.threads": "5",
        "javax.jdo.option.ConnectionDriverName": "org.postgresql.Driver",
        "javax.jdo.option.ConnectionPassword": "hive",
        "javax.jdo.option.ConnectionURL": "jdbc:postgresql://localhost/hive",
        "javax.jdo.option.ConnectionUserName": "hive"
      }
    },
    {
      "hive-env": {
        "hive_ambari_database": "PostgreSQL",
        "hive_database": "Existing PostgreSQL Database",
        "hive_database_type": "postgres",
        "hive_database_name": "hive"
      }
    },
    {
      "core-site": {
        "properties" : {
          "hadoop.proxyuser.root.groups" : "*",
          "hadoop.proxyuser.root.hosts" : "*"
        }
      }
    }
  ],
  "host_groups" : [
    {
      "name" : "host_group_1",
      "components" : [
        {
          "name" : "NAMENODE"
        },
        {
          "name" : "SECONDARY_NAMENODE"
        },
        {
          "name" : "DATANODE"
        },
        {
          "name" : "HDFS_CLIENT"
        },
        {
          "name" : "RESOURCEMANAGER"
        },
        {
          "name" : "NODEMANAGER"
        },
        {
          "name" : "YARN_CLIENT"
        },
        {
          "name" : "HISTORYSERVER"
        },
        {
          "name" : "APP_TIMELINE_SERVER"
        },
        {
          "name" : "ZOOKEEPER_SERVER"
        },
        {
          "name" : "ZOOKEEPER_CLIENT"
        },
        {
          "name" : "METRICS_MONITOR"
        },
        {
          "name" : "TEZ_CLIENT"
        },
        {
          "name" : "HIVE_SERVER"
        },
        {
          "name" : "HIVE_METASTORE"
        },
        {
          "name" : "METRICS_COLLECTOR"
        },
        {
          "name" : "WEBHCAT_SERVER"
        }
      ],
      "cardinality" : "1"
    }
  ],
  "settings" : [{
     "recovery_settings" : [{
       "recovery_enabled" : "true"
    }]
  }],
  "Blueprints" : {
    "blueprint_name" : "hdp26-minimal-hive",
    "stack_name" : "HDP",
    "stack_version" : "2.6"
  }
}
EOF

curl -H "X-Requested-By: ambari" -X POST -u admin:admin http://localhost:8080/api/v1/blueprints/hdp26-minimal-hive -d @/home/vagrant/cluster_configuration.json

cat << EOF > /home/vagrant/hostmapping.json
{
  "blueprint" : "hdp26-minimal-hive",
  "default_password" : "admin",
  "provision_action" : "INSTALL_AND_START",
  "host_groups" :[
    {
      "name" : "host_group_1",
      "hosts" : [
        {
          "fqdn" : "ub1604hive.vm.internal"
        }
      ]
    }
  ]
}
EOF

curl -H "X-Requested-By: ambari" -X POST -u admin:admin http://localhost:8080/api/v1/clusters/hdp26-minimal-hive -d @/home/vagrant/hostmapping.json
sleep 30


# wait until the cluster is ready.
ProgressPercent=`curl -s --user admin:admin -X GET http://localhost:8080/api/v1/clusters/hdp26-minimal-hive/requests/1 | grep progress_percent | awk '{print $3}' | cut -d . -f 1`
while [[ `echo $ProgressPercent | grep -v 100` ]]; do
  ProgressPercent=`curl -s --user admin:admin -X GET http://localhost:8080/api/v1/clusters/hdp26-minimal-hive/requests/1 | grep progress_percent | awk '{print $3}' | cut -d . -f 1`
  echo " Progress: $ProgressPercent %"
  sleep 10
done


cat << EOF > /home/vagrant/shutdown_components.sh
#!/bin/bash
#stop all services
curl -u admin:admin -i -H 'X-Requested-By: ambari' -X PUT \
   -d '{"RequestInfo":{"context":"_PARSE_.STOP.ALL_SERVICES","operation_level":{"level":"CLUSTER","cluster_name":"hdp26-minimal-hive"}},"Body":{"ServiceInfo":{"state":"INSTALLED"}}}' \
   http://localhost:8080/api/v1/clusters/hdp26-minimal-hive/services
EOF
chmod +x /home/vagrant/shutdown_components.sh


cat << EOF > /tmp/test.csv
store_id,sales
100,15000000
200,20000000
300,18000000
EOF


cat << EOF > /tmp/sample.sql
CREATE EXTERNAL TABLE sample (
  store_id INT,
  sales INT
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
WITH SERDEPROPERTIES (
   "separatorChar" = ",",
   "quoteChar"     = '"'
)
stored as textfile
LOCATION '/user/hive'
tblproperties ("skip.header.line.count"="1");

LOAD DATA LOCAL INPATH '/tmp/test.csv' OVERWRITE INTO TABLE sample;

select * from sample;
EOF


# upload sample content
sudo -u hive hdfs dfs -put /tmp/sample.sql /user/hive
sudo -u hive hdfs dfs -ls /user/hive

# create table and select
beeline -u 'jdbc:hive2://localhost:2181/;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2' -n hive -f /tmp/sample.sql

echo 'access -> http://192.168.1.117:8080'
echo 'user/password -> admin/admin'
SHELL
end
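
After provisioning, additional HiveQL can be run from the host through the same ZooKeeper-discovery JDBC URL the script uses, for example:

vagrant ssh -c "beeline -u 'jdbc:hive2://localhost:2181/;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2' -n hive -e 'select count(*) from sample;'"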

○ Related information
・See here for other articles about Ambari.

Wednesday, May 30, 2018

Building a virtual machine (Ubuntu 16.04) with Jupyter Lab and HDFS/Hive/Spark using Vagrant

Jupyter Lab provides an interactive computing environment. Ambari is used here to build a one-node cluster with HDFS/Hive/Spark installed.
pyspark is used to connect to Spark from Jupyter.

〇 Jupyter Lab screen


〇 How to build
The following Vagrantfile builds a virtual machine (Ubuntu 16.04) with Jupyter Lab and HDFS/Hive/Spark installed.

Vagrantfile
VAGRANTFILE_API_VERSION = "2"

Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
  config.vm.box = "bento/ubuntu-16.04"
  config.vm.hostname = "ub1604jupyterspark.vm.internal"
  config.vm.provider :virtualbox do |vbox|
     vbox.name = "ub1604jupyterspark.vm.internal"
     vbox.cpus = 4
     vbox.memory = 10240
     vbox.customize ["modifyvm", :id, "--nicpromisc2","allow-all"]
  end
  # private network
config.vm.network "private_network", ip: "192.168.55.117", :netmask => "255.255.255.0"
  # bridge network
config.vm.network "public_network", ip: "192.168.1.117", :netmask => "255.255.255.0"
  config.vm.provision "shell", inline: <<-SHELL
echo "192.168.55.117 ub1604jupyterspark" >> /etc/hosts
apt-get -y install curl

cd /root
mkdir ./.ssh
ssh-keygen -f ./.ssh/id_rsa -t rsa -N ''

# copy private key
cp -f ./.ssh/id_rsa /vagrant
cat ./.ssh/id_rsa.pub >> ./.ssh/authorized_keys
chmod 600 ./.ssh/authorized_keys

# install and configure ambari server
wget -O /etc/apt/sources.list.d/ambari.list http://public-repo-1.hortonworks.com/ambari/ubuntu16/2.x/updates/2.6.2.0/ambari.list
apt-key adv --recv-keys --keyserver keyserver.ubuntu.com B9733A7A07513CAD
apt-get update

# install postgresql
apt-get -y install postgresql
echo "listen_addresses='*'" >> /etc/postgresql/9.5/main/postgresql.conf

sed -i 's/host.*all.*all.*127.0.0.1/#host    all             all             127.0.0.1/g' /etc/postgresql/9.5/main/pg_hba.conf

echo "host    all         all         127.0.0.1/32          password" >> /etc/postgresql/9.5/main/pg_hba.conf
echo "host    all         all         192.168.1.0/24          password" >> /etc/postgresql/9.5/main/pg_hba.conf
echo "host    all         all         192.168.55.0/24          password" >> /etc/postgresql/9.5/main/pg_hba.conf
echo "host    all         hive         127.0.0.1/32          password" >> /etc/postgresql/9.5/main/pg_hba.conf

# create hive database and hive user
su - postgres << EOF
createdb -T template0 --encoding=UTF8 ambari
createdb -T template0 --encoding=UTF8 hive
psql -c "
alter user postgres with password 'postgres';
create user ambari with password 'ambari';
grant all privileges on database ambari to ambari;
create user hive with password 'hive';
grant all privileges on database hive to hive;
"
EOF
echo "postgres:postgres" | chpasswd
systemctl restart postgresql.service

# install jdbc driver for postgresql
wget https://jdbc.postgresql.org/download/postgresql-42.2.2.jar
mkdir -p /opt/jdbc
cp postgresql-42.2.2.jar /opt/jdbc/postgresql-jdbc.jar
chmod 644 /opt/jdbc/postgresql-jdbc.jar


# install ambari
apt-get -y install ambari-server ambari-agent ambari-metrics-assembly

ambari-server setup -s --database=postgres --databasehost=localhost --databaseport=5432 --databasename=ambari --databaseusername=ambari --databasepassword=ambari --jdbc-db=postgres --jdbc-driver=/opt/jdbc/postgresql-jdbc.jar
ambari-server setup --silent
ambari-server start
ambari-agent start


cat << EOF > /home/vagrant/cluster_configuration.json
{
  "configurations" : [
    {
      "hive-site": {
        "hive.support.concurrency": "true",
        "hive.txn.manager": "org.apache.hadoop.hive.ql.lockmgr.DbTxnManager",
        "hive.compactor.initiator.on": "true",
        "hive.compactor.worker.threads": "5",
        "javax.jdo.option.ConnectionDriverName": "org.postgresql.Driver",
        "javax.jdo.option.ConnectionPassword": "hive",
        "javax.jdo.option.ConnectionURL": "jdbc:postgresql://localhost/hive",
        "javax.jdo.option.ConnectionUserName": "hive"
      }
    },
    {
      "hive-env": {
        "hive_ambari_database": "PostgreSQL",
        "hive_database": "Existing PostgreSQL Database",
        "hive_database_type": "postgres",
        "hive_database_name": "hive"
      }
    },
    {
      "core-site": {
        "properties" : {
          "hadoop.proxyuser.root.groups" : "*",
          "hadoop.proxyuser.root.hosts" : "*"
        }
      }
    }
  ],
  "host_groups" : [
    {
      "name" : "host_group_1",
      "components" : [
        {
          "name" : "NAMENODE"
        },
        {
          "name" : "SECONDARY_NAMENODE"
        },
        {
          "name" : "DATANODE"
        },
        {
          "name" : "HDFS_CLIENT"
        },
        {
          "name" : "RESOURCEMANAGER"
        },
        {
          "name" : "NODEMANAGER"
        },
        {
          "name" : "YARN_CLIENT"
        },
        {
          "name" : "HISTORYSERVER"
        },
        {
          "name" : "APP_TIMELINE_SERVER"
        },
        {
          "name" : "ZOOKEEPER_SERVER"
        },
        {
          "name" : "ZOOKEEPER_CLIENT"
        },
        {
          "name" : "METRICS_MONITOR"
        },
        {
          "name" : "TEZ_CLIENT"
        },
        {
          "name" : "HIVE_SERVER"
        },
        {
          "name" : "HIVE_METASTORE"
        },
        {
          "name" : "METRICS_COLLECTOR"
        },
        {
          "name" : "WEBHCAT_SERVER"
        },
        {
          "name" : "PIG"
        },
        {
          "name" : "SLIDER"
        },
        {
          "name" : "SPARK2_THRIFTSERVER"
        },
        {
          "name" : "SPARK2_CLIENT"
        },
        {
          "name" : "SPARK2_JOBHISTORYSERVER"
        }
      ],
      "cardinality" : "1"
    }
  ],
  "settings" : [{
     "recovery_settings" : [{
       "recovery_enabled" : "true"
    }]
  }],
  "Blueprints" : {
    "blueprint_name" : "hdp26-jupyter-spark",
    "stack_name" : "HDP",
    "stack_version" : "2.6"
  }
}
EOF

curl -H "X-Requested-By: ambari" -X POST -u admin:admin http://localhost:8080/api/v1/blueprints/hdp26-jupyter-spark -d @/home/vagrant/cluster_configuration.json

cat << EOF > /home/vagrant/hostmapping.json
{
  "blueprint" : "hdp26-jupyter-spark",
  "default_password" : "admin",
  "provision_action" : "INSTALL_AND_START",
  "host_groups" :[
    {
      "name" : "host_group_1",
      "hosts" : [
        {
          "fqdn" : "ub1604jupyterspark.vm.internal"
        }
      ]
    }
  ]
}
EOF

curl -H "X-Requested-By: ambari" -X POST -u admin:admin http://localhost:8080/api/v1/clusters/hdp26-jupyter-spark -d @/home/vagrant/hostmapping.json
sleep 30


# wait until the cluster is ready.
ProgressPercent=`curl -s --user admin:admin -X GET http://localhost:8080/api/v1/clusters/hdp26-jupyter-spark/requests/1 | grep progress_percent | awk '{print $3}' | cut -d . -f 1`
while [[ `echo $ProgressPercent | grep -v 100` ]]; do
  ProgressPercent=`curl -s --user admin:admin -X GET http://localhost:8080/api/v1/clusters/hdp26-jupyter-spark/requests/1 | grep progress_percent | awk '{print $3}' | cut -d . -f 1`
  echo " Progress: $ProgressPercent %"
  sleep 10
done


cat << EOF > /home/vagrant/shutdown_components.sh
#!/bin/bash
#stop all services
curl -u admin:admin -i -H 'X-Requested-By: ambari' -X PUT \
   -d '{"RequestInfo":{"context":"_PARSE_.STOP.ALL_SERVICES","operation_level":{"level":"CLUSTER","cluster_name":"hdp26-jupyter-spark"}},"Body":{"ServiceInfo":{"state":"INSTALLED"}}}' \
http://localhost:8080/api/v1/clusters/hdp26-jupyter-spark/services
EOF
chmod +x /home/vagrant/shutdown_components.sh


cat << EOF > /tmp/test.csv
100,15000000
200,20000000
300,18000000
EOF


cat << EOF > /tmp/sample.sql
CREATE EXTERNAL TABLE sample (
  store_id INT,
  sales INT
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
WITH SERDEPROPERTIES (
   "separatorChar" = ",",
   "quoteChar"     = '"'
)
stored as textfile
LOCATION '/user/hive';

LOAD DATA LOCAL INPATH '/tmp/test.csv' OVERWRITE INTO TABLE sample;

select * from sample;
EOF


# upload sample content
sudo -u hive hdfs dfs -put /tmp/sample.sql /user/hive
sudo -u hive hdfs dfs -ls /user/hive

# create table and select
beeline -u 'jdbc:hive2://localhost:10016/' -n hive -f /tmp/sample.sql

# install anaconda & jupyterlab
wget https://repo.continuum.io/archive/Anaconda3-5.1.0-Linux-x86_64.sh
chmod +x Anaconda3-5.1.0-Linux-x86_64.sh
./Anaconda3-5.1.0-Linux-x86_64.sh -b -p /opt/anaconda
source /opt/anaconda/bin/activate
pip install --upgrade pip
pip install jupyterlab

# install pyspark
pip install findspark pyspark


useradd py
mkdir -p /home/py
chown -R py:py /home/py
sudo -u py bash -c "mkdir /home/py/.jupyter"
sudo -u py bash -c "cat << EOF > /home/py/.jupyter/jupyter_notebook_config.py
conf = get_config()
conf.NotebookApp.ip = '*'
conf.NotebookApp.open_browser = False
conf.NotebookApp.port = 8888
conf.NotebookApp.token = 'jupyter'
EOF"

cat << EOF > /etc/systemd/system/jupyter.service
[Unit]
Description=Jupyter notebook
[Service]
Type=simple
Environment=SPARK_HOME=/usr/hdp/current/spark2-client
# "source" is a shell builtin, so it cannot run as ExecStartPre; extend PATH instead
Environment=PATH=/opt/anaconda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
ExecStart=/opt/anaconda/bin/jupyter lab
User=py
Group=py
WorkingDirectory=/home/py
Restart=always
RestartSec=10
[Install]
WantedBy=multi-user.target
EOF
sudo systemctl enable jupyter
sudo systemctl start jupyter


echo 'access -> http://192.168.1.117:8080'
echo 'user/password -> admin/admin'
echo ''
echo 'jupyter -> http://192.168.55.117:8888/?token=jupyter'
SHELL
end

〇 Verification code
import os
os.environ["HADOOP_USER_NAME"] = "hive"
# use the cluster's Spark via SPARK_HOME (findspark was pip-installed above)
import findspark
findspark.init()
from pyspark.sql import SparkSession

# create a SparkSession before issuing SQL
spark = SparkSession.builder.appName("verify").getOrCreate()
spark.sql("SELECT * from csv.`/user/hive/test.csv`").show()
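
The same table can also be queried without Jupyter, through the Spark Thrift Server installed by the blueprint (port 10016, as in the beeline command in the script above):

vagrant ssh -c "beeline -u 'jdbc:hive2://localhost:10016/' -n hive -e 'select * from sample;'"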

○ Related information
・See here for other articles about Ambari.

Monday, May 28, 2018

Building a virtual machine (Ubuntu 16.04) with Ambari (HDP 2.6)/HDFS/Spark using Vagrant

Apache Spark lets you run queries against data stored on Hadoop.

〇 Ambari screen

Access http://192.168.1.117:8080/ in a browser. The username/password is admin/admin.

〇 How to build
The following Vagrantfile builds a virtual machine (Ubuntu 16.04) with Ambari/HDFS/Spark installed.

Vagrantfile
VAGRANTFILE_API_VERSION = "2"

Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
  config.vm.box = "bento/ubuntu-16.04"
  config.vm.hostname = "ub1604spark.vm.internal"
  config.vm.provider :virtualbox do |vbox|
     vbox.name = "ub1604spark.vm.internal"
     vbox.cpus = 4
     vbox.memory = 10240 
     vbox.customize ["modifyvm", :id, "--nicpromisc2","allow-all"]
  end
  # private network
  config.vm.network "private_network", ip: "192.168.55.117", :netmask => "255.255.255.0"
  # bridge network
  config.vm.network "public_network", ip: "192.168.1.117", :netmask => "255.255.255.0"
  config.vm.provision "shell", inline: <<-SHELL
echo "192.168.55.117 ub1604spark" >> /etc/hosts
apt-get -y install curl

cd /root
mkdir ./.ssh
ssh-keygen -f ./.ssh/id_rsa -t rsa -N ''

# copy private key
cp -f ./.ssh/id_rsa /vagrant
cat ./.ssh/id_rsa.pub >> ./.ssh/authorized_keys
chmod 600 ./.ssh/authorized_keys

# install and configure ambari server
wget -O /etc/apt/sources.list.d/ambari.list http://public-repo-1.hortonworks.com/ambari/ubuntu16/2.x/updates/2.6.2.0/ambari.list
apt-key adv --recv-keys --keyserver keyserver.ubuntu.com B9733A7A07513CAD
apt-get update

# install postgresql
apt-get -y install postgresql 
echo "listen_addresses='*'" >> /etc/postgresql/9.5/main/postgresql.conf

sed -i 's/host.*all.*all.*127.0.0.1/#host    all             all             127.0.0.1/g' /etc/postgresql/9.5/main/pg_hba.conf

echo "host    all         all         127.0.0.1/32          password" >> /etc/postgresql/9.5/main/pg_hba.conf
echo "host    all         all         192.168.1.0/24          password" >> /etc/postgresql/9.5/main/pg_hba.conf
echo "host    all         all         192.168.55.0/24          password" >> /etc/postgresql/9.5/main/pg_hba.conf
echo "host    all         hive         127.0.0.1/32          password" >> /etc/postgresql/9.5/main/pg_hba.conf

# create hive database and hive user
su - postgres << EOF
createdb -T template0 --encoding=UTF8 ambari
createdb -T template0 --encoding=UTF8 hive
createdb -T template0 --encoding=UTF8 oozie
psql -c "
alter user postgres with password 'postgres';
create user ambari with password 'ambari';
grant all privileges on database ambari to ambari;
create user hive with password 'hive';
grant all privileges on database hive to hive;
create user oozie with password 'oozie';
grant all privileges on database oozie to oozie;
"
EOF
echo "postgres:postgres" | chpasswd
systemctl restart postgresql.service

# install jdbc driver for postgresql
wget https://jdbc.postgresql.org/download/postgresql-42.2.2.jar
mkdir -p /opt/jdbc
cp postgresql-42.2.2.jar /opt/jdbc/postgresql-jdbc.jar
chmod 644 /opt/jdbc/postgresql-jdbc.jar


# install ambari
apt-get -y install ambari-server ambari-agent ambari-metrics-assembly 

ambari-server setup -s --database=postgres --databasehost=localhost --databaseport=5432 --databasename=ambari --databaseusername=ambari --databasepassword=ambari --jdbc-db=postgres --jdbc-driver=/opt/jdbc/postgresql-jdbc.jar
ambari-server setup --silent
ambari-server start
ambari-agent start


cat << EOF > /home/vagrant/cluster_configuration.json
{
  "configurations" : [
    {
      "hive-site": {
        "hive.support.concurrency": "true",
        "hive.txn.manager": "org.apache.hadoop.hive.ql.lockmgr.DbTxnManager",
        "hive.compactor.initiator.on": "true",
        "hive.compactor.worker.threads": "5",
        "javax.jdo.option.ConnectionDriverName": "org.postgresql.Driver",
        "javax.jdo.option.ConnectionPassword": "hive",
        "javax.jdo.option.ConnectionURL": "jdbc:postgresql://localhost/hive",
        "javax.jdo.option.ConnectionUserName": "hive"
      }
    },
    {
      "hive-env": {
        "hive_ambari_database": "PostgreSQL",
        "hive_database": "Existing PostgreSQL Database",
        "hive_database_type": "postgres",
        "hive_database_name": "hive"
      }
    },
    {
      "core-site": {
        "properties" : {
          "hadoop.proxyuser.root.groups" : "*",
          "hadoop.proxyuser.root.hosts" : "*"
        }
      }
    }
  ],
  "host_groups" : [
    {
      "name" : "host_group_1",
      "components" : [
        {
          "name" : "NAMENODE"
        },
        {
          "name" : "SECONDARY_NAMENODE"
        },
        {
          "name" : "DATANODE"
        },
        {
          "name" : "HDFS_CLIENT"
        },
        {
          "name" : "RESOURCEMANAGER"
        },
        {
          "name" : "NODEMANAGER"
        },
        {
          "name" : "YARN_CLIENT"
        },
        {
          "name" : "HISTORYSERVER"
        },
        {
          "name" : "APP_TIMELINE_SERVER"
        },
        {
          "name" : "ZOOKEEPER_SERVER"
        },
        {
          "name" : "ZOOKEEPER_CLIENT"
        },
        {
          "name" : "METRICS_MONITOR"
        },
        {
          "name" : "TEZ_CLIENT"
        },
        {
          "name" : "HIVE_SERVER"
        },
        {
          "name" : "HIVE_METASTORE"
        },
        {
          "name" : "METRICS_COLLECTOR"
        },
        {
          "name" : "WEBHCAT_SERVER"
        },
        {
          "name" : "PIG"
        },
        {
          "name" : "SLIDER"
        },
        {
          "name" : "SPARK2_THRIFTSERVER"
        },
        {
          "name" : "SPARK2_CLIENT"
        },
        {
          "name" : "SPARK2_JOBHISTORYSERVER"
        }
      ],
      "cardinality" : "1"
    }
  ],
  "settings" : [{
     "recovery_settings" : [{
       "recovery_enabled" : "true"
    }]
  }],
  "Blueprints" : {
    "blueprint_name" : "hdp26-minimal-spark",
    "stack_name" : "HDP",
    "stack_version" : "2.6"
  }
}
EOF

curl -H "X-Requested-By: ambari" -X POST -u admin:admin http://localhost:8080/api/v1/blueprints/hdp26-minimal-spark -d @/home/vagrant/cluster_configuration.json

cat << EOF > /home/vagrant/hostmapping.json
{
  "blueprint" : "hdp26-minimal-spark",
  "default_password" : "admin",
  "provision_action" : "INSTALL_AND_START",
  "host_groups" :[
    {
      "name" : "host_group_1",
      "hosts" : [
        {
          "fqdn" : "ub1604spark.vm.internal"
        }
      ]
    }
  ]
}
EOF

curl -H "X-Requested-By: ambari" -X POST -u admin:admin http://localhost:8080/api/v1/clusters/hdp26-minimal-spark -d @/home/vagrant/hostmapping.json
sleep 30


# wait until the cluster is ready.
ProgressPercent=`curl -s --user admin:admin -X GET http://localhost:8080/api/v1/clusters/hdp26-minimal-spark/requests/1 | grep progress_percent | awk '{print $3}' | cut -d . -f 1`
while [[ `echo $ProgressPercent | grep -v 100` ]]; do
  ProgressPercent=`curl -s --user admin:admin -X GET http://localhost:8080/api/v1/clusters/hdp26-minimal-spark/requests/1 | grep progress_percent | awk '{print $3}' | cut -d . -f 1`
  echo " Progress: $ProgressPercent %"
  sleep 10
done


cat << EOF > /home/vagrant/shutdown_components.sh
#!/bin/bash
#stop all services
curl -u admin:admin -i -H 'X-Requested-By: ambari' -X PUT \
   -d '{"RequestInfo":{"context":"_PARSE_.STOP.ALL_SERVICES","operation_level":{"level":"CLUSTER","cluster_name":"hdp26-minimal-spark"}},"Body":{"ServiceInfo":{"state":"INSTALLED"}}}' \
   http://localhost:8080/api/v1/clusters/hdp26-minimal-spark/services
EOF
chmod +x /home/vagrant/shutdown_components.sh


cat << EOF > /tmp/test.csv
100,15000000
200,20000000
300,18000000
EOF


cat << EOF > /tmp/sample.sql
CREATE EXTERNAL TABLE sample (
  store_id INT,
  sales INT
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
WITH SERDEPROPERTIES (
   "separatorChar" = ",",
   "quoteChar"     = '"'
)
stored as textfile
LOCATION '/user/hive';

LOAD DATA LOCAL INPATH '/tmp/test.csv' OVERWRITE INTO TABLE sample;

select * from sample;
EOF


# upload sample content
sudo -u hive hdfs dfs -put /tmp/sample.sql /user/hive
sudo -u hive hdfs dfs -ls /user/hive

# create table and select
beeline -u 'jdbc:hive2://localhost:10016/' -n hive -f /tmp/sample.sql

echo 'access -> http://192.168.1.117:8080'
echo 'user/password -> admin/admin'
SHELL
end
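
The generated /home/vagrant/shutdown_components.sh can be used to stop all cluster services cleanly before halting the VM:

vagrant ssh -c '/home/vagrant/shutdown_components.sh'
vagrant halt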

○ Related information
・See here for other articles about Ambari.

Building a virtual machine (CentOS 7.4) with Ambari (HDP 2.6)/HDFS/HBase/Phoenix using Vagrant

Apache Phoenix is an RDB layer built on top of HBase.

〇 Ambari screen

Access http://192.168.1.117:8080/ in a browser. The username/password is admin/admin.

〇 How to build
The following Vagrantfile builds a virtual machine (CentOS 7.4) with Ambari/HDFS/HBase/Phoenix installed.

Vagrantfile
VAGRANTFILE_API_VERSION = "2"

Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
  config.vm.box = "bento/centos-7.4"
  config.vm.hostname = "co74phoenix.vm.internal"
  config.vm.provider :virtualbox do |vbox|
     vbox.name = "co74phoenix.vm.internal"
     vbox.cpus = 4
     vbox.memory = 10240 
     vbox.customize ["modifyvm", :id, "--nicpromisc2","allow-all"]
  end
  # private network
  config.vm.network "private_network", ip: "192.168.55.117", :netmask => "255.255.255.0"
  # bridge network
  config.vm.network "public_network", ip: "192.168.1.117", :netmask => "255.255.255.0"
  config.vm.provision "shell", inline: <<-SHELL
localectl set-locale LANG=ja_JP.UTF-8
echo "192.168.55.117 co74phoenix" >> /etc/hosts

cd /root
mkdir ./.ssh
ssh-keygen -f ./.ssh/id_rsa -t rsa -N ''

# copy private key
cp -f ./.ssh/id_rsa /vagrant
cat ./.ssh/id_rsa.pub >> ./.ssh/authorized_keys
chmod 600 ./.ssh/authorized_keys

# install and configure ambari server
cd /etc/yum.repos.d/
wget http://public-repo-1.hortonworks.com/ambari/centos7/2.x/updates/2.6.2.0/ambari.repo
yum -y install ambari-server ambari-agent
ambari-server setup --silent
ambari-server start

# create hive database and hive user
echo "host    all         all         127.0.0.1/32          password" >> /var/lib/pgsql/data/pg_hba.conf
echo "host    all         all         192.168.1.0/24          password" >> /var/lib/pgsql/data/pg_hba.conf
echo "host    all         all         192.168.55.0/24          password" >> /var/lib/pgsql/data/pg_hba.conf
service postgresql restart

yum -y install postgresql-jdbc*
chmod 644 /usr/share/java/postgresql-jdbc.jar

ambari-server setup -s --database=postgres --databasehost=localhost --databaseport=5432 --databasename=hive --databaseusername=hive --databasepassword=hive --jdbc-db=postgres --jdbc-driver=/usr/share/java/postgresql-jdbc.jar
ambari-agent start


cat << EOF > /home/vagrant/cluster_configuration.json
{
  "configurations" : [
    {
      "hbase-env" : {
        "properties_attributes" : { },
        "properties" : {
          "phoenix_sql_enabled" : "true"
        }
      }
    },
    {
      "core-site": {
        "properties" : {
          "hadoop.proxyuser.root.groups" : "*",
          "hadoop.proxyuser.root.hosts" : "*"
        }
      }
    }
  ],
  "host_groups" : [
    {
      "name" : "host_group_1",
      "components" : [
        {
          "name" : "NAMENODE"
        },
        {
          "name" : "SECONDARY_NAMENODE"
        },
        {
          "name" : "DATANODE"
        },
        {
          "name" : "HDFS_CLIENT"
        },
        {
          "name" : "RESOURCEMANAGER"
        },
        {
          "name" : "NODEMANAGER"
        },
        {
          "name" : "YARN_CLIENT"
        },
        {
          "name" : "HISTORYSERVER"
        },
        {
          "name" : "APP_TIMELINE_SERVER"
        },
        {
          "name" : "ZOOKEEPER_SERVER"
        },
        {
          "name" : "ZOOKEEPER_CLIENT"
        },
        {
          "name" : "METRICS_MONITOR"
        },
        {
          "name" : "METRICS_COLLECTOR"
        },
        {
          "name" : "HBASE_CLIENT"
        },
        {
          "name" : "HBASE_MASTER"
        },
        {
          "name" : "HBASE_REGIONSERVER"
        }
      ],
      "cardinality" : "1"
    }
  ],
  "settings" : [{
     "recovery_settings" : [{
       "recovery_enabled" : "true"
    }]
  }],
  "Blueprints" : {
    "blueprint_name" : "hdp26-minimal-phoenix",
    "stack_name" : "HDP",
    "stack_version" : "2.6"
  }
}
EOF

curl -H "X-Requested-By: ambari" -X POST -u admin:admin http://localhost:8080/api/v1/blueprints/hdp26-minimal-phoenix -d @/home/vagrant/cluster_configuration.json

cat << EOF > /home/vagrant/hostmapping.json
{
  "blueprint" : "hdp26-minimal-phoenix",
  "default_password" : "admin",
  "provision_action" : "INSTALL_AND_START",
  "host_groups" :[
    {
      "name" : "host_group_1",
      "hosts" : [
        {
          "fqdn" : "co74phoenix.vm.internal"
        }
      ]
    }
  ]
}
EOF

curl -H "X-Requested-By: ambari" -X POST -u admin:admin http://localhost:8080/api/v1/clusters/hdp26-minimal-phoenix -d @/home/vagrant/hostmapping.json
sleep 30


# wait until the cluster is ready.
ProgressPercent=`curl -s --user admin:admin -X GET http://localhost:8080/api/v1/clusters/hdp26-minimal-phoenix/requests/1 | grep progress_percent | awk '{print $3}' | cut -d . -f 1`
while [[ `echo $ProgressPercent | grep -v 100` ]]; do
  ProgressPercent=`curl -s --user admin:admin -X GET http://localhost:8080/api/v1/clusters/hdp26-minimal-phoenix/requests/1 | grep progress_percent | awk '{print $3}' | cut -d . -f 1`
  echo " Progress: $ProgressPercent %"
  sleep 10
done


cat << EOF > /home/vagrant/shutdown_components.sh
#!/bin/bash
#stop all services
curl -u admin:admin -i -H 'X-Requested-By: ambari' -X PUT \
   -d '{"RequestInfo":{"context":"_PARSE_.STOP.ALL_SERVICES","operation_level":{"level":"CLUSTER","cluster_name":"hdp26-minimal-phoenix"}},"Body":{"ServiceInfo":{"state":"INSTALLED"}}}' \
   http://localhost:8080/api/v1/clusters/hdp26-minimal-phoenix/services
EOF
chmod +x /home/vagrant/shutdown_components.sh


# execute sql commands for test...
cat << EOF > test1.txt
create table test1 (message_id integer not null primary key, message varchar(100));
upsert into test1 values (100, 'hello world.');
select * from test1;
!quit
EOF
/usr/hdp/2.6.5.0-292/phoenix/bin/sqlline.py localhost test1.txt


echo 'access -> http://192.168.1.117:8080'
echo 'user/password -> admin/admin'
SHELL
end
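
Further statements can be fed to sqlline the same way the script does; a minimal sketch that adds a row and re-reads the table:

vagrant ssh
# inside the VM: write another SQL file and run it against local ZooKeeper
cat << EOF > /tmp/more.sql
upsert into test1 values (200, 'second row');
select * from test1;
!quit
EOF
/usr/hdp/2.6.5.0-292/phoenix/bin/sqlline.py localhost /tmp/more.sql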

○ Related information
・See here for other articles about Apache HBase.
・See here for other articles about Ambari.

Friday, May 25, 2018

Building a virtual machine (Ubuntu 16.04) with Ambari (HDP 2.6)/HDFS/HBase using Vagrant

Apache HBase is a column-oriented distributed database.

〇 Ambari screen

Access http://192.168.1.117:8080/ in a browser. The username/password is admin/admin.

〇 How to build
The following Vagrantfile builds a virtual machine (Ubuntu 16.04) with Ambari/HDFS/HBase installed.

Vagrantfile
VAGRANTFILE_API_VERSION = "2"

Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
  config.vm.box = "bento/ubuntu-16.04"
  config.vm.hostname = "ub1604hbase.vm.internal"
  config.vm.provider :virtualbox do |vbox|
     vbox.name = "ub1604hbase.vm.internal"
     vbox.cpus = 4
     vbox.memory = 10240 
     vbox.customize ["modifyvm", :id, "--nicpromisc2","allow-all"]
  end
  # private network
  config.vm.network "private_network", ip: "192.168.55.117", :netmask => "255.255.255.0"
  # bridge network
  config.vm.network "public_network", ip: "192.168.1.117", :netmask => "255.255.255.0"
  config.vm.provision "shell", inline: <<-SHELL
echo "192.168.55.117 ub1604hbase" >> /etc/hosts
apt-get -y install curl

cd /root
mkdir ./.ssh
ssh-keygen -f ./.ssh/id_rsa -t rsa -N ''

# copy private key
cp -f ./.ssh/id_rsa /vagrant
cat ./.ssh/id_rsa.pub >> ./.ssh/authorized_keys
chmod 600 ./.ssh/authorized_keys

# install and configure ambari server
wget -O /etc/apt/sources.list.d/ambari.list http://public-repo-1.hortonworks.com/ambari/ubuntu16/2.x/updates/2.6.2.0/ambari.list
apt-key adv --recv-keys --keyserver keyserver.ubuntu.com B9733A7A07513CAD
apt-get update

# install postgresql
apt-get -y install postgresql 
echo "listen_addresses='*'" >> /etc/postgresql/9.5/main/postgresql.conf

sed -i 's/host.*all.*all.*127.0.0.1/#host    all             all             127.0.0.1/g' /etc/postgresql/9.5/main/pg_hba.conf

echo "host    all         all         127.0.0.1/32          password" >> /etc/postgresql/9.5/main/pg_hba.conf
echo "host    all         all         192.168.1.0/24          password" >> /etc/postgresql/9.5/main/pg_hba.conf
echo "host    all         all         192.168.55.0/24          password" >> /etc/postgresql/9.5/main/pg_hba.conf
echo "host    all         hive         127.0.0.1/32          password" >> /etc/postgresql/9.5/main/pg_hba.conf

# create hive database and hive user
su - postgres << EOF
createdb -T template0 --encoding=UTF8 ambari
psql -c "
alter user postgres with password 'postgres';
create user ambari with password 'ambari';
grant all privileges on database ambari to ambari;
"
EOF
echo "postgres:postgres" | chpasswd
systemctl restart postgresql.service

# install jdbc driver for postgresql
wget https://jdbc.postgresql.org/download/postgresql-42.2.2.jar
mkdir -p /opt/jdbc
cp postgresql-42.2.2.jar /opt/jdbc/postgresql-jdbc.jar
chmod 644 /opt/jdbc/postgresql-jdbc.jar


# install ambari
apt-get -y install ambari-server ambari-agent ambari-metrics-assembly 

ambari-server setup -s --database=postgres --databasehost=localhost --databaseport=5432 --databasename=ambari --databaseusername=ambari --databasepassword=ambari --jdbc-db=postgres --jdbc-driver=/opt/jdbc/postgresql-jdbc.jar
ambari-server setup --silent
ambari-server start
ambari-agent start


cat << EOF > /home/vagrant/cluster_configuration.json
{
  "configurations" : [
    {
      "hive-site": {
        "hive.support.concurrency": "true",
        "hive.txn.manager": "org.apache.hadoop.hive.ql.lockmgr.DbTxnManager",
        "hive.compactor.initiator.on": "true",
        "hive.compactor.worker.threads": "5",
        "javax.jdo.option.ConnectionDriverName": "org.postgresql.Driver",
        "javax.jdo.option.ConnectionPassword": "hive",
        "javax.jdo.option.ConnectionURL": "jdbc:postgresql://localhost/hive",
        "javax.jdo.option.ConnectionUserName": "hive"
      }
    },
    {
      "hive-env": {
        "hive_ambari_database": "PostgreSQL",
        "hive_database": "Existing PostgreSQL Database",
        "hive_database_type": "postgres",
        "hive_database_name": "hive"
      }
    },
    {
      "core-site": {
        "properties" : {
          "hadoop.proxyuser.root.groups" : "*",
          "hadoop.proxyuser.root.hosts" : "*"
        }
      }
    }
  ],
  "host_groups" : [
    {
      "name" : "host_group_1",
      "components" : [
        {
          "name" : "NAMENODE"
        },
        {
          "name" : "SECONDARY_NAMENODE"
        },
        {
          "name" : "DATANODE"
        },
        {
          "name" : "HDFS_CLIENT"
        },
        {
          "name" : "RESOURCEMANAGER"
        },
        {
          "name" : "NODEMANAGER"
        },
        {
          "name" : "YARN_CLIENT"
        },
        {
          "name" : "HISTORYSERVER"
        },
        {
          "name" : "APP_TIMELINE_SERVER"
        },
        {
          "name" : "ZOOKEEPER_SERVER"
        },
        {
          "name" : "ZOOKEEPER_CLIENT"
        },
        {
          "name" : "METRICS_MONITOR"
        },
        {
          "name" : "METRICS_COLLECTOR"
        },
        {
          "name" : "HBASE_CLIENT"
        },
        {
          "name" : "HBASE_MASTER"
        },
        {
          "name" : "HBASE_REGIONSERVER"
        }
      ],
      "cardinality" : "1"
    }
  ],
  "settings" : [{
     "recovery_settings" : [{
       "recovery_enabled" : "true"
    }]
  }],
  "Blueprints" : {
    "blueprint_name" : "hdp26-minimal-hbase",
    "stack_name" : "HDP",
    "stack_version" : "2.6"
  }
}
EOF

curl -H "X-Requested-By: ambari" -X POST -u admin:admin http://localhost:8080/api/v1/blueprints/hdp26-minimal-hbase -d @/home/vagrant/cluster_configuration.json

cat << EOF > /home/vagrant/hostmapping.json
{
  "blueprint" : "hdp26-minimal-hbase",
  "default_password" : "admin",
  "provision_action" : "INSTALL_AND_START",
  "host_groups" :[
    {
      "name" : "host_group_1",
      "hosts" : [
        {
          "fqdn" : "ub1604hbase.vm.internal"
        }
      ]
    }
  ]
}
EOF

curl -H "X-Requested-By: ambari" -X POST -u admin:admin http://localhost:8080/api/v1/clusters/hdp26-minimal-hbase -d @/home/vagrant/hostmapping.json
sleep 30


# wait until the cluster is ready.
ProgressPercent=`curl -s --user admin:admin -X GET http://localhost:8080/api/v1/clusters/hdp26-minimal-hbase/requests/1 | grep progress_percent | awk '{print $3}' | cut -d . -f 1`
while [[ `echo $ProgressPercent | grep -v 100` ]]; do
  ProgressPercent=`curl -s --user admin:admin -X GET http://localhost:8080/api/v1/clusters/hdp26-minimal-hbase/requests/1 | grep progress_percent | awk '{print $3}' | cut -d . -f 1`
  echo " Progress: $ProgressPercent %"
  sleep 10
done


cat << EOF > /home/vagrant/shutdown_components.sh
#!/bin/bash
#stop all services
curl -u admin:admin -i -H 'X-Requested-By: ambari' -X PUT \
   -d '{"RequestInfo":{"context":"_PARSE_.STOP.ALL_SERVICES","operation_level":{"level":"CLUSTER","cluster_name":"hdp26-minimal-hbase"}},"Body":{"ServiceInfo":{"state":"INSTALLED"}}}' \
   http://localhost:8080/api/v1/clusters/hdp26-minimal-hbase/services
EOF
chmod +x /home/vagrant/shutdown_components.sh

# execute commands for test...
cat << EOF > test.txt
create 'test', 'cf'
list 'test'
put 'test', 'row1', 'cf:message_id', '100'
put 'test', 'row1', 'cf:message', 'hello'
put 'test', 'row2', 'cf:message_id', '200'
put 'test', 'row2', 'cf:message', 'world'
scan 'test'
get 'test', 'row1'
get 'test', 'row2'
exit
EOF
hbase shell test.txt

echo 'access -> http://192.168.1.117:8080'
echo 'user/password -> admin/admin'
SHELL
end
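
To re-check the test table after provisioning, HBase shell commands can be piped in from the host, for example:

vagrant ssh -c "echo \"scan 'test'\" | hbase shell"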

○ Related information
・See here for other articles about Apache HBase.
・See here for other articles about Ambari.

Wednesday, May 23, 2018

Building a virtual machine (CentOS 7.4) with Ambari (HDP 2.6)/HDFS/Spark using Vagrant

Apache Spark lets you run queries against data stored on Hadoop.

〇 Ambari screen

Access http://192.168.1.117:8080/ in a browser. The username/password is admin/admin.

〇 How to build
The following Vagrantfile builds a virtual machine (CentOS 7.4) with Ambari/HDFS/Spark installed.

Vagrantfile
VAGRANTFILE_API_VERSION = "2"

Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
  config.vm.box = "bento/centos-7.4"
  config.vm.hostname = "co74spark.vm.internal"
  config.vm.provider :virtualbox do |vbox|
     vbox.name = "co74spark.vm.internal"
     vbox.cpus = 4
     vbox.memory = 10240
     vbox.customize ["modifyvm", :id, "--nicpromisc2","allow-all"]
  end
  # private network
config.vm.network "private_network", ip: "192.168.55.117", :netmask => "255.255.255.0"
  # bridge network
config.vm.network "public_network", ip: "192.168.1.117", :netmask => "255.255.255.0"
  config.vm.provision "shell", inline: <<-SHELL
localectl set-locale LANG=ja_JP.UTF-8
echo "192.168.55.117 co74spark" >> /etc/hosts

cd /root
mkdir ./.ssh
ssh-keygen -f ./.ssh/id_rsa -t rsa -N ''

# copy private key
cp -f ./.ssh/id_rsa /vagrant
cat ./.ssh/id_rsa.pub >> ./.ssh/authorized_keys
chmod 600 ./.ssh/authorized_keys

# install and configure ambari server
cd /etc/yum.repos.d/
wget http://public-repo-1.hortonworks.com/ambari/centos7/2.x/updates/2.6.2.0/ambari.repo
yum -y install ambari-server ambari-agent
ambari-server setup --silent
ambari-server start

# create hive database and hive user
echo "host    all         all         127.0.0.1/32          password" >> /var/lib/pgsql/data/pg_hba.conf
echo "host    all         all         192.168.1.0/24          password" >> /var/lib/pgsql/data/pg_hba.conf
echo "host    all         all         192.168.55.0/24          password" >> /var/lib/pgsql/data/pg_hba.conf
sudo su - postgres << EOF
createdb hive
createdb oozie
psql -c "
create user hive with password 'hive';
grant all privileges on database hive to hive;
create user oozie with password 'oozie';
grant all privileges on database oozie to oozie;
"
EOF
service postgresql restart

yum -y install postgresql-jdbc*
chmod 644 /usr/share/java/postgresql-jdbc.jar

ambari-server setup -s --database=postgres --databasehost=localhost --databaseport=5432 --databasename=hive --databaseusername=hive --databasepassword=hive --jdbc-db=postgres --jdbc-driver=/usr/share/java/postgresql-jdbc.jar
ambari-agent start


cat << EOF > /home/vagrant/cluster_configuration.json
{
  "configurations" : [
    {
      "hive-site": {
        "hive.support.concurrency": "true",
        "hive.txn.manager": "org.apache.hadoop.hive.ql.lockmgr.DbTxnManager",
        "hive.compactor.initiator.on": "true",
        "hive.compactor.worker.threads": "5",
        "javax.jdo.option.ConnectionDriverName": "org.postgresql.Driver",
        "javax.jdo.option.ConnectionPassword": "hive",
        "javax.jdo.option.ConnectionURL": "jdbc:postgresql://localhost/hive",
        "javax.jdo.option.ConnectionUserName": "hive"
      }
    },
    {
      "hive-env": {
        "hive_ambari_database": "PostgreSQL",
        "hive_database": "Existing PostgreSQL Database",
        "hive_database_type": "postgres",
        "hive_database_name": "hive"
      }
    },
    {
      "core-site": {
        "properties" : {
          "hadoop.proxyuser.root.groups" : "*",
          "hadoop.proxyuser.root.hosts" : "*"
        }
      }
    }
  ],
  "host_groups" : [
    {
      "name" : "host_group_1",
      "components" : [
        {
          "name" : "NAMENODE"
        },
        {
          "name" : "SECONDARY_NAMENODE"
        },
        {
          "name" : "DATANODE"
        },
        {
          "name" : "HDFS_CLIENT"
        },
        {
          "name" : "RESOURCEMANAGER"
        },
        {
          "name" : "NODEMANAGER"
        },
        {
          "name" : "YARN_CLIENT"
        },
        {
          "name" : "HISTORYSERVER"
        },
        {
          "name" : "APP_TIMELINE_SERVER"
        },
        {
          "name" : "ZOOKEEPER_SERVER"
        },
        {
          "name" : "ZOOKEEPER_CLIENT"
        },
        {
          "name" : "METRICS_MONITOR"
        },
        {
          "name" : "TEZ_CLIENT"
        },
        {
          "name" : "HIVE_SERVER"
        },
        {
          "name" : "HIVE_METASTORE"
        },
        {
          "name" : "METRICS_COLLECTOR"
        },
        {
          "name" : "WEBHCAT_SERVER"
        },
        {
          "name" : "PIG"
        },
        {
          "name" : "SLIDER"
        },
        {
          "name" : "SPARK2_THRIFTSERVER"
        },
        {
          "name" : "SPARK2_CLIENT"
        },
        {
          "name" : "SPARK2_JOBHISTORYSERVER"
        }
      ],
      "cardinality" : "1"
    }
  ],
  "settings" : [{
     "recovery_settings" : [{
       "recovery_enabled" : "true"
    }]
  }],
  "Blueprints" : {
    "blueprint_name" : "hdp26-minimal-spark",
    "stack_name" : "HDP",
    "stack_version" : "2.6"
  }
}
EOF

curl -H "X-Requested-By: ambari" -X POST -u admin:admin http://localhost:8080/api/v1/blueprints/hdp26-minimal-spark -d @/home/vagrant/cluster_configuration.json

cat << EOF > /home/vagrant/hostmapping.json
{
  "blueprint" : "hdp26-minimal-spark",
  "default_password" : "admin",
  "provision_action" : "INSTALL_AND_START",
  "host_groups" :[
    {
      "name" : "host_group_1",
      "hosts" : [
        {
          "fqdn" : "co74spark.vm.internal"
        }
      ]
    }
  ]
}
EOF

curl -H "X-Requested-By: ambari" -X POST -u admin:admin http://localhost:8080/api/v1/clusters/hdp26-minimal-spark -d @/home/vagrant/hostmapping.json
sleep 30


# wait until the cluster is ready.
ProgressPercent=`curl -s --user admin:admin -X GET http://localhost:8080/api/v1/clusters/hdp26-minimal-spark/requests/1 | grep progress_percent | awk '{print $3}' | cut -d . -f 1`
while [[ `echo $ProgressPercent | grep -v 100` ]]; do
  ProgressPercent=`curl -s --user admin:admin -X GET http://localhost:8080/api/v1/clusters/hdp26-minimal-spark/requests/1 | grep progress_percent | awk '{print $3}' | cut -d . -f 1`
  echo " Progress: $ProgressPercent %"
  sleep 10
done


cat << EOF > /home/vagrant/shutdown_components.sh
#!/bin/bash
#stop all services
curl -u admin:admin -i -H 'X-Requested-By: ambari' -X PUT \
   -d '{"RequestInfo":{"context":"_PARSE_.STOP.ALL_SERVICES","operation_level":{"level":"CLUSTER","cluster_name":"hdp26-minnimal-hive"}},"Body":{"ServiceInfo":{"state":"INSTALLED"}}}' \
http://localhost:8080/api/v1/clusters/hdp26-minimal-spark/services
EOF
chmod +x /home/vagrant/shutdown_components.sh
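# A sketch of the matching startup script, assuming the same Ambari services
# API accepts state=STARTED with a START context; an untested counterpart of
# the shutdown script above.
cat << EOF > /home/vagrant/startup_components.sh
#!/bin/bash
#start all services
curl -u admin:admin -i -H 'X-Requested-By: ambari' -X PUT \
   -d '{"RequestInfo":{"context":"_PARSE_.START.ALL_SERVICES","operation_level":{"level":"CLUSTER","cluster_name":"hdp26-minimal-spark"}},"Body":{"ServiceInfo":{"state":"STARTED"}}}' \
http://localhost:8080/api/v1/clusters/hdp26-minimal-spark/services
EOF
chmod +x /home/vagrant/startup_components.sh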


cat << EOF > /tmp/test.csv
100,15000000
200,20000000
300,18000000
EOF


cat << EOF > /tmp/sample.sql
CREATE EXTERNAL TABLE sample (
  store_id INT,
  sales INT
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
WITH SERDEPROPERTIES (
   "separatorChar" = ",",
   "quoteChar"     = '"'
)
stored as textfile
LOCATION '/user/hive';

LOAD DATA LOCAL INPATH '/tmp/test.csv' OVERWRITE INTO TABLE sample;

select * from sample;
EOF


# upload sample content
sudo -u hive hdfs dfs -put /tmp/sample.sql /user/hive
sudo -u hive hdfs dfs -ls /user/hive

# create table and select
beeline -u 'jdbc:hive2://localhost:10016/' -n hive -f /tmp/sample.sql

echo 'access -> http://192.168.1.117:8080'
echo 'user/password -> admin/admin'
SHELL
end
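
Once provisioning finishes, you can check each service's state from the host through the Ambari REST API. A minimal sketch, assuming the default admin/admin credentials and the bridged address configured above:

curl -s --user admin:admin \
  "http://192.168.1.117:8080/api/v1/clusters/hdp26-minimal-spark/services?fields=ServiceInfo/state"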

○Related information
・See here for other articles about Ambari.

Saturday, August 26, 2017

Building a Kerberized single-node Hive environment with Vagrant

A single-node Hive environment with Kerberos authentication can be built with the following Vagrantfile. In addition to installing Kerberos and Hive, it also creates a test user (test) and a sample table.

Vagrantfile

VAGRANTFILE_API_VERSION = "2"

Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
  config.vm.box = "bento/centos-7.3"
  config.vm.hostname = "krbhive.vm.internal"
  config.vm.provider :virtualbox do |vbox|
     vbox.name = "krbhive"
     vbox.cpus = 4
     vbox.memory = 13312 
     vbox.customize ["modifyvm", :id, "--nicpromisc2","allow-all"]
  end
  # private network
  config.vm.network "private_network", ip: "192.168.55.70", :netmask => "255.255.255.0"
  # bridge network
  config.vm.network "public_network", ip: "192.168.1.70", :netmask => "255.255.255.0"
  config.vm.network "forwarded_port", guest:22, host:10022, id:"ssh"
  config.vm.provision "shell", inline: <<-SHELL

#echo "192.168.55.70  krbhive.vm.internal krbhive" >> /etc/hosts
sed -i -e 's/127.0.0.1\\t/192.168.55.70\\t/' /etc/hosts


# install haveged
yum -y install epel-release
yum -y install haveged
systemctl enable haveged.service
systemctl start haveged.service

# install kerberos
yum -y install krb5-server krb5-workstation pam_krb5

# configure chrony
echo 'allow 192.168.1/24' >> /etc/chrony.conf
echo 'allow 192.168.55/24' >> /etc/chrony.conf

systemctl enable chronyd.service
systemctl start chronyd.service

# configure kdc.conf and krb5.conf
sed -i -e 's/EXAMPLE.COM/VM.INTERNAL/g' /var/kerberos/krb5kdc/kdc.conf

kdb5_util create -r VM.INTERNAL -s -P admin

sed -i -e 's/# default_realm = EXAMPLE.COM/default_realm = VM.INTERNAL/' /etc/krb5.conf
sed -i -e 's/ default_ccache_name/#default_ccache_name/' /etc/krb5.conf
sed -i -e 's/\\[realms\\]/#[realms]/' /etc/krb5.conf
sed -i -e 's/\\[domain_realm\\]/#[domain_realm]/' /etc/krb5.conf

echo '' >> /etc/krb5.conf
echo '[realms]' >> /etc/krb5.conf
echo 'VM.INTERNAL = {' >> /etc/krb5.conf
echo '  kdc = krbhive.vm.internal' >> /etc/krb5.conf
echo '  admin_server = krbhive.vm.internal' >> /etc/krb5.conf
echo '}' >> /etc/krb5.conf
echo '' >> /etc/krb5.conf
echo '[domain_realm]' >> /etc/krb5.conf
echo '.vm.internal = VM.INTERNAL' >> /etc/krb5.conf
echo 'vm.internal = VM.INTERNAL' >> /etc/krb5.conf

sed -i -e 's/^/#/' /var/kerberos/krb5kdc/kadm5.acl
echo '*/admin@VM.INTERNAL *' >> /var/kerberos/krb5kdc/kadm5.acl

kadmin.local addprinc -pw "admin" root/admin

systemctl enable krb5kdc
systemctl start krb5kdc
systemctl enable kadmin
systemctl start kadmin

# add the host principal
kadmin.local addprinc -randkey host/krbhive.vm.internal
kadmin.local ktadd host/krbhive.vm.internal

# install mysql
sudo yum -y remove mariadb-libs
yum -y localinstall http://dev.mysql.com/get/mysql57-community-release-el7-7.noarch.rpm
yum -y install mysql mysql-devel mysql-server mysql-utilities
sudo systemctl enable mysqld.service
sudo systemctl start mysqld.service

# change password and create users and databases.
chkconfig mysqld on
service mysqld start
export MYSQL_ROOTPWD='Root123#'
export MYSQL_PWD=`cat /var/log/mysqld.log | awk '/temporary password/ {print $NF}'`
mysql -uroot --connect-expired-password -e "SET PASSWORD = PASSWORD('$MYSQL_ROOTPWD');"
export MYSQL_PWD=$MYSQL_ROOTPWD
export MYSQL_ROOTPWD='root'
mysql -uroot --connect-expired-password -e "UNINSTALL PLUGIN validate_password;"
mysql -uroot --connect-expired-password -e "SET PASSWORD = PASSWORD('$MYSQL_ROOTPWD');"
export MYSQL_PWD=$MYSQL_ROOTPWD
mysql -uroot --connect-expired-password -e "CREATE DATABASE ambari DEFAULT CHARACTER SET utf8;"
mysql -uroot --connect-expired-password -e "CREATE USER ambari@localhost IDENTIFIED BY 'bigdata';"
mysql -uroot --connect-expired-password -e "GRANT ALL PRIVILEGES ON ambari.* TO 'ambari'@'%' IDENTIFIED BY 'bigdata';"

mysql -uroot --connect-expired-password -e "CREATE DATABASE hive DEFAULT CHARACTER SET utf8;"
mysql -uroot --connect-expired-password -e "CREATE USER hive@localhost IDENTIFIED BY 'hive';"
mysql -uroot --connect-expired-password -e "GRANT ALL PRIVILEGES ON hive.* TO 'hive'@'%' IDENTIFIED BY 'hive';"

sudo systemctl stop mysqld.service
sudo cp /vagrant/my.cnf /etc
ln -s /var/lib/mysql/mysql.sock /tmp/mysql.sock
sudo systemctl start mysqld.service

# install JDBC driver
yum -y install mysql-connector-java

# install Ambari
cd /etc/yum.repos.d/
wget http://public-repo-1.hortonworks.com/ambari/centos7/2.x/updates/2.5.1.0/ambari.repo
yum -y install ambari-server ambari-agent


# workaround for AMBARI-20532
echo '' >> /etc/ambari-server/conf/ambari.properties
echo 'server.jdbc.database=mysql' >> /etc/ambari-server/conf/ambari.properties
echo 'server.jdbc.database_name=ambari' >> /etc/ambari-server/conf/ambari.properties
echo 'server.jdbc.user.name=ambari' >> /etc/ambari-server/conf/ambari.properties
echo 'server.jdbc.user.password=/etc/ambari-server/conf/password.dat' >> /etc/ambari-server/conf/ambari.properties
echo 'server.jdbc.driver=/usr/share/java/mysql-connector-java.jar' >> /etc/ambari-server/conf/ambari.properties
echo 'custom.jdbc.name=mysql-connector-java.jar' >> /etc/ambari-server/conf/ambari.properties
echo 'server.jdbc.hostname=localhost' >> /etc/ambari-server/conf/ambari.properties
echo 'server.jdbc.port=3306' >> /etc/ambari-server/conf/ambari.properties
ambari-server setup -s --jdbc-db=mysql --jdbc-driver=/usr/share/java/mysql-connector-java.jar -v
ambari-server setup --silent

mysql -u ambari -pbigdata ambari < /var/lib/ambari-server/resources/Ambari-DDL-MySQL-CREATE.sql

ambari-server start
ambari-agent start

# submit the cluster configuration
curl -H "X-Requested-By: ambari" -X POST -u admin:admin http://localhost:8080/api/v1/blueprints/krbhive -d @/vagrant/cluster_configuration.json

curl -H "X-Requested-By: ambari" -X POST -u admin:admin http://localhost:8080/api/v1/clusters/krbhive -d @/vagrant/hostmapping.json
sleep 60

# wait until the cluster is built
Progress=`curl -s --user admin:admin -X GET http://localhost:8080/api/v1/clusters/krbhive/requests/1 | grep progress_percent | awk '{print $3}' | cut -d . -f 1`
while [[ `echo $Progress | grep -v 100` ]]; do
  Progress=`curl -s --user admin:admin -X GET http://localhost:8080/api/v1/clusters/krbhive/requests/1 | grep progress_percent | awk '{print $3}' | cut -d . -f 1`
  echo " Progress: $Progress%"
  sleep 30
done

# create the admin user's directory
sudo -u hdfs /usr/bin/hdfs dfs -mkdir /user/admin
sudo -u hdfs /usr/bin/hdfs dfs -chown admin /user/admin

# create the test user and a sample table
useradd test
cd ~test
kadmin -p root/admin -w admin addprinc -pw test test
kadmin.local ktadd  -norandkey test
kadmin.local xst -norandkey -k test.keytab test@VM.INTERNAL
chown test:test test.keytab
sudo -u hdfs /usr/bin/hdfs dfs -mkdir /user/test
sudo -u hdfs /usr/bin/hdfs dfs -chown test /user/test

cp /vagrant/sample.sql /home/test
chown test:test /home/test/sample.sql
cp /vagrant/sample.csv /tmp
chmod 777 /tmp/sample.csv

sudo -u test kinit -k -t /home/test/test.keytab test
sudo -u test beeline -u 'jdbc:hive2://krbhive.vm.internal:10000/default;principal=hive/krbhive.vm.internal@VM.INTERNAL' -f /home/test/sample.sql

SHELL
end
cluster_configuration.json

{
  "configurations" : [
    {
      "kerberos-env": {
        "properties_attributes" : { },
        "properties" : {
          "realm" : "VM.INTERNAL",
          "kdc_type" : "mit-kdc",
          "kdc_host" : "krbhive.vm.internal",
          "admin_server_host" : "krbhive.vm.internal"
        }
      }
    },
    {
      "krb5-conf": {
        "properties_attributes" : { },
        "properties" : {
          "domains" : "vm.internal",
          "manage_krb5_conf" : "false"
        }
      }
    },
    {
      "hive-site": {
        "hive.support.concurrency": "true",
        "hive.txn.manager": "org.apache.hadoop.hive.ql.lockmgr.DbTxnManager",
        "hive.compactor.initiator.on": "true",
        "hive.compactor.worker.threads": "5",
        "javax.jdo.option.ConnectionDriverName": "com.mysql.jdbc.Driver",
        "javax.jdo.option.ConnectionPassword": "hive",
        "javax.jdo.option.ConnectionURL": "jdbc:mysql://localhost/hive",
        "javax.jdo.option.ConnectionUserName": "hive"
      }
    },
    {
      "hive-env": {
        "hive_ambari_database": "MySQL",
        "hive_database": "Existing MySQL Database",
        "hive_database_type": "mysql",
        "hive_database_name": "hive"
      }
    },
    {
      "core-site": {
        "properties" : {
          "hadoop.proxyuser.root.groups" : "*",
          "hadoop.proxyuser.root.hosts" : "*",
          "hadoop.proxyuser.hive.groups" : "*",
          "hadoop.proxyuser.hive.hosts" : "*"
        }
      }
    }
  ],
  "host_groups" : [
    {
      "name" : "host_group_1",
      "components" : [
        {
          "name" : "NAMENODE"
        },
        {
          "name" : "SECONDARY_NAMENODE"
        },
        {
          "name" : "DATANODE"
        },
        {
          "name" : "HDFS_CLIENT"
        },
        {
          "name" : "RESOURCEMANAGER"
        },
        {
          "name" : "NODEMANAGER"
        },
        {
          "name" : "YARN_CLIENT"
        },
        {
          "name" : "HISTORYSERVER"
        },
        {
          "name" : "APP_TIMELINE_SERVER"
        },
        {
          "name" : "ZOOKEEPER_SERVER"
        },
        {
          "name" : "ZOOKEEPER_CLIENT"
        },
        {
          "name" : "METRICS_MONITOR"
        },
        {
          "name" : "TEZ_CLIENT"
        },
        {
          "name" : "HIVE_SERVER"
        },
        {
          "name" : "HIVE_METASTORE"
        },
        {
          "name" : "METRICS_COLLECTOR"
        },
        {
          "name" : "WEBHCAT_SERVER"
        }
      ],
      "cardinality" : "1"
    }
  ],
  "settings" : [{
     "recovery_settings" : [{
       "recovery_enabled" : "true"
    }]
  }],
  "Blueprints" : {
    "blueprint_name" : "krbhive",
    "stack_name" : "HDP",
    "stack_version" : "2.6",
    "security" : {
      "type" : "KERBEROS"
    }
  }
}
hostmapping.json

{
  "blueprint" : "krbhive",
  "default_password" : "admin",
  "credentials" : [
    {
      "alias" : "kdc.admin.credential",
      "principal" : "root/admin@VM.INTERNAL",
      "key" : "admin",
      "type" : "TEMPORARY"
    }
  ],
  "security" : {
    "type" : "KERBEROS"
  },
  "provision_action" : "INSTALL_AND_START",
  "host_groups" :[
    {
      "name" : "host_group_1",
      "hosts" : [
        {
          "fqdn" : "krbhive.vm.internal"
        }
      ]
    }
  ]
}
my.cnf

[client]
port            = 3306
socket          = /var/lib/mysql/mysql.sock
default-character-set=utf8

[mysqld]
datadir=/var/lib/mysql
socket=/var/lib/mysql/mysql.sock
user=mysql
# Disabling symbolic-links is recommended to prevent assorted security risks
symbolic-links=0
bind-address = 0.0.0.0
port            = 3306
key_buffer_size = 256M
max_allowed_packet = 16M
table_open_cache = 16
innodb_buffer_pool_size = 512M
innodb_log_file_size = 32M
sort_buffer_size = 8M
read_buffer_size = 8M
read_rnd_buffer_size = 8M
join_buffer_size = 8M
thread_stack = 4M
character-set-server=utf8
lower_case_table_names = 1
innodb_lock_wait_timeout=120
skip-innodb-doublewrite

[mysqld_safe]
log-error=/var/log/mysqld.log
pid-file=/var/run/mysqld/mysqld.pid
sample.sql

CREATE EXTERNAL TABLE sample (
  store_id INT,
  sales INT
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
WITH SERDEPROPERTIES (
   "separatorChar" = ",",
   "quoteChar"     = "\"",
   "escapeChar"    = "\\"
) 
stored as textfile
LOCATION '/user/test'
tblproperties ("skip.header.line.count"="1");

LOAD DATA LOCAL INPATH '/tmp/sample.csv' OVERWRITE INTO TABLE sample;

select * from sample;
sample.csv

store_id,sales
100,15000000
200,20000000
300,18000000
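
To verify the Kerberos setup by hand, obtain a ticket as the test user and query the sample table through the kerberized HiveServer2. A minimal sketch run inside the VM, reusing the keytab and principal created by the provisioning script:

sudo -u test kinit -k -t /home/test/test.keytab test
sudo -u test klist
sudo -u test beeline -u 'jdbc:hive2://krbhive.vm.internal:10000/default;principal=hive/krbhive.vm.internal@VM.INTERNAL' -e 'select * from sample;'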

○Related information
Building a Kerberos server with Vagrant
Creating a single-node Hive cluster with Vagrant and an Ambari blueprint
・See here for other articles about Ambari.

Sunday, August 6, 2017

Building a single-node Spark2 cluster with Vagrant and an Ambari blueprint

With the following Vagrantfile, you can build a single-node cluster with mysql, Ambari Server, Spark2, and related components installed.

Vagrantfile

VAGRANTFILE_API_VERSION = "2"

Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
  config.vm.box = "bento/centos-7.3"
  config.vm.hostname = "min-spark"
  config.vm.provider :virtualbox do |vbox|
     vbox.name = "min-spark"
     vbox.cpus = 4
     vbox.memory = 12288 
     vbox.customize ["modifyvm", :id, "--nicpromisc2","allow-all"]
  end
  # private network
  config.vm.network "private_network", ip: "192.168.55.20", :netmask => "255.255.255.0"
  # bridge network
  config.vm.network "public_network", ip: "192.168.1.20", :netmask => "255.255.255.0"
  config.vm.network "forwarded_port", guest:22, host:10022, id:"ssh"
  config.vm.provision "shell", inline: <<-SHELL
# disable firewalld
systemctl stop firewalld
systemctl disable firewalld

# install mysql
sudo yum -y remove mariadb-libs
yum -y localinstall http://dev.mysql.com/get/mysql57-community-release-el7-7.noarch.rpm
yum -y install mysql mysql-devel mysql-server mysql-utilities
sudo systemctl enable mysqld.service
sudo systemctl start mysqld.service

# change the root password and create users and databases
chkconfig mysqld on
service mysqld start
export MYSQL_ROOTPWD='Root123#'
export MYSQL_PWD=`cat /var/log/mysqld.log | awk '/temporary password/ {print $NF}'`
mysql -uroot --connect-expired-password -e "SET PASSWORD = PASSWORD('$MYSQL_ROOTPWD');"
export MYSQL_PWD=$MYSQL_ROOTPWD
export MYSQL_ROOTPWD='root'
mysql -uroot --connect-expired-password -e "UNINSTALL PLUGIN validate_password;"
mysql -uroot --connect-expired-password -e "SET PASSWORD = PASSWORD('$MYSQL_ROOTPWD');"
export MYSQL_PWD=$MYSQL_ROOTPWD
mysql -uroot --connect-expired-password -e "CREATE DATABASE ambari DEFAULT CHARACTER SET utf8;"
mysql -uroot --connect-expired-password -e "CREATE USER ambari@localhost IDENTIFIED BY 'bigdata';"
mysql -uroot --connect-expired-password -e "GRANT ALL PRIVILEGES ON ambari.* TO 'ambari'@'%' IDENTIFIED BY 'bigdata';"

mysql -uroot --connect-expired-password -e "CREATE DATABASE hive DEFAULT CHARACTER SET utf8;"
mysql -uroot --connect-expired-password -e "CREATE USER hive@localhost IDENTIFIED BY 'hive';"
mysql -uroot --connect-expired-password -e "GRANT ALL PRIVILEGES ON hive.* TO 'hive'@'%' IDENTIFIED BY 'hive';"

sudo systemctl stop mysqld.service
sudo cp /vagrant/my.cnf /etc
ln -s /var/lib/mysql/mysql.sock /tmp/mysql.sock
sudo systemctl start mysqld.service

# install the JDBC driver
yum -y install mysql-connector-java

# install Ambari
cd /etc/yum.repos.d/
wget http://public-repo-1.hortonworks.com/ambari/centos7/2.x/updates/2.5.1.0/ambari.repo
yum -y install ambari-server ambari-agent


# workaround for AMBARI-20532
echo '' >> /etc/ambari-server/conf/ambari.properties
echo 'server.jdbc.database=mysql' >> /etc/ambari-server/conf/ambari.properties
echo 'server.jdbc.database_name=ambari' >> /etc/ambari-server/conf/ambari.properties
echo 'server.jdbc.user.name=ambari' >> /etc/ambari-server/conf/ambari.properties
echo 'server.jdbc.user.password=/etc/ambari-server/conf/password.dat' >> /etc/ambari-server/conf/ambari.properties
echo 'server.jdbc.driver=/usr/share/java/mysql-connector-java.jar' >> /etc/ambari-server/conf/ambari.properties
echo 'custom.jdbc.name=mysql-connector-java.jar' >> /etc/ambari-server/conf/ambari.properties
echo 'server.jdbc.hostname=localhost' >> /etc/ambari-server/conf/ambari.properties
echo 'server.jdbc.port=3306' >> /etc/ambari-server/conf/ambari.properties
ambari-server setup -s --jdbc-db=mysql --jdbc-driver=/usr/share/java/mysql-connector-java.jar -v
ambari-server setup --silent

mysql -u ambari -pbigdata ambari < /var/lib/ambari-server/resources/Ambari-DDL-MySQL-CREATE.sql

ambari-server start
ambari-agent start

# build a one-node cluster from the blueprint
curl -H "X-Requested-By: ambari" -X POST -u admin:admin http://localhost:8080/api/v1/blueprints/min-spark -d @/vagrant/cluster_configuration.json

curl -H "X-Requested-By: ambari" -X POST -u admin:admin http://localhost:8080/api/v1/clusters/min-spark -d @/vagrant/hostmapping.json
sleep 30

# wait until provisioning completes
Progress=`curl -s --user admin:admin -X GET http://localhost:8080/api/v1/clusters/min-spark/requests/1 | grep progress_percent | awk '{print $3}' | cut -d . -f 1`
while [[ `echo $Progress | grep -v 100` ]]; do
  Progress=`curl -s --user admin:admin -X GET http://localhost:8080/api/v1/clusters/min-spark/requests/1 | grep progress_percent | awk '{print $3}' | cut -d . -f 1`
  echo " Progress: $Progress%"
  sleep 30
done

# create the admin user's directory
sudo -u hdfs /bin/hdfs dfs -mkdir /user/admin
sudo -u hdfs /bin/hdfs dfs -chown admin /user/admin

# add a test user
useradd test
echo test | passwd test --stdin

sudo -u hdfs /bin/hdfs dfs -mkdir /user/test
sudo -u hdfs /bin/hdfs dfs -chown test /user/test

# You can connect to Spark (the Spark2 Thrift Server) as follows:
#beeline
#!connect jdbc:hive2://localhost:10016 test


SHELL
end
cluster_configuration.json

{
  "configurations" : [
    {
      "hive-site": {
        "hive.support.concurrency": "true",
        "hive.txn.manager": "org.apache.hadoop.hive.ql.lockmgr.DbTxnManager",
        "hive.compactor.initiator.on": "true",
        "hive.compactor.worker.threads": "5",
        "javax.jdo.option.ConnectionDriverName": "com.mysql.jdbc.Driver",
        "javax.jdo.option.ConnectionPassword": "hive",
        "javax.jdo.option.ConnectionURL": "jdbc:mysql://localhost/hive",
        "javax.jdo.option.ConnectionUserName": "hive"
      }
    },
    {
      "hive-env": {
        "hive_ambari_database": "MySQL",
        "hive_database": "Existing MySQL Database",
        "hive_database_type": "mysql",
        "hive_database_name": "hive"
      }
    },
    {
      "core-site": {
        "properties" : {
          "hadoop.proxyuser.root.groups" : "*",
          "hadoop.proxyuser.root.hosts" : "*"
        }
      }
    }
  ],
  "host_groups" : [
    {
      "name" : "host_group_1",
      "components" : [
        {
          "name" : "NAMENODE"
        },
        {
          "name" : "SECONDARY_NAMENODE"
        },
        {
          "name" : "DATANODE"
        },
        {
          "name" : "HDFS_CLIENT"
        },
        {
          "name" : "RESOURCEMANAGER"
        },
        {
          "name" : "NODEMANAGER"
        },
        {
          "name" : "YARN_CLIENT"
        },
        {
          "name" : "HISTORYSERVER"
        },
        {
          "name" : "APP_TIMELINE_SERVER"
        },
        {
          "name" : "ZOOKEEPER_SERVER"
        },
        {
          "name" : "ZOOKEEPER_CLIENT"
        },
        {
          "name" : "METRICS_MONITOR"
        },
        {
          "name" : "TEZ_CLIENT"
        },
        {
          "name" : "HIVE_SERVER"
        },
        {
          "name" : "HIVE_METASTORE"
        },
        {
          "name" : "METRICS_COLLECTOR"
        },
        {
          "name" : "WEBHCAT_SERVER"
        },
        {
          "name" : "PIG"
        },
        {
          "name" : "SLIDER"
        },
        {
          "name" : "SPARK2_JOBHISTORYSERVER"
        },
        {
          "name" : "SPARK2_CLIENT"
        },
        {
          "name": "SPARK2_THRIFTSERVER"
        },
        {
          "name": "LIVY2_SERVER"
        }
      ],
      "cardinality" : "1"
    }
  ],
  "settings" : [{
     "recovery_settings" : [{
       "recovery_enabled" : "true"
    }]
  }],
  "Blueprints" : {
    "blueprint_name" : "min-spark",
    "stack_name" : "HDP",
    "stack_version" : "2.6"
  }
}
hostmapping.json

{
  "blueprint" : "min-spark",
  "default_password" : "admin",
  "provision_action" : "INSTALL_AND_START",
  "host_groups" :[
    {
      "name" : "host_group_1",
      "hosts" : [
        {
          "fqdn" : "min-spark"
        }
      ]
    }
  ]
}
my.cnf

[client]
port            = 3306
socket          = /var/lib/mysql/mysql.sock
default-character-set=utf8

[mysqld]
datadir=/var/lib/mysql
socket=/var/lib/mysql/mysql.sock
user=mysql
# Disabling symbolic-links is recommended to prevent assorted security risks
symbolic-links=0
bind-address = 0.0.0.0
port            = 3306
key_buffer_size = 256M
max_allowed_packet = 16M
table_open_cache = 16
innodb_buffer_pool_size = 512M
innodb_log_file_size = 32M
sort_buffer_size = 8M
read_buffer_size = 8M
read_rnd_buffer_size = 8M
join_buffer_size = 8M
thread_stack = 4M
character-set-server=utf8
lower_case_table_names = 1
innodb_lock_wait_timeout=120
skip-innodb-doublewrite

[mysqld_safe]
log-error=/var/log/mysqld.log
pid-file=/var/run/mysqld/mysqld.pid
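
After provisioning, you can run SQL against the Spark2 Thrift Server with beeline, as noted in the provisioning script. A minimal sketch, assuming the Thrift Server listens on its default port 10016, using the test user created above:

beeline -u 'jdbc:hive2://localhost:10016/default' -n test -e 'show databases;'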

○Related information
・See here for other articles about Ambari.

Thursday, August 3, 2017

Creating a single-node Hive cluster with Vagrant and an Ambari blueprint

With the following Vagrantfile, you can build a single-node cluster with mysql, Ambari Server, Hive, and related components installed.

VAGRANTFILE_API_VERSION = "2"

Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
  config.vm.box = "bento/centos-7.3"
  config.vm.hostname = "min-hive"
  config.vm.provider :virtualbox do |vbox|
     vbox.name = "min-hive"
     vbox.cpus = 4
     vbox.memory = 12288 
     vbox.customize ["modifyvm", :id, "--nicpromisc2","allow-all"]
  end
  # private network
  config.vm.network "private_network", ip: "192.168.55.20", :netmask => "255.255.255.0"
  # bridge network
  config.vm.network "public_network", ip: "192.168.1.20", :netmask => "255.255.255.0"
  config.vm.network "forwarded_port", guest:22, host:10022, id:"ssh"
  config.vm.provision "shell", inline: <<-SHELL
# disable firewalld
systemctl stop firewalld
systemctl disable firewalld

# install mysql
sudo yum -y remove mariadb-libs
yum -y localinstall http://dev.mysql.com/get/mysql57-community-release-el7-7.noarch.rpm
yum -y install mysql mysql-devel mysql-server mysql-utilities
sudo systemctl enable mysqld.service
sudo systemctl start mysqld.service

# change the root password and create users and databases
chkconfig mysqld on
service mysqld start
export MYSQL_ROOTPWD='Root123#'
export MYSQL_PWD=`cat /var/log/mysqld.log | awk '/temporary password/ {print $NF}'`
mysql -uroot --connect-expired-password -e "SET PASSWORD = PASSWORD('$MYSQL_ROOTPWD');"
export MYSQL_PWD=$MYSQL_ROOTPWD
export MYSQL_ROOTPWD='root'
mysql -uroot --connect-expired-password -e "UNINSTALL PLUGIN validate_password;"
mysql -uroot --connect-expired-password -e "SET PASSWORD = PASSWORD('$MYSQL_ROOTPWD');"
export MYSQL_PWD=$MYSQL_ROOTPWD
mysql -uroot --connect-expired-password -e "CREATE DATABASE ambari DEFAULT CHARACTER SET utf8;"
mysql -uroot --connect-expired-password -e "CREATE USER ambari@localhost IDENTIFIED BY 'bigdata';"
mysql -uroot --connect-expired-password -e "GRANT ALL PRIVILEGES ON ambari.* TO 'ambari'@'%' IDENTIFIED BY 'bigdata';"

mysql -uroot --connect-expired-password -e "CREATE DATABASE hive DEFAULT CHARACTER SET utf8;"
mysql -uroot --connect-expired-password -e "CREATE USER hive@localhost IDENTIFIED BY 'hive';"
mysql -uroot --connect-expired-password -e "GRANT ALL PRIVILEGES ON hive.* TO 'hive'@'%' IDENTIFIED BY 'hive';"

sudo systemctl stop mysqld.service
sudo cp /vagrant/my.cnf /etc
ln -s /var/lib/mysql/mysql.sock /tmp/mysql.sock
sudo systemctl start mysqld.service

# install the JDBC driver
yum -y install mysql-connector-java

# install Ambari
cd /etc/yum.repos.d/
wget http://public-repo-1.hortonworks.com/ambari/centos7/2.x/updates/2.5.1.0/ambari.repo
yum -y install ambari-server ambari-agent


# workaround for AMBARI-20532
echo '' >> /etc/ambari-server/conf/ambari.properties
echo 'server.jdbc.database=mysql' >> /etc/ambari-server/conf/ambari.properties
echo 'server.jdbc.database_name=ambari' >> /etc/ambari-server/conf/ambari.properties
echo 'server.jdbc.user.name=ambari' >> /etc/ambari-server/conf/ambari.properties
echo 'server.jdbc.user.password=/etc/ambari-server/conf/password.dat' >> /etc/ambari-server/conf/ambari.properties
echo 'server.jdbc.driver=/usr/share/java/mysql-connector-java.jar' >> /etc/ambari-server/conf/ambari.properties
echo 'custom.jdbc.name=mysql-connector-java.jar' >> /etc/ambari-server/conf/ambari.properties
echo 'server.jdbc.hostname=localhost' >> /etc/ambari-server/conf/ambari.properties
echo 'server.jdbc.port=3306' >> /etc/ambari-server/conf/ambari.properties
ambari-server setup -s --jdbc-db=mysql --jdbc-driver=/usr/share/java/mysql-connector-java.jar -v
ambari-server setup --silent

mysql -u ambari -pbigdata ambari < /var/lib/ambari-server/resources/Ambari-DDL-MySQL-CREATE.sql

ambari-server start
ambari-agent start

# build a one-node cluster from the blueprint
curl -H "X-Requested-By: ambari" -X POST -u admin:admin http://localhost:8080/api/v1/blueprints/min-hive -d @/vagrant/cluster_configuration.json

curl -H "X-Requested-By: ambari" -X POST -u admin:admin http://localhost:8080/api/v1/clusters/min-hive -d @/vagrant/hostmapping.json
sleep 30

# wait until provisioning completes
Progress=`curl -s --user admin:admin -X GET http://localhost:8080/api/v1/clusters/min-hive/requests/1 | grep progress_percent | awk '{print $3}' | cut -d . -f 1`
while [[ `echo $Progress | grep -v 100` ]]; do
  Progress=`curl -s --user admin:admin -X GET http://localhost:8080/api/v1/clusters/min-hive/requests/1 | grep progress_percent | awk '{print $3}' | cut -d . -f 1`
  echo " Progress: $Progress%"
  sleep 30
done

# create the admin user's directory
sudo -u hdfs /usr/bin/hdfs dfs -mkdir /user/admin
sudo -u hdfs /usr/bin/hdfs dfs -chown admin /user/admin

# add a test user
useradd test
echo test | passwd test --stdin

sudo -u hdfs /usr/bin/hdfs dfs -mkdir /user/test
sudo -u hdfs /usr/bin/hdfs dfs -chown test /user/test

SHELL
end
cluster_configuration.json

{
  "configurations" : [
    {
      "hive-site": {
        "hive.support.concurrency": "true",
        "hive.txn.manager": "org.apache.hadoop.hive.ql.lockmgr.DbTxnManager",
        "hive.compactor.initiator.on": "true",
        "hive.compactor.worker.threads": "5",
        "javax.jdo.option.ConnectionDriverName": "com.mysql.jdbc.Driver",
        "javax.jdo.option.ConnectionPassword": "hive",
        "javax.jdo.option.ConnectionURL": "jdbc:mysql://localhost/hive",
        "javax.jdo.option.ConnectionUserName": "hive"
      }
    },
    {
      "hive-env": {
        "hive_ambari_database": "MySQL",
        "hive_database": "Existing MySQL Database",
        "hive_database_type": "mysql",
        "hive_database_name": "hive"
      }
    },
    {
      "core-site": {
        "properties" : {
          "hadoop.proxyuser.root.groups" : "*",
          "hadoop.proxyuser.root.hosts" : "*"
        }
      }
    }
  ],
  "host_groups" : [
    {
      "name" : "host_group_1",
      "components" : [
        {
          "name" : "NAMENODE"
        },
        {
          "name" : "SECONDARY_NAMENODE"
        },
        {
          "name" : "DATANODE"
        },
        {
          "name" : "HDFS_CLIENT"
        },
        {
          "name" : "RESOURCEMANAGER"
        },
        {
          "name" : "NODEMANAGER"
        },
        {
          "name" : "YARN_CLIENT"
        },
        {
          "name" : "HISTORYSERVER"
        },
        {
          "name" : "APP_TIMELINE_SERVER"
        },
        {
          "name" : "ZOOKEEPER_SERVER"
        },
        {
          "name" : "ZOOKEEPER_CLIENT"
        },
        {
          "name" : "METRICS_MONITOR"
        },
        {
          "name" : "TEZ_CLIENT"
        },
        {
          "name" : "HIVE_SERVER"
        },
        {
          "name" : "HIVE_METASTORE"
        },
        {
          "name" : "METRICS_COLLECTOR"
        },
        {
          "name" : "WEBHCAT_SERVER"
        }
      ],
      "cardinality" : "1"
    }
  ],
  "settings" : [{
     "recovery_settings" : [{
       "recovery_enabled" : "true"
    }]
  }],
  "Blueprints" : {
    "blueprint_name" : "min-hive",
    "stack_name" : "HDP",
    "stack_version" : "2.6"
  }
}
hostmapping.json

{
  "blueprint" : "min-hive",
  "default_password" : "admin",
  "provision_action" : "INSTALL_AND_START",
  "host_groups" :[
    {
      "name" : "host_group_1",
      "hosts" : [
        {
          "fqdn" : "min-hive"
        }
      ]
    }
  ]
}
my.cnf

[client]
port            = 3306
socket          = /var/lib/mysql/mysql.sock
default-character-set=utf8

[mysqld]
datadir=/var/lib/mysql
socket=/var/lib/mysql/mysql.sock
user=mysql
# Disabling symbolic-links is recommended to prevent assorted security risks
symbolic-links=0
bind-address = 0.0.0.0
port            = 3306
key_buffer_size = 256M
max_allowed_packet = 16M
table_open_cache = 16
innodb_buffer_pool_size = 512M
innodb_log_file_size = 32M
sort_buffer_size = 8M
read_buffer_size = 8M
read_rnd_buffer_size = 8M
join_buffer_size = 8M
thread_stack = 4M
character-set-server=utf8
lower_case_table_names = 1
innodb_lock_wait_timeout=120
skip-innodb-doublewrite

[mysqld_safe]
log-error=/var/log/mysqld.log
pid-file=/var/run/mysqld/mysqld.pid
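
After provisioning, you can connect to HiveServer2 with beeline as the test user. A minimal sketch, assuming HiveServer2 listens on its default port 10000:

beeline -u 'jdbc:hive2://localhost:10000/default' -n test -e 'show databases;'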

○Related information
Building a Kerberized single-node Hive environment with Vagrant

・See here for other articles about Ambari.