Commit de4fb9ca authored by Christos Kanellopoulos's avatar Christos Kanellopoulos

Merge pull request #75 from grnet/devel

Merge devel to master
parents bb81b7d8 bef724a2
---
language: python language: python
python: "2.7" python:
- "2.7"
before_install:
# Make sure everything's up to date.
- sudo apt-get update -qq
install: install:
# Install Ansible. - pip install tox
- pip install ansible - pip install flake8-diff
# Add ansible.cfg to pick up roles path.
- "printf '[defaults]\nroles_path = ../../' > ansible.cfg"
script: script:
# We'll add some commands to test the role here. - git fetch origin $TRAVIS_BRANCH:travis_pr_branch
#- cd ansible/roles/common/tests && ansible-playbook -i inventory test.yml --syntax-check - flake8-diff travis_pr_branch
#- ansible-playbook -i inventory test.yml --connection=local --sudo - cd core && tox -e $TOXENV_CORE
#- "ansible-playbook -i inventory test.yml --connection=local --sudo | tee /tmp/output.txt; grep -q 'changed=0.*failed=0' /tmp/output.txt && (echo 'Idempotence test: pass' && exit 0) || (echo 'Idempotence test: fail' && exit 1)" env:
- cd ansible/roles/apache-flink/tests - TOXENV_CORE=py27
# Check the role/playbook's syntax.
- ansible-playbook -i inventory test.yml --syntax-check
# Run the role/playbook with ansible-playbook.
- ansible-playbook -i inventory test.yml --connection=local --sudo
# Run the role/playbook again, checking to make sure it's idempotent.
- ansible-playbook -i inventory test.yml --connection=local --sudo
| grep -q 'changed=0.*failed=0'
&& (echo 'Idempotence test: pass' && exit 0)
|| (echo 'Idempotence test: fail' && exit 1)
# Some MySQL debugging (show all the logs).
- "sudo ls -lah /var/log"
- "sudo cat /var/log/apache-flink/error.log"
- "sudo wget -O hamlet.txt http://www.gutenberg.org/cache/epub/1787/pg1787.txt"
- "sudo bin/flink run ./examples/flink-java-examples-0.8.1-WordCount.jar file://`pwd`/hamlet.txt file://`pwd`/wordcount-result.txt"
...@@ -69,9 +69,6 @@ There are four (4) roles and five (5) playbooks. These are: ...@@ -69,9 +69,6 @@ There are four (4) roles and five (5) playbooks. These are:
- Downloads and installs Apache Flink on master node. - Downloads and installs Apache Flink on master node.
- Starts an Apache Flink Yarn session. - Starts an Apache Flink Yarn session.
## How to deploy ## How to deploy
You can deploy the whole cluster by running the cluster-install playbook: You can deploy the whole cluster by running the cluster-install playbook:
......
# Hosts in the "master" group.
[master]
snf-669832.vm.okeanos.grnet.gr
# Hosts in the "slaves" group.
[slaves]
snf-669833.vm.okeanos.grnet.gr
snf-669834.vm.okeanos.grnet.gr
---
# Play 1: run the wait_for_ssh role against the master group.
# Fact gathering is switched off for this play (presumably because the host
# may not be reachable over SSH yet -- confirm against the role).
- hosts: master
  user: root
  gather_facts: false
  roles:
    - wait_for_ssh

# Play 2: apply the proxy role to the master group, connecting as root.
- hosts: master
  user: root
  roles:
    - proxy
...@@ -24,7 +24,7 @@ ...@@ -24,7 +24,7 @@
template: src=flink-init.j2 dest=/etc/init.d/flink-init owner=flink group=lambda mode=0740 template: src=flink-init.j2 dest=/etc/init.d/flink-init owner=flink group=lambda mode=0740
- name: Start Apache Flink. - name: Start Apache Flink.
shell: /etc/init.d/flink-init start > /dev/null & shell: /etc/init.d/flink-init start > /dev/null
tags: tags:
- start - start
...@@ -9,6 +9,10 @@ SCRIPT_USER=flink ...@@ -9,6 +9,10 @@ SCRIPT_USER=flink
# The path where Apache Flink is installed. # The path where Apache Flink is installed.
INSTALLATION_PATH="{{ installation_path }}" INSTALLATION_PATH="{{ installation_path }}"
# The full path of the application id file to use. Apache Flink is run as an Apache Yarn application. The id of this application
# is stored in this file.
APPLICATION_ID="$INSTALLATION_PATH/flink/flink.pid"
# The full path of the lock file to use. # The full path of the lock file to use.
LOCKFILE="$INSTALLATION_PATH/flink/flink-lock" LOCKFILE="$INSTALLATION_PATH/flink/flink-lock"
...@@ -20,15 +24,50 @@ START_COMMAND="$INSTALLATION_PATH/flink/bin/yarn-session.sh -n {{ number_of_task ...@@ -20,15 +24,50 @@ START_COMMAND="$INSTALLATION_PATH/flink/bin/yarn-session.sh -n {{ number_of_task
STOP_COMMAND="$HADOOP_HOME/bin/yarn application --kill" STOP_COMMAND="$HADOOP_HOME/bin/yarn application --kill"
start(){ start(){
# Assert that there is no other Apache Flink instance, created with this script, running.
[ -f $LOCKFILE ] && return 0
# Execute the command to start Apache Flink.
sudo -E -u $SCRIPT_USER nohup $START_COMMAND > /dev/null & sudo -E -u $SCRIPT_USER nohup $START_COMMAND > /dev/null &
# Get the returned value and create a lock file to prevent multiple instantiations.
RETVAL=$? RETVAL=$?
[ $RETVAL -eq 0 ] && $(sudo -u $SCRIPT_USER touch $LOCKFILE) [ $RETVAL -eq 0 ] && $(sudo -u $SCRIPT_USER touch $LOCKFILE)
# Wait up to a minute for Apache Flink to start. The loop condition checks whether an application under user "flink" has been
# started on Apache Yarn and whether a minute has passed.
i=0
while [ "$(sudo -u $SCRIPT_USER $HADOOP_HOME/bin/yarn application --list | cut -f4 | grep "flink")" == "" ] && [ $i -lt 6 ]
do
sleep 10
i=$((i+1))
done
# Save the application id of this Apache Flink application.
$(sudo -u $SCRIPT_USER $HADOOP_HOME/bin/yarn application --list | cut -f1,4 | grep "flink" | cut -f1 > $APPLICATION_ID)
chown $SCRIPT_USER $APPLICATION_ID
chmod 644 $APPLICATION_ID
return $RETVAL return $RETVAL
} }
stop(){ stop(){
id=$(sudo -u $SCRIPT_USER $HADOOP_HOME/bin/yarn application --list | grep "Flink session" | cut -f1) # Assert that an Apache Flink instance, created with this script, is running.
sudo -E -u $SCRIPT_USER nohup $STOP_COMMAND $id > /dev/null & [ ! -f $LOCKFILE ] && return 0
# Read the application id and execute the command to stop Apache Flink. The command will block
# until the service has been stopped.
id=$(sudo -u $SCRIPT_USER cat $APPLICATION_ID)
sudo -E -u $SCRIPT_USER $STOP_COMMAND $id > /dev/null
# Delete the files on Apache HDFS created when Apache Flink was started.
# These files are created by Apache Yarn to distribute Flink across all nodes.
sudo -u $SCRIPT_USER $HADOOP_HOME/bin/hadoop fs -rm -r -skipTrash /user/flink/.flink/$id
# Delete application id file.
sudo -u $SCRIPT_USER rm $APPLICATION_ID
# Get the returned value of the executed command and remove the lock file.
RETVAL=$? RETVAL=$?
[ $RETVAL -eq 0 ] && $(sudo -u $SCRIPT_USER rm -f $LOCKFILE) [ $RETVAL -eq 0 ] && $(sudo -u $SCRIPT_USER rm -f $LOCKFILE)
return $RETVAL return $RETVAL
...@@ -55,6 +94,7 @@ case "$1" in ...@@ -55,6 +94,7 @@ case "$1" in
[ -f $LOCKFILE ] && restart || : [ -f $LOCKFILE ] && restart || :
;; ;;
status) status)
# If the lock file exists, then Apache Flink is running.
[ -f $LOCKFILE ] && echo "Apache Flink is running." || echo "Apache Flink is not running." [ -f $LOCKFILE ] && echo "Apache Flink is running." || echo "Apache Flink is not running."
RETVAL=$? RETVAL=$?
;; ;;
......
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
- format-hdfs - format-hdfs
- name: Start Apache HDFS. - name: Start Apache HDFS.
shell: /etc/init.d/hdfs-init start > /dev/null & shell: /etc/init.d/hdfs-init start > /dev/null
tags: tags:
- start-hdfs - start-hdfs
...@@ -21,7 +21,7 @@ ...@@ -21,7 +21,7 @@
- start-hdfs - start-hdfs
- name: Start Apache Yarn. - name: Start Apache Yarn.
shell: /etc/init.d/yarn-init start > /dev/null & shell: /etc/init.d/yarn-init start > /dev/null
tags: tags:
- start-yarn - start-yarn
......
...@@ -18,8 +18,13 @@ ...@@ -18,8 +18,13 @@
- name: Configure core. - name: Configure core.
template: src=core-site.xml.j2 dest="{{ installation_path }}/hadoop/etc/hadoop/core-site.xml" owner=hduser group=lambda mode=0644 template: src=core-site.xml.j2 dest="{{ installation_path }}/hadoop/etc/hadoop/core-site.xml" owner=hduser group=lambda mode=0644
- name: Configure Apache HDFS. - name: Configure Apache HDFS for master node.
template: src=hdfs-site.xml.j2 dest="{{ installation_path }}/hadoop/etc/hadoop/hdfs-site.xml" backup=no owner=hduser group=lambda mode=0644 template: src=hdfs-site-master.xml.j2 dest="{{ installation_path }}/hadoop/etc/hadoop/hdfs-site.xml" backup=no owner=hduser group=lambda mode=0644
when: "'master' in group_names"
- name: Configure Apache HDFS for slave nodes.
template: src=hdfs-site-slave.xml.j2 dest="{{ installation_path }}/hadoop/etc/hadoop/hdfs-site.xml" backup=no owner=hduser group=lambda mode=0644
when: "'slaves' in group_names"
- name: Configure Apache Yarn. - name: Configure Apache Yarn.
template: src=yarn-site.xml.j2 dest="{{ installation_path }}/hadoop/etc/hadoop/yarn-site.xml" owner=hduser group=lambda mode=0644 template: src=yarn-site.xml.j2 dest="{{ installation_path }}/hadoop/etc/hadoop/yarn-site.xml" owner=hduser group=lambda mode=0644
...@@ -18,7 +18,7 @@ ...@@ -18,7 +18,7 @@
<configuration> <configuration>
<property> <property>
<name>fs.default.name</name> <name>fs.defaultFS</name>
<value>hdfs://{{ groups.master | replace("[","") | replace("'","") | replace("]","") | replace(".vm.okeanos.grnet.gr",".local") }}:9000</value> <value>hdfs://{{ groups.master | replace("[","") | replace("'","") | replace("]","") | replace(".vm.okeanos.grnet.gr",".local") }}:9000</value>
</property> </property>
<property> <property>
......
...@@ -19,14 +19,26 @@ START_COMMAND="$INSTALLATION_PATH/hadoop/sbin/start-dfs.sh" ...@@ -19,14 +19,26 @@ START_COMMAND="$INSTALLATION_PATH/hadoop/sbin/start-dfs.sh"
STOP_COMMAND="$INSTALLATION_PATH/hadoop/sbin/stop-dfs.sh" STOP_COMMAND="$INSTALLATION_PATH/hadoop/sbin/stop-dfs.sh"
start(){ start(){
sudo -u $SCRIPT_USER nohup $START_COMMAND > /dev/null & # Assert that there is no other Apache HDFS instance, created with this script, running.
[ -f $LOCKFILE ] && return 0
# Execute the command to start Apache HDFS. The command waits until HDFS has been started.
sudo -u $SCRIPT_USER $START_COMMAND > /dev/null
# Get the returned value of the executed command and create a lock file to prevent multiple instantiations.
RETVAL=$? RETVAL=$?
[ $RETVAL -eq 0 ] && $(sudo -u $SCRIPT_USER touch $LOCKFILE) [ $RETVAL -eq 0 ] && $(sudo -u $SCRIPT_USER touch $LOCKFILE)
return $RETVAL return $RETVAL
} }
stop(){ stop(){
sudo -u $SCRIPT_USER nohup $STOP_COMMAND > /dev/null & # Assert that an Apache HDFS instance, created with this script, is running.
[ ! -f $LOCKFILE ] && return 0
# Execute the command to stop Apache HDFS. The command waits until HDFS has been stopped.
sudo -u $SCRIPT_USER $STOP_COMMAND > /dev/null
# Get the returned value of the executed command and delete the lock file.
RETVAL=$? RETVAL=$?
[ $RETVAL -eq 0 ] && $(sudo -u $SCRIPT_USER rm -f $LOCKFILE) [ $RETVAL -eq 0 ] && $(sudo -u $SCRIPT_USER rm -f $LOCKFILE)
return $RETVAL return $RETVAL
...@@ -53,6 +65,7 @@ case "$1" in ...@@ -53,6 +65,7 @@ case "$1" in
[ -f $LOCKFILE ] && restart || : [ -f $LOCKFILE ] && restart || :
;; ;;
status) status)
# If the lock file exists, then Apache HDFS is running.
[ -f $LOCKFILE ] && echo "Apache HDFS is running." || echo "Apache HDFS is not running." [ -f $LOCKFILE ] && echo "Apache HDFS is running." || echo "Apache HDFS is not running."
RETVAL=$? RETVAL=$?
;; ;;
......
...@@ -25,4 +25,8 @@ ...@@ -25,4 +25,8 @@
<name>dfs.permissions</name> <name>dfs.permissions</name>
<value>false</value> <value>false</value>
</property> </property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file://{{ installation_path }}/hadoop/hdfs/name</value>
</property>
</configuration> </configuration>
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<!--
HDFS site configuration template. The {{ ... }} placeholders are template
variables substituted when the file is rendered.
NOTE(review): presumably this is the slave-node variant (it sets the
datanode data directory and no namenode directory) - confirm.
-->
<configuration>
<!-- Replication factor for HDFS blocks, taken from the dfs_replication variable. -->
<property>
<name>dfs.replication</name>
<value>{{ dfs_replication }}</value>
</property>
<!-- Permission checking in HDFS is switched off. -->
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
</property>
<!-- Local directory, under the installation path, used for datanode data. -->
<property>
<name>dfs.datanode.data.dir</name>
<value>file://{{ installation_path }}/hadoop/hdfs/data</value>
</property>
</configuration>
...@@ -19,14 +19,26 @@ START_COMMAND="$INSTALLATION_PATH/hadoop/sbin/start-yarn.sh" ...@@ -19,14 +19,26 @@ START_COMMAND="$INSTALLATION_PATH/hadoop/sbin/start-yarn.sh"
STOP_COMMAND="$INSTALLATION_PATH/hadoop/sbin/stop-yarn.sh" STOP_COMMAND="$INSTALLATION_PATH/hadoop/sbin/stop-yarn.sh"
start(){ start(){
sudo -u $SCRIPT_USER nohup $START_COMMAND > /dev/null & # Assert that there is no other Apache Yarn instance, created with this script, running.
[ -f $LOCKFILE ] && return 0
# Execute the command to start Apache Yarn. The command waits until Yarn has been started.
sudo -u $SCRIPT_USER $START_COMMAND > /dev/null
# Get the returned value of the executed command and create a lock file to prevent multiple instantiations.
RETVAL=$? RETVAL=$?
[ $RETVAL -eq 0 ] && $(sudo -u $SCRIPT_USER touch $LOCKFILE) [ $RETVAL -eq 0 ] && $(sudo -u $SCRIPT_USER touch $LOCKFILE)
return $RETVAL return $RETVAL
} }
stop(){ stop(){
sudo -u $SCRIPT_USER nohup $STOP_COMMAND > /dev/null & # Assert that an Apache Yarn instance, created with this script, is running.
[ ! -f $LOCKFILE ] && return 0
# Execute the command to stop Apache Yarn. The command waits until Yarn has been stopped.
sudo -u $SCRIPT_USER $STOP_COMMAND > /dev/null
# Get the returned value of the executed command and delete the lock file.
RETVAL=$? RETVAL=$?
[ $RETVAL -eq 0 ] && $(sudo -u $SCRIPT_USER rm -f $LOCKFILE) [ $RETVAL -eq 0 ] && $(sudo -u $SCRIPT_USER rm -f $LOCKFILE)
return $RETVAL return $RETVAL
...@@ -53,6 +65,7 @@ case "$1" in ...@@ -53,6 +65,7 @@ case "$1" in
[ -f $LOCKFILE ] && restart || : [ -f $LOCKFILE ] && restart || :
;; ;;
status) status)
# If the lock file exists, then Apache Yarn is running.
[ -f $LOCKFILE ] && echo "Apache Yarn is running." || echo "Apache Yarn is not running." [ -f $LOCKFILE ] && echo "Apache Yarn is running." || echo "Apache Yarn is not running."
RETVAL=$? RETVAL=$?
;; ;;
......
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
- configure-kafka - configure-kafka
- name: Start Apache Zookeeper server. - name: Start Apache Zookeeper server.
shell: /etc/init.d/zookeeper-init start > /dev/null & shell: /etc/init.d/zookeeper-init start > /dev/null
tags: tags:
- start-zookeeper - start-zookeeper
...@@ -21,7 +21,7 @@ ...@@ -21,7 +21,7 @@
- start-zookeeper - start-zookeeper
- name: Start Apache Kafka server. - name: Start Apache Kafka server.
shell: /etc/init.d/kafka-init start > /dev/null & shell: /etc/init.d/kafka-init start > /dev/null
tags: tags:
- start-kafka - start-kafka
......
...@@ -16,14 +16,26 @@ LOCKFILE="$INSTALLATION_PATH/kafka/kafka-lock" ...@@ -16,14 +16,26 @@ LOCKFILE="$INSTALLATION_PATH/kafka/kafka-lock"
START_COMMAND="$INSTALLATION_PATH/kafka/bin/kafka-server-start.sh $INSTALLATION_PATH/kafka/config/server.properties" START_COMMAND="$INSTALLATION_PATH/kafka/bin/kafka-server-start.sh $INSTALLATION_PATH/kafka/config/server.properties"
start(){ start(){
sudo -u $SCRIPT_USER /sbin/start-stop-daemon --start --background --make-pidfile --pidfile $PIDFILE --exec $START_COMMAND # Assert that there is no other Apache Kafka instance, created with this script, running.
[ -f $LOCKFILE ] && return 0
# Execute the command to start Apache Kafka and wait until the service has been started.
sudo -u $SCRIPT_USER /sbin/start-stop-daemon --start --background --make-pidfile --pidfile $PIDFILE --exec $START_COMMAND --retry 5
# Get the returned value of the executed command and create a lock file to prevent multiple instantiations.
RETVAL=$? RETVAL=$?
[ $RETVAL -eq 0 ] && $(sudo -u $SCRIPT_USER touch $LOCKFILE) [ $RETVAL -eq 0 ] && $(sudo -u $SCRIPT_USER touch $LOCKFILE)
return $RETVAL return $RETVAL
} }
stop(){ stop(){
sudo -u $SCRIPT_USER /sbin/start-stop-daemon --stop --remove-pidfile --pidfile $PIDFILE # Assert that an Apache Kafka instance, created with this script, is running.
[ ! -f $LOCKFILE ] && return 0
# Execute the command to stop Apache Kafka and wait until the service has been stopped.
sudo -u $SCRIPT_USER /sbin/start-stop-daemon --stop --remove-pidfile --pidfile $PIDFILE --retry 5
# Get the returned value of the executed command and delete the lock file.
RETVAL=$? RETVAL=$?
[ $RETVAL -eq 0 ] && $(sudo -u $SCRIPT_USER rm -f $LOCKFILE) [ $RETVAL -eq 0 ] && $(sudo -u $SCRIPT_USER rm -f $LOCKFILE)
return $RETVAL return $RETVAL
...@@ -50,6 +62,7 @@ case "$1" in ...@@ -50,6 +62,7 @@ case "$1" in
[ -f $LOCKFILE ] && restart || : [ -f $LOCKFILE ] && restart || :
;; ;;
status) status)
# If the lock file exists, then Apache Kafka is running.
[ -f $LOCKFILE ] && echo "Apache Kafka is running." || echo "Apache kafka is not running." [ -f $LOCKFILE ] && echo "Apache Kafka is running." || echo "Apache kafka is not running."
RETVAL=$? RETVAL=$?
;; ;;
......
...@@ -16,14 +16,26 @@ LOCKFILE="$INSTALLATION_PATH/kafka/zookeeper-lock" ...@@ -16,14 +16,26 @@ LOCKFILE="$INSTALLATION_PATH/kafka/zookeeper-lock"
START_COMMAND="$INSTALLATION_PATH/kafka/bin/zookeeper-server-start.sh $INSTALLATION_PATH/kafka/config/zookeeper.properties" START_COMMAND="$INSTALLATION_PATH/kafka/bin/zookeeper-server-start.sh $INSTALLATION_PATH/kafka/config/zookeeper.properties"
start(){ start(){
sudo -u $SCRIPT_USER /sbin/start-stop-daemon --start --background --make-pidfile --pidfile $PIDFILE --exec $START_COMMAND # Assert that there is no other Apache Zookeeper instance, created with this script, running.
[ -f $LOCKFILE ] && return 0
# Execute the command to start Apache Zookeeper and wait until the service has been started.
sudo -u $SCRIPT_USER /sbin/start-stop-daemon --start --background --make-pidfile --pidfile $PIDFILE --exec $START_COMMAND --retry 5
# Get the returned value of the executed command and create a lock file to prevent multiple instantiations.
RETVAL=$? RETVAL=$?
[ $RETVAL -eq 0 ] && $(sudo -u $SCRIPT_USER touch $LOCKFILE) [ $RETVAL -eq 0 ] && $(sudo -u $SCRIPT_USER touch $LOCKFILE)
return $RETVAL return $RETVAL
} }
stop(){ stop(){
sudo -u $SCRIPT_USER /sbin/start-stop-daemon --stop --remove-pidfile --pidfile $PIDFILE # Assert that an Apache Zookeeper instance, created with this script, is running.
[ ! -f $LOCKFILE ] && return
# Execute the command to stop Apache Zookeeper and wait until the service has been stopped.
sudo -u $SCRIPT_USER /sbin/start-stop-daemon --stop --remove-pidfile --pidfile $PIDFILE --retry 5
# Get the returned value of the executed command and delete the lock file.
RETVAL=$? RETVAL=$?
[ $RETVAL -eq 0 ] && $(sudo -u $SCRIPT_USER rm -f $LOCKFILE) [ $RETVAL -eq 0 ] && $(sudo -u $SCRIPT_USER rm -f $LOCKFILE)
return $RETVAL return $RETVAL
...@@ -50,6 +62,7 @@ case "$1" in ...@@ -50,6 +62,7 @@ case "$1" in
[ -f $LOCKFILE ] && restart || : [ -f $LOCKFILE ] && restart || :
;; ;;
status) status)
# If the lock file exists, then Apache Zookeeper is running.
[ -f $LOCKFILE ] && echo "Apache Zookeeper is running." || echo "Apache Zookeeper is not running." [ -f $LOCKFILE ] && echo "Apache Zookeeper is running." || echo "Apache Zookeeper is not running."
RETVAL=$? RETVAL=$?
;; ;;
......
# Debian jessie main archive (binary and source packages) from the ftp.de.debian.org mirror.
deb http://ftp.de.debian.org/debian/ jessie main
deb-src http://ftp.de.debian.org/debian/ jessie main
# Debian jessie security updates (binary and source packages).
deb http://security.debian.org/ jessie/updates main
deb-src http://security.debian.org/ jessie/updates main
# jessie-updates, previously known as 'volatile'
deb http://ftp.de.debian.org/debian/ jessie-updates main
deb-src http://ftp.de.debian.org/debian/ jessie-updates main
# Additional package repository hosted at apt.dev.grnet.gr.
deb http://apt.dev.grnet.gr jessie/
---
# Task list that pulls in the individual task files, in order.

# User creation is included only for hosts that belong to the "slaves" group.
- name: Create users for each application.
  include: users.yml
  when: "'slaves' in group_names"

# The two common task files are included for every host, without a condition.
- name: Include common tasks.
  include: common-1.yml

- name: Include more common tasks.
  include: common-2.yml
--- ---
- name: Fix locale problem.
command: update-locale LANGUAGE="en_US.UTF-8" LC_ALL="en_US.UTF-8"
- name: Copy sources list.
copy: src=sources.list dest=/etc/apt/sources.list owner=root group=root mode=0640
- name: Set hostname - name: Set hostname
hostname: name={{ inventory_hostname | replace(".vm.okeanos.grnet.gr",".local") }} hostname: name={{ inventory_hostname | replace(".vm.okeanos.grnet.gr",".local") }}
...@@ -30,3 +35,15 @@ ...@@ -30,3 +35,15 @@
- name: Add kafka user to sudo group. - name: Add kafka user to sudo group.
user: name=kafka group=sudo user: name=kafka group=sudo
- name: Install supervisord with apt.
apt: name=supervisor state=latest
environment: proxy_env
- name: Configure supervisord for master.
template: src=supervisord-master.conf.j2 dest=/etc/supervisor/supervisord.conf owner=root group=root mode=0600
when: "'master' in group_names"
- name: Configure supervisord for slaves.
template: src=supervisord-slaves.conf.j2 dest=/etc/supervisor/supervisord.conf owner=root group=root mode=0600
when: "'slaves' in group_names"
...@@ -5,7 +5,12 @@ ...@@ -5,7 +5,12 @@
tags: tags:
- master - master
- name: Include tasks for all nodes. - name: Create users for each application.
include: all.yml include: users.yml
tags: when: "'slaves' in group_names"
- all
- name: Include common tasks.
include: common-1.yml
- name: Include more common tasks.
include: common-2.yml
...@@ -2,14 +2,6 @@ ...@@ -2,14 +2,6 @@
- name: Create users for each application. - name: Create users for each application.
include: users.yml include: users.yml
# - name: Generate ssh key for root.
# shell: cat /dev/zero | ssh-keygen -q -N ""
# args:
# creates: /root/.ssh/id_rsa.pub
# - name: Fetch id_rsa.pub file from root.
# fetch: src=/root/.ssh/id_rsa.pub dest=/tmp/fetched/root_id_rsa.pub flat=yes
- name: Generate ssh key for hduser. - name: Generate ssh key for hduser.
shell: cat /dev/zero | ssh-keygen -q -N "" shell: cat /dev/zero | ssh-keygen -q -N ""
args: args:
...@@ -34,3 +26,15 @@ ...@@ -34,3 +26,15 @@
- name: Set up ssh config for flink user. - name: Set up ssh config for flink user.
template: src=ssh-config.j2 dest=/home/flink/.ssh/config owner=flink group=lambda mode=600 template: src=ssh-config.j2 dest=/home/flink/.ssh/config owner=flink group=lambda mode=600
- name: Copy Lambda Instance init script.
template: src=lambda-init.j2 dest=/etc/init.d/lambda-init owner=root group=lambda mode=0740
# The Lambda services will be started individually during the execution of the playbooks.
# The lock file needs to be created so that the lambda-init script is aware that there is
# a Lambda Instance running.
- name: Create Lambda Instance lock file.
file: path=/root/lambda-lock state=touch owner=root group=lambda mode=0640
- name: Put lambda init script on boot and shutdown sequence.
command: update-rc.d lambda-init defaults
#!/bin/bash
# The full path of the lock file to use.
LOCKFILE="/root/lambda-lock"
start(){
# Assert that there is no other Lambda instance, created with this script, running.
[ -f $LOCKFILE ] && return 0
# Start Apache HDFS.
echo "Starting Apache HDFS..."
/etc/init.d/hdfs-init start
returnedValue=$?
if [ $returnedValue -eq 0 ]
then
echo "Apache HDFS has been started!"
# Force Apache HDFS to exit safe mode so that Apache Flink can be started later on this script.
{{ hadoop_home }}/bin/hdfs dfsadmin -safemode leave
else
echo "Apache HDFS has failed to start with returned code $returnedValue."
fi
# Start Apache Yarn.
echo "Starting Apache Yarn..."
/etc/init.d/yarn-init start
returnedValue=$?
if [ $returnedValue -eq 0 ]
then
echo "Apache Yarn has been started!"
else
echo "Apache Yarn has failed to start with returned code $returnedValue."
fi
# Start supervisord on master node.
echo "Starting Supervisord..."
supervisord -c /etc/supervisor/supervisord.conf --logfile=/root/supervisord.log
returnedValue=$?
if [ $returnedValue -eq 0 ]
then
echo "Supervisord on master node has been started!"
else
echo "Supervisord on master node has failed to start with returned code $returnedValue."
fi
# Start Apache Zookeeper.
echo "Starting Apache Zookeeper..."
supervisorctl start apache_zookeeper
# Wait for Apache Zookeeper to start.
while [ "$(supervisorctl status apache_zookeeper | tr -s ' ' | cut -f2 -d' ')" == "STARTING" ]
do
sleep 10
done
apache_zookeeper_status=$(supervisorctl status apache_zookeeper | tr -s ' ' | cut -f2 -d' ')
if [ "$apache_zookeeper_status" != "RUNNING" ]
then