Commit de4fb9ca authored by Christos Kanellopoulos's avatar Christos Kanellopoulos
Browse files

Merge pull request #75 from grnet/devel

Merge devel to master
parents bb81b7d8 bef724a2
---
language: python
python: "2.7"
before_install:
# Make sure everything's up to date.
- sudo apt-get update -qq
install:
# Install Ansible.
- pip install ansible
# Add ansible.cfg to pick up roles path.
- "printf '[defaults]\nroles_path = ../../' > ansible.cfg"
python:
- "2.7"
install:
- pip install tox
- pip install flake8-diff
script:
# We'll add some commands to test the role here.
#- cd ansible/roles/common/tests && ansible-playbook -i inventory test.yml --syntax-check
#- ansible-playbook -i inventory test.yml --connection=local --sudo
#- "ansible-playbook -i inventory test.yml --connection=local --sudo | tee /tmp/output.txt; grep -q 'changed=0.*failed=0' /tmp/output.txt && (echo 'Idempotence test: pass' && exit 0) || (echo 'Idempotence test: fail' && exit 1)"
- cd ansible/roles/apache-flink/tests
# Check the role/playbook's syntax.
- ansible-playbook -i inventory test.yml --syntax-check
# Run the role/playbook with ansible-playbook.
- ansible-playbook -i inventory test.yml --connection=local --sudo
# Run the role/playbook again, checking to make sure it's idempotent.
- ansible-playbook -i inventory test.yml --connection=local --sudo
| grep -q 'changed=0.*failed=0'
&& (echo 'Idempotence test: pass' && exit 0)
|| (echo 'Idempotence test: fail' && exit 1)
# Some MySQL debugging (show all the logs).
- "sudo ls -lah /var/log"
- "sudo cat /var/log/apache-flink/error.log"
- "sudo wget -O hamlet.txt http://www.gutenberg.org/cache/epub/1787/pg1787.txt"
- "sudo bin/flink run ./examples/flink-java-examples-0.8.1-WordCount.jar file://`pwd`/hamlet.txt file://`pwd`/wordcount-result.txt"
- git fetch origin $TRAVIS_BRANCH:travis_pr_branch
- flake8-diff travis_pr_branch
- cd core && tox -e $TOXENV_CORE
env:
- TOXENV_CORE=py27
......@@ -69,9 +69,6 @@ There are four (4) roles and five (5) playbooks. These are:
- Downloads and installs Apache Flink on master node.
- Starts an Apache Flink Yarn session.
## How to deploy
You can deploy the whole cluster by running the cluster-install playbook:
......
[master]
snf-669832.vm.okeanos.grnet.gr
[slaves]
snf-669833.vm.okeanos.grnet.gr
snf-669834.vm.okeanos.grnet.gr
---
- hosts: master
user: root
gather_facts: no
roles:
- wait_for_ssh
- hosts: master
user: root
roles:
- proxy
......@@ -24,7 +24,7 @@
template: src=flink-init.j2 dest=/etc/init.d/flink-init owner=flink group=lambda mode=0740
- name: Start Apache Flink.
shell: /etc/init.d/flink-init start > /dev/null &
shell: /etc/init.d/flink-init start > /dev/null
tags:
- start
......@@ -9,6 +9,10 @@ SCRIPT_USER=flink
# The path where Apache Flink is installed.
INSTALLATION_PATH="{{ installation_path }}"
# The full path of the pid file to use. Apache Flink is run as an Apache Yarn application. The id of this application
# is stored on this file.
APPLICATION_ID="$INSTALLATION_PATH/flink/flink.pid"
# The full path of the lock file to use.
LOCKFILE="$INSTALLATION_PATH/flink/flink-lock"
......@@ -20,15 +24,50 @@ START_COMMAND="$INSTALLATION_PATH/flink/bin/yarn-session.sh -n {{ number_of_task
STOP_COMMAND="$HADOOP_HOME/bin/yarn application --kill"
# Start an Apache Flink Yarn session in the background, wait for it to appear in
# Yarn's application list, and record its application id for later use by stop().
# Returns 0 immediately if the lock file already exists (Flink already started here).
start(){
# Assert that there is no other Apache Flink instance, created with this script, running.
[ -f $LOCKFILE ] && return 0
# Execute the command to start Apache Flink. yarn-session.sh keeps running for the
# lifetime of the session, so it is detached with nohup and backgrounded.
sudo -E -u $SCRIPT_USER nohup $START_COMMAND > /dev/null &
# Get the returned value and create a lock file to prevent multiple instantiations.
# NOTE(review): $? after a backgrounded command reports whether the job was spawned,
# not whether it succeeded — this is effectively always 0 even if Flink later fails.
RETVAL=$?
[ $RETVAL -eq 0 ] && $(sudo -u $SCRIPT_USER touch $LOCKFILE)
# Wait up to a minute for Apache Flink to start: poll Yarn every 10 seconds (at most
# 6 iterations) until the application list shows an entry matching "flink" in its
# fourth tab-separated column (presumably the application user — confirm).
i=0
while [ "$(sudo -u $SCRIPT_USER $HADOOP_HOME/bin/yarn application --list | cut -f4 | grep "flink")" == "" ] && [ $i -lt 6 ]
do
sleep 10
i=$((i+1))
done
# Save the application id (first column) of this Apache Flink application so that
# stop() can read it back and kill the Yarn application.
$(sudo -u $SCRIPT_USER $HADOOP_HOME/bin/yarn application --list | cut -f1,4 | grep "flink" | cut -f1 > $APPLICATION_ID)
chown $SCRIPT_USER $APPLICATION_ID
chmod 644 $APPLICATION_ID
return $RETVAL
}
stop(){
id=$(sudo -u $SCRIPT_USER $HADOOP_HOME/bin/yarn application --list | grep "Flink session" | cut -f1)
sudo -E -u $SCRIPT_USER nohup $STOP_COMMAND $id > /dev/null &
# Assert that an Apache Flink instance, created with this script, is running.
[ ! -f $LOCKFILE ] && return 0
# Read the application id and execute the command to stop Apache Flink. The command will block
# until the service has been stopped.
id=$(sudo -u $SCRIPT_USER cat $APPLICATION_ID)
sudo -E -u $SCRIPT_USER $STOP_COMMAND $id > /dev/null
# Delete the files on Apache HDFS created when Apache Flink was started.
# These files are created by Apache Yarn to distribute Flink across all nodes.
sudo -u $SCRIPT_USER $HADOOP_HOME/bin/hadoop fs -rm -r -skipTrash /user/flink/.flink/$id
# Delete application id file.
sudo -u $SCRIPT_USER rm $APPLICATION_ID
# Get the returned value of the executed command and remove the lock file.
RETVAL=$?
[ $RETVAL -eq 0 ] && $(sudo -u $SCRIPT_USER rm -f $LOCKFILE)
return $RETVAL
......@@ -55,6 +94,7 @@ case "$1" in
[ -f $LOCKFILE ] && restart || :
;;
status)
# If the lock file exists, then Apache Flink is running.
[ -f $LOCKFILE ] && echo "Apache Flink is running." || echo "Apache Flink is not running."
RETVAL=$?
;;
......
......@@ -11,7 +11,7 @@
- format-hdfs
- name: Start Apache HDFS.
shell: /etc/init.d/hdfs-init start > /dev/null &
shell: /etc/init.d/hdfs-init start > /dev/null
tags:
- start-hdfs
......@@ -21,7 +21,7 @@
- start-hdfs
- name: Start Apache Yarn.
shell: /etc/init.d/yarn-init start > /dev/null &
shell: /etc/init.d/yarn-init start > /dev/null
tags:
- start-yarn
......
......@@ -18,8 +18,13 @@
- name: Configure core.
template: src=core-site.xml.j2 dest="{{ installation_path }}/hadoop/etc/hadoop/core-site.xml" owner=hduser group=lambda mode=0644
- name: Configure Apache HDFS.
template: src=hdfs-site.xml.j2 dest="{{ installation_path }}/hadoop/etc/hadoop/hdfs-site.xml" backup=no owner=hduser group=lambda mode=0644
- name: Configure Apache HDFS for master node.
template: src=hdfs-site-master.xml.j2 dest="{{ installation_path }}/hadoop/etc/hadoop/hdfs-site.xml" backup=no owner=hduser group=lambda mode=0644
when: "'master' in group_names"
- name: Configure Apache HDFS for slave nodes.
template: src=hdfs-site-slave.xml.j2 dest="{{ installation_path }}/hadoop/etc/hadoop/hdfs-site.xml" backup=no owner=hduser group=lambda mode=0644
when: "'slaves' in group_names"
- name: Configure Apache Yarn.
template: src=yarn-site.xml.j2 dest="{{ installation_path }}/hadoop/etc/hadoop/yarn-site.xml" owner=hduser group=lambda mode=0644
......@@ -18,7 +18,7 @@
<configuration>
<property>
<name>fs.default.name</name>
<name>fs.defaultFS</name>
<value>hdfs://{{ groups.master | replace("[","") | replace("'","") | replace("]","") | replace(".vm.okeanos.grnet.gr",".local") }}:9000</value>
</property>
<property>
......
......@@ -19,14 +19,26 @@ START_COMMAND="$INSTALLATION_PATH/hadoop/sbin/start-dfs.sh"
STOP_COMMAND="$INSTALLATION_PATH/hadoop/sbin/stop-dfs.sh"
# Start Apache HDFS (via start-dfs.sh) and create a lock file on success.
# Returns 0 immediately if the lock file already exists (HDFS already started here).
start(){
# NOTE(review): this backgrounded nohup invocation appears to be a pre-merge leftover
# from the diff view — the same $START_COMMAND is run again synchronously below.
# Confirm only one of the two invocations should remain.
sudo -u $SCRIPT_USER nohup $START_COMMAND > /dev/null &
# Assert that there is no other Apache HDFS instance, created with this script, running.
[ -f $LOCKFILE ] && return 0
# Execute the command to start Apache HDFS. The command waits until HDFS has been started.
sudo -u $SCRIPT_USER $START_COMMAND > /dev/null
# Get the returned value of the executed command and create a lock file to prevent multiple instantiations.
RETVAL=$?
[ $RETVAL -eq 0 ] && $(sudo -u $SCRIPT_USER touch $LOCKFILE)
return $RETVAL
}
stop(){
sudo -u $SCRIPT_USER nohup $STOP_COMMAND > /dev/null &
# Assert that an Apache HDFS instance, created with this script, is running.
[ ! -f $LOCKFILE ] && return 0
# Execute the command to stop Apache HDFS. The command waits until HDFS has been stopped.
sudo -u $SCRIPT_USER $STOP_COMMAND > /dev/null
# Get the returned value of the executed command and delete the lock file.
RETVAL=$?
[ $RETVAL -eq 0 ] && $(sudo -u $SCRIPT_USER rm -f $LOCKFILE)
return $RETVAL
......@@ -53,6 +65,7 @@ case "$1" in
[ -f $LOCKFILE ] && restart || :
;;
status)
# If the lock file exists, then Apache HDFS is running.
[ -f $LOCKFILE ] && echo "Apache HDFS is running." || echo "Apache HDFS is not running."
RETVAL=$?
;;
......
......@@ -25,4 +25,8 @@
<name>dfs.permissions</name>
<value>false</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file://{{ installation_path }}/hadoop/hdfs/name</value>
</property>
</configuration>
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>dfs.replication</name>
<value>{{ dfs_replication }}</value>
</property>
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file://{{ installation_path }}/hadoop/hdfs/data</value>
</property>
</configuration>
......@@ -19,14 +19,26 @@ START_COMMAND="$INSTALLATION_PATH/hadoop/sbin/start-yarn.sh"
STOP_COMMAND="$INSTALLATION_PATH/hadoop/sbin/stop-yarn.sh"
# Start Apache Yarn (via start-yarn.sh) and create a lock file on success.
# Returns 0 immediately if the lock file already exists (Yarn already started here).
start(){
# NOTE(review): this backgrounded nohup invocation appears to be a pre-merge leftover
# from the diff view — the same $START_COMMAND is run again synchronously below.
# Confirm only one of the two invocations should remain.
sudo -u $SCRIPT_USER nohup $START_COMMAND > /dev/null &
# Assert that there is no other Apache Yarn instance, created with this script, running.
[ -f $LOCKFILE ] && return 0
# Execute the command to start Apache Yarn. The command waits until Yarn has been started.
sudo -u $SCRIPT_USER $START_COMMAND > /dev/null
# Get the returned value of the executed command and create a lock file to prevent multiple instantiations.
RETVAL=$?
[ $RETVAL -eq 0 ] && $(sudo -u $SCRIPT_USER touch $LOCKFILE)
return $RETVAL
}
stop(){
sudo -u $SCRIPT_USER nohup $STOP_COMMAND > /dev/null &
# Assert that an Apache Yarn instance, created with this script, is running.
[ ! -f $LOCKFILE ] && return 0
# Execute the command to stop Apache Yarn. The command waits until Yarn has been stopped.
sudo -u $SCRIPT_USER $STOP_COMMAND > /dev/null
# Get the returned value of the executed command and delete the lock file.
RETVAL=$?
[ $RETVAL -eq 0 ] && $(sudo -u $SCRIPT_USER rm -f $LOCKFILE)
return $RETVAL
......@@ -53,6 +65,7 @@ case "$1" in
[ -f $LOCKFILE ] && restart || :
;;
status)
# If the lock file exists, then Apache Yarn is running.
[ -f $LOCKFILE ] && echo "Apache Yarn is running." || echo "Apache Yarn is not running."
RETVAL=$?
;;
......
......@@ -11,7 +11,7 @@
- configure-kafka
- name: Start Apache Zookeeper server.
shell: /etc/init.d/zookeeper-init start > /dev/null &
shell: /etc/init.d/zookeeper-init start > /dev/null
tags:
- start-zookeeper
......@@ -21,7 +21,7 @@
- start-zookeeper
- name: Start Apache Kafka server.
shell: /etc/init.d/kafka-init start > /dev/null &
shell: /etc/init.d/kafka-init start > /dev/null
tags:
- start-kafka
......
......@@ -16,14 +16,26 @@ LOCKFILE="$INSTALLATION_PATH/kafka/kafka-lock"
START_COMMAND="$INSTALLATION_PATH/kafka/bin/kafka-server-start.sh $INSTALLATION_PATH/kafka/config/server.properties"
# Start the Apache Kafka broker as $SCRIPT_USER via start-stop-daemon (daemonized,
# pidfile-tracked) and create a lock file on success. Returns 0 immediately if the
# lock file already exists (Kafka already started here).
start(){
# NOTE(review): this invocation appears to be a pre-merge leftover from the diff
# view — the same start-stop-daemon command (plus --retry 5) is run again below.
# Confirm only one of the two invocations should remain.
sudo -u $SCRIPT_USER /sbin/start-stop-daemon --start --background --make-pidfile --pidfile $PIDFILE --exec $START_COMMAND
# Assert that there is no other Apache Kafka instance, created with this script, running.
[ -f $LOCKFILE ] && return 0
# Execute the command to start Apache Kafka and wait until the service has been started.
# NOTE(review): per start-stop-daemon(8), --retry is documented for use with --stop;
# confirm it has the intended effect here with --start.
sudo -u $SCRIPT_USER /sbin/start-stop-daemon --start --background --make-pidfile --pidfile $PIDFILE --exec $START_COMMAND --retry 5
# Get the returned value of the executed command and create a lock file to prevent multiple instantiations.
RETVAL=$?
[ $RETVAL -eq 0 ] && $(sudo -u $SCRIPT_USER touch $LOCKFILE)
return $RETVAL
}
stop(){
sudo -u $SCRIPT_USER /sbin/start-stop-daemon --stop --remove-pidfile --pidfile $PIDFILE
# Assert that an Apache Kafka instance, created with this script, is running.
[ ! -f $LOCKFILE ] && return 0
# Execute the command to stop Apache Kafka and wait until the service has been stopped.
sudo -u $SCRIPT_USER /sbin/start-stop-daemon --stop --remove-pidfile --pidfile $PIDFILE --retry 5
# Get the returned value of the executed command and delete the lock file.
RETVAL=$?
[ $RETVAL -eq 0 ] && $(sudo -u $SCRIPT_USER rm -f $LOCKFILE)
return $RETVAL
......@@ -50,6 +62,7 @@ case "$1" in
[ -f $LOCKFILE ] && restart || :
;;
status)
# If the lock file exists, then Apache Kafka is running.
[ -f $LOCKFILE ] && echo "Apache Kafka is running." || echo "Apache kafka is not running."
RETVAL=$?
;;
......
......@@ -16,14 +16,26 @@ LOCKFILE="$INSTALLATION_PATH/kafka/zookeeper-lock"
START_COMMAND="$INSTALLATION_PATH/kafka/bin/zookeeper-server-start.sh $INSTALLATION_PATH/kafka/config/zookeeper.properties"
# Start the Apache Zookeeper server as $SCRIPT_USER via start-stop-daemon (daemonized,
# pidfile-tracked) and create a lock file on success. Returns 0 immediately if the
# lock file already exists (Zookeeper already started here).
start(){
# NOTE(review): this invocation appears to be a pre-merge leftover from the diff
# view — the same start-stop-daemon command (plus --retry 5) is run again below.
# Confirm only one of the two invocations should remain.
sudo -u $SCRIPT_USER /sbin/start-stop-daemon --start --background --make-pidfile --pidfile $PIDFILE --exec $START_COMMAND
# Assert that there is no other Apache Zookeeper instance, created with this script, running.
[ -f $LOCKFILE ] && return 0
# Execute the command to start Apache Zookeeper and wait until the service has been started.
# NOTE(review): per start-stop-daemon(8), --retry is documented for use with --stop;
# confirm it has the intended effect here with --start.
sudo -u $SCRIPT_USER /sbin/start-stop-daemon --start --background --make-pidfile --pidfile $PIDFILE --exec $START_COMMAND --retry 5
# Get the returned value of the executed command and create a lock file to prevent multiple instantiations.
RETVAL=$?
[ $RETVAL -eq 0 ] && $(sudo -u $SCRIPT_USER touch $LOCKFILE)
return $RETVAL
}
stop(){
sudo -u $SCRIPT_USER /sbin/start-stop-daemon --stop --remove-pidfile --pidfile $PIDFILE
# Assert that an Apache Zookeeper instance, created with this script, is running.
[ ! -f $LOCKFILE ] && return
# Execute the command to stop Apache Zookeeper and wait until the service has been stopped.
sudo -u $SCRIPT_USER /sbin/start-stop-daemon --stop --remove-pidfile --pidfile $PIDFILE --retry 5
# Get the returned value of the executed command and delete the lock file.
RETVAL=$?
[ $RETVAL -eq 0 ] && $(sudo -u $SCRIPT_USER rm -f $LOCKFILE)
return $RETVAL
......@@ -50,6 +62,7 @@ case "$1" in
[ -f $LOCKFILE ] && restart || :
;;
status)
# If the lock file exists, then Apache Zookeeper is running.
[ -f $LOCKFILE ] && echo "Apache Zookeeper is running." || echo "Apache Zookeeper is not running."
RETVAL=$?
;;
......
deb http://ftp.de.debian.org/debian/ jessie main
deb-src http://ftp.de.debian.org/debian/ jessie main
deb http://security.debian.org/ jessie/updates main
deb-src http://security.debian.org/ jessie/updates main
# jessie-updates, previously known as 'volatile'
deb http://ftp.de.debian.org/debian/ jessie-updates main
deb-src http://ftp.de.debian.org/debian/ jessie-updates main
deb http://apt.dev.grnet.gr jessie/
---
- name: Create users for each application.
include: users.yml
when: "'slaves' in group_names"
- name: Include common tasks.
include: common-1.yml
- name: Include more common tasks.
include: common-2.yml
---
- name: Fix locale problem.
command: update-locale LANGUAGE="en_US.UTF-8" LC_ALL="en_US.UTF-8"
- name: Copy sources list.
copy: src=sources.list dest=/etc/apt/sources.list owner=root group=root mode=0640
- name: Set hostname
hostname: name={{ inventory_hostname | replace(".vm.okeanos.grnet.gr",".local") }}
......@@ -30,3 +35,15 @@
- name: Add kafka user to sudo group.
user: name=kafka group=sudo
- name: Install supervisord with apt.
apt: name=supervisor state=latest
environment: proxy_env
- name: Configure supervisord for master.
template: src=supervisord-master.conf.j2 dest=/etc/supervisor/supervisord.conf owner=root group=root mode=0600
when: "'master' in group_names"
- name: Configure supervisord for slaves.
template: src=supervisord-slaves.conf.j2 dest=/etc/supervisor/supervisord.conf owner=root group=root mode=0600
when: "'slaves' in group_names"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment