Commit f183883a authored by Paschalis Korosoglou

Merge pull request #27 from ioantsaf/ansible-proxy

LAM-42 Ansible playbooks for http proxy, ssh bastion proxy, LAM-40 ansible inventory
parents 51104772 0361037b
......@@ -61,3 +61,5 @@ target/
MANIFEST
old_kamaki
*.idea
coverage.xml
......@@ -26,6 +26,7 @@ both the master and the slaves.
## Playbooks and Roles
There are four (4) roles and five (5) playbooks. These are:
- proxy role, run from proxy playbook.
- common role, run from common playbook.
- apache-hadoop role, run from apache-hadoop playbook.
- apache-kafka role, run from apache-kafka playbook.
......@@ -36,6 +37,12 @@ There are four (4) roles and five (5) playbooks. These are:
## Role Explanation
### proxy
- Installs the squid HTTP proxy on the master node, using the apt package manager.
- Configures the proxy's ACLs and allows access from the local network (localnet).
- Restarts the HTTP proxy service.
### common
- Installs all the packages needed for the cluster to run.
......@@ -63,6 +70,8 @@ There are four (4) roles and five (5) playbooks. These are:
- Starts an Apache Flink YARN session.
## How to deploy
You can deploy the whole cluster by running the cluster-install playbook:
......
#
# config file for ansible
# https://raw.githubusercontent.com/ansible/ansible/devel/examples/ansible.cfg
#
[defaults]
remote_user = root
hostfile = hosts
#[ssh_connection]
#ssh_args = -o StrictHostKeyChecking=no -F ssh.config -q
---
- hosts: master
user: root
gather_facts: no
roles:
- wait_for_ssh
- hosts: master
user: root
roles:
- proxy
- hosts: all
user: root
roles:
- common
- apache-hadoop
- apache-kafka
- apache-flink
---
- hosts: all
user: root
roles:
- ../roles/common
- ../roles/apache-hadoop
- ../roles/apache-kafka
- ../roles/apache-flink
---
- hosts: master
user: root
gather_facts: yes
roles:
- ../roles/apache-flink
- apache-flink
---
- hosts: all
user: root
roles:
- ../roles/apache-hadoop
- apache-hadoop
---
- hosts: all
user: root
roles:
- ../roles/apache-kafka
- apache-kafka
---
- hosts: master
user: root
roles:
- proxy
---
- hosts: master
user: root
tasks:
- name: Touch master file
file: path=/root/master state=touch mode="u=rw,g=r,o=r"
tags: touch
- name: Remove master file
file: path=/root/master state=absent
tags: rm
- name: Copy hosts file
template: src=../roles/common/templates/hosts.j2 dest=/etc/hosts backup=no owner=root group=root mode=0750
tags: hosts
- hosts: slaves
user: root
gather_facts: False
tasks:
- name: Touch slave file
file: path=/root/slave state=touch mode="u=rw,g=r,o=r"
tags: touch
- name: Remove slave file
file: path=/root/slave state=absent
tags: rm
- name: Copy hosts file
template: src=../roles/common/templates/hosts.j2 dest=/etc/hosts backup=no owner=root group=root mode=0750
tags: hosts
---
- hosts: master
user: root
tasks:
- name: install screen to master without proxy
apt: name=screen state=installed
tags: install
- hosts: slaves
user: root
tasks:
- name: install screen to slaves using proxy
apt: name=screen state=installed
environment: proxy_env
tags: install
---
- hosts: master
user: root
gather_facts: no
roles:
- wait_for_ssh
......@@ -35,8 +35,8 @@
tags:
- create-dirs
- name: Create Apache HDFS user/root directory.
command: "{{ installation_path }}/hadoop/bin/hadoop fs -mkdir /user/root"
- name: Create Apache HDFS user/hduser directory.
command: "{{ installation_path }}/hadoop/bin/hadoop fs -mkdir /user/hduser"
tags:
- create-dirs
......@@ -19,7 +19,7 @@
<configuration>
<property>
<name>fs.default.name</name>
<value>hdfs://{{ groups.master | replace("[","") | replace("'","") | replace("]","") | replace(".vm.okeanos.grnet.gr","") }}:9000</value>
<value>hdfs://{{ groups.master | replace("[","") | replace("'","") | replace("]","") | replace(".vm.okeanos.grnet.gr",".local") }}:9000</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
......
......@@ -15,14 +15,14 @@
<configuration>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>{{ groups.master | replace("[","") | replace("'","") | replace("]","") | replace(".vm.okeanos.grnet.gr","") }}:8025</value>
<value>{{ groups.master | replace("[","") | replace("'","") | replace("]","") | replace(".vm.okeanos.grnet.gr",".local") }}:8025</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>{{ groups.master | replace("[","") | replace("'","") | replace("]","") | replace(".vm.okeanos.grnet.gr","") }}:8030</value>
<value>{{ groups.master | replace("[","") | replace("'","") | replace("]","") | replace(".vm.okeanos.grnet.gr",".local") }}:8030</value>
</property>
<property>
<name>yarn.resourcemanager.address</name>
<value>{{ groups.master | replace("[","") | replace("'","") | replace("]","") | replace(".vm.okeanos.grnet.gr","") }}:8050</value>
<value>{{ groups.master | replace("[","") | replace("'","") | replace("]","") | replace(".vm.okeanos.grnet.gr",".local") }}:8050</value>
</property>
</configuration>
---
- name: create topics
shell: "{{ installation_path }}/kafka/bin/kafka-topics.sh --create --zookeeper {{ hostvars['master-node']['internal_ip'] }}:2181 --replication-factor {{ groups['slaves']|count + 1 }} --partitions 1 --topic input"
shell: "{{ installation_path }}/kafka/bin/kafka-topics.sh --create --zookeeper {{ hostvars[groups['master'][0]]['internal_ip'] }}:2181 --replication-factor {{ groups['slaves']|count + 1 }} --partitions 1 --topic input"
notify:
- create batch output topic
- name: create batch output topic
shell: "{{ installation_path }}/kafka/bin/kafka-topics.sh --create --zookeeper {{ hostvars['master-node']['internal_ip'] }}:2181 --replication-factor {{ groups['slaves']|count + 1 }} --partitions 1 --topic batch-output"
shell: "{{ installation_path }}/kafka/bin/kafka-topics.sh --create --zookeeper {{ hostvars[groups['master'][0]]['internal_ip'] }}:2181 --replication-factor {{ groups['slaves']|count + 1 }} --partitions 1 --topic batch-output"
notify:
- create stream output topic
- name: create stream output topic
shell: "{{ installation_path }}/kafka/bin/kafka-topics.sh --create --zookeeper {{ hostvars['master-node']['internal_ip'] }}:2181 --replication-factor {{ groups['slaves']|count + 1 }} --partitions 1 --topic stream-output"
shell: "{{ installation_path }}/kafka/bin/kafka-topics.sh --create --zookeeper {{ hostvars[groups['master'][0]]['internal_ip'] }}:2181 --replication-factor {{ groups['slaves']|count + 1 }} --partitions 1 --topic stream-output"
---
- name: Create users for each application.
include: users.yml
when: "'slaves' in group_names"
- name: Include common tasks.
include: common-1.yml
......
---
- name: Set hostname
hostname: name={{ inventory_hostname | replace(".vm.okeanos.grnet.gr",".local") }}
when: "'slaves' in group_names"
- name: Copy hosts file.
template: src=hosts.j2 dest=/etc/hosts backup=no owner=root group=lambda mode=0750
- name: Upgrade packages.
apt: upgrade=dist update_cache=yes
environment: proxy_env
- name: Install the latest Java 7.
apt: name=openjdk-7-jdk state=latest install_recommends=no update_cache=yes
environment: proxy_env
- name: Copy environment file.
template: src=environment.j2 dest=/etc/environment backup=no owner=root group=lambda mode=0750
- name: Install sudo.
apt: name=sudo state=latest
environment: proxy_env
- name: Add hduser to sudo group.
user: name=hduser group=sudo
......
......@@ -5,8 +5,7 @@
tags:
- master
- name: Include tasks for slaves.
include: slaves.yml
when: "'slaves' in group_names"
- name: Include tasks for all nodes.
include: all.yml
tags:
- slaves
- all
......@@ -2,9 +2,6 @@
- name: Create users for each application.
include: users.yml
- name: Include common tasks.
include: common-1.yml
# - name: Generate ssh key for root.
# shell: cat /dev/zero | ssh-keygen -q -N ""
# args:
......@@ -37,6 +34,3 @@
- name: Set up ssh config for flink user.
template: src=ssh-config.j2 dest=/home/flink/.ssh/config owner=flink group=lambda mode=600
- name: Include more common tasks.
include: common-2.yml
{% for master in groups["master"] %}
Host {{ master }}
Host {{ master | replace(".vm.okeanos.grnet.gr","*") }}
StrictHostKeyChecking no
{% endfor %}
......
---
- name: restart squid3
action: service name=squid3 state=restarted
# enabled=yes
---
- name: Copy hosts file.
template: src=hosts.j2 dest=/etc/hosts backup=no owner=root mode=0750
- name: Install squid http proxy
apt: name=squid3 state=latest
tags: install
- name: Configure squid http proxy
lineinfile: destfile="/etc/squid3/squid.conf" regexp="^acl localnet" insertafter="^#acl localnet"
line="acl localnet src {{ hostvars[groups['all'][0]]['local_net'] }}"
state=present
notify:
- restart squid3
tags: config
- name: Configure squid http proxy
lineinfile: destfile="/etc/squid3/squid.conf" regexp="^#http_access allow localnet"
line="http_access allow localnet"
state=present
notify:
- restart squid3
tags: config
127.0.0.1 localhost
{% for master in groups["master"] %}
{{ hostvars[master]["internal_ip"] }} {{ master | replace(".vm.okeanos.grnet.gr",".local") }}
{% endfor %}
{% for slave in groups["slaves"] %}
{{ hostvars[slave]["internal_ip"] }} {{ slave | replace(".vm.okeanos.grnet.gr",".local") }}
{% endfor %}
# The following lines are desirable for IPv6 capable hosts
::1 localhost ip6-localhost ip6-loopback
ff02::1 ip6-allnodes
ff02::2 ip6-allrouters
---
- name: Wait for port 22 to be ready
local_action: wait_for port=22 host="{{ inventory_hostname }}" search_regex=OpenSSH
Host bastion
HostName snf-xxxxxx.vm.okeanos.grnet.gr
User root
ProxyCommand none
Host *
ProxyCommand ssh -i /tmp/tmpxxxxxx -W %h:%p root@snf-xxxxxx.vm.okeanos.grnet.gr
# Core python library
## Installation
- Install required packages `pip install -r requirements.txt`
- Install package using `python setup.py install`
## Description
The libraries contained in the core package are responsible for creating a cluster of VMs and installing all the packages and configuration required for a complete lambda instance. A description of the libraries follows:
### provisioner
The library is responsible for creating a VM cluster, using the kamaki Python API. It reads the authentication info from .kamakirc and accepts the cluster specs as arguments.
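For illustration, here is a minimal sketch of driving the provisioner programmatically; it mirrors the call made in `cluster_creator.py` further down in this merge request, and the argument values shown are simply that script's defaults:
```
from fokia.provisioner import Provisioner

# Read the "lambda" cloud section of .kamakirc and create the cluster
provisioner = Provisioner(cloud_name="lambda")
provisioner.create_lambda_cluster('lambda-master',
                                  slaves=1, cluster_size=2,
                                  vcpus_master=4, vcpus_slave=4,
                                  ram_master=4096, ram_slave=4096,  # MB
                                  disk_master=40, disk_slave=40,    # GB
                                  ip_request=1, network_request=1,
                                  project_name="lambda.grnet.gr")

# Dictionary describing the created nodes, network and subnet
provisioner_response = provisioner.get_cluster_details()
```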
### ansible_manager
The library is responsible for managing the Ansible runs on the cluster; a usage sketch follows this list. Its tasks are:
* It reads a dictionary containing the necessary info about the cluster and its nodes.
* It creates an Ansible inventory object, using the dictionary.
* It creates the necessary group and host vars required for Ansible to run on all the nodes and configure them properly.
* It sets some Ansible constants, required e.g. for SSH tunnelling through the master node.
* It runs Ansible playbooks using the previously mentioned inventory and constants.
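A minimal usage sketch, mirroring the `__main__` block of `ansible_manager.py` further down in this merge request; `provisioner_response` is the dictionary produced by the provisioner, and the playbook path is only an example:
```
from fokia.ansible_manager import Manager

manager = Manager(provisioner_response)
manager.create_inventory()
manager.run_playbook(playbook_file="../../ansible/playbooks/cluster-install.yml")
manager.cleanup()  # removes the temporary private key file
```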
### cluster_creator
The script is responsible for creating the entire lambda instance.
* It sets the provisioner arguments (cluster specs), then calls the provisioner to create the cluster.
* After that, it gets the provisioner's output dictionary and adds some more values to it (the nodes' internal IPs and the private key), obtained through the provisioner after the cluster creation; the dictionary's shape is sketched below.
* It calls the ansible_manager to create the inventory, using the dictionary as input.
* Finally, it uses the created manager object (containing the inventory and constants) to run the required playbooks in the correct order and create the lambda instance.
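For reference, the dictionary handed from the provisioner to the ansible_manager has roughly the following shape; the values are the dummy ones used by the tests in this merge request, and `internal_ip` and `pk` are the fields added after the cluster creation:
```
provisioner_response = {
    'nodes': {
        'master': {'id': 666976, 'name': 'test_vm',
                   'internal_ip': '192.168.0.2'},
        'slaves': [{'id': 666977, 'name': 'lambda-node1',
                    'internal_ip': '192.168.0.3'}],
    },
    'subnet': {'cidr': '192.168.0.0/24', 'gateway_ip': '192.168.0.1', 'id': '142761'},
    'ips': [{'floating_ip_address': '83.212.116.49', 'id': '688160'}],
    'vpn': {'type': 'MAC_FILTERED', 'id': '143713'},
    'pk': 'Dummy pk',   # the private key, added after the cluster creation
}
```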
## Prerequisites
* kamaki 0.13.4 or later
* ansible 1.9.2 or later
* crypto 1.4.1 or later
## Installation
- Create a .kamakirc configuration in your home folder and add all the required configurations.
Here is an example configuration:
```
[global]
default_cloud = lambda
; ca_certs = /path/to/certs
[cloud "lambda"]
url = https://accounts.okeanos.grnet.gr/identity/v2.0
token = your-okeanos-token
```
Note that you may retrieve your ~okeanos API token, after logging into the service, by visiting [this page][api_link].
- Install required packages. Within the `core` directory execute `sudo pip install -r requirements.txt`.
- Install package using `sudo python setup.py install`
## Usage
To create a lambda instance, run `python cluster_creator.py` from within the `core/fokia` directory. To change the default settings (one master instance and one slave instance), edit the `cluster_creator.py` script prior to executing it.
## Testing
......@@ -14,3 +66,5 @@ To test the library we use `tox`. In order to run the tests:
- Run `tox`
This will automatically create the required testing environments and run the tests.
[api_link]: https://accounts.okeanos.grnet.gr/ui/api_access
\ No newline at end of file
import inspect
import os
import tempfile
import ansible
from ansible.playbook import PlayBook
from ansible import callbacks
from ansible import utils
class Manager:
def __init__(self, provisioner_response):
self.inventory = {}
self.inventory['master'] = {
'name': 'snf-' + str(provisioner_response['nodes']['master']['id']),
'ip': provisioner_response['nodes']['master']['internal_ip']}
self.inventory['slaves'] = []
for response in provisioner_response['nodes']['slaves']:
self.inventory['slaves'].append(
{'name': 'snf-' + str(response['id']),
'ip': response['internal_ip']})
self.cidr = provisioner_response['subnet']['cidr']
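# Write the cluster's private key to a temporary file so that SSH/Ansible can use it below.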
with tempfile.NamedTemporaryFile(mode='w', delete=False) as kf:
kf.write(provisioner_response['pk'])
self.temp_file = kf.name
# print self.temp_file
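# Tunnel SSH connections to the cluster nodes through the master node, using the temporary private key written above.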
ansible.constants.ANSIBLE_SSH_ARGS = '-o "ProxyCommand ssh -i %s -o StrictHostKeyChecking=no -W %%h:%%p root@%s.vm.okeanos.grnet.gr"' \
% (self.temp_file, self.inventory['master']['name'])
ansible.constants.DEFAULT_TIMEOUT = 30
# ansible.constants.DEFAULT_PRIVATE_KEY_FILE = self.temp_file
ansible.constants.HOST_KEY_CHECKING = False
# ansible.constants.DEFAULT_GATHERING = 'explicit'
def create_inventory(self):
"""
Create the inventory using the ansible library objects
:return:
"""
all_hosts = []
host = self.inventory['master']
all_hosts.append(host['name'] + '.vm.okeanos.grnet.gr')
ansible_host = ansible.inventory.host.Host(name=all_hosts[-1])
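# Slaves are addressed by their internal '.local' names, which the common role's hosts template maps to their internal IPs.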
for host in self.inventory['slaves']:
all_hosts.append(host['name'] + '.local')
ansible_host = ansible.inventory.host.Host(name=all_hosts[-1])
self.ansible_inventory = ansible.inventory.Inventory(host_list=all_hosts)
all_group = self.ansible_inventory.get_group('all')
all_group.set_variable('ansible_ssh_private_key_file', self.temp_file)
all_group.set_variable('local_net', self.cidr)
all_ansible_hosts = all_group.get_hosts()
master_group = ansible.inventory.group.Group(name='master')
master_group.set_variable('proxy_env', {})
ansible_host = all_ansible_hosts[0]
ansible_host.set_variable('internal_ip', self.inventory['master']['ip'])
ansible_host.set_variable('id', 0)
master_group.add_host(ansible_host)
self.ansible_inventory.add_group(master_group)
all_group.add_child_group(master_group)
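# Slaves get a proxy_env pointing at the squid proxy on the master node (port 3128), so their HTTP traffic (e.g. apt) goes through it.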
slaves_group = ansible.inventory.group.Group(name='slaves')
slaves_group.set_variable('proxy_env',
{'http_proxy': 'http://' + self.inventory['master']['name'] + '.local:3128'})
# slaves_group.set_variable('http_proxy', 'http://' + self.inventory['master']['name'] + '.local:3128')
for host_id, host in enumerate(self.inventory['slaves'], start=1):
ansible_host = all_ansible_hosts[host_id]
ansible_host.set_variable('internal_ip', host['ip'])
ansible_host.set_variable('id', host_id)
slaves_group.add_host(ansible_host)
self.ansible_inventory.add_group(slaves_group)
all_group.add_child_group(slaves_group)
# print self.ansible_inventory.groups_list()
return self.ansible_inventory
def run_playbook(self, playbook_file, tags=None):
"""
Run the playbook_file using created inventory and tags specified
:return:
"""
stats = callbacks.AggregateStats()
playbook_cb = callbacks.PlaybookCallbacks(verbose=utils.VERBOSITY)
runner_cb = callbacks.PlaybookRunnerCallbacks(stats, verbose=utils.VERBOSITY)
pb = PlayBook(playbook=playbook_file, inventory=self.ansible_inventory, stats=stats,
callbacks=playbook_cb,
runner_callbacks=runner_cb, only_tags=tags)
pb.run()
def cleanup(self):
os.remove(self.temp_file)
if __name__ == "__main__":
response = {
u'ips': [{u'floating_network_id': u'2186', u'floating_ip_address': u'83.212.116.49', u'id': u'688160'}],
u'nodes': {
u'master': {'internal_ip': u'192.168.0.2', u'adminPass': u'0igc3vbnSx', u'id': 666976, u'name': u'test_vm'},
u'slaves': [{'internal_ip': u'192.168.0.3', u'id': 666977, u'name': u'lambda-node1'}]},
u'vpn': {u'type': u'MAC_FILTERED', u'id': u'143713'},
'pk': 'Dummy pk',
u'subnet': {u'cidr': u'192.168.0.0/24', u'gateway_ip': u'192.168.0.1', u'id': u'142761'}}
script_path = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
manager = Manager(response)
manager.create_inventory()
# manager.run_playbook(playbook_file=script_path + "/../../ansible/playbooks/test/testinventory.yml", tags=['hosts'])
# manager.run_playbook(playbook_file=script_path + "/../../ansible/playbooks/test/testproxy.yml", tags=['install'])
manager.run_playbook(playbook_file=script_path + "/../../ansible/playbooks/cluster-install.yml")
manager.cleanup()
import argparse
import time
import os
import inspect
from fokia.provisioner import Provisioner
from fokia.ansible_manager import Manager
if __name__ == "__main__":
start_time = time.time()
script_path = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
parser = argparse.ArgumentParser(description="Okeanos VM provisioning")
parser.add_argument('--cloud', type=str, dest="cloud", default="lambda")
parser.add_argument('--project-name', type=str, dest="project_name",
default="lambda.grnet.gr")
parser.add_argument('--slaves', type=int, dest='slaves', default=1)
parser.add_argument('--vcpus_master', type=int, dest='vcpus_master', default=4)
parser.add_argument('--vcpus_slave', type=int, dest='vcpus_slave', default=4)
parser.add_argument('--ram_master', type=int, dest='ram_master', default=4096) # in MB
parser.add_argument('--ram_slave', type=int, dest='ram_slave', default=4096) # in MB
parser.add_argument('--disk_master', type=int, dest='disk_master', default=40) # in GB
parser.add_argument('--disk_slave', type=int, dest='disk_slave', default=40) # in GB
parser.add_argument('--ip_request', type=int, dest='ip_request', default=1)
parser.add_argument('--network_request', type=int, dest='network_request', default=1)
parser.add_argument('--image_name', type=str, dest='image_name', default='debian')
parser.add_argument('--cluster_size', type=int, dest='cluster_size', default=2)
args = parser.parse_args()
provisioner = Provisioner(cloud_name=args.cloud)
provisioner.create_lambda_cluster('lambda-master', slaves=args.slaves,
cluster_size=args.cluster_size,
vcpus_master=args.vcpus_master,
vcpus_slave=args.vcpus_slave,
ram_master=args.ram_master,
ram_slave=args.ram_slave,
disk_master=args.disk_master,
disk_slave=args.disk_slave,
ip_request=args.ip_request,
network_request=args.network_request,
project_name=args.project_name)
provisioner_response = provisioner.get_cluster_details()
master_id = provisioner_response['nodes']['master']['id']
master_ip = provisioner.get_server_private_ip(master_id)
provisioner_response['nodes']['master']['internal_ip'] = master_ip
slave_ids = [slave['id'] for slave in provisioner_response['nodes']['slaves']]
for i, slave in enumerate(provisioner_response['nodes']['slaves']):
slave_ip = provisioner.get_server_private_ip(slave['id'])
provisioner_response['nodes']['slaves'][i]['internal_ip'] = slave_ip
provisioner_response['pk'] = provisioner.get_private_key()
print 'response =', provisioner_response
provisioner_time = time.time()
manager = Manager(provisioner_response)
manager.create_inventory()
# manager.run_playbook(playbook_file=script_path + "/../../ansible/playbooks/test/testinventory.yml", tags=['hosts'])
# manager.run_playbook(playbook_file=script_path + "/../../ansible/playbooks/test/testproxy.yml", tags=['install'])
manager.run_playbook(playbook_file=script_path + "/../../ansible/playbooks/cluster-install.yml")
manager.cleanup()
provisioner_duration = provisioner_time - start_time
ansible_duration = time.time() - provisioner_time
print 'VM provisioning took', round(provisioner_duration), 'seconds'
print 'Ansible playbooks took', round(ansible_duration), 'seconds'
......@@ -16,9 +16,6 @@ from fokia.cluster_error_constants import *
from Crypto.PublicKey import RSA
from base64 import b64encode
if not defaults.CACERTS_DEFAULT_PATH:
https.patch_with_certs(CA_CERTS_PATH)
storage_templates = ['drdb', 'ext_vlmc']
......@@ -32,6 +29,24 @@ class Provisioner:
# Load .kamakirc configuration
logger.info("Retrieving .kamakirc configuration")
self.config = KamakiConfig()
if not defaults.CACERTS_DEFAULT_PATH:
ca_certs = self.config.get('global', 'ca_certs')
if ca_certs:
https.patch_with_certs(ca_certs)
else:
try:
from ssl import get_default_verify_paths
ca_certs = get_default_verify_paths().cafile or get_default_verify_paths().openssl_cafile
except:
pass
if ca_certs:
https.patch_with_certs(ca_certs)
else:
logger.warn("COULD NOT FIND ANY CERTIFICATES, PLEASE SET THEM IN YOUR "
".kamakirc global section, option ca_certs")
https.patch_ignore_ssl()
cloud_section = self.config._sections['cloud'].get(cloud_name)
if not cloud_section:
message = "Cloud '%s' was not found in you .kamakirc configuration file. " \
......
kamaki
\ No newline at end of file
kamaki>=0.13.4
ansible>=1.9.2
crypto>=1.4.1
pycrypto>=2.6.1
\ No newline at end of file
from fokia.ansible_manager import Manager
from mock import patch
test_provisioner_response = {
u'ips': [{u'floating_network_id': u'2186', u'floating_ip_address': u'83.212.116.49',
u'id': u'688160'}],
u'nodes': {
u'master': {'internal_ip': u'192.168.0.2', u'adminPass': u'0igc3vbnSx', u'id': 666976,
u'name': u'test_vm'},
u'slaves': [{'internal_ip': u'192.168.0.3', u'id': 666977, u'name': u'lambda-node1'}]},
u'vpn': {u'type': u'MAC_FILTERED', u'id': u'143713'},
'pk': 'Dummy pk',
u'subnet': {u'cidr': u'192.168.0.0/24', u'gateway_ip': u'192.168.0.1', u'id': u'142761'}}
def test_playbook_run():
with patch('fokia.ansible_manager.PlayBook') as pb, \
patch('fokia.ansible_manager.callbacks') as cb, \
patch('fokia.ansible_manager.utils') as ut:
cb.PlaybookCallbacks.return_value = "a"
cb.PlaybookRunnerCallbacks.return_value = "b"
cb.AggregateStats.return_value = "c"
manager = Manager(test_provisioner_response)
manager.create_inventory()
manager.run_playbook(playbook_file="../ansible/playbooks/testinventory.yml",
tags=["touch"])
assert pb.call_args[1]['inventory'].groups[0].name == 'all'
assert pb.call_args[1]['inventory'].groups[1].name == 'master'
assert pb.call_args[1]['inventory'].groups[2].name == 'slaves'
assert pb.call_args[1]['inventory'].groups[1].hosts[