Commit c31e9689 authored by Maiken's avatar Maiken

Adding the act role and other smaller adjustments

parent c52bc03b
......@@ -194,11 +194,13 @@ elasticluster -v setup $clustername -- <path-to-your-elasticluster-installation>
--extra-vars="@$play_vars/blockstorage.yml" \
--extra-vars="@$play_vars/griduser_local.yml" \
--extra-vars="@$play_vars/os_env.yml" \
--extra-vars="@$play_vars/nfs_export_mounts_local.yml"
--extra-vars="@$play_vars/nfs_export_mounts_local.yml" \
--extra-vars="@/home/centos/grid-uh-cloud/ansible/group_vars/all" \
--extra-vars="@/home/centos/grid-uh-cloud/ansible/group_vars/frontend"
### step3
cd <path-to-your-arc-ce-git-clone>
ansible-playbook <path-to-your-arc-ce-git-clone>/contrib/ansible/arc-ce/site_arc-ce.yml \
ansible-playbook <path-to-your-arc-ce-git-clone>/contrib/ansible/arc-ce/site_arc-ce_act.yml \
-i ~/.elasticluster/storage/$clustername.inventory \
--extra-vars="localuser=$localuser installationtype=$installtype arc_major=$arc_major arc_repo=$arc_repo lrms_type=$lrmstype" \
--extra-vars="@$play_vars/griduser_local.yml" \
......
#!/usr/bin/python
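# Test helper: inserts a single sleep job, described in xRSL, into the aCT
# ARC database; the aCT submitter then picks it up and submits it to the
# cluster(s) given in clusterlist.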
import logging
from act.arc.aCTDBArc import aCTDBArc
from act.common.aCTProxy import aCTProxy
from act.common.aCTLogger import aCTLogger
logger = aCTLogger('acttest', cluster='test')
log = logger()
db = aCTDBArc(log, "act")
xrsl = '''&(executable=/bin/sleep)
(arguments=1)
(stdout=stdout)
(rerun=2)
(gmlog=gmlog)
(inputfiles = (file1 "srm://srm.ndgf.org:8443;cache=no/atlas/disk/atlasdatadisk/rucio/mc15_13TeV/fe/a0/AOD.07849074._019904.pool.root.1"))
'''
p = aCTProxy(logging.getLogger(), 1)
voms = "atlas"
attribute = ""  # e.g. attribute="/atlas/Role=production"
proxypath = p.conf.get(["voms", "proxypath"])
validHours = 5
proxyid = 1  # p.createVOMSAttribute(voms, attribute, proxypath, validHours)
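# proxyid=1 assumes a proxy is already registered in the proxies table (the
# act role stores one as proxies/proxiesid1); otherwise use the commented
# createVOMSAttribute call above to register a fresh VOMS proxy.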
db.insertArcJobDescription(xrsl, clusterlist='local://localhost:/main', proxyid=proxyid, maxattempts=5)
"atlas" "voms2.cern.ch" "15001" "/DC=ch/DC=cern/OU=computers/CN=voms2.cern.ch" "atlas"
"atlas" "lcg-voms2.cern.ch" "15001" "/DC=ch/DC=cern/OU=computers/CN=lcg-voms2.cern.ch" "atlas"
\ No newline at end of file
## Assumes the proxy files to copy to the remote host are present in the files/ folder
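## Role outline: install python/perl dependencies, check out aCT from github,
## template its configuration, set up VOMS proxies, create the mysql tables,
## and (re)start the aCT daemons.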
### Other preparations
#######
- name: Install pip
  yum:
    name: python-pip
    state: present

- name: Install the pexpect python module required by the ansible expect module on the host
  command: pip install pexpect>=3.3
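# pexpect is needed because the expect module further below answers the
# interactive prompt when the aCT arc tables are created.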
- name: Copy vomses file
  copy:
    src: /home/centos/grid-uh-cloud/ansible/roles/act/files/vomses
    dest: /etc/grid-security/vomses
#not sure I need this
#- name: Create public jobdir for act jobs
# file:
# path: "{{ joblog_dir }}"
# state: directory
# owner: "{{ user_name_grid }}"
# group: "{{ group_name_grid }}"
# mode: 0755
# recurse: yes
### Installation and configuration
#####################
- name: Install needed dependencies (mysql-connector-python, cpan)
  yum:
    name: "{{ item }}"
    state: present
  with_items:
    - mysql-connector-python
    - cpan

- name: Install cpanminus
  command: cpan App::cpanminus

- name: Install needed perl module JSON::XS
  command: cpanm JSON::XS
##### Checkout aCT from github
- name: "Check if {{ install_dir }}/aCT already exists"
stat:
path: "{{ install_dir }}/aCT"
register: dir_exists
- name: "Remove {{ install_dir }}/aCT if it exists"
file:
path: "{{ install_dir }}/aCT"
state: absent
when: dir_exists.stat.exists
- name: Check out aCT
become: yes
become_user: "{{ localuser }}"
git:
repo: 'https://github.com/ATLASControlTower/aCT.git'
dest: '{{ install_dir }}/aCT'
register: out
ignore_errors: True
when: arc_major == "6"
- name: Stat the aCT dir
stat: path="{{ install_dir }}/aCT"
register: act_dir
#### Configuration of aCT
## It might be better to keep these in /home/centos, since the ansible expect module can only run from there
- name: Place template aCTConfigARC in correct location
  template: backup=yes src=aCTConfigARC.j2 dest={{ install_dir }}/aCT/aCTConfigARC.xml owner={{ localuser }} mode=0644
  when: act_dir.stat.exists

- name: Place template aCTConfigATLAS in correct location
  template: backup=yes src=aCTConfigATLAS.j2 dest={{ install_dir }}/aCT/aCTConfigATLAS.xml owner={{ localuser }} mode=0644
  when: act_dir.stat.exists

- name: Copy aCTConfigARC to the home folder
  template: backup=yes src=aCTConfigARC.j2 dest=/home/{{ localuser }}/aCTConfigARC.xml owner={{ localuser }} mode=0644
  when: act_dir.stat.exists

- name: Copy aCTConfigATLAS to the home folder
  template: backup=yes src=aCTConfigATLAS.j2 dest=/home/{{ localuser }}/aCTConfigATLAS.xml owner={{ localuser }} mode=0644
  when: act_dir.stat.exists
## Proxy-stuff
#############
- name: Prepare proxy folder
  file:
    path: "{{ grid_homedir }}/proxies"
    state: directory
    mode: 0700
    owner: "{{ localuser }}"
    group: "{{ localuser }}"

- name: Copy the act long-lived proxy
  become: yes
  become_user: "{{ localuser }}"
  copy:
    src: /home/centos/grid-uh-cloud/ansible/roles/act/files/atlact1.rfc.long.proxy
    dest: "{{ grid_homedir }}"
    mode: 0400
    owner: "{{ localuser }}"
    group: "{{ localuser }}"
- name: Create a new proxy file
  become: yes
  become_user: "{{ localuser }}"
  command: "{{ install_dir }}/bin/arcproxy --voms=atlas:/atlas/Role=production --cert={{ grid_homedir }}/atlact1.rfc.long.proxy --key={{ grid_homedir }}/atlact1.rfc.long.proxy --proxy={{ grid_homedir }}/proxies/proxiesid1"
##### Modify bashrc file
## Fix env vars
#########################
- name: Create bashrc file from template
  template:
    backup: yes
    src: bashrc.j2
    dest: "/home/{{ localuser }}/bashrc"
    owner: "{{ localuser }}"
    group: "{{ localuser }}"
    mode: 0644

- name: Hack to move bashrc to .bashrc
  shell: mv /home/{{ localuser }}/bashrc /home/{{ localuser }}/.bashrc

- name: "Read PYTHONPATH as seen by {{ localuser }}"
  become: yes
  become_user: "{{ localuser }}"
  environment: "{{ local_env }}"
  shell: "echo $PYTHONPATH"
  register: pythonpath

- name: Show pythonpath
  debug:
    msg: "{{ pythonpath.stdout }}"

- name: Show env
  debug:
    var: local_env
####################
#### Overwrite addnewjob file
- name: Add own addnewjob script
  copy:
    src: addnewjob.py
    dest: "{{ install_dir }}/aCT/src/test/addnewjob.py"
    owner: "{{ localuser }}"
    group: "{{ localuser }}"
### Start-up procedure for aCT #################
- name: Create act database
  mysql_db:
    name: act
    state: present

- name: "Create mysql user {{ localuser }} at localhost"
  mysql_user:
    name: "{{ localuser }}"
    priv: 'act.*:ALL,GRANT'
    state: present
    password: "{{ slurm_db_pw }}"
- name: Create arc table
  become: yes
  become_user: "{{ localuser }}"
  environment: "{{ local_env }}"
  expect:
    command: "/usr/bin/python {{ install_dir }}/aCT/src/act/arc/aCTDBArc.py"
    responses:
      'Are you sure you want to recreate it\? \(y\/n\)': 'yes'
    echo: yes
  ignore_errors: yes

- name: Create proxy table
  become: yes
  become_user: "{{ localuser }}"
  environment: "{{ local_env }}"
  command: "/usr/bin/python {{ install_dir }}/aCT/src/act/common/aCTProxy.py"

- name: Create panda table
  become: yes
  become_user: "{{ localuser }}"
  environment: "{{ local_env }}"
  command: "/usr/bin/python {{ install_dir }}/aCT/src/act/atlas/aCTDBPanda.py"

- name: Restart aCT
  become: yes
  become_user: "{{ localuser }}"
  environment: "{{ local_env }}"
  command: "/usr/bin/python {{ install_dir }}/aCT/src/act/common/aCTMain.py restart"
---
- name: Load the correct variables file (rhel6.yml)
  include_vars:
    file: rhel6.yml
  when: is_rhel6_compatible

- name: Load the correct variables file (rhel7.yml)
  include_vars:
    file: rhel7.yml
  when: is_rhel7_compatible

- name: Include install.yml - tagging the include avoids adding tags to every task
  include: install.yml
  tags:
    - installact
...
\ No newline at end of file
<config>
<db>
<type>mysql</type>
<name>act</name>
<user>{{ localuser }}</user>
<password>{{ slurm_db_pw }}</password>
<host>localhost</host>
<port>3306</port>
</db>
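<!-- How often each aCT agent process is automatically restarted; the values are in seconds. -->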
<loop>
<periodicrestart>
<actsubmitter>120</actsubmitter>
<actstatus>600</actstatus>
<actfetcher>600</actfetcher>
<actcleaner>600</actcleaner>
</periodicrestart>
</loop>
<tmp>
<dir>/tmp</dir>
</tmp>
<actlocation>
<dir>{{ install_dir }}/aCT/src/</dir>
<pidfile>{{ grid_homedir }}/act.pid</pidfile>
</actlocation>
<logger>
<level>debug</level>
<arclevel>debug</arclevel>
<logdir>{{ grid_homedir }}</logdir>
<rotate>25</rotate>
</logger>
<atlasgiis>
<timeout>20</timeout>
</atlasgiis>
<queuesreject>
<item>bigmem</item>
<item>tier3</item>
<item>infiniband</item>
<item>gridsim</item>
</queuesreject>
<clustersreject>
<item>nosuchcluster</item>
</clustersreject>
<jobs>
<checkinterval>30</checkinterval>
<checkmintime>20</checkmintime>
<maxtimerunning>259200</maxtimerunning>
<maxtimehold>172800</maxtimehold>
<maxtimeundefined>3600</maxtimeundefined>
</jobs>
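<!-- Proxy renewal settings; lifetimes are in seconds (proxylifetime 345600 s = 4 days, minlifetime 259200 s = 3 days). -->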
<voms>
<vo>atlas</vo>
<roles>
<item>production</item>
</roles>
<bindir>{{ install_dir }}/bin</bindir>
<proxylifetime>345600</proxylifetime>
<minlifetime>259200</minlifetime>
<proxypath>{{ grid_homedir }}/atlact1.rfc.long.proxy</proxypath>
<cacertdir>/etc/grid-security/certificates</cacertdir>
<proxystoredir>{{ grid_homedir }}/proxies</proxystoredir>
</voms>
<errors>
<toresubmit>
<arcerrors>
<item>Job was lost</item>
<item>Job submission to LRMS failed</item>
<item>Failed extracting LRMS ID</item>
<item>Job disappeared from SGE</item>
<item>Job was cancelled</item>
<item>Internal error in uploader</item>
<item>Job finished with non-zero exit code</item>
<item>LRMS error: (127) Job failed</item>
<item>Job missing from SLURM</item>
<item>Submission: runtime script</item>
<item>LRMS error: (-4)</item>
<item>Job failed with exit code 1</item>
<item>Job was killed by PBS</item>
<item>Local submission client failed</item>
<item>LRMS error: (255)</item>
<item>xxx LRMS error: (257)</item>
<item>LRMS error: (1)</item>
<item>LRMS error: (-1)</item>
<item>LRMS error: (91)</item>
<item>LRMS error: (257) Node fail</item>
<item>Failed initiating job submission to LRMS</item>
</arcerrors>
</toresubmit>
</errors>
</config>
<config>
<executable>
<wrapperurl>http://www-f9.ijs.si;cache=check/grid/ARCpilot-test</wrapperurl>
<ptarurl>http://pandaserver.cern.ch:25080;cache=check/cache/pilot/pilotcode.tar.gz</ptarurl>
<ptarurlrc>http://project-atlas-gmsb.web.cern.ch;cache=no/project-atlas-gmsb/pilotcode-rc.tar.gz</ptarurlrc>
</executable>
<joblog>
<urlprefix>{{ joblog_dir }}</urlprefix>
<dir>/data/user/act/www/jobs</dir>
</joblog>
<agis>
<server>http://atlas-agis-api.cern.ch/request/pandaqueue/query/list/?json&amp;preset=schedconf.all</server>
<objectstores>http://atlas-agis-api.cern.ch/request/ddmendpoint/query/list/?json&amp;type[]=OS_LOGS&amp;type[]=OS_ES</objectstores>
<jsonfilename>{{ install_dir }}/aCT/tmp/agis.json</jsonfilename>
<osfilename>{{ install_dir }}/aCT/tmp/oses.json</osfilename>
<pilotmanager>aCT</pilotmanager>
<maxjobs>0</maxjobs>
</agis>
<panda>
<server>https://pandaserver.cern.ch:25443/server/panda/</server>
<heartbeattime>1800</heartbeattime>
<threads>1</threads>
<getjobs>1</getjobs>
<schedulerid>aCT-atlact1-1</schedulerid>
<timeout>60</timeout>
<minjobs>10</minjobs>
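<!-- Each site maps a PanDA queue to one or more ARC endpoints; the local:// endpoint submits straight to this host's own LRMS queue (cf. clusterlist in addnewjob.py). -->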
<sites>
<site>
<name>{{ panda_queue }}</name>
<endpoints>
<item>local://localhost:/{{ queue }}</item>
</endpoints>
<maxjobs>1</maxjobs>
<corecount>1</corecount>
</site>
</sites>
</panda>
</config>
# .bashrc
# Source global definitions
if [ -f /etc/bashrc ]; then
. /etc/bashrc
fi
# User specific aliases and functions
export PATH={{ install_dir }}/bin:{{ install_dir }}/sbin:$PATH
export LD_LIBRARY_PATH={{ install_dir }}/lib/arc
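# The aCT sources and the ARC python bindings must both be on PYTHONPATH;
# which site-packages directory applies depends on the python version of the OS release.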
{% if is_rhel7_compatible %}
export PYTHONPATH={{ install_dir }}/aCT/src:{{ install_dir }}/lib64/python2.7/site-packages:{{ install_dir }}/lib64/python2.7/site-packages/arc:$PYTHONPATH
{% elif is_rhel6_compatible %}
export PYTHONPATH={{ install_dir }}/aCT/src:{{ install_dir }}/lib64/python2.6/site-packages:{{ install_dir }}/lib64/python2.6/site-packages/arc:$PYTHONPATH
{% endif %}
export ARC_LOCATION={{ install_dir }}
export ARC_CONFIG={{ install_dir }}/etc/arc.conf
export X509_USER_PROXY={{ grid_homedir }}/atlact1.rfc.long.proxy
export LANGUAGE=en_US.UTF-8
export LC_ALL=en_US.UTF-8
export LANG=en_US.UTF-8
export LC_CTYPE=en_US.UTF-8
\ No newline at end of file
---
local_env: "{{ local_env_rhel6 }}"
...
\ No newline at end of file
---
local_env: "{{ local_env_rhel7 }}"
...
\ No newline at end of file
......@@ -2,11 +2,19 @@
# tasks file for setting up cvmfs
#
- name: Install repo for cvmfs
yum: pkg=https://ecsft.cern.ch/dist/cvmfs/cvmfs-release/cvmfs-release-latest.noarch.rpm
- name: wget cvmfs-repo
command: wget https://ecsft.cern.ch/dist/cvmfs/cvmfs-release/cvmfs-release-latest.noarch.rpm --no-check-certificate
- name: Install cvmfs stuff
- name: Install cvmfs-repo
yum:
name: cvmfs-release-latest.noarch.rpm
#- name: Install repo for cvmfs
# yum: pkg=https://ecsft.cern.ch/dist/cvmfs/cvmfs-release/cvmfs-release-latest.noarch.rpm validate_certs=no
- name: Install cvmfs stuff (cvmfs and cvmfs-config-default)
yum: name={{item}} state=present
with_items:
- cvmfs
......@@ -16,7 +24,7 @@
- name: Make /etc/auto.master writable so cvmfs can be added to it manually - cvmfs_config setup was giving trouble
file:
path: /etc/auto.master
chmod: 644
mode: 0644
- name: prepare basic setup
......
# This file is managed by ansible
CVMFS_REPOSITORIES=atlas.cern.ch,atlas-condb.cern.ch
CVMFS_HTTP_PROXY="http://cvmfs-squid1.grid.uio.no:3128|http://cvmfs-squid2.grid.uio.no:3128;http://cvmfs-squid1.grid.uio.no:3128;http://cvmfs-squid2.grid.uio.no:3128"
CVMFS_QUOTA_LIMIT=20000
CVMFS_CACHE_BASE=/atlas_cvmfs
CVMFS_SHARED_CACHE=yes
CVMFS_REPOSITORIES={{CVMFS_REPOSITORIES}}
CVMFS_HTTP_PROXY={{CVMFS_HTTP_PROXY}}
CVMFS_QUOTA_LIMIT={{CVMFS_QUOTA_LIMIT}}
CVMFS_CACHE_BASE={{CVMFS_CACHE_BASE}}
CVMFS_SHARED_CACHE={{CVMFS_SHARED_CACHE}}