Add suite for akka persistence when upgrading ODL
[integration/test.git] / csit / libraries / ClusterManagement.robot
1 *** Settings ***
2 Documentation     Resource housing Keywords common to several suites for cluster functional testing.
3 ...
4 ...               Copyright (c) 2016 Cisco Systems, Inc. and others. All rights reserved.
5 ...
6 ...               This program and the accompanying materials are made available under the
7 ...               terms of the Eclipse Public License v1.0 which accompanies this distribution,
8 ...               and is available at http://www.eclipse.org/legal/epl-v10.html
9 ...
10 ...
11 ...               This resource holds private state (in suite variables),
12 ...               which is generated once at Setup.
13 ...               The state includes IP addresses and Http (RequestsLibrary) sessions.
14 ...               Most functionality deals with stopping/starting controllers
15 ...               and finding leaders/followers for a Shard.
16 ...
17 ...               odl-jolokia is assumed to be installed.
18 ...
19 ...               Keywords are ordered from friendly ones to fiddly ones.
20 ...               TODO: Figure out more deterministic but still user-friendly ordering.
21 ...
22 ...               TODO: Unify capitalization of Leaders and Followers.
23 ...
24 ...               TODO: Move Keywords related to iptables manipulation from ClusterKeywords
25 ...               here, or to separate Resource.
26 Library           RequestsLibrary    # for Create_Session and To_Json
27 Library           Collections
28 Resource          ${CURDIR}/TemplatedRequests.robot    # for Get_As_Json_From_Uri
29 Resource          ${CURDIR}/Utils.robot    # for Run_Command_On_Controller
30
31 *** Variables ***
# Default for the JAVA_HOME exported before starting karaf; releng/builder scripts should provide correct value.
${JAVA_HOME}      ${EMPTY}
# Folder handed over to TemplatedRequests when checking that restconf responds.
${RESTCONF_MODULES_DIR}    ${CURDIR}/../variables/restconf/modules
# Jolokia read URIs: the common prefix used for Shard MBeans, and the two ShardManager MBeans.
${JOLOKIA_READ_URI}    jolokia/read/org.opendaylight.controller
${JOLOKIA_CONF_SHARD_MANAGER_URI}    jolokia/read/org.opendaylight.controller:Category=ShardManager,name=shard-manager-config,type=DistributedConfigDatastore
${JOLOKIA_OPER_SHARD_MANAGER_URI}    jolokia/read/org.opendaylight.controller:Category=ShardManager,name=shard-manager-operational,type=DistributedOperationalDatastore
37
38 *** Keywords ***
ClusterManagement_Setup
    [Documentation]    Idempotent initialization of this Resource.
    ...    The first call reads \${NUM_ODL_SYSTEM} (a single member is assumed when the variable
    ...    is missing or cannot be converted to integer) and computes the derived suite variables.
    ...    Any later call returns immediately, as long as the suite variable set by the first call is visible.
    # Several downstream Resources may call this Setup, only the first call does any work.
    ${setup_flag} =    BuiltIn.Get_Variable_Value    \${ClusterManagement__has_setup_run}    False
    BuiltIn.Return_From_Keyword_If    ${setup_flag}
    BuiltIn.Set_Suite_Variable    \${ClusterManagement__has_setup_run}    True
    ${declared_size} =    BuiltIn.Get_Variable_Value    \${NUM_ODL_SYSTEM}    1
    ${conversion_result}    ${converted_size} =    BuiltIn.Run_Keyword_And_Ignore_Error    BuiltIn.Convert_To_Integer    ${declared_size}
    # On failed conversion the second value holds an error message, so one member is assumed instead.
    ${member_count} =    BuiltIn.Set_Variable_If    '${conversion_result}' == 'PASS'    ${converted_size}    ${1}
    ClusterManagement__Compute_Derived_Variables    int_of_members=${member_count}
49
Kill_Members_From_List_Or_All
    [Arguments]    ${member_index_list}=${EMPTY}    ${confirm}=True
    [Documentation]    Send "kill -9" to karaf java processes on members of given indices (on all members if the list is empty).
    ...    Unless \${confirm} evaluates to false, sleep 1 second afterwards
    ...    and fail if a karaf process is still counted on any of the affected members.
    ${kill_command} =    BuiltIn.Set_Variable    ps axf | grep java | grep karaf | awk '{print \"kill -9 \" $1}' | sh
    Run_Command_On_List_Or_All    command=${kill_command}    member_index_list=${member_index_list}
    BuiltIn.Return_From_Keyword_If    not ${confirm}
    # TODO: Convert to WUKS with configurable timeout if it turns out 1 second is not enough.
    BuiltIn.Sleep    1s    Kill -9 closes open files, which may take longer than ssh overhead, but not long enough to warrant WUKS.
    # The same list resolution as done by Run_Command_On_List_Or_All, so exactly the killed members are verified.
    ${affected_indices} =    ClusterManagement__Given_Or_Internal_Index_List    given_list=${member_index_list}
    : FOR    ${affected_index}    IN    @{affected_indices}
    \    Verify_Karaf_Is_Not_Running_On_Member    member_index=${affected_index}
62
ClusterManagement__Build_List
    [Arguments]    ${member}
    [Documentation]    Convert \${member} to integer and return a new list holding that integer as its only item.
    ${index_as_int} =    BuiltIn.Convert_To_Integer    ${member}
    @{single_index_list} =    BuiltIn.Create_List    ${index_as_int}
    [Return]    ${single_index_list}
68
Kill_Single_Member
    [Arguments]    ${member}    ${confirm}=True
    [Documentation]    Kill ODL on one cluster member, by delegating to Kill_Members_From_List_Or_All with a single-item list.
    ${single_member_list} =    ClusterManagement__Build_List    ${member}
    Kill_Members_From_List_Or_All    member_index_list=${single_member_list}    confirm=${confirm}
74
Clean_Journals_And_Snapshots_On_List_Or_All
    [Arguments]    ${member_index_list}=${EMPTY}    ${karaf_home}=${WORKSPACE}${/}${BUNDLEFOLDER}
    [Documentation]    Run "rm -rf" on journal and snapshots directories under \${karaf_home}
    ...    on each member of given indices (on all members if the list is empty).
    ${cleanup_command} =    BuiltIn.Set_Variable    rm -rf "${karaf_home}/journal" "${karaf_home}/snapshots"
    # Run_Command_On_List_Or_All resolves the index list and runs the command on each listed member.
    Run_Command_On_List_Or_All    command=${cleanup_command}    member_index_list=${member_index_list}
82
Start_Members_From_List_Or_All
    [Arguments]    ${member_index_list}=${EMPTY}    ${wait_for_sync}=True    ${timeout}=300s    ${karaf_home}=${WORKSPACE}${/}${BUNDLEFOLDER}    ${export_java_home}=${JAVA_HOME}
    [Documentation]    Execute bin/start under \${karaf_home} on members of given indices (on all members if the list is empty).
    ...    If \${export_java_home} is not empty, it is exported as JAVA_HOME in front of the start command.
    ...    Unless \${wait_for_sync} evaluates to false, retry Check_Cluster_Is_In_Sync every second
    ...    until it passes for the given list, or fail when \${timeout} expires.
    ${plain_start} =    BuiltIn.Set_Variable    ${karaf_home}/bin/start
    ${start_with_java} =    BuiltIn.Set_Variable    export JAVA_HOME="${export_java_home}"; ${plain_start}
    # Empty string is false in the evaluated condition, so the plain command is used when no JAVA_HOME is given.
    ${start_command} =    BuiltIn.Set_Variable_If    "${export_java_home}"    ${start_with_java}    ${plain_start}
    Run_Command_On_List_Or_All    command=${start_command}    member_index_list=${member_index_list}
    BuiltIn.Return_From_Keyword_If    not ${wait_for_sync}
    BuiltIn.Wait_Until_Keyword_Succeeds    ${timeout}    1s    Check_Cluster_Is_In_Sync    member_index_list=${member_index_list}
    # TODO: Do we also want to check Shard Leaders here?
94
Start_Single_Member
    [Arguments]    ${member}    ${wait_for_sync}=True    ${timeout}=300s
    [Documentation]    Start ODL on one cluster member, by delegating to Start_Members_From_List_Or_All with a single-item list.
    ...    Default karaf home and JAVA_HOME of the delegate are used.
    ${single_member_list} =    ClusterManagement__Build_List    ${member}
    Start_Members_From_List_Or_All    member_index_list=${single_member_list}    wait_for_sync=${wait_for_sync}    timeout=${timeout}
100
Verify_Leader_Exists_For_Each_Shard
    [Arguments]    ${shard_name_list}    ${shard_type}=operational    ${member_index_list}=${EMPTY}    ${verify_restconf}=True
    [Documentation]    Call Get_Leader_And_Followers_For_Shard (with validation) once per listed shard name, ignoring the returned values.
    ...    The first shard without exactly one Leader fails this keyword.
    ...    Packed into a single Keyword so it can be retried by BuiltIn.Wait_Until_Keyword_Succeeds.
    : FOR    ${name}    IN    @{shard_name_list}
    \    Get_Leader_And_Followers_For_Shard    shard_name=${name}    shard_type=${shard_type}    validate=True
    \    ...    member_index_list=${member_index_list}    verify_restconf=${verify_restconf}
107
Get_Leader_And_Followers_For_Shard
    [Arguments]    ${shard_name}=default    ${shard_type}=operational    ${validate}=True    ${member_index_list}=${EMPTY}    ${verify_restconf}=True
    [Documentation]    Get role lists, fail unless there is exactly one Leader, return the Leader index and the list of Follower indices.
    ...    \${validate} is forwarded to Get_State_Info_For_Shard: when true, any Raft state other than
    ...    Leader or Follower (for example Candidate) is a failure, otherwise such members are skipped.
    ...    The check for exactly one Leader is performed regardless of \${validate}.
    ...    Optionally, issue GET to a simple restconf URL to make sure subsequent operations will not encounter 503.
    # The caller-provided \${validate} is forwarded, so that validate=False is honored by the state lookup.
    ${leader_list}    ${follower_list} =    Get_State_Info_For_Shard    shard_name=${shard_name}    shard_type=${shard_type}    validate=${validate}    member_index_list=${member_index_list}
    ...    verify_restconf=${verify_restconf}
    ${leader_count} =    BuiltIn.Get_Length    ${leader_list}
    # Separate check for zero Leaders gives a more specific message than the length assertion below.
    BuiltIn.Run_Keyword_If    ${leader_count} < 1    BuiltIn.Fail    No leader found.
    BuiltIn.Length_Should_Be    ${leader_list}    ${1}    Too many Leaders.
    ${leader} =    Collections.Get_From_List    ${leader_list}    0
    [Return]    ${leader}    ${follower_list}
119
Resolve_Http_Session_For_Member
    [Arguments]    ${member_index}
    [Documentation]    Compute RequestsLibrary session alias for member of given index.
    ...    The alias is the fixed prefix ClusterManagement__session_ followed by the index; no session is created here.
    [Return]    ClusterManagement__session_${member_index}
125
Get_State_Info_For_Shard
    [Arguments]    ${shard_name}=default    ${shard_type}=operational    ${validate}=False    ${member_index_list}=${EMPTY}    ${verify_restconf}=False
    [Documentation]    Ask each member of given indices (each known member if the list is empty) for Raft state of the shard,
    ...    return two lists of member indices: Leaders first, Followers second.
    ...    Any \${shard_type} other than 'config' is treated as 'operational'.
    ...    A member reporting any other state (for example Candidate) causes failure if \${validate}, otherwise it is left out of both lists.
    ...    Contrary to Get_Leader_And_Followers_For_Shard, the number of Leaders is not checked here.
    ${members_to_query} =    ClusterManagement__Given_Or_Internal_Index_List    given_list=${member_index_list}
    # TODO: Support alternative capitalization of 'config'?
    ${ds_type} =    BuiltIn.Set_Variable_If    '${shard_type}' == 'config'    config    operational
    ${leaders} =    BuiltIn.Create_List
    ${followers} =    BuiltIn.Create_List
    : FOR    ${member}    IN    @{members_to_query}    # usually: 1, 2, 3.
    \    ${state} =    Get_Raft_State_Of_Shard_At_Member    shard_name=${shard_name}    shard_type=${ds_type}    member_index=${member}    verify_restconf=${verify_restconf}
    \    BuiltIn.Run_Keyword_If    '${state}' == 'Leader'    Collections.Append_To_List    ${leaders}    ${member}
    \    ...    ELSE IF    '${state}' == 'Follower'    Collections.Append_To_List    ${followers}    ${member}
    \    ...    ELSE IF    ${validate}    BuiltIn.Fail    Unrecognized Raft state: ${state}
    [Return]    ${leaders}    ${followers}
144
Check_Cluster_Is_In_Sync
    [Arguments]    ${member_index_list}=${EMPTY}
    [Documentation]    Get sync status of each member of given indices (each known member if the list is empty),
    ...    fail on the first member whose status is not True.
    ${members_to_check} =    ClusterManagement__Given_Or_Internal_Index_List    given_list=${member_index_list}
    : FOR    ${member}    IN    @{members_to_check}    # usually: 1, 2, 3.
    \    ${sync_status} =    Get_Sync_Status_Of_Member    member_index=${member}
    \    # Reaching this line means the status has been retrieved; anything but True is a failure.
    \    BuiltIn.Run_Keyword_If    'True' != '${sync_status}'    BuiltIn.Fail    Index ${member} has incorrect status: ${sync_status}
154
Verify_Karaf_Is_Not_Running_On_Member
    [Arguments]    ${member_index}
    [Documentation]    Count karaf processes on member of given index, fail unless the reported count equals 0.
    ...    The count is compared as returned by Count_Running_Karafs_On_Member, without numeric conversion.
    ${karaf_count} =    Count_Running_Karafs_On_Member    ${member_index}
    BuiltIn.Should_Be_Equal    0    ${karaf_count}    msg=Found running Karaf count: ${karaf_count}
160
Verify_Single_Karaf_Is_Running_On_Member
    [Arguments]    ${member_index}
    [Documentation]    Count karaf processes on member of given index, fail unless the reported count equals 1.
    ...    The count is compared as returned by Count_Running_Karafs_On_Member, without numeric conversion.
    ${karaf_count} =    Count_Running_Karafs_On_Member    ${member_index}
    BuiltIn.Should_Be_Equal    1    ${karaf_count}    msg=Wrong number of Karafs running: ${karaf_count}
166
Run_Command_On_List_Or_All
    [Arguments]    ${command}    ${member_index_list}=${EMPTY}
    [Documentation]    Run the command on each member of given indices (each known member if the list is empty), one member after another.
    ...    Outputs of the command are not returned.
    ${target_indices} =    ClusterManagement__Given_Or_Internal_Index_List    ${member_index_list}
    : FOR    ${target_index}    IN    @{target_indices}
    \    Run_Command_On_Member    command=${command}    member_index=${target_index}
173
Get_Sync_Status_Of_Member
    [Arguments]    ${member_index}
    [Documentation]    Read SyncStatus of config ShardManager from Jolokia on member of given index.
    ...    If that status is False, return False without querying the operational ShardManager.
    ...    Otherwise read and return SyncStatus of operational ShardManager.
    ...    So the result is True only if both datastores report being in sync.
    ${session} =    Resolve_Http_Session_For_Member    member_index=${member_index}
    ${conf_text} =    TemplatedRequests.Get_As_Json_From_Uri    uri=${JOLOKIA_CONF_SHARD_MANAGER_URI}    session=${session}
    ${conf_status} =    ClusterManagement__Parse_Sync_Status    shard_manager_text=${conf_text}
    # The status has to be quoted: the condition is evaluated as a Python expression after variable substitution,
    # so without quotes a string literal would be compared to a non-string value and the early return would never happen.
    BuiltIn.Return_From_Keyword_If    'False' == '${conf_status}'    False
    ${oper_text} =    TemplatedRequests.Get_As_Json_From_Uri    uri=${JOLOKIA_OPER_SHARD_MANAGER_URI}    session=${session}
    ${oper_status} =    ClusterManagement__Parse_Sync_Status    shard_manager_text=${oper_text}
    [Return]    ${oper_status}
184
Run_Command_On_Member
    [Arguments]    ${command}    ${member_index}
    [Documentation]    Look up IP address of member of given index in the suite mapping,
    ...    run the command there via Utils.Run_Command_On_Controller and return its output.
    ...    This does not preserve active ssh session.
    ${ip_address} =    Collections.Get_From_Dictionary    ${ClusterManagement__index_to_ip_mapping}    ${member_index}
    ${command_output} =    Utils.Run_Command_On_Controller    ${ip_address}    ${command}
    [Return]    ${command_output}
191
Count_Running_Karafs_On_Member
    [Arguments]    ${member_index}
    [Documentation]    Run a "ps | grep karaf | wc -l" pipeline on member of given index, return its output (the process count, as string).
    # The "grep -v grep" part prevents the grep process itself from being counted.
    ${karaf_count} =    Run_Command_On_Member    command=ps axf | grep karaf | grep -v grep | wc -l    member_index=${member_index}
    [Return]    ${karaf_count}
198
Get_Raft_State_Of_Shard_At_Member
    [Arguments]    ${shard_name}    ${shard_type}    ${member_index}    ${verify_restconf}=False
    [Documentation]    Read the Shard MBean of given shard name and type from Jolokia on member of given index,
    ...    return the RaftState item found under "value" of the response.
    ...    If \${verify_restconf}, a templated restconf GET is sent to the same member first.
    ${member_session} =    Resolve_Http_Session_For_Member    member_index=${member_index}
    # TODO: Does the used URI tend to generate large data which floods log.html?
    BuiltIn.Run_Keyword_If    ${verify_restconf}    TemplatedRequests.Get_As_Json_Templated    session=${member_session}    folder=${RESTCONF_MODULES_DIR}    verify=False
    ${datastore_class} =    Resolve_Shard_Type_Class    shard_type=${shard_type}
    ${shard_bean} =    BuiltIn.Set_Variable    member-${member_index}-shard-${shard_name}-${shard_type}
    ${jolokia_uri} =    BuiltIn.Set_Variable    ${JOLOKIA_READ_URI}:Category=Shards,name=${shard_bean},type=${datastore_class}
    ${response_text} =    TemplatedRequests.Get_As_Json_From_Uri    uri=${jolokia_uri}    session=${member_session}
    ${response} =    RequestsLibrary.To_Json    ${response_text}
    ${mbean_attributes} =    Collections.Get_From_Dictionary    ${response}    value
    ${raft_state} =    Collections.Get_From_Dictionary    ${mbean_attributes}    RaftState
    [Return]    ${raft_state}
213
Resolve_Shard_Type_Class
    [Arguments]    ${shard_type}
    [Documentation]    Return the datastore class name matching the shard type:
    ...    DistributedConfigDatastore for 'config', DistributedOperationalDatastore for 'operational'.
    ...    Fail on any other shard type.
    BuiltIn.Return_From_Keyword_If    '${shard_type}' == 'config'    DistributedConfigDatastore
    BuiltIn.Return_From_Keyword_If    '${shard_type}' == 'operational'    DistributedOperationalDatastore
    BuiltIn.Fail    Unrecognized shard type: ${shard_type}
220
ClusterManagement__Parse_Sync_Status
    [Arguments]    ${shard_manager_text}
    [Documentation]    Log the given text, parse it as JSON and return the SyncStatus item found under "value".
    ...    Get_Sync_Status_Of_Member calls this for the config and for the operational ShardManager response.
    BuiltIn.Log    ${shard_manager_text}
    ${parsed_reply} =    RequestsLibrary.To_Json    ${shard_manager_text}
    ${reply_value} =    Collections.Get_From_Dictionary    ${parsed_reply}    value
    ${status} =    Collections.Get_From_Dictionary    ${reply_value}    SyncStatus
    [Return]    ${status}
229
ClusterManagement__Given_Or_Internal_Index_List
    [Arguments]    ${given_list}=${EMPTY}
    [Documentation]    Return the given list if it has any items, otherwise return the suite-level list of member indices.
    ...    This allows \${EMPTY} to be used as default argument value, as the internal list is only computed at runtime.
    ${item_count} =    BuiltIn.Get_Length    ${given_list}
    ${chosen_list} =    BuiltIn.Set_Variable_If    ${item_count} == 0    ${ClusterManagement__member_index_list}    ${given_list}
    [Return]    ${chosen_list}
236
ClusterManagement__Compute_Derived_Variables
    [Arguments]    ${int_of_members}
    [Documentation]    Build the member index list, the session alias list and the index-to-IP mapping
    ...    for indices from 1 up to \${int_of_members} (inclusive), then publish all three as suite variables.
    @{indices} =    BuiltIn.Create_List
    @{aliases} =    BuiltIn.Create_List
    &{ip_by_index} =    BuiltIn.Create_Dictionary
    # The three containers are filled in place by the called keyword.
    : FOR    ${index}    IN RANGE    1    ${int_of_members+1}
    \    ClusterManagement__Include_Member_Index    ${index}    ${indices}    ${aliases}    ${ip_by_index}
    BuiltIn.Set_Suite_Variable    \${ClusterManagement__member_index_list}    ${indices}
    BuiltIn.Set_Suite_Variable    \${ClusterManagement__session_list}    ${aliases}
    BuiltIn.Set_Suite_Variable    \${ClusterManagement__index_to_ip_mapping}    ${ip_by_index}
248
ClusterManagement__Include_Member_Index
    [Arguments]    ${index}    ${member_index_list}    ${session_list}    ${index_to_ip_mapping}
    [Documentation]    Register one member: append the index to the index list, store its IP address
    ...    (value of ODL_SYSTEM_<index>_IP variable) into the mapping under the index,
    ...    create Http session towards that address and append the session alias to the session list.
    ...    The three container arguments are modified in place.
    Collections.Append_To_List    ${member_index_list}    ${index}
    ${ip_address} =    BuiltIn.Set_Variable    ${ODL_SYSTEM_${index}_IP}
    # The key and the value are given as separate arguments, as the "key=value" syntax does not work with ${index} being int (not string).
    Collections.Set_To_Dictionary    ${index_to_ip_mapping}    ${index}    ${ip_address}
    # Http session, with ${AUTH}, without headers.
    ${alias} =    Resolve_Http_Session_For_Member    member_index=${index}
    RequestsLibrary.Create_Session    ${alias}    http://${ip_address}:${RESTCONFPORT}    auth=${AUTH}    max_retries=0
    Collections.Append_To_List    ${session_list}    ${alias}