Fix spurious warnings after a peer is killed
[integration/test.git] / csit / libraries / ClusterManagement.robot
1 *** Settings ***
2 Documentation     Resource housing Keywords common to several suites for cluster functional testing.
3 ...
4 ...               Copyright (c) 2016 Cisco Systems, Inc. and others. All rights reserved.
5 ...
6 ...               This program and the accompanying materials are made available under the
7 ...               terms of the Eclipse Public License v1.0 which accompanies this distribution,
8 ...               and is available at http://www.eclipse.org/legal/epl-v10.html
9 ...
10 ...
11 ...               This resource holds private state (in suite variables),
12 ...               which is generated once at Setup.
13 ...               The state includes list with indexes (numbers enumerating cluster members),
14 ...               IP addresses and Http (RequestsLibrary) sessions.
15 ...               Most functionality deals with stopping/starting controllers
16 ...               and finding leaders/followers for a Shard.
17 ...
18 ...               odl-jolokia is assumed to be installed.
19 ...
20 ...               Keywords which run commands on ODL systems do not preserve active SSH session.
21 ...               TODO: Should they?
22 ...
23 ...               Keywords are ordered from friendly ones to fiddly ones.
24 ...               TODO: Figure out more deterministic but still user-friendly ordering.
25 ...
26 ...               TODO: Unify capitalization of Leaders and Followers.
27 ...
28 ...               TODO: Move Keywords related to iptables manipulation from ClusterKeywords
29 ...               here, or to separate Resource.
30 Library           RequestsLibrary    # for Create_Session and To_Json
31 Library           Collections
32 Resource          ${CURDIR}/TemplatedRequests.robot    # for Get_As_Json_From_Uri
33 Resource          ${CURDIR}/Utils.robot    # for Run_Command_On_Controller
34
*** Variables ***
# Common Jolokia "read" prefix; shard-specific MBean selectors are appended to it by keywords below.
${JOLOKIA_READ_URI}    jolokia/read/org.opendaylight.controller
# Full Jolokia read URIs of the shard manager MBeans, one per datastore (config, operational).
${JOLOKIA_CONF_SHARD_MANAGER_URI}    jolokia/read/org.opendaylight.controller:Category=ShardManager,name=shard-manager-config,type=DistributedConfigDatastore
${JOLOKIA_OPER_SHARD_MANAGER_URI}    jolokia/read/org.opendaylight.controller:Category=ShardManager,name=shard-manager-operational,type=DistributedOperationalDatastore
# Directory of the karaf installation, built from externally supplied \${WORKSPACE} and \${BUNDLEFOLDER}.
${KARAF_HOME}     ${WORKSPACE}${/}${BUNDLEFOLDER}
# Template folder passed to TemplatedRequests when checking that restconf responds.
${RESTCONF_MODULES_DIR}    ${CURDIR}/../variables/restconf/modules
41
42 *** Keywords ***
ClusterManagement_Setup
    [Documentation]    Initialize the suite-level cluster state exactly once.
    ...    A repeated call (detected via \${ClusterManagement__has_setup_run}) returns immediately.
    ...    The member count comes from \${NUM_ODL_SYSTEM}; if it cannot be converted to integer, one member is assumed.
    # Several downstream resources may call this keyword; only the first call does any work.
    ${setup_seen} =    BuiltIn.Get_Variable_Value    \${ClusterManagement__has_setup_run}    False
    BuiltIn.Return_From_Keyword_If    ${setup_seen}
    BuiltIn.Set_Suite_Variable    \${ClusterManagement__has_setup_run}    True
    # Conversion outcome is either PASS or FAIL; anything but PASS falls back to a single member.
    ${outcome}    ${parsed_count} =    BuiltIn.Run_Keyword_And_Ignore_Error    BuiltIn.Convert_To_Integer    ${NUM_ODL_SYSTEM}
    ${member_count} =    BuiltIn.Set_Variable_If    '${outcome}' == 'PASS'    ${parsed_count}    ${1}
    ClusterManagement__Compute_Derived_Variables    int_of_members=${member_count}
52
Kill_Members_From_List_Or_All
    [Arguments]    ${member_index_list}=${EMPTY}    ${confirm}=True
    [Documentation]    Send "kill -9" to every karaf process on the listed members (on all members when the list is empty).
    ...    When \${confirm} is true, wait one second and then check each affected member has no karaf process left.
    ${kill_command} =    BuiltIn.Set_Variable    ps axf | grep karaf | grep -v grep | awk '{print \"kill -9 \" $1}' | sh
    Run_Command_On_List_Or_All    member_index_list=${member_index_list}    command=${kill_command}
    # Callers not interested in confirmation are done at this point.
    BuiltIn.Return_From_Keyword_If    not ${confirm}
    # TODO: Convert to WUKS with configurable timeout if it turns out 1 second is not enough.
    BuiltIn.Sleep    1s    Kill -9 closes open files, which may take longer than ssh overhead, but not long enough to warrant WUKS.
    ${killed_members} =    ClusterManagement__Given_Or_Internal_Index_List    given_list=${member_index_list}
    : FOR    ${member}    IN    @{killed_members}
    \    Verify_Karaf_Is_Not_Running_On_Member    member_index=${member}
65
Clean_Journals_And_Snapshots_On_List_Or_All
    [Arguments]    ${member_index_list}=${EMPTY}
    [Documentation]    Delete journal and snapshots directories on every node listed (or all).
    ...    Both directories are removed from under \${KARAF_HOME} by a single "rm -rf" command per member.
    ...    Index list resolution and the per-member loop are delegated to Run_Command_On_List_Or_All.
    # Paths are double-quoted so a \${KARAF_HOME} containing spaces still yields two arguments for rm.
    ${command} =    BuiltIn.Set_Variable    rm -rf "${KARAF_HOME}/journal" "${KARAF_HOME}/snapshots"
    Run_Command_On_List_Or_All    command=${command}    member_index_list=${member_index_list}
73
Start_Members_From_List_Or_All
    [Arguments]    ${member_index_list}=${EMPTY}    ${wait_for_sync}=True    ${timeout}=300s
    [Documentation]    Run the karaf start script on the listed members (on all members when the list is empty).
    ...    If \${wait_for_sync} is true, poll every second, for up to \${timeout}, until Check_Cluster_Is_In_Sync passes for the same members.
    Run_Command_On_List_Or_All    command=${KARAF_HOME}/bin/start    member_index_list=${member_index_list}
    # Without sync waiting, starting the processes is all there is to do.
    BuiltIn.Return_From_Keyword_If    not ${wait_for_sync}
    BuiltIn.Wait_Until_Keyword_Succeeds    ${timeout}    1s    Check_Cluster_Is_In_Sync    member_index_list=${member_index_list}
    # TODO: Do we also want to check Shard Leaders here?
83
Verify_Leader_Exists_For_Each_Shard
    [Arguments]    ${shard_name_list}    ${shard_type}=operational    ${member_index_list}=${EMPTY}    ${verify_restconf}=True
    [Documentation]    Call Get_Leader_And_Followers_For_Shard (with validation) once per shard name, ignoring the returned values.
    ...    The first shard failing the leader check fails this keyword.
    ...    Having it as a single Keyword makes it usable as a BuiltIn.Wait_Until_Keyword_Succeeds target.
    : FOR    ${shard}    IN    @{shard_name_list}
    \    Get_Leader_And_Followers_For_Shard    shard_name=${shard}    shard_type=${shard_type}    validate=True
    \    ...    member_index_list=${member_index_list}    verify_restconf=${verify_restconf}
90
Get_Leader_And_Followers_For_Shard
    [Arguments]    ${shard_name}=default    ${shard_type}=operational    ${validate}=True    ${member_index_list}=${EMPTY}    ${verify_restconf}=True
    [Documentation]    Get role lists, check there is exactly one leader, return the leader index and the list of follower indices.
    ...    \${validate} is forwarded to Get_State_Info_For_Shard: with the default (True), a member reporting
    ...    a Raft state other than Leader or Follower fails the keyword. The single-leader check is performed regardless.
    ...    Optionally (\${verify_restconf}), issue GET to a simple restconf URL to make sure subsequent operations will not encounter 503.
    # Forward the caller's \${validate} instead of a hard-coded True, so the argument actually has an effect.
    ${leader_list}    ${follower_list} =    Get_State_Info_For_Shard    shard_name=${shard_name}    shard_type=${shard_type}    validate=${validate}    member_index_list=${member_index_list}
    ...    verify_restconf=${verify_restconf}
    ${leader_count} =    BuiltIn.Get_Length    ${leader_list}
    # Zero leaders gets its own message; any other count different from one is reported as too many.
    BuiltIn.Run_Keyword_If    ${leader_count} < 1    BuiltIn.Fail    No leader found.
    BuiltIn.Length_Should_Be    ${leader_list}    ${1}    Too many Leaders.
    ${leader} =    Collections.Get_From_List    ${leader_list}    0
    [Return]    ${leader}    ${follower_list}
102
Resolve_Http_Session_For_Member
    [Arguments]    ${member_index}
    [Documentation]    Build and return the RequestsLibrary session alias used for the member of given index.
    ...    This only composes the alias name; it does not create or check the session.
    [Return]    ClusterManagement__session_${member_index}
108
Get_State_Info_For_Shard
    [Arguments]    ${shard_name}=default    ${shard_type}=operational    ${validate}=False    ${member_index_list}=${EMPTY}    ${verify_restconf}=False
    [Documentation]    Ask each member from the given index list (or every member if the list is empty) for the Raft state
    ...    of the shard, and return two lists of member indices: Leaders first, Followers second.
    ...    Any \${shard_type} other than 'config' is treated as 'operational'.
    ...    If \${validate}, Fail on a Raft state which is neither Leader nor Follower (for example Candidate);
    ...    otherwise such members are left out of both lists.
    ...    Unlike Get_Leader_And_Followers_For_Shard, the number of Leaders is not checked here.
    ${members} =    ClusterManagement__Given_Or_Internal_Index_List    given_list=${member_index_list}
    # TODO: Support alternative capitalization of 'config'?
    ${datastore} =    BuiltIn.Set_Variable_If    '${shard_type}' == 'config'    config    operational
    ${leaders} =    BuiltIn.Create_List
    ${followers} =    BuiltIn.Create_List
    : FOR    ${member}    IN    @{members}    # usually: 1, 2, 3.
    \    ${state} =    Get_Raft_State_Of_Shard_At_Member    shard_name=${shard_name}    shard_type=${datastore}    member_index=${member}    verify_restconf=${verify_restconf}
    \    BuiltIn.Run_Keyword_If    '${state}' == 'Leader'    Collections.Append_To_List    ${leaders}    ${member}
    \    ...    ELSE IF    '${state}' == 'Follower'    Collections.Append_To_List    ${followers}    ${member}
    \    ...    ELSE IF    ${validate}    BuiltIn.Fail    Unrecognized Raft state: ${state}
    [Return]    ${leaders}    ${followers}
127
Check_Cluster_Is_In_Sync
    [Arguments]    ${member_index_list}=${EMPTY}
    [Documentation]    Go through members from the list (or all members if empty) and Fail on the first one
    ...    whose sync status, rendered as a string, is not True.
    ${members} =    ClusterManagement__Given_Or_Internal_Index_List    given_list=${member_index_list}
    : FOR    ${member}    IN    @{members}    # usually: 1, 2, 3.
    \    # Obtaining the status may fail by itself; only a successfully obtained status is examined below.
    \    ${sync_status} =    Get_Sync_Status_Of_Member    member_index=${member}
    \    BuiltIn.Run_Keyword_If    '${sync_status}' != 'True'    BuiltIn.Fail    Index ${member} has incorrect status: ${sync_status}
137
Verify_Karaf_Is_Not_Running_On_Member
    [Arguments]    ${member_index}
    [Documentation]    Count karaf processes on the member of given index and Fail unless the reported count equals 0.
    ${running} =    Count_Running_Karafs_On_Member    member_index=${member_index}
    # The count is compared as-is against the text 0, no numeric conversion is done.
    BuiltIn.Should_Be_Equal    0    ${running}    Found running Karaf count: ${running}
143
Verify_Single_Karaf_Is_Running_On_Member
    [Arguments]    ${member_index}
    [Documentation]    Count karaf processes on the member of given index and Fail unless the reported count equals 1.
    ${running} =    Count_Running_Karafs_On_Member    member_index=${member_index}
    # The count is compared as-is against the text 1, no numeric conversion is done.
    BuiltIn.Should_Be_Equal    1    ${running}    Wrong number of Karafs running: ${running}
149
Run_Command_On_List_Or_All
    [Arguments]    ${command}    ${member_index_list}=${EMPTY}
    [Documentation]    Execute the same command on each member from the list (on every member if the list is empty).
    ...    Members are processed one by one in list order; command outputs are not returned.
    ${targets} =    ClusterManagement__Given_Or_Internal_Index_List    given_list=${member_index_list}
    : FOR    ${target}    IN    @{targets}
    \    Run_Command_On_Member    member_index=${target}    command=${command}
156
Get_Sync_Status_Of_Member
    [Arguments]    ${member_index}
    [Documentation]    Query shard managers of the member over Jolokia (config first, then operational) and return the combined sync status.
    ...    If the config shard manager reports SyncStatus False, the text False is returned without querying the operational one.
    ...    Otherwise the SyncStatus value parsed from the operational shard manager response is returned.
    ...    Callers are expected to compare the result in its string form (as Check_Cluster_Is_In_Sync does).
    ${session} =    Resolve_Http_Session_For_Member    member_index=${member_index}
    ${conf_text} =    TemplatedRequests.Get_As_Json_From_Uri    uri=${JOLOKIA_CONF_SHARD_MANAGER_URI}    session=${session}
    ${conf_status} =    ClusterManagement__Parse_Sync_Status    shard_manager_text=${conf_text}
    # Both sides have to be quoted to get a string comparison. With the variable unquoted, the condition
    # evaluates as the Python expression 'False' == False, which never holds, so config no-sync would go unnoticed.
    BuiltIn.Return_From_Keyword_If    'False' == '${conf_status}'    False
    ${oper_text} =    TemplatedRequests.Get_As_Json_From_Uri    uri=${JOLOKIA_OPER_SHARD_MANAGER_URI}    session=${session}
    ${oper_status} =    ClusterManagement__Parse_Sync_Status    shard_manager_text=${oper_text}
    [Return]    ${oper_status}
167
Run_Command_On_Member
    [Arguments]    ${command}    ${member_index}
    [Documentation]    Look up IP address of the member in the suite-level index-to-IP mapping,
    ...    run the command there via Utils.Run_Command_On_Controller and return its output.
    ...    This does not preserve active ssh session.
    ${ip_address} =    Collections.Get_From_Dictionary    ${ClusterManagement__index_to_ip_mapping}    ${member_index}
    ${command_output} =    Utils.Run_Command_On_Controller    ${ip_address}    ${command}
    [Return]    ${command_output}
174
Count_Running_Karafs_On_Member
    [Arguments]    ${member_index}
    [Documentation]    Run a ps/grep/wc pipeline on the member to count processes mentioning karaf (the grep itself excluded).
    ...    The command output is returned unchanged, so the count is text, not integer.
    ${karaf_count} =    Run_Command_On_Member    command=ps axf | grep karaf | grep -v grep | wc -l    member_index=${member_index}
    [Return]    ${karaf_count}
181
Get_Raft_State_Of_Shard_At_Member
    [Arguments]    ${shard_name}    ${shard_type}    ${member_index}    ${verify_restconf}=False
    [Documentation]    Read the shard MBean of the indexed member through Jolokia and return the RaftState found under "value" in the response.
    ...    If \${verify_restconf}, a templated restconf GET (folder \${RESTCONF_MODULES_DIR}) is sent to the same member first.
    ...    \${shard_type} has to be a type known to Resolve_Shard_Type_Class.
    ${http_session} =    Resolve_Http_Session_For_Member    member_index=${member_index}
    # TODO: Does the used URI tend to generate large data which floods log.html?
    BuiltIn.Run_Keyword_If    ${verify_restconf}    TemplatedRequests.Get_As_Json_Templated    session=${http_session}    folder=${RESTCONF_MODULES_DIR}    verify=False
    # The MBean name consists of member index, shard name and shard type; the "type" property is the datastore class.
    ${datastore_class} =    Resolve_Shard_Type_Class    shard_type=${shard_type}
    ${shard_uri} =    BuiltIn.Set_Variable    ${JOLOKIA_READ_URI}:Category=Shards,name=member-${member_index}-shard-${shard_name}-${shard_type},type=${datastore_class}
    ${response_text} =    TemplatedRequests.Get_As_Json_From_Uri    uri=${shard_uri}    session=${http_session}
    ${response} =    RequestsLibrary.To_Json    ${response_text}
    ${mbean_attributes} =    Collections.Get_From_Dictionary    ${response}    value
    ${state} =    Collections.Get_From_Dictionary    ${mbean_attributes}    RaftState
    [Return]    ${state}
196
Resolve_Shard_Type_Class
    [Arguments]    ${shard_type}
    [Documentation]    Map datastore type to its class name: 'config' gives DistributedConfigDatastore,
    ...    'operational' gives DistributedOperationalDatastore. Any other value fails the keyword.
    BuiltIn.Return_From_Keyword_If    '${shard_type}' == 'config'    DistributedConfigDatastore
    BuiltIn.Return_From_Keyword_If    '${shard_type}' == 'operational'    DistributedOperationalDatastore
    # Reached only when neither of the known types matched.
    BuiltIn.Fail    Unrecognized shard type: ${shard_type}
203
ClusterManagement__Parse_Sync_Status
    [Arguments]    ${shard_manager_text}
    [Documentation]    Log the given shard manager response text, parse it as JSON and return the SyncStatus item found under "value".
    ...    Called twice by Get_Sync_Status_Of_Member.
    BuiltIn.Log    ${shard_manager_text}
    ${response} =    RequestsLibrary.To_Json    ${shard_manager_text}
    ${mbean_attributes} =    Collections.Get_From_Dictionary    ${response}    value
    ${status} =    Collections.Get_From_Dictionary    ${mbean_attributes}    SyncStatus
    [Return]    ${status}
212
ClusterManagement__Given_Or_Internal_Index_List
    [Arguments]    ${given_list}=${EMPTY}
    [Documentation]    Return the given list if it has any items, otherwise the suite-level list of all member indices.
    ...    This is what allows other keywords to use \${EMPTY} as default value, as the internal list is only computed at runtime.
    ${item_count} =    BuiltIn.Get_Length    ${given_list}
    # Length is never negative, so "equal to zero" is the exact opposite of "greater than zero".
    ${chosen_list} =    BuiltIn.Set_Variable_If    ${item_count} == 0    ${ClusterManagement__member_index_list}    ${given_list}
    [Return]    ${chosen_list}
219
ClusterManagement__Compute_Derived_Variables
    [Arguments]    ${int_of_members}
    [Documentation]    Starting from empty containers, let ClusterManagement__Include_Member_Index fill in data
    ...    for indices 1 up to \${int_of_members} (inclusive), then publish the index list, the session alias list
    ...    and the index-to-IP mapping as suite variables.
    @{indices} =    BuiltIn.Create_List
    @{aliases} =    BuiltIn.Create_List
    &{ip_by_index} =    BuiltIn.Create_Dictionary
    # IN RANGE excludes its upper bound, hence the +1.
    : FOR    ${index}    IN RANGE    1    ${int_of_members+1}
    \    ClusterManagement__Include_Member_Index    ${index}    ${indices}    ${aliases}    ${ip_by_index}
    BuiltIn.Set_Suite_Variable    \${ClusterManagement__session_list}    ${aliases}
    BuiltIn.Set_Suite_Variable    \${ClusterManagement__index_to_ip_mapping}    ${ip_by_index}
    BuiltIn.Set_Suite_Variable    \${ClusterManagement__member_index_list}    ${indices}
231
ClusterManagement__Include_Member_Index
    [Arguments]    ${index}    ${member_index_list}    ${session_list}    ${index_to_ip_mapping}
    [Documentation]    Register one member: append the index to the index list, store its IP address
    ...    (value of the ODL_SYSTEM_<index>_IP variable) in the mapping, create an Http session towards it
    ...    and append the session alias to the session list. The three containers are modified in place.
    Collections.Append_To_List    ${member_index_list}    ${index}
    ${ip_address} =    BuiltIn.Set_Variable    ${ODL_SYSTEM_${index}_IP}
    # ${index} is int (not string) so "key=value" syntax does not work in the following line.
    Collections.Set_To_Dictionary    ${index_to_ip_mapping}    ${index}    ${ip_address}
    # Http session, with ${AUTH}, without headers.
    ${alias} =    Resolve_Http_Session_For_Member    member_index=${index}
    RequestsLibrary.Create_Session    ${alias}    http://${ip_address}:${RESTCONFPORT}    auth=${AUTH}    max_retries=0
    Collections.Append_To_List    ${session_list}    ${alias}