Add suite for akka persistence when upgrading ODL 50/37150/40
authorVratko Polak <vrpolak@cisco.com>
Mon, 23 May 2016 17:21:14 +0000 (19:21 +0200)
committerJamo Luhrsen <jluhrsen@redhat.com>
Wed, 25 May 2016 22:19:17 +0000 (22:19 +0000)
+ Add a testplan file.
+ Include a utility for rapid PATCH requests for replacing cars.
+ Change ClusterManagement to work with single node
+ Change ClusterManagement to support alternative installation directories.
+ SSHKeywords.Execute_Command_Passes is now much more customizable.
+ SSHKeywords.Execute_Command_Should_Pass as its wrapper with different defaults.

Change-Id: If4f6751a16534295eed9662015f06d5f4287308b
Signed-off-by: Vratko Polak <vrpolak@cisco.com>
csit/libraries/ClusterManagement.robot
csit/libraries/SSHKeywords.robot
csit/suites/controller/akka_upgrade/1node.robot [new file with mode: 0644]
csit/testplans/controller-akka1.txt [new file with mode: 0644]
tools/odl-mdsal-clustering-tests/patch_cars_be_sr2.py [new file with mode: 0644]

index c2b8ac678722344f19e0b87dad576d57a74cb510..b94f4021e980ce158d01add08a338b906c7e4a25 100644 (file)
@@ -29,10 +29,10 @@ Resource          ${CURDIR}/TemplatedRequests.robot    # for Get_As_Json_From_Ur
 Resource          ${CURDIR}/Utils.robot    # for Run_Command_On_Controller
 
 *** Variables ***
+${JAVA_HOME}      ${EMPTY}    # releng/builder scripts should provide correct value
 ${JOLOKIA_CONF_SHARD_MANAGER_URI}    jolokia/read/org.opendaylight.controller:Category=ShardManager,name=shard-manager-config,type=DistributedConfigDatastore
 ${JOLOKIA_OPER_SHARD_MANAGER_URI}    jolokia/read/org.opendaylight.controller:Category=ShardManager,name=shard-manager-operational,type=DistributedOperationalDatastore
 ${JOLOKIA_READ_URI}    jolokia/read/org.opendaylight.controller
-${KARAF_HOME}     ${WORKSPACE}${/}${BUNDLEFOLDER}
 ${RESTCONF_MODULES_DIR}    ${CURDIR}/../variables/restconf/modules
 
 *** Keywords ***
@@ -42,7 +42,8 @@ ClusterManagement_Setup
     ${already_done} =    BuiltIn.Get_Variable_Value    \${ClusterManagement__has_setup_run}    False
     BuiltIn.Return_From_Keyword_If    ${already_done}
     BuiltIn.Set_Suite_Variable    \${ClusterManagement__has_setup_run}    True
-    ${status}    ${possibly_int_of_members} =    BuiltIn.Run_Keyword_And_Ignore_Error    BuiltIn.Convert_To_Integer    ${NUM_ODL_SYSTEM}
+    ${cluster_size} =    BuiltIn.Get_Variable_Value    \${NUM_ODL_SYSTEM}    1
+    ${status}    ${possibly_int_of_members} =    BuiltIn.Run_Keyword_And_Ignore_Error    BuiltIn.Convert_To_Integer    ${cluster_size}
     ${int_of_members} =    BuiltIn.Set_Variable_If    '${status}' != 'PASS'    ${1}    ${possibly_int_of_members}
     ClusterManagement__Compute_Derived_Variables    int_of_members=${int_of_members}
 
@@ -50,7 +51,7 @@ Kill_Members_From_List_Or_All
     [Arguments]    ${member_index_list}=${EMPTY}    ${confirm}=True
     [Documentation]    If the list is empty, kill all ODL instances. Otherwise, kill members based on present indices.
     ...    If \${confirm} is True, sleep 1 second and verify killed instances are not there anymore.
-    ${command} =    BuiltIn.Set_Variable    ps axf | grep karaf | grep -v grep | awk '{print \"kill -9 \" $1}' | sh
+    ${command} =    BuiltIn.Set_Variable    ps axf | grep java | grep karaf | awk '{print \"kill -9 \" $1}' | sh
     Run_Command_On_List_Or_All    command=${command}    member_index_list=${member_index_list}
     BuiltIn.Return_From_Keyword_If    not ${confirm}
     # TODO: Convert to WUKS with configurable timeout if it turns out 1 second is not enough.
@@ -72,18 +73,20 @@ Kill_Single_Member
     Kill_Members_From_List_Or_All    ${index_list}    ${confirm}
 
 Clean_Journals_And_Snapshots_On_List_Or_All
-    [Arguments]    ${member_index_list}=${EMPTY}
+    [Arguments]    ${member_index_list}=${EMPTY}    ${karaf_home}=${WORKSPACE}${/}${BUNDLEFOLDER}
     [Documentation]    Delete journal and snapshots directories on every node listed (or all).
     ${index_list} =    ClusterManagement__Given_Or_Internal_Index_List    given_list=${member_index_list}
-    ${command} =    Set Variable    rm -rf "${KARAF_HOME}/journal" "${KARAF_HOME}/snapshots"
+    ${command} =    Set Variable    rm -rf "${karaf_home}/journal" "${karaf_home}/snapshots"
     : FOR    ${index}    IN    @{index_list}    # usually: 1, 2, 3.
     \    Run_Command_On_Member    command=${command}    member_index=${index}
 
 Start_Members_From_List_Or_All
-    [Arguments]    ${member_index_list}=${EMPTY}    ${wait_for_sync}=True    ${timeout}=300s
+    [Arguments]    ${member_index_list}=${EMPTY}    ${wait_for_sync}=True    ${timeout}=300s    ${karaf_home}=${WORKSPACE}${/}${BUNDLEFOLDER}    ${export_java_home}=${JAVA_HOME}
     [Documentation]    If the list is empty, start all cluster members. Otherwise, start members based on present indices.
     ...    If ${wait_for_sync}, wait for cluster sync on listed members.
-    ${command} =    BuiltIn.Set_Variable    ${KARAF_HOME}/bin/start
+    ...    Optionally karaf_home can be overridden. Optionally specific JAVA_HOME is used for starting.
+    ${base_command} =    BuiltIn.Set_Variable    ${karaf_home}/bin/start
+    ${command} =    BuiltIn.Set_Variable_If    "${export_java_home}"    export JAVA_HOME="${export_java_home}"; ${base_command}    ${base_command}
     Run_Command_On_List_Or_All    command=${command}    member_index_list=${member_index_list}
     BuiltIn.Return_From_Keyword_If    not ${wait_for_sync}
     BuiltIn.Wait_Until_Keyword_Succeeds    ${timeout}    1s    Check_Cluster_Is_In_Sync    member_index_list=${member_index_list}
index f3f79a2d6a8051da1cbefd9335629be4a1a92bad..2113ebb88eadfde8897b3746800323ac7821d084 100644 (file)
@@ -12,6 +12,9 @@ Documentation     Resource enhancing SSHLibrary with Keywords used in multiple s
 ...               When the Keywords assume a SSH session is active,
 ...               and if the Keywords do not fit into a more specific Resource,
 ...               you can place them here.
+...
+...               TODO: Migrate Keywords related to handling SSH here.
+...               That may include Utils.Flexible_SSH_Login, KarafKeywords.Restore_Current_SSH_Connection_From_Index and similar.
 Library           SSHLibrary
 Resource          ${CURDIR}/Utils.robot
 
@@ -30,14 +33,30 @@ Open_Connection_To_Tools_System
     [Return]    ${tools}
 
 Execute_Command_Passes
-    [Arguments]    ${command}
-    [Documentation]    Execute command via SSH. If RC is nonzero, log everything. Retrun bool string of command success.
+    [Arguments]    ${command}    ${return_success_only}=True    ${log_on_success}=False    ${log_on_failure}=True    ${stderr_must_be_empty}=False
+    [Documentation]    Execute command via the active SSH connection. For success, rc has to be zero and optionally stderr has to be empty.
+    ...    Log everything, depending on arguments and success. Return either success string or stdout.
+    ...    TODO: Do we want to support customizing return values the same way as SSHLibrary.Execute_Command does?
     ${stdout}    ${stderr}    ${rc} =    SSHLibrary.Execute_Command    ${command}    return_stderr=True    return_rc=True
-    BuiltIn.Return_From_Keyword_If    ${rc} == 0    True
+    ${emptiness_status}    ${result} =    BuiltIn.Run_Keyword_And_Ignore_Error    BuiltIn.Should_Be_Empty    ${stderr}
+    ${success} =    BuiltIn.Set_Variable_If    (${rc} == 0) and (("${emptiness_status}" == "PASS") or not ${stderr_must_be_empty})    True    False
+    BuiltIn.Run_Keyword_If    (${log_on_success} and ${success}) or (${log_on_failure} and not ${success})    Log_Command_Results    ${stdout}    ${stderr}    ${rc}
+    BuiltIn.Return_From_Keyword_If    ${return_success_only}    ${success}
+    BuiltIn.Return_From_Keyword_If    ${success}    ${stdout}
+    BuiltIn.Fail    Got rc: ${rc} or stdout was not empty: ${stdout}
+
+Execute_Command_Should_Pass
+    [Arguments]    ${command}    ${log_on_success}=True    ${log_on_failure}=True    ${stderr_must_be_empty}=False
+    [Documentation]    A wrapper for Execute_Command_Passes with return_success_only=False
+    ...    Also, log_on_success defaults to True (but is customizable, unlike return_success_only).
+    BuiltIn.Run_Keyword_And_Return    Execute_Command_Passes    ${command}    return_success_only=False    log_on_success=${log_on_success}    log_on_failure=${log_on_failure}    stderr_must_be_empty=${stderr_must_be_empty}
+
+Log_Command_Results
+    [Arguments]    ${stdout}    ${stderr}    ${rc}
+    [Documentation]    Log everything returned by SSHLibrary.Execute_Command
     BuiltIn.Log    ${stdout}
     BuiltIn.Log    ${stderr}
     BuiltIn.Log    ${rc}
-    [Return]    False
 
 Require_Python
     [Documentation]    Verify current SSH connection leads to machine with python working. Fatal fail otherwise.
diff --git a/csit/suites/controller/akka_upgrade/1node.robot b/csit/suites/controller/akka_upgrade/1node.robot
new file mode 100644 (file)
index 0000000..930d99f
--- /dev/null
@@ -0,0 +1,145 @@
+*** Settings ***
+Documentation     Suite for testing upgrading persisted data from earlier release.
+...
+...               Copyright (c) 2016 Cisco Systems, Inc. and others. All rights reserved.
+...
+...               This program and the accompanying materials are made available under the
+...               terms of the Eclipse Public License v1.0 which accompanies this distribution,
+...               and is available at http://www.eclipse.org/legal/epl-v10.html
+...
+...
+...               This suite kills the running (newer) ODL at its default location.
+...               It then installs (configurable) older ODL to an alternative location,
+...               pushes large amount of car data, verifies and kills the older ODL.
+...               The journal and snapshot files are transferred to the default location
+...               and the newer ODL is started.
+...               Then it verifies the config data is still present and matches what was seen before.
+...
+...               In principle, the suite should also work if "newer" ODL is in fact older.
+...               The limiting factor is featuresBoot, the value should be applicable to both ODL versions.
+...
+...               Note that in order to create traffic large enough for snapshots to be created,
+...               this suite also acts as a stress test for Restconf.
+...               But as that is not a primary focus of this suite,
+...               data seen on newer ODL is only compared to what was seen on the older ODL.
+...
+...               As using Robotframework would be both too slow and too memory consuming,
+...               this suite uses a specialized Python utility for pushing the data locally on ODL_SYSTEM.
+...               The utility filename is configurable, as there may be changes in PATCH behavior in future.
+...
+...               This suite uses relatively new support for PATCH http method.
+...               It repetitively replaces a segment of cars with moving IDs,
+...               so that there is a lot of data in journal (both write and delete),
+...               but the overall size of data stored remains limited.
+...
+...               This is 1-node suite, but it still uses ClusterManagement.Check_Cluster_Is_In_Sync
+...               in order to detect the same sync condition as 3-node suite would do.
+...               Jolokia feature is required for that.
+...
+...               Minimal set of features to be installed: odl-restconf, odl-jolokia, odl-clustering-test-app.
+Suite Setup       Setup_Suite
+Test Setup        SetupUtils.Setup_Test_With_Logging_And_Without_Fast_Failing
+Test Teardown     SetupUtils.Teardown_Test_Show_Bugs_If_Test_Failed
+Default Tags      1node    carpeople    critical
+Library           SSHLibrary
+Resource          ${CURDIR}/../../../libraries/ClusterManagement.robot
+Resource          ${CURDIR}/../../../libraries/SetupUtils.robot
+Resource          ${CURDIR}/../../../libraries/SSHKeywords.robot
+Resource          ${CURDIR}/../../../libraries/TemplatedRequests.robot
+
+*** Variables ***
+${ALTERNATIVE_BUNDLEFOLDER_PARENT}    /tmp/older
+${CAR_VAR_DIR}    ${CURDIR}/../../../variables/carpeople/libtest/cars
+${CLUSTER_BOOTUP_SYNC_TIMEOUT}    1200s    # Rebooting after kill may take longer time, especially for -all- install.
+${ITERATIONS}     1000
+${MOVE_PER_ITER}    1000
+${PREVIOUS_ODL_RELEASE_ZIP_URL}    https://nexus.opendaylight.org/content/repositories/public/org/opendaylight/integration/distribution-karaf/0.4.2-Beryllium-SR2/distribution-karaf-0.4.2-Beryllium-SR2.zip
+${PYTHON_UTILITY_FILENAME}    patch_cars_be_sr2.py
+${SEGMENT_SIZE}    10000
+
+*** Test Cases ***
+Kill_Original_Odl
+    [Documentation]    The ODL prepared by releng/builder is the newer one, kill it.
+    ...    Also, remove journal and snapshots.
+    ClusterManagement.Kill_Members_From_List_Or_All
+    ClusterManagement.Clean_Journals_And_Snapshots_On_List_Or_All
+
+Install_Older_Odl
+    [Documentation]    Download .zip of older ODL, unpack, delete .zip, copy featuresBoot line.
+    # Download.
+    SSHKeywords.Execute_Command_Should_Pass    mkdir -p "${ALTERNATIVE_BUNDLEFOLDER_PARENT}" && cd "${ALTERNATIVE_BUNDLEFOLDER_PARENT}" && rm -rf * && wget -N "${PREVIOUS_ODL_RELEASE_ZIP_URL}"
+    # Unzip and detect bundle folder name.
+    ${bundle_dir} =    SSHKeywords.Execute_Command_Should_Pass    cd "${ALTERNATIVE_BUNDLEFOLDER_PARENT}" && unzip -q *.zip && rm *.zip && ls -1
+    BuiltIn.Set_Suite_Variable    \${alternative_bundlefolder}    ${ALTERNATIVE_BUNDLEFOLDER_PARENT}/${bundle_dir}
+    # TODO: Add more strict checks. Folder should have single line, without .zip extension.
+    # Extract featuresBoot lines.
+    ${cfg_filename} =    BuiltIn.Set_Variable    org.apache.karaf.features.cfg
+    ${cfg_older} =    BuiltIn.Set_Variable    ${WORKSPACE}/${BUNDLEFOLDER}/etc/${cfg_filename}
+    ${cfg_newer} =    BuiltIn.Set_Variable    ${alternative_bundlefolder}/etc/${cfg_filename}
+    ${vanilla_line} =    SSHKeywords.Execute_Command_Should_Pass    grep 'featuresBoot' "${cfg_newer}" | grep -v 'featuresBootAsynchronous'
+    ${older_line} =    SSHKeywords.Execute_Command_Should_Pass    grep 'featuresBoot' "${cfg_older}" | grep -v 'featuresBootAsynchronous'
+    # Replace the vanilla line.
+    SSHKeywords.Execute_Command_Should_Pass    sed -i 's/${vanilla_line}/${older_line}/g' "${cfg_newer}"
+    # Verify the replaced line.
+    ${newer_line} =    SSHKeywords.Execute_Command_Should_Pass    grep 'featuresBoot' "${cfg_newer}" | grep -v 'featuresBootAsynchronous'
+    BuiltIn.Should_Not_Be_Equal    ${vanilla_line}    ${newer_line}
+    BuiltIn.Should_Be_Equal    ${older_line}    ${newer_line}
+
+Start_Older_Odl
+    [Documentation]    Start older ODL on background.
+    [Tags]    1node    carpeople    # Not critical, to save space in default log.html presentation
+    ClusterManagement.Start_Members_From_List_Or_All    wait_for_sync=True    timeout=${CLUSTER_BOOTUP_SYNC_TIMEOUT}    karaf_home=${alternative_bundlefolder}
+    # This is deliberately analogous to killing the whole cluster.
+    # (As opposed to killing just one member, but for 1 node it is the same.)
+
+Add_Data
+    [Documentation]    Put car data to config datastore of older ODL.
+    ${command} =    BuiltIn.Set_Variable    python ${PYTHON_UTILITY_FILENAME} --segment-size=${SEGMENT_SIZE} --iterations=${ITERATIONS} --move-per-iter=${MOVE_PER_ITER}
+    SSHKeywords.Execute_Command_Should_Pass    ${command}    stderr_must_be_empty=True
+    # TODO: I have seen 401 here once. Implement workaround or report a Bug.
+
+Remember_Data
+    [Documentation]    Get and save the stored data for later comparison.
+    ${data} =    TemplatedRequests.Get_As_Json_Templated    folder=${CAR_VAR_DIR}    verify=False
+    BuiltIn.Set_Suite_Variable    \${data_before}    ${data}
+
+Validate_Data
+    [Documentation]    Compare the saved data against what the data should look like.
+    ${first_id} =    BuiltIn.Evaluate    (${ITERATIONS} - 1) * ${MOVE_PER_ITER} + 1
+    SetupUtils.Set_Known_Bug_Id    5909
+    # The following line is the second part of TemplatedRequests.Get_As_Json_Templated for verify=True.
+    TemplatedRequests.Verify_Response_As_Json_Templated    response=${data_before}    folder=${CAR_VAR_DIR}    base_name=data    iterations=${SEGMENT_SIZE}    iter_start=${first_id}
+
+Kill_Older_Odl
+    [Documentation]    Kill the older ODL immediately.
+    ClusterManagement.Kill_Members_From_List_Or_All
+
+Transfer_Persisted_Data
+    [Documentation]    Move snapshots and journal into the original ODL installation.
+    # SSHLibrary.Switch_Connection    ${odl_system_ssh_index}
+    ${stdout} =    SSHKeywords.Execute_Command_Should_Pass    cp -rv "${alternative_bundlefolder}/snapshots" "${WORKSPACE}/${BUNDLEFOLDER}/" && cp -rv "${alternative_bundlefolder}/journal" "${WORKSPACE}/${BUNDLEFOLDER}/"
+    # TODO: Should we require a snapshot was created?
+
+Start_Newer_Odl
+    [Documentation]    Start the newer ODL on background.
+    [Tags]    1node    carpeople    # Not critical, to save space in default log.html presentation
+    ClusterManagement.Start_Members_From_List_Or_All    wait_for_sync=True    timeout=${CLUSTER_BOOTUP_SYNC_TIMEOUT}
+
+Verify_Data_Is_Restored
+    [Documentation]    Get car data from config datastore and verify it matches what was seen before.
+    ${data_after} =    TemplatedRequests.Get_As_Json_Templated    folder=${CAR_VAR_DIR}    verify=False
+    BuiltIn.Should_Be_Equal    ${data_before}    ${data_after}
+
+Archive_Older_Karaf_Log
+    [Documentation]    Only original location benefits from automatic karaf.log archival.
+    SSHKeywords.Execute_Command_Should_Pass    xz -9evv ${alternative_bundlefolder}/data/log/karaf.log
+    SSHLibrary.Get_File    ${alternative_bundlefolder}/data/log/karaf.log.xz    older.karaf.log.xz
+    # TODO: Uncompress first (or last) megabyte for better readability?
+
+*** Keywords ***
+Setup_Suite
+    [Documentation]    Activate dependency Resources, create SSH connection, copy Python utility.
+    ClusterManagement.ClusterManagement_Setup
+    TemplatedRequests.Create_Default_Session
+    ${connection} =    SSHKeywords.Open_Connection_To_ODL_System
+    SSHLibrary.Put_File    ${CURDIR}/../../../../tools/odl-mdsal-clustering-tests/${PYTHON_UTILITY_FILENAME}
diff --git a/csit/testplans/controller-akka1.txt b/csit/testplans/controller-akka1.txt
new file mode 100644 (file)
index 0000000..50d2ab2
--- /dev/null
@@ -0,0 +1,2 @@
+# Place the suites in run order:
+integration/test/csit/suites/controller/akka_upgrade/1node.robot
diff --git a/tools/odl-mdsal-clustering-tests/patch_cars_be_sr2.py b/tools/odl-mdsal-clustering-tests/patch_cars_be_sr2.py
new file mode 100644 (file)
index 0000000..3ae61d1
--- /dev/null
@@ -0,0 +1,90 @@
+"""
+The purpose of this script is to create enough traffic in config datastore
+to trigger creation of Snapshot.
+This script uses PATCH http method for handling "moving segment" of cars.
+The car data is minimal, containing only an ID (car-<num>).
+This script is tailored to behavior of Beryllium-SR2,
+if the behavior changes, new script will be needed.
+"""
+
+
+import argparse
+import string
+import sys
+import requests
+
+
+def main():
+    """
+    The main function that does it all.
+
+    TODO: Move argument parsing to a separate function,
+    so allow the main logic to be started programmatically?
+    """
+
+    # Constants
+    car_entry_template = string.Template('''      {
+       "id": "car-$NUM"
+      }''')
+
+    patch_data_template = string.Template('''{
+ "ietf-restconf:yang-patch": {
+  "patch-id": "$ID",
+  "edit": [
+   {
+    "edit-id": "0",
+    "operation": "replace",
+    "target": "/car:car-entry[car:id='0']",
+    "value": {
+     "car:car-entry": [
+$ENTRIES
+     ]
+    }
+   }
+  ]
+ }
+}''')
+
+    # Arguments
+    parser = argparse.ArgumentParser(description="Config datastore"
+                                                 "scale test script")
+    parser.add_argument("--host", default="127.0.0.1",
+                        help="Host where odl controller is running."
+                             "(default: 127.0.0.1)")
+    parser.add_argument("--port", default="8181",
+                        help="Port on which odl's RESTCONF is listening"
+                             "(default: 8181)")
+    parser.add_argument("--start-id", type=int, default=1,
+                        help="ID number of the first car. (default:1)")
+    parser.add_argument("--segment-size", type=int, default=1,
+                        help="Number of cars in segment. (default:1)")
+    parser.add_argument("--iterations", type=int, default=1,
+                        help="How many times the segment sent. (default:1)")
+    parser.add_argument("--move-per-iter", type=int, default=1,
+                        help="Each segment has IDs moved by this. (default:1)")
+    parser.add_argument("--user", help="Restconf user name", default="admin")
+    parser.add_argument("--password", help="Restconf password", default="admin")
+
+    args = parser.parse_args()
+
+    # Logic
+    url = "http://" + args.host + ':' + args.port + "/restconf/config/car:cars"
+    auth = (args.user, args.password)
+    headers = {"Content-Type": "application/yang.patch+json"}
+    session = requests.Session()
+    for iteration in range(args.iterations):
+        entry_list = []
+        for num_entry in range(args.segment_size):
+            num_id = args.start_id + iteration * args.move_per_iter + num_entry
+            entry_list.append(car_entry_template.substitute({"NUM": str(num_id)}))
+        mapping = {"ID": str(iteration), "ENTRIES": ",\n".join(entry_list)}
+        data = patch_data_template.substitute(mapping)
+        response = session.patch(url=url, auth=auth, headers=headers, data=data)
+        if response.status_code not in [200, 201, 204]:
+            print "status:", response.status_code
+            print "text:", response.text
+            sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()