check_status.py update
diff --git a/cluster-mgmt/bin/check_status.py b/cluster-mgmt/bin/check_status.py
index eb5f535..61f2108 100755
--- a/cluster-mgmt/bin/check_status.py
+++ b/cluster-mgmt/bin/check_status.py
@@ -10,10 +10,12 @@
 correct_nr_switch=[6,50,25,25,25,25,25,25]
 correct_intra_link=[16, 98, 48, 48, 48, 48, 48, 48]
 
-
 #nr_links=(switch[1]+switch[2]+switch[3]+switch[4]+switch[5]+switch[6]+switch[7]+len(switch)-1+8)*2
 nr_links= (49 + 24 * 6 + 7 + 8) * 2
 
+cluster_basename=os.environ.get("ONOS_CLUSTER_BASENAME")
+nr_nodes=os.environ.get("ONOS_CLUSTER_NR_NODES")
+
 def get_json(url):
   print url
   try:
@@ -31,18 +33,31 @@
   return parsedResult 
 
 def check_switch():
+  buf = ""
+  retcode = 0
+
   url="http://%s:%s/wm/core/topology/switches/all/json" % (RestIP, RestPort)
   parsedResult = get_json(url)
 
   if parsedResult == "":
-    return
+    retcode = 1
+    return (retcode, "Rest API has an issue")
 
-  print "switch: total %d switches" % len(parsedResult)
+  url = "http://%s:%s/wm/registry/switches/json" % (RestIP, RestPort)
+  registry = get_json(url)
+
+  if registry == "":
+    retcode = 1
+    return (retcode, "Rest API has an issue")
+
+
+  buf += "switch: total %d switches\n" % len(parsedResult)
   cnt = []
   active = []
   for r in range(8):
     cnt.append(0)
     active.append(0)
+
   for s in parsedResult:
     if s['dpid'] in core_switches:
       nw_index = 0
@@ -53,22 +68,33 @@
     if s['state']  == "ACTIVE":
       active[nw_index] += 1
 
+    if not s['dpid'] in registry:
+      buf += "switch:  dpid %s lost controller\n" % (s['dpid'])
+
   for r in range(8):
-    print "switch: network %d : %d switches %d active" % (r+1, cnt[r], active[r])
+    buf += "switch: network %d : %d switches %d active\n" % (r+1, cnt[r], active[r])
     if correct_nr_switch[r] != cnt[r]:
-      print "switch fail: network %d should have %d switches but has %d" % (r+1, correct_nr_switch[r], cnt[r])
+      buf += "switch fail: network %d should have %d switches but has %d\n" % (r+1, correct_nr_switch[r], cnt[r])
+      retcode = 1
 
     if correct_nr_switch[r] != active[r]:
-      print "switch fail: network %d should have %d active switches but has %d" % (r+1, correct_nr_switch[r], active[r])
+      buf += "switch fail: network %d should have %d active switches but has %d\n" % (r+1, correct_nr_switch[r], active[r])
+      retcode = 1
+
+  return (retcode, buf)
 
 def check_link():
+  buf = ""
+  retcode = 0
+
   url = "http://%s:%s/wm/core/topology/links/json" % (RestIP, RestPort)
   parsedResult = get_json(url)
 
   if parsedResult == "":
-    return
+    retcode = 1
+    return (retcode, "Rest API has an issue")
 
-  print "link: total %d links (correct : %d)" % (len(parsedResult), nr_links)
+  buf += "link: total %d links (correct : %d)\n" % (len(parsedResult), nr_links)
   intra = []
   interlink=0
   for r in range(8):
@@ -94,17 +120,25 @@
 
   for r in range(8):
     if intra[r] != correct_intra_link[r]:
-      print "link fail: network %d should have %d intra links but has %d" % (r+1, correct_intra_link[r], intra[r])
+      buf += "link fail: network %d should have %d intra links but has %d\n" % (r+1, correct_intra_link[r], intra[r])
+      retcode = 1
 
   if interlink != 14:
-      print "link fail: There should be %d intra links (uni-directional) but %d" % (14, interlink)
+      buf += "link fail: There should be %d intra links (uni-directional) but %d\n" % (14, interlink)
+      retcode = 1
 
-def check_mastership():
+  return (retcode, buf)
+
+def check_switch_local():
+  buf = "check_switch_local\n"
+  retcode = 0
+
   url = "http://%s:%s/wm/registry/switches/json" % (RestIP, RestPort)
   parsedResult = get_json(url)
 
   if parsedResult == "":
-    return
+    retcode = 1
+    return (retcode, "Rest API has an issue")
 
   for s in parsedResult:
     #print s,len(s),s[0]['controllerId']
@@ -115,23 +149,97 @@
       nw =int(s.split(':')[-2], 16)
 
     if len(parsedResult[s]) > 1:
-      print "ownership fail: switch %s has more than 1 ownership" % (s)
-    elif int(ctrl[-1]) != nw:
-      print "ownership fail: switch %s is owened by %s" % (s, ctrl)
+      buf += "switch_local warn: switch %s has more than 1 controller: " % (s)
+      for i in parsedResult[s]:
+        buf += "%s " % (i['controllerId'])
+      buf += "\n"
+      retcode = 1
 
-def check_controllers():
+    if int(ctrl[-1]) != nw:
+      buf += "switch_local fail: switch %s is wrongly controlled by %s\n" % (s, ctrl)
+      retcode = 1
+      
+  return (retcode, buf)
+
+def check_switch_all(nr_ctrl):
+  # Cross-check the controller registry against each switch's controller list.
+  # nr_ctrl: expected number of distinct controllers in the registry.
+  # Returns (retcode, buf): retcode is 1 when any check fails, buf is the report.
+  buf = "check_switch_all\n"
+  retcode = 0
+  url = "http://%s:%s/wm/registry/controllers/json" % (RestIP, RestPort)
+  parsedResult = get_json(url)
+  if parsedResult == "":
+    retcode = 1
+    return (retcode, "Rest API has an issue")
+
+  ## Check Dup Controller ##
+  controllers=list(set(parsedResult))
+  if len (controllers) != len(parsedResult):
+    buf += "Duplicated Controller in registory: " + str(parsedResult) + "\n"
+    retcode = 1
+
+  ## Check Missing Controller ##
+  if len (controllers) != nr_ctrl:
+    buf += "Missiing Controller in registory: " + str(parsedResult) + "\n"
+    retcode = 1
+
+  ## Check Core Controller Exist ##
+  core_ctrl="%s1" % (cluster_basename)
+  if not core_ctrl in controllers:
+    buf += "Core controller missing in registory: " + str(parsedResult) + "\n"
+    retcode = 1
+  else:
+    # Keep only the non-core controllers; remove() raises ValueError if absent.
+    controllers.remove(core_ctrl)
+  url = "http://%s:%s/wm/registry/switches/json" % (RestIP, RestPort)
+  parsedResult = get_json(url)
+  if parsedResult == "":
+    retcode = 1
+    return (retcode, "Rest API has an issue")
+
+  for s in parsedResult:
+    ctrl_set = []
+    for c in parsedResult[s]:
+      ctrl_set.append(c['controllerId'])
+
+    if s in core_switches:
+      nw = 1
+    else:
+      nw =int(s.split(':')[-2], 16)
+
+    # Any per-switch mismatch fails the check, not only the registry ones.
+    if nw == 1 and len(ctrl_set) != 1:
+      buf += "Core switch %s has more than 1 controller: %s\n" % (s, ctrl_set)
+      retcode = 1
+    elif nw != 1:
+      if len(list(set(ctrl_set))) != len(ctrl_set):
+        buf += "Edge switch %s has dup controller: %s\n" % (s, ctrl_set)
+        retcode = 1
+      elif len(list(set(ctrl_set))) != len(controllers):
+        buf += "Edge switch %s has missing controller: %s\n" % (s, ctrl_set)
+        retcode = 1
+
+  return (retcode, buf)
+
+def check_controllers(n):
+  # List the controllers found in the registry and verify that there are n of them.
+  # Returns (retcode, text): retcode is 0 when the count matches, 1 otherwise.
+  url = "http://%s:%s/wm/registry/controllers/json" % (RestIP, RestPort)
+  registered = get_json(url)
+
+  if registered == "":
+    return (1, "Rest API has an issue")
+
+  # One "index : controller" line per registry entry.
+  lines = ["%d : %s\n" % (idx, name) for idx, name in enumerate(registered)]
+  status = 0
+
+  found = len(registered)
+  if found != n:
+    lines.append("controller fail: there are %d controllers (should be %d)\n" % (found, n))
+    status = 1
+
+  return (status, "".join(lines))
 
 if __name__ == "__main__":
-  check_switch()
-  check_link()
-  check_mastership()
-  check_controllers()
+  # Run the checks one at a time, in order; print only the report text (element 1).
+  for check, args in ((check_switch, ()), (check_link, ()), (check_switch_local, ()), (check_controllers, (8,))):
+    _, report = check(*args)
+    print "%s" % report