路由器发生异常时邮件通知

网络示意图:路由器发生异常时邮件通知

定期监控路由器的状态,当某项状态超过所设置的阈值时,将结果通过邮件发送给管理员。
可以为以下状态设置阈值。

  • CPU使用率
  • 内存使用率
  • 设备内的温度(※)
  • PP接口的线路使用率(发送负载、接收负载)
  • IP映射表当前使用中的端口数

(※) 只有RTX1200支持此功能。

RTX1200的设置例

LAN接口的设置(使用LAN1接口)

ip lan1 address 192.168.0.1/24

WAN接口的设置(使用LAN2接口)

pp select 1
pp always-on on
pppoe use lan2
pp auth accept pap chap
pp auth myname (连接ISP的ID) (连接ISP的密码)
ppp lcp mru on 1454
ppp ipcp ipaddress on
ppp ipcp msext on
ip pp mtu 1454
ip pp nat descriptor 1
pp enable 1
ip route default gateway pp 1

NAT的设置

nat descriptor type 1 masquerade

DHCP的设置

dhcp service server
dhcp scope 1 192.168.0.2-192.168.0.100/24

DNS的设置

dns server (ISP指定的DNS服务器地址)
dns private address spoof on

过滤的设置

ip filter source-route on
ip filter directed-broadcast on
ip filter 1010 reject * * udp,tcp 135 *
ip filter 1011 reject * * udp,tcp * 135
ip filter 1012 reject * * udp,tcp netbios_ns-netbios_ssn *
ip filter 1013 reject * * udp,tcp * netbios_ns-netbios_ssn
ip filter 1014 reject * * udp,tcp 445 *
ip filter 1015 reject * * udp,tcp * 445
ip filter 1020 reject 192.168.0.0/24 *
ip filter 1030 pass * 192.168.0.0/24 icmp
ip filter 2000 reject * *
ip filter 3000 pass * *
ip filter dynamic 100 * * ftp
ip filter dynamic 101 * * www
ip filter dynamic 102 * * domain
ip filter dynamic 103 * * smtp
ip filter dynamic 104 * * pop3
ip filter dynamic 105 * * netmeeting
ip filter dynamic 106 * * tcp
ip filter dynamic 107 * * udp
pp select 1
ip pp secure filter in 1020 1030 2000
ip pp secure filter out 1010 1011 1012 1013 1014 1015 3000 dynamic 100 101 102 103 104 105 106 107 # 注释1
pp enable 1

Lua脚本的启动设置

schedule at 1 startup * lua (Lua脚本文件名)

Lua脚本例

设置值

-- 监控的间隔 (1-864000 秒)
idle_time =(监控间隔)

-- CPU使用率的时间单位("5sec", "1min" 或 "5min")
cpu_time = "(时间单位)"

-- 监控的PP接口号码(1 - 30)
peer_num =(接口号码)

-- NAT映射表的编号(1 - 2147483647)
nat_descriptor =(NAT映射表编号)

-- 各状态的阈值
th_tbl = {
 cpu =(CPU使用率(%)),
 mem =(内存使用率(%)),
 tmp =(设备内温度(℃)), -- RTX1200
 snd =(PP接口发送负载率(%)),
 rcv =(PP接口接收负载率(%)),
 nat =(IP映射使用的端口数(个))
}

-- 连续多少次超过、或恢复才判断为异常或正常(1, 2 ..)
count =(次数)

-- 恢复正常时是否发送邮件通知(发送:true / 不发送:false)
down_mail =(true / false)

-- Mail settings; this table is handed to rt.mail() by the main loop.
-- Replace every "(...)" placeholder with the real value before use.
mail_tbl = {
   smtp_address = "(SMTP服务器地址)",
   smtp_auth_name = "(邮箱用户名)",
   smtp_auth_password = "(邮箱密码)",
   smtp_auth_protocol = "(验证方式)",     -- authentication method, usually plain
   from = "(发件人邮箱)",
   to = "(收件人邮箱)"
}

-- 邮件发送失败时,输出的SYSLOG的等级(info, debug, notice)
log_level = "(SYSLOG等级)"

取得CPU使用率的函数

--- Build the Lua pattern that extracts the CPU load for one averaging window.
-- @param key averaging window name: "5sec", "1min" or "5min"
-- @return a pattern capturing the integer that precedes "%(<key>)",
--         or nil when key is not one of the accepted window names
function set_cpu_ptn(key)
 local accepted = { ["5sec"] = true, ["1min"] = true, ["5min"] = true }

 if not accepted[key] then
  return nil
 end

 -- "%%" matches a literal percent sign, "%(" and "%)" literal parentheses.
 return "(%d+)%%%(" .. key .. "%)"
end

取得路由器当前状态的函数

--- Run "show environment" and refresh every entry of t from its output.
-- @param t table of entries; each entry's `ptn` is matched against the command
--          output and the capture is stored in the entry's `val` as a number
--          (`val` becomes nil when the pattern does not match)
-- @return rtn the first result of rt.command
-- @return str the command output, or a failure message when the command
--             failed or returned no output
function rt_res_status(t)
 local cmd = "show environment"
 local rtn, str = rt.command(cmd)

 if (rtn) and (str) then
  for _, v in pairs(t) do
   v.val = str:match(v.ptn)
   if (v.val) then
    v.val = tonumber(v.val)
   end
  end
 else
  -- Leading space keeps the command name and the message apart.
  str = cmd .. " exec failed.\r\n\r\n"
 end

 return rtn, str
end

计算PP接口的线路负载率的函数

--- Read the load percentages of one PP interface from "show status pp <num>".
-- The integer part of every "Load: <n>.<m>%" occurrence is collected in order;
-- the first is returned as the receive load and the second as the send load.
-- @param num PP interface number
-- @return rtn the first result of rt.command
-- @return rcv first load value as a number, or nil when not found
-- @return snd second load value as a number, or nil when not found
-- @return str the command output, or a failure message when the command
--             failed or returned no output (so callers never get nil text)
function pp_load_info(num)
 local rcv, snd
 local loads = {}
 local cmd = "show status pp " .. tostring(num)
 local ptn = "Load%:%s+(%d+)%.%d+%%"

 local rtn, str = rt.command(cmd)
 if (rtn) and (str) then
  for w in string.gmatch(str, ptn) do
   loads[#loads + 1] = w
  end

  if (loads[1]) then
   rcv = tonumber(loads[1])
  end
  if (loads[2]) then
   snd = tonumber(loads[2])
  end
 else
  -- Same convention as the other status helpers: always hand back a string.
  str = cmd .. " exec failed.\r\n"
 end

 return rtn, rcv, snd, str
end

计算IP映射表使用端口数的函数

--- Read the number of ports in use from "show nat descriptor address <id>".
-- @param id NAT descriptor number
-- @return rtn the first result of rt.command
-- @return num the number captured before " is using", or nil when not found
-- @return str the command output, or a failure message when the command
--             failed or returned no output
function natmsq_use_status(id)
 local num
 local cmd = "show nat descriptor address " .. tostring(id)
 -- The trailing "." is an unescaped pattern wildcard (any one character).
 local ptn = "(%d+) is using."

 local rtn, str = rt.command(cmd)
 if (rtn) and (str) then
  num = str:match(ptn)
  if (num) then
   num = tonumber(num)
  end
 else
  -- Leading space keeps the command name and the message apart.
  str = cmd .. " exec failed.\r\n"
 end

 return rtn, num, str
end

各状态超过阈值或恢复正常时,生成通知信息的函数

--- Build the notification text for one monitored item.
-- Feeds the sample to count_proc and turns its result into a message.
-- @param t    state table of the item; `title` and `unit` are used for the text
-- @param val  current value; when nil/false nothing is counted or reported
-- @param th   threshold the value is compared against
-- @param down when true, a message is also produced on recovery
-- @return the message text, or "" when there is nothing to report
function make_msg(t, val, th, down)
 local str = ""

 if (val) then
  local rtn = count_proc(t, val, th)
  if (rtn < 0) then
   if (down) then
    -- Blank line after the message so following messages stay separated.
    str = t.title .. " is less than the threshold value.\r\n\r\n"
   end
  elseif (rtn > 0) then
   str = t.title .. " is more than the threshold value.\r\n"
   str = str .. string.format(" %s: %d%s\r\n threshold: %d%s\r\n\r\n",
      t.title, val, t.unit, th, t.unit)
  end
 end

 return str
end

对连续超过阈值(或连续恢复)的次数进行计数的函数

--- Track consecutive over-threshold / recovered samples for one item.
-- Reads the global `count` as the number of consecutive samples required
-- before the state changes.
-- @param t   state table with `over`, `down` (streak counters) and `flag`
--            (true while the item is in the alarm state)
-- @param val current value
-- @param th  threshold; only val > th counts as exceeding it
-- @return 1 when the alarm state is entered, -1 when it is left, 0 otherwise
function count_proc(t, val, th)
 local over_limit = (val > th)

 -- Alarm already raised and still above the threshold:
 -- drop any partial recovery streak.
 if over_limit and t.flag then
  if (t.down > 0) then t.down = 0 end
  return 0
 end

 -- Normal state and still at or below the threshold:
 -- drop any partial alarm streak.
 if (not over_limit) and (not t.flag) then
  if (t.over > 0) then t.over = 0 end
  return 0
 end

 -- Normal state, value above the threshold: extend the alarm streak.
 if over_limit then
  t.over = t.over + 1
  if (t.over ~= count) then
   return 0
  end
  t.flag = true
  return 1
 end

 -- Alarm state, value at or below the threshold: extend the recovery streak.
 t.down = t.down + 1
 if (t.down ~= count) then
  return 0
 end
 t.flag = false
 t.over = 0
 t.down = 0
 return -1
end

取得现在的时间

--- Format the current local time as "YYYY/MM/DD hh:mm:ss".
-- @return the formatted timestamp string
function time_stamp()
 local now = os.date("*t")
 local date_part = string.format("%d/%02d/%02d", now.year, now.month, now.day)
 local time_part = string.format("%02d:%02d:%02d", now.hour, now.min, now.sec)

 return date_part .. " " .. time_part
end

主程序

-- Per-item state for the values parsed from "show environment".
--   ptn   : pattern whose capture is the item's value
--   val   : last parsed value
--   over  : consecutive samples above the threshold
--   down  : consecutive samples back at or below the threshold
--   flag  : true while the item is in the alarm state
--   title : name used in the mail text
--   unit  : suffix printed after the value; it is passed to string.format as
--           a %s argument, so a single "%" is the literal percent sign
local res_tbl = {
 cpu = {ptn = "", val = 0, over = 0, down = 0, flag = false, title = "CPU Load(" .. cpu_time .. ")", unit = "%"},
 mem = {ptn = "(%d+)%% used", val = 0, over = 0, down = 0, flag = false, title = "Memory Load", unit = "%"},
 tmp = {ptn = "Inside Temperature%(C.%): (%d+)", val = 0, over = 0, down = 0, flag = false, title = "Inside Temperature", unit = "C."}
}
-- State for the PP interface receive / send load.
local pp_tbl = {
 rcv = {over = 0, down = 0, flag = false, title = "PP Received Load", unit = "%"},
 snd = {over = 0, down = 0, flag = false, title = "PP Transmitted Load", unit = "%"}
}
-- State for the number of NAT masquerade ports in use.
local nat_tbl = {over = 0, down = 0, flag = false, title = "NAT masquerade port is using", unit = ""}

-- rcv and snd are declared here so they do not leak as globals.
local rtn, str, nat_use, rcv, snd
-- First alphanumeric run of the firmware revision string, compared below
-- against "RTX1200".
local rt_name = string.match(_RT_FIRM_REVISION, "(%w+)")

res_tbl.cpu.ptn = set_cpu_ptn(cpu_time)
assert(res_tbl.cpu.ptn, 'cpu_time must be "5sec", "1min" or "5min"')

-- Append s to the pending mail body; nil or false is ignored so that the
-- body always stays a string.
local function add_text(s)
 if (s) then
  mail_tbl.text = mail_tbl.text .. s
 end
end

while (true) do
 mail_tbl.text = ""

 -- CPU load, memory load (and inside temperature on RTX1200)
 rtn, str = rt_res_status(res_tbl)
 if (rtn) then
  add_text(make_msg(res_tbl.cpu, res_tbl.cpu.val, th_tbl.cpu, down_mail))
  add_text(make_msg(res_tbl.mem, res_tbl.mem.val, th_tbl.mem, down_mail))
  if (rt_name == "RTX1200") then
   add_text(make_msg(res_tbl.tmp, res_tbl.tmp.val, th_tbl.tmp, down_mail))
  end
 else
  -- Report the failure instead of dropping it silently.
  add_text(str)
 end

 -- PP interface load
 rtn, rcv, snd, str = pp_load_info(peer_num)
 if (rtn) then
  add_text(make_msg(pp_tbl.rcv, rcv, th_tbl.rcv, down_mail))
  add_text(make_msg(pp_tbl.snd, snd, th_tbl.snd, down_mail))
 else
  -- Append rather than overwrite, so alerts collected above are kept;
  -- fall back to a fixed text when no message came back.
  add_text(str or ("show status pp " .. tostring(peer_num) .. " exec failed.\r\n"))
 end

 -- Number of NAT masquerade ports in use
 rtn, nat_use, str = natmsq_use_status(nat_descriptor)
 if (rtn) then
  if (nat_use) then
   add_text(make_msg(nat_tbl, nat_use, th_tbl.nat, down_mail))
  end
 else
  add_text(str)
 end

 -- Send a mail only when at least one message was produced.
 if (mail_tbl.text:len() > 0) then
  mail_tbl.subject = string.format("resource loadwatch (%s)", time_stamp())
  rtn = rt.mail(mail_tbl)
  if (not rtn) then
   rt.syslog(log_level, "failed to send mail.(Lua脚本文件名)")
  end
 end

 rt.sleep(idle_time)
end

返回顶部