利用splunk分析nginx日志,分析异常状态,分析接口性能问题

一、nginx log_format

# Access-log layout named "main".
# $status is wrapped in double quotes, and $request_time comes right after the
# quoted $gzip_ratio; the remaining fields are separated by single spaces.
log_format main
    '$remote_addr - $remote_user [$time_local] "$request" '
    '"$status" $host $upstream_addr $body_bytes_sent "$http_referer" '
    '"$http_user_agent" "$http_x_forwarded_for" '
    '"$gzip_ratio" $request_time $bytes_sent $request_length $upstream_response_time "$http_fiddlerhost" "$http_xonlinehost"';

 二、分析场景

1、分析线上各 API 性能情况(平均响应时间、最大响应时间、调用量、90%/95%/99% 响应时间、中位数、标准差等)

(host = REAL-HOST-1 OR host = REAL-HOST-2) source=*REAL-SOURCE-PATH*
```Per-API latency report: extract fields from the raw nginx line, normalise the URL, then aggregate request time by API.```
| rex "(?P<remoteIP>^\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b)"
| rex "(?:\s\".+?\")\s\"(?P<agent>[a-zA-Z\%].+?)\"\s(?:\"-\"\s)"
| rex "(?:\s\d+?)\s(?P<request_length>\d+?)\s"
```Status code: the quoted number right after the request line. Any HTTP/<d>.<d> version is accepted (previously only HTTP/1.1).```
| rex "(?:HTTP/\d\.\d\"\s\")(?P<httpstatus>[\d-]+)"
| rex "(?i)\.*? (?<tomcat>(?:[0-9]{1,3}\.){3}[0-9]{1,3}.+?:\d{2,4})(?= )"
| rex "\s(?P<ehost>([a-z0-9]+(-[a-z0-9]+)*\.)+[a-z0-9]{2,})\s"
```urltime: the decimal number that follows the quoted gzip ratio.```
| rex "(?:\"[\d-\.]+?\"\s)(?P<urltime>\d+\.\d+?)(?:\s\d+?\s)"
```url: method plus path without the query string. Any HTTP version is accepted so HTTP/2.0 requests also get a url value (previously only HTTP/1.x).```
| rex "(?:\")(?<url>[^\?]+)(?:\??.*\sHTTP/\d\.)"
```Collapse variable path segments (long tokens, UUID-like ids, 20-char strings, multi-digit numbers) so calls to the same API share one url value.```
| eval url = replace(url,"\b[\w]{70,}\b","TOKEN-STRING")
| eval url = replace(url,"[\w-]{36}","UUID")
| eval url = replace(url,"[\w-]{32}","UUID")
| eval url = replace(url,"[0-9A-Za-z]{20}","STRING")
| eval url = replace(url,"\d{2,}","NUM")
```Prefix the url with the requested host name.```
| eval url = ehost + " " + url
| chart avg(urltime),max(urltime),count,upperperc90(urltime),upperperc95(urltime),upperperc99(urltime),median(urltime),stdev(urltime) by url
| sort -count
```Column labels; stdev() is the standard deviation, so it is labelled 标准差.```
| rename url as API,avg(urltime) as 平均时间,max(urltime) as 最大时间,count as 调用量,median(urltime) as 中位数,stdev(urltime) as 标准差,upperperc90(urltime) as 90时间,upperperc95(urltime) as 95时间,upperperc99(urltime) as 99时间

 效果图 


(图:利用splunk分析nginx日志,分析异常状态,分析接口性能问题)

2、按接口、HTTP 响应码统计

(host = REAL-HOST-1 OR host = REAL-HOST-2) source=*REAL-SOURCE-PATH*
```Request count per API and HTTP status code: extract fields from the raw nginx line, normalise the URL, then count by url and status.```
| rex "(?P<remoteIP>^\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b)"
| rex "(?:\s\".+?\")\s\"(?P<agent>[a-zA-Z\%].+?)\"\s(?:\"-\"\s)"
| rex "(?:\s\d+?)\s(?P<request_length>\d+?)\s"
```Status code: the quoted number right after the request line. Any HTTP/<d>.<d> version is accepted (previously only HTTP/1.1), so requests using other versions also get an httpstatus value.```
| rex "(?:HTTP/\d\.\d\"\s\")(?P<httpstatus>[\d-]+)"
| rex "(?i)\.*? (?<tomcat>(?:[0-9]{1,3}\.){3}[0-9]{1,3}.+?:\d{2,4})(?= )"
| rex "\s(?P<ehost>([a-z0-9]+(-[a-z0-9]+)*\.)+[a-z0-9]{2,})\s"
| rex "(?:\"[\d-\.]+?\"\s)(?P<urltime>\d+\.\d+?)(?:\s\d+?\s)"
```url: method plus path without the query string. Any HTTP version is accepted so HTTP/2.0 requests also get a url value (previously only HTTP/1.x).```
| rex "(?:\")(?<url>[^\?]+)(?:\??.*\sHTTP/\d\.)"
```Collapse variable path segments (long tokens, UUID-like ids, 20-char strings, multi-digit numbers) so calls to the same API share one url value.```
| eval url = replace(url,"\b[\w]{70,}\b","TOKEN-STRING")
| eval url = replace(url,"[\w-]{36}","UUID")
| eval url = replace(url,"[\w-]{32}","UUID")
| eval url = replace(url,"[0-9A-Za-z]{20}","STRING")
| eval url = replace(url,"\d{2,}","NUM")
```Prefix the url with the requested host name.```
| eval url = ehost + " " + url
| chart count by url,httpstatus

 效果图:


利用splunk分析nginx日志,分析异常状态,分析接口性能问题
 

相关推荐