# 前言

# 插件作用

线上服务接口调取不了,但是定时任务却能正常执行,排查之后,基本确定是tomcat的假死导致,以下是解决过程 【转】https://zhuanlan.zhihu.com/p/165995050 (opens new window)

# jstack排查

# 查看cpu和内存

top -c 

top - 16:43:14 up 4 days, 20:57,  0 users,  load average: 0.20, 0.13, 0.16
Tasks:   4 total,   1 running,   3 sleeping,   0 stopped,   0 zombie
%Cpu(s):  0.6 us,  0.8 sy,  0.0 ni, 98.6 id,  0.0 wa,  0.0 hi,  0.0 si,  0.0 st
KiB Mem:  32194248 total,  6510148 used, 25684100 free,     2232 buffers
KiB Swap: 33550332 total,        0 used, 33550332 free.  2033696 cached Mem

   PID USER      PR  NI    VIRT    RES    SHR S  %CPU %MEM     TIME+ COMMAND                                                                                                                                                                
     1 root      20   0 18.098g 2.393g  15072 S   4.7  7.8   8:53.46 java -Dfilloginchecke.encoding=UTF-8 -Djava.security.egd=file:/./urandom -jar /app.jar                                                                                 
   338 root      20   0   21968   2188   1660 S   0.0  0.0   0:00.23 bash                                                                                                                                                                   
   458 root      20   0   21944   2164   1656 S   0.0  0.0   0:00.05 bash                                                                                                                                                                   
   835 root      20   0   23628   1572   1188 R   0.0  0.0   0:00.00 top c 

可以看到运行的程序 pid 是1 以及内存和cpu 情况

# jstack java的进程id

jstack 1

"redisson-netty-2-8" ##59 prio=5 os_prio=0 tid=0x00007f0008001000 nid=0x4d runnable [0x00007f00383fb000]
   java.lang.Thread.State: RUNNABLE
        at sun.nio.ch.EPollArrayWrapper.epollWait(Native Method)
        at sun.nio.ch.EPollArrayWrapper.poll(EPollArrayWrapper.java:269)
        at sun.nio.ch.EPollSelectorImpl.doSelect(EPollSelectorImpl.java:93)
        at sun.nio.ch.SelectorImpl.lockAndDoSelect(SelectorImpl.java:86)
        - locked <0x00000005d7161b70> (a io.netty.channel.nio.SelectedSelectionKeySet)
        - locked <0x00000005d7161b88> (a java.util.Collections$UnmodifiableSet)
        - locked <0x00000005d7161b28> (a sun.nio.ch.EPollSelectorImpl)
        at sun.nio.ch.SelectorImpl.select(SelectorImpl.java:97)
        at io.netty.channel.nio.SelectedSelectionKeySetSelector.select(SelectedSelectionKeySetSelector.java:62)
        at io.netty.channel.nio.NioEventLoop.select(NioEventLoop.java:791)
        at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:439)
        at io.netty.util.concurrent.SingleThreadEventExecutor$5.run(SingleThreadEventExecutor.java:906)
        at io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)
        at io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)
        at java.lang.Thread.run(Thread.java:745)

"redisson-netty-2-7" ##58 prio=5 os_prio=0 tid=0x00007f00e2a29800 nid=0x4c runnable [0x00007f001eaf8000]
   java.lang.Thread.State: RUNNABLE
        at sun.nio.ch.EPollArrayWrapper.epollWait(Native Method)
        at sun.nio.ch.EPollArrayWrapper.poll(EPollArrayWrapper.java:269)
        at sun.nio.ch.EPollSelectorImpl.doSelect(EPollSelectorImpl.java:93)
        at sun.nio.ch.SelectorImpl.lockAndDoSelect(SelectorImpl.java:86)
        - locked <0x00000005d69ee848> (a io.netty.channel.nio.SelectedSelectionKeySet)
        - locked <0x00000005d7298b40> (a java.util.Collections$UnmodifiableSet)
        - locked <0x00000005d69ee7a0> (a sun.nio.ch.EPollSelectorImpl)
        at sun.nio.ch.SelectorImpl.select(SelectorImpl.java:97)
        at io.netty.channel.nio.SelectedSelectionKeySetSelector.select(SelectedSelectionKeySetSelector.java:62)
        at io.netty.channel.nio.NioEventLoop.select(NioEventLoop.java:791)
        at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:439)
        at io.netty.util.concurrent.SingleThreadEventExecutor$5.run(SingleThreadEventExecutor.java:906)
        at io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)
        at io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)
        at java.lang.Thread.run(Thread.java:745)

查找DeadThread 并没有死锁

# 查看进程中最耗cpu的子线程

top -p  1 -H

top - 16:48:57 up 4 days, 21:03,  0 users,  load average: 0.16, 0.20, 0.18
Threads: 163 total,   0 running, 163 sleeping,   0 stopped,   0 zombie
%Cpu(s):  0.7 us,  1.0 sy,  0.0 ni, 98.3 id,  0.0 wa,  0.0 hi,  0.0 si,  0.0 st
KiB Mem:  32194248 total,  6485560 used, 25708688 free,     2232 buffers
KiB Swap: 33550332 total,        0 used, 33550332 free.  2047080 cached Mem

   PID USER      PR  NI    VIRT    RES    SHR S %CPU %MEM     TIME+ COMMAND                                                                                                                                                                 
   297 root      20   0 18.098g 2.356g  15072 S  3.3  7.7   2:14.20 java                                                                                                                                                                    
   142 root      20   0 18.098g 2.356g  15072 S  0.3  7.7   0:05.04 java                                                                                                                                                                    
   199 root      20   0 18.098g 2.356g  15072 S  0.3  7.7   0:06.95 java                                                                                                                                                                    
     1 root      20   0 18.098g 2.356g  15072 S  0.0  7.7   0:00.05 java                                                                                                                                                                    
     6 root      20   0 18.098g 2.356g  15072 S  0.0  7.7   0:28.20 java                                                                                                                                                                    
     7 root      20   0 18.098g 2.356g  15072 S  0.0  7.7   0:01.59 java                                                                                                                                                                    
     8 root      20   0 18.098g 2.356g  15072 S  0.0  7.7   0:01.65 java                                                                                                                                                                    
     9 root      20   0 18.098g 2.356g  15072 S  0.0  7.7   0:01.68 java                                                                                                                                                                    
    10 root      20   0 18.098g 2.356g  15072 S  0.0  7.7   0:01.65 java                                                                                                                                                                    
    11 root      20   0 18.098g 2.356g  15072 S  0.0  7.7   0:01.67 java                                                                                                                                                                    
    12 root      20   0 18.098g 2.356g  15072 S  0.0  7.7   0:01.67 java                                                                                                                                                                    
    13 root      20   0 18.098g 2.356g  15072 S  0.0  7.7   0:01.69 java                                                                                                                                                                    
    14 root      20   0 18.098g 2.356g  15072 S  0.0  7.7   0:01.62 java                                                                                                                                                                    
    15 root      20   0 18.098g 2.356g  15072 S  0.0  7.7   0:01.64 java                                                                                                                                                                    
    16 root      20   0 18.098g 2.356g  15072 S  0.0  7.7   0:01.68 java                                                                                                                                                                    
    17 root      20   0 18.098g 2.356g  15072 S  0.0  7.7   0:01.64 java                                                                                                                                                                    
    18 root      20   0 18.098g 2.356g  15072 S  0.0  7.7   0:01.64 java                                                                                                                                                                    
    19 root      20   0 18.098g 2.356g  15072 S  0.0  7.7   0:01.67 java                                                                                                                                                                    
    20 root      20   0 18.098g 2.356g  15072 S  0.0  7.7   0:02.15 java                                                                                                                                                                    
    21 root      20   0 18.098g 2.356g  15072 S  0.0  7.7   0:00.03 java                                                                                                                                                                    
    22 root      20   0 18.098g 2.356g  15072 S  0.0  7.7   0:00.04 java                                                                                                                                                                    
    23 root      20   0 18.098g 2.356g  15072 S  0.0  7.7   0:00.00 java                                                                                                                                                                    
    24 root      20   0 18.098g 2.356g  15072 S  0.0  7.7   0:25.34 java 

最耗资源的是297

  • 将最耗cpu的线程id转换为16进制输出
printf "%x \n" 297

root@598b9d456d11:/## printf "%x \n" 297
129 
root@598b9d456d11:/## 
  • 查询具体出现问题的代码位置
jstack 1 | grep 129 -A 30

root@598b9d456d11:/## jstack 1 | grep 129 -A 30
"System Clock" ##245 daemon prio=5 os_prio=0 tid=0x00007efedc090000 nid=0x129 waiting on condition [0x00007eff841b5000]
   java.lang.Thread.State: TIMED_WAITING (parking)
        at sun.misc.Unsafe.park(Native Method)
        - parking to wait for  <0x0000000775c55c38> (a java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject)
        at java.util.concurrent.locks.LockSupport.parkNanos(LockSupport.java:215)
        at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.awaitNanos(AbstractQueuedSynchronizer.java:2078)
        at java.util.concurrent.ScheduledThreadPoolExecutor$DelayedWorkQueue.take(ScheduledThreadPoolExecutor.java:1093)
        at java.util.concurrent.ScheduledThreadPoolExecutor$DelayedWorkQueue.take(ScheduledThreadPoolExecutor.java:809)
        at java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:1067)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1127)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
        at java.lang.Thread.run(Thread.java:745)

"redisson-3-16" ##226 prio=5 os_prio=0 tid=0x00007f000002d800 nid=0x116 waiting on condition [0x00007f001ebf9000]
   java.lang.Thread.State: WAITING (parking)
        at sun.misc.Unsafe.park(Native Method)
        - parking to wait for  <0x00000005d69284e0> (a java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject)
        at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175)
        at java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:2039)
        at java.util.concurrent.LinkedBlockingQueue.take(LinkedBlockingQueue.java:442)
        at java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:1067)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1127)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
        at io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)
        at java.lang.Thread.run(Thread.java:745)
  • 当时出现很多time_waited的状态,通过这种方法找到了问题代码的具体位置

# netstat 查找

# docker安装netstat

apt-get update

apt-get install net-tools

# linux 查看

  • 查看tomcat服务对应的端口的连接状态,可以确定是有很多的TIME_WAIT状态
netstat -an|grep 9909|awk '{count[$6]++} END{for (i in count) print(i,count[i])}'

FIN_WAIT2 2
LISTEN 1
TIME_WAIT 356
ESTABLISHED 13
  • 如果是tcp的问题,用如下的命令
netstat -n | awk '/^tcp/ {++S[$NF]} END {for(a in S) print a, S[a]}'

FIN_WAIT2 8
TIME_WAIT 6
ESTABLISHED 358
最后编辑时间: 12/24/2020, 3:33:40 PM