一、環境
shenyu:2.5.1
VM options: -XX:MaxDirectMemorySize=200m
二、場景
- 文件multipart上傳,文件大小18M
- 不開啓任何插件
- 連續請求約十幾次(每次收到響應後再發起下一次請求)
拋出 OutOfDirectMemoryError 異常
三、解決方案
shenyu2.5.0 報 OutOfDirectMemoryError
四、判斷修復成功
嘗試過以下內存分析工具:
- VisualVM
- Arthas
- JProfiler
- IDEA Profiler
但都無法判斷是否修復成功
4.1 netty原生的內存泄漏檢測工具
在 IDEA 的 VM options 中設置:
-XX:MaxDirectMemorySize=200m
-Dio.netty.leakDetection.level=paranoid
-XX:NativeMemoryTracking=detail
-Dio.netty.leakDetection.targetRecords=1
發現不生效,於是在 ResourceLeakDetector 的 static 代碼塊上打斷點,檢查初始化是否正確:
/**
 * Excerpt of the leak detector class showing how the detection level is configured.
 *
 * <p>The level is held in a single mutable static field. It is first assigned by the static
 * initializer from system properties and can be overwritten at any later time through
 * {@link #setLevel(Level)}; whichever assignment happens last wins.
 */
public class ResourceLeakDetector<T> {
// Legacy system property name for the detection level; read first, then overridden by PROP_LEVEL.
private static final String PROP_LEVEL_OLD = "io.netty.leakDetectionLevel";
// Current system property name for the detection level.
private static final String PROP_LEVEL = "io.netty.leakDetection.level";
// Level used when no property is set or when the configured value matches no level.
private static final Level DEFAULT_LEVEL = Level.SIMPLE;
private static final String PROP_TARGET_RECORDS = "io.netty.leakDetection.targetRecords";
private static final int DEFAULT_TARGET_RECORDS = 4;
private static final String PROP_SAMPLING_INTERVAL = "io.netty.leakDetection.samplingInterval";
// There is a minor performance benefit in TLR if this is a power of 2.
private static final int DEFAULT_SAMPLING_INTERVAL = 128;
// Both values below are assigned exactly once, in the static initializer.
private static final int TARGET_RECORDS;
static final int SAMPLING_INTERVAL;
/**
 * Represents the level of resource leak detection.
 */
public enum Level {
/**
 * Disables resource leak detection.
 */
DISABLED,
/**
 * Enables simplistic sampling resource leak detection which reports there is a leak or not,
 * at the cost of small overhead (default).
 */
SIMPLE,
/**
 * Enables advanced sampling resource leak detection which reports where the leaked object was accessed
 * recently at the cost of high overhead.
 */
ADVANCED,
/**
 * Enables paranoid resource leak detection which reports where the leaked object was accessed recently,
 * at the cost of the highest possible overhead (for testing purposes only).
 */
PARANOID;
/**
 * Returns level based on string value. Accepts also string that represents ordinal number of enum.
 *
 * @param levelStr - level string : DISABLED, SIMPLE, ADVANCED, PARANOID. Ignores case.
 *                 Must not be {@code null}: it is trimmed before matching.
 * @return corresponding level or SIMPLE level in case of no match.
 */
static Level parseLevel(String levelStr) {
String trimmedLevelStr = levelStr.trim();
for (Level l : values()) {
// Match either the constant name (case-insensitive) or its ordinal written as a decimal string.
if (trimmedLevelStr.equalsIgnoreCase(l.name()) || trimmedLevelStr.equals(String.valueOf(l.ordinal()))) {
return l;
}
}
return DEFAULT_LEVEL;
}
}
// Current detection level. Mutable: set by the static initializer and by setLevel(Level).
private static Level level;
private static final InternalLogger logger = InternalLoggerFactory.getInstance(ResourceLeakDetector.class);
static {
// Initialize the configuration from system properties.
final boolean disabled;
if (SystemPropertyUtil.get("io.netty.noResourceLeakDetection") != null) {
disabled = SystemPropertyUtil.getBoolean("io.netty.noResourceLeakDetection", false);
logger.debug("-Dio.netty.noResourceLeakDetection: {}", disabled);
// The warning points users at the replacement property, using the default level as the example value.
logger.warn(
"-Dio.netty.noResourceLeakDetection is deprecated. Use '-D{}={}' instead.",
PROP_LEVEL, DEFAULT_LEVEL.name().toLowerCase());
} else {
disabled = false;
}
// The deprecated switch only changes the fallback; an explicit level property still takes precedence.
Level defaultLevel = disabled? Level.DISABLED : DEFAULT_LEVEL;
// First read old property name
String levelStr = SystemPropertyUtil.get(PROP_LEVEL_OLD, defaultLevel.name());
// If new property name is present, use it
levelStr = SystemPropertyUtil.get(PROP_LEVEL, levelStr);
// Local variable that shadows the static field of the same name; the field is assigned below.
Level level = Level.parseLevel(levelStr);
TARGET_RECORDS = SystemPropertyUtil.getInt(PROP_TARGET_RECORDS, DEFAULT_TARGET_RECORDS);
SAMPLING_INTERVAL = SystemPropertyUtil.getInt(PROP_SAMPLING_INTERVAL, DEFAULT_SAMPLING_INTERVAL);
// Set the detection level.
ResourceLeakDetector.level = level;
if (logger.isDebugEnabled()) {
logger.debug("-D{}: {}", PROP_LEVEL, level.name().toLowerCase());
logger.debug("-D{}: {}", PROP_TARGET_RECORDS, TARGET_RECORDS);
}
}
/**
 * Enables ({@link Level#SIMPLE}) or disables ({@link Level#DISABLED}) leak detection.
 *
 * @deprecated Use {@link #setLevel(Level)} instead.
 */
@Deprecated
public static void setEnabled(boolean enabled) {
setLevel(enabled? Level.SIMPLE : Level.DISABLED);
}
/**
 * Returns {@code true} if resource leak detection is enabled.
 */
public static boolean isEnabled() {
// Any level ordered after DISABLED counts as enabled.
return getLevel().ordinal() > Level.DISABLED.ordinal();
}
/**
 * Sets the resource leak detection level.
 *
 * <p>This overwrites whatever the static initializer derived from the system properties.
 *
 * @param level the new level; passed through {@code ObjectUtil.checkNotNull} before being stored
 */
public static void setLevel(Level level) {
ResourceLeakDetector.level = ObjectUtil.checkNotNull(level, "level");
}
/**
 * Returns the current resource leak detection level.
 */
public static Level getLevel() {
return level;
}
}
發現 static 代碼塊中的設置是正常的,因此懷疑還有其他地方修改了該靜態變量。在 setLevel(Level level) 上打斷點,沿調用棧向上查找,發現 NettyAutoConfiguration 在初始化之後又對其進行了修改,因此需要改用 Spring 參數進行配置:
spring:
  netty:
    leakDetection: PARANOID
調用幾次後,打印出了 netty 的內存泄漏日誌:
io.netty.util.ResourceLeakDetector reportTracedLeak
嚴重: LEAK: ByteBuf.release() was not called before it's garbage-collected. See https://netty.io/wiki/reference-counted-objects.html for more information.
但是這樣觀測性太差
4.2 自定義檢測
package org.apache.shenyu.bootstrap;
import java.lang.reflect.Field;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.boot.CommandLineRunner;
import org.springframework.stereotype.Component;
import org.springframework.util.ReflectionUtils;
import io.netty.util.internal.PlatformDependent;
/**
 * Periodically logs the value of Netty's internal direct-memory counter.
 *
 * <p>When the application has started ({@link #run(String...)}), the static
 * {@code DIRECT_MEMORY_COUNTER} field of {@link PlatformDependent} is read through reflection.
 * If the counter is available, a background task logs its value in KB once per second,
 * starting five seconds after start-up.
 */
@Component
public class DirectMemoryReporter implements CommandLineRunner {

    private static final Logger log = LoggerFactory.getLogger(DirectMemoryReporter.class);

    /** Metric name used as the prefix of every reported log line. */
    private static final String HEALTH_POINT = "netty_direct_memory";

    /** Name of the static counter field looked up on {@link PlatformDependent}. */
    private static final String COUNTER_FIELD = "DIRECT_MEMORY_COUNTER";

    /** Netty's counter instance; stays {@code null} until {@link #run(String...)} has resolved it. */
    private volatile AtomicLong directMemory;

    /**
     * Resolves the counter through reflection and, on success, starts the periodic report.
     *
     * <p>Nothing is scheduled when the field cannot be found, cannot be read, or holds
     * {@code null}; the reason is logged instead.
     *
     * @param args application arguments (unused)
     */
    @Override
    public void run(String... args) throws Exception {
        Field field = ReflectionUtils.findField(PlatformDependent.class, COUNTER_FIELD);
        if (field == null) {
            // findField returns null when no such field exists (e.g. a different Netty version).
            log.error("get {} with reflect occur error: field {} not found", HEALTH_POINT, COUNTER_FIELD);
            return;
        }
        try {
            ReflectionUtils.makeAccessible(field);
            // The field is static, so no target instance is needed.
            AtomicLong counter = (AtomicLong) field.get(null);
            if (counter == null) {
                // NOTE(review): presumably Netty leaves the counter unset when it does not track
                // direct memory itself - confirm against the Netty version in use.
                log.warn("{} is not available: {} is null, reporting disabled", HEALTH_POINT, COUNTER_FIELD);
                return;
            }
            directMemory = counter;
            startReport();
        } catch (IllegalAccessException e) {
            log.error("get netty_direct_memory with reflect occur error:{}", e.getMessage(), e);
        }
    }

    /**
     * Schedules {@link #doReport()} at a fixed rate of one second after an initial delay of
     * five seconds, on a dedicated single daemon thread.
     */
    public void startReport() {
        // Daemon thread: the reporter is never shut down explicitly, so it must not keep the JVM alive.
        ScheduledExecutorService service = Executors.newSingleThreadScheduledExecutor(task -> {
            Thread thread = new Thread(task, "netty-direct-memory-reporter");
            thread.setDaemon(true);
            return thread;
        });
        service.scheduleAtFixedRate(this::doReport, 5, 1, TimeUnit.SECONDS);
    }

    /** Logs the current counter value, converted from bytes to KB. */
    private void doReport() {
        // Catch everything: an exception escaping a fixed-rate task suppresses all later executions.
        try {
            AtomicLong counter = directMemory;
            if (counter == null) {
                // Only reachable when startReport() is invoked before run() resolved the counter.
                log.debug("{} counter not initialised yet", HEALTH_POINT);
                return;
            }
            long usedBytes = counter.get();
            log.info("{}:{} KB", HEALTH_POINT, usedBytes / 1024L);
        } catch (Exception e) {
            log.error("get netty_direct_memory with reflect occur error:{}", e.getMessage(), e);
        }
    }
}
打印日誌
4.3 健康檢查端點
結合 Prometheus 進行監控報警
/**
 * Actuator endpoint (id {@code netty.direct.memory}) that exposes the value of Netty's internal
 * direct-memory counter in KB.
 *
 * <p>The counter is the static {@code DIRECT_MEMORY_COUNTER} field of {@code PlatformDependent},
 * resolved through reflection on first use and cached afterwards.
 */
@Endpoint(id = "netty.direct.memory")
public class NettyDirectMemoryEndponit {

    private static final Logger log = LoggerFactory.getLogger(NettyDirectMemoryEndponit.class);

    /** Cached counter instance; {@code null} until it has been resolved successfully. */
    private volatile AtomicLong directMemory;

    /**
     * Returns Netty's direct-memory counter, resolving it through reflection on the first call.
     *
     * @return the counter, or {@code null} when the field is missing, unreadable, or holds
     *         {@code null}; in that case the lookup is retried on the next call
     */
    public AtomicLong getDirectMemory() {
        AtomicLong counter = directMemory;
        if (counter != null) {
            return counter;
        }
        Field field = ReflectionUtils.findField(PlatformDependent.class, "DIRECT_MEMORY_COUNTER");
        if (field == null) {
            // findField returns null when no such field exists (e.g. a different Netty version).
            log.error("get netty_direct_memory with reflect occur error: field DIRECT_MEMORY_COUNTER not found");
            return null;
        }
        try {
            ReflectionUtils.makeAccessible(field);
            // The field is static, so no target instance is needed.
            counter = (AtomicLong) field.get(null);
            directMemory = counter;
        } catch (IllegalAccessException e) {
            log.error("get netty_direct_memory with reflect occur error:{}", e.getMessage(), e);
        }
        return counter;
    }

    /**
     * Read operation of the endpoint.
     *
     * @return the counter value converted from bytes to KB, or {@code 0} when the counter is
     *         unavailable or reading it fails
     */
    @ReadOperation
    public long getExecutors() {
        try {
            AtomicLong counter = getDirectMemory();
            if (counter == null) {
                // Counter unavailable: report 0 rather than failing the scrape.
                return 0L;
            }
            return counter.get() / 1024L;
        } catch (RuntimeException e) {
            // Keeps the endpoint from ever propagating a failure (e.g. an unexpected field type).
            log.error("get netty_direct_memory with reflect occur error:{}", e.getMessage(), e);
        }
        return 0L;
    }
}