3

docker 容器中的 Jdk-availableProcessors

 2 years ago
source link: https://blog.victorchu.info/posts/5c77d37/
Go to the source link to view the article. You can view the picture content, updated content and better typesetting reading experience. If the link is broken, please click the button below to view the snapshot at that time.
neoserver,ios ssh client

docker 容器中的 Jdk-availableProcessors

最近在线上环境遇到一个问题:nacos 客户端线程池中有 96 个线程在等待。一开始以为是哪里配置有误,于是检查了 nacos 的配置,但没有发现问题,只能去看 nacos 的源码了。

/**
 * Builds the client worker: stores the collaborators, runs init(properties),
 * creates two scheduled thread pools and starts the periodic config check.
 *
 * @param agent                    HTTP agent; its name is used in thread names and log messages
 * @param configFilterChainManager filter chain manager stored on this instance
 * @param properties               properties passed to init()
 */
public ClientWorker(final HttpAgent agent, final ConfigFilterChainManager configFilterChainManager, final Properties properties) {
this.agent = agent;
this.configFilterChainManager = configFilterChainManager;

// Initialize the timeout parameter

init(properties);

// Scheduled pool with a core size of 1; it runs the periodic checkConfigInfo() task scheduled below.
// Threads are daemon threads named "com.alibaba.nacos.client.Worker.<agent name>".
executor = Executors.newScheduledThreadPool(1, new ThreadFactory() {
@Override
public Thread newThread(Runnable r) {
Thread t = new Thread(r);
t.setName("com.alibaba.nacos.client.Worker." + agent.getName());
t.setDaemon(true);
return t;
}
});

// Long-polling pool: its core size is whatever Runtime.availableProcessors() reports,
// so the number of threads follows the CPU count the JVM sees.
// Threads are daemon threads named "com.alibaba.nacos.client.Worker.longPolling.<agent name>".
executorService = Executors.newScheduledThreadPool(Runtime.getRuntime().availableProcessors(), new ThreadFactory() {
@Override
public Thread newThread(Runnable r) {
Thread t = new Thread(r);
t.setName("com.alibaba.nacos.client.Worker.longPolling." + agent.getName());
t.setDaemon(true);
return t;
}
});

// Run checkConfigInfo() repeatedly: first run after 1 ms, then 10 ms after each run finishes.
executor.scheduleWithFixedDelay(new Runnable() {
@Override
public void run() {
try {
checkConfigInfo();
} catch (Throwable e) {
// Any failure is logged and swallowed here rather than propagated out of run().
LOGGER.error("[" + agent.getName() + "] [sub-check] rotate check error", e);
}
}
}, 1L, 10L, TimeUnit.MILLISECONDS);
}

如上面的代码所示,nacos 长轮询线程池在初始化时使用了 Runtime.getRuntime().availableProcessors() 作为线程数,而宿主机恰好有 96 个逻辑 CPU(48 核 * 2 超线程,见文末“Linux 查看物理 CPU 个数、核数、逻辑 CPU 个数”)。因此判断 JVM 获取的可用核数有误:拿到的是宿主机核数,而非容器的可用核数。

availableProcessors() 的源码分析

availableProcessors 方法在 java.lang.Runtime 类中,是个 native 方法。需要跟到 hotspot 代码中调查。

// Runtime.java
// Native method (implemented outside Java).
// Returns the number of processors available to the Java process.
public native int availableProcessors();

JDK 8u191 之前的代码:

// os_linux.cpp
// Returns the number of online processors as reported by
// sysconf(_SC_NPROCESSORS_ONLN); no other source is consulted here.
int os::active_processor_count() {
// Linux doesn't yet have a (official) notion of processor sets,
// so just return the number of online processors.
int online_cpus = ::sysconf(_SC_NPROCESSORS_ONLN);
// Sanity check: the online count must be positive and must not exceed processor_count().
assert(online_cpus > 0 && online_cpus <= processor_count(), "sanity check");
return online_cpus;
}

通过 sysconf 获取系统参数 _SC_NPROCESSORS_ONLN,所以返回的是宿主机的可用核数。

JDK 8u191 发布了 Java Improvements for Docker Containers,支持 Docker 容器,并添加了两个 JVM 参数:

-XX:-UseContainerSupport 关闭容器支持
-XX:ActiveProcessorCount=N 手动指定可用 CPU 数量

JDK 8u191 的代码不好找,直接看 JDK 15 的:

// os_linux.cpp
// If the JVM flag -XX:ActiveProcessorCount is set, return its value directly.
// If running inside a container, call OSContainer::active_processor_count().
// Otherwise, call os::Linux::active_processor_count().
int os::active_processor_count() {
// User has overridden the number of active processors
if (ActiveProcessorCount > 0) {
log_trace(os)("active_processor_count: "
"active processor count set by user : %d",
ActiveProcessorCount);
return ActiveProcessorCount;
}

int active_cpus;
if (OSContainer::is_containerized()) {
// Containerized: let the container layer decide the count.
active_cpus = OSContainer::active_processor_count();
log_trace(os)("active_processor_count: determined by OSContainer: %d",
active_cpus);
} else {
// Returns the number of cores available to the current process; compared with
// earlier versions this adds CPU-affinity handling.
active_cpus = os::Linux::active_processor_count();
}

return active_cpus;
}
容器内的核数计算逻辑在 OSContainer 和 CgroupSubsystem 中:

// osContainer_linux.cpp
// Returns the active processor count by delegating to the cgroup subsystem object.
int OSContainer::active_processor_count() {
// The cgroup subsystem must already be set up before this is called.
assert(cgroup_subsystem != NULL, "cgroup subsystem not available");
// Delegate to the cgroup subsystem's active_processor_count().
// cgroups are the kernel's resource-isolation mechanism and the basis of containerization.
return cgroup_subsystem->active_processor_count();
}
// cgroupSubsystem_linux.cpp
// Computes the processor count visible to the JVM inside a container.
// If the container sets cpu.cfs_period_us and cpu.cfs_quota_us, the quota divided by
// the period is used.
// If the container sets cpu.shares, the count is derived from shares (a relative value).
// The result never exceeds os::Linux::active_processor_count() and is cached.
int CgroupSubsystem::active_processor_count() {
int quota_count = 0, share_count = 0;
int cpu_count, limit_count;
int result;

// Return the cached value while the cache says no re-check is needed.
CachingCgroupController* contrl = cpu_controller();
CachedMetric* cpu_limit = contrl->metrics_cache();
if (!cpu_limit->should_check_metric()) {
int val = (int)cpu_limit->value();
log_trace(os, container)("CgroupSubsystem::active_processor_count (cached): %d", val);
return val;
}

// Start from the host-side count; limit_count stays at this value if no limit applies.
cpu_count = limit_count = os::Linux::active_processor_count();
int quota = cpu_quota();
int period = cpu_period();
int share = cpu_shares();

// Quota-based count: quota / period, rounded up.
if (quota > -1 && period > 0) {
quota_count = ceilf((float)quota / (float)period);
log_trace(os, container)("CPU Quota count based on quota/period: %d", quota_count);
}
// Share-based count: share / PER_CPU_SHARES, rounded up.
if (share > -1) {
share_count = ceilf((float)share / (float)PER_CPU_SHARES);
log_trace(os, container)("CPU Share count based on shares: %d", share_count);
}

if (quota_count !=0 && share_count != 0) {
// If the JVM flag PreferContainerQuotaForCPUCount is true, use quota_count;
// otherwise use the smaller of quota_count and share_count.
if (PreferContainerQuotaForCPUCount) {
limit_count = quota_count;
} else {
limit_count = MIN2(quota_count, share_count);
}
} else if (quota_count != 0) {
limit_count = quota_count;
} else if (share_count != 0) {
limit_count = share_count;
}

// cpu_count is the number of available cores reported by the kernel.
// Return the smaller of cpu_count and limit_count.
result = MIN2(cpu_count, limit_count);
log_trace(os, container)("OSContainer::active_processor_count: %d", result);

// Update cached metric to avoid re-reading container settings too often
cpu_limit->set_value(result, OSCONTAINER_CACHE_TIMEOUT);

return result;
}

Linux 查看物理 CPU 个数、核数、逻辑 CPU 个数

  • CPU 总核数 = 物理 CPU 个数 * 每颗物理 CPU 的核数
  • 总逻辑 CPU 数 = 物理 CPU 个数 * 每颗物理 CPU 的核数 * 超线程数
# 查看CPU信息(型号)
[root@AAA ~]# cat /proc/cpuinfo | grep name | cut -f2 -d: | uniq -c
96 Intel(R) Xeon(R) Platinum 8255C CPU @ 2.50GHz

# 查看物理CPU个数
[root@AAA ~]# cat /proc/cpuinfo| grep "physical id"| sort| uniq| wc -l
2

# 查看每个物理CPU中core的个数(即核数)
[root@AAA ~]# cat /proc/cpuinfo| grep "cpu cores"| uniq
cpu cores : 24

# 查看逻辑CPU的个数
[root@AAA ~]# cat /proc/cpuinfo| grep "processor"| wc -l
96

About Joyk


Aggregate valuable and interesting links.
Joyk means Joy of geeK