cadvisor master branch
cadvisor支持的所有指標
代碼位置:github.com/google/cadvisor/container/factory.go
// MetricKind identifiers, each bound to the string value that names it.
// NOTE(review): only the bare bindings are reproduced here; in the real source
// they presumably sit inside a const ( ... ) block — confirm against factory.go.
CpuUsageMetrics MetricKind = "cpu"
ProcessSchedulerMetrics MetricKind = "sched"
PerCpuUsageMetrics MetricKind = "percpu"
MemoryUsageMetrics MetricKind = "memory"
CpuLoadMetrics MetricKind = "cpuLoad"
DiskIOMetrics MetricKind = "diskIO"
DiskUsageMetrics MetricKind = "disk"
NetworkUsageMetrics MetricKind = "network"
NetworkTcpUsageMetrics MetricKind = "tcp"
NetworkAdvancedTcpUsageMetrics MetricKind = "advtcp"
NetworkUdpUsageMetrics MetricKind = "udp"
AcceleratorUsageMetrics MetricKind = "accelerator"
AppMetrics MetricKind = "app"
ProcessMetrics MetricKind = "process"
HugetlbUsageMetrics MetricKind = "hugetlb"
PerfMetrics MetricKind = "perf_event"
默認被忽略的指標
// MetricKind bindings repeated from the full list; these six are the ones the
// surrounding notes describe as ignored by default.
// NOTE(review): the code that applies this default is not shown here — confirm.
NetworkUdpUsageMetrics MetricKind = "udp"
NetworkTcpUsageMetrics MetricKind = "tcp"
NetworkAdvancedTcpUsageMetrics MetricKind = "advtcp"
ProcessSchedulerMetrics MetricKind = "sched"
ProcessMetrics MetricKind = "process"
HugetlbUsageMetrics MetricKind = "hugetlb"
cadvisor 收集到的指標默認在內存中緩存 2 分鐘(由 storage_duration 參數控制,默認值為 2*time.Minute)。
// storageDriver holds the value of the -storage_driver flag: a comma-separated
// list of backend driver names. NewMemoryStorage skips empty entries, so the
// empty default selects no backend.
var storageDriver = flag.String("storage_driver", "", fmt.Sprintf("Storage `driver` to use. Data is always cached shortly in memory, this controls where data is pushed besides the local cache. Empty means none, multiple separated by commas. Options are: <empty>, %s", strings.Join(storage.ListDrivers(), ", ")))

// storageDuration holds the value of the -storage_duration flag, which
// NewMemoryStorage passes to memory.New. It defaults to two minutes.
var storageDuration = flag.Duration("storage_duration", 2*time.Minute, "How long to keep data stored (Default: 2min).")
// NewMemoryStorage builds the in-memory stats cache and hands it every backend
// storage driver named in the -storage_driver flag.
//
// The flag value is split on commas and empty entries are skipped, so the
// default empty flag yields a cache with no backends. The first name that
// storage.New rejects aborts construction; its error is returned unchanged.
func NewMemoryStorage() (*memory.InMemoryCache, error) {
	backends := []storage.StorageDriver{}

	requested := strings.Split(*storageDriver, ",")
	for _, name := range requested {
		if name == "" {
			continue
		}
		// Named "backend" rather than "storage" so the storage package stays
		// visible inside the loop body.
		backend, err := storage.New(name)
		if err != nil {
			return nil, err
		}
		klog.V(1).Infof("Using backend storage type %q", name)
		backends = append(backends, backend)
	}

	klog.V(1).Infof("Caching stats in memory for %v", *storageDuration)
	cache := memory.New(*storageDuration, backends)
	return cache, nil
}
cadvisor獲取文件系統信息
sysFs提供了以下interface
// SysFs abstracts the lowest level calls to sysfs.
// Every method returns an error alongside its result; most results are
// strings or os.FileInfo directory entries.
type SysFs interface {
// Get NUMA nodes paths
GetNodesPaths() ([]string, error)
// Get paths to CPUs in provided directory e.g. /sys/devices/system/node/node0 or /sys/devices/system/cpu
GetCPUsPaths(cpusPath string) ([]string, error)
// Get physical core id for specified CPU
GetCoreID(coreIDFilePath string) (string, error)
// Get physical package id for specified CPU
GetCPUPhysicalPackageID(cpuPath string) (string, error)
// Get total memory for specified NUMA node
GetMemInfo(nodeDir string) (string, error)
// Get hugepages from specified directory
GetHugePagesInfo(hugePagesDirectory string) ([]os.FileInfo, error)
// Get hugepage_nr from specified directory
GetHugePagesNr(hugePagesDirectory string, hugePageName string) (string, error)
// Get directory information for available block devices.
GetBlockDevices() ([]os.FileInfo, error)
// Get Size of a given block device.
GetBlockDeviceSize(string) (string, error)
// Get scheduler type for the block device.
GetBlockDeviceScheduler(string) (string, error)
// Get device major:minor number string.
GetBlockDeviceNumbers(string) (string, error)
// NOTE(review): the network methods below are undocumented in the original.
// Presumably GetNetworkDevices lists network devices as directory entries,
// mirroring GetBlockDevices — confirm against the implementation.
GetNetworkDevices() ([]os.FileInfo, error)
// Presumably the address of the named network device — confirm.
GetNetworkAddress(string) (string, error)
// Presumably the MTU of the named network device, as a string — confirm.
GetNetworkMtu(string) (string, error)
// Presumably the link speed of the named network device, as a string — confirm.
GetNetworkSpeed(string) (string, error)
// Returns a uint64 value for statistic stat of device dev.
// NOTE(review): where the value is read from is not visible here.
GetNetworkStatValue(dev string, stat string) (uint64, error)
// Get directory information for available caches accessible to given cpu.
GetCaches(id int) ([]os.FileInfo, error)
// Get information for a cache accessible from the given cpu.
GetCacheInfo(cpu int, cache string) (CacheInfo, error)
// Returns the system UUID as a string.
// NOTE(review): the backing file is not visible here.
GetSystemUUID() (string, error)
}
container manager
代碼位置:github.com/google/cadvisor/manager/manager.go
// New builds a Manager on top of the given in-memory stats cache.
//
// It returns an error when memoryCache is nil, or when any startup step
// fails: locating cAdvisor's own "cpu" cgroup, initializing the filesystem
// context, collecting filesystem info, collecting machine info, creating the
// perf manager, or reading version info. Errors from those steps are returned
// unchanged.
func New(memoryCache *memory.InMemoryCache, sysfs sysfs.SysFs, houskeepingConfig HouskeepingConfig, includedMetricsSet container.MetricSet, collectorHttpClient *http.Client, rawContainerCgroupPathPrefixWhiteList []string, perfEventsFile string) (Manager, error) {
	if memoryCache == nil {
		return nil, fmt.Errorf("manager requires memory storage")
	}

	// Work out which cgroup cAdvisor itself is running in.
	ownCgroup, err := cgroups.GetOwnCgroupPath("cpu")
	if err != nil {
		return nil, err
	}
	klog.V(2).Infof("cAdvisor running in container: %q", ownCgroup)

	var fsContext fs.Context
	if err = container.InitializeFSContext(&fsContext); err != nil {
		return nil, err
	}
	filesystemInfo, err := fs.NewFsInfo(fsContext)
	if err != nil {
		return nil, err
	}

	// The host's rootfs is expected at /rootfs when cAdvisor runs in its own
	// namespaces; if /rootfs/proc does not exist we treat ourselves as
	// running in the host namespace.
	_, statErr := os.Stat("/rootfs/proc")
	hostNamespace := os.IsNotExist(statErr)

	m := &manager{
		containers:               make(map[namespacedContainerName]*containerData),
		quitChannels:             make([]chan error, 0, 2),
		memoryCache:              memoryCache,
		fsInfo:                   filesystemInfo,
		sysFs:                    sysfs,
		cadvisorContainer:        ownCgroup,
		inHostNamespace:          hostNamespace,
		startupTime:              time.Now(),
		maxHousekeepingInterval:  *houskeepingConfig.Interval,
		allowDynamicHousekeeping: *houskeepingConfig.AllowDynamic,
		includedMetrics:          includedMetricsSet,
		containerWatchers:        []watcher.ContainerWatcher{},
		// Buffered channel on which new-subcontainer events are delivered.
		eventsChannel:                         make(chan watcher.ContainerEvent, 16),
		collectorHttpClient:                   collectorHttpClient,
		nvidiaManager:                         accelerators.NewNvidiaManager(),
		rawContainerCgroupPathPrefixWhiteList: rawContainerCgroupPathPrefixWhiteList,
	}

	hostInfo, err := machine.Info(sysfs, filesystemInfo, hostNamespace)
	if err != nil {
		return nil, err
	}
	m.machineInfo = *hostInfo
	klog.V(1).Infof("Machine: %+v", m.machineInfo)

	// The perf manager is sized by the core count reported in machine info.
	if m.perfManager, err = perf.NewManager(perfEventsFile, hostInfo.NumCores); err != nil {
		return nil, err
	}

	version, err := getVersionInfo()
	if err != nil {
		return nil, err
	}
	klog.V(1).Infof("Version: %+v", *version)

	m.eventHandler = events.NewEventManager(parseEventsStoragePolicy())
	return m, nil
}
得到cgroup信息
// GetOwnCgroupPath resolves the calling process's cgroup for subsystem via
// GetOwnCgroup and passes it to getCgroupPathHelper to obtain the path.
// An error from GetOwnCgroup is returned unchanged with an empty path.
func GetOwnCgroupPath(subsystem string) (string, error) {
	ownCgroup, err := GetOwnCgroup(subsystem)
	if err == nil {
		return getCgroupPathHelper(subsystem, ownCgroup)
	}
	return "", err
}
讀取的是容器內的 /proc/self/cgroup
// GetOwnCgroup parses /proc/self/cgroup and returns what getControllerPath
// finds there for subsystem, i.e. the cgroup entry of the calling process.
// A parse failure is returned unchanged with an empty path.
func GetOwnCgroup(subsystem string) (string, error) {
	parsed, err := ParseCgroupFile("/proc/self/cgroup")
	if err == nil {
		return getControllerPath(subsystem, parsed)
	}
	return "", err
}
獲取cgroup的mount point和root
mount point 是從 /proc/self/mountinfo 讀取的
// FindCgroupMountpointAndRoot opens /proc/self/mountinfo and returns whatever
// findCgroupMountpointAndRootFromReader reports for cgroupPath and subsystem.
//
// If isSubsystemAvailable rejects subsystem, the error built by
// NewNotFoundError is returned before the file is touched. When
// IsCgroup2UnifiedMode reports true, the lookup uses an empty subsystem.
func FindCgroupMountpointAndRoot(cgroupPath, subsystem string) (string, string, error) {
	// mount.GetMounts() is deliberately not used: it is very inefficient, and
	// parsing mountinfo directly (without Sscanf) was about 10x faster. That
	// cost was one of two major performance drawbacks in container start.
	if !isSubsystemAvailable(subsystem) {
		return "", "", NewNotFoundError(subsystem)
	}

	mountInfo, err := os.Open("/proc/self/mountinfo")
	if err != nil {
		return "", "", err
	}
	defer mountInfo.Close()

	lookup := subsystem
	if IsCgroup2UnifiedMode() {
		lookup = ""
	}
	return findCgroupMountpointAndRootFromReader(mountInfo, cgroupPath, lookup)
}
文件系統信息(fsinfo)
// NewFsInfo builds an FsInfo from the mounts listed in /proc/self/mountinfo.
//
// A failure to parse mountinfo is returned as the error. A failure to build
// the filesystem-UUID-to-device map is only logged as a warning, and whatever
// map was returned is stored as-is.
func NewFsInfo(context Context) (FsInfo, error) {
	mountList, err := mount.ParseMountInfo("/proc/self/mountinfo")
	if err != nil {
		return nil, err
	}

	uuidToDevice, err := getFsUUIDToDeviceNameMap()
	if err != nil {
		// UUIDs are not available on every OS distribution, so this is
		// tolerated rather than treated as fatal.
		klog.Warningf("Failed to get disk UUID mapping, getting disk info by uuid will not work: %v", err)
	}

	// Devicemapper container mounts are skipped here; the ThinPoolWatcher
	// tracks them instead.
	devicemapperMounts := fmt.Sprintf("%s/devicemapper/mnt", context.Docker.Root)
	excluded := []string{devicemapperMounts}

	realFs := &RealFsInfo{
		partitions:         processMounts(mountList, excluded),
		labels:             make(map[string]string),
		mounts:             make(map[string]mount.MountInfo),
		dmsetup:            devicemapper.NewDmsetupClient(),
		fsUUIDToDeviceName: uuidToDevice,
	}

	// Index every mount by its mount point.
	for _, entry := range mountList {
		realFs.mounts[entry.MountPoint] = entry
	}

	// The image labels must be added before the partitions are logged below,
	// because doing so may add a devicemapper "partition" to realFs.partitions.
	realFs.addDockerImagesLabel(context, mountList)
	realFs.addCrioImagesLabel(context, mountList)

	klog.V(1).Infof("Filesystem UUIDs: %+v", realFs.fsUUIDToDeviceName)
	klog.V(1).Infof("Filesystem partitions: %+v", realFs.partitions)

	realFs.addSystemRootLabel(mountList)
	return realFs, nil
}
machine info
幾乎涵蓋了常用指標信息
cpu: /proc/cpuinfo
mem: /proc/meminfo
hugepages: /sys/kernel/mm/hugepages/
fsinfo: /proc/diskstats
網絡設備(網卡): ls /sys/class/net
網絡地址: cat /sys/class/net/網卡/address
網絡mtu: cat /sys/class/net/網卡/mtu
網絡傳輸速率: cat /sys/class/net/網卡/speed
網絡io: ls /sys/class/net/網卡/statistics
// Info assembles a MachineInfo snapshot of the host.
//
// /proc/cpuinfo and the machine-id / boot-id files are read relative to "/"
// when inHostNamespace is true and relative to "/rootfs" otherwise.
//
// Failures reading cpuinfo, or obtaining clock speed, memory capacity, memory
// by type, NVM info or huge pages info, abort with that error. Failures
// obtaining filesystems, the disk map, network devices, topology or the
// system UUID are only logged, and the corresponding field carries whatever
// the failed call returned.
func Info(sysFs sysfs.SysFs, fsInfo fs.FsInfo, inHostNamespace bool) (*info.MachineInfo, error) {
	rootFs := "/rootfs"
	if inHostNamespace {
		rootFs = "/"
	}

	// Mandatory probes: any failure here is fatal.
	cpuinfo, err := ioutil.ReadFile(filepath.Join(rootFs, "/proc/cpuinfo"))
	if err != nil {
		return nil, err
	}
	clockSpeed, err := GetClockSpeed(cpuinfo)
	if err != nil {
		return nil, err
	}
	memoryCapacity, err := GetMachineMemoryCapacity()
	if err != nil {
		return nil, err
	}
	memoryByType, err := GetMachineMemoryByType(memoryControllerPath)
	if err != nil {
		return nil, err
	}
	nvmInfo, err := nvm.GetInfo()
	if err != nil {
		return nil, err
	}
	hugePagesInfo, err := sysinfo.GetHugePagesInfo(sysFs, hugepagesDirectory)
	if err != nil {
		return nil, err
	}

	// Best-effort probes: failures are logged and the result is used as-is.
	filesystems, err := fsInfo.GetGlobalFsInfo()
	if err != nil {
		klog.Errorf("Failed to get global filesystem information: %v", err)
	}
	diskMap, err := sysinfo.GetBlockDeviceInfo(sysFs)
	if err != nil {
		klog.Errorf("Failed to get disk map: %v", err)
	}
	netDevices, err := sysinfo.GetNetworkDevices(sysFs)
	if err != nil {
		klog.Errorf("Failed to get network devices: %v", err)
	}
	topology, numCores, err := GetTopology(sysFs)
	if err != nil {
		klog.Errorf("Failed to get topology information: %v", err)
	}
	systemUUID, err := sysinfo.GetSystemUUID(sysFs)
	if err != nil {
		klog.Errorf("Failed to get system UUID: %v", err)
	}

	cloud := cloudinfo.NewRealCloudInfo()
	cloudProvider := cloud.GetCloudProvider()
	instanceType := cloud.GetInstanceType()
	instanceID := cloud.GetInstanceID()

	machineInfo := &info.MachineInfo{
		NumCores:         numCores,
		NumPhysicalCores: GetPhysicalCores(cpuinfo),
		NumSockets:       GetSockets(cpuinfo),
		CpuFrequency:     clockSpeed,
		MemoryCapacity:   memoryCapacity,
		MemoryByType:     memoryByType,
		NVMInfo:          nvmInfo,
		HugePages:        hugePagesInfo,
		DiskMap:          diskMap,
		NetworkDevices:   netDevices,
		Topology:         topology,
		MachineID:        getInfoFromFiles(filepath.Join(rootFs, *machineIdFilePath)),
		SystemUUID:       systemUUID,
		BootID:           getInfoFromFiles(filepath.Join(rootFs, *bootIdFilePath)),
		CloudProvider:    cloudProvider,
		InstanceType:     instanceType,
		InstanceID:       instanceID,
	}

	// Convert each filesystem entry into the API representation. The inode
	// count is optional, so it is reported as 0 with HasInodes=false when
	// absent.
	for _, entry := range filesystems {
		var inodeCount uint64
		hasInodes := entry.Inodes != nil
		if hasInodes {
			inodeCount = *entry.Inodes
		}
		machineInfo.Filesystems = append(machineInfo.Filesystems, info.FsInfo{
			Device:      entry.Device,
			DeviceMajor: uint64(entry.Major),
			DeviceMinor: uint64(entry.Minor),
			Type:        entry.Type.String(),
			Capacity:    entry.Capacity,
			Inodes:      inodeCount,
			HasInodes:   hasInodes,
		})
	}

	return machineInfo, nil
}