prometheus如何收集cadvisor指標

 

cadvisor master branch

cadvisor支持的所有指標

代碼位置:github.com/google/cadvisor/container/factory.go

// Metric kinds listed by the article as supported by cAdvisor. The string on the
// right-hand side is the value of each MetricKind constant.
CpuUsageMetrics                MetricKind = "cpu"
ProcessSchedulerMetrics        MetricKind = "sched"
PerCpuUsageMetrics             MetricKind = "percpu"
MemoryUsageMetrics             MetricKind = "memory"
CpuLoadMetrics                 MetricKind = "cpuLoad"
DiskIOMetrics                  MetricKind = "diskIO"
DiskUsageMetrics               MetricKind = "disk"
NetworkUsageMetrics            MetricKind = "network"
NetworkTcpUsageMetrics         MetricKind = "tcp"
NetworkAdvancedTcpUsageMetrics MetricKind = "advtcp"
NetworkUdpUsageMetrics         MetricKind = "udp"
AcceleratorUsageMetrics        MetricKind = "accelerator"
AppMetrics                     MetricKind = "app"
ProcessMetrics                 MetricKind = "process"
HugetlbUsageMetrics            MetricKind = "hugetlb"
PerfMetrics                    MetricKind = "perf_event"

默認被忽略的指標

// Subset of metric kinds that, per the surrounding article text, is ignored by
// default. NOTE(review): the code that applies this default is not shown here —
// confirm against the cAdvisor flag definitions.
NetworkUdpUsageMetrics         MetricKind = "udp"
NetworkTcpUsageMetrics         MetricKind = "tcp"
NetworkAdvancedTcpUsageMetrics MetricKind = "advtcp"
ProcessSchedulerMetrics        MetricKind = "sched"
ProcessMetrics                 MetricKind = "process"
HugetlbUsageMetrics            MetricKind = "hugetlb"

cadvisor收集到的指標默認在內存裏面緩存2分鐘(可通過 storage_duration 參數調整).

var (
	// storageDriver holds the "storage_driver" flag: a comma-separated list of
	// backend storage drivers that data is pushed to in addition to the local
	// in-memory cache. The default is the empty string, meaning no backend.
	storageDriver   = flag.String("storage_driver", "", fmt.Sprintf("Storage `driver` to use. Data is always cached shortly in memory, this controls where data is pushed besides the local cache. Empty means none, multiple separated by commas. Options are: <empty>, %s", strings.Join(storage.ListDrivers(), ", ")))
	// storageDuration holds the "storage_duration" flag: how long data is kept
	// stored. The default is two minutes.
	storageDuration = flag.Duration("storage_duration", 2*time.Minute, "How long to keep data stored (Default: 2min).")
)

// NewMemoryStorage builds the in-memory cache, attaching one backend storage
// driver for every non-empty, comma-separated name in the storage_driver flag.
// The cache is created with the duration given by the storage_duration flag.
// It returns the error from storage.New for the first driver that cannot be created.
func NewMemoryStorage() (*memory.InMemoryCache, error) {
	backends := make([]storage.StorageDriver, 0)
	for _, name := range strings.Split(*storageDriver, ",") {
		// An empty flag value (or a stray comma) yields empty names; ignore them.
		if name == "" {
			continue
		}
		backend, err := storage.New(name)
		if err != nil {
			return nil, err
		}
		klog.V(1).Infof("Using backend storage type %q", name)
		backends = append(backends, backend)
	}
	klog.V(1).Infof("Caching stats in memory for %v", *storageDuration)
	return memory.New(*storageDuration, backends), nil
}

cadvisor獲取文件系統信息

sysFs提供了以下interface

// SysFs abstracts the lowest level calls to sysfs.
//
// Most methods return the raw string or directory listing and leave parsing to
// the caller; GetNetworkStatValue is the exception and returns a uint64.
type SysFs interface {
	// Get NUMA nodes paths
	GetNodesPaths() ([]string, error)
	// Get paths to CPUs in provided directory e.g. /sys/devices/system/node/node0 or /sys/devices/system/cpu
	GetCPUsPaths(cpusPath string) ([]string, error)
	// Get physical core id for specified CPU
	GetCoreID(coreIDFilePath string) (string, error)
	// Get physical package id for specified CPU
	GetCPUPhysicalPackageID(cpuPath string) (string, error)
	// Get total memory for specified NUMA node
	GetMemInfo(nodeDir string) (string, error)
	// Get hugepages from specified directory
	GetHugePagesInfo(hugePagesDirectory string) ([]os.FileInfo, error)
	// Get hugepage_nr from specified directory
	GetHugePagesNr(hugePagesDirectory string, hugePageName string) (string, error)
	// Get directory information for available block devices.
	GetBlockDevices() ([]os.FileInfo, error)
	// Get Size of a given block device.
	GetBlockDeviceSize(string) (string, error)
	// Get scheduler type for the block device.
	GetBlockDeviceScheduler(string) (string, error)
	// Get device major:minor number string.
	GetBlockDeviceNumbers(string) (string, error)

	// Get directory information for network devices.
	GetNetworkDevices() ([]os.FileInfo, error)
	// Get the address of a network device.
	// NOTE(review): the string argument is presumably the device name — confirm against the implementation.
	GetNetworkAddress(string) (string, error)
	// Get the MTU of a network device, as a string.
	GetNetworkMtu(string) (string, error)
	// Get the speed of a network device, as a string.
	GetNetworkSpeed(string) (string, error)
	// Get the value of the statistic named stat for network device dev.
	GetNetworkStatValue(dev string, stat string) (uint64, error)

	// Get directory information for available caches accessible to given cpu.
	GetCaches(id int) ([]os.FileInfo, error)
	// Get information for a cache accessible from the given cpu.
	GetCacheInfo(cpu int, cache string) (CacheInfo, error)

	// Get the system UUID.
	GetSystemUUID() (string, error)
}

container manager

代碼位置:github.com/google/cadvisor/manager/manager.go

// New constructs a Manager on top of the supplied in-memory cache.
//
// It returns an error when memoryCache is nil, or when any of the required setup
// steps fails: resolving cAdvisor's own "cpu" cgroup path, initializing the
// filesystem context, building the filesystem info, collecting machine info,
// creating the perf manager, or reading the version info.
func New(memoryCache *memory.InMemoryCache, sysfs sysfs.SysFs, houskeepingConfig HouskeepingConfig, includedMetricsSet container.MetricSet, collectorHttpClient *http.Client, rawContainerCgroupPathPrefixWhiteList []string, perfEventsFile string) (Manager, error) {
	if memoryCache == nil {
		return nil, fmt.Errorf("manager requires memory storage")
	}

	// Find the cgroup that cAdvisor itself is running in.
	ownCgroup, err := cgroups.GetOwnCgroupPath("cpu")
	if err != nil {
		return nil, err
	}
	klog.V(2).Infof("cAdvisor running in container: %q", ownCgroup)

	fsContext := fs.Context{}
	err = container.InitializeFSContext(&fsContext)
	if err != nil {
		return nil, err
	}

	fsInfo, err := fs.NewFsInfo(fsContext)
	if err != nil {
		return nil, err
	}

	// When the host's rootfs is mounted at /rootfs, cAdvisor is assumed to run in
	// its own namespaces; only a missing /rootfs/proc marks the host namespace.
	_, statErr := os.Stat("/rootfs/proc")
	inHostNamespace := os.IsNotExist(statErr)

	// Buffered channel on which new-subcontainer events are delivered.
	containerEvents := make(chan watcher.ContainerEvent, 16)

	m := &manager{
		containers:                            make(map[namespacedContainerName]*containerData),
		quitChannels:                          make([]chan error, 0, 2),
		memoryCache:                           memoryCache,
		fsInfo:                                fsInfo,
		sysFs:                                 sysfs,
		cadvisorContainer:                     ownCgroup,
		inHostNamespace:                       inHostNamespace,
		startupTime:                           time.Now(),
		maxHousekeepingInterval:               *houskeepingConfig.Interval,
		allowDynamicHousekeeping:              *houskeepingConfig.AllowDynamic,
		includedMetrics:                       includedMetricsSet,
		containerWatchers:                     []watcher.ContainerWatcher{},
		eventsChannel:                         containerEvents,
		collectorHttpClient:                   collectorHttpClient,
		nvidiaManager:                         accelerators.NewNvidiaManager(),
		rawContainerCgroupPathPrefixWhiteList: rawContainerCgroupPathPrefixWhiteList,
	}

	hostInfo, err := machine.Info(sysfs, fsInfo, inHostNamespace)
	if err != nil {
		return nil, err
	}
	m.machineInfo = *hostInfo
	klog.V(1).Infof("Machine: %+v", m.machineInfo)

	m.perfManager, err = perf.NewManager(perfEventsFile, hostInfo.NumCores)
	if err != nil {
		return nil, err
	}

	// The version info is only logged, but failing to obtain it is still fatal.
	verInfo, err := getVersionInfo()
	if err != nil {
		return nil, err
	}
	klog.V(1).Infof("Version: %+v", *verInfo)

	m.eventHandler = events.NewEventManager(parseEventsStoragePolicy())
	return m, nil
}

得到cgroup信息

// GetOwnCgroupPath looks up the current process's cgroup for the given subsystem
// via GetOwnCgroup and hands it to getCgroupPathHelper to produce the path.
// Any error from GetOwnCgroup is returned with an empty path.
func GetOwnCgroupPath(subsystem string) (string, error) {
	ownCgroup, err := GetOwnCgroup(subsystem)
	if err != nil {
		return "", err
	}

	return getCgroupPathHelper(subsystem, ownCgroup)
}

讀取的是容器內的 /proc/self/cgroup

// GetOwnCgroup parses /proc/self/cgroup and returns the entry for the given
// subsystem, i.e. the relative path of the cgroup the calling process runs in.
// A parse failure is returned with an empty path.
func GetOwnCgroup(subsystem string) (string, error) {
	entries, err := ParseCgroupFile("/proc/self/cgroup")
	if err != nil {
		return "", err
	}

	return getControllerPath(subsystem, entries)
}

獲取cgroup的mount point和root

mount point是讀取的/proc/self/mountinfo

// FindCgroupMountpointAndRoot scans /proc/self/mountinfo for the cgroup mount
// matching cgroupPath and subsystem, delegating the actual search to
// findCgroupMountpointAndRootFromReader.
//
// It returns a not-found error when the subsystem is unavailable, and the
// os.Open error when mountinfo cannot be opened. In cgroup v2 unified mode the
// subsystem filter is cleared before searching.
func FindCgroupMountpointAndRoot(cgroupPath, subsystem string) (string, string, error) {
	// mount.GetMounts() is deliberately avoided: it is very inefficient, and
	// parsing mountinfo directly (no Sscanf) was about ten times faster. This
	// was one of two major performance drawbacks in container start.
	if !isSubsystemAvailable(subsystem) {
		return "", "", NewNotFoundError(subsystem)
	}

	mountinfo, err := os.Open("/proc/self/mountinfo")
	if err != nil {
		return "", "", err
	}
	defer mountinfo.Close()

	wanted := subsystem
	if IsCgroup2UnifiedMode() {
		wanted = ""
	}

	return findCgroupMountpointAndRootFromReader(mountinfo, cgroupPath, wanted)
}

文件系統信息(fsinfo)

// NewFsInfo builds an FsInfo from the mounts listed in /proc/self/mountinfo.
//
// A failure to parse mountinfo is returned as an error. A failure to build the
// filesystem-UUID-to-device map is only logged as a warning, and the result is
// still returned with whatever map was obtained.
func NewFsInfo(context Context) (FsInfo, error) {
	mounts, err := mount.ParseMountInfo("/proc/self/mountinfo")
	if err != nil {
		return nil, err
	}

	uuidToDevice, err := getFsUUIDToDeviceNameMap()
	if err != nil {
		// Not every OS distribution exposes UUIDs, so this is not fatal.
		klog.Warningf("Failed to get disk UUID mapping, getting disk info by uuid will not work: %v", err)
	}

	// devicemapper container mounts are tracked by the ThinPoolWatcher, so they
	// are excluded from the partition list.
	dmMountDir := fmt.Sprintf("%s/devicemapper/mnt", context.Docker.Root)
	result := &RealFsInfo{
		partitions:         processMounts(mounts, []string{dmMountDir}),
		labels:             make(map[string]string),
		mounts:             make(map[string]mount.MountInfo),
		dmsetup:            devicemapper.NewDmsetupClient(),
		fsUUIDToDeviceName: uuidToDevice,
	}

	// Index every mount by its mount point.
	for _, mnt := range mounts {
		result.mounts[mnt.MountPoint] = mnt
	}

	// The image labels must be added before the partitions are logged below,
	// because doing so may add a devicemapper "partition" to result.partitions.
	result.addDockerImagesLabel(context, mounts)
	result.addCrioImagesLabel(context, mounts)

	klog.V(1).Infof("Filesystem UUIDs: %+v", result.fsUUIDToDeviceName)
	klog.V(1).Infof("Filesystem partitions: %+v", result.partitions)
	result.addSystemRootLabel(mounts)
	return result, nil
}

machine info

幾乎涵蓋了常用指標信息

cpu: /proc/cpuinfo

mem:/proc/meminfo

hugepages: /sys/kernel/mm/hugepages/

fsinfo: /proc/diskstats

網絡設備(網卡): ls /sys/class/net

網絡地址: cat /sys/class/net/網卡/address

網絡mtu:  cat /sys/class/net/網卡/mtu

網絡傳輸速率: cat /sys/class/net/網卡/speed

網絡io: ls /sys/class/net/網卡/statistics

 

// Info collects a description of the machine cAdvisor is running on.
//
// When inHostNamespace is false, cpuinfo and the machine/boot ID files are read
// from under /rootfs instead of /.
//
// Failures reading cpuinfo, the clock speed, memory capacity, memory by type,
// NVM info or huge pages info abort the call with an error. Failures gathering
// filesystems, the disk map, network devices, topology or the system UUID are
// only logged, and whatever value was returned is used as-is.
func Info(sysFs sysfs.SysFs, fsInfo fs.FsInfo, inHostNamespace bool) (*info.MachineInfo, error) {
	var rootFs string
	if inHostNamespace {
		rootFs = "/"
	} else {
		rootFs = "/rootfs"
	}

	// Required data: any failure here is fatal.
	rawCPUInfo, err := ioutil.ReadFile(filepath.Join(rootFs, "/proc/cpuinfo"))
	if err != nil {
		return nil, err
	}
	cpuFrequency, err := GetClockSpeed(rawCPUInfo)
	if err != nil {
		return nil, err
	}

	memCapacity, err := GetMachineMemoryCapacity()
	if err != nil {
		return nil, err
	}

	memByType, err := GetMachineMemoryByType(memoryControllerPath)
	if err != nil {
		return nil, err
	}

	nvm, err := nvm.GetInfo()
	if err != nil {
		return nil, err
	}

	hugePages, err := sysinfo.GetHugePagesInfo(sysFs, hugepagesDirectory)
	if err != nil {
		return nil, err
	}

	// Best-effort data: failures are logged and the call carries on.
	globalFs, err := fsInfo.GetGlobalFsInfo()
	if err != nil {
		klog.Errorf("Failed to get global filesystem information: %v", err)
	}

	disks, err := sysinfo.GetBlockDeviceInfo(sysFs)
	if err != nil {
		klog.Errorf("Failed to get disk map: %v", err)
	}

	nics, err := sysinfo.GetNetworkDevices(sysFs)
	if err != nil {
		klog.Errorf("Failed to get network devices: %v", err)
	}

	topology, coreCount, err := GetTopology(sysFs)
	if err != nil {
		klog.Errorf("Failed to get topology information: %v", err)
	}

	uuid, err := sysinfo.GetSystemUUID(sysFs)
	if err != nil {
		klog.Errorf("Failed to get system UUID: %v", err)
	}

	cloud := cloudinfo.NewRealCloudInfo()
	provider := cloud.GetCloudProvider()
	instType := cloud.GetInstanceType()
	instID := cloud.GetInstanceID()

	result := &info.MachineInfo{
		NumCores:         coreCount,
		NumPhysicalCores: GetPhysicalCores(rawCPUInfo),
		NumSockets:       GetSockets(rawCPUInfo),
		CpuFrequency:     cpuFrequency,
		MemoryCapacity:   memCapacity,
		MemoryByType:     memByType,
		NVMInfo:          nvm,
		HugePages:        hugePages,
		DiskMap:          disks,
		NetworkDevices:   nics,
		Topology:         topology,
		MachineID:        getInfoFromFiles(filepath.Join(rootFs, *machineIdFilePath)),
		SystemUUID:       uuid,
		BootID:           getInfoFromFiles(filepath.Join(rootFs, *bootIdFilePath)),
		CloudProvider:    provider,
		InstanceType:     instType,
		InstanceID:       instID,
	}

	for _, entry := range globalFs {
		// Inodes is optional: report zero together with HasInodes=false when unset.
		var inodeCount uint64
		hasInodes := entry.Inodes != nil
		if hasInodes {
			inodeCount = *entry.Inodes
		}
		result.Filesystems = append(result.Filesystems, info.FsInfo{
			Device:      entry.Device,
			DeviceMajor: uint64(entry.Major),
			DeviceMinor: uint64(entry.Minor),
			Type:        entry.Type.String(),
			Capacity:    entry.Capacity,
			Inodes:      inodeCount,
			HasInodes:   hasInodes,
		})
	}

	return result, nil
}

 

發表評論
所有評論
還沒有人評論,想成為第一個評論的人麼? 請在上方評論欄輸入並且點擊發布.
相關文章