1概念
neutron dhcp为租户网络提供DHCP服务IP地址动态分配,另外还会提供metadata请求服务。
重要的配置选项
interface_driver:dhcp agent使用interface_driver来创建tap设备。
dhcp_driver:默认配置是neutron.agent.linux.dhcp.Dnsmasq。dhcp agent默认使用dnsmasq来实现dhcp服务。
dhcp_agent_manager:RPC服务的manager,默认是neutron.agent.dhcp_agent.DhcpAgentWithStateReport
dhcp agent主要三个功能:
报告状态。
处理RPC API。
启动dhcp服务
1). 启动一个协程定期上报neutron-dhcp-agent network状态,通过rpc上报给neutron-server启动时创建。然后通过core_plugin上报给数据库并进行更新network。
2). 启动dnsmasq进程,检测qdhcp-xxxx namespace中的ns-xxxx端口接收到的dhcp discover请求。在启动dnsmasq进程的过程中,是否需要创建namespace中的ns-xxx端口,是否需要配置namespace中的iptables,是否需要refresh dnsmasq进程所需配置文件。
创建 network 并在 subnet 上 enable DHCP 时,网络节点上的 DHCP agent 会启动一个 dnsmasq 进程为 network 提供 DHCP 服务。dnsmasq 与 network 是一对一关系,一个 dnsmasq 进程可以为同一 netowrk 中所有 enable 了 DHCP 的 subnet 提供服务。
dnsmasq 重要的启动参数:
–dhcp-hostsfile
存放 DHCP host 的文件,这里的 host 在就是 instance。 dnsmasq 从该文件获取 host 的 IP 与 MAC 的对应关系。 每个 host 对应一个条目,来源于 Neutron 数据库。
–interface
指定提供 DHCP 服务的 interface。 dnsmasq 会在该 interface 上监听 instance 的 DHCP 请求。
#ip netns exec qdhcp-c4206574-8125-41e6-be09-5a624dadb570 ip link
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN mode DEFAULT
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
20: tap07130e1c-a6: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UNKNOWN mode DEFAULT
link/ether fa:16:3f:ba:33:99 brd ff:ff:ff:ff:ff:ff
2 源码分析
[entry_points]
console_scripts =
neutron-bgp-dragent = neutron.cmd.eventlet.agents.bgp_dragent:main
neutron-db-manage = neutron.db.migration.cli:main
neutron-debug = neutron.debug.shell:main
neutron-dhcp-agent = neutron.cmd.eventlet.agents.dhcp:main
neutron-keepalived-state-change = neutron.cmd.keepalived_state_change:main
neutron-dhcp-agent的入口为neutron.agent.dhcp_agent:main,以Service启动;Manager类为DhcpAgentWithStateReport,汇报DHCPAgent的状态。
def main():
register_options(cfg.CONF)
common_config.init(sys.argv[1:])
config.setup_logging()
server = neutron_service.Service.create(
binary='neutron-dhcp-agent',
topic=topics.DHCP_AGENT,
report_interval=cfg.CONF.AGENT.report_interval,
manager='neutron.agent.dhcp.agent.DhcpAgentWithStateReport')
service.launch(cfg.CONF, server).wait()
读取注册配置(包括dhcpagent、interface_driver、use_namespace等)。
创建一个service。主题是DHCP_AGENT,默认驱动是Dnsmasq,默认的管理器是DhcpAgentWithStateReport
plugin端的rpc调用方法(一般由neutron.api.v2.base.py发出)在neutron.api.rpc.agentnoftifiers.DhcpAgentNotifyAPI中实现,发出notification消息,会调用agent中对应的方法:
VALID_RESOURCES = ['network', 'subnet', 'port']
VALID_METHOD_NAMES = ['network.create.end',
'network.update.end',
'network.delete.end',
'subnet.create.end',
'subnet.update.end',
'subnet.delete.end',
'port.create.end',
'port.update.end',
'port.delete.end']
2.1 DhcpAgentWithStateReport类
class DhcpAgentWithStateReport(DhcpAgent):
def __init__(self, host=None, conf=None):
super(DhcpAgentWithStateReport, self).__init__(host=host, conf=conf)
self.state_rpc = agent_rpc.PluginReportStateAPI(topics.REPORTS)
self.agent_state = {
'binary': 'neutron-dhcp-agent',
'host': host,
'availability_zone': self.conf.AGENT.availability_zone,
'topic': topics.DHCP_AGENT,
'configurations': {
'dhcp_driver': self.conf.dhcp_driver,
'dhcp_lease_duration': self.conf.dhcp_lease_duration,
'log_agent_heartbeats': self.conf.AGENT.log_agent_heartbeats},
'start_flag': True,
'agent_type': constants.AGENT_TYPE_DHCP}
report_interval = self.conf.AGENT.report_interval
if report_interval:
self.heartbeat = loopingcall.FixedIntervalLoopingCall(
self._report_state)
self.heartbeat.start(interval=report_interval)
DhcpAgentWithStateReport继承2.2中的DhcpAgent类,主要作用时创建一个协程定期向neutron-server上报agent的network状态,然后更新到数据库。
2.2 DhcpAgent类
DhcpAgent初始化过程:继承manager.Manager继承n_rpc.RpcCallback和periodic_task.PeriodicTasks,提供周期性运行任务的方法。
NetworCache: 保存active的dhcp networks,DhcpAgentWithStateReport类的_report_state将数据信息上报到neutron-server
dhcp_dir:保存dhcp networks信息,/var/lib/neutron/dhcp
dhcp_driver: neutron.agent.linux.dhcp.Dnsmasq,dhcp_driver_cls是Dnsmasq的实例化
DhcpPluginApi中topics.PLUGIN为q-plugin,agent一端rpc api,作为向plugin发出rpc消息的handler
def __init__(self, host=None, conf=None):
super(DhcpAgent, self).__init__(host=host)
self.needs_resync_reasons = collections.defaultdict(list)
self.conf = conf or cfg.CONF
self.cache = NetworkCache()
self.dhcp_driver_cls = importutils.import_class(self.conf.dhcp_driver)
ctx = context.get_admin_context_without_session()
self.plugin_rpc = DhcpPluginApi(topics.PLUGIN, ctx, self.conf.host)
# create dhcp dir to store dhcp info
dhcp_dir = os.path.dirname("/%s/dhcp/" % self.conf.state_path)
utils.ensure_dir(dhcp_dir)
self.dhcp_version = self.dhcp_driver_cls.check_version()
self._populate_networks_cache()
# keep track of mappings between networks and routers for
# metadata processing
self._metadata_routers = {} # {network_id: router_id}
self._process_monitor = external_process.ProcessMonitor(
config=self.conf,
resource_type='dhcp')
def _populate_networks_cache(self):
"""Populate the networks cache when the DHCP-agent starts."""
try:
existing_networks = self.dhcp_driver_cls.existing_dhcp_networks(
self.conf
)
for net_id in existing_networks:
net = dhcp.NetModel({"id": net_id, "subnets": [], "ports": []})
self.cache.put(net)
except NotImplementedError:
# just go ahead with an empty networks cache
LOG.debug("The '%s' DHCP-driver does not support retrieving of a "
"list of existing networks",
self.conf.dhcp_driver)
@classmethod
def existing_dhcp_networks(cls, conf):
"""Return a list of existing networks ids that we have configs for."""
confs_dir = cls.get_confs_dir(conf)
try:
return [
c for c in os.listdir(confs_dir)
if uuidutils.is_uuid_like(c)
]
except OSError:
return []
@staticmethod
def get_confs_dir(conf):
return os.path.abspath(os.path.normpath(conf.dhcp_confs))
_populate_networks_cache: /var/lib/neutron/dhcp目录下的dhcp netoworks实例NetModel对象存入self.cache中。
def after_start(self):
self.run()
LOG.info(_LI("DHCP agent started"))
def run(self):
"""Activate the DHCP agent."""
self.sync_state()
self.periodic_resync()
@utils.synchronized('dhcp-agent')
def sync_state(self, networks=None):
"""Sync the local DHCP state with Neutron. If no networks are passed,
or 'None' is one of the networks, sync all of the networks.
"""
only_nets = set([] if (not networks or None in networks) else networks)
LOG.info(_LI('Synchronizing state'))
pool = eventlet.GreenPool(self.conf.num_sync_threads)
known_network_ids = set(self.cache.get_network_ids())
try:
active_networks = self.plugin_rpc.get_active_networks_info()
LOG.info(_LI('All active networks have been fetched through RPC.'))
active_network_ids = set(network.id for network in active_networks)
for deleted_id in known_network_ids - active_network_ids:
try:
self.disable_dhcp_helper(deleted_id)
except Exception as e:
self.schedule_resync(e, deleted_id)
LOG.exception(_LE('Unable to sync network state on '
'deleted network %s'), deleted_id)
for network in active_networks:
if (not only_nets or # specifically resync all
network.id not in known_network_ids or # missing net
network.id in only_nets): # specific network to sync
pool.spawn(self.safe_configure_dhcp_for_network, network)
pool.waitall()
LOG.info(_LI('Synchronizing state complete'))
sync_state:会发出rpc消息给plugin,获取最新的网络状态并更新本地信息,在调用dnsmasq进程使之生效。在启动后只运行一次。将未在数据库中的,从cache中移除,更新active的dhcp networks。调用safe_configure_dhcp_for_network
def configure_dhcp_for_network(self, network):
if not network.admin_state_up:
return
enable_metadata = self.dhcp_driver_cls.should_enable_metadata(
self.conf, network)
dhcp_network_enabled = False
for subnet in network.subnets:
if subnet.enable_dhcp:
if self.call_driver('enable', network):
dhcp_network_enabled = True
self.cache.put(network)
break
if enable_metadata and dhcp_network_enabled:
for subnet in network.subnets:
if subnet.ip_version == 4 and subnet.enable_dhcp:
self.enable_isolated_metadata_proxy(network)
break
elif (not self.conf.force_metadata and
not self.conf.enable_isolated_metadata):
# In the case that the dhcp agent ran with metadata enabled,
# and dhcp agent now starts with metadata disabled, check and
# delete any metadata_proxy.
self.disable_isolated_metadata_proxy(network)
最终调用call_driver:
def call_driver(self, action, network, **action_kwargs):
"""Invoke an action on a DHCP driver instance."""
LOG.debug('Calling driver for network: %(net)s action: %(action)s',
{'net': network.id, 'action': action})
try:
# the Driver expects something that is duck typed similar to
# the base models.
driver = self.dhcp_driver_cls(self.conf,
network,
self._process_monitor,
self.dhcp_version,
self.plugin_rpc)
getattr(driver, action)(**action_kwargs)
return True
call_driver调用Dnsmasq中enable方法
def enable(self):
"""Enables DHCP for this network by spawning a local process."""
if self.active:
self.restart()
elif self._enable_dhcp():
common_utils.ensure_dir(self.network_conf_dir)
interface_name = self.device_manager.setup(self.network)
self.interface_name = interface_name
self.spawn_process()
def _get_process_manager(self, cmd_callback=None):
return external_process.ProcessManager(
conf=self.conf,
uuid=self.network.id,
namespace=self.network.namespace,
default_cmd_callback=cmd_callback,
pid_file=self.get_conf_file_name('pid'),
run_as_root=True)
如果dnsmasq进程处于active状态则重启,从/var/lib/neutron/dhcp/${networkid}下取pid
def active(self):
pid = self.pid
if pid is None:
return False
cmdline = '/proc/%s/cmdline' % pid
try:
with open(cmdline, "r") as f:
return self.uuid in f.readline()
except IOError:
return False
enable函数在创建dnsmasq判断network下是否有enable dhcp的subnet,network至少有一个subnet enable dhcp才会执行创建dnsmasq进程。
device_manager.setup,为dhcp network创建和初始化设备
def setup(self, network):
"""Create and initialize a device for network's DHCP on this host."""
port = self.setup_dhcp_port(network)
self._update_dhcp_port(network, port)
interface_name = self.get_interface_name(network, port)
if ip_lib.ensure_device_is_ready(interface_name,
namespace=network.namespace):
LOG.debug('Reusing existing device: %s.', interface_name)
else:
try:
self.driver.plug(network.id,
port.id,
interface_name,
port.mac_address,
namespace=network.namespace,
mtu=network.get('mtu'))
except Exception:
with excutils.save_and_reraise_exception():
LOG.exception(_LE('Unable to plug DHCP port for '
'network %s. Releasing port.'),
network.id)
self.plugin.release_dhcp_port(network.id, port.device_id)
self.fill_dhcp_udp_checksums(namespace=network.namespace)
ip_cidrs = []
for fixed_ip in port.fixed_ips:
subnet = fixed_ip.subnet
if not ipv6_utils.is_auto_address_subnet(subnet):
net = netaddr.IPNetwork(subnet.cidr)
ip_cidr = '%s/%s' % (fixed_ip.ip_address, net.prefixlen)
ip_cidrs.append(ip_cidr)
if self.driver.use_gateway_ips:
# For each DHCP-enabled subnet, add that subnet's gateway
# IP address to the Linux device for the DHCP port.
for subnet in network.subnets:
if not subnet.enable_dhcp:
continue
gateway = subnet.gateway_ip
if gateway:
net = netaddr.IPNetwork(subnet.cidr)
ip_cidrs.append('%s/%s' % (gateway, net.prefixlen))
if self.conf.force_metadata or self.conf.enable_isolated_metadata:
ip_cidrs.append(METADATA_DEFAULT_CIDR)
self.driver.init_l3(interface_name, ip_cidrs,
namespace=network.namespace)
self._set_default_route(network, interface_name)
try:
self._cleanup_stale_devices(network, port)
except Exception:
# catch everything as we don't want to fail because of
# cleanup step
LOG.error(_LE("Exception during stale dhcp device cleanup"))
return interface_name
setup_dhcp_port函数的作用为创建或更新dhcp port信息
def setup_dhcp_port(self, network):
"""Create/update DHCP port for the host if needed and return port."""
# The ID that the DHCP port will have (or already has).
device_id = self.get_device_id(network)
# Get the set of DHCP-enabled subnets on this network.
dhcp_subnets = {subnet.id: subnet for subnet in network.subnets
if subnet.enable_dhcp}
# There are 3 cases: either the DHCP port already exists (but
# might need to be updated for a changed set of subnets); or
# some other code has already prepared a 'reserved' DHCP port,
# and we just need to adopt that; or we need to create a new
# DHCP port. Try each of those in turn until we have a DHCP
# port.
for setup_method in (self._setup_existing_dhcp_port,
self._setup_reserved_dhcp_port,
self._setup_new_dhcp_port):
dhcp_port = setup_method(network, device_id, dhcp_subnets)
if dhcp_port:
break
else:
raise exceptions.Conflict()
# Convert subnet_id to subnet dict
fixed_ips = [dict(subnet_id=fixed_ip.subnet_id,
ip_address=fixed_ip.ip_address,
subnet=dhcp_subnets[fixed_ip.subnet_id])
for fixed_ip in dhcp_port.fixed_ips]
ips = [DictModel(item) if isinstance(item, dict) else item
for item in fixed_ips]
dhcp_port.fixed_ips = ips
return dhcp_port
get_device_id 为:dhcp-%{networkid},例如:qdhcp-c4206574-8125-41e6-be09-5a624dadb570
get_interface_name为:tap%{portkid的11位},例如:tap07130e1c-a6
ensure_device_is_ready: 检验interface_name是否已经在本地的host的命名空间中被创建,执行命令:ip netns exec qdhcp-xxxx ip link tapxxxx up