OVS Source Code -- vswitchd Startup (Part 22)

Starting ovs-vswitchd

The vswitchd startup code is in the main function of vswitchd/ovs-vswitchd.c; the two most important functions it calls are bridge_run and netdev_run.
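
For orientation, the heart of main() is the typical OVS poll loop. Condensed (the unabridged loop also handles memory reporting, signal handling, and service shutdown):

/* Condensed from main() in vswitchd/ovs-vswitchd.c. */
while (!exiting) {
    bridge_run();                  /* Reconfigure and run all bridges. */
    unixctl_server_run(unixctl);   /* Service pending ovs-appctl requests. */
    netdev_run();                  /* Periodic work for all netdev classes. */

    bridge_wait();                 /* Register the events to wake up on... */
    unixctl_server_wait(unixctl);
    netdev_wait();
    poll_block();                  /* ...then sleep until one of them fires. */
}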

bridge_run

void
bridge_run(void)
{
    ...
    /* Initialize the ofproto library.  This only needs to run once, but
     * it must be done after the configuration is set.  If the
     * initialization has already occurred, bridge_init_ofproto()
     * returns immediately. */
    bridge_init_ofproto(cfg);
    bridge_run__();
    ...
}

bridge_init_ofproto initializes the ofproto-dpif component: it gathers the bridge, port, and interface information recorded in ovsdb and passes it to ofproto_init as hints.
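
/* Lightly abridged from bridge_init_ofproto() in vswitchd/bridge.c;
 * details vary between OVS versions. */
static void
bridge_init_ofproto(const struct ovsrec_open_vswitch *cfg)
{
    struct shash iface_hints;
    static bool initialized = false;
    int i;

    if (initialized) {
        return;
    }

    shash_init(&iface_hints);

    if (cfg) {
        /* Walk every interface of every port of every bridge in the ovsdb
         * configuration and record an iface_hint keyed by interface name. */
        for (i = 0; i < cfg->n_bridges; i++) {
            const struct ovsrec_bridge *br_cfg = cfg->bridges[i];
            int j;

            for (j = 0; j < br_cfg->n_ports; j++) {
                struct ovsrec_port *port_cfg = br_cfg->ports[j];
                int k;

                for (k = 0; k < port_cfg->n_interfaces; k++) {
                    struct ovsrec_interface *if_cfg = port_cfg->interfaces[k];
                    struct iface_hint *iface_hint = xmalloc(sizeof *iface_hint);

                    iface_hint->br_name = br_cfg->name;
                    iface_hint->br_type = br_cfg->datapath_type;
                    iface_hint->ofp_port = iface_pick_ofport(if_cfg);
                    shash_add(&iface_hints, if_cfg->name, iface_hint);
                }
            }
        }
    }

    ofproto_init(&iface_hints);

    shash_destroy_free_data(&iface_hints);
    initialized = true;
}

ofproto_init is shown below: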

/* Must be called to initialize the ofproto library.
 *
 * The caller may pass in 'iface_hints', which contains an shash of
 * "iface_hint" elements indexed by the interface's name.  The provider
 * may use these hints to describe the startup configuration in order to
 * reinitialize its state.  The caller owns the provided data, so a
 * provider will make copies of anything required.  An ofproto provider
 * will remove any existing state that is not described by the hint, and
 * may choose to remove it all. */
void
ofproto_init(const struct shash *iface_hints)
{
    struct shash_node *node;
    size_t i;

    ofproto_class_register(&ofproto_dpif_class);

    /* Make a local copy, since we don't own 'iface_hints' elements. */
    SHASH_FOR_EACH(node, iface_hints) {
        const struct iface_hint *orig_hint = node->data;
        struct iface_hint *new_hint = xmalloc(sizeof *new_hint);
        const char *br_type = ofproto_normalize_type(orig_hint->br_type);

        new_hint->br_name = xstrdup(orig_hint->br_name);
        new_hint->br_type = xstrdup(br_type);
        new_hint->ofp_port = orig_hint->ofp_port;

        shash_add(&init_ofp_ports, node->name, new_hint);
    }

    for (i = 0; i < n_ofproto_classes; i++) {
        ofproto_classes[i]->init(&init_ofp_ports);
    }

    ofproto_unixctl_init();
}

ofproto_init first registers ofproto_dpif_class, then invokes the init function of every registered ofproto_class.
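
ofproto_class_register is simple; roughly (from ofproto/ofproto.c, which may differ slightly between versions), it checks for duplicates and appends the class to a global array:

int
ofproto_class_register(const struct ofproto_class *new_class)
{
    size_t i;

    /* Refuse to register the same class twice. */
    for (i = 0; i < n_ofproto_classes; i++) {
        if (ofproto_classes[i] == new_class) {
            return EEXIST;
        }
    }

    /* Grow the global 'ofproto_classes' array as needed, then append. */
    if (n_ofproto_classes >= allocated_ofproto_classes) {
        ofproto_classes = x2nrealloc(ofproto_classes,
                                     &allocated_ofproto_classes,
                                     sizeof *ofproto_classes);
    }
    ofproto_classes[n_ofproto_classes++] = new_class;
    return 0;
}
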
ofproto_dpif_class is OVS's default ofproto implementation, defined in ofproto/ofproto-dpif.c. Let's look at its init function first:

static void
init(const struct shash *iface_hints)
{
    struct shash_node *node;

    /* Make a local copy, since we don't own 'iface_hints' elements. */
    SHASH_FOR_EACH(node, iface_hints) {
        const struct iface_hint *orig_hint = node->data;
        struct iface_hint *new_hint = xmalloc(sizeof *new_hint);

        new_hint->br_name = xstrdup(orig_hint->br_name);
        new_hint->br_type = xstrdup(orig_hint->br_type);
        new_hint->ofp_port = orig_hint->ofp_port;

        shash_add(&init_ofp_ports, node->name, new_hint);
    }

    ofproto_unixctl_init();
    udpif_init();
}

Both ofproto_unixctl_init and udpif_init register a number of ovs-appctl commands via unixctl_command_register.
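
For example, ofproto_unixctl_init contains registrations like the following (a small sample; the full command set is longer and varies by version, and udpif_init registers the upcall/* commands in the same way):

/* Excerpted from ofproto_unixctl_init() in ofproto/ofproto-dpif.c: each
 * call binds an ovs-appctl command name to a handler callback. */
unixctl_command_register("fdb/flush", "[bridge]", 0, 1,
                         ofproto_unixctl_fdb_flush, NULL);
unixctl_command_register("fdb/show", "bridge", 1, 1,
                         ofproto_unixctl_fdb_show, NULL);
unixctl_command_register("dpif/show", "", 0, 0,
                         ofproto_unixctl_dpif_show, NULL);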

bridge_run__ ultimately calls into the functions registered by ofproto_dpif_class:

static void
bridge_run__(void)
{
    struct bridge *br;
    struct sset types;
    const char *type;

    /* Let each datapath type do the work that it needs to do. */
    sset_init(&types);
    ofproto_enumerate_types(&types);
    SSET_FOR_EACH (type, &types) {
        ofproto_type_run(type);
    }
    sset_destroy(&types);

    /* Let each bridge do the work that it needs to do. */
    HMAP_FOR_EACH (br, node, &all_bridges) {
        ofproto_run(br->ofproto);
    }
}

ofproto_enumerate_types dispatches to the enumerate_types function registered in ofproto_dpif_class, which in turn calls dp_enumerate_types:

void
dp_enumerate_types(struct sset *types)
{
    struct shash_node *node;

    dp_initialize();

    ovs_mutex_lock(&dpif_mutex);
    SHASH_FOR_EACH(node, &dpif_classes) {
        const struct registered_dpif_class *registered_class = node->data;
        sset_add(types, registered_class->dpif_class->type);
    }
    ovs_mutex_unlock(&dpif_mutex);
}

dp_enumerate_types lists all the supported dpif_classes. Today there are two main ones: dpif_netdev_class (the userspace datapath) and dpif_netlink_class (the Linux kernel datapath).
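
These are registered by dp_initialize the first time any dpif function runs; roughly (condensed from lib/dpif.c):

static const struct dpif_class *base_dpif_classes[] = {
#if defined(__linux__) || defined(_WIN32)
    &dpif_netlink_class,    /* Kernel datapath, driven over netlink. */
#endif
    &dpif_netdev_class,     /* Userspace datapath (also used with DPDK). */
};

/* Inside dp_initialize(), guarded by an ovsthread_once block: */
for (i = 0; i < ARRAY_SIZE(base_dpif_classes); i++) {
    dp_register_provider(base_dpif_classes[i]);
}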

ofproto_type_run dispatches to the type_run function of ofproto_dpif_class, which in turn calls dpif_class->run as well as udpif_run.
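
Roughly (heavily condensed from type_run() in ofproto/ofproto-dpif.c):

static int
type_run(const char *type)
{
    struct dpif_backer *backer;

    backer = shash_find_data(&all_dpif_backers, type);
    if (!backer) {
        /* Not necessarily a problem: the resources that needed this
         * backer may already have been destroyed. */
        return 0;
    }

    /* Let the dpif provider do its periodic work; if anything changed,
     * force a revalidation pass. */
    if (dpif_run(backer->dpif)) {
        backer->need_revalidate = REV_RECONFIGURE;
    }
    udpif_run(backer->udpif);
    ...
}
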
struct udpif is ofproto_dpif's upcall-handler structure. It contains two parts: an array of struct handler, which services upcall requests and can be thought of as a thread pool, and an array of revalidators; I haven't studied the latter closely, but it appears to be a GC-like thread pool that reclaims expired flows.

/* An upcall handler for ofproto_dpif.
 *
 * udpif keeps records of two kind of logically separate units:
 *
 * upcall handling
 * ---------------
 *
 *    - An array of 'struct handler's for upcall handling and flow
 *      installation.
 *
 * flow revalidation
 * -----------------
 *
 *    - Revalidation threads which read the datapath flow table and maintains
 *      them.
 */
struct udpif {
    struct ovs_list list_node;         /* In all_udpifs list. */

    struct dpif *dpif;                 /* Datapath handle. */
    struct dpif_backer *backer;        /* Opaque dpif_backer pointer. */

    struct handler *handlers;          /* Upcall handlers. */
    size_t n_handlers;

    struct revalidator *revalidators;  /* Flow revalidators. */
    size_t n_revalidators;

    struct latch exit_latch;           /* Tells child threads to exit. */

    /* Revalidation. */
    struct seq *reval_seq;             /* Incremented to force revalidation. */
    bool reval_exit;                   /* Set by leader on 'exit_latch. */
    struct ovs_barrier reval_barrier;  /* Barrier used by revalidators. */
    struct dpif_flow_dump *dump;       /* DPIF flow dump state. */
    long long int dump_duration;       /* Duration of the last flow dump. */
    struct seq *dump_seq;              /* Increments each dump iteration. */
    atomic_bool enable_ufid;           /* If true, skip dumping flow attrs. */

    /* These variables provide a mechanism for the main thread to pause
     * all revalidation without having to completely shut the threads down.
     * 'pause_latch' is shared between the main thread and the lead
     * revalidator thread, so when it is desirable to halt revalidation, the
     * main thread will set the latch. 'pause' and 'pause_barrier' are shared
     * by revalidator threads. The lead revalidator will set 'pause' when it
     * observes the latch has been set, and this will cause all revalidator
     * threads to wait on 'pause_barrier' at the beginning of the next
     * revalidation round. */
    bool pause;                        /* Set by leader on 'pause_latch. */
    struct latch pause_latch;          /* Set to force revalidators pause. */
    struct ovs_barrier pause_barrier;  /* Barrier used to pause all */
                                       /* revalidators by main thread. */

    /* Following fields are accessed and modified by different threads. */
    atomic_uint flow_limit;            /* Datapath flow hard limit. */

    /* n_flows_mutex prevents multiple threads updating these concurrently. */
    atomic_uint n_flows;               /* Number of flows in the datapath. */
    atomic_llong n_flows_timestamp;    /* Last time n_flows was updated. */
    struct ovs_mutex n_flows_mutex;

    /* Following fields are accessed and modified only from the main thread. */
    struct unixctl_conn **conns;       /* Connections waiting on dump_seq. */
    uint64_t conn_seq;                 /* Corresponds to 'dump_seq' when
                                          conns[n_conns-1] was stored. */
    size_t n_conns;                    /* Number of connections waiting. */
};

struct handler and struct revalidator:

/* A thread that reads upcalls from dpif, forwards each upcall's packet,
 * and possibly sets up a kernel flow as a cache. */
struct handler {
    struct udpif *udpif;               /* Parent udpif. */
    pthread_t thread;                  /* Thread ID. */
    uint32_t handler_id;               /* Handler id. */
};
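
Each handler thread runs udpif_upcall_handler from ofproto/ofproto-dpif-upcall.c. Its main loop looks approximately like this (comments added; details may differ between versions):

static void *
udpif_upcall_handler(void *arg)
{
    struct handler *handler = arg;
    struct udpif *udpif = handler->udpif;

    while (!latch_is_set(&handler->udpif->exit_latch)) {
        if (recv_upcalls(handler)) {
            /* We processed a batch of upcalls; poll again immediately in
             * case more are already queued. */
            poll_immediate_wake();
        } else {
            /* Nothing to do: sleep until an upcall or exit arrives. */
            dpif_recv_wait(udpif->dpif, handler->handler_id);
            latch_wait(&udpif->exit_latch);
        }
        poll_block();
    }

    return NULL;
}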

/* A thread that processes datapath flows, updates OpenFlow statistics, and
 * updates or removes them if necessary.
 *
 * Revalidator threads operate in two phases: "dump" and "sweep". In between
 * each phase, all revalidators sync up so that all revalidator threads are
 * either in one phase or the other, but not a combination.
 *
 *     During the dump phase, revalidators fetch flows from the datapath and
 *     attribute the statistics to OpenFlow rules. Each datapath flow has a
 *     corresponding ukey which caches the most recently seen statistics. If
 *     a flow needs to be deleted (for example, because it is unused over a
 *     period of time), revalidator threads may delete the flow during the
 *     dump phase. The datapath is not guaranteed to reliably dump all flows
 *     from the datapath, and there is no mapping between datapath flows to
 *     revalidators, so a particular flow may be handled by zero or more
 *     revalidators during a single dump phase. To avoid duplicate attribution
 *     of statistics, ukeys are never deleted during this phase.
 *     During the sweep phase, each revalidator takes ownership of a different
 *     slice of umaps and sweeps through all ukeys in those umaps to figure out
 *     whether they need to be deleted. During this phase, revalidators may
 *     fetch individual flows which were not dumped during the dump phase to
 *     validate them and attribute statistics.
 */
struct revalidator {
    struct udpif *udpif;               /* Parent udpif. */
    pthread_t thread;                  /* Thread ID. */
    unsigned int id;                   /* ovsthread_id_self(). */
};

After that, bridge_run__ calls ofproto_run on each bridge.
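
ofproto_run is mostly a dispatcher into the provider as well; roughly (heavily condensed from ofproto/ofproto.c):

int
ofproto_run(struct ofproto *p)
{
    int error;

    /* For a dpif bridge this is the run() callback of ofproto_dpif_class. */
    error = p->ofproto_class->run(p);
    if (error && error != EAGAIN) {
        VLOG_ERR_RL(&rl, "%s: run failed (%s)", p->name, ovs_strerror(error));
    }
    ...
    /* It also expires flow-table rules and runs the OpenFlow connection
     * manager (connmgr_run), among other housekeeping. */
    return error;
}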

netdev_run

/* Performs periodic work needed by all the various kinds of netdevs.
 *
 * If your program opens any netdevs, it must call this function within its
 * main poll loop. */
void
netdev_run(void)
    OVS_EXCLUDED(netdev_mutex)
{
    netdev_initialize();

    struct netdev_registered_class *rc;
    CMAP_FOR_EACH (rc, cmap_node, &netdev_classes) {
        if (rc->class->run) {
            rc->class->run(rc->class);
        }
    }
}

netdev_run first calls netdev_initialize, whose body is wrapped in a struct ovsthread_once block so that it executes only once. Its main job is to register the various netdev and tunnel vport classes:

static void
netdev_initialize(void)
    OVS_EXCLUDED(netdev_mutex)
{
    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;

    if (ovsthread_once_start(&once)) {
        fatal_signal_add_hook(restore_all_flags, NULL, NULL, true);
        netdev_vport_patch_register();
        netdev_register_provider(&netdev_linux_class);
        netdev_register_provider(&netdev_internal_class);
        netdev_register_provider(&netdev_tap_class);
        netdev_vport_tunnel_register();
        ovsthread_once_done(&once);
    }
}
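
Each of those registrations funnels into netdev_register_provider, which initializes the class and records it so that netdev_run can iterate over it later. A rough sketch (condensed from lib/netdev.c; the exact locking and bookkeeping differ between versions):

int
netdev_register_provider(const struct netdev_class *new_class)
{
    int error;

    ovs_mutex_lock(&netdev_class_mutex);
    if (netdev_lookup_class(new_class->type)) {
        /* Duplicate type name ("system", "tap", "vxlan", ...). */
        error = EEXIST;
    } else {
        error = new_class->init ? new_class->init() : 0;
        if (!error) {
            /* Insert into the 'netdev_classes' cmap that netdev_run()
             * walks with CMAP_FOR_EACH. */
            struct netdev_registered_class *rc = xmalloc(sizeof *rc);

            cmap_insert(&netdev_classes, &rc->cmap_node,
                        hash_string(new_class->type, 0));
            rc->class = new_class;
        }
    }
    ovs_mutex_unlock(&netdev_class_mutex);

    return error;
}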

netdev_vport_patch_register registers the patch-port vport class (a patch port connects two bridges and has no dpif attributes of its own), while netdev_vport_tunnel_register registers the various tunnel types, e.g.

    static const struct vport_class vport_classes[] = {
        TUNNEL_CLASS("geneve", "genev_sys", netdev_geneve_build_header,
                                            netdev_tnl_push_udp_header,
                                            netdev_geneve_pop_header),
        TUNNEL_CLASS("gre", "gre_sys", netdev_gre_build_header,
                                       netdev_gre_push_header,
                                       netdev_gre_pop_header),
        TUNNEL_CLASS("ipsec_gre", "gre_sys", NULL, NULL, NULL),
        TUNNEL_CLASS("vxlan", "vxlan_sys", netdev_vxlan_build_header,
                                           netdev_tnl_push_udp_header,
                                           netdev_vxlan_pop_header),
        TUNNEL_CLASS("lisp", "lisp_sys", NULL, NULL, NULL),
        TUNNEL_CLASS("stt", "stt_sys", NULL, NULL, NULL),
    };

Several other kinds of netdev_class are registered as well, including the DPDK ones:

const struct netdev_class netdev_linux_class =
    NETDEV_LINUX_CLASS(
        "system",
        netdev_linux_construct,
        netdev_linux_get_stats,
        netdev_linux_get_features,
        netdev_linux_get_status);

const struct netdev_class netdev_tap_class =
    NETDEV_LINUX_CLASS(
        "tap",
        netdev_linux_construct_tap,
        netdev_tap_get_stats,
        netdev_linux_get_features,
        netdev_linux_get_status);

const struct netdev_class netdev_internal_class =
    NETDEV_LINUX_CLASS(
        "internal",
        netdev_linux_construct,
        netdev_internal_get_stats,
        NULL,                  /* get_features */
        netdev_internal_get_status);

static const struct netdev_class dpdk_class =
    NETDEV_DPDK_CLASS(
        "dpdk",
        netdev_dpdk_construct,
        netdev_dpdk_destruct,
        netdev_dpdk_set_config,
        netdev_dpdk_set_tx_multiq,
        netdev_dpdk_eth_send,
        netdev_dpdk_get_carrier,
        netdev_dpdk_get_stats,
        netdev_dpdk_get_features,
        netdev_dpdk_get_status,
        netdev_dpdk_reconfigure,
        netdev_dpdk_rxq_recv);

static const struct netdev_class dpdk_ring_class =
    NETDEV_DPDK_CLASS(
        "dpdkr",
        netdev_dpdk_ring_construct,
        netdev_dpdk_destruct,
        netdev_dpdk_ring_set_config,
        netdev_dpdk_set_tx_multiq,
        netdev_dpdk_ring_send,
        netdev_dpdk_get_carrier,
        netdev_dpdk_get_stats,
        netdev_dpdk_get_features,
        netdev_dpdk_get_status,
        netdev_dpdk_reconfigure,
        netdev_dpdk_rxq_recv);

static const struct netdev_class dpdk_vhost_class =
    NETDEV_DPDK_CLASS(
        "dpdkvhostuser",
        netdev_dpdk_vhost_construct,
        netdev_dpdk_vhost_destruct,
        NULL,
        NULL,
        netdev_dpdk_vhost_send,
        netdev_dpdk_vhost_get_carrier,
        netdev_dpdk_vhost_get_stats,
        NULL,
        NULL,
        netdev_dpdk_vhost_reconfigure,
        netdev_dpdk_vhost_rxq_recv);

Finally, netdev_run invokes the run callback of every registered netdev_class. Taking netdev_linux_class as an example (dpdk_class has no run callback), its run function uses a netlink socket to periodically refresh cached device state; I won't go into the details here.
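
For the curious, the shape of netdev_linux_run is roughly the following (heavily condensed from lib/netdev-linux.c; exact function signatures vary across OVS versions):

static void
netdev_linux_run(const struct netdev_class *netdev_class OVS_UNUSED)
{
    struct nl_sock *sock;
    int error;

    sock = netdev_linux_notify_sock();   /* rtnetlink notification socket. */
    if (!sock) {
        return;
    }

    /* Drain pending rtnetlink messages and refresh the cached link state
     * and flags of the netdevs they refer to. */
    do {
        struct ofpbuf buf;
        ...
        error = nl_sock_recv(sock, &buf, false);
        if (!error) {
            ... parse the rtnetlink change and update the netdev ...
        } else if (error == ENOBUFS) {
            ... notifications were dropped; refresh every netdev ...
        }
    } while (!error);
}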

Original article: https://blog.csdn.net/majieyue/article/details/52801322
