Line data Source code
1 : /*
2 : * Copyright (c) 2016 Juniper Networks, Inc. All rights reserved.
3 : */
4 :
5 : #ifndef SRC_BGP_BGP_MEMBERSHIP_H_
6 : #define SRC_BGP_BGP_MEMBERSHIP_H_
7 :
8 : #include <boost/dynamic_bitset.hpp>
9 : #include <boost/scoped_ptr.hpp>
10 : #include <tbb/spin_rw_mutex.h>
11 :
12 : #include <list>
13 : #include <map>
14 : #include <set>
15 : #include <string>
16 : #include <vector>
17 : #include <atomic>
18 :
19 : #include "base/lifetime.h"
20 : #include "base/queue_task.h"
21 : #include "db/db_table.h"
22 : #include "bgp/bgp_ribout.h"
23 :
24 : class BgpNeighborResp;
25 : class BgpServer;
26 : class BgpTable;
27 : class IPeer;
28 : class RibOut;
29 : class ShowMembershipPeerInfo;
30 : class ShowRoutingInstanceTable;
31 : class TaskTrigger;
32 :
33 : //
34 : // This class implements membership management for a BgpServer.
35 :
36 : // It provides methods for an IPeer to manage its membership in BgpTables.
37 : // There are two kinds of memberships for a (IPeer, BgpTable) pair - RibIn
38 : // and RibOut. RibIn membership is needed for an IPeer to add BgpPaths to a
39 : // BgpTable. RibOut membership is needed to advertise routes from a BgpTable
40 : // to an IPeer.
41 : //
42 : // APIs are provided to manage these 2 types of memberships together as well
43 : // as separately. An API to walk all BgpPaths added by an IPeer to a BgpTable
44 : // is also provided. This is used by clients to delete BgpPaths when a peer
45 : // is going down, to mark BgpPaths as stale when handling graceful restart of
46 : // a peer etc. The actual logic of deleting or modifying a BgpPath has to be
47 : // in the client code.
48 : //
49 : // Many APIs require traversal of the entire BgpTable and so are asynchronous.
50 : // An IPeer gets notified on completion of API via virtual function callback.
51 : // Along similar lines, when using a walk API, an IPeer gets notified for each
52 : // BgpPath added by it via a virtual function. A client is allowed to have 1
53 : // outstanding request for a given (IPeer, BgpTable) pair.
54 : //
55 : // There are scenarios where multiple IPeers subscribe to a given BgpTable at
56 : // roughly the same time. Further, this can happen for many BgpTables at about
57 : // the same time as well. The implementation handles this quite efficiently by
58 : // accumulating multiple (IPeer, BgpTable) requests to reduce/minimize the
59 : // number of table walks. The table walk functionality is delegated to the
60 : // Walker class.
61 : //
62 : // Membership information corresponding to (IPeer, BgpTable) pairs is organized
63 : // with the above goal in mind. An IPeer is represented using a PeerState and a
64 : // BgpTable is represented using a RibState. A PeerStateMap and RibStateMap are
65 : // used for efficient lookup and insertion. A (IPeer, BgpTable) is represented
66 : // using a PeerRibState. Linkage between PeerState, RibState, and PeerRibState
67 : // is described in more detail later.
68 : //
69 : // Updates to the PeerStateMap, RibStateMap and other maps/sets maintained in
70 : // a PeerState and RibState can happen synchronously from the API calls.
71 : // This allows for easy detection of violations in API assumptions e.g. only a
72 : // single outstanding request for a (IPeer, BgpTable) pair. Since multiple API
73 : // calls can happen in parallel, access to internal state has to be serialized
74 : // using a read-write mutex. A read-write mutex is used to allow parallel calls
75 : // to GetRegistrationInfo from multiple db::DBTable tasks. All other API calls
76 : // are relatively infrequent.
77 : //
78 : // The read-write mutex ensures consistency of BgpMembershipManager's internal
79 : // state. However, the BgpMembershipManager also needs to call external APIs
80 : // which have their own concurrency requirements. A WorkQueue of Events is
81 : // used to satisfy these requirements. The WorkQueue is processed in context
82 : // of bgp::PeerMembership task and appropriate exclusion policies are specified
83 : // at system initialization. Client callbacks for API complete notifications
84 : // are also made from the bgp::PeerMembership task.
85 : //
86 : class BgpMembershipManager {  // Tracks (IPeer, BgpTable) RibIn/RibOut memberships for one BgpServer.
87 : public:
88 : typedef boost::function<void(IPeer *, BgpTable *, bool)>
89 : PeerRegistrationCallback;  // Listener signature; the bool presumably mirrors NotifyPeerRegistration's 'unregister' flag - confirm in the .cc.
90 :
91 : explicit BgpMembershipManager(BgpServer *server);
92 : virtual ~BgpMembershipManager();
93 :
94 : int RegisterPeerRegistrationCallback(PeerRegistrationCallback callback);  // Returned int is presumably the id taken by the Unregister call below - confirm.
95 : void UnregisterPeerRegistrationCallback(int id);
96 :
97 : virtual void Register(IPeer *peer, BgpTable *table,
98 : const RibExportPolicy &policy, int instance_id = -1);  // instance_id defaults to -1 when the caller does not supply one.
99 : void RegisterRibIn(IPeer *peer, BgpTable *table);  // RibIn-only variant: takes no export policy.
100 : virtual void Unregister(IPeer *peer, BgpTable *table);
101 : void UnregisterRibIn(IPeer *peer, BgpTable *table);
102 : virtual void UnregisterRibOut(IPeer *peer, BgpTable *table);
103 : void WalkRibIn(IPeer *peer, BgpTable *table);  // Requests a walk of the BgpPaths this peer added to the table.
104 :
105 : bool GetRegistrationInfo(const IPeer *peer, const BgpTable *table,
106 : int *instance_id = NULL, uint64_t *subscription_gen_id = NULL) const;  // Both out-pointers default to NULL; may be called in parallel from db::DBTable tasks.
107 : void SetRegistrationInfo(const IPeer *peer, const BgpTable *table,
108 : int instance_id, uint64_t subscription_gen_id);
109 :
110 : bool IsRegistered(const IPeer *peer, const BgpTable *table) const;
111 : bool IsRibInRegistered(const IPeer *peer, const BgpTable *table) const;
112 : bool IsRibOutRegistered(const IPeer *peer, const BgpTable *table) const;
113 : uint32_t GetRibOutQueueDepth(const IPeer *peer,
114 : const BgpTable *table) const;
115 :
116 : void GetRegisteredRibs(const IPeer *peer,
117 : std::list<BgpTable *> *table_list) const;  // Result is delivered through the table_list out-parameter.
118 :
119 : void FillRoutingInstanceTableInfo(ShowRoutingInstanceTable *srit,
120 : const BgpTable *table) const;  // Introspect helper: fills the caller-provided srit.
121 : void FillPeerMembershipInfo(const IPeer *peer, BgpNeighborResp *resp) const;  // Introspect helper: fills the caller-provided resp.
122 :
123 81591 : BgpServer *server() { return server_; }
124 : bool IsQueueEmpty() const;
125 : size_t GetMembershipCount() const;
126 : uint64_t current_jobs_count() const { return current_jobs_count_; }  // Reads the atomic counter.
127 : uint64_t total_jobs_count() const { return total_jobs_count_; }  // Reads the atomic counter.
128 :
129 : protected:
130 : class PeerRibState;
131 : struct Event;
132 :
133 : virtual bool EventCallbackInternal(Event *event);
134 : virtual bool AssertRegister(PeerRibState *prs, bool do_assert = true);  // NOTE(review): do_assert presumably selects assert vs. returning false - confirm.
135 : virtual bool AssertUnregister(PeerRibState *prs, bool do_assert = true);
136 : virtual bool AssertRegisterRibIn(PeerRibState *prs, IPeer *peer,
137 : bool do_assert = true);
138 : virtual bool AssertWalkRibIn(PeerRibState *prs, bool do_assert = true);
139 :
140 : mutable tbb::spin_rw_mutex rw_mutex_;  // Serializes access to internal state across parallel API calls; mutable so const methods can lock it.
141 :
142 : private:
143 : class PeerState;
144 : class RibState;
145 : class Walker;
146 :
147 : friend class BgpMembershipManager::PeerState;
148 : friend class BgpMembershipManager::RibState;
149 : friend class BgpMembershipManager::PeerRibState;
150 : friend class BgpMembershipManager::Walker;
151 : friend class BgpMembershipTest;
152 : friend class BgpServerUnitTest;
153 : friend class BgpXmppUnitTest;
154 :
155 : enum Action {  // Pending action recorded in a PeerRibState.
156 : NONE,  // Steady state: nothing pending.
157 : RIBOUT_ADD,
158 : RIBIN_DELETE,
159 : RIBIN_WALK,
160 : RIBIN_WALK_RIBOUT_DELETE,
161 : RIBIN_DELETE_RIBOUT_DELETE
162 : };
163 :
164 : enum EventType {  // Kinds of Event placed on event_queue_; one Process*Event method exists per value.
165 : REGISTER_RIB,
166 : REGISTER_RIB_COMPLETE,
167 : UNREGISTER_RIB,
168 : UNREGISTER_RIB_COMPLETE,
169 : WALK_RIB_COMPLETE
170 : };
171 :
172 : typedef std::vector<PeerRegistrationCallback> PeerRegistrationListenerList;
173 : typedef std::map<const IPeer *, PeerState *> PeerStateMap;  // Keyed by IPeer pointer.
174 : typedef std::map<const BgpTable *, RibState *> RibStateMap;  // Keyed by BgpTable pointer.
175 : typedef std::set<PeerRibState *> PeerRibList;  // A std::set despite the "List" name.
176 :
177 : void UnregisterRibInUnlocked(PeerRibState *prs);  // NOTE(review): name suggests the caller already holds rw_mutex_ - confirm.
178 :
179 : PeerState *LocatePeerState(IPeer *peer);  // NOTE(review): Locate* presumably creates on miss while Find* only looks up - confirm in the .cc.
180 : PeerState *FindPeerState(const IPeer *peer);
181 : const PeerState *FindPeerState(const IPeer *peer) const;
182 : void DestroyPeerState(PeerState *ps);
183 :
184 : RibState *LocateRibState(BgpTable *table);
185 : RibState *FindRibState(const BgpTable *table);
186 : const RibState *FindRibState(const BgpTable *table) const;
187 : void DestroyRibState(RibState *ps);
188 : void EnqueueRibState(RibState *rs);
189 :
190 : PeerRibState *LocatePeerRibState(IPeer *peer, BgpTable *table);
191 : PeerRibState *FindPeerRibState(const IPeer *peer, const BgpTable *table);
192 : const PeerRibState *FindPeerRibState(const IPeer *peer,
193 : const BgpTable *table) const;
194 : void DestroyPeerRibState(PeerRibState *prs);
195 :
196 : void TriggerRegisterRibCompleteEvent(IPeer *peer, BgpTable *table);
197 : void TriggerUnregisterRibCompleteEvent(IPeer *peer, BgpTable *table);
198 : void TriggerWalkRibCompleteEvent(IPeer *peer, BgpTable *table);
199 :
200 : void ProcessRegisterRibEvent(Event *event);
201 : void ProcessRegisterRibCompleteEvent(Event *event);
202 : void ProcessUnregisterRibEvent(Event *event);
203 : void ProcessUnregisterRibCompleteEvent(Event *event);
204 : void ProcessWalkRibCompleteEvent(Event *event);
205 :
206 332332 : void EnqueueEvent(Event *event) { event_queue_->Enqueue(event); }  // Hands the event to the WorkQueue.
207 : bool EventCallback(Event *event);
208 :
209 : void NotifyPeerRegistration(IPeer *peer, BgpTable *table, bool unregister);
210 :
211 : // Testing only.
212 84 : void SetQueueDisable(bool value) { event_queue_->set_disable(value); }
213 30 : Walker *walker() { return walker_.get(); }  // Non-owning pointer; walker_ keeps ownership.
214 :
215 : BgpServer *server_;
216 : std::atomic<uint64_t> current_jobs_count_;
217 : std::atomic<uint64_t> total_jobs_count_;
218 : RibStateMap rib_state_map_;
219 : PeerStateMap peer_state_map_;
220 : boost::scoped_ptr<Walker> walker_;  // Table walks are delegated to the Walker.
221 : boost::scoped_ptr<WorkQueue<Event *> > event_queue_;  // Processed in context of the bgp::PeerMembership task.
222 :
223 : boost::dynamic_bitset<> registration_bmap_;  // NOTE(review): presumably tracks which callback ids are in use - confirm.
224 : PeerRegistrationListenerList registration_callbacks_;
225 :
226 : DISALLOW_COPY_AND_ASSIGN(BgpMembershipManager);
227 : };
228 :
229 : struct BgpMembershipManager::Event {  // Work item carried (by pointer) on the manager's WorkQueue.
230 : friend class BgpMembershipManager;
231 :
232 : typedef BgpMembershipManager::EventType EventType;
233 :
234 : Event(EventType event_type, IPeer *peer, BgpTable *table);  // Takes no policy/instance_id; what those fields are set to is not visible here.
235 : Event(EventType event_type, IPeer *peer, BgpTable *table,
236 : const RibExportPolicy &policy, int instance_id);  // Variant carrying an export policy and instance id.
237 :
238 : EventType event_type;  // Which kind of event this is.
239 : IPeer *peer;  // Raw pointer; NOTE(review): presumably non-owning - confirm.
240 : BgpTable *table;  // Raw pointer; NOTE(review): presumably non-owning - confirm.
241 : RibExportPolicy policy;  // Stored by value.
242 : int instance_id;
243 : };
244 :
245 : //
246 : // This represents an IPeer within the BgpMembershipManager, which maintains a
247 : // map of PeerStates keyed by an IPeer pointer.
248 : //
249 : // The PeerRibStateMap allows efficient creation and lookup of PeerRibState.
250 : // It can be accessed synchronously from the API calls to BgpMembershipManager
251 : // or from the bgp::PeerMembership task. In the former case, the read-write
252 : // mutex in the BgpMembershipManager is sufficient to serialize access to the
253 : // PeerRibStateMap. In the latter case, task exclusion policies prevent any
254 : // parallel access.
255 : //
256 : class BgpMembershipManager::PeerState {  // Per-IPeer state: maps each RibState to this peer's PeerRibState.
257 : public:
258 : typedef BgpMembershipManager::RibState RibState;
259 : typedef BgpMembershipManager::PeerRibState PeerRibState;
260 : typedef std::map<const RibState *, PeerRibState *> PeerRibStateMap;  // Keyed by RibState pointer.
261 :
262 : PeerState(BgpMembershipManager *manager, IPeer *peer);
263 : ~PeerState();
264 :
265 : PeerRibState *LocatePeerRibState(RibState *rs);  // NOTE(review): presumably creates on miss, unlike Find* - confirm in the .cc.
266 : PeerRibState *FindPeerRibState(const RibState *rs);
267 : const PeerRibState *FindPeerRibState(const RibState *rs) const;
268 : bool RemovePeerRibState(PeerRibState *prs);  // Meaning of the bool result is defined in the .cc - not visible here.
269 :
270 : void GetRegisteredRibs(std::list<BgpTable *> *table_list) const;  // Result is delivered through the table_list out-parameter.
271 937 : size_t GetMembershipCount() const { return rib_map_.size(); }  // Number of entries in rib_map_.
272 : void FillPeerMembershipInfo(BgpNeighborResp *resp) const;
273 :
274 603991 : IPeer *peer() { return peer_; }
275 : const IPeer *peer() const { return peer_; }
276 :
277 : private:
278 : BgpMembershipManager *manager_;
279 : IPeer *peer_;
280 : PeerRibStateMap rib_map_;  // One entry per RibState this peer has a PeerRibState for.
281 :
282 : DISALLOW_COPY_AND_ASSIGN(PeerState);
283 : };
284 :
285 : //
286 : // This represents a BgpTable within the BgpMembershipManager, which maintains
287 : // a map of RibStates keyed by a BgpTable pointer.
288 : //
289 : // A RibState maintains two PeerRibLists.
290 : // The pending list contains all the PeerRibStates for which some action needs
291 : // to be performed during the next walk of the associated BgpTable. This is used
292 : // by the Walker when it's starting a walk of the BgpTable.
293 : // The regular PeerRibList contains all the PeerRibStates for this RibState.
294 : // It is used only for introspect.
295 : //
296 : class BgpMembershipManager::RibState {  // Per-BgpTable state: holds the full and the pending PeerRibState sets.
297 : public:
298 : typedef BgpMembershipManager::PeerRibState PeerRibState;
299 : typedef BgpMembershipManager::PeerRibList PeerRibList;
300 : typedef PeerRibList::iterator iterator;
301 :
302 : explicit RibState(BgpMembershipManager *manager, BgpTable *table);
303 : ~RibState();
304 1779 : void ManagedDelete() {}  // Deliberately a no-op; NOTE(review): presumably required by LifetimeRef<RibState> - confirm.
305 :
306 130417 : iterator begin() { return pending_peer_rib_list_.begin(); }  // Iterates the pending list, not peer_rib_list_.
307 299453 : iterator end() { return pending_peer_rib_list_.end(); }  // End of the pending list.
308 :
309 : void EnqueuePeerRibState(PeerRibState *prs);
310 : void ClearPeerRibStateList();
311 :
312 : void InsertPeerRibState(PeerRibState *prs);
313 : bool RemovePeerRibState(PeerRibState *prs);
314 :
315 : void FillRoutingInstanceTableInfo(ShowRoutingInstanceTable *srit) const;
316 :
317 1098125 : BgpTable *table() const { return table_; }
318 130417 : void increment_walk_count() { walk_count_++; }  // Plain (non-atomic) increment.
319 :
320 : private:
321 : BgpMembershipManager *manager_;
322 : BgpTable *table_;
323 : uint32_t request_count_;
324 : uint32_t walk_count_;  // Bumped by increment_walk_count().
325 : PeerRibList peer_rib_list_;  // All PeerRibStates for this RibState; used only for introspect.
326 : PeerRibList pending_peer_rib_list_;  // PeerRibStates needing an action during the next table walk.
327 : LifetimeRef<RibState> table_delete_ref_;
328 :
329 : DISALLOW_COPY_AND_ASSIGN(RibState);
330 : };
331 :
332 : //
333 : // This class represents the membership of an IPeer in a BgpTable. The result
334 : // of this membership is a RibOut instance. An instance of a PeerRibState is
335 : // created when an IPeer registers with a BgpTable and gets deleted when the
336 : // IPeer unregisters from the BgpTable.
337 : //
338 : // A PeerState has a map of PeerRibStates keyed by RibState pointer.
339 : // A PeerRibState is on a list of PeerRibStates in its RibState.
340 : // If a PeerRibState has a pending action, it's also on the pending list in
341 : // the RibState.
342 : // The action is NONE in steady state.
343 : //
344 : class BgpMembershipManager::PeerRibState {  // Membership of one IPeer (PeerState) in one BgpTable (RibState).
345 : public:
346 : PeerRibState(BgpMembershipManager *manager, PeerState *ps, RibState *rs);
347 : ~PeerRibState();
348 :
349 : void RegisterRibOut(const RibExportPolicy &policy);
350 : void UnregisterRibOut();
351 : void DeactivateRibOut();
352 : void UnregisterRibIn();
353 : void WalkRibIn();
354 :
355 : void FillMembershipInfo(ShowMembershipPeerInfo *smpi) const;  // Introspect helper: fills the caller-provided smpi.
356 :
357 10480 : const IPeer *peer() const { return ps_->peer(); }  // Forwards to the owning PeerState.
358 335963 : PeerState *peer_state() { return ps_; }
359 : const PeerState *peer_state() const { return ps_; }
360 158964 : RibState *rib_state() { return rs_; }
361 163182 : RibOut *ribout() const { return ribout_; }
362 163182 : int ribout_index() const { return ribout_index_; }
363 12360 : const BgpTable *table() const { return rs_->table(); }  // Forwards to the RibState.
364 :
365 1281310 : BgpMembershipManager::Action action() const { return action_; }
366 169114 : void set_action(BgpMembershipManager::Action action) { action_ = action; }
367 169116 : void clear_action() { action_ = BgpMembershipManager::NONE; }  // Resets to NONE, the steady-state value.
368 601695 : bool ribin_registered() const { return ribin_registered_; }
369 168082 : void set_ribin_registered(bool value) { ribin_registered_ = value; }
370 566712 : bool ribout_registered() const { return ribout_registered_; }
371 40 : void set_ribout_registered(bool value) { ribout_registered_ = value; }
372 68278 : int instance_id() const { return instance_id_; }
373 233661 : void set_instance_id(int instance_id) { instance_id_ = instance_id; }
374 66844 : uint64_t subscription_gen_id() const { return subscription_gen_id_; }
375 152030 : void set_subscription_gen_id(uint64_t subscription_gen_id) {
376 152030 : subscription_gen_id_ = subscription_gen_id;
377 152030 : }
378 :
379 : private:
380 : BgpMembershipManager *manager_;
381 : PeerState *ps_;  // The peer side of this membership.
382 : RibState *rs_;  // The table side of this membership.
383 : RibOut *ribout_;  // The RibOut instance resulting from this membership.
384 : int ribout_index_;  // NOTE(review): presumably this peer's index within ribout_ - confirm.
385 : BgpMembershipManager::Action action_;  // Pending action; NONE in steady state.
386 : bool ribin_registered_;
387 : bool ribout_registered_;
388 : int instance_id_;
389 : uint64_t subscription_gen_id_;
390 :
391 : DISALLOW_COPY_AND_ASSIGN(PeerRibState);
392 : };
393 :
394 : //
395 : // This class is responsible for efficient implementation of BgpTable walks
396 : // for the BgpMembershipManager. It accepts walk requests for any number of
397 : // RibStates and triggers table walks one at a time. It has a maximum of one
398 : // ongoing table walk at any given time.
399 : //
400 : // The RibStateList contains all RibStates for which walks have not yet been
401 : // started. The Walker removes the first RibState from the list and starts a
402 : // table walk for it.
403 : //
404 : // The RibStateSet is used to prevent duplicates in the RibStateList. Using
405 : // just the RibStateSet to maintain the pending RibStates would have caused
406 : // problems if the same RibState is enqueued repeatedly. In that case, the
407 : // Walker would walk the same BgpTable repeatedly and starve out all other
408 : // RibStates. Using the RibStateSet and RibStateList together prevents this
409 : // problem.
410 : //
411 : // Items are inserted into RibStateList either from the bgp::PeerMembership
412 : // task or from other tasks that invoke the BgpMembershipManager public APIs.
413 : // There are no issues with concurrent access in the former case. In the latter
414 : // case, access is serialized because of the mutex in BgpMembershipManager.
415 : //
416 : // The Walker creates temporary internal state when it starts a table walk so
417 : // that walk callbacks for each DBEntry can be handled with minimal processing
418 : // overhead. Details on this temporary state are as follows:
419 : //
420 : // - walk_ref_ is the walker for the current walk
421 : // - rs_ is the RibState for which the walk was started
422 : // - peer_rib_list_ is the list of PeerRibStates for the current RibState
423 : // that have a pending action. The pending list in RibState is logically
424 : // moved to this field. This allows the RibState to accumulate a new set
425 : // of pending PeerRibStates that can be serviced in a subsequent walk.
426 : // The peer_rib_list_ is used to create and enqueue events when the table
427 : // walk finishes.
428 : // - peer_list_ is the list of IPeers to be notified about BgpPaths added
429 : // by them for RibIn processing.
430 : // - ribout_state_map_ is a map of RibOutStates that need to be processed
431 : // for each route.
432 : // - ribout_state_list_ is a list of same RibOutStates as ribout_state_map_.
433 : // It allows simpler traversal compared to the ribout_state_map_ when each
434 : // DBEntry is processed.
435 : //
436 : // A RibOutState is created for each unique RibOut in the PeerRibStates in
437 : // peer_rib_list_. Its join and leave bitsets are based on the action in
438 : // the PeerRibStates.
439 : //
440 : // A TaskTrigger that runs in context of bgp::PeerMembership task is used to
441 : // handle start and finish of table walks. This avoids concurrency issues in
442 : // accessing/clearing the pending list in the RibState. Note that TaskTrigger
443 : // in this class and the WorkQueue in BgpMembershipManager both use instance
444 : // id of 0, so they can't run concurrently.
445 : //
446 : class BgpMembershipManager::Walker {  // Runs BgpTable walks for queued RibStates, at most one walk at a time.
447 : public:
448 : explicit Walker(BgpMembershipManager *manager);
449 : ~Walker();
450 :
451 : void Enqueue(RibState *rs);  // Requests a table walk for the given RibState.
452 : bool IsQueueEmpty() const;
453 :
454 : private:
455 : friend class BgpMembershipTest;
456 :
457 : class RibOutState {  // Per-RibOut join/leave peer bitsets for the current walk.
458 : public:
459 126613 : explicit RibOutState(RibOut *ribout) : ribout_(ribout) { }  // Both bitsets are default-constructed.
460 126613 : ~RibOutState() { }
461 :
462 452515 : RibOut *ribout() { return ribout_; }
463 81591 : void JoinPeer(int index) { join_bitset_.set(index); }  // Sets bit 'index' in the join set.
464 81591 : void LeavePeer(int index) { leave_bitset_.set(index); }  // Sets bit 'index' in the leave set.
465 452478 : const RibPeerSet &join_bitset() { return join_bitset_; }
466 452725 : const RibPeerSet &leave_bitset() { return leave_bitset_; }
467 :
468 : private:
469 : RibOut *ribout_;
470 : RibPeerSet join_bitset_;
471 : RibPeerSet leave_bitset_;
472 :
473 : DISALLOW_COPY_AND_ASSIGN(RibOutState);
474 : };
475 :
476 : typedef BgpMembershipManager::Event Event;
477 : typedef BgpMembershipManager::RibState RibState;
478 : typedef BgpMembershipManager::PeerRibState PeerRibState;
479 : typedef BgpMembershipManager::PeerRibList PeerRibList;
480 : typedef std::set<RibState *> RibStateSet;
481 : typedef std::list<RibState *> RibStateList;
482 : typedef std::map<RibOut *, RibOutState *> RibOutStateMap;  // Keyed by RibOut pointer.
483 : typedef std::list<RibOutState *> RibOutStateList;
484 : typedef std::set<const IPeer *> PeerList;  // A std::set despite the "List" name.
485 :
486 : RibOutState *LocateRibOutState(RibOut *ribout);  // NOTE(review): presumably creates the entry on miss - confirm in the .cc.
487 : bool WalkCallback(DBTablePartBase *tpart, DBEntryBase *db_entry);  // Per-DBEntry callback during a table walk.
488 : void WalkDoneCallback(DBTableBase *table);
489 : void WalkStart();
490 : void WalkFinish();
491 : bool WalkTrigger();  // NOTE(review): presumably the TaskTrigger handler for trigger_ - confirm.
492 :
493 : // Testing only.
494 : void SetQueueDisable(bool value);
495 29 : size_t GetQueueSize() const { return rib_state_list_size_; }  // Returns the cached size, not rib_state_list_.size().
496 4 : size_t GetPeerListSize() const { return peer_list_.size(); }
497 2 : size_t GetPeerRibListSize() const { return peer_rib_list_.size(); }
498 4 : size_t GetRibOutStateListSize() const { return ribout_state_list_size_; }  // Returns the cached size, not the list's size().
499 : void PostponeWalk();
500 : void ResumeWalk();
501 :
502 : BgpMembershipManager *manager_;
503 : RibStateSet rib_state_set_;  // Prevents duplicates in rib_state_list_.
504 : RibStateList rib_state_list_;  // RibStates whose walks have not yet been started.
505 : boost::scoped_ptr<TaskTrigger> trigger_;  // Runs in bgp::PeerMembership task context to start/finish walks.
506 :
507 : bool postpone_walk_;  // NOTE(review): presumably toggled by PostponeWalk()/ResumeWalk() - confirm.
508 : bool walk_started_;
509 : bool walk_completed_;
510 : DBTable::DBTableWalkRef walk_ref_;  // Walker for the current walk.
511 : RibState *rs_;  // RibState for which the current walk was started.
512 : PeerRibList peer_rib_list_;  // Pending PeerRibStates taken over from rs_ for the current walk.
513 : PeerList peer_list_;  // IPeers to be notified about BgpPaths they added (RibIn processing).
514 : RibOutStateMap ribout_state_map_;  // RibOutStates that need processing for each route.
515 : RibOutStateList ribout_state_list_;  // Same RibOutStates as the map, for simpler per-DBEntry traversal.
516 : size_t rib_state_list_size_;  // NOTE(review): presumably mirrors rib_state_list_.size() - confirm it is kept in sync.
517 : size_t ribout_state_list_size_;  // NOTE(review): presumably mirrors ribout_state_list_.size() - confirm it is kept in sync.
518 :
519 : DISALLOW_COPY_AND_ASSIGN(Walker);
520 : };
521 :
522 : #endif // SRC_BGP_BGP_MEMBERSHIP_H_
|