Tor 0.4.9.8
Loading...
Searching...
No Matches
conflux.c
Go to the documentation of this file.
1/* Copyright (c) 2021, The Tor Project, Inc. */
2/* See LICENSE for licensing information */
3
4/**
5 * \file conflux.c
6 * \brief Conflux multipath core algorithms
7 */
8
9#include "core/or/relay_msg.h"
10#define TOR_CONFLUX_PRIVATE
11
12#include "core/or/or.h"
13
14#include "core/or/circuit_st.h"
15#include "core/or/sendme.h"
16#include "core/or/relay.h"
20#include "core/or/circuitlist.h"
21#include "core/or/circuituse.h"
22#include "core/or/conflux.h"
26#include "core/or/conflux_st.h"
29#include "app/config/config.h"
30
31/** One million microseconds in a second */
32#define USEC_PER_SEC 1000000
33
34static inline uint64_t cwnd_sendable(const circuit_t *on_circ,
35 uint64_t in_usec, uint64_t our_usec);
36
37/* Track the total number of bytes used by all ooo_q so it can be used by the
38 * OOM handler to assess.
39 *
40 * When adding or subtracting to this value, use conflux_msg_alloc_cost(). */
41static uint64_t total_ooo_q_bytes = 0;
42
43/**
44 * Return the total number of required allocated to store `msg`.
45 */
46size_t
48{
49 return msg->msg->length + sizeof(conflux_msg_t) + sizeof(relay_msg_t);
50}
51
52/**
53 * Determine if we should multiplex a specific relay command or not.
54 *
55 * TODO: Version of this that is the set of forbidden commands
56 * on linked circuits
57 */
58bool
59conflux_should_multiplex(int relay_command)
60{
61 switch (relay_command) {
62 /* These are all fine to multiplex, and must be
63 * so that ordering is preserved */
64 case RELAY_COMMAND_BEGIN:
65 case RELAY_COMMAND_DATA:
66 case RELAY_COMMAND_END:
67 case RELAY_COMMAND_CONNECTED:
68 return true;
69
70 /* We can't multiplex these because they are
71 * circuit-specific */
72 case RELAY_COMMAND_SENDME:
73 case RELAY_COMMAND_EXTEND:
74 case RELAY_COMMAND_EXTENDED:
75 case RELAY_COMMAND_TRUNCATE:
76 case RELAY_COMMAND_TRUNCATED:
77 case RELAY_COMMAND_DROP:
78 return false;
79
80 /* We must multiplex RESOLVEs because their ordering
81 * impacts begin/end. */
82 case RELAY_COMMAND_RESOLVE:
83 case RELAY_COMMAND_RESOLVED:
84 return true;
85
86 /* These are all circuit-specific */
87 case RELAY_COMMAND_BEGIN_DIR:
88 case RELAY_COMMAND_EXTEND2:
89 case RELAY_COMMAND_EXTENDED2:
90 case RELAY_COMMAND_ESTABLISH_INTRO:
91 case RELAY_COMMAND_ESTABLISH_RENDEZVOUS:
92 case RELAY_COMMAND_INTRODUCE1:
93 case RELAY_COMMAND_INTRODUCE2:
94 case RELAY_COMMAND_RENDEZVOUS1:
95 case RELAY_COMMAND_RENDEZVOUS2:
96 case RELAY_COMMAND_INTRO_ESTABLISHED:
97 case RELAY_COMMAND_RENDEZVOUS_ESTABLISHED:
98 case RELAY_COMMAND_INTRODUCE_ACK:
99 case RELAY_COMMAND_PADDING_NEGOTIATE:
100 case RELAY_COMMAND_PADDING_NEGOTIATED:
101 return false;
102
103 /* These must be multiplexed because their ordering
104 * relative to BEGIN/END must be preserved */
105 case RELAY_COMMAND_XOFF:
106 case RELAY_COMMAND_XON:
107 return true;
108
109 /* These two are not multiplexed, because they must
110 * be processed immediately to update sequence numbers
111 * before any other cells are processed on the circuit */
112 case RELAY_COMMAND_CONFLUX_SWITCH:
113 case RELAY_COMMAND_CONFLUX_LINK:
114 case RELAY_COMMAND_CONFLUX_LINKED:
115 case RELAY_COMMAND_CONFLUX_LINKED_ACK:
116 return false;
117
118 default:
119 log_warn(LD_BUG, "Conflux asked to multiplex unknown relay command %d",
120 relay_command);
121 return false;
122 }
123}
124
125/** Return the leg for a circuit in a conflux set. Return NULL if not found. */
128{
129 conflux_leg_t *leg_found = NULL;
130 tor_assert(cfx);
131 tor_assert(cfx->legs);
132
133 // Find the leg that the cell is written on
135 if (leg->circ == circ) {
136 leg_found = leg;
137 break;
138 }
139 } CONFLUX_FOR_EACH_LEG_END(leg);
140
141 return leg_found;
142}
143
144/**
145 * Gets the maximum last_seq_sent from all legs.
146 */
147uint64_t
149{
150 uint64_t max_seq_sent = 0;
151
153 if (leg->last_seq_sent > max_seq_sent) {
154 max_seq_sent = leg->last_seq_sent;
155 }
156 } CONFLUX_FOR_EACH_LEG_END(leg);
157
158 return max_seq_sent;
159}
160
161/**
162 * Gets the maximum last_seq_recv from all legs.
163 */
164uint64_t
166{
167 uint64_t max_seq_recv = 0;
168
170 if (leg->last_seq_recv > max_seq_recv) {
171 max_seq_recv = leg->last_seq_recv;
172 }
173 } CONFLUX_FOR_EACH_LEG_END(leg);
174
175 return max_seq_recv;
176}
177
178/** Return the total memory allocation the circuit is using by conflux. If this
179 * circuit is not a Conflux circuit, 0 is returned. */
180uint64_t
182{
183 if (circ->conflux) {
184 return smartlist_len(circ->conflux->ooo_q) * sizeof(void*)
185 + circ->conflux->ooo_q_alloc_cost;
186 }
187 return 0;
188}
189
190/** Return the total memory allocation in bytes by the subsystem.
191 *
192 * At the moment, only out of order queues are consiered. */
193uint64_t
195{
196 return total_ooo_q_bytes;
197}
198
199/** The OOM handler is asking us to try to free at least bytes_to_remove. */
200size_t
201conflux_handle_oom(size_t bytes_to_remove)
202{
203 (void) bytes_to_remove;
204
205 /* We are not doing anything on the sets, the OOM handler will trigger a
206 * circuit clean up which will affect conflux sets, by pruning oldest
207 * circuits. */
208
209 log_info(LD_CIRC, "OOM handler triggered. OOO queus allocation: %" PRIu64,
210 total_ooo_q_bytes);
211 return 0;
212}
213
214/** Free all cells in the ooo_q of the given cfx which updates the
215 * total_ooo_q_bytes.
216 *
217 * Must be called before freeing the queue itself. */
218void
220{
221 tor_assert(cfx);
222 tor_assert(cfx->ooo_q);
223
225 size_t cost = conflux_msg_alloc_cost(msg);
226 if (BUG(cost > total_ooo_q_bytes)) {
227 total_ooo_q_bytes = 0;
228 } else {
229 total_ooo_q_bytes -= cost;
230 }
231 conflux_relay_msg_free(msg);
232 });
233 smartlist_clear(cfx->ooo_q);
234}
235
236/**
237 * Returns true if a circuit has package window space to send, and is
238 * not blocked locally.
239 */
240static inline bool
242{
243 const congestion_control_t *cc = circuit_ccontrol(circ);
244 bool cc_sendable = true;
245
246 /* We consider ourselves blocked if we're within 1 sendme of the
247 * cwnd, because inflight is decremented before this check */
248 // TODO-329-TUNING: This subtraction not be right.. It depends
249 // on call order wrt decisions and sendme arrival
250 if (cc->inflight >= cc->cwnd) {
251 cc_sendable = false;
252 }
253
254 /* Origin circuits use the package window of the last hop, and
255 * have an outbound cell direction (towards exit). Otherwise,
256 * there is no cpath and direction is inbound. */
257 if (CIRCUIT_IS_ORIGIN(circ)) {
258 return cc_sendable && !circ->circuit_blocked_on_n_chan;
259 } else {
260 return cc_sendable && !circ->circuit_blocked_on_p_chan;
261 }
262}
263
264/**
265 * Return the circuit with the minimum RTT. Do not use any
266 * other circuit.
267 *
268 * This algorithm will minimize RTT always, and will not provide
269 * any throughput benefit. We expect it to be useful for VoIP/UDP
270 * use cases. Because it only uses one circuit on a leg at a time,
271 * it can have more than one circuit per guard (ie: to find
272 * lower-latency middles for the path).
273 */
274static const circuit_t *
276{
277 uint64_t min_rtt = UINT64_MAX;
278 const circuit_t *circ = NULL;
279
280 /* Can't get here without any legs. */
282
284
285 /* Ignore circuits with no RTT measurement */
286 if (leg->circ_rtts_usec && leg->circ_rtts_usec < min_rtt) {
287 circ = leg->circ;
288 min_rtt = leg->circ_rtts_usec;
289 }
290 } CONFLUX_FOR_EACH_LEG_END(leg);
291
292 /* If the minRTT circuit can't send, dont send on any circuit. */
293 if (!circ || !circuit_ready_to_send(circ)) {
294 return NULL;
295 }
296 return circ;
297}
298
299/**
300 * Favor the circuit with the lowest RTT that still has space in the
301 * congestion window.
302 *
303 * This algorithm will maximize total throughput at the expense of
304 * bloating out-of-order queues.
305 */
306static const circuit_t *
308{
309 uint64_t low_rtt = UINT64_MAX;
310 const circuit_t *circ = NULL;
311
312 /* Can't get here without any legs. */
314
316 /* If the package window is full, skip it */
317 if (!circuit_ready_to_send(leg->circ)) {
318 continue;
319 }
320
321 /* Ignore circuits with no RTT */
322 if (leg->circ_rtts_usec && leg->circ_rtts_usec < low_rtt) {
323 low_rtt = leg->circ_rtts_usec;
324 circ = leg->circ;
325 }
326 } CONFLUX_FOR_EACH_LEG_END(leg);
327
328 /* At this point, if we found a circuit, we've already validated that its
329 * congestion window has room. */
330 return circ;
331}
332
333/**
334 * Returns the amount of room in a cwnd on a circuit.
335 */
336static inline uint64_t
338{
339 const congestion_control_t *cc = circuit_ccontrol(on_circ);
340 tor_assert(cc);
341
342 if (cc->cwnd < cc->inflight)
343 return 0;
344
345 return cc->cwnd - cc->inflight;
346}
347
348/**
349 * Return the amount of congestion window we can send on
350 * on_circ during in_usec. However, if we're still in
351 * slow-start, send the whole window to establish the true
352 * cwnd.
353 */
354static inline uint64_t
355cwnd_sendable(const circuit_t *on_circ, uint64_t in_usec,
356 uint64_t our_usec)
357{
358 const congestion_control_t *cc = circuit_ccontrol(on_circ);
359 tor_assert(cc);
360 uint64_t cwnd_adjusted = cwnd_available(on_circ);
361
362 if (our_usec == 0 || in_usec == 0) {
363 log_fn(LOG_PROTOCOL_WARN, LD_CIRC,
364 "cwnd_sendable: Missing RTT data. in_usec: %" PRIu64
365 " our_usec: %" PRIu64, in_usec, our_usec);
366 return cwnd_adjusted;
367 }
368
369 if (cc->in_slow_start) {
370 return cwnd_adjusted;
371 } else {
372 /* For any given leg, it has min_rtt/2 time before the 'primary'
373 * leg's acks start arriving. So, the amount of data this
374 * 'secondary' leg can send while the min_rtt leg transmits these
375 * acks is:
376 * (cwnd_leg/(leg_rtt/2))*min_rtt/2 = cwnd_leg*min_rtt/leg_rtt.
377 */
378 uint64_t sendable = cwnd_adjusted*in_usec/our_usec;
379 return MIN(cc->cwnd, sendable);
380 }
381}
382
383/**
384 * Returns true if we can switch to a new circuit, false otherwise.
385 *
386 * This function assumes we're primarily switching between two circuits,
387 * the current and the prev. If we're using more than two circuits, we
388 * need to set cfx_drain_pct to 100.
389 */
390static inline bool
392{
393 /* If we still expected to send more cells on this circuit,
394 * we're only allowed to switch if the previous circuit emptied. */
395 if (cfx->cells_until_switch > 0) {
396 /* If there is no prev leg, skip the inflight check. */
397 if (!cfx->prev_leg) {
398 return false;
399 }
400 const congestion_control_t *ccontrol =
402
403 /* If the inflight count has drained to below cfx_drain_pct
404 * of the congestion window, then we can switch.
405 * We check the sendme_inc because there may be un-ackable
406 * data in inflight as well, and we can still switch then. */
407 // TODO-329-TUNING: Should we try to switch if the prev_leg is
408 // ready to send, instead of this?
409 if (ccontrol->inflight < ccontrol->sendme_inc ||
410 100*ccontrol->inflight <=
411 conflux_params_get_drain_pct()*ccontrol->cwnd) {
412 return true;
413 }
414
415 return false;
416 }
417
418 return true;
419}
420
421/**
422 * Favor the circuit with the lowest RTT that still has space in the
423 * congestion window up to the ratio of RTTs.
424 *
425 * This algorithm should only use auxillary legs up to the point
426 * where their data arrives roughly the same time as the lowest
427 * RTT leg. It will not utilize the full cwnd of auxillary legs,
428 * except in slow start. Therefore, out-of-order queue bloat should
429 * be minimized to just the slow-start phase.
430 */
431static const circuit_t *
433{
434 uint64_t min_rtt = UINT64_MAX;
435 const conflux_leg_t *leg = NULL;
436
437 /* Can't get here without any legs. */
439
440 /* Find the leg with the minimum RTT.*/
442 /* Ignore circuits with invalid RTT */
443 if (l->circ_rtts_usec && l->circ_rtts_usec < min_rtt) {
444 min_rtt = l->circ_rtts_usec;
445 leg = l;
446 }
447 } CONFLUX_FOR_EACH_LEG_END(l);
448
449 /* If the package window is has room, use it */
450 if (leg && circuit_ready_to_send(leg->circ)) {
451 return leg->circ;
452 }
453
454 leg = NULL;
455
457 if (!circuit_ready_to_send(l->circ)) {
458 continue;
459 }
460
461 /* Pick a 'min_leg' with the lowest RTT that still has
462 * room in the congestion window. Note that this works for
463 * min_leg itself, up to inflight. */
464 if (l->circ_rtts_usec &&
465 cwnd_sendable(l->circ, min_rtt, l->circ_rtts_usec) > 0) {
466 leg = l;
467 }
468 } CONFLUX_FOR_EACH_LEG_END(l);
469
470 /* If the circuit can't send, don't send on any circuit. */
471 if (!leg || !circuit_ready_to_send(leg->circ)) {
472 return NULL;
473 }
474 return leg->circ;
475}
476
477/**
478 * This function is called when we want to send a relay cell on a
479 * conflux, as well as when we want to compute available space in
480 * to package from streams.
481 *
482 * It determines the circuit that relay command should be sent on,
483 * and sends a SWITCH cell if necessary.
484 *
485 * It returns the circuit we should send on. If no circuits are ready
486 * to send, it returns NULL.
487 */
488circuit_t *
490 circuit_t *orig_circ,
491 uint8_t relay_command)
492{
493 /* If this command should not be multiplexed, send it on the original
494 * circuit */
495 if (!conflux_should_multiplex(relay_command)) {
496 return orig_circ;
497 }
498
499 circuit_t *new_circ = conflux_decide_next_circ(cfx);
500
501 /* Because our congestion window only cover relay data command, we can end up
502 * in a situation where we need to send non data command when all circuits
503 * are at capacity. For those cases, keep using the *current* leg,
504 * so these commands arrive in-order. */
505 if (!new_circ && relay_command != RELAY_COMMAND_DATA) {
506 /* Curr leg should be set, because conflux_decide_next_circ() should
507 * have set it earlier. No BUG() here because the only caller BUG()s. */
508 if (!cfx->curr_leg) {
509 log_warn(LD_BUG, "No current leg for conflux with relay command %d",
510 relay_command);
511 return NULL;
512 }
513 return cfx->curr_leg->circ;
514 }
515
516 /*
517 * If we are switching to a new circuit, we need to send a SWITCH command.
518 * We also need to compute an estimate of how much data we can send on
519 * the new circuit before we are allowed to switch again, to rate
520 * limit the frequency of switching.
521 */
522 if (new_circ) {
523 conflux_leg_t *new_leg = conflux_get_leg(cfx, new_circ);
524 tor_assert(cfx->curr_leg);
525
526 if (new_circ != cfx->curr_leg->circ) {
527 // TODO-329-TUNING: This is one mechanism to rate limit switching,
528 // which should reduce the OOQ mem. However, we're not going to do that
529 // until we get some data on if the memory usage is high
530 cfx->cells_until_switch = 0;
531 //cwnd_sendable(new_circ,cfx->curr_leg->circ_rtts_usec,
532 // new_leg->circ_rtts_usec);
533
535
536 cfx->prev_leg = cfx->curr_leg;
537 cfx->curr_leg = new_leg;
538
539 tor_assert(cfx->prev_leg);
540 tor_assert(cfx->curr_leg);
541
542 if (cfx->curr_leg->last_seq_sent > cfx->prev_leg->last_seq_sent) {
543 /* Having incoherent sequence numbers, log warn about it but rate limit
544 * it to every hour so we avoid redundent report. */
545 static ratelim_t rlimit = RATELIM_INIT(60 * 60);
547 "Current conflux leg last_seq_sent=%"PRIu64
548 " is above previous leg at %" PRIu64 ". Closing set.",
550 cfx->prev_leg->last_seq_sent);
552 END_CIRC_REASON_TORPROTOCOL);
553 return NULL;
554 }
555
556 uint64_t relative_seq = cfx->prev_leg->last_seq_sent -
558
559 /* On failure to send the SWITCH, we close everything. This means we have
560 * a protocol error or the sending failed and the circuit is closed. */
561 if (!conflux_send_switch_command(cfx->curr_leg->circ, relative_seq)) {
563 END_CIRC_REASON_TORPROTOCOL);
564 return NULL;
565 }
567 }
568 }
569
570 return new_circ;
571}
572
573/** Called after conflux actually sent a cell on a circuit.
574 * This function updates sequence number counters, and
575 * switch counters.
576 */
577void
578conflux_note_cell_sent(conflux_t *cfx, circuit_t *circ, uint8_t relay_command)
579{
580 conflux_leg_t *leg = NULL;
581
582 if (!conflux_should_multiplex(relay_command)) {
583 return;
584 }
585
586 leg = conflux_get_leg(cfx, circ);
587 if (leg == NULL) {
588 log_fn(LOG_PROTOCOL_WARN, LD_BUG, "No Conflux leg after sending a cell");
589 return;
590 }
591
592 leg->last_seq_sent++;
593
594 if (cfx->cells_until_switch > 0) {
595 cfx->cells_until_switch--;
596 }
597}
598
599/** Find the leg with lowest non-zero curr_rtt_usec, and
600 * pick it for our current leg. */
601static inline bool
603{
604 conflux_leg_t *min_leg = NULL;
605
607 /* We need to skip 0-RTT legs, since this can happen at the exit
608 * when there is a race between BEGIN and LINKED_ACK, and BEGIN
609 * wins the race. The good news is that because BEGIN won,
610 * we don't need to consider those other legs, since they are
611 * slower. */
612 if (leg->circ_rtts_usec > 0) {
613 if (!min_leg || leg->circ_rtts_usec < min_leg->circ_rtts_usec) {
614 min_leg = leg;
615 }
616 }
617 } CONFLUX_FOR_EACH_LEG_END(leg);
618
619 if (!min_leg) {
620 // Get the 0th leg; if it does not exist, log the set.
621 // Bug 40827 managed to hit this, so let's dump the sets
622 // in case it happens again.
623 if (BUG(smartlist_len(cfx->legs) <= 0)) {
624 // Since we have no legs, we have no idea if this is really a client
625 // or server set. Try to find any that match:
626 log_warn(LD_BUG, "Matching client sets:");
627 conflux_log_set(LOG_WARN, cfx, true);
628 log_warn(LD_BUG, "Matching server sets:");
629 conflux_log_set(LOG_WARN, cfx, false);
630 log_warn(LD_BUG, "End conflux set dump");
631 return false;
632 }
633
634 min_leg = smartlist_get(cfx->legs, 0);
635 tor_assert(min_leg);
636 if (BUG(min_leg->linked_sent_usec == 0)) {
637 log_warn(LD_BUG, "Conflux has no legs with non-zero RTT. "
638 "Using first leg.");
640 }
641 }
642
643 // TODO-329-TUNING: We may want to initialize this to a cwnd, to
644 // minimize early switching?
645 //cfx->cells_until_switch = circuit_ccontrol(min_leg->circ)->cwnd;
646 cfx->cells_until_switch = 0;
647
648 cfx->curr_leg = min_leg;
649
650 return true;
651}
652
653/**
654 * Returns the circuit that conflux would send on next, if
655 * conflux_decide_circ_for_send were called. This is used to compute
656 * available space in the package window.
657 */
658circuit_t *
660{
661 // TODO-329-TUNING: Temporarily validate legs here. We can remove
662 // this once tuning is complete.
664
665 /* If the conflux set is tearing down and has no current leg,
666 * bail and give up */
667 if (cfx->in_full_teardown) {
668 return NULL;
669 }
670
671 /* If we don't have a current leg yet, pick one.
672 * (This is the only non-const operation in this function). */
673 if (!cfx->curr_leg) {
674 if (!conflux_pick_first_leg(cfx))
675 return NULL;
676 }
677
678 /* First, check if we can switch. */
679 if (!conflux_can_switch(cfx)) {
680 tor_assert(cfx->curr_leg);
681 circuit_t *curr_circ = cfx->curr_leg->circ;
682
683 /* If we can't switch, and the current circuit can't send,
684 * then return null. */
685 if (circuit_ready_to_send(curr_circ)) {
686 return curr_circ;
687 }
688 log_info(LD_CIRC, "Conflux can't switch; no circuit to send on.");
689 return NULL;
690 }
691
692 switch (cfx->params.alg) {
693 case CONFLUX_ALG_MINRTT: // latency (no ooq)
695 case CONFLUX_ALG_LOWRTT: // high throughput (high oooq)
697 case CONFLUX_ALG_CWNDRTT: // throughput (low oooq)
699 default:
700 return NULL;
701 }
702}
703
704/**
705 * Called when we have a new RTT estimate for a circuit.
706 */
707void
708conflux_update_rtt(conflux_t *cfx, circuit_t *circ, uint64_t rtt_usec)
709{
710 conflux_leg_t *leg = conflux_get_leg(cfx, circ);
711
712 if (!leg) {
713 log_warn(LD_BUG, "Got RTT update for circuit not in conflux");
714 return;
715 }
716
717 // Update RTT
718 leg->circ_rtts_usec = rtt_usec;
719
720 // TODO-329-ARTI: For UDP latency targeting, arti could decide to launch
721 // new a test leg to potentially replace this one, if a latency target
722 // was requested and we now exceed it. Since C-Tor client likely
723 // will not have UDP support, we aren't doing this here.
724}
725
726/**
727 * Comparison function for ooo_q pqueue.
728 *
729 * Ensures that lower sequence numbers are at the head of the pqueue.
730 */
731static int
732conflux_queue_cmp(const void *a, const void *b)
733{
734 // Compare a and b as conflux_cell_t using the seq field, and return a
735 // comparison result such that the lowest seq is at the head of the pqueue.
736 const conflux_msg_t *cell_a = a;
737 const conflux_msg_t *cell_b = b;
738
739 tor_assert(cell_a);
740 tor_assert(cell_b);
741
742 if (cell_a->seq < cell_b->seq) {
743 return -1;
744 } else if (cell_a->seq > cell_b->seq) {
745 return 1;
746 } else {
747 return 0;
748 }
749}
750
751/**
752 * Get the congestion control object for a conflux circuit.
753 *
754 * Because conflux can only be negotiated with the last hop, we
755 * can use the last hop of the cpath to obtain the congestion
756 * control object for origin circuits. For non-origin circuits,
757 * we can use the circuit itself.
758 */
761{
762 const congestion_control_t *ccontrol = NULL;
763 tor_assert(circ);
764
765 if (CIRCUIT_IS_ORIGIN(circ)) {
766 tor_assert(CONST_TO_ORIGIN_CIRCUIT(circ)->cpath);
767 tor_assert(CONST_TO_ORIGIN_CIRCUIT(circ)->cpath->prev);
768 ccontrol = CONST_TO_ORIGIN_CIRCUIT(circ)->cpath->prev->ccontrol;
769 } else {
770 ccontrol = circ->ccontrol;
771 }
772
773 /* Conflux circuits always have congestion control*/
774 tor_assert(ccontrol);
775 return ccontrol;
776}
777
778// TODO-329-TUNING: For LowRTT, we can at most switch every SENDME,
779// but for BLEST, we should switch at most every cwnd.. But
780// we do not know the other side's CWND here.. We can at best
781// asssume it is above the cwnd_min
782#define CONFLUX_MIN_LINK_INCREMENT 31
783/**
784 * Validate and handle RELAY_COMMAND_CONFLUX_SWITCH.
785 */
786int
788 crypt_path_t *layer_hint,
789 const relay_msg_t *msg)
790{
791 tor_assert(in_circ);
792 tor_assert(msg);
793
794 conflux_t *cfx = in_circ->conflux;
795 uint32_t relative_seq;
796 conflux_leg_t *leg;
797
798 if (!conflux_is_enabled(in_circ)) {
799 circuit_mark_for_close(in_circ, END_CIRC_REASON_TORPROTOCOL);
800 return -1;
801 }
802
803 /* If there is no conflux object negotiated, this is invalid.
804 * log and close circ */
805 if (!cfx) {
806 log_warn(LD_BUG, "Got a conflux switch command on a circuit without "
807 "conflux negotiated. Closing circuit.");
808
809 circuit_mark_for_close(in_circ, END_CIRC_REASON_TORPROTOCOL);
810 return -1;
811 }
812
813 // TODO-329-TUNING: Temporarily validate that we have all legs.
814 // After tuning is complete, we can remove this.
816
817 leg = conflux_get_leg(cfx, in_circ);
818
819 /* If we can't find the conflux leg, we got big problems..
820 * Close the circuit. */
821 if (!leg) {
822 log_warn(LD_BUG, "Got a conflux switch command on a circuit without "
823 "conflux leg. Closing circuit.");
824 circuit_mark_for_close(in_circ, END_CIRC_REASON_INTERNAL);
825 return -1;
826 }
827
828 // Check source hop via layer_hint
829 if (!conflux_validate_source_hop(in_circ, layer_hint)) {
830 log_warn(LD_BUG, "Got a conflux switch command on a circuit with "
831 "invalid source hop. Closing circuit.");
832 circuit_mark_for_close(in_circ, END_CIRC_REASON_TORPROTOCOL);
833 return -1;
834 }
835
836 relative_seq = conflux_cell_parse_switch(msg);
837
838 /*
839 * We have to make sure that the switch command is truely
840 * incrementing the sequence number, or else it becomes
841 * a side channel that can be spammed for traffic analysis.
842 */
843 // TODO-329-TUNING: This can happen. Disabling for now..
844 //if (relative_seq < CONFLUX_MIN_LINK_INCREMENT) {
845 // log_warn(LD_CIRC, "Got a conflux switch command with a relative "
846 // "sequence number less than the minimum increment. Closing "
847 // "circuit.");
848 // circuit_mark_for_close(in_circ, END_CIRC_REASON_TORPROTOCOL);
849 // return -1;
850 //}
851
852 // TODO-329-UDP: When Prop#340 exits and was negotiated, ensure we're
853 // in a packed cell, with another cell following, otherwise
854 // this is a spammed side-channel.
855 // - We definitely should never get switches back-to-back.
856 // - We should not get switches across all legs with no data
857 // But before Prop#340, it doesn't make much sense to do this.
858 // C-Tor is riddled with side-channels like this anyway, unless
859 // vanguards is in use. And this feature is not supported by
860 // onion servicees in C-Tor, so we're good there.
861
862 /* Update the absolute sequence number on this leg by the delta.
863 * Since this cell is not multiplexed, we do not count it towards
864 * absolute sequence numbers. We only increment the sequence
865 * numbers for multiplexed cells. Hence there is no +1 here. */
866 leg->last_seq_recv += relative_seq;
867
868 /* Mark this data as validated for controlport and vanguards
869 * dropped cell handling */
870 if (CIRCUIT_IS_ORIGIN(in_circ)) {
871 circuit_read_valid_data(TO_ORIGIN_CIRCUIT(in_circ), msg->length);
872 }
873
874 return 0;
875}
876
877/**
878 * Process an incoming relay cell for conflux. Called from
879 * connection_edge_process_relay_cell().
880 *
881 * Returns true if the conflux system now has well-ordered cells to deliver
882 * to streams, false otherwise.
883 */
884bool
886 crypt_path_t *layer_hint, const relay_msg_t *msg)
887{
888 // TODO-329-TUNING: Temporarily validate legs here. We can remove
889 // this after tuning is complete.
891
892 conflux_leg_t *leg = conflux_get_leg(cfx, in_circ);
893 if (!leg) {
894 log_warn(LD_BUG, "Got a conflux cell on a circuit without "
895 "conflux leg. Closing circuit.");
896 circuit_mark_for_close(in_circ, END_CIRC_REASON_INTERNAL);
897 return false;
898 }
899
900 /* We need to make sure this cell came from the expected hop, or
901 * else it could be a data corruption attack from a middle node. */
902 if (!conflux_validate_source_hop(in_circ, layer_hint)) {
903 circuit_mark_for_close(in_circ, END_CIRC_REASON_TORPROTOCOL);
904 return false;
905 }
906
907 /* Update the running absolute sequence number */
908 leg->last_seq_recv++;
909
910 /* If this cell is next, fast-path it by processing the cell in-place */
911 if (leg->last_seq_recv == cfx->last_seq_delivered + 1) {
912 /* The cell is now ready to be processed, and rest of the queue should
913 * now be checked for remaining elements */
914 cfx->last_seq_delivered++;
915 return true;
916 } else if (leg->last_seq_recv <= cfx->last_seq_delivered) {
917 /* Anyone can mangle these sequence number. */
918 log_fn(LOG_PROTOCOL_WARN, LD_BUG,
919 "Got a conflux cell with a sequence number "
920 "less than the last delivered. Closing circuit.");
921 circuit_mark_for_close(in_circ, END_CIRC_REASON_INTERNAL);
922 return false;
923 } else {
924 /* Both cost and param are in bytes. */
925 if (cfx->ooo_q_alloc_cost >= conflux_params_get_max_oooq()) {
926 /* Log rate limit every hour. In heavy DDoS scenario, this could be
927 * triggered many times so avoid the spam. */
928 static ratelim_t rlimit = RATELIM_INIT(60 * 60);
930 "Conflux OOO queue is at maximum. Currently at "
931 "%"TOR_PRIuSZ " bytes, maximum allowed is %u bytes. "
932 "Closing.",
933 cfx->ooo_q_alloc_cost, conflux_params_get_max_oooq());
934 circuit_mark_for_close(in_circ, END_CIRC_REASON_RESOURCELIMIT);
935 return false;
936 }
937 conflux_msg_t *c_msg = tor_malloc_zero(sizeof(conflux_msg_t));
938 c_msg->seq = leg->last_seq_recv;
939 /* Notice the copy here. Reason is that we don't have ownership of the
940 * message. If we wanted to pull that off, we would need to change the
941 * whole calling stack and unit tests on either not touching it after this
942 * function indicates that it has taken it or never allocate it from the
943 * stack. This is simpler and less error prone but might show up in our
944 * profile (maybe?). The Maze is serious. It needs to be respected. */
945 c_msg->msg = relay_msg_copy(msg);
946 size_t cost = conflux_msg_alloc_cost(c_msg);
947
949 offsetof(conflux_msg_t, heap_idx), c_msg);
950
951 total_ooo_q_bytes += cost;
952 cfx->ooo_q_alloc_cost += cost;
953
954 /* This cell should not be processed yet, and the queue is not ready
955 * to process because the next absolute seqnum has not yet arrived */
956 return false;
957 }
958}
959
960/**
961 * Dequeue the top cell from our queue.
962 *
963 * Returns the cell as a conflux_cell_t, or NULL if the queue is empty
964 * or has a hole.
965 */
968{
969 conflux_msg_t *top = NULL;
970 /* Related to #41162. This is really a consequence of the C-tor maze.
971 * The function above can close a circuit without returning an error
972 * due to several return code ignored. Auditting all of the cell code
973 * path and fixing them to not ignore errors could bring many more
974 * issues as this behavior has been in tor forever. So do the bandaid
975 * fix of bailing if the circuit is closed. */
976 if (circ->marked_for_close) {
977 static ratelim_t rlim = RATELIM_INIT(60 * 60);
978 log_fn_ratelim(&rlim, (circ->conflux == NULL) ? LOG_WARN : LOG_NOTICE,
979 LD_CIRC,
980 "Circuit was closed at %s:%u when dequeuing from OOO",
982 return NULL;
983 }
984 conflux_t *cfx = circ->conflux;
985 if (cfx == NULL) {
986 static ratelim_t rlim = RATELIM_INIT(60 * 60);
988 "Bug: Non marked for close circuit with NULL conflux");
989 return NULL;
990 }
991 if (cfx->ooo_q == NULL) {
992 static ratelim_t rlim = RATELIM_INIT(60 * 60);
994 "Bug: Non marked for close circuit with NULL OOO queue");
995 return NULL;
996 }
997
998 if (smartlist_len(cfx->ooo_q) == 0)
999 return NULL;
1000
1001 top = smartlist_get(cfx->ooo_q, 0);
1002
1003 /* If the top cell is the next sequence number we need, then
1004 * pop and return it. */
1005 if (top->seq == cfx->last_seq_delivered+1) {
1007 offsetof(conflux_msg_t, heap_idx));
1008
1009 size_t cost = conflux_msg_alloc_cost(top);
1010 total_ooo_q_bytes -= cost;
1011 cfx->ooo_q_alloc_cost -= cost;
1012
1013 cfx->last_seq_delivered++;
1014 return top;
1015 } else {
1016 return NULL;
1017 }
1018}
1019
1020/** Free a given conflux msg object. */
1021void
1023{
1024 if (msg) {
1025 relay_msg_free(msg->msg);
1026 tor_free(msg);
1027 }
1028}
Base circuit structure.
origin_circuit_t * TO_ORIGIN_CIRCUIT(circuit_t *x)
Header file for circuitlist.c.
#define CIRCUIT_IS_ORIGIN(c)
void circuit_read_valid_data(origin_circuit_t *circ, uint16_t relay_body_len)
Header file for circuituse.c.
Functions and types for monotonic times.
Header file for config.c.
void conflux_note_cell_sent(conflux_t *cfx, circuit_t *circ, uint8_t relay_command)
Definition conflux.c:578
static uint64_t cwnd_sendable(const circuit_t *on_circ, uint64_t in_usec, uint64_t our_usec)
Definition conflux.c:355
static bool conflux_pick_first_leg(conflux_t *cfx)
Definition conflux.c:602
void conflux_update_rtt(conflux_t *cfx, circuit_t *circ, uint64_t rtt_usec)
Definition conflux.c:708
bool conflux_should_multiplex(int relay_command)
Definition conflux.c:59
circuit_t * conflux_decide_next_circ(conflux_t *cfx)
Definition conflux.c:659
static int conflux_queue_cmp(const void *a, const void *b)
Definition conflux.c:732
static const circuit_t * conflux_decide_circ_lowrtt(const conflux_t *cfx)
Definition conflux.c:307
static uint64_t cwnd_available(const circuit_t *on_circ)
Definition conflux.c:337
uint64_t conflux_get_circ_bytes_allocation(const circuit_t *circ)
Definition conflux.c:181
const congestion_control_t * circuit_ccontrol(const circuit_t *circ)
Definition conflux.c:760
conflux_msg_t * conflux_dequeue_relay_msg(circuit_t *circ)
Definition conflux.c:967
size_t conflux_msg_alloc_cost(conflux_msg_t *msg)
Definition conflux.c:47
uint64_t conflux_get_max_seq_recv(const conflux_t *cfx)
Definition conflux.c:165
uint64_t conflux_get_max_seq_sent(const conflux_t *cfx)
Definition conflux.c:148
void conflux_relay_msg_free_(conflux_msg_t *msg)
Definition conflux.c:1022
void conflux_clear_ooo_q(conflux_t *cfx)
Definition conflux.c:219
static bool conflux_can_switch(const conflux_t *cfx)
Definition conflux.c:391
circuit_t * conflux_decide_circ_for_send(conflux_t *cfx, circuit_t *orig_circ, uint8_t relay_command)
Definition conflux.c:489
static const circuit_t * conflux_decide_circ_cwndrtt(const conflux_t *cfx)
Definition conflux.c:432
conflux_leg_t * conflux_get_leg(conflux_t *cfx, const circuit_t *circ)
Definition conflux.c:127
bool conflux_process_relay_msg(conflux_t *cfx, circuit_t *in_circ, crypt_path_t *layer_hint, const relay_msg_t *msg)
Definition conflux.c:885
static const circuit_t * conflux_decide_circ_minrtt(const conflux_t *cfx)
Definition conflux.c:275
int conflux_process_switch_command(circuit_t *in_circ, crypt_path_t *layer_hint, const relay_msg_t *msg)
Definition conflux.c:787
size_t conflux_handle_oom(size_t bytes_to_remove)
Definition conflux.c:201
static bool circuit_ready_to_send(const circuit_t *circ)
Definition conflux.c:241
uint64_t conflux_get_total_bytes_allocation(void)
Definition conflux.c:194
Public APIs for conflux multipath support.
#define CONFLUX_FOR_EACH_LEG_BEGIN(cfx, var)
Definition conflux.h:20
#define CONFLUX_NUM_LEGS(cfx)
Definition conflux.h:26
uint32_t conflux_cell_parse_switch(const relay_msg_t *msg)
bool conflux_send_switch_command(circuit_t *send_circ, uint64_t relative_seq)
Header file for conflux_cell.c.
Header file for conflux_params.c.
void conflux_log_set(int loglevel, const conflux_t *cfx, bool is_client)
void conflux_mark_all_for_close(const uint8_t *nonce, bool is_client, int reason)
Header file for conflux_pool.c.
Structure definitions for conflux multipath.
void conflux_validate_stream_lists(const conflux_t *cfx)
void conflux_validate_legs(const conflux_t *cfx)
bool conflux_validate_source_hop(circuit_t *in_circ, crypt_path_t *layer_hint)
Header file for conflux_util.c.
Public APIs for congestion control.
Structure definitions for congestion control.
#define log_fn(severity, domain, args,...)
Definition log.h:283
#define log_fn_ratelim(ratelim, severity, domain, args,...)
Definition log.h:288
#define LD_BUG
Definition log.h:86
#define LOG_NOTICE
Definition log.h:50
#define LD_CIRC
Definition log.h:82
#define LOG_WARN
Definition log.h:53
#define tor_free(p)
Definition malloc.h:56
Master header file for Tor-specific functionality.
Origin circuit structure.
Header file for relay.c.
relay_msg_t * relay_msg_copy(const relay_msg_t *msg)
Definition relay_msg.c:68
Header file for relay_msg.c.
Header file for sendme.c.
void * smartlist_pqueue_pop(smartlist_t *sl, int(*compare)(const void *a, const void *b), ptrdiff_t idx_field_offset)
Definition smartlist.c:755
void smartlist_pqueue_add(smartlist_t *sl, int(*compare)(const void *a, const void *b), ptrdiff_t idx_field_offset, void *item)
Definition smartlist.c:726
void smartlist_clear(smartlist_t *sl)
#define SMARTLIST_FOREACH(sl, type, var, cmd)
unsigned int circuit_blocked_on_n_chan
Definition circuit_st.h:92
unsigned int circuit_blocked_on_p_chan
Definition circuit_st.h:95
uint16_t marked_for_close
Definition circuit_st.h:200
struct conflux_t * conflux
Definition circuit_st.h:273
const char * marked_for_close_file
Definition circuit_st.h:203
struct congestion_control_t * ccontrol
Definition circuit_st.h:260
uint64_t last_seq_sent
Definition conflux_st.h:66
uint64_t last_seq_recv
Definition conflux_st.h:47
uint64_t circ_rtts_usec
Definition conflux_st.h:75
uint64_t linked_sent_usec
Definition conflux_st.h:79
circuit_t * circ
Definition conflux_st.h:82
uint64_t seq
Definition conflux.h:33
relay_msg_t * msg
Definition conflux.h:42
smartlist_t * ooo_q
Definition conflux_st.h:103
struct conflux_leg_t * curr_leg
Definition conflux_st.h:123
struct conflux_params_t params
Definition conflux_st.h:88
struct conflux_leg_t * prev_leg
Definition conflux_st.h:127
uint8_t nonce[DIGEST256_LEN]
Definition conflux_st.h:130
uint64_t cells_until_switch
Definition conflux_st.h:119
size_t ooo_q_alloc_cost
Definition conflux_st.h:109
uint64_t last_seq_delivered
Definition conflux_st.h:114
smartlist_t * legs
Definition conflux_st.h:93
bool in_full_teardown
Definition conflux_st.h:135
struct crypt_path_t * prev
struct congestion_control_t * ccontrol
crypt_path_t * cpath
#define tor_assert(expr)
Definition util_bug.h:103