diff options
Diffstat (limited to 'net/core')
| -rw-r--r-- | net/core/dev.c | 64 | ||||
| -rw-r--r-- | net/core/filter.c | 1 | ||||
| -rw-r--r-- | net/core/net-sysfs.c | 4 | ||||
| -rw-r--r-- | net/core/netpoll.c | 7 | ||||
| -rw-r--r-- | net/core/page_pool.c | 12 | ||||
| -rw-r--r-- | net/core/sock.c | 15 |
6 files changed, 79 insertions, 24 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index 5194b70769cc..a374efa23f07 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4849,6 +4849,36 @@ static u32 rfs_slot(u32 hash, const struct rps_dev_flow_table *flow_table) return hash_32(hash, flow_table->log); } +#ifdef CONFIG_RFS_ACCEL +/** + * rps_flow_is_active - check whether the flow is recently active. + * @rflow: Specific flow to check activity. + * @flow_table: per-queue flowtable that @rflow belongs to. + * @cpu: CPU saved in @rflow. + * + * If the CPU has processed many packets since the flow's last activity + * (beyond 10 times the table size), the flow is considered stale. + * + * Return: true if flow was recently active. + */ +static bool rps_flow_is_active(struct rps_dev_flow *rflow, + struct rps_dev_flow_table *flow_table, + unsigned int cpu) +{ + unsigned int flow_last_active; + unsigned int sd_input_head; + + if (cpu >= nr_cpu_ids) + return false; + + sd_input_head = READ_ONCE(per_cpu(softnet_data, cpu).input_queue_head); + flow_last_active = READ_ONCE(rflow->last_qtail); + + return (int)(sd_input_head - flow_last_active) < + (int)(10 << flow_table->log); +} +#endif + static struct rps_dev_flow * set_rps_cpu(struct net_device *dev, struct sk_buff *skb, struct rps_dev_flow *rflow, u16 next_cpu) @@ -4859,8 +4889,11 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb, struct netdev_rx_queue *rxqueue; struct rps_dev_flow_table *flow_table; struct rps_dev_flow *old_rflow; + struct rps_dev_flow *tmp_rflow; + unsigned int tmp_cpu; u16 rxq_index; u32 flow_id; + u32 hash; int rc; /* Should we steer this flow to a different hardware queue? */ @@ -4875,14 +4908,32 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb, flow_table = rcu_dereference(rxqueue->rps_flow_table); if (!flow_table) goto out; - flow_id = rfs_slot(skb_get_hash(skb), flow_table); + + hash = skb_get_hash(skb); + flow_id = rfs_slot(hash, flow_table); + + tmp_rflow = &flow_table->flows[flow_id]; + tmp_cpu = READ_ONCE(tmp_rflow->cpu); + + if (READ_ONCE(tmp_rflow->filter) != RPS_NO_FILTER) { + if (rps_flow_is_active(tmp_rflow, flow_table, + tmp_cpu)) { + if (hash != READ_ONCE(tmp_rflow->hash) || + next_cpu == tmp_cpu) + goto out; + } + } + rc = dev->netdev_ops->ndo_rx_flow_steer(dev, skb, rxq_index, flow_id); if (rc < 0) goto out; + old_rflow = rflow; - rflow = &flow_table->flows[flow_id]; + rflow = tmp_rflow; WRITE_ONCE(rflow->filter, rc); + WRITE_ONCE(rflow->hash, hash); + if (old_rflow->filter == rc) WRITE_ONCE(old_rflow->filter, RPS_NO_FILTER); out: @@ -5017,17 +5068,16 @@ bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, struct rps_dev_flow_table *flow_table; struct rps_dev_flow *rflow; bool expire = true; - unsigned int cpu; rcu_read_lock(); flow_table = rcu_dereference(rxqueue->rps_flow_table); if (flow_table && flow_id < (1UL << flow_table->log)) { + unsigned int cpu; + rflow = &flow_table->flows[flow_id]; cpu = READ_ONCE(rflow->cpu); - if (READ_ONCE(rflow->filter) == filter_id && cpu < nr_cpu_ids && - ((int)(READ_ONCE(per_cpu(softnet_data, cpu).input_queue_head) - - READ_ONCE(rflow->last_qtail)) < - (int)(10 << flow_table->log))) + if (READ_ONCE(rflow->filter) == filter_id && + rps_flow_is_active(rflow, flow_table, cpu)) expire = false; } rcu_read_unlock(); diff --git a/net/core/filter.c b/net/core/filter.c index c5cdf3b08341..b20d5fecdbc9 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4212,6 +4212,7 @@ static int bpf_xdp_frags_shrink_tail(struct xdp_buff *xdp, int offset) if (unlikely(!sinfo->nr_frags)) { xdp_buff_clear_frags_flag(xdp); + xdp_buff_clear_frag_pfmemalloc(xdp); xdp->data_end -= offset; } diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index c28cd6665444..5ea9f64adce3 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1120,8 +1120,10 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, return -ENOMEM; table->log = ilog2(mask) + 1; - for (count = 0; count <= mask; count++) + for (count = 0; count <= mask; count++) { table->flows[count].cpu = RPS_NO_CPU; + table->flows[count].filter = RPS_NO_FILTER; + } } else { table = NULL; } diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 5f65b62346d4..4ab154c614e3 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -228,19 +228,16 @@ static void refill_skbs(struct netpoll *np) { struct sk_buff_head *skb_pool; struct sk_buff *skb; - unsigned long flags; skb_pool = &np->skb_pool; - spin_lock_irqsave(&skb_pool->lock, flags); - while (skb_pool->qlen < MAX_SKBS) { + while (READ_ONCE(skb_pool->qlen) < MAX_SKBS) { skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC); if (!skb) break; - __skb_queue_tail(skb_pool, skb); + skb_queue_tail(skb_pool, skb); } - spin_unlock_irqrestore(&skb_pool->lock, flags); } static void zap_completion_queue(void) diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 19c92aa04e54..1a5edec485f1 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -211,11 +211,7 @@ static int page_pool_init(struct page_pool *pool, return -EINVAL; if (pool->p.pool_size) - ring_qsize = pool->p.pool_size; - - /* Sanity limit mem that can be pinned down */ - if (ring_qsize > 32768) - return -E2BIG; + ring_qsize = min(pool->p.pool_size, 16384); /* DMA direction is either DMA_FROM_DEVICE or DMA_BIDIRECTIONAL. * DMA_BIDIRECTIONAL is for allowing page used for DMA sending, @@ -596,6 +592,12 @@ static noinline netmem_ref __page_pool_alloc_netmems_slow(struct page_pool *pool netmem_ref netmem; int i, nr_pages; + /* Unconditionally set NOWARN if allocating from NAPI. + * Drivers forget to set it, and OOM reports on packet Rx are useless. + */ + if ((gfp & GFP_ATOMIC) == GFP_ATOMIC) + gfp |= __GFP_NOWARN; + /* Don't support bulk alloc for high-order pages */ if (unlikely(pp_order)) return page_to_netmem(__page_pool_alloc_page_order(pool, gfp)); diff --git a/net/core/sock.c b/net/core/sock.c index e21348ead7e7..bdeea7cc134d 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3162,23 +3162,27 @@ void __release_sock(struct sock *sk) __acquires(&sk->sk_lock.slock) { struct sk_buff *skb, *next; + int nb = 0; while ((skb = sk->sk_backlog.head) != NULL) { sk->sk_backlog.head = sk->sk_backlog.tail = NULL; spin_unlock_bh(&sk->sk_lock.slock); - do { + while (1) { next = skb->next; prefetch(next); DEBUG_NET_WARN_ON_ONCE(skb_dst_is_noref(skb)); skb_mark_not_on_list(skb); sk_backlog_rcv(sk, skb); - cond_resched(); - skb = next; - } while (skb != NULL); + if (!skb) + break; + + if (!(++nb & 15)) + cond_resched(); + } spin_lock_bh(&sk->sk_lock.slock); } @@ -3336,8 +3340,7 @@ suppress_allocation: } } - if (kind == SK_MEM_SEND || (kind == SK_MEM_RECV && charged)) - trace_sock_exceed_buf_limit(sk, prot, allocated, kind); + trace_sock_exceed_buf_limit(sk, prot, allocated, kind); sk_memory_allocated_sub(sk, amt); |
