diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-02-10 20:01:30 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-02-10 20:01:30 -0800 |
commit | c5ce28df0e7c01a1de23c36ebdefcd803f2b6cbb (patch) | |
tree | 9830baf38832769e1cf621708889111bbe3c93df /Documentation/networking | |
parent | 29afc4e9a408f2304e09c6dd0dbcfbd2356d0faa (diff) | |
parent | 9399f0c51489ae8c16d6559b82a452fdc1895e91 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
1) More iov_iter conversion work from Al Viro.
[ The "crypto: switch af_alg_make_sg() to iov_iter" commit was
wrong, and this pull actually adds an extra commit on top of the
branch I'm pulling to fix that up, so that the pre-merge state is
ok. - Linus ]
2) Various optimizations to the ipv4 forwarding information base trie
lookup implementation. From Alexander Duyck.
3) Remove sock_iocb altogether, from CHristoph Hellwig.
4) Allow congestion control algorithm selection via routing metrics.
From Daniel Borkmann.
5) Make ipv4 uncached route list per-cpu, from Eric Dumazet.
6) Handle rfs hash collisions more gracefully, also from Eric Dumazet.
7) Add xmit_more support to r8169, e1000, and e1000e drivers. From
Florian Westphal.
8) Transparent Ethernet Bridging support for GRO, from Jesse Gross.
9) Add BPF packet actions to packet scheduler, from Jiri Pirko.
10) Add support for uniqu flow IDs to openvswitch, from Joe Stringer.
11) New NetCP ethernet driver, from Muralidharan Karicheri and Wingman
Kwok.
12) More sanely handle out-of-window dupacks, which can result in
serious ACK storms. From Neal Cardwell.
13) Various rhashtable bug fixes and enhancements, from Herbert Xu,
Patrick McHardy, and Thomas Graf.
14) Support xmit_more in be2net, from Sathya Perla.
15) Group Policy extensions for vxlan, from Thomas Graf.
16) Remove Checksum Offload support for vxlan, from Tom Herbert.
17) Like ipv4, support lockless transmit over ipv6 UDP sockets. From
Vlad Yasevich.
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1494+1 commits)
crypto: fix af_alg_make_sg() conversion to iov_iter
ipv4: Namespecify TCP PMTU mechanism
i40e: Fix for stats init function call in Rx setup
tcp: don't include Fast Open option in SYN-ACK on pure SYN-data
openvswitch: Only set TUNNEL_VXLAN_OPT if VXLAN-GBP metadata is set
ipv6: Make __ipv6_select_ident static
ipv6: Fix fragment id assignment on LE arches.
bridge: Fix inability to add non-vlan fdb entry
net: Mellanox: Delete unnecessary checks before the function call "vunmap"
cxgb4: Add support in cxgb4 to get expansion rom version via ethtool
ethtool: rename reserved1 memeber in ethtool_drvinfo for expansion ROM version
net: dsa: Remove redundant phy_attach()
IB/mlx4: Reset flow support for IB kernel ULPs
IB/mlx4: Always use the correct port for mirrored multicast attachments
net/bonding: Fix potential bad memory access during bonding events
tipc: remove tipc_snprintf
tipc: nl compat add noop and remove legacy nl framework
tipc: convert legacy nl stats show to nl compat
tipc: convert legacy nl net id get to nl compat
tipc: convert legacy nl net id set to nl compat
...
Diffstat (limited to 'Documentation/networking')
-rw-r--r-- | Documentation/networking/filter.txt | 4 | ||||
-rw-r--r-- | Documentation/networking/ip-sysctl.txt | 29 | ||||
-rw-r--r-- | Documentation/networking/nf_conntrack-sysctl.txt | 3 | ||||
-rw-r--r-- | Documentation/networking/openvswitch.txt | 13 | ||||
-rw-r--r-- | Documentation/networking/timestamping.txt | 21 | ||||
-rw-r--r-- | Documentation/networking/timestamping/txtimestamp.c | 38 |
6 files changed, 93 insertions, 15 deletions
diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt index 58d08f8d8d80..9930ecfbb465 100644 --- a/Documentation/networking/filter.txt +++ b/Documentation/networking/filter.txt @@ -279,8 +279,8 @@ Possible BPF extensions are shown in the following table: hatype skb->dev->type rxhash skb->hash cpu raw_smp_processor_id() - vlan_tci vlan_tx_tag_get(skb) - vlan_pr vlan_tx_tag_present(skb) + vlan_tci skb_vlan_tag_get(skb) + vlan_pr skb_vlan_tag_present(skb) rand prandom_u32() These extensions can also be prefixed with '#'. diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 85b022179104..1b8c964b0d17 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -290,6 +290,28 @@ tcp_frto - INTEGER By default it's enabled with a non-zero value. 0 disables F-RTO. +tcp_invalid_ratelimit - INTEGER + Limit the maximal rate for sending duplicate acknowledgments + in response to incoming TCP packets that are for an existing + connection but that are invalid due to any of these reasons: + + (a) out-of-window sequence number, + (b) out-of-window acknowledgment number, or + (c) PAWS (Protection Against Wrapped Sequence numbers) check failure + + This can help mitigate simple "ack loop" DoS attacks, wherein + a buggy or malicious middlebox or man-in-the-middle can + rewrite TCP header fields in manner that causes each endpoint + to think that the other is sending invalid TCP segments, thus + causing each side to send an unterminating stream of duplicate + acknowledgments for invalid segments. + + Using 0 disables rate-limiting of dupacks in response to + invalid segments; otherwise this value specifies the minimal + space between sending such dupacks, in milliseconds. + + Default: 500 (milliseconds). + tcp_keepalive_time - INTEGER How often TCP sends out keepalive messages when keepalive is enabled. Default: 2hours. @@ -1287,6 +1309,13 @@ accept_ra_rtr_pref - BOOLEAN Functional default: enabled if accept_ra is enabled. disabled if accept_ra is disabled. +accept_ra_mtu - BOOLEAN + Apply the MTU value specified in RA option 5 (RFC4861). If + disabled, the MTU specified in the RA will be ignored. + + Functional default: enabled if accept_ra is enabled. + disabled if accept_ra is disabled. + accept_redirects - BOOLEAN Accept Redirects. diff --git a/Documentation/networking/nf_conntrack-sysctl.txt b/Documentation/networking/nf_conntrack-sysctl.txt index 70da5086153d..f55599c62c9d 100644 --- a/Documentation/networking/nf_conntrack-sysctl.txt +++ b/Documentation/networking/nf_conntrack-sysctl.txt @@ -11,7 +11,8 @@ nf_conntrack_buckets - INTEGER (read-only) Size of hash table. If not specified as parameter during module loading, the default size is calculated by dividing total memory by 16384 to determine the number of buckets but the hash table will - never have fewer than 32 or more than 16384 buckets. + never have fewer than 32 and limited to 16384 buckets. For systems + with more than 4GB of memory it will be 65536 buckets. nf_conntrack_checksum - BOOLEAN 0 - disabled diff --git a/Documentation/networking/openvswitch.txt b/Documentation/networking/openvswitch.txt index 37c20ee2455e..b3b9ac61d29d 100644 --- a/Documentation/networking/openvswitch.txt +++ b/Documentation/networking/openvswitch.txt @@ -131,6 +131,19 @@ performs best-effort detection of overlapping wildcarded flows and may reject some but not all of them. However, this behavior may change in future versions. +Unique flow identifiers +----------------------- + +An alternative to using the original match portion of a key as the handle for +flow identification is a unique flow identifier, or "UFID". UFIDs are optional +for both the kernel and user space program. + +User space programs that support UFID are expected to provide it during flow +setup in addition to the flow, then refer to the flow using the UFID for all +future operations. The kernel is not required to index flows by the original +flow key if a UFID is specified. + + Basic rule for evolving flow keys --------------------------------- diff --git a/Documentation/networking/timestamping.txt b/Documentation/networking/timestamping.txt index a5c784c89312..5f0922613f1a 100644 --- a/Documentation/networking/timestamping.txt +++ b/Documentation/networking/timestamping.txt @@ -162,6 +162,27 @@ SOF_TIMESTAMPING_OPT_CMSG: option IP_PKTINFO simultaneously. +SOF_TIMESTAMPING_OPT_TSONLY: + + Applies to transmit timestamps only. Makes the kernel return the + timestamp as a cmsg alongside an empty packet, as opposed to + alongside the original packet. This reduces the amount of memory + charged to the socket's receive budget (SO_RCVBUF) and delivers + the timestamp even if sysctl net.core.tstamp_allow_data is 0. + This option disables SOF_TIMESTAMPING_OPT_CMSG. + + +New applications are encouraged to pass SOF_TIMESTAMPING_OPT_ID to +disambiguate timestamps and SOF_TIMESTAMPING_OPT_TSONLY to operate +regardless of the setting of sysctl net.core.tstamp_allow_data. + +An exception is when a process needs additional cmsg data, for +instance SOL_IP/IP_PKTINFO to detect the egress network interface. +Then pass option SOF_TIMESTAMPING_OPT_CMSG. This option depends on +having access to the contents of the original packet, so cannot be +combined with SOF_TIMESTAMPING_OPT_TSONLY. + + 1.4 Bytestream Timestamps The SO_TIMESTAMPING interface supports timestamping of bytes in a diff --git a/Documentation/networking/timestamping/txtimestamp.c b/Documentation/networking/timestamping/txtimestamp.c index 876f71c5625a..8217510d3842 100644 --- a/Documentation/networking/timestamping/txtimestamp.c +++ b/Documentation/networking/timestamping/txtimestamp.c @@ -30,6 +30,8 @@ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. */ +#define _GNU_SOURCE + #include <arpa/inet.h> #include <asm/types.h> #include <error.h> @@ -59,14 +61,6 @@ #include <time.h> #include <unistd.h> -/* ugly hack to work around netinet/in.h and linux/ipv6.h conflicts */ -#ifndef in6_pktinfo -struct in6_pktinfo { - struct in6_addr ipi6_addr; - int ipi6_ifindex; -}; -#endif - /* command line parameters */ static int cfg_proto = SOCK_STREAM; static int cfg_ipproto = IPPROTO_TCP; @@ -76,6 +70,7 @@ static int do_ipv6 = 1; static int cfg_payload_len = 10; static bool cfg_show_payload; static bool cfg_do_pktinfo; +static bool cfg_loop_nodata; static uint16_t dest_port = 9000; static struct sockaddr_in daddr; @@ -147,6 +142,9 @@ static void print_payload(char *data, int len) { int i; + if (!len) + return; + if (len > 70) len = 70; @@ -183,6 +181,7 @@ static void __recv_errmsg_cmsg(struct msghdr *msg, int payload_len) struct sock_extended_err *serr = NULL; struct scm_timestamping *tss = NULL; struct cmsghdr *cm; + int batch = 0; for (cm = CMSG_FIRSTHDR(msg); cm && cm->cmsg_len; @@ -215,10 +214,18 @@ static void __recv_errmsg_cmsg(struct msghdr *msg, int payload_len) } else fprintf(stderr, "unknown cmsg %d,%d\n", cm->cmsg_level, cm->cmsg_type); + + if (serr && tss) { + print_timestamp(tss, serr->ee_info, serr->ee_data, + payload_len); + serr = NULL; + tss = NULL; + batch++; + } } - if (serr && tss) - print_timestamp(tss, serr->ee_info, serr->ee_data, payload_len); + if (batch > 1) + fprintf(stderr, "batched %d timestamps\n", batch); } static int recv_errmsg(int fd) @@ -250,7 +257,7 @@ static int recv_errmsg(int fd) if (ret == -1 && errno != EAGAIN) error(1, errno, "recvmsg"); - if (ret > 0) { + if (ret >= 0) { __recv_errmsg_cmsg(&msg, ret); if (cfg_show_payload) print_payload(data, cfg_payload_len); @@ -315,6 +322,9 @@ static void do_test(int family, unsigned int opt) opt |= SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_OPT_CMSG | SOF_TIMESTAMPING_OPT_ID; + if (cfg_loop_nodata) + opt |= SOF_TIMESTAMPING_OPT_TSONLY; + if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, (char *) &opt, sizeof(opt))) error(1, 0, "setsockopt timestamping"); @@ -384,6 +394,7 @@ static void __attribute__((noreturn)) usage(const char *filepath) " -h: show this message\n" " -I: request PKTINFO\n" " -l N: send N bytes at a time\n" + " -n: set no-payload option\n" " -r: use raw\n" " -R: use raw (IP_HDRINCL)\n" " -p N: connect to port N\n" @@ -398,7 +409,7 @@ static void parse_opt(int argc, char **argv) int proto_count = 0; char c; - while ((c = getopt(argc, argv, "46hIl:p:rRux")) != -1) { + while ((c = getopt(argc, argv, "46hIl:np:rRux")) != -1) { switch (c) { case '4': do_ipv6 = 0; @@ -409,6 +420,9 @@ static void parse_opt(int argc, char **argv) case 'I': cfg_do_pktinfo = true; break; + case 'n': + cfg_loop_nodata = true; + break; case 'r': proto_count++; cfg_proto = SOCK_RAW; |