diff options
Diffstat (limited to 'include/linux/hyperv.h')
| -rw-r--r-- | include/linux/hyperv.h | 93 |
1 files changed, 72 insertions, 21 deletions
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index b10954a66939..c877e7980585 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -674,6 +674,11 @@ enum hv_signal_policy { HV_SIGNAL_POLICY_EXPLICIT, }; +enum hv_numa_policy { + HV_BALANCED = 0, + HV_LOCALIZED, +}; + enum vmbus_device_type { HV_IDE = 0, HV_SCSI, @@ -850,6 +855,43 @@ struct vmbus_channel { * ring lock to preserve the current behavior. */ bool acquire_ring_lock; + /* + * For performance critical channels (storage, networking + * etc,), Hyper-V has a mechanism to enhance the throughput + * at the expense of latency: + * When the host is to be signaled, we just set a bit in a shared page + * and this bit will be inspected by the hypervisor within a certain + * window and if the bit is set, the host will be signaled. The window + * of time is the monitor latency - currently around 100 usecs. This + * mechanism improves throughput by: + * + * A) Making the host more efficient - each time it wakes up, + * potentially it will process morev number of packets. The + * monitor latency allows a batch to build up. + * B) By deferring the hypercall to signal, we will also minimize + * the interrupts. + * + * Clearly, these optimizations improve throughput at the expense of + * latency. Furthermore, since the channel is shared for both + * control and data messages, control messages currently suffer + * unnecessary latency adversley impacting performance and boot + * time. To fix this issue, permit tagging the channel as being + * in "low latency" mode. In this mode, we will bypass the monitor + * mechanism. + */ + bool low_latency; + + /* + * NUMA distribution policy: + * We support teo policies: + * 1) Balanced: Here all performance critical channels are + * distributed evenly amongst all the NUMA nodes. + * This policy will be the default policy. + * 2) Localized: All channels of a given instance of a + * performance critical service will be assigned CPUs + * within a selected NUMA node. + */ + enum hv_numa_policy affinity_policy; }; @@ -870,6 +912,12 @@ static inline void set_channel_signal_state(struct vmbus_channel *c, c->signal_policy = policy; } +static inline void set_channel_affinity_state(struct vmbus_channel *c, + enum hv_numa_policy policy) +{ + c->affinity_policy = policy; +} + static inline void set_channel_read_state(struct vmbus_channel *c, bool state) { c->batched_reading = state; @@ -891,6 +939,16 @@ static inline void set_channel_pending_send_size(struct vmbus_channel *c, c->outbound.ring_buffer->pending_send_sz = size; } +static inline void set_low_latency_mode(struct vmbus_channel *c) +{ + c->low_latency = true; +} + +static inline void clear_low_latency_mode(struct vmbus_channel *c) +{ + c->low_latency = false; +} + void vmbus_onmessage(void *context); int vmbus_request_offers(void); @@ -1357,6 +1415,9 @@ extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *, struct icmsg_negotiate *, u8 *, int, int); +void hv_event_tasklet_disable(struct vmbus_channel *channel); +void hv_event_tasklet_enable(struct vmbus_channel *channel); + void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid); /* @@ -1431,31 +1492,23 @@ static inline struct vmpacket_descriptor * get_next_pkt_raw(struct vmbus_channel *channel) { struct hv_ring_buffer_info *ring_info = &channel->inbound; - u32 read_loc = ring_info->priv_read_index; + u32 priv_read_loc = ring_info->priv_read_index; void *ring_buffer = hv_get_ring_buffer(ring_info); - struct vmpacket_descriptor *cur_desc; - u32 packetlen; u32 dsize = ring_info->ring_datasize; - u32 delta = read_loc - ring_info->ring_buffer->read_index; + /* + * delta is the difference between what is available to read and + * what was already consumed in place. We commit read index after + * the whole batch is processed. + */ + u32 delta = priv_read_loc >= ring_info->ring_buffer->read_index ? + priv_read_loc - ring_info->ring_buffer->read_index : + (dsize - ring_info->ring_buffer->read_index) + priv_read_loc; u32 bytes_avail_toread = (hv_get_bytes_to_read(ring_info) - delta); if (bytes_avail_toread < sizeof(struct vmpacket_descriptor)) return NULL; - if ((read_loc + sizeof(*cur_desc)) > dsize) - return NULL; - - cur_desc = ring_buffer + read_loc; - packetlen = cur_desc->len8 << 3; - - /* - * If the packet under consideration is wrapping around, - * return failure. - */ - if ((read_loc + packetlen + VMBUS_PKT_TRAILER) > (dsize - 1)) - return NULL; - - return cur_desc; + return ring_buffer + priv_read_loc; } /* @@ -1467,16 +1520,14 @@ static inline void put_pkt_raw(struct vmbus_channel *channel, struct vmpacket_descriptor *desc) { struct hv_ring_buffer_info *ring_info = &channel->inbound; - u32 read_loc = ring_info->priv_read_index; u32 packetlen = desc->len8 << 3; u32 dsize = ring_info->ring_datasize; - if ((read_loc + packetlen + VMBUS_PKT_TRAILER) > dsize) - BUG(); /* * Include the packet trailer. */ ring_info->priv_read_index += packetlen + VMBUS_PKT_TRAILER; + ring_info->priv_read_index %= dsize; } /* |
