Diffstat (limited to 'include/linux/hyperv.h')
-rw-r--r--  include/linux/hyperv.h  93
1 file changed, 72 insertions(+), 21 deletions(-)
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index b10954a66939..c877e7980585 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -674,6 +674,11 @@ enum hv_signal_policy {
HV_SIGNAL_POLICY_EXPLICIT,
};
+enum hv_numa_policy {
+ HV_BALANCED = 0,
+ HV_LOCALIZED,
+};
+
enum vmbus_device_type {
HV_IDE = 0,
HV_SCSI,
@@ -850,6 +855,43 @@ struct vmbus_channel {
* ring lock to preserve the current behavior.
*/
bool acquire_ring_lock;
+ /*
+ * For performance critical channels (storage, networking
+ * etc.), Hyper-V has a mechanism to enhance the throughput
+ * at the expense of latency:
+ * When the host is to be signaled, we set a bit in a shared page;
+ * the hypervisor inspects this bit within a certain window and, if
+ * it is set, signals the host. The window of time is the monitor
+ * latency - currently around 100 usecs. This mechanism improves
+ * throughput by:
+ *
+ * A) Making the host more efficient - each time it wakes up,
+ * it can potentially process more packets. The
+ * monitor latency allows a batch to build up.
+ * B) By deferring the signaling hypercall, we also minimize
+ * the number of interrupts.
+ *
+ * Clearly, these optimizations improve throughput at the expense of
+ * latency. Furthermore, since the channel is shared for both
+ * control and data messages, control messages currently suffer
+ * unnecessary latency, adversely impacting performance and boot
+ * time. To fix this issue, permit tagging the channel as being
+ * in "low latency" mode. In this mode, we will bypass the monitor
+ * mechanism.
+ */
+ bool low_latency;
+
+ /*
+ * NUMA distribution policy:
+ * We support two policies:
+ * 1) Balanced: all performance critical channels are
+ * distributed evenly amongst all the NUMA nodes.
+ * This is the default policy.
+ * 2) Localized: All channels of a given instance of a
+ * performance critical service will be assigned CPUs
+ * within a selected NUMA node.
+ */
+ enum hv_numa_policy affinity_policy;
};
@@ -870,6 +912,12 @@ static inline void set_channel_signal_state(struct vmbus_channel *c,
c->signal_policy = policy;
}
+static inline void set_channel_affinity_state(struct vmbus_channel *c,
+ enum hv_numa_policy policy)
+{
+ c->affinity_policy = policy;
+}
+
static inline void set_channel_read_state(struct vmbus_channel *c, bool state)
{
c->batched_reading = state;
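For illustration, here is how a driver might opt in to the localized policy; a minimal sketch assuming a hypothetical driver (contoso_probe is not part of this patch), using only the setter added above:

/*
 * Sketch (assumed usage): a performance-critical driver asking for all
 * of its channels to be placed on CPUs within one NUMA node. The policy
 * is set early, before the (sub)channels are bound to CPUs.
 */
static int contoso_probe(struct hv_device *dev,
			 const struct hv_vmbus_device_id *dev_id)
{
	struct vmbus_channel *chan = dev->channel;

	set_channel_affinity_state(chan, HV_LOCALIZED);
	return 0;
}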
@@ -891,6 +939,16 @@ static inline void set_channel_pending_send_size(struct vmbus_channel *c,
c->outbound.ring_buffer->pending_send_sz = size;
}
+static inline void set_low_latency_mode(struct vmbus_channel *c)
+{
+ c->low_latency = true;
+}
+
+static inline void clear_low_latency_mode(struct vmbus_channel *c)
+{
+ c->low_latency = false;
+}
+
void vmbus_onmessage(void *context);
int vmbus_request_offers(void);
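A sketch of the intended use of the low-latency helpers, assuming a hypothetical open path (the ring sizes and contoso_onchannelcallback are placeholders, not from this patch):

/* Bypass the monitor mechanism for a latency-sensitive channel. */
set_low_latency_mode(dev->channel);

ret = vmbus_open(dev->channel, 4 * PAGE_SIZE, 4 * PAGE_SIZE,
		 NULL, 0, contoso_onchannelcallback, dev);
if (ret)
	/* Undo the tag if the open failed. */
	clear_low_latency_mode(dev->channel);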
@@ -1357,6 +1415,9 @@ extern bool vmbus_prep_negotiate_resp(struct icmsg_hdr *,
struct icmsg_negotiate *, u8 *, int,
int);
+void hv_event_tasklet_disable(struct vmbus_channel *channel);
+void hv_event_tasklet_enable(struct vmbus_channel *channel);
+
void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid);
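The tasklet disable/enable pair presumably brackets updates to per-channel state that the event tasklet may touch; a sketch of the assumed usage (the actual callers live in the vmbus core, not in this diff):

hv_event_tasklet_disable(channel);

/* ... modify channel state the event tasklet depends on ... */

hv_event_tasklet_enable(channel);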
/*
@@ -1431,31 +1492,23 @@ static inline struct vmpacket_descriptor *
get_next_pkt_raw(struct vmbus_channel *channel)
{
struct hv_ring_buffer_info *ring_info = &channel->inbound;
- u32 read_loc = ring_info->priv_read_index;
+ u32 priv_read_loc = ring_info->priv_read_index;
void *ring_buffer = hv_get_ring_buffer(ring_info);
- struct vmpacket_descriptor *cur_desc;
- u32 packetlen;
u32 dsize = ring_info->ring_datasize;
- u32 delta = read_loc - ring_info->ring_buffer->read_index;
+ /*
+ * delta is the number of bytes that have been consumed in place
+ * (tracked by the private read index) but not yet committed. We
+ * commit the read index only after the whole batch is processed.
+ */
+ u32 delta = priv_read_loc >= ring_info->ring_buffer->read_index ?
+ priv_read_loc - ring_info->ring_buffer->read_index :
+ (dsize - ring_info->ring_buffer->read_index) + priv_read_loc;
u32 bytes_avail_toread = (hv_get_bytes_to_read(ring_info) - delta);
if (bytes_avail_toread < sizeof(struct vmpacket_descriptor))
return NULL;
- if ((read_loc + sizeof(*cur_desc)) > dsize)
- return NULL;
-
- cur_desc = ring_buffer + read_loc;
- packetlen = cur_desc->len8 << 3;
-
- /*
- * If the packet under consideration is wrapping around,
- * return failure.
- */
- if ((read_loc + packetlen + VMBUS_PKT_TRAILER) > (dsize - 1))
- return NULL;
-
- return cur_desc;
+ return ring_buffer + priv_read_loc;
}
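The wrap-aware delta is easiest to see with concrete numbers; the helper below is purely illustrative (ring_delta is not part of the patch):

/*
 * Illustration: distance from the committed read index to the private
 * (in-place) read index, accounting for ring wrap-around.
 *
 * Example: dsize = 4096, read_index = 4000, priv_read_index = 112.
 * The private index has wrapped, so
 *	delta = (4096 - 4000) + 112 = 208
 * i.e. 208 bytes were consumed in place but not yet committed.
 */
static inline u32 ring_delta(u32 priv_read, u32 read_index, u32 dsize)
{
	return priv_read >= read_index ?
		priv_read - read_index :
		(dsize - read_index) + priv_read;
}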
/*
@@ -1467,16 +1520,14 @@ static inline void put_pkt_raw(struct vmbus_channel *channel,
struct vmpacket_descriptor *desc)
{
struct hv_ring_buffer_info *ring_info = &channel->inbound;
- u32 read_loc = ring_info->priv_read_index;
u32 packetlen = desc->len8 << 3;
u32 dsize = ring_info->ring_datasize;
- if ((read_loc + packetlen + VMBUS_PKT_TRAILER) > dsize)
- BUG();
/*
* Include the packet trailer.
*/
ring_info->priv_read_index += packetlen + VMBUS_PKT_TRAILER;
+ ring_info->priv_read_index %= dsize;
}
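With put_pkt_raw() wrapping the private index itself, the in-place consumption loop needs no wrap checks of its own. A sketch of the pattern these helpers support (process_pkt() is a placeholder for driver-specific handling; commit_rd_index() is defined elsewhere in this header):

struct vmpacket_descriptor *desc;

while ((desc = get_next_pkt_raw(channel)) != NULL) {
	process_pkt(desc);		/* consume the packet in place */
	put_pkt_raw(channel, desc);	/* advance the private read index */
}
/* Publish the read index and potentially signal the host. */
commit_rd_index(channel);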
/*