#
# From latency-performance tuned profile
#

# Minimal preemption granularity for CPU-bound tasks:
# (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds)
kernel.sched_min_granularity_ns = 10000000

# The total time the scheduler will consider a migrated process
# "cache hot" and thus less likely to be re-migrated
# (system default is 500000, i.e. 0.5 ms)
kernel.sched_migration_cost_ns = 5000000

# If a workload mostly uses anonymous memory and it hits this limit, the entire
# working set is buffered for I/O, and any more write buffering would require
# swapping, so it's time to throttle writes until I/O can catch up. Workloads
# that mostly use file mappings may be able to use even higher values.
#
# The generator of dirty data starts writeback at this percentage
# (system default is 20%)
vm.dirty_ratio = 10

# Start background writeback (via writeback threads) at this percentage
# (system default is 10%)
vm.dirty_background_ratio = 3

# The swappiness parameter controls the tendency of the kernel to move
# processes out of physical memory and onto the swap disk.
# 0 tells the kernel to avoid swapping processes out of physical memory
# for as long as possible
# 100 tells the kernel to aggressively swap processes out of physical memory
# and move them to swap cache
vm.swappiness = 10

#
# From network-latency tuned profile
#

# TCP Fast Open (TFO) allows clients to send data in the initial SYN request,
# without waiting for a full handshake to occur. This removes an entire round
# trip almost transparently from the application.
net.ipv4.tcp_fastopen = 3

# Low latency busy poll timeout for socket reads.
# Approximate time in us to busy loop waiting for packets on the device queue.
# A value of 50 is recommended.
# Will increase power usage.
net.core.busy_read = 50

# Low latency busy poll timeout for poll and select.
# Approximate time in us to busy loop waiting for events.
# Recommended value depends on the number of sockets you poll on.
# For several sockets 50, for several hundreds 100.
net.core.busy_poll = 50

#
# From Mellanox tuning guide
#

# Disable the TCP timestamps option for better CPU utilization:
net.ipv4.tcp_timestamps = 0

# Enable the TCP selective acks option for better throughput:
net.ipv4.tcp_sack = 1

# Increase the maximum length of processor input queues:
net.core.netdev_max_backlog = 250000

# Increase the TCP maximum and default buffer sizes
# (the maximums cap what applications can request via setsockopt()):
net.core.rmem_max = 4194304
net.core.wmem_max = 4194304
net.core.rmem_default = 4194304
net.core.wmem_default = 4194304
net.core.optmem_max = 4194304

# Increase memory thresholds to prevent packet dropping:
net.ipv4.tcp_rmem = 4096 87380 4194304
net.ipv4.tcp_wmem = 4096 65536 4194304

# Enable low latency mode for TCP:
net.ipv4.tcp_low_latency = 1

# The following variable is used to tell the kernel how much of the socket
# buffer space should be used for TCP window size, and how much to save for
# an application buffer.
# A value of 1 means the socket buffer will be divided evenly between TCP
# window size and application.
net.ipv4.tcp_adv_win_scale = 1

#
# From Mellanox HPC-X guide
#

# Pre-allocate some huge pages
vm.nr_hugepages = 1024
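
#
# Usage notes (not part of the quoted tuned/Mellanox profiles):
#
# To apply, install this file under /etc/sysctl.d/ (the file name
# 99-low-latency.conf below is only an example) and reload all sysctl
# configuration:
#
#   cp 99-low-latency.conf /etc/sysctl.d/
#   sysctl --system
#
# or load just this file with `sysctl -p /etc/sysctl.d/99-low-latency.conf`.
#
# net.ipv4.tcp_fastopen = 3 enables TFO for both outgoing (client) and
# incoming (server) connections; applications still have to opt in, listeners
# via the TCP_FASTOPEN socket option and clients via MSG_FASTOPEN or
# TCP_FASTOPEN_CONNECT.
#
# On kernels 4.14 and newer, net.ipv4.tcp_low_latency is accepted but has no
# effect; it is kept here for older kernels.
#
# With the default 2 MiB huge page size on x86_64, vm.nr_hugepages = 1024
# reserves 2 GiB; applications consume the pool via hugetlbfs or
# mmap() with MAP_HUGETLB.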