#
# From latency-performance tuned profile
#

# Minimal preemption granularity for CPU-bound tasks:
# (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds)
kernel.sched_min_granularity_ns = 10000000

# The total time the scheduler will consider a migrated process
# "cache hot" and thus less likely to be re-migrated
# (system default is 500000, i.e. 0.5 ms)
kernel.sched_migration_cost_ns = 5000000

# If a workload mostly uses anonymous memory and it hits this limit, the entire
# working set is buffered for I/O, and any more write buffering would require
# swapping, so it's time to throttle writes until I/O can catch up. Workloads
# that mostly use file mappings may be able to use even higher values.
#
# The generator of dirty data starts writeback at this percentage
# (system default is 20%)
vm.dirty_ratio = 10

# Start background writeback (via writeback threads) at this percentage
# (system default is 10%)
vm.dirty_background_ratio = 3

# The swappiness parameter controls the tendency of the kernel to move
# processes out of physical memory and onto the swap disk.
# 0 tells the kernel to avoid swapping processes out of physical memory
# for as long as possible
# 100 tells the kernel to aggressively swap processes out of physical memory
# and move them to swap cache
vm.swappiness = 10

#
# From network-latency tuned profile
#

# TCP Fast Open (TFO) allows clients to send data in the initial SYN request,
# without waiting for a full handshake to occur. This removes an entire round
# trip almost transparently from the application.
net.ipv4.tcp_fastopen = 3

# Low latency busy poll timeout for socket reads.
# Approximate time in us to busy loop waiting for packets on the device queue.
# A value of 50 is recommended.
# Will increase power usage.
net.core.busy_read = 50

# Low latency busy poll timeout for poll and select.
# Approximate time in us to busy loop waiting for events.
# Recommended value depends on the number of sockets you poll on.
# For several sockets 50, for several hundreds 100.
net.core.busy_poll = 50

#
# From Mellanox tuning guide
#

# Disable the TCP timestamps option for better CPU utilization:
net.ipv4.tcp_timestamps = 0

# Enable the TCP selective acks option for better throughput:
net.ipv4.tcp_sack = 1

# Increase the maximum length of processor input queues:
net.core.netdev_max_backlog = 250000

# Increase the TCP maximum and default buffer sizes
# (the maximums cap what applications can request via setsockopt()):
net.core.rmem_max = 4194304
net.core.wmem_max = 4194304
net.core.rmem_default = 4194304
net.core.wmem_default = 4194304
net.core.optmem_max = 4194304

# Increase memory thresholds to prevent packet dropping:
net.ipv4.tcp_rmem = 4096 87380 4194304
net.ipv4.tcp_wmem = 4096 65536 4194304

# Enable low latency mode for TCP:
net.ipv4.tcp_low_latency = 1

# The following variable is used to tell the kernel how much of the socket
# buffer space should be used for TCP window size, and how much to save for
# an application buffer.
# A value of 1 means the socket buffer will be divided evenly between TCP
# window size and application.
net.ipv4.tcp_adv_win_scale = 1

#
# From Mellanox HPC-X guide
#

# Pre-allocate some huge pages
vm.nr_hugepages = 1024
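
#
# Usage notes (not part of the quoted tuned/Mellanox profiles):
#
# To apply, install this file under /etc/sysctl.d/ (the file name
# 99-low-latency.conf below is only an example) and reload all sysctl
# configuration:
#
#   cp 99-low-latency.conf /etc/sysctl.d/
#   sysctl --system
#
# or load just this file with `sysctl -p /etc/sysctl.d/99-low-latency.conf`.
#
# net.ipv4.tcp_fastopen = 3 enables TFO for both outgoing (client) and
# incoming (server) connections; applications still have to opt in, listeners
# via the TCP_FASTOPEN socket option and clients via MSG_FASTOPEN or
# TCP_FASTOPEN_CONNECT.
#
# On kernels 4.14 and newer, net.ipv4.tcp_low_latency is accepted but has no
# effect; it is kept here for older kernels.
#
# With the default 2 MiB huge page size on x86_64, vm.nr_hugepages = 1024
# reserves 2 GiB; applications consume the pool via hugetlbfs or
# mmap() with MAP_HUGETLB.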