Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RHEL-40693: netkvm lazy RX allocation of physical memory #1229

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
Open
2 changes: 2 additions & 0 deletions NetKVM/Common/ParaNdis-RX.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ class CParaNdisRX : public CParaNdisTemplatePath<CVirtQueue>, public CNdisAlloca

PARANDIS_RECEIVE_QUEUE &UnclassifiedPacketsQueue() { return m_UnclassifiedPacketsQueue; }
UINT GetFreeRxBuffers() const { return m_NetNofReceiveBuffers; }
BOOLEAN AllocateMore();
private:
/* list of Rx buffers available for data (under VIRTIO management) */
LIST_ENTRY m_NetReceiveBuffers;
Expand All @@ -58,6 +59,7 @@ class CParaNdisRX : public CParaNdisTemplatePath<CVirtQueue>, public CNdisAlloca
private:
int PrepareReceiveBuffers();
pRxNetDescriptor CreateRxDescriptorOnInit();
void RecalculateLimits();
};

#ifdef PARANDIS_SUPPORT_RSS
Expand Down
25 changes: 23 additions & 2 deletions NetKVM/Common/ParaNdis-Util.h
Original file line number Diff line number Diff line change
Expand Up @@ -562,10 +562,9 @@ class CNdisSharedMemory : public CNdisAllocatable<CNdisSharedMemory, 'XTSM'>
: m_DrvHandle(NULL)
{}

bool Create(NDIS_HANDLE DrvHandle)
void Initialize(NDIS_HANDLE DrvHandle)
{
m_DrvHandle = DrvHandle;
return true;
}

~CNdisSharedMemory();
Expand Down Expand Up @@ -849,3 +848,25 @@ void ParaNdis_CompleteNBLChainWithStatus(NDIS_HANDLE MiniportHandle, PNET_BUFFER

ParaNdis_CompleteNBLChain(MiniportHandle, NBL, Flags);
}

// Stores the current system time, as returned by NdisGetCurrentSystemTime,
// into Variable (the raw QuadPart value, no unit conversion is applied here).
static FORCEINLINE void UpdateTimestamp(ULONGLONG& Variable)
{
    LARGE_INTEGER now;
    NdisGetCurrentSystemTime(&now);
    Variable = now.QuadPart;
}

// Helper that owns a single system thread used for lazy (post-initialization)
// allocation of RX buffers: ThreadProc repeatedly asks every RX path to
// allocate more buffers until none of them reports further progress or until
// Stop() is called.
class CSystemThread
{
public:
// Saves Context, records the start time and creates the system thread.
// Returns true when the thread was created successfully.
bool Start(PVOID Context);
// Signals the termination event and polls until the thread procedure
// has reset m_hThread to NULL.
void Stop();
// Mutex taken by ParaNdis_PowerOn/ParaNdis_PowerOff and by the thread
// around each allocation pass, so the two do not run concurrently.
CMutexProtectedAccess& PowerMutex() { return m_PowerMutex; }
private:
// Signaled to make the thread loop exit (by Stop() or by the thread itself
// when there is nothing more to allocate).
CNdisEvent m_Event;
CMutexProtectedAccess m_PowerMutex;
// Set by PsCreateSystemThread in Start(); reset to NULL at the end of
// ThreadProc, which is what Stop() waits for.
HANDLE m_hThread = NULL;
// Thread body, entered via the start routine passed to PsCreateSystemThread.
void ThreadProc();
// Opaque pointer given to Start(); ThreadProc casts it to PARANDIS_ADAPTER*.
PVOID m_Context = NULL;
// Timestamp taken in Start(); used to compute extraStatistics.lazyAllocTime.
ULONGLONG m_StartTime = 0;
};
36 changes: 22 additions & 14 deletions NetKVM/Common/ParaNdis-VirtQueue.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,7 @@ class CTXHeaders
CTXHeaders()
{}

bool Create(NDIS_HANDLE DrvHandle, ULONG VirtioHdrSize)
{
m_VirtioHdrSize = VirtioHdrSize;
return m_HeadersBuffer.Create(DrvHandle);
}

bool Allocate();
void Initialize(ULONG VirtioHdrSize, const tCompletePhysicalAddress& Buffer);

virtio_net_hdr *VirtioHeader() const
{ return static_cast<virtio_net_hdr*>(m_VirtioHeaderVA); }
Expand Down Expand Up @@ -67,7 +61,7 @@ class CTXHeaders
{ return m_IPHeadersPA; }

private:
CNdisSharedMemory m_HeadersBuffer;
tCompletePhysicalAddress m_HeadersBuffer;
ULONG m_VirtioHdrSize = 0;

PVOID m_VlanHeaderVA = nullptr;
Expand Down Expand Up @@ -99,17 +93,30 @@ class CTXDescriptor : public CNdisAllocatable<CTXDescriptor, 'DTHR'>
bool Indirect,
bool AnyLayout)
{
if (!m_Headers.Create(DrvHandle, VirtioHeaderSize))
return false;
if (!m_IndirectArea.Create(DrvHandle))
m_MemoryBuffer.Initialize(DrvHandle);
// allocate 8K buffer
if (!m_MemoryBuffer.Allocate(PAGE_SIZE * 2))
{
return false;
}
m_VirtioSGL = VirtioSGL;
m_VirtioSGLSize = VirtioSGLSize;
m_Indirect = Indirect;
m_AnyLayout = AnyLayout;


return m_Headers.Allocate() && (!m_Indirect || m_IndirectArea.Allocate(PAGE_SIZE));
// first 4K is for headers area
tCompletePhysicalAddress headers;
headers.Physical = m_MemoryBuffer.GetPA();
headers.Virtual = m_MemoryBuffer.GetVA();
headers.size = PAGE_SIZE;
m_Headers.Initialize(VirtioHeaderSize, headers);

// second 4K is for indirect area
m_IndirectArea.Physical = m_MemoryBuffer.GetPA();
m_IndirectArea.Physical.QuadPart += PAGE_SIZE;
m_IndirectArea.Virtual = RtlOffsetToPointer(m_MemoryBuffer.GetVA(), PAGE_SIZE);
m_IndirectArea.size = PAGE_SIZE;
return true;
}

SubmitTxPacketResult Enqueue(CTXVirtQueue *Queue, ULONG TotalDescriptors, ULONG FreeDescriptors);
Expand All @@ -133,7 +140,8 @@ class CTXDescriptor : public CNdisAllocatable<CTXDescriptor, 'DTHR'>

private:
CTXHeaders m_Headers;
CNdisSharedMemory m_IndirectArea;
CNdisSharedMemory m_MemoryBuffer;
tCompletePhysicalAddress m_IndirectArea;
bool m_Indirect = false;
bool m_AnyLayout = false;

Expand Down
8 changes: 8 additions & 0 deletions NetKVM/Common/ParaNdis_Common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ typedef struct _tagConfigurationEntries
tConfigurationEntry VlanId;
tConfigurationEntry JumboPacket;
tConfigurationEntry NumberOfHandledRXPacketsInDPC;
tConfigurationEntry FastInit;
#if PARANDIS_SUPPORT_RSS
tConfigurationEntry RSSOffloadSupported;
tConfigurationEntry NumRSSQueues;
Expand Down Expand Up @@ -133,6 +134,7 @@ static const tConfigurationEntries defaultConfiguration =
{ "VlanId", 0, 0, MAX_VLAN_ID},
{ "*JumboPacket", 1514, 590, 65500},
{ "NumberOfHandledRXPacketsInDPC", MAX_RX_LOOPS, 1, 10000},
{ "FastInit", 1, 0, 1},
#if PARANDIS_SUPPORT_RSS
{ "*RSS", 1, 0, 1},
{ "*NumRssQueues", 16, 1, PARANDIS_RSS_MAX_RECEIVE_QUEUES},
Expand Down Expand Up @@ -270,6 +272,7 @@ static bool ReadNicConfiguration(PARANDIS_ADAPTER *pContext, PUCHAR pNewMACAddre
GetConfigurationEntry(cfg, &pConfiguration->VlanId);
GetConfigurationEntry(cfg, &pConfiguration->JumboPacket);
GetConfigurationEntry(cfg, &pConfiguration->NumberOfHandledRXPacketsInDPC);
GetConfigurationEntry(cfg, &pConfiguration->FastInit);
#if PARANDIS_SUPPORT_RSS
GetConfigurationEntry(cfg, &pConfiguration->RSSOffloadSupported);
GetConfigurationEntry(cfg, &pConfiguration->NumRSSQueues);
Expand All @@ -287,6 +290,7 @@ static bool ReadNicConfiguration(PARANDIS_ADAPTER *pContext, PUCHAR pNewMACAddre

bDebugPrint = pConfiguration->isLogEnabled.ulValue;
virtioDebugLevel = pConfiguration->debugLevel.ulValue;
pContext->bFastInit = pConfiguration->FastInit.ulValue != 0;
pContext->physicalMediaType = (NDIS_PHYSICAL_MEDIUM)pConfiguration->PhysicalMediaType.ulValue;
pContext->maxFreeTxDescriptors = pConfiguration->TxCapacity.ulValue;
pContext->maxRxBufferPerQueue = pConfiguration->RxCapacity.ulValue;
Expand Down Expand Up @@ -2161,6 +2165,8 @@ NDIS_STATUS ParaNdis_PowerOn(PARANDIS_ADAPTER *pContext)
DEBUG_ENTRY(0);
ParaNdis_DebugHistory(pContext, _etagHistoryLogOperation::hopPowerOn, NULL, 1, 0, 0);

CMutexLockedContext sync(pContext->systemThread.PowerMutex());

pContext->m_StateMachine.NotifyPowerOn();

ParaNdis_ResetVirtIONetDevice(pContext);
Expand Down Expand Up @@ -2213,6 +2219,8 @@ VOID ParaNdis_PowerOff(PARANDIS_ADAPTER *pContext)
DEBUG_ENTRY(0);
ParaNdis_DebugHistory(pContext, _etagHistoryLogOperation::hopPowerOff, NULL, 1, 0, 0);

CMutexLockedContext sync(pContext->systemThread.PowerMutex());

pContext->m_StateMachine.NotifySuspended();

pContext->bConnected = FALSE;
Expand Down
70 changes: 65 additions & 5 deletions NetKVM/Common/ParaNdis_RX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
#include "ParaNdis_RX.tmh"
#endif

// define as 0 to allocate all the required buffers at once
//#define INITIAL_RX_BUFFERS 0
#define INITIAL_RX_BUFFERS 16

static FORCEINLINE VOID ParaNdis_ReceiveQueueAddBuffer(PPARANDIS_RECEIVE_QUEUE pQueue, pRxNetDescriptor pBuffer)
{
NdisInterlockedInsertTailList(&pQueue->BuffersList,
Expand Down Expand Up @@ -93,11 +97,28 @@ CParaNdisRX::~CParaNdisRX()
{
}

// Recomputes the values derived from m_NetMaxReceiveBuffers.
// Called during initialization and later, under m_Lock, after each
// additional buffer allocation. Whenever m_NetMaxReceiveBuffers changes,
// m_nReusedRxBuffersLimit and m_MinRxBufferLimit are refreshed and
// m_nReusedRxBuffersCounter is reset to zero; on the additional-allocation
// path the caller kicks the rx queue afterwards.
void CParaNdisRX::RecalculateLimits()
{
    const auto maxBuffers = m_NetMaxReceiveBuffers;

    m_nReusedRxBuffersCounter = 0;
    m_nReusedRxBuffersLimit = maxBuffers / 4 + 1;
    m_MinRxBufferLimit = maxBuffers * m_Context->MinRxBufferPercent / 100;
    DPrintf(0, "[%s] m_NetMaxReceiveBuffers %d, m_MinRxBufferLimit %u\n", __FUNCTION__, m_NetMaxReceiveBuffers, m_MinRxBufferLimit);
}

bool CParaNdisRX::Create(PPARANDIS_ADAPTER Context, UINT DeviceQueueIndex)
{
m_Context = Context;
m_queueIndex = (u16)DeviceQueueIndex;
m_NetMaxReceiveBuffers = Context->maxRxBufferPerQueue;
m_NetMaxReceiveBuffers = Context->bFastInit ? INITIAL_RX_BUFFERS : 0;
if (!m_NetMaxReceiveBuffers || m_NetMaxReceiveBuffers > Context->maxRxBufferPerQueue)
{
m_NetMaxReceiveBuffers = Context->maxRxBufferPerQueue;
}

if (!m_VirtQueue.Create(DeviceQueueIndex,
&m_Context->IODevice,
Expand All @@ -109,8 +130,6 @@ bool CParaNdisRX::Create(PPARANDIS_ADAPTER Context, UINT DeviceQueueIndex)

PrepareReceiveBuffers();

m_nReusedRxBuffersLimit = m_NetMaxReceiveBuffers / 4 + 1;

CreatePath();

return true;
Expand Down Expand Up @@ -140,8 +159,9 @@ int CParaNdisRX::PrepareReceiveBuffers()
m_NetNofReceiveBuffers++;
}
m_NetMaxReceiveBuffers = m_NetNofReceiveBuffers;
m_MinRxBufferLimit = m_NetNofReceiveBuffers * m_Context->MinRxBufferPercent / 100;
DPrintf(0, "[%s] m_NetMaxReceiveBuffers %d, m_MinRxBufferLimit %u\n", __FUNCTION__, m_NetMaxReceiveBuffers, m_MinRxBufferLimit);

RecalculateLimits();

if (m_Context->extraStatistics.minFreeRxBuffers == 0 || m_Context->extraStatistics.minFreeRxBuffers > m_NetNofReceiveBuffers)
{
m_Context->extraStatistics.minFreeRxBuffers = m_NetNofReceiveBuffers;
Expand Down Expand Up @@ -250,6 +270,46 @@ pRxNetDescriptor CParaNdisRX::CreateRxDescriptorOnInit()
return NULL;
}

/*
 * Tries to add one more RX buffer to this queue (lazy allocation).
 * Must be called on PASSIVE from system thread.
 *
 * Returns:
 *   TRUE  - one buffer was added, or the queue is not ready yet and the
 *           caller should try again later;
 *   FALSE - nothing was added and there is no point in retrying: the
 *           configured/ring limit is reached, the allocation failed or the
 *           buffer could not be placed into the virtqueue.
 */
BOOLEAN CParaNdisRX::AllocateMore()
{
    // if the queue is not ready, try again later
    if (!m_pVirtQueue->IsValid() || !m_Reinsert)
    {
        DPrintf(1, " Queue is not ready, try later\n");
        return TRUE;
    }

    if (m_NetMaxReceiveBuffers >= m_Context->maxRxBufferPerQueue || m_NetMaxReceiveBuffers >= m_pVirtQueue->GetRingSize())
    {
        return FALSE;
    }

    // the allocation itself is done without holding the lock
    pRxNetDescriptor pBuffersDescriptor = CreateRxDescriptorOnInit();
    if (!pBuffersDescriptor)
    {
        // nothing to insert and nothing to free: do not take the lock at all
        return FALSE;
    }
    // the descriptor is not visible to anybody else yet
    pBuffersDescriptor->Queue = this;

    BOOLEAN result = FALSE;
    {
        // the lock protects only the queue/list/counters update
        TPassiveSpinLocker autoLock(m_Lock);

        if (m_pVirtQueue->CanTouchHardware() && AddRxBufferToQueue(pBuffersDescriptor))
        {
            InsertTailList(&m_NetReceiveBuffers, &pBuffersDescriptor->listEntry);
            m_NetNofReceiveBuffers++;
            m_NetMaxReceiveBuffers++;
            RecalculateLimits();
            KickRXRing();
            result = TRUE;
        }
    }

    if (!result)
    {
        // The descriptor was never linked into the list or the virtqueue, so it
        // can be released after the spin lock is dropped.
        // NOTE(review): presumably this ends up releasing shared memory, which
        // is better done at PASSIVE_LEVEL than under a spin lock - confirm.
        ParaNdis_FreeRxBufferDescriptor(m_Context, pBuffersDescriptor);
    }
    return result;
}

/* TODO - make it method in pRXNetDescriptor */
BOOLEAN CParaNdisRX::AddRxBufferToQueue(pRxNetDescriptor pBufferDescriptor)
{
Expand Down
9 changes: 1 addition & 8 deletions NetKVM/Common/ParaNdis_TX.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,6 @@
#include "ParaNdis_TX.tmh"
#endif

// Stores the current system time, as returned by NdisGetCurrentSystemTime,
// into Variable (the raw QuadPart value, no unit conversion is applied here).
static FORCEINLINE void UpdateTimestamp(ULONGLONG& Variable)
{
    LARGE_INTEGER now;
    NdisGetCurrentSystemTime(&now);
    Variable = now.QuadPart;
}

CNBL::CNBL(PNET_BUFFER_LIST NBL, PPARANDIS_ADAPTER Context, CParaNdisTX &ParentTXPath, CAllocationHelper<CNBL> *NBLAllocator, CAllocationHelper<CNB> *NBAllocator)
: m_NBL(NBL)
, m_Context(Context)
Expand Down Expand Up @@ -463,7 +456,7 @@ bool CParaNdisTX::AllocateExtraPages()
{
return false;
}
Page->Create(m_Context->MiniportHandle);
Page->Initialize(m_Context->MiniportHandle);
if (Page->Allocate(PAGE_SIZE))
{
m_ExtraPages.Push(Page);
Expand Down
55 changes: 55 additions & 0 deletions NetKVM/Common/ParaNdis_Util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,3 +124,58 @@ void Parandis_UtilOnly_Trace(LONG level, LPCSTR s1, LPCSTR s2)
TraceNoPrefix(level, "[%s] - format concatenation failed for %s", s1, s2);
}
}

// Remembers Context for the thread procedure, records the start timestamp
// and creates the system thread that runs ThreadProc() on this object.
// Returns true when PsCreateSystemThread succeeded and produced a handle.
bool CSystemThread::Start(PVOID Context)
{
    // start routine: the start context handed to PsCreateSystemThread is 'this'
    auto entryPoint = [](PVOID Ctx)
    {
        static_cast<CSystemThread *>(Ctx)->ThreadProc();
    };

    m_Context = Context;
    UpdateTimestamp(m_StartTime);

    const NTSTATUS status = PsCreateSystemThread(&m_hThread, GENERIC_READ, NULL, NULL, NULL, entryPoint, this);
    if (!NT_SUCCESS(status))
    {
        DPrintf(0, "Failed to start, status %X", status);
        return false;
    }
    return m_hThread != NULL;
}

void CSystemThread::Stop()
{
DPrintf(0, "Waiting for thread termination");
m_Event.Notify();
while (m_hThread)
{
NdisMSleep(20000);
}
DPrintf(0, "Terminated");
}

void CSystemThread::ThreadProc()
{
PARANDIS_ADAPTER* context = (PARANDIS_ADAPTER*)m_Context;
context->extraStatistics.lazyAllocTime = -1;
while (!m_Event.Wait(1))
{
UINT n = 0;

CMutexLockedContext sync(m_PowerMutex);

for (UINT i = 0; i < context->nPathBundles; ++i)
{
n += context->pPathBundles[i].rxPath.AllocateMore();
}
if (n == 0)
{
DPrintf(0, "All the memory allocations done");
m_Event.Notify();
ULONGLONG endTimestamp;
UpdateTimestamp(endTimestamp);
context->extraStatistics.lazyAllocTime = (LONG)((endTimestamp - m_StartTime)/10000);
}
}
m_hThread = NULL;
}
Loading