1
/* $Id: PDMAsyncCompletionFile.cpp 35346 2010-12-27 16:13:13Z vboxsync $ */
3
* PDM Async I/O - Transport data asynchronous in R3 using EMT.
7
* Copyright (C) 2006-2009 Oracle Corporation
9
* This file is part of VirtualBox Open Source Edition (OSE), as
10
* available from http://www.virtualbox.org. This file is free software;
11
* you can redistribute it and/or modify it under the terms of the GNU
12
* General Public License (GPL) as published by the Free Software
13
* Foundation, in version 2 as it comes in the "COPYING" file of the
14
* VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15
* hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
19
/*******************************************************************************
21
*******************************************************************************/
22
#define LOG_GROUP LOG_GROUP_PDM_ASYNC_COMPLETION
23
#include "PDMInternal.h"
24
#include <VBox/vmm/pdm.h>
25
#include <VBox/vmm/mm.h>
26
#include <VBox/vmm/vm.h>
30
#include <VBox/vmm/uvm.h>
33
#include <iprt/assert.h>
34
#include <iprt/critsect.h>
36
#include <iprt/file.h>
38
#include <iprt/semaphore.h>
39
#include <iprt/string.h>
40
#include <iprt/thread.h>
41
#include <iprt/path.h>
43
#if defined(RT_OS_DARWIN) || defined(RT_OS_SOLARIS) || defined(RT_OS_FREEBSD)
45
# include <sys/ioctl.h>
46
# include <sys/types.h>
47
# include <sys/stat.h>
53
# define _WIN32_WINNT 0x0500
55
# include <winioctl.h>
58
# include <sys/disk.h>
59
#endif /* RT_OS_DARWIN */
62
# include <sys/dkio.h>
63
# include <sys/vtoc.h>
64
#endif /* RT_OS_SOLARIS */
66
# include <sys/disk.h>
67
#endif /* RT_OS_FREEBSD */
69
#include "PDMAsyncCompletionFileInternal.h"
72
/*******************************************************************************
73
* Internal Functions *
74
*******************************************************************************/
75
#ifdef VBOX_WITH_DEBUGGER
76
static DECLCALLBACK(int) pdmacEpFileErrorInject(PCDBGCCMD pCmd, PDBGCCMDHLP pCmdHlp, PVM pVM, PCDBGCVAR pArgs, unsigned cArgs, PDBGCVAR pResult);
79
/*******************************************************************************
81
*******************************************************************************/
82
#ifdef VBOX_WITH_DEBUGGER
83
static const DBGCVARDESC g_aInjectErrorArgs[] =
85
/* cTimesMin, cTimesMax, enmCategory, fFlags, pszName, pszDescription */
86
{ 1, 1, DBGCVAR_CAT_STRING, 0, "direction", "write/read." },
87
{ 1, 1, DBGCVAR_CAT_STRING, 0, "filename", "Filename." },
88
{ 1, 1, DBGCVAR_CAT_STRING, 0, "errcode", "IPRT error code." },
91
/** Command descriptors. */
92
static const DBGCCMD g_aCmds[] =
94
/* pszCmd, cArgsMin, cArgsMax, paArgDesc, cArgDescs, pResultDesc, fFlags, pfnHandler pszSyntax, ....pszDescription */
95
{ "injecterror", 3, 3, &g_aInjectErrorArgs[0], 3, NULL, 0, pdmacEpFileErrorInject, "", "Inject error into I/O subsystem." },
103
* @param pEndpoint Pointer to the endpoint the segment was for.
104
* @param pTask The task to free.
106
void pdmacFileTaskFree(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint,
107
PPDMACTASKFILE pTask)
109
PPDMASYNCCOMPLETIONEPCLASSFILE pEpClass = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
111
LogFlowFunc((": pEndpoint=%p pTask=%p\n", pEndpoint, pTask));
113
/* Try the per endpoint cache first. */
114
if (pEndpoint->cTasksCached < pEpClass->cTasksCacheMax)
116
/* Add it to the list. */
117
pEndpoint->pTasksFreeTail->pNext = pTask;
118
pEndpoint->pTasksFreeTail = pTask;
119
ASMAtomicIncU32(&pEndpoint->cTasksCached);
123
Log(("Freeing task %p because all caches are full\n", pTask));
129
* Allocates a task segment
131
* @returns Pointer to the new task segment or NULL
132
* @param pEndpoint Pointer to the endpoint
134
PPDMACTASKFILE pdmacFileTaskAlloc(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
136
PPDMACTASKFILE pTask = NULL;
138
/* Try the small per endpoint cache first. */
139
if (pEndpoint->pTasksFreeHead == pEndpoint->pTasksFreeTail)
141
/* Try the bigger endpoint class cache. */
142
PPDMASYNCCOMPLETIONEPCLASSFILE pEndpointClass = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->Core.pEpClass;
145
* Allocate completely new.
146
* If this fails we return NULL.
148
int rc = MMR3HeapAllocZEx(pEndpointClass->Core.pVM, MM_TAG_PDM_ASYNC_COMPLETION,
149
sizeof(PDMACTASKFILE),
154
LogFlow(("Allocated task %p\n", pTask));
158
/* Grab a free task from the head. */
159
AssertMsg(pEndpoint->cTasksCached > 0, ("No tasks cached but list contains more than one element\n"));
161
pTask = pEndpoint->pTasksFreeHead;
162
pEndpoint->pTasksFreeHead = pTask->pNext;
163
ASMAtomicDecU32(&pEndpoint->cTasksCached);
171
PPDMACTASKFILE pdmacFileEpGetNewTasks(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
173
PPDMACTASKFILE pTasks = NULL;
178
pTasks = ASMAtomicXchgPtrT(&pEndpoint->pTasksNewHead, NULL, PPDMACTASKFILE);
180
/* Reverse the list to process in FIFO order. */
183
PPDMACTASKFILE pTask = pTasks;
189
PPDMACTASKFILE pCur = pTask;
190
pTask = pTask->pNext;
191
pCur->pNext = pTasks;
199
static void pdmacFileAioMgrWakeup(PPDMACEPFILEMGR pAioMgr)
201
bool fWokenUp = ASMAtomicXchgBool(&pAioMgr->fWokenUp, true);
205
int rc = VINF_SUCCESS;
206
bool fWaitingEventSem = ASMAtomicReadBool(&pAioMgr->fWaitingEventSem);
208
if (fWaitingEventSem)
209
rc = RTSemEventSignal(pAioMgr->EventSem);
215
static int pdmacFileAioMgrWaitForBlockingEvent(PPDMACEPFILEMGR pAioMgr, PDMACEPFILEAIOMGRBLOCKINGEVENT enmEvent)
217
int rc = VINF_SUCCESS;
219
ASMAtomicWriteU32((volatile uint32_t *)&pAioMgr->enmBlockingEvent, enmEvent);
220
Assert(!pAioMgr->fBlockingEventPending);
221
ASMAtomicXchgBool(&pAioMgr->fBlockingEventPending, true);
223
/* Wakeup the async I/O manager */
224
pdmacFileAioMgrWakeup(pAioMgr);
226
/* Wait for completion. */
227
rc = RTSemEventWait(pAioMgr->EventSemBlock, RT_INDEFINITE_WAIT);
230
ASMAtomicXchgBool(&pAioMgr->fBlockingEventPending, false);
231
ASMAtomicWriteU32((volatile uint32_t *)&pAioMgr->enmBlockingEvent, PDMACEPFILEAIOMGRBLOCKINGEVENT_INVALID);
236
int pdmacFileAioMgrAddEndpoint(PPDMACEPFILEMGR pAioMgr, PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
240
LogFlowFunc(("pAioMgr=%#p pEndpoint=%#p{%s}\n", pAioMgr, pEndpoint, pEndpoint->Core.pszUri));
242
/* Update the assigned I/O manager. */
243
ASMAtomicWritePtr(&pEndpoint->pAioMgr, pAioMgr);
245
rc = RTCritSectEnter(&pAioMgr->CritSectBlockingEvent);
246
AssertRCReturn(rc, rc);
248
ASMAtomicWritePtr(&pAioMgr->BlockingEventData.AddEndpoint.pEndpoint, pEndpoint);
249
rc = pdmacFileAioMgrWaitForBlockingEvent(pAioMgr, PDMACEPFILEAIOMGRBLOCKINGEVENT_ADD_ENDPOINT);
250
ASMAtomicWriteNullPtr(&pAioMgr->BlockingEventData.AddEndpoint.pEndpoint);
252
RTCritSectLeave(&pAioMgr->CritSectBlockingEvent);
257
static int pdmacFileAioMgrRemoveEndpoint(PPDMACEPFILEMGR pAioMgr, PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
261
rc = RTCritSectEnter(&pAioMgr->CritSectBlockingEvent);
262
AssertRCReturn(rc, rc);
264
ASMAtomicWritePtr(&pAioMgr->BlockingEventData.RemoveEndpoint.pEndpoint, pEndpoint);
265
rc = pdmacFileAioMgrWaitForBlockingEvent(pAioMgr, PDMACEPFILEAIOMGRBLOCKINGEVENT_REMOVE_ENDPOINT);
266
ASMAtomicWriteNullPtr(&pAioMgr->BlockingEventData.RemoveEndpoint.pEndpoint);
268
RTCritSectLeave(&pAioMgr->CritSectBlockingEvent);
273
static int pdmacFileAioMgrCloseEndpoint(PPDMACEPFILEMGR pAioMgr, PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint)
277
rc = RTCritSectEnter(&pAioMgr->CritSectBlockingEvent);
278
AssertRCReturn(rc, rc);
280
ASMAtomicWritePtr(&pAioMgr->BlockingEventData.CloseEndpoint.pEndpoint, pEndpoint);
281
rc = pdmacFileAioMgrWaitForBlockingEvent(pAioMgr, PDMACEPFILEAIOMGRBLOCKINGEVENT_CLOSE_ENDPOINT);
282
ASMAtomicWriteNullPtr(&pAioMgr->BlockingEventData.CloseEndpoint.pEndpoint);
284
RTCritSectLeave(&pAioMgr->CritSectBlockingEvent);
289
static int pdmacFileAioMgrShutdown(PPDMACEPFILEMGR pAioMgr)
293
rc = RTCritSectEnter(&pAioMgr->CritSectBlockingEvent);
294
AssertRCReturn(rc, rc);
296
rc = pdmacFileAioMgrWaitForBlockingEvent(pAioMgr, PDMACEPFILEAIOMGRBLOCKINGEVENT_SHUTDOWN);
298
RTCritSectLeave(&pAioMgr->CritSectBlockingEvent);
303
int pdmacFileEpAddTask(PPDMASYNCCOMPLETIONENDPOINTFILE pEndpoint, PPDMACTASKFILE pTask)
305
PPDMACTASKFILE pNext;
308
pNext = pEndpoint->pTasksNewHead;
309
pTask->pNext = pNext;
310
} while (!ASMAtomicCmpXchgPtr(&pEndpoint->pTasksNewHead, pTask, pNext));
312
pdmacFileAioMgrWakeup(ASMAtomicReadPtrT(&pEndpoint->pAioMgr, PPDMACEPFILEMGR));
317
void pdmacFileEpTaskCompleted(PPDMACTASKFILE pTask, void *pvUser, int rc)
319
PPDMASYNCCOMPLETIONTASKFILE pTaskFile = (PPDMASYNCCOMPLETIONTASKFILE)pvUser;
321
LogFlowFunc(("pTask=%#p pvUser=%#p rc=%Rrc\n", pTask, pvUser, rc));
323
if (pTask->enmTransferType == PDMACTASKFILETRANSFER_FLUSH)
325
pdmR3AsyncCompletionCompleteTask(&pTaskFile->Core, rc, true);
329
Assert((uint32_t)pTask->DataSeg.cbSeg == pTask->DataSeg.cbSeg && (int32_t)pTask->DataSeg.cbSeg >= 0);
330
uint32_t uOld = ASMAtomicSubS32(&pTaskFile->cbTransferLeft, (int32_t)pTask->DataSeg.cbSeg);
332
/* The first error will be returned. */
334
ASMAtomicCmpXchgS32(&pTaskFile->rc, rc, VINF_SUCCESS);
335
#ifdef VBOX_WITH_DEBUGGER
338
PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pTaskFile->Core.pEndpoint;
340
/* Overwrite with injected error code. */
341
if (pTask->enmTransferType == PDMACTASKFILETRANSFER_READ)
342
rc = ASMAtomicXchgS32(&pEpFile->rcReqRead, VINF_SUCCESS);
344
rc = ASMAtomicXchgS32(&pEpFile->rcReqWrite, VINF_SUCCESS);
347
ASMAtomicCmpXchgS32(&pTaskFile->rc, rc, VINF_SUCCESS);
351
if (!(uOld - pTask->DataSeg.cbSeg)
352
&& !ASMAtomicXchgBool(&pTaskFile->fCompleted, true))
353
pdmR3AsyncCompletionCompleteTask(&pTaskFile->Core, pTaskFile->rc, true);
357
/**
 * Resets the file specific state of a completion task before a new request
 * is issued on it.
 *
 * @param   pTask       The task; treated as a file completion task.
 * @param   cbTransfer  Number of bytes the request is going to transfer.
 *                      Asserted to fit into a non-negative int32_t.
 */
DECLINLINE(void) pdmacFileEpTaskInit(PPDMASYNCCOMPLETIONTASK pTask, size_t cbTransfer)
{
    PPDMASYNCCOMPLETIONTASKFILE pFileTask = (PPDMASYNCCOMPLETIONTASKFILE)pTask;
    int32_t cbLeft;

    /* The remaining byte counter is a signed 32-bit value. */
    Assert((uint32_t)cbTransfer == cbTransfer && (int32_t)cbTransfer >= 0);
    cbLeft = (int32_t)cbTransfer;

    ASMAtomicWriteS32(&pFileTask->cbTransferLeft, cbLeft);
    ASMAtomicWriteBool(&pFileTask->fCompleted, false);
    ASMAtomicWriteS32(&pFileTask->rc, VINF_SUCCESS);
}
367
int pdmacFileEpTaskInitiate(PPDMASYNCCOMPLETIONTASK pTask,
368
PPDMASYNCCOMPLETIONENDPOINT pEndpoint, RTFOFF off,
369
PCRTSGSEG paSegments, size_t cSegments,
370
size_t cbTransfer, PDMACTASKFILETRANSFER enmTransfer)
372
int rc = VINF_SUCCESS;
373
PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEndpoint;
374
PPDMASYNCCOMPLETIONTASKFILE pTaskFile = (PPDMASYNCCOMPLETIONTASKFILE)pTask;
375
PPDMACEPFILEMGR pAioMgr = pEpFile->pAioMgr;
377
Assert( (enmTransfer == PDMACTASKFILETRANSFER_READ)
378
|| (enmTransfer == PDMACTASKFILETRANSFER_WRITE));
380
for (unsigned i = 0; i < cSegments; i++)
382
PPDMACTASKFILE pIoTask = pdmacFileTaskAlloc(pEpFile);
385
pIoTask->pEndpoint = pEpFile;
386
pIoTask->enmTransferType = enmTransfer;
388
pIoTask->DataSeg.cbSeg = paSegments[i].cbSeg;
389
pIoTask->DataSeg.pvSeg = paSegments[i].pvSeg;
390
pIoTask->pvUser = pTaskFile;
391
pIoTask->pfnCompleted = pdmacFileEpTaskCompleted;
393
/* Send it off to the I/O manager. */
394
pdmacFileEpAddTask(pEpFile, pIoTask);
395
off += paSegments[i].cbSeg;
396
cbTransfer -= paSegments[i].cbSeg;
399
/* cbTransfer is a size_t, so it has to be formatted with %zu. */
AssertMsg(!cbTransfer, ("Incomplete transfer %zu bytes left\n", cbTransfer));
401
if (ASMAtomicReadS32(&pTaskFile->cbTransferLeft) == 0
402
&& !ASMAtomicXchgBool(&pTaskFile->fCompleted, true))
403
pdmR3AsyncCompletionCompleteTask(pTask, pTaskFile->rc, false);
405
rc = VINF_AIO_TASK_PENDING;
411
* Creates a new async I/O manager.
413
* @returns VBox status code.
414
* @param pEpClass Pointer to the endpoint class data.
415
* @param ppAioMgr Where to store the pointer to the new async I/O manager on success.
416
* @param enmMgrType Wanted manager type - can be overwritten by the global override.
418
int pdmacFileAioMgrCreate(PPDMASYNCCOMPLETIONEPCLASSFILE pEpClass, PPPDMACEPFILEMGR ppAioMgr,
419
PDMACEPFILEMGRTYPE enmMgrType)
421
int rc = VINF_SUCCESS;
422
PPDMACEPFILEMGR pAioMgrNew;
424
LogFlowFunc((": Entered\n"));
426
rc = MMR3HeapAllocZEx(pEpClass->Core.pVM, MM_TAG_PDM_ASYNC_COMPLETION, sizeof(PDMACEPFILEMGR), (void **)&pAioMgrNew);
429
if (enmMgrType < pEpClass->enmMgrTypeOverride)
430
pAioMgrNew->enmMgrType = enmMgrType;
432
pAioMgrNew->enmMgrType = pEpClass->enmMgrTypeOverride;
434
rc = RTSemEventCreate(&pAioMgrNew->EventSem);
437
rc = RTSemEventCreate(&pAioMgrNew->EventSemBlock);
440
rc = RTCritSectInit(&pAioMgrNew->CritSectBlockingEvent);
443
/* Init the rest of the manager. */
444
if (pAioMgrNew->enmMgrType != PDMACEPFILEMGRTYPE_SIMPLE)
445
rc = pdmacFileAioMgrNormalInit(pAioMgrNew);
449
pAioMgrNew->enmState = PDMACEPFILEMGRSTATE_RUNNING;
451
rc = RTThreadCreateF(&pAioMgrNew->Thread,
452
pAioMgrNew->enmMgrType == PDMACEPFILEMGRTYPE_SIMPLE
453
? pdmacFileAioMgrFailsafe
454
: pdmacFileAioMgrNormal,
459
"AioMgr%d-%s", pEpClass->cAioMgrs,
460
pAioMgrNew->enmMgrType == PDMACEPFILEMGRTYPE_SIMPLE
465
/* Link it into the list. */
466
RTCritSectEnter(&pEpClass->CritSect);
467
pAioMgrNew->pNext = pEpClass->pAioMgrHead;
468
if (pEpClass->pAioMgrHead)
469
pEpClass->pAioMgrHead->pPrev = pAioMgrNew;
470
pEpClass->pAioMgrHead = pAioMgrNew;
471
pEpClass->cAioMgrs++;
472
RTCritSectLeave(&pEpClass->CritSect);
474
*ppAioMgr = pAioMgrNew;
476
Log(("PDMAC: Successfully created new file AIO Mgr {%s}\n", RTThreadGetName(pAioMgrNew->Thread)));
479
pdmacFileAioMgrNormalDestroy(pAioMgrNew);
481
RTCritSectDelete(&pAioMgrNew->CritSectBlockingEvent);
483
RTSemEventDestroy(pAioMgrNew->EventSem);
485
RTSemEventDestroy(pAioMgrNew->EventSemBlock);
487
MMR3HeapFree(pAioMgrNew);
490
LogFlowFunc((": Leave rc=%Rrc\n", rc));
496
* Destroys an async I/O manager.
499
* @param pAioMgr The async I/O manager to destroy.
501
static void pdmacFileAioMgrDestroy(PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile, PPDMACEPFILEMGR pAioMgr)
503
int rc = pdmacFileAioMgrShutdown(pAioMgr);
506
/* Unlink from the list. */
507
rc = RTCritSectEnter(&pEpClassFile->CritSect);
510
PPDMACEPFILEMGR pPrev = pAioMgr->pPrev;
511
PPDMACEPFILEMGR pNext = pAioMgr->pNext;
514
pPrev->pNext = pNext;
516
pEpClassFile->pAioMgrHead = pNext;
519
pNext->pPrev = pPrev;
521
pEpClassFile->cAioMgrs--;
522
rc = RTCritSectLeave(&pEpClassFile->CritSect);
525
/* Free the resources. */
526
RTCritSectDelete(&pAioMgr->CritSectBlockingEvent);
527
RTSemEventDestroy(pAioMgr->EventSem);
528
if (pAioMgr->enmMgrType != PDMACEPFILEMGRTYPE_SIMPLE)
529
pdmacFileAioMgrNormalDestroy(pAioMgr);
531
MMR3HeapFree(pAioMgr);
534
static int pdmacFileMgrTypeFromName(const char *pszVal, PPDMACEPFILEMGRTYPE penmMgrType)
536
int rc = VINF_SUCCESS;
538
if (!RTStrCmp(pszVal, "Simple"))
539
*penmMgrType = PDMACEPFILEMGRTYPE_SIMPLE;
540
else if (!RTStrCmp(pszVal, "Async"))
541
*penmMgrType = PDMACEPFILEMGRTYPE_ASYNC;
543
rc = VERR_CFGM_CONFIG_UNKNOWN_VALUE;
548
static const char *pdmacFileMgrTypeToName(PDMACEPFILEMGRTYPE enmMgrType)
550
if (enmMgrType == PDMACEPFILEMGRTYPE_SIMPLE)
552
if (enmMgrType == PDMACEPFILEMGRTYPE_ASYNC)
558
static int pdmacFileBackendTypeFromName(const char *pszVal, PPDMACFILEEPBACKEND penmBackendType)
560
int rc = VINF_SUCCESS;
562
if (!RTStrCmp(pszVal, "Buffered"))
563
*penmBackendType = PDMACFILEEPBACKEND_BUFFERED;
564
else if (!RTStrCmp(pszVal, "NonBuffered"))
565
*penmBackendType = PDMACFILEEPBACKEND_NON_BUFFERED;
567
rc = VERR_CFGM_CONFIG_UNKNOWN_VALUE;
572
static const char *pdmacFileBackendTypeToName(PDMACFILEEPBACKEND enmBackendType)
574
if (enmBackendType == PDMACFILEEPBACKEND_BUFFERED)
576
if (enmBackendType == PDMACFILEEPBACKEND_NON_BUFFERED)
577
return "NonBuffered";
583
* Get the size of the given file.
584
* Works for block devices too.
586
* @returns VBox status code.
587
* @param hFile The file handle.
588
* @param pcbSize Where to store the size of the file on success.
590
static int pdmacFileEpNativeGetSize(RTFILE hFile, uint64_t *pcbSize)
592
int rc = VINF_SUCCESS;
595
rc = RTFileGetSize(hFile, &cbSize);
596
if (RT_SUCCESS(rc) && (cbSize != 0))
601
DISK_GEOMETRY DriveGeo;
603
if (DeviceIoControl((HANDLE)hFile,
604
IOCTL_DISK_GET_DRIVE_GEOMETRY, NULL, 0,
605
&DriveGeo, sizeof(DriveGeo), &cbDriveGeo, NULL))
607
if ( DriveGeo.MediaType == FixedMedia
608
|| DriveGeo.MediaType == RemovableMedia)
610
cbSize = DriveGeo.Cylinders.QuadPart
611
* DriveGeo.TracksPerCylinder
612
* DriveGeo.SectorsPerTrack
613
* DriveGeo.BytesPerSector;
615
GET_LENGTH_INFORMATION DiskLenInfo;
617
if (DeviceIoControl((HANDLE)hFile,
618
IOCTL_DISK_GET_LENGTH_INFO, NULL, 0,
619
&DiskLenInfo, sizeof(DiskLenInfo), &junk, (LPOVERLAPPED)NULL))
621
/* IOCTL_DISK_GET_LENGTH_INFO is supported -- override cbSize. */
622
cbSize = DiskLenInfo.Length.QuadPart;
629
rc = VERR_INVALID_PARAMETER;
634
rc = RTErrConvertFromWin32(GetLastError());
636
#elif defined(RT_OS_DARWIN)
638
if (!fstat(hFile, &DevStat) && S_ISBLK(DevStat.st_mode))
642
if (!ioctl(hFile, DKIOCGETBLOCKCOUNT, &cBlocks))
644
if (!ioctl(hFile, DKIOCGETBLOCKSIZE, &cbBlock))
645
cbSize = cBlocks * cbBlock;
647
rc = RTErrConvertFromErrno(errno);
650
rc = RTErrConvertFromErrno(errno);
653
rc = VERR_INVALID_PARAMETER;
654
#elif defined(RT_OS_SOLARIS)
656
if (!fstat(hFile, &DevStat) && ( S_ISBLK(DevStat.st_mode)
657
|| S_ISCHR(DevStat.st_mode)))
659
struct dk_minfo mediainfo;
660
if (!ioctl(hFile, DKIOCGMEDIAINFO, &mediainfo))
661
cbSize = mediainfo.dki_capacity * mediainfo.dki_lbsize;
663
rc = RTErrConvertFromErrno(errno);
666
rc = VERR_INVALID_PARAMETER;
667
#elif defined(RT_OS_FREEBSD)
669
if (!fstat(hFile, &DevStat) && S_ISCHR(DevStat.st_mode))
672
if (!ioctl(hFile, DIOCGMEDIASIZE, &cbMedia))
677
rc = RTErrConvertFromErrno(errno);
680
rc = VERR_INVALID_PARAMETER;
682
/* Could be a block device */
683
rc = RTFileSeek(hFile, 0, RTFILE_SEEK_END, &cbSize);
686
if (RT_SUCCESS(rc) && (cbSize != 0))
688
else if (RT_SUCCESS(rc))
689
rc = VERR_NOT_SUPPORTED;
695
#ifdef VBOX_WITH_DEBUGGER
697
* Error inject callback.
699
static DECLCALLBACK(int) pdmacEpFileErrorInject(PCDBGCCMD pCmd, PDBGCCMDHLP pCmdHlp, PVM pVM, PCDBGCVAR pArgs, unsigned cArgs, PDBGCVAR pResult)
702
PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile;
708
return DBGCCmdHlpPrintf(pCmdHlp, "error: The command requires a VM to be selected.\n");
710
|| pArgs[0].enmType != DBGCVAR_TYPE_STRING
711
|| pArgs[1].enmType != DBGCVAR_TYPE_STRING
712
|| pArgs[2].enmType != DBGCVAR_TYPE_STRING)
713
return pCmdHlp->pfnPrintf(pCmdHlp, NULL, "error: parser error, invalid arguments.\n");
715
pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pVM->pUVM->pdm.s.apAsyncCompletionEndpointClass[PDMASYNCCOMPLETIONEPCLASSTYPE_FILE];
717
/* Syntax is "read|write <filename> <status code>" */
718
if (!RTStrCmp(pArgs[0].u.pszString, "read"))
720
else if (!RTStrCmp(pArgs[0].u.pszString, "write"))
724
DBGCCmdHlpPrintf(pCmdHlp, "error: invalid transefr direction '%s'.\n", pArgs[0].u.pszString);
728
/* Search for the matching endpoint. */
729
RTCritSectEnter(&pEpClassFile->Core.CritSect);
730
PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEpClassFile->Core.pEndpointsHead;
734
if (!RTStrCmp(pArgs[1].u.pszString, RTPathFilename(pEpFile->Core.pszUri)))
736
pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEpFile->Core.pNext;
741
int rcToInject = RTStrToInt32(pArgs[2].u.pszString);
744
ASMAtomicXchgS32(&pEpFile->rcReqWrite, rcToInject);
746
ASMAtomicXchgS32(&pEpFile->rcReqRead, rcToInject);
748
DBGCCmdHlpPrintf(pCmdHlp, "Injected %Rrc into '%s' for %s\n",
749
rcToInject, pArgs[1].u.pszString, pArgs[0].u.pszString);
752
/* The format arguments follow the format string directly; %s gets the file name that was searched for. */
DBGCCmdHlpPrintf(pCmdHlp, "No file with name '%s' found\n", pArgs[1].u.pszString);
754
RTCritSectLeave(&pEpClassFile->Core.CritSect);
759
static int pdmacFileInitialize(PPDMASYNCCOMPLETIONEPCLASS pClassGlobals, PCFGMNODE pCfgNode)
761
int rc = VINF_SUCCESS;
762
RTFILEAIOLIMITS AioLimits; /** < Async I/O limitations. */
764
PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pClassGlobals;
766
rc = RTFileAioGetLimits(&AioLimits);
768
if (RT_SUCCESS(rc) && RTEnvExist("VBOX_ASYNC_IO_FAILBACK"))
769
rc = VERR_ENV_VAR_NOT_FOUND;
773
LogRel(("AIO: Async I/O manager not supported (rc=%Rrc). Falling back to simple manager\n",
775
pEpClassFile->enmMgrTypeOverride = PDMACEPFILEMGRTYPE_SIMPLE;
776
pEpClassFile->enmEpBackendDefault = PDMACFILEEPBACKEND_BUFFERED;
780
pEpClassFile->uBitmaskAlignment = AioLimits.cbBufferAlignment ? ~((RTR3UINTPTR)AioLimits.cbBufferAlignment - 1) : RTR3UINTPTR_MAX;
781
pEpClassFile->cReqsOutstandingMax = AioLimits.cReqsOutstandingMax;
785
/* Query the default manager type */
787
rc = CFGMR3QueryStringAllocDef(pCfgNode, "IoMgr", &pszVal, "Async");
788
AssertLogRelRCReturn(rc, rc);
790
rc = pdmacFileMgrTypeFromName(pszVal, &pEpClassFile->enmMgrTypeOverride);
791
MMR3HeapFree(pszVal);
795
LogRel(("AIOMgr: Default manager type is \"%s\"\n", pdmacFileMgrTypeToName(pEpClassFile->enmMgrTypeOverride)));
797
/* Query default backend type */
798
rc = CFGMR3QueryStringAllocDef(pCfgNode, "FileBackend", &pszVal, "NonBuffered");
799
AssertLogRelRCReturn(rc, rc);
801
rc = pdmacFileBackendTypeFromName(pszVal, &pEpClassFile->enmEpBackendDefault);
802
MMR3HeapFree(pszVal);
806
LogRel(("AIOMgr: Default file backend is \"%s\"\n", pdmacFileBackendTypeToName(pEpClassFile->enmEpBackendDefault)));
809
if ( pEpClassFile->enmMgrTypeOverride == PDMACEPFILEMGRTYPE_ASYNC
810
&& pEpClassFile->enmEpBackendDefault == PDMACFILEEPBACKEND_BUFFERED)
812
LogRel(("AIOMgr: Linux does not support buffered async I/O, changing to non buffered\n"));
813
pEpClassFile->enmEpBackendDefault = PDMACFILEEPBACKEND_NON_BUFFERED;
819
/* No configuration supplied, set defaults */
820
pEpClassFile->enmEpBackendDefault = PDMACFILEEPBACKEND_NON_BUFFERED;
821
pEpClassFile->enmMgrTypeOverride = PDMACEPFILEMGRTYPE_ASYNC;
825
/* Init critical section. */
826
rc = RTCritSectInit(&pEpClassFile->CritSect);
828
#ifdef VBOX_WITH_DEBUGGER
829
/* Install the error injection handler. */
832
rc = DBGCRegisterCommands(&g_aCmds[0], 1);
840
/**
 * Tears down the file endpoint class: destroys every async I/O manager still
 * linked to it and deletes the class critical section.
 *
 * @param   pClassGlobals   The class data; treated as the file endpoint class.
 */
static void pdmacFileTerminate(PPDMASYNCCOMPLETIONEPCLASS pClassGlobals)
{
    PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pClassGlobals;
    PPDMACEPFILEMGR pMgr;

    /* Every endpoint is expected to have been closed by now. */
    AssertMsg(!pEpClassFile->Core.pEndpointsHead, ("There are still endpoints left\n"));

    /* Destroy the remaining managers; the list head is re-read on each pass. */
    for (pMgr = pEpClassFile->pAioMgrHead; pMgr; pMgr = pEpClassFile->pAioMgrHead)
        pdmacFileAioMgrDestroy(pEpClassFile, pMgr);

    RTCritSectDelete(&pEpClassFile->CritSect);
}
854
static int pdmacFileEpInitialize(PPDMASYNCCOMPLETIONENDPOINT pEndpoint,
855
const char *pszUri, uint32_t fFlags)
857
int rc = VINF_SUCCESS;
858
PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEndpoint;
859
PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->pEpClass;
860
PDMACEPFILEMGRTYPE enmMgrType = pEpClassFile->enmMgrTypeOverride;
861
PDMACFILEEPBACKEND enmEpBackend = pEpClassFile->enmEpBackendDefault;
863
AssertMsgReturn((fFlags & ~(PDMACEP_FILE_FLAGS_READ_ONLY | PDMACEP_FILE_FLAGS_DONT_LOCK)) == 0,
864
("PDMAsyncCompletion: Invalid flag specified\n"), VERR_INVALID_PARAMETER);
866
unsigned fFileFlags = RTFILE_O_OPEN;
868
if (fFlags & PDMACEP_FILE_FLAGS_READ_ONLY)
869
fFileFlags |= RTFILE_O_READ | RTFILE_O_DENY_NONE;
872
fFileFlags |= RTFILE_O_READWRITE;
875
* Opened in read/write mode. Check whether the caller wants to
876
* avoid the lock. Return an error in case caching is enabled
877
* because this can lead to data corruption.
879
if (fFlags & PDMACEP_FILE_FLAGS_DONT_LOCK)
880
fFileFlags |= RTFILE_O_DENY_NONE;
882
fFileFlags |= RTFILE_O_DENY_WRITE;
885
if (enmMgrType == PDMACEPFILEMGRTYPE_ASYNC)
886
fFileFlags |= RTFILE_O_ASYNC_IO;
888
if (enmEpBackend == PDMACFILEEPBACKEND_NON_BUFFERED)
891
* We only disable the cache if the size of the file is a multiple of 512.
892
* Certain hosts like Windows, Linux and Solaris require that transfer sizes
893
* are aligned to the volume sector size.
894
* If not we just make sure that the data is written to disk with RTFILE_O_WRITE_THROUGH
895
* which will trash the host cache but ensures that the host cache will not
896
* contain dirty buffers.
898
RTFILE File = NIL_RTFILE;
900
rc = RTFileOpen(&File, pszUri, RTFILE_O_READ | RTFILE_O_OPEN | RTFILE_O_DENY_NONE);
905
rc = pdmacFileEpNativeGetSize(File, &cbSize);
906
Assert(RT_FAILURE(rc) || cbSize != 0);
908
if (RT_SUCCESS(rc) && ((cbSize % 512) == 0))
909
fFileFlags |= RTFILE_O_NO_CACHE;
912
/* Downgrade to the buffered backend */
913
enmEpBackend = PDMACFILEEPBACKEND_BUFFERED;
916
fFileFlags &= ~RTFILE_O_ASYNC_IO;
917
enmMgrType = PDMACEPFILEMGRTYPE_SIMPLE;
924
/* Open with final flags. */
925
rc = RTFileOpen(&pEpFile->File, pszUri, fFileFlags);
926
if ((rc == VERR_INVALID_FUNCTION) || (rc == VERR_INVALID_PARAMETER))
928
LogRel(("pdmacFileEpInitialize: RTFileOpen %s / %08x failed with %Rrc\n",
929
pszUri, fFileFlags, rc));
931
* Solaris doesn't support directio on ZFS so far. :-\
932
* Trying to enable it returns VERR_INVALID_FUNCTION
933
* (ENOTTY). Remove it and hope for the best.
934
* ZFS supports write throttling in case applications
935
* write more data than can be synced to the disk
936
* without blocking the whole application.
938
* On Linux we have the same problem with cifs.
939
* Have to disable async I/O here too because it requires O_DIRECT.
941
fFileFlags &= ~RTFILE_O_NO_CACHE;
942
enmEpBackend = PDMACFILEEPBACKEND_BUFFERED;
945
fFileFlags &= ~RTFILE_O_ASYNC_IO;
946
enmMgrType = PDMACEPFILEMGRTYPE_SIMPLE;
950
rc = RTFileOpen(&pEpFile->File, pszUri, fFileFlags);
954
LogRel(("pdmacFileEpInitialize: RTFileOpen %s / %08x failed AGAIN(!) with %Rrc\n",
955
pszUri, fFileFlags, rc));
961
pEpFile->fFlags = fFileFlags;
963
rc = pdmacFileEpNativeGetSize(pEpFile->File, (uint64_t *)&pEpFile->cbFile);
964
Assert(RT_FAILURE(rc) || pEpFile->cbFile != 0);
968
/* Initialize the segment cache */
969
rc = MMR3HeapAllocZEx(pEpClassFile->Core.pVM, MM_TAG_PDM_ASYNC_COMPLETION,
970
sizeof(PDMACTASKFILE),
971
(void **)&pEpFile->pTasksFreeHead);
974
PPDMACEPFILEMGR pAioMgr = NULL;
976
pEpFile->pTasksFreeTail = pEpFile->pTasksFreeHead;
977
pEpFile->cTasksCached = 0;
978
pEpFile->enmBackendType = enmEpBackend;
980
* Disable async flushes on Solaris for now.
981
* They cause weird hangs which need more investigation.
983
#ifndef RT_OS_SOLARIS
984
pEpFile->fAsyncFlushSupported = true;
986
pEpFile->fAsyncFlushSupported = false;
989
if (enmMgrType == PDMACEPFILEMGRTYPE_SIMPLE)
991
/* Simple mode. Every file has its own async I/O manager. */
992
rc = pdmacFileAioMgrCreate(pEpClassFile, &pAioMgr, PDMACEPFILEMGRTYPE_SIMPLE);
997
pAioMgr = pEpClassFile->pAioMgrHead;
999
/* Check for an idling manager of the same type */
1002
if (pAioMgr->enmMgrType == enmMgrType)
1004
pAioMgr = pAioMgr->pNext;
1009
rc = pdmacFileAioMgrCreate(pEpClassFile, &pAioMgr, enmMgrType);
1014
pEpFile->AioMgr.pTreeRangesLocked = (PAVLRFOFFTREE)RTMemAllocZ(sizeof(AVLRFOFFTREE));
1015
if (!pEpFile->AioMgr.pTreeRangesLocked)
1016
rc = VERR_NO_MEMORY;
1019
pEpFile->enmState = PDMASYNCCOMPLETIONENDPOINTFILESTATE_ACTIVE;
1021
/* Assign the endpoint to the thread. */
1022
rc = pdmacFileAioMgrAddEndpoint(pAioMgr, pEpFile);
1025
RTMemFree(pEpFile->AioMgr.pTreeRangesLocked);
1026
MMR3HeapFree(pEpFile->pTasksFreeHead);
1033
RTFileClose(pEpFile->File);
1036
#ifdef VBOX_WITH_STATISTICS
1039
STAMR3RegisterF(pEpClassFile->Core.pVM, &pEpFile->StatRead,
1040
STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
1041
STAMUNIT_TICKS_PER_CALL, "Time taken to read from the endpoint",
1042
"/PDM/AsyncCompletion/File/%s/Read", RTPathFilename(pEpFile->Core.pszUri));
1044
STAMR3RegisterF(pEpClassFile->Core.pVM, &pEpFile->StatWrite,
1045
STAMTYPE_PROFILE_ADV, STAMVISIBILITY_ALWAYS,
1046
STAMUNIT_TICKS_PER_CALL, "Time taken to write to the endpoint",
1047
"/PDM/AsyncCompletion/File/%s/Write", RTPathFilename(pEpFile->Core.pszUri));
1052
LogRel(("AIOMgr: Endpoint for file '%s' (flags %08x) created successfully\n", pszUri, pEpFile->fFlags));
1057
/**
 * Node callback handed to RTAvlrFileOffsetDestroy for the locked ranges tree.
 * Being called at all trips an assertion; the node itself is left untouched.
 *
 * @returns VINF_SUCCESS.
 * @param   pNode   The tree node (unused).
 * @param   pvUser  User argument (unused).
 */
static int pdmacFileEpRangesLockedDestroy(PAVLRFOFFNODECORE pNode, void *pvUser)
{
    (void)pNode;
    (void)pvUser;

    AssertMsgFailed(("The locked ranges tree should be empty at that point\n"));
    return VINF_SUCCESS;
}
1063
static int pdmacFileEpClose(PPDMASYNCCOMPLETIONENDPOINT pEndpoint)
1065
int rc = VINF_SUCCESS;
1066
PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEndpoint;
1067
PPDMASYNCCOMPLETIONEPCLASSFILE pEpClassFile = (PPDMASYNCCOMPLETIONEPCLASSFILE)pEndpoint->pEpClass;
1069
/* Make sure that all tasks finished for this endpoint. */
1070
rc = pdmacFileAioMgrCloseEndpoint(pEpFile->pAioMgr, pEpFile);
1074
* If the async I/O manager is in failsafe mode this is the only endpoint
1075
* it processes and thus can be destroyed now.
1077
if (pEpFile->pAioMgr->enmMgrType == PDMACEPFILEMGRTYPE_SIMPLE)
1078
pdmacFileAioMgrDestroy(pEpClassFile, pEpFile->pAioMgr);
1080
/* Free cached tasks. */
1081
PPDMACTASKFILE pTask = pEpFile->pTasksFreeHead;
1085
PPDMACTASKFILE pTaskFree = pTask;
1086
pTask = pTask->pNext;
1087
MMR3HeapFree(pTaskFree);
1090
/* Destroy the locked ranges tree now. */
1091
RTAvlrFileOffsetDestroy(pEpFile->AioMgr.pTreeRangesLocked, pdmacFileEpRangesLockedDestroy, NULL);
1093
RTFileClose(pEpFile->File);
1095
#ifdef VBOX_WITH_STATISTICS
1096
STAMR3Deregister(pEpClassFile->Core.pVM, &pEpFile->StatRead);
1097
STAMR3Deregister(pEpClassFile->Core.pVM, &pEpFile->StatWrite);
1100
return VINF_SUCCESS;
1103
static int pdmacFileEpRead(PPDMASYNCCOMPLETIONTASK pTask,
1104
PPDMASYNCCOMPLETIONENDPOINT pEndpoint, RTFOFF off,
1105
PCRTSGSEG paSegments, size_t cSegments,
1108
int rc = VINF_SUCCESS;
1109
PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEndpoint;
1111
LogFlowFunc(("pTask=%#p pEndpoint=%#p off=%RTfoff paSegments=%#p cSegments=%zu cbRead=%zu\n",
1112
pTask, pEndpoint, off, paSegments, cSegments, cbRead));
1114
STAM_PROFILE_ADV_START(&pEpFile->StatRead, Read);
1116
pdmacFileEpTaskInit(pTask, cbRead);
1118
rc = pdmacFileEpTaskInitiate(pTask, pEndpoint, off, paSegments, cSegments, cbRead,
1119
PDMACTASKFILETRANSFER_READ);
1121
STAM_PROFILE_ADV_STOP(&pEpFile->StatRead, Read);
1126
static int pdmacFileEpWrite(PPDMASYNCCOMPLETIONTASK pTask,
1127
PPDMASYNCCOMPLETIONENDPOINT pEndpoint, RTFOFF off,
1128
PCRTSGSEG paSegments, size_t cSegments,
1131
int rc = VINF_SUCCESS;
1132
PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEndpoint;
1134
if (RT_UNLIKELY(pEpFile->fReadonly))
1135
return VERR_NOT_SUPPORTED;
1137
STAM_PROFILE_ADV_START(&pEpFile->StatWrite, Write);
1139
pdmacFileEpTaskInit(pTask, cbWrite);
1141
rc = pdmacFileEpTaskInitiate(pTask, pEndpoint, off, paSegments, cSegments, cbWrite,
1142
PDMACTASKFILETRANSFER_WRITE);
1144
STAM_PROFILE_ADV_STOP(&pEpFile->StatWrite, Write);
1149
static int pdmacFileEpFlush(PPDMASYNCCOMPLETIONTASK pTask,
1150
PPDMASYNCCOMPLETIONENDPOINT pEndpoint)
1152
PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEndpoint;
1153
PPDMASYNCCOMPLETIONTASKFILE pTaskFile = (PPDMASYNCCOMPLETIONTASKFILE)pTask;
1155
if (RT_UNLIKELY(pEpFile->fReadonly))
1156
return VERR_NOT_SUPPORTED;
1158
pdmacFileEpTaskInit(pTask, 0);
1160
PPDMACTASKFILE pIoTask = pdmacFileTaskAlloc(pEpFile);
1161
if (RT_UNLIKELY(!pIoTask))
1162
return VERR_NO_MEMORY;
1164
pIoTask->pEndpoint = pEpFile;
1165
pIoTask->enmTransferType = PDMACTASKFILETRANSFER_FLUSH;
1166
pIoTask->pvUser = pTaskFile;
1167
pIoTask->pfnCompleted = pdmacFileEpTaskCompleted;
1168
pdmacFileEpAddTask(pEpFile, pIoTask);
1170
return VINF_AIO_TASK_PENDING;
1173
/**
 * Returns the file size cached in the endpoint.
 *
 * @returns VINF_SUCCESS.
 * @param   pEndpoint   The endpoint; treated as a file endpoint.
 * @param   pcbSize     Where to store the cached size.
 */
static int pdmacFileEpGetSize(PPDMASYNCCOMPLETIONENDPOINT pEndpoint, uint64_t *pcbSize)
{
    PPDMASYNCCOMPLETIONENDPOINTFILE pFileEp = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEndpoint;
    uint64_t const cbCached = ASMAtomicReadU64(&pFileEp->cbFile);

    *pcbSize = cbCached;
    return VINF_SUCCESS;
}
1182
/**
 * Resizes the file behind the endpoint and updates the cached size.
 *
 * @returns Status code of RTFileSetSize.
 * @param   pEndpoint   The endpoint; treated as a file endpoint.
 * @param   cbSize      The new size of the file.
 */
static int pdmacFileEpSetSize(PPDMASYNCCOMPLETIONENDPOINT pEndpoint, uint64_t cbSize)
{
    PPDMASYNCCOMPLETIONENDPOINTFILE pEpFile = (PPDMASYNCCOMPLETIONENDPOINTFILE)pEndpoint;

    /*
     * Resize first and publish the new size only on success, so the cached
     * value never claims a size the file does not have.
     */
    int rc = RTFileSetSize(pEpFile->File, cbSize);
    if (RT_SUCCESS(rc))
        ASMAtomicWriteU64(&pEpFile->cbFile, cbSize);

    return rc;
}
1190
const PDMASYNCCOMPLETIONEPCLASSOPS g_PDMAsyncCompletionEndpointClassFile =
1193
PDMAC_EPCLASS_OPS_VERSION,
1197
PDMASYNCCOMPLETIONEPCLASSTYPE_FILE,
1198
/* cbEndpointClassGlobal */
1199
sizeof(PDMASYNCCOMPLETIONEPCLASSFILE),
1201
sizeof(PDMASYNCCOMPLETIONENDPOINTFILE),
1203
sizeof(PDMASYNCCOMPLETIONTASKFILE),
1205
pdmacFileInitialize,
1208
/* pfnEpInitialize. */
1209
pdmacFileEpInitialize,
1223
PDMAC_EPCLASS_OPS_VERSION