-
Notifications
You must be signed in to change notification settings - Fork 27
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add BlockBlobDatabase as TES database option #194
Draft
MattMcL4475
wants to merge
7
commits into
main
Choose a base branch
from
feature/BlockBlobDatabase
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Draft
Changes from all commits
Commits
Show all changes
7 commits
Select commit
Hold shift + click to select a range
8b647e9
started implementation
MattMcL4475 301dcb1
Merge remote-tracking branch 'origin/main' into feature/BlockBlobData…
MattMcL4475 de50a50
v1
MattMcL4475 5613726
add copyrights
MattMcL4475 7a4b818
update docs
MattMcL4475 da5ef3d
minor reorder
MattMcL4475 621b298
Merge remote-tracking branch 'origin/main' into feature/BlockBlobData…
MattMcL4475 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
// Copyright (c) Microsoft Corporation. | ||
// Licensed under the MIT License. | ||
|
||
namespace Tes.Models | ||
{ | ||
/// <summary>
/// Settings for the block blob backed TES database: which storage account and
/// container to use, and optionally a SAS token to authenticate with.
/// </summary>
public class BlockBlobDatabaseOptions
{
    /// <summary>
    /// Section name constant associated with these options ("BlockBlobDatabase").
    /// </summary>
    public const string SectionName = "BlockBlobDatabase";

    /// <summary>
    /// Name of the Azure Storage account that hosts the container.
    /// </summary>
    public string StorageAccountName { get; set; }

    /// <summary>
    /// Name of the blob container holding the items. Defaults to "testasksdb".
    /// </summary>
    public string ContainerName { get; set; } = "testasksdb";

    /// <summary>
    /// Optional SAS token for the container; no default value is assigned.
    /// </summary>
    public string ContainerSasToken { get; set; }
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,200 @@ | ||
// Copyright (c) Microsoft Corporation. | ||
// Licensed under the MIT License. | ||
|
||
using System; | ||
using System.Collections.Concurrent; | ||
using System.Collections.Generic; | ||
using System.Linq; | ||
using System.Threading; | ||
using System.Threading.Tasks; | ||
using Azure.Identity; | ||
using Azure.Storage.Blobs; | ||
using Polly; | ||
|
||
namespace Tes.Repository | ||
{ | ||
/// <summary>
/// Simple document store that persists every item as a JSON blob in a single Azure Storage container.
/// Active items are stored under the "a/" prefix and inactive items under the "z/" prefix, so that
/// active items can be listed with a prefix query.
/// Writes use <c>overwrite: true</c>, i.e. the last writer wins; no ETag-based concurrency check is performed.
/// </summary>
/// <typeparam name="T">Type of the stored items; (de)serialized with System.Text.Json.</typeparam>
public class BlockBlobDatabase<T> where T : class
{
    private const int maxConcurrentItemDownloads = 64;
    private const int maxDownloadAttempts = 3;
    private const string activeStatePrefix = "a/";
    private const string inactiveStatePrefix = "z/";

    private readonly BlobServiceClient blobServiceClient;
    private readonly BlobContainerClient container;

    /// <summary>Storage account name passed to the constructor. Changing it later does not re-target the client.</summary>
    public string StorageAccountName { get; set; }

    /// <summary>Container name passed to the constructor. Changing it later does not re-target the client.</summary>
    public string ContainerName { get; set; }

    /// <summary>
    /// Creates the blob clients and makes sure the container exists.
    /// </summary>
    /// <param name="storageAccountName">Name of the storage account (without the blob endpoint suffix).</param>
    /// <param name="containerName">Name of the container that holds the items.</param>
    /// <param name="containerSasToken">Optional SAS token; when null or blank, <see cref="DefaultAzureCredential"/> is used.</param>
    /// <exception cref="ArgumentException">The account or container name is null or blank.</exception>
    public BlockBlobDatabase(string storageAccountName, string containerName, string containerSasToken = null)
    {
        if (string.IsNullOrWhiteSpace(storageAccountName))
        {
            throw new ArgumentException("A storage account name is required.", nameof(storageAccountName));
        }

        if (string.IsNullOrWhiteSpace(containerName))
        {
            throw new ArgumentException("A container name is required.", nameof(containerName));
        }

        StorageAccountName = storageAccountName;
        ContainerName = containerName;

        if (!string.IsNullOrWhiteSpace(containerSasToken))
        {
            blobServiceClient = new BlobServiceClient(new Uri($"https://{StorageAccountName}.blob.core.windows.net?{containerSasToken.TrimStart('?')}"));
        }
        else
        {
            // Use managed identity. Token lifetime and refreshing is handled automatically.
            blobServiceClient = new BlobServiceClient(new Uri($"https://{StorageAccountName}.blob.core.windows.net"), new DefaultAzureCredential());
        }

        container = blobServiceClient.GetBlobContainerClient(ContainerName);

        // Constructors cannot await, so use the synchronous API instead of blocking on the async one.
        container.CreateIfNotExists();
    }

    /// <summary>
    /// Creates or overwrites the blob for <paramref name="id"/>.
    /// An active item is written under the active prefix. An inactive item is written under the
    /// inactive prefix and any active blob with the same id is deleted.
    /// </summary>
    /// <param name="id">Item identifier; used to build the blob name.</param>
    /// <param name="item">Item to serialize and store.</param>
    /// <param name="isActive">Whether the item is stored as active or inactive.</param>
    public async Task CreateOrUpdateItemAsync(string id, T item, bool isActive)
    {
        var json = System.Text.Json.JsonSerializer.Serialize(item);

        if (isActive)
        {
            // Assumption: a task can never go from inactive to active, so no need to delete anything here
            await container
                .GetBlobClient(GetActiveBlobNameById(id))
                .UploadAsync(BinaryData.FromString(json), overwrite: true);
            return;
        }

        var activeBlob = container.GetBlobClient(GetActiveBlobNameById(id));
        var inactiveBlob = container.GetBlobClient(GetInactiveBlobNameById(id));

        // Retry to reduce likelihood of one blob succeeding and the other failing.
        // The operations are started INSIDE the policy so that every retry issues new requests;
        // both operations are idempotent, so repeating the one that already succeeded is harmless.
        await Policy
            .Handle<Exception>()
            .WaitAndRetryAsync(10, retryAttempt => TimeSpan.FromSeconds(1))
            .ExecuteAsync(() => Task.WhenAll(
                // Delete active if it exists
                activeBlob.DeleteIfExistsAsync(),
                // Update/create inactive
                inactiveBlob.UploadAsync(BinaryData.FromString(json), overwrite: true)));
    }

    /// <summary>
    /// Deletes both the active and the inactive blob for <paramref name="id"/>, if they exist.
    /// </summary>
    /// <param name="id">Item identifier.</param>
    public async Task DeleteItemAsync(string id)
    {
        var activeBlob = container.GetBlobClient(GetActiveBlobNameById(id));
        var inactiveBlob = container.GetBlobClient(GetInactiveBlobNameById(id));

        // Retry to reduce likelihood of one blob succeeding and the other failing.
        // The deletes are started inside the policy so that a retry actually re-issues them.
        await Policy
            .Handle<Exception>()
            .WaitAndRetryAsync(10, retryAttempt => TimeSpan.FromSeconds(1))
            .ExecuteAsync(() => Task.WhenAll(
                activeBlob.DeleteIfExistsAsync(),
                inactiveBlob.DeleteIfExistsAsync()));
    }

    /// <summary>
    /// Downloads and deserializes the item with the given id.
    /// </summary>
    /// <param name="id">Item identifier.</param>
    /// <returns>The deserialized item.</returns>
    /// <remarks>
    /// If no inactive blob exists, the active blob is downloaded without an existence check,
    /// so the download call fails when neither blob exists.
    /// </remarks>
    public async Task<T> GetItemAsync(string id)
    {
        // Check if inactive exists first, since inactive will never go to active state, to make more consistent results
        var blobClient = container.GetBlobClient(GetInactiveBlobNameById(id));

        if (await blobClient.ExistsAsync())
        {
            var inactiveBlobJson = (await blobClient.DownloadContentAsync()).Value.Content.ToString();
            return System.Text.Json.JsonSerializer.Deserialize<T>(inactiveBlobJson);
        }

        blobClient = container.GetBlobClient(GetActiveBlobNameById(id));
        var json = (await blobClient.DownloadContentAsync()).Value.Content.ToString();
        return System.Text.Json.JsonSerializer.Deserialize<T>(json);
    }

    /// <summary>
    /// Downloads all items in parallel.
    /// Specifically designed NOT to enumerate items lazily, to prevent the caller stalling the download throughput.
    /// </summary>
    /// <param name="activeOnly">When true only blobs under the active prefix are listed; otherwise all blobs in the container.</param>
    /// <returns>The deserialized items, in no particular order.</returns>
    public async Task<IList<T>> GetItemsAsync(bool activeOnly = false)
    {
        var blobNames = new List<string>();

        // example: a/0fb0858a-3166-4a22-85b6-4337df2f53c5.json
        // example: z/0fb0858a-3166-4a22-85b6-4337df2f53c5.json
        await foreach (var blob in container.GetBlobsAsync(prefix: activeOnly ? activeStatePrefix : null))
        {
            blobNames.Add(blob.Name);
        }

        return await DownloadBlobsAsync(blobNames);
    }

    /// <summary>
    /// Lists a single page of blobs and downloads the items on that page.
    /// </summary>
    /// <param name="activeOnly">When true only blobs under the active prefix are listed.</param>
    /// <param name="pageSize">Page size hint passed to the listing call.</param>
    /// <param name="continuationToken">Token returned by a previous call, or null to start from the beginning.</param>
    /// <returns>
    /// The continuation token of the page together with its items; <c>(null, empty list)</c> when the listing yields no page.
    /// </returns>
    public async Task<(string, IList<T>)> GetItemsWithPagingAsync(bool activeOnly = false, int pageSize = 5000, string continuationToken = null)
    {
        var pages = container
            .GetBlobsAsync(prefix: activeOnly ? activeStatePrefix : null)
            .AsPages(continuationToken, pageSize);

        // Only the first page is consumed; returning from inside the loop disposes the enumerator.
        await foreach (var page in pages)
        {
            var blobNames = page.Values.Select(blob => blob.Name).ToList();
            return (page.ContinuationToken, await DownloadBlobsAsync(blobNames));
        }

        return (null, new List<T>());
    }

    private string GetActiveBlobNameById(string id) => $"{activeStatePrefix}{id}.json";

    private string GetInactiveBlobNameById(string id) => $"{inactiveStatePrefix}{id}.json";

    /// <summary>
    /// Downloads the given blobs with at most <c>maxConcurrentItemDownloads</c> downloads in flight.
    /// </summary>
    /// <param name="blobNames">Names of the blobs to download.</param>
    /// <returns>The deserialized items, in no particular order.</returns>
    private async Task<IList<T>> DownloadBlobsAsync(List<string> blobNames)
    {
        var items = new ConcurrentBag<T>();

        using (var throttle = new SemaphoreSlim(maxConcurrentItemDownloads))
        {
            var downloads = new List<Task>(blobNames.Count);

            foreach (var blobName in blobNames)
            {
                // Wait for a free slot before starting the next download; the slot is released by the download itself.
                await throttle.WaitAsync();
                downloads.Add(DownloadAndReleaseAsync(blobName, items, throttle));
            }

            // Completes only once every download has finished, so disposing the semaphore afterwards is safe.
            await Task.WhenAll(downloads);
        }

        return items.ToList();
    }

    /// <summary>
    /// Downloads one blob into <paramref name="items"/>, retrying transient failures a bounded number of
    /// times, and always releases the slot taken from <paramref name="throttle"/>.
    /// </summary>
    private async Task DownloadAndReleaseAsync(string blobName, ConcurrentBag<T> items, SemaphoreSlim throttle)
    {
        try
        {
            for (var attempt = 1; ; attempt++)
            {
                try
                {
                    var blobClient = container.GetBlobClient(blobName);
                    var json = (await blobClient.DownloadContentAsync()).Value.Content.ToString();
                    items.Add(System.Text.Json.JsonSerializer.Deserialize<T>(json));
                    return;
                }
                catch (global::Azure.RequestFailedException exc) when (exc.Status == 404)
                {
                    // The blob was deleted (or moved from the active to the inactive prefix) after it was listed.
                    return;
                }
                catch (Exception exc) when (attempt < maxDownloadAttempts && !(exc is System.Text.Json.JsonException))
                {
                    // Transient failure: back off briefly and try again. Malformed JSON is not retried.
                    // After the last attempt the exception propagates to the caller instead of being dropped.
                    await Task.Delay(TimeSpan.FromSeconds(1));
                }
            }
        }
        finally
        {
            throttle.Release();
        }
    }
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
// Copyright (c) Microsoft Corporation. | ||
// Licensed under the MIT License. | ||
|
||
using System; | ||
using System.Collections.Generic; | ||
using System.Diagnostics; | ||
using System.Linq; | ||
using System.Linq.Expressions; | ||
using System.Threading.Tasks; | ||
using Microsoft.Extensions.Logging; | ||
using Microsoft.Extensions.Options; | ||
using Polly; | ||
using Tes.Models; | ||
|
||
namespace Tes.Repository | ||
{ | ||
/// <summary>
/// <see cref="IRepository{T}"/> implementation for <see cref="TesTask"/> that stores tasks in a
/// <see cref="BlockBlobDatabase{T}"/>. If a cache is supplied it is populated with all active tasks on construction.
/// </summary>
public class TesTaskAzureBlockBlobRepository : IRepository<TesTask>
{
    private readonly BlockBlobDatabase<TesTask> db;
    private readonly ICache<TesTask> cache;
    private readonly ILogger logger;

    /// <summary>
    /// Creates the underlying database client and warms the cache (blocking until that completes).
    /// </summary>
    /// <param name="options">Storage account, container and optional SAS token to use.</param>
    /// <param name="logger">Logger for cache warm-up diagnostics.</param>
    /// <param name="cache">Optional cache; when null no caching is used.</param>
    /// <exception cref="ArgumentNullException"><paramref name="options"/> or <paramref name="logger"/> is null.</exception>
    public TesTaskAzureBlockBlobRepository(IOptions<BlockBlobDatabaseOptions> options, ILogger<TesTaskAzureBlockBlobRepository> logger, ICache<TesTask> cache = null)
    {
        if (options is null)
        {
            throw new ArgumentNullException(nameof(options));
        }

        this.logger = logger ?? throw new ArgumentNullException(nameof(logger));
        this.cache = cache;
        db = new BlockBlobDatabase<TesTask>(options.Value.StorageAccountName, options.Value.ContainerName, options.Value.ContainerSasToken);

        // A constructor cannot await; the repository is not usable until the cache is consistent.
        WarmCacheAsync().Wait();
    }

    /// <summary>Stores a new task and returns it.</summary>
    public async Task<TesTask> CreateItemAsync(TesTask item)
    {
        await db.CreateOrUpdateItemAsync(item.Id, item, item.IsActiveState());
        return item;
    }

    /// <summary>Deletes the task with the given id.</summary>
    public async Task DeleteItemAsync(string id)
    {
        await db.DeleteItemAsync(id);
    }

    /// <summary>
    /// Returns all tasks matching <paramref name="predicate"/>.
    /// All tasks are downloaded and the predicate is evaluated in memory.
    /// </summary>
    /// <param name="predicate">Filter to apply; when null all tasks are returned.</param>
    public async Task<IEnumerable<TesTask>> GetItemsAsync(Expression<Func<TesTask, bool>> predicate)
    {
        return ApplyPredicate(await db.GetItemsAsync(), predicate);
    }

    /// <summary>Returns all tasks stored as active.</summary>
    public async Task<IEnumerable<TesTask>> GetActiveItemsAsync()
    {
        return await db.GetItemsAsync(activeOnly: true);
    }

    /// <summary>
    /// Returns one page of tasks matching <paramref name="predicate"/> plus the token for the next page.
    /// The predicate is evaluated in memory on the downloaded page, so a page may contain fewer
    /// than <paramref name="pageSize"/> items (or none) even when more pages follow.
    /// </summary>
    /// <param name="predicate">Filter to apply; when null the page is returned unfiltered.</param>
    /// <param name="pageSize">Page size hint for the underlying listing.</param>
    /// <param name="continuationToken">Token from a previous call, or null to start from the beginning.</param>
    public async Task<(string, IEnumerable<TesTask>)> GetItemsAsync(Expression<Func<TesTask, bool>> predicate, int pageSize, string continuationToken)
    {
        // TODO - add server-side support for listing tasks by name; for now the filter is applied per page
        var (nextContinuationToken, pageItems) = await db.GetItemsWithPagingAsync(false, pageSize, continuationToken);
        return (nextContinuationToken, ApplyPredicate(pageItems, predicate));
    }

    /// <summary>
    /// Tries to load the task with the given id.
    /// </summary>
    /// <param name="id">Task identifier.</param>
    /// <param name="onSuccess">Invoked with the task when it was found.</param>
    /// <returns>True when the task was found; false when no blob exists for the id.</returns>
    public async Task<bool> TryGetItemAsync(string id, Action<TesTask> onSuccess = null)
    {
        TesTask item;

        try
        {
            item = await db.GetItemAsync(id);
        }
        catch (global::Azure.RequestFailedException ex) when (ex.Status == 404)
        {
            // Neither an active nor an inactive blob exists for this id.
            return false;
        }

        if (item is null)
        {
            return false;
        }

        onSuccess?.Invoke(item);
        return true;
    }

    /// <summary>Overwrites the stored task and returns it.</summary>
    public async Task<TesTask> UpdateItemAsync(TesTask item)
    {
        await db.CreateOrUpdateItemAsync(item.Id, item, item.IsActiveState());
        return item;
    }

    /// <summary>
    /// Filters <paramref name="items"/> in memory with the compiled <paramref name="predicate"/>;
    /// returns the items unchanged when no predicate is given.
    /// </summary>
    private static IEnumerable<TesTask> ApplyPredicate(IEnumerable<TesTask> items, Expression<Func<TesTask, bool>> predicate)
    {
        if (predicate is null)
        {
            return items;
        }

        return items.Where(predicate.Compile()).ToList();
    }

    /// <summary>
    /// Loads all active tasks into the cache, ordered by creation time. Does nothing but log a warning when no cache is configured.
    /// </summary>
    private async Task WarmCacheAsync()
    {
        if (cache == null)
        {
            logger.LogWarning("Cache is null for TesTaskAzureBlockBlobRepository; no caching will be used.");
            return;
        }

        var sw = Stopwatch.StartNew();
        logger.LogInformation("Warming cache...");

        // Don't allow the state of the system to change until the cache and system are consistent;
        // this lists the active blobs in the container and downloads them in parallel
        await Policy
            .Handle<Exception>()
            .WaitAndRetryAsync(3,
                retryAttempt =>
                {
                    logger.LogWarning($"Warming cache retry attempt #{retryAttempt}");
                    return TimeSpan.FromSeconds(10);
                },
                (ex, ts) =>
                {
                    logger.LogCritical(ex, "Couldn't warm cache, is the storage account available?");
                })
            .ExecuteAsync(async () =>
            {
                var activeTasks = (await GetActiveItemsAsync()).ToList();
                var tasksAddedCount = 0;

                foreach (var task in activeTasks.OrderBy(t => t.CreationTime))
                {
                    cache?.TryAdd(task.Id, task);
                    tasksAddedCount++;
                }

                logger.LogInformation($"Cache warmed successfully in {sw.Elapsed.TotalSeconds:n3} seconds. Added {tasksAddedCount:n0} items to the cache.");
            });
    }

    /// <summary>Nothing to release; this class holds no disposable fields.</summary>
    public void Dispose()
    {
    }
}
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmm... I think this retry policy could lead to data loss. If two requests hit this method at roughly the same time and the first fails while the second succeeds (we want the second one to win), the first request will enter the retry loop and could overwrite the latest data.
As an alternative you can consider an optimistic concurrency approach for the update, where you check if an item has changed since you last read it - storage supports this via headers, and etags. And for the create scenario, you turn overwrite off, to avoid any race condition.