ListBlobsFlat returning deleted blobs along with other blobs
I have a Node.js App Service that downloads blobs, 250 at a time, until 3500 blobs are downloaded (this is to limit the number of concurrent connections opened to the container), then processes them (bulk-uploading the data to the database), and finally deletes them.
The App Service API is called by another ETL tool (SnapLogic) every 5 minutes. If the first run finishes in, say, 4 minutes 50 seconds and the next run starts 10 seconds later, `listBlobsFlat` returns the names of blobs that were already deleted, and the App Service then fails when it tries to download them.
I am stumped because I am specifically telling the Azure API not to list the deleted blobs. What am I doing wrong?
I know that when blobs are deleted, Azure actually just marks them for deletion and a garbage-collection process eventually removes them. But if they are already marked for deletion, they shouldn't be returned in the list!
/**
 * Collects the names of at most `maxBlobsToDownload` blobs from a container.
 *
 * The listing is requested with every `include*` option switched off
 * (uncommitted, deleted, snapshots, versions, deleted-with-versions).
 *
 * @param {object} containerClient - Client exposing `listBlobsFlat(options)`,
 *   which must return an async iterable of items carrying a `name` property.
 * @param {number} maxBlobsToDownload - Upper bound on the number of names returned.
 * @returns {Promise<string[]>} Blob names in listing order; empty when the
 *   limit is zero, negative, or NaN.
 */
async function getBlobNames(containerClient, maxBlobsToDownload) {
  const blobNames = [];

  // Written as a negated comparison so that NaN (e.g. a failed parseInt) is
  // rejected together with zero and negative limits, without starting a listing.
  if (!(maxBlobsToDownload > 0)) {
    return blobNames;
  }

  const listing = containerClient.listBlobsFlat({
    includeUncommitedBlobs: false,
    includeDeleted: false,
    includeSnapshots: false,
    includeVersions: false,
    includeDeletedWithVersions: false,
  });

  for await (const blob of listing) {
    blobNames.push(blob.name);
    // Stop as soon as the limit is reached instead of pulling one more item
    // from the iterator, which could cost an additional listing request.
    if (blobNames.length >= maxBlobsToDownload) {
      break;
    }
  }

  return blobNames;
}
/**
 * Deletes the blobs named in `blobNames[start]` .. `blobNames[end - 1]`,
 * including their snapshots, issuing all delete requests concurrently.
 *
 * Reads `blobNames`, `xenialHelper`, `srcConnection` and `sourceContainer`
 * from the enclosing scope.
 *
 * @param {number} start - Index of the first blob name to delete (inclusive).
 * @param {number} end - Index one past the last blob name to delete (exclusive).
 * @returns {Promise<void>}
 * @throws {Error} When any delete fails. The message is the stringified
 *   original error and the original error object is attached as `cause`.
 */
async function deleteBlobs(start, end) {
  try {
    const deletePromises = [];
    const containerClient = xenialHelper.getContainerClient(
      srcConnection,
      sourceContainer
    );
    for (let i = start; i < end; i++) {
      const blobName = blobNames[i];
      deletePromises.push(
        containerClient.deleteBlob(blobName, {
          deleteSnapshots: "include",
        })
      );
    }
    // Promise.all rejects on the first failure; the remaining requests are
    // already in flight and are not cancelled.
    await Promise.all(deletePromises);
    console.log(`${end - start} Blobs Deleted`);
  } catch (err) {
    console.log("Blob Delete Error");
    console.log(err);
    // Keep the message callers already see, but preserve the original error
    // (stack, status code, error code) instead of reducing it to a string.
    throw new Error(err, { cause: err });
  }
}
/**
 * Downloads the blobs named in `blobNames[start]` .. `blobNames[end - 1]`
 * concurrently and logs how long the batch took.
 *
 * Reads `blobNames`, `xenialHelper`, `srcConnection`, `sourceContainer`,
 * `downloadBlob` and `moment` from the enclosing scope.
 *
 * @param {number} start - Index of the first blob name to download (inclusive).
 * @param {number} end - Index one past the last blob name to download (exclusive).
 * @returns {Promise<void>}
 * @throws {Error} When any download fails. The message is the stringified
 *   original error and the original error object is attached as `cause`.
 */
async function getBlobs(start, end) {
  console.log("Getting Blobs");
  const startTime = moment();
  try {
    // Declared locally: assigning to an undeclared `promises` creates an
    // implicit global in sloppy mode and throws in strict mode / ES modules.
    const promises = [];
    const containerClient = xenialHelper.getContainerClient(
      srcConnection,
      sourceContainer
    );
    for (let i = start; i < end; i++) {
      const blobName = blobNames[i];
      promises.push(
        downloadBlob(containerClient.getBlockBlobClient(blobName), blobName)
      );
    }
    await Promise.all(promises);
    console.log(
      `${end - start} Blobs Downloaded ${xenialHelper.printTime(
        startTime,
        moment()
      )}`
    );
  } catch (err) {
    console.log("Error GetBlobs");
    console.log(err);
    // Keep the message callers already see, but preserve the original error
    // (stack, status code, error code) instead of reducing it to a string.
    throw new Error(err, { cause: err });
  }
}
// Main Function
// GET /processFiles: lists blob names, downloads them in batches, runs
// processData(), then deletes the same blobs in batches.

// Default cap on the number of blobs handled per request. A request can
// override it for that request only with ?maxDownloads=<positive integer>.
const maxBlobsToDownload = 1500;

router.get("/processFiles", jsonBodyParser, async (req, res) => {
  // Everything, including query parsing, runs inside the try so that any
  // failure produces a 500 response instead of an unhandled rejection.
  try {
    const startTime = moment();

    // Per-request limit: the module-level default is a const and must not be
    // reassigned (doing so throws a TypeError).
    let blobLimit = maxBlobsToDownload;
    if (req.query.maxDownloads) {
      blobLimit = Number.parseInt(req.query.maxDownloads, 10);
      if (!(blobLimit > 0)) {
        res
          .status(400)
          .json({ message: "maxDownloads must be a positive integer" });
        return;
      }
    }

    const containerClient = xenialHelper.getContainerClient(
      srcConnection,
      sourceContainer
    );

    // NOTE(review): blobNames is not declared in this handler; getBlobs and
    // deleteBlobs read the same outer variable. If two requests overlap, the
    // later assignment replaces the list the earlier request is still working
    // through — confirm where it is declared and consider passing the list
    // explicitly.
    blobNames = await getBlobNames(containerClient, blobLimit);
    console.log(blobNames.length);

    const pageSize = 250;
    const iterations = getIterations(blobNames, pageSize);

    // Walks consecutive [start, end) windows of pageSize entries over
    // blobNames, clamping the final window to the list length, and awaits
    // `task` for each window in turn.
    const runInBatches = async (task) => {
      let start = 0;
      let end = Math.min(pageSize, blobNames.length);
      for (let i = 0; i < iterations; i++) {
        if (i > 0) {
          start = end;
          end = Math.min(end + pageSize, blobNames.length);
        }
        console.log(`${start} : ${end}`);
        await task(start, end);
      }
    };

    await runInBatches(getBlobs);
    await processData();
    await runInBatches(deleteBlobs);

    // Uses xenialHelper.printTime, matching the call in getBlobs.
    console.log("Total Time " + xenialHelper.printTime(startTime, moment()));
    res.json({ result: "success" });
  } catch (err) {
    console.log("Error");
    console.log(err);
    // An Error instance serialises to {} in JSON, so send its message text.
    res.status(500).json({
      message: err instanceof Error ? err.message : String(err),
    });
  }
});
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
