From 478da3080000b4913861c1b1731b188af9c9b2dd Mon Sep 17 00:00:00 2001 From: naveenpaul1 Date: Mon, 30 Sep 2024 12:03:44 +0530 Subject: [PATCH] NSFS : S3 rm with --recursive option does not delete all the objects Signed-off-by: naveenpaul1 --- src/sdk/namespace_fs.js | 64 ++--- .../jest_tests/test_nsfs_list_object.test.js | 227 ++++++++++++++++++ src/test/unit_tests/test_namespace_fs.js | 14 +- src/util/js_utils.js | 26 ++ 4 files changed, 298 insertions(+), 33 deletions(-) create mode 100644 src/test/unit_tests/jest_tests/test_nsfs_list_object.test.js diff --git a/src/sdk/namespace_fs.js b/src/sdk/namespace_fs.js index fabe042ed3..ee3c111124 100644 --- a/src/sdk/namespace_fs.js +++ b/src/sdk/namespace_fs.js @@ -4,7 +4,6 @@ 'use strict'; const _ = require('lodash'); -const fs = require('fs'); const path = require('path'); const util = require('util'); const mime = require('mime'); @@ -14,6 +13,7 @@ const dbg = require('../util/debug_module')(__filename); const config = require('../../config'); const crypto = require('crypto'); const s3_utils = require('../endpoint/s3/s3_utils'); +const js_utils = require('../util/js_utils'); const error_utils = require('../util/error_utils'); const stream_utils = require('../util/stream_utils'); const buffer_utils = require('../util/buffer_utils'); @@ -250,24 +250,6 @@ function is_sparse_file(stat) { return (stat.blocks * 512 < stat.size); } -/** - * @param {fs.Dirent} e - * @returns {string} - */ -function get_entry_name(e) { - return e.name; -} - -/** - * @param {string} name - * @returns {fs.Dirent} - */ -function make_named_dirent(name) { - const entry = new fs.Dirent(); - entry.name = name; - return entry; -} - function to_xattr(fs_xattr) { const xattr = _.mapKeys(fs_xattr, (val, key) => { // Prioritize ignore list @@ -680,14 +662,30 @@ class NamespaceFS { const marker_dir = key_marker.slice(0, dir_key.length); const marker_ent = key_marker.slice(dir_key.length); // marker is after dir so no keys in this dir can apply - if (dir_key < marker_dir) { + if (is_first_dir_under_second_dir(dir_key, marker_dir)) { // dbg.log0(`marker is after dir so no keys in this dir can apply: dir_key=${dir_key} marker_dir=${marker_dir}`); return; } // when the dir portion of the marker is completely below the current dir // then every key in this dir satisfies the marker and marker_ent should not be used. - const marker_curr = (marker_dir < dir_key) ? '' : marker_ent; + const marker_curr = is_first_dir_under_second_dir(marker_dir, dir_key) ? '' : marker_ent; // dbg.log0(`process_dir: dir_key=${dir_key} prefix_ent=${prefix_ent} marker_curr=${marker_curr}`); + + // compares dir path. returns true if dir1 is under second dir, false if otherwise + // replasing slash with space to make sure secial charecters are not getting + // more priory than backslash while comparing. + /** + * @param {string} first_dir + * @param {string} second_dir + */ + function is_first_dir_under_second_dir(first_dir, second_dir) { + // first_dir and second_dir comparison will fail when there are folder with structure slimier to + // "dir_prfix.12345/" and "dir_prfix.12345.backup/" because "/" considered lesser than "." to fix this + // all the backslash is replaced with space. This updated first_dir and second_dir used only for comparison. + first_dir = first_dir.replaceAll('/', ' '); + second_dir = second_dir.replaceAll('/', ' '); + return first_dir < second_dir; + } /** * @typedef {{ * key: string, @@ -696,12 +694,19 @@ class NamespaceFS { */ const insert_entry_to_results_arr = async r => { let pos; + + const r_key_updated = r.key.replaceAll('/', ' '); // Since versions are arranged next to latest object in the latest first order, // no need to find the sorted last index. Push the ".versions/#VERSION_OBJECT" as // they are in order - if (results.length && r.key < results[results.length - 1].key && + if (results.length && r_key_updated < results[results.length - 1].key && !this._is_hidden_version_path(r.key)) { - pos = _.sortedLastIndexBy(results, r, a => a.key); + pos = js_utils.sortedLastIndexBy(results, + curr => { + const curr_key = r_key_updated.includes('/') ? curr.key : curr.key.replaceAll('/', ' '); + return curr_key < r_key_updated; + }, + ); } else { pos = results.length; } @@ -731,7 +736,7 @@ class NamespaceFS { const process_entry = async ent => { // dbg.log0('process_entry', dir_key, ent.name); if ((!ent.name.startsWith(prefix_ent) || - ent.name < marker_curr || + ent.name < marker_curr.split('/')[0] || ent.name === this.get_bucket_tmpdir_name() || ent.name === config.NSFS_FOLDER_OBJECT_NAME) || this._is_hidden_version_path(ent.name)) { @@ -774,7 +779,7 @@ class NamespaceFS { // insert dir object to objects list if its key is lexicographicly bigger than the key marker && // no delimiter OR prefix is the current directory entry const is_dir_content = cached_dir.stat.xattr && cached_dir.stat.xattr[XATTR_DIR_CONTENT]; - if (is_dir_content && dir_key > key_marker && (!delimiter || dir_key === prefix)) { + if (is_dir_content && is_first_dir_under_second_dir(marker_dir, dir_key) && (!delimiter || dir_key === prefix)) { const r = { key: dir_key, common_prefix: false }; await insert_entry_to_results_arr(r); } @@ -797,10 +802,9 @@ class NamespaceFS { {name: start_marker} ) + 1; } else { - marker_index = _.sortedLastIndexBy( - sorted_entries, - make_named_dirent(marker_curr), - get_entry_name + const marker_curr_updated = marker_curr.split('/')[0].replaceAll('/', ' '); + marker_index = js_utils.sortedLastIndexBy(sorted_entries, + curr => curr.name.replaceAll('/', ' ') <= marker_curr_updated, ); } @@ -3385,6 +3389,7 @@ class NamespaceFS { } } + /** @type {PersistentLogger} */ NamespaceFS._migrate_wal = null; @@ -3393,4 +3398,3 @@ NamespaceFS._restore_wal = null; module.exports = NamespaceFS; module.exports.multi_buffer_pool = multi_buffer_pool; - diff --git a/src/test/unit_tests/jest_tests/test_nsfs_list_object.test.js b/src/test/unit_tests/jest_tests/test_nsfs_list_object.test.js new file mode 100644 index 0000000000..0ad16121c2 --- /dev/null +++ b/src/test/unit_tests/jest_tests/test_nsfs_list_object.test.js @@ -0,0 +1,227 @@ +/* Copyright (C) 2016 NooBaa */ +/* eslint-disable no-undef */ +'use strict'; +const path = require('path'); +const fs_utils = require('../../../util/fs_utils'); +const { TMP_PATH } = require('../../system_tests/test_utils'); +const NamespaceFS = require('../../../sdk/namespace_fs'); +const buffer_utils = require('../../../util/buffer_utils'); +const tmp_ns_nsfs_path = path.join(TMP_PATH, 'test_nsfs_namespace_list'); +const nsfs_src_bkt = 'nsfs_src'; +const ns_nsfs_tmp_bucket_path = `${tmp_ns_nsfs_path}/${nsfs_src_bkt}`; +const list_bkt = 'test_namespace_list_object'; +const timeout = 50000; +const files_without_folders_to_upload = make_keys(264, i => `file_without_folder${i}`); +const folders_to_upload = make_keys(264, i => `folder${i}/`); +const files_in_folders_to_upload = make_keys(264, i => `folder1/file${i}`); +const files_in_inner_folders_to_upload = make_keys(264, i => `folder1/inner_folder/file${i}`); +const files_in_inner_backup_folders_to_upload = make_keys(264, i => `folder1/inner_folder.backup/file${i}`); +const files_in_inner_all_backup_folders_to_upload = make_keys(264, i => `folder1/inner.folder.all.backup/file${i}`); +const files_in_inner_all_folders_to_upload = make_keys(264, i => `folder1/inner_folder.all/file${i}`); +const dummy_object_sdk = make_dummy_object_sdk(); +const ns_nsfs_tmp = new NamespaceFS({ bucket_path: ns_nsfs_tmp_bucket_path, bucket_id: '3', namespace_resource_id: undefined }); +let sorted_all_dir_entries; +function make_dummy_object_sdk() { + return { + requesting_account: { + force_md5_etag: false, + nsfs_account_config: { + uid: process.getuid(), + gid: process.getgid(), + } + }, + abort_controller: new AbortController(), + throw_if_aborted() { + if (this.abort_controller.signal.aborted) throw new Error('request aborted signal'); + } + }; +} +function sort_entries_by_name(a, b) { + if (a.replaceAll('/', ' ') < b.replaceAll('/', ' ')) return -1; + if (a.replaceAll('/', ' ') > b.replaceAll('/', ' ')) return 1; + return 0; +} +// eslint-disable-next-line max-lines-per-function +describe('manage sorted list flow', () => { + describe('list objects - pagination', () => { + beforeAll(async () => { + await fs_utils.create_fresh_path(ns_nsfs_tmp_bucket_path); + await create_keys(list_bkt, ns_nsfs_tmp, [ + ...files_without_folders_to_upload, + ...folders_to_upload, + ...files_in_folders_to_upload, + ...files_in_inner_folders_to_upload, + ...files_in_inner_backup_folders_to_upload, + ...files_in_inner_all_backup_folders_to_upload, + ...files_in_inner_all_folders_to_upload, + ]); + sorted_all_dir_entries = [ + ...files_without_folders_to_upload, + ...folders_to_upload, + ...files_in_folders_to_upload, + ...files_in_inner_folders_to_upload, + ...files_in_inner_backup_folders_to_upload, + ...files_in_inner_all_backup_folders_to_upload, + ...files_in_inner_all_folders_to_upload, + ]; + sorted_all_dir_entries.sort(sort_entries_by_name); + }, timeout); + afterAll(async function() { + await delete_keys(list_bkt, ns_nsfs_tmp, [ + ...folders_to_upload, + ...files_in_folders_to_upload, + ...files_without_folders_to_upload, + ...files_in_inner_folders_to_upload, + ...files_in_inner_backup_folders_to_upload, + ...files_in_inner_all_backup_folders_to_upload, + ...files_in_inner_all_folders_to_upload, + ]); + await fs_utils.folder_delete(`${ns_nsfs_tmp_bucket_path}`); + }); + it('page=1000', async () => { + let r; + let total_items = 0; + let object_item_start_index = 0; + for (;;) { + r = await ns_nsfs_tmp.list_objects({ + bucket: list_bkt, + key_marker: r ? r.next_marker : "", + }, dummy_object_sdk); + object_item_start_index = total_items; + total_items += r.objects.length; + await validat_pagination(r, total_items, object_item_start_index); + if (!r.next_marker) { + break; + } + } + }, timeout); + it('page=500', async () => { + let r; + let total_items = 0; + let object_item_start_index = 0; + for (;;) { + r = await ns_nsfs_tmp.list_objects({ + bucket: list_bkt, + limit: 500, + key_marker: r ? r.next_marker : "", + }, dummy_object_sdk); + object_item_start_index = total_items; + total_items += r.objects.length; + await validat_pagination(r, total_items, object_item_start_index); + if (!r.next_marker) { + break; + } + } + }, timeout); + it('page=264', async () => { + let r; + let total_items = 0; + let object_item_start_index = 0; + for (;;) { + r = await ns_nsfs_tmp.list_objects({ + bucket: list_bkt, + limit: 264, + key_marker: r ? r.next_marker : "", + }, dummy_object_sdk); + object_item_start_index = total_items; + total_items += r.objects.length; + await validat_pagination(r, total_items, object_item_start_index); + if (!r.next_marker) { + break; + } + } + }, timeout); + it('page=250', async () => { + let r; + let total_items = 0; + let object_item_start_index = 0; + for (;;) { + r = await ns_nsfs_tmp.list_objects({ + bucket: list_bkt, + limit: 250, + key_marker: r ? r.next_marker : "", + }, dummy_object_sdk); + object_item_start_index = total_items; + total_items += r.objects.length; + await validat_pagination(r, total_items, object_item_start_index); + if (!r.next_marker) { + break; + } + } + }, timeout); + it('page=100', async () => { + let r; + let total_items = 0; + let object_item_start_index = 0; + for (;;) { + r = await ns_nsfs_tmp.list_objects({ + bucket: list_bkt, + limit: 100, + key_marker: r ? r.next_marker : "", + }, dummy_object_sdk); + object_item_start_index = total_items; + total_items += r.objects.length; + await validat_pagination(r, total_items, object_item_start_index); + if (!r.next_marker) { + break; + } + } + }, timeout); + it('page=10', async () => { + let r; + let total_items = 0; + let object_item_start_index = 0; + for (;;) { + r = await ns_nsfs_tmp.list_objects({ + bucket: list_bkt, + limit: 10, + key_marker: r ? r.next_marker : "", + }, dummy_object_sdk); + object_item_start_index = total_items; + total_items += r.objects.length; + await validat_pagination(r, total_items, object_item_start_index); + if (!r.next_marker) { + break; + } + } + }, timeout); + }); +}); +async function validat_pagination(r, total_items, object_item_start_index) { + if (r.next_marker) { + expect(r.is_truncated).toStrictEqual(true); + expect(r.objects.map(it => it.key)).toEqual(sorted_all_dir_entries.slice(object_item_start_index, total_items)); + } else { + expect(total_items).toEqual(1848); + expect(r.is_truncated).toStrictEqual(false); + } +} +/** + * @param {number} count + * @param {(i:number)=>string} gen + * @returns {string[]} + */ +function make_keys(count, gen) { + const arr = new Array(count); + for (let i = 0; i < count; ++i) arr[i] = gen(i); + arr.sort(); + Object.freeze(arr); + return arr; +} +async function delete_keys(bkt, nsfs_delete_tmp, keys) { + await nsfs_delete_tmp.delete_multiple_objects({ + bucket: bkt, + objects: keys.map(key => ({ key })), + }, dummy_object_sdk); +} +async function create_keys(bkt, nsfs_create_tmp, keys) { + return Promise.all(keys.map(async key => { + await nsfs_create_tmp.upload_object({ + bucket: bkt, + key, + content_type: 'application/octet-stream', + source_stream: buffer_utils.buffer_to_read_stream(null), + size: 0 + }, dummy_object_sdk); + })); +} diff --git a/src/test/unit_tests/test_namespace_fs.js b/src/test/unit_tests/test_namespace_fs.js index 5ff9b6fc31..e260a8c24d 100644 --- a/src/test/unit_tests/test_namespace_fs.js +++ b/src/test/unit_tests/test_namespace_fs.js @@ -142,19 +142,27 @@ mocha.describe('namespace_fs', function() { function assert_sorted_list(res) { let prev_key = ''; + const regex = /[^A-Za-z0-9]/; for (const { key } of res.objects) { if (res.next_marker) { assert(key <= res.next_marker, 'bad next_marker at key ' + key); } - assert(prev_key <= key, 'objects not sorted at key ' + key); + // String comparison will fail when with special character and backslash cases, + // Where special characters such as dot, hyphen are listed before backslash in sorted list. + // compare string only if key contains no special characters + if (!regex.test(key)) { + assert(prev_key <= key, 'objects not sorted at key ' + key + " prev_key: " + prev_key); + } prev_key = key; } - prev_key = ''; for (const key of res.common_prefixes) { if (res.next_marker) { assert(key <= res.next_marker, 'next_marker at key ' + key); } - assert(prev_key <= key, 'prefixes not sorted at key ' + key); + + if (!regex.test(key)) { + assert(prev_key <= key, 'prefixes not sorted at key ' + key + " prev_key: " + prev_key); + } prev_key = key; } } diff --git a/src/util/js_utils.js b/src/util/js_utils.js index e0604c4d0f..443a1f84af 100644 --- a/src/util/js_utils.js +++ b/src/util/js_utils.js @@ -237,6 +237,31 @@ function omit_symbol(maybe_obj, sym) { return _.omit(obj, sym); } +/** + * sortedLastIndexBy is like lodash's sortedLastIndexBy except that + * instead of taking a iteratee, it takes a function `less` which should + * return `true` when `curr < value`. + * @template T + * @param {Array} array + * @param {(curr: T) => Boolean} less + * @returns + */ +function sortedLastIndexBy(array, less) { + let low = 0; + let high = array === null ? low : array.length; + + while (low < high) { + const mid = Math.floor(low + ((high - low) / 2)); + if (less(array[mid])) { + low = mid + 1; + } else { + high = mid; + } + } + + return high; +} + exports.self_bind = self_bind; exports.array_push_all = array_push_all; exports.array_push_keep_latest = array_push_keep_latest; @@ -250,3 +275,4 @@ exports.inspect_lazy = inspect_lazy; exports.make_array = make_array; exports.map_get_or_create = map_get_or_create; exports.omit_symbol = omit_symbol; +exports.sortedLastIndexBy = sortedLastIndexBy;