simpler cache, md5 id, gridshare touching, resolves #1282

aces · Mar 15, 2024 · fe078ea · fe078ea
1 parent deb6998
commit fe078ea
Show file tree

Hide file tree

Showing 4 changed files with 54 additions and 69 deletions.
diff --git a/Bourreau/lib/bourreau_system_checks.rb b/Bourreau/lib/bourreau_system_checks.rb
@@ -485,44 +485,6 @@ def self.a100_ensure_dp_cache_symlink_exists #:nodoc:
 
 
 
-  # touch to avoid deletion by cluster bimonthly sctratch cleanup : 1) DataProvider cache dir, 2) DP_Cache_Key.md5 3) DP_Cache_Rev.id
-  # 4) gridshare dir, and 5) DP_Cache symbolic link located in it
-  # gridshare and cache_dir are typically updated often, but we touch them just in the case
-  def self.a105_ensure_dp_cache_and_symlink_will_exists #:nodoc:
-
-    myself        = RemoteResource.current_resource
-    cache_dir     = myself.dp_cache_dir
-    dp_cache_id   = File.join cache_dir,     DataProvider::DP_CACHE_ID_FILE
-    dp_cache_md5  = File.join cache_dir,     DataProvider::DP_CACHE_MD5_FILE
-    gridshare_dir = myself.cms_shared_dir
-    sym_path      = File.join gridshare_dir, DataProvider::DP_CACHE_SYML
-
-    puts "C> Updating timestamp for cache folder as well as its symlink, MD5 and ID files"
-
-    begin
-      FileUtils.touch [gridshare_dir, cache_dir, dp_cache_id, dp_cache_md5], verbose: true, nocreate: true
-    # files still might be deleted if a bourreau is not rebooted for a long time
-    # some cluster can have policies countering touch abuse
-    # touch command may fail for many reasons, e.g. resource issues
-    # sometimes touch might fails even if timestamp update is successful
-    rescue => e
-      puts "C> Cache MD5 and ID files timestamp update FAILED: " + e.message
-      return
-    end
-
-    # update timestamp for a softlink (rather than the folder it points to)
-
-    if system "touch -h #{sym_path}"
-      puts "C> Timestamps are updated."
-    else
-      puts "C> Cache symlink timestamp update FAILED!!!"
-      puts "C> Try to recreate the symlink manually!"  # older version of touch or unix do no support symlink updates
-    end
-    return if Time.now - File.lstat(sym_path).mtime > 1.day  #  fail only if symlink is seriously outdated
-  end
-
-
-
   def self.a110_ensure_task_class_git_commits_cached
 
     #----------------------------------------------------------------------------

diff --git a/BrainPortal/app/models/data_provider.rb b/BrainPortal/app/models/data_provider.rb
@@ -1353,7 +1353,44 @@ def self.cleanup_leftover_cache_files(do_it=false, options={})
     end
   end
 
+  # Updates the time stamp for important auxiliary directories and files
+  # as workaround for HPC file deletion policies.
+  #
+  # Some Bourreaux systems are configured with disk allocations where files older than N days are erased automatically.
+  #
+  # To prevent such systems from deleting the top-level directories for the DP_Cache, and some CBRAIN-specific admin files, this method is meant to be called during the boot process to touch them and reset their timestamps.
+  #
+  # On a portal or bourreau:
+  #
+  # - the +DataProvider+ cache dir
+  # - the +DP_Cache_Key.md5+ and
+  # - +DP_Cache_Rev.id+ located in that cache dir
+  #
+  # On a bourreau:
+  #
+  # - the +gridshare+ dir
+  # - the +DP_Cache+ symbolic link located in it.
+  def self.system_touch
+
+    myself       = RemoteResource.current_resource
+    cache_dir    = myself.dp_cache_dir
+    dp_cache_id  = File.join cache_dir, DataProvider::DP_CACHE_ID_FILE
+    dp_cache_md5 = File.join cache_dir, DataProvider::DP_CACHE_MD5_FILE
 
+    FileUtils.touch [cache_dir, dp_cache_id, dp_cache_md5], verbose: true, nocreate: true
+
+    # touch only cache for Portal, for Bourreau touch gridshare
+    return true unless myself.is_a? Bourreau
+
+    gridshare_dir = myself.cms_shared_dir
+    sym_path      = File.join gridshare_dir, DataProvider::DP_CACHE_SYML
+
+    FileUtils.touch gridshare_dir, verbose: true, nocreate: true
+
+    # update timestamp for a softlink rather than the folder it points to
+    return system("touch", "--no-deference", "--no-create", sym_path)
+
+  end
 
   #################################################################
   # Access restriction checking methods, using flags in meta-data.
@@ -1615,4 +1652,3 @@ def self.local_rsync_protects_args?
   end
 
 end
-
diff --git a/BrainPortal/lib/cbrain_system_checks.rb b/BrainPortal/lib/cbrain_system_checks.rb
@@ -334,6 +334,23 @@ def self.a050_check_data_provider_cache_wipe #:nodoc:
     end
   end
 
+  # Prevents archiving/deletion of CBRAIN system files and directories, such as the cache
+  def self.a060_ensure_system_files_will_not_be_deleted #:nodoc:
+
+    #-----------------------------------------------------------------------------
+    puts "C> Updating timestamp for important system files and directories"
+    #-----------------------------------------------------------------------------
+
+    cache_root = DataProvider.cache_rootdir rescue nil
+    # Need to perform a `to_s` due to a strange behaviour of `blank?`
+    # on `Pathname` (if a content of a `Pathname` is empty it will return true)
+    if cache_root.to_s.blank?
+      puts "C> \t- SKIPPING! No cache root directory yet configured!"
+      return
+    end
+
+    DataProvider.system_touch
+  end
 
 
   def self.a080_ensure_set_starttime_revision #:nodoc:

diff --git a/BrainPortal/lib/portal_system_checks.rb b/BrainPortal/lib/portal_system_checks.rb
@@ -202,34 +202,4 @@ def self.z010_ensure_we_have_a_ssh_agent_locker #:nodoc:
       }
     )
   end
-
-
-  # touch to avoid deletion by cluster bimonthly sctratch cleanup :
-  # 1) DataProvider cache dir
-  # 2) DP_Cache_Key.md5
-  # 3) DP_Cache_Rev.id
-  # 4) cache_dir which is typically updated often, but we touch it just in the case
-  def self.z020_dp_cache_and_symlink_will_exists #:nodoc:
-
-    myself        = RemoteResource.current_resource
-    cache_dir     = myself.dp_cache_dir
-    dp_cache_id   = File.join cache_dir,     DataProvider::DP_CACHE_ID_FILE
-    dp_cache_md5  = File.join cache_dir,     DataProvider::DP_CACHE_MD5_FILE
-
-    puts "C> Updating timestamp for cache folder as well as its symlink, MD5 and ID files"
-
-    begin
-      FileUtils.touch [cache_dir, dp_cache_id, dp_cache_md5], verbose: true, nocreate: true
-        # files still might be deleted if a bourreau is not rebooted for a long time
-        # some cluster can have policies countering touch abuse
-        # touch command may fail for many reasons, e.g. resource issues
-        # sometimes touch might fails even if timestamp update is successful
-      puts "C> Timestamps are updated."
-    rescue => e
-      puts "C> Timestamps update FAILED: " + e.message
-      return
-    end
-  end
-
 end
-