From 39f5de1b80d1ba131ee2776d8d5b9ca667ec8800 Mon Sep 17 00:00:00 2001
From: Ramaiz Mansoor
Date: Mon, 18 Nov 2024 21:21:36 -0500
Subject: [PATCH] Revert "[BISERVER-15124] - Moving repository cleaner to scheduler plugin"

---
 .../unified/jcr/RepositoryCleaner.java        | 212 ++++++++++++++++++
 .../unified/jcr/RepositoryCleanerTest.java    | 102 +++++++++
 2 files changed, 314 insertions(+)
 create mode 100644 repository/src/main/java/org/pentaho/platform/repository2/unified/jcr/RepositoryCleaner.java
 create mode 100644 repository/src/test/java/org/pentaho/platform/repository2/unified/jcr/RepositoryCleanerTest.java

diff --git a/repository/src/main/java/org/pentaho/platform/repository2/unified/jcr/RepositoryCleaner.java b/repository/src/main/java/org/pentaho/platform/repository2/unified/jcr/RepositoryCleaner.java
new file mode 100644
index 0000000000..b5e9d59218
--- /dev/null
+++ b/repository/src/main/java/org/pentaho/platform/repository2/unified/jcr/RepositoryCleaner.java
@@ -0,0 +1,212 @@
+/*! ******************************************************************************
+ *
+ * Pentaho
+ *
+ * Copyright (C) 2024 by Hitachi Vantara, LLC : http://www.pentaho.com
+ *
+ * Use of this software is governed by the Business Source License included
+ * in the LICENSE.TXT file.
+ *
+ * Change Date: 2029-07-20
+ ******************************************************************************/
+
+
+package org.pentaho.platform.repository2.unified.jcr;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.jackrabbit.api.management.DataStoreGarbageCollector;
+import org.apache.jackrabbit.core.IPentahoSystemSessionFactory;
+import org.apache.jackrabbit.core.RepositoryImpl;
+import org.pentaho.platform.engine.core.system.PentahoSystem;
+
+import javax.jcr.ItemNotFoundException;
+import javax.jcr.Node;
+import javax.jcr.NodeIterator;
+import javax.jcr.PathNotFoundException;
+import javax.jcr.Property;
+import javax.jcr.Repository;
+import javax.jcr.RepositoryException;
+import javax.jcr.Session;
+import javax.jcr.Value;
+import javax.jcr.version.VersionHistory;
+
+/**
+ * Housekeeping for the JCR repository: the instance method {@linkplain #gc()} purges orphaned version nodes
+ * and then runs JCR's GC routine on the data store.
+ *
+ * @author Andrey Khayrutdinov
+ */
+public class RepositoryCleaner {
+
+  private final Log logger = LogFactory.getLog( RepositoryCleaner.class );
+  private static final String JCR_FROZEN_NODE = "jcr:frozenNode";
+  private static final String JCR_FROZEN_UUID = "jcr:frozenUuid";
+  private static final String JCR_ROOT_VERSION = "jcr:rootVersion";
+  private static final String VERSION_STORAGE_PATH = "/jcr:system/jcr:versionStorage";
+  private IPentahoSystemSessionFactory systemSessionFactory = new IPentahoSystemSessionFactory.DefaultImpl();
+
+  /**
+   * Replaces the factory used to open the system session. Exists primarily for testing.
+   *
+   * @param systemSessionFactory factory {@link #gc()} uses to create the session for the version purge
+   */
+  public void setSystemSessionFactory( IPentahoSystemSessionFactory systemSessionFactory ) {
+    this.systemSessionFactory = systemSessionFactory;
+  }
+
+  /**
+   * Purges orphaned versions and then garbage-collects the data store of the repository that
+   * {@link PentahoSystem} provides under the key "jcrRepository".
+   *
+   * <p>Logs an error and does nothing when no repository is available or it is not a {@link RepositoryImpl}.
+   * A {@link RepositoryException} thrown by either stage is logged instead of propagated, so a failed purge
+   * does not keep the data store GC from running.
+   */
+  public synchronized void gc() {
+    Repository jcrRepository = PentahoSystem.get( Repository.class, "jcrRepository", null );
+    if ( jcrRepository == null ) {
+      logger.error( "Cannot obtain JCR repository. Exiting" );
+      return;
+    }
+
+    if ( !( jcrRepository instanceof RepositoryImpl ) ) {
+      logger.error(
+        String.format( "Expected RepositoryImpl, but got: [%s]. Exiting", jcrRepository.getClass().getName() ) );
+      return;
+    }
+
+    final RepositoryImpl repository = (RepositoryImpl) jcrRepository;
+    purgeOrphanedVersions( repository );
+    collectDataStoreGarbage( repository );
+  }
+
+  /** Walks the version storage with a system session and removes versions whose source node is gone. */
+  private void purgeOrphanedVersions( RepositoryImpl repository ) {
+    Session systemSession = null;
+    try {
+      logger.debug( "Starting Orphaned Version Purge" );
+      systemSession = systemSessionFactory.create( repository );
+      Node node = systemSession.getNode( VERSION_STORAGE_PATH );
+      findVersionNodesAndPurge( node, systemSession );
+      systemSession.save();
+      logger.debug( "Finished Orphaned Version Purge" );
+    } catch ( RepositoryException e ) {
+      logger.error( "Error running Orphaned Version purge", e );
+    } finally {
+      // The session is created for this run only; log out so it is released even when the purge fails.
+      if ( systemSession != null ) {
+        systemSession.logout();
+      }
+    }
+  }
+
+  /** Runs the mark and sweep stages of the data store garbage collector and always closes the collector. */
+  private void collectDataStoreGarbage( RepositoryImpl repository ) {
+    try {
+      logger.info( "Creating garbage collector" );
+      // JCR's documentation recommends not to use RepositoryImpl.createDataStoreGarbageCollector() and
+      // instead invoke RepositoryManager.createDataStoreGarbageCollector()
+      // (see it here: http://wiki.apache.org/jackrabbit/DataStore#Data_Store_Garbage_Collection)
+
+      // However, the example from the wiki cannot be applied directly, because
+      // RepositoryFactoryImpl accepts only TransientRepository's instances that were created by itself;
+      // it creates such instance in "not started" state, and when the instance tries to start, it fails,
+      // because Pentaho's JCR repository is already running.
+
+      DataStoreGarbageCollector gc = repository.createDataStoreGarbageCollector();
+      try {
+        logger.debug( "Starting marking stage" );
+        gc.setPersistenceManagerScan( false );
+        gc.mark();
+        logger.debug( "Starting sweeping stage" );
+        int deleted = gc.sweep();
+        logger.info( String.format( "Garbage collecting completed. %d items were deleted", deleted ) );
+      } finally {
+        gc.close();
+      }
+    } catch ( RepositoryException e ) {
+      logger.error( "Error during garbage collecting", e );
+    }
+  }
+
+  /**
+   * Recursively visits {@code node} and its descendants; for every "jcr:frozenNode" that carries a
+   * "jcr:frozenUuid" and does not belong to the root version, removes the owning version from its history
+   * when the node identified by that UUID can no longer be found.
+   *
+   * @param node    subtree root to inspect; ignored when {@code null}
+   * @param session session used for the lookups; ignored when {@code null}
+   */
+  private void findVersionNodesAndPurge( Node node, Session session ) {
+    if ( node == null || session == null ) {
+      return;
+    }
+    try {
+      boolean versionNode = node.getName().equals( JCR_FROZEN_NODE ) && node.hasProperty( JCR_FROZEN_UUID )
+        && !node.getParent().getName().equals( JCR_ROOT_VERSION );
+      if ( versionNode ) {
+        Property property = node.getProperty( JCR_FROZEN_UUID );
+        Value uuid = property.getValue();
+        if ( isOrphan( uuid.getString(), session ) && removeOwningVersion( node ) ) {
+          // The frozen node went away together with its version, so there is nothing left to descend into.
+          return;
+        }
+      }
+    } catch ( RepositoryException e ) {
+      logger.error( "Error purging version nodes. Routine will continue", e );
+    }
+
+    NodeIterator nodes = null;
+    try {
+      nodes = node.getNodes();
+    } catch ( RepositoryException e ) {
+      logger.error( "Error purging version nodes. Routine will continue", e );
+    }
+
+    if ( nodes == null ) {
+      return;
+    }
+
+    while ( nodes.hasNext() ) {
+      findVersionNodesAndPurge( nodes.nextNode(), session );
+    }
+  }
+
+  /**
+   * Tells whether the node a version was taken from no longer exists.
+   *
+   * @throws RepositoryException for any lookup failure other than "not found"; such a failure is no proof
+   *                             that the node is gone, so the caller must not purge the version
+   */
+  private boolean isOrphan( String frozenUuid, Session session ) throws RepositoryException {
+    try {
+      Node nodeByIdentifier = session.getNodeByIdentifier( frozenUuid );
+      // The node must also resolve through its path to count as alive.
+      session.getNode( nodeByIdentifier.getPath() );
+      return false;
+    } catch ( ItemNotFoundException | PathNotFoundException ignored ) {
+      // this means the node is gone.
+      return true;
+    }
+  }
+
+  /**
+   * Removes the version that owns {@code frozenNode} from its version history.
+   *
+   * @return {@code true} when the version was removed, {@code false} when the expected history was not found
+   */
+  private boolean removeOwningVersion( Node frozenNode ) throws RepositoryException {
+    Node version = frozenNode.getParent();
+    Node history = version.getParent();
+    if ( !( history instanceof VersionHistory ) ) {
+      logger.warn( "Skipping orphan version whose parent is not a version history: " + frozenNode.getPath() );
+      return false;
+    }
+    // Read the path before the removal and report success only once the removal went through.
+    String path = frozenNode.getPath();
+    ( (VersionHistory) history ).removeVersion( version.getName() );
+    logger.info( "Removed orphan version: " + path );
+    return true;
+  }
+}
diff --git a/repository/src/test/java/org/pentaho/platform/repository2/unified/jcr/RepositoryCleanerTest.java b/repository/src/test/java/org/pentaho/platform/repository2/unified/jcr/RepositoryCleanerTest.java
new file mode 100644
index 0000000000..7c83839cd4
--- /dev/null
+++ b/repository/src/test/java/org/pentaho/platform/repository2/unified/jcr/RepositoryCleanerTest.java
@@ -0,0 +1,102 @@
+/*! ******************************************************************************
+ *
+ * Pentaho
+ *
+ * Copyright (C) 2024 by Hitachi Vantara, LLC : http://www.pentaho.com
+ *
+ * Use of this software is governed by the Business Source License included
+ * in the LICENSE.TXT file.
+ *
+ * Change Date: 2029-07-20
+ ******************************************************************************/
+
+
+package org.pentaho.platform.repository2.unified.jcr;
+
+import org.apache.jackrabbit.core.IPentahoSystemSessionFactory;
+import org.apache.jackrabbit.core.RepositoryImpl;
+import org.apache.jackrabbit.core.gc.GarbageCollector;
+import org.junit.Assert;
+import org.junit.Test;
+import org.pentaho.test.platform.engine.core.MicroPlatform;
+
+import javax.jcr.Node;
+import javax.jcr.Repository;
+import javax.jcr.Session;
+
+import static org.mockito.ArgumentMatchers.any;
+import static org.mockito.ArgumentMatchers.anyString;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.times;
+import static org.mockito.Mockito.verify;
+import static org.mockito.Mockito.when;
+
+/**
+ * Unit tests for {@link RepositoryCleaner#gc()} that run against mocked Jackrabbit objects.
+ *
+ * @author Andrey Khayrutdinov
+ */
+public class RepositoryCleanerTest {
+
+  private static final String SOLUTION_PATH = "src/test/resources/solution";
+
+  /** The session stub returns no version storage node; gc() must still mark, sweep and close the collector. */
+  @Test
+  public void gc() throws Exception {
+    GarbageCollector collector = mock( GarbageCollector.class );
+    RepositoryImpl repository = mock( RepositoryImpl.class );
+    when( repository.createDataStoreGarbageCollector() ).thenReturn( collector );
+    Session systemSession = mock( Session.class );
+    IPentahoSystemSessionFactory sessionFactory = mock( IPentahoSystemSessionFactory.class );
+    when( sessionFactory.create( repository ) ).thenReturn( systemSession );
+
+    runGc( repository, sessionFactory );
+
+    verify( collector, times( 1 ) ).mark();
+    verify( collector, times( 1 ) ).sweep();
+    verify( collector, times( 1 ) ).close();
+  }
+
+  protected String getSolutionPath() {
+    return SOLUTION_PATH;
+  }
+
+  /** A node whose getNodes() returns null must not make gc() throw or keep the data store GC from running. */
+  @Test
+  public void testFindVersionNodesAndPurgeWhenNodeHasNullNodes() throws Exception {
+    GarbageCollector collector = mock( GarbageCollector.class );
+    RepositoryImpl repository = mock( RepositoryImpl.class );
+    when( repository.createDataStoreGarbageCollector() ).thenReturn( collector );
+    Session systemSession = mock( Session.class );
+    IPentahoSystemSessionFactory sessionFactory = mock( IPentahoSystemSessionFactory.class );
+    when( sessionFactory.create( any() ) ).thenReturn( systemSession );
+    Node parentNode = mock( Node.class );
+    when( systemSession.getNode( anyString() ) ).thenReturn( parentNode );
+    when( parentNode.getName() ).thenReturn( "" );
+    when( parentNode.getNodes() ).thenReturn( null );
+
+    runGc( repository, sessionFactory );
+
+    verify( collector, times( 1 ) ).mark();
+    verify( collector, times( 1 ) ).sweep();
+    verify( collector, times( 1 ) ).close();
+  }
+
+  /**
+   * Registers {@code repository} with a {@link MicroPlatform}, runs {@link RepositoryCleaner#gc()} using
+   * {@code sessionFactory}, and always stops the platform afterwards.
+   */
+  private void runGc( RepositoryImpl repository, IPentahoSystemSessionFactory sessionFactory ) throws Exception {
+    MicroPlatform mp = new MicroPlatform( getSolutionPath() );
+    mp.defineInstance( Repository.class, repository );
+    mp.defineInstance( "jcrRepository", repository );
+    mp.start();
+    try {
+      RepositoryCleaner cleaner = new RepositoryCleaner();
+      cleaner.setSystemSessionFactory( sessionFactory );
+      cleaner.gc();
+    } finally {
+      mp.stop();
+    }
+  }
+}