Skip to content
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 23 additions & 7 deletions dspace_aip_archiver.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from sqlite3 import Error
from subprocess import run
from time import strftime
from zipfile import ZipFile


class ProgressPercentage(object):
Expand Down Expand Up @@ -104,11 +105,19 @@ def exportAipFromDSpaceToStorageFolder(handle, configData):
cli = configData["dspace"]["DSPACE_CLI"]
eperson = "-e " + configData["dspace"]["DSPACE_EPERSON"]
item = "-i " + handle
file_name = handle.replace("/", "-") + ".tar"
file_name = handle.replace("/", "-") + ".zip"
destination = join(configData["dspace"]["EXPORT_LOCATION"], file_name)
run([cli, "packager", "-d", "-t AIP", eperson, item, destination])


def unZipFile(fileName, sourceFilePath):
    """Extract a zip archive into a ``temp`` subfolder, then delete the archive.

    Args:
        fileName: Name of the zip file located inside ``sourceFilePath``.
        sourceFilePath: Directory holding the archive. Its contents are
            extracted to the ``temp`` directory directly beneath it.

    Raises:
        FileNotFoundError: If the archive does not exist.
        zipfile.BadZipFile: If the file is not a valid zip archive.
    """
    # os.path.join avoids doubled separators when sourceFilePath already
    # ends with a slash, and matches how createTarFile builds its paths.
    zipPath = os.path.join(sourceFilePath, fileName)
    extractDir = os.path.join(sourceFilePath, "temp")

    with ZipFile(zipPath, 'r') as zipObj:
        zipObj.extractall(extractDir)

    # Reached only after a successful extraction; if extractall raises,
    # the original archive is left in place.
    os.remove(zipPath)


def getHandleId(record):

handleid = ""
Expand Down Expand Up @@ -252,7 +261,7 @@ def saveToTargetFile(fileName, content, path):
file.write(content)


def createTarFile(fileName, sourceFilePath, targetFilePath, title):
def createTarFile(fileName, sourceFilePath, targetFilePath):

allItem = os.listdir(sourceFilePath)
with tarfile.open(os.path.join(targetFilePath, fileName), 'w:gz') as tar:
Expand Down Expand Up @@ -318,15 +327,23 @@ def cleanFolder(folderPath):
title = getValueFromField(record, "title")
desc = getValueFromField(record, "description")
identifier = getHandleId(record.getField("identifier"))
dspaceExportFileName = identifier.replace("/", "-") + ".zip"
bagitFileName = identifier.replace("/", "-") + ".tar"

logging.info(
"Handle %s: Start export handle file and create APTrust bagit",
identifier)
exportAipFromDSpaceToStorageFolder(
identifier,
configData)
if os.path.exists(export_location + bagitFileName):
if os.path.exists(export_location + dspaceExportFileName):
unZipFile(dspaceExportFileName, export_location)
createTarFile(
bagitFileName,
export_location +
"/temp/",
export_location)
cleanFolder(export_location + "/temp/")
os.rmdir(export_location + "/temp/")
noid = getNoidFromDB(conn, identifier, noid_template)
fileCount = [1, 1]
bagitInfo = createBagitInfo(configData, noid, fileCount)
Expand All @@ -339,8 +356,7 @@ def cleanFolder(folderPath):
createTarFile(
bagitFileName,
export_location,
storage_location,
bagitFileName)
storage_location)
uploadFileToS3(
storage_location + bagitFileName,
configData["s3"]["bucket_name"], bagitFileName)
Expand All @@ -351,7 +367,7 @@ def cleanFolder(folderPath):
"Handle %s: APTrust bagit uploaded to s3",
identifier)
else:
logging.info("Handle %s file not found", bagitFileName)
logging.info("Handle %s file not found", dspaceExportFileName)

conn.close()

Expand Down