diff --git a/pyproject.toml b/pyproject.toml index 75be370..f070e74 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "rda_python_dsupdt" -version = "1.0.5" +version = "2.0.0" authors = [ { name="Zaihua Ji", email="zji@ucar.edu" }, ] @@ -22,14 +22,10 @@ dependencies = [ "rda_python_dsarch", ] -[tool.setuptools] -include-package-data = true - -[tool.setuptools.packages.find] -where = ["src"] - -[tool.setuptools.package-data] -"rda_python_dsupdt" = ["dsupdt.usg"] +[tool.pytest.ini_options] +pythonpath = [ + "src" +] [project.urls] "Homepage" = "https://github.com/NCAR/rda-python-dsupdt" diff --git a/requirements.txt b/requirements.txt index f48e174..84ba122 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,7 @@ +psycopg2-binary==2.9.10 +pytest +rda-python-globus +unidecode +hvac rda_python_common rda_python_dsarch diff --git a/src/rda_python_dsupdt/ds_updt.py b/src/rda_python_dsupdt/ds_updt.py new file mode 100644 index 0000000..074f479 --- /dev/null +++ b/src/rda_python_dsupdt/ds_updt.py @@ -0,0 +1,2454 @@ +#!/usr/bin/env python3 +# +################################################################################## +# +# Title: dsupdt +# Author: Zaihua Ji, zji@ucar.edu +# Date: 10/10/2020 +# 2025-02-05 transferred to package rda_python_dsupdt from +# https://github.com/NCAR/rda-utility-programs.git +# Purpose: python utility program to download remote files, +# process downloaded files and create local file, and +# archive local files onto RDA Server +# save information of web online data files or Saved files into RDADB +# +# Github: https://github.com/NCAR/rda-python-dsupdt.git +# +################################################################################## +# +import sys +import os +import re +from os import path as op +from rda_python_common import PgLOG +from rda_python_common import PgSIG +from rda_python_common import PgLock +from rda_python_common import PgCMD +from 
from rda_python_common import PgFile
from rda_python_common import PgUtil
from rda_python_common import PgOPT
from rda_python_common import PgDBI
from rda_python_common import PgSplit
from . import PgUpdt

# NOTE(review): this module was reconstructed from a whitespace-collapsed patch;
# block nesting was inferred from context -- confirm against the upstream repo.

TEMPINFO = {}                    # per-locfile temporal info cache, keyed by lindex
TOPMSG = SUBJECT = ACTSTR = None # email top message / subject / action summary
ALLCNT = 0                       # count of records/periods being processed by current action
DEFTYPES = {'WT' : 'D', 'ST' : 'P', 'QT' : 'B'}  # default file types per type option

#
# main function to run dsupdt
#
def main():
    """Parse command-line input, dispatch the requested action, then send
    the summary email and record dscheck status before exiting."""

    global SUBJECT
    PgOPT.parsing_input('dsupdt')
    PgUpdt.check_enough_options(PgOPT.PGOPT['CACT'], PgOPT.PGOPT['ACTS'])
    start_action()

    # send notification email unless -NE given, or -EE given and no error occurred
    if SUBJECT and 'NE' not in PgOPT.params and (PgLOG.PGLOG['ERRCNT'] or 'EE' not in PgOPT.params):
        SUBJECT += " on " + PgLOG.PGLOG['HOSTNAME']
        PgLOG.set_email("{}: {}".format(SUBJECT, TOPMSG), PgLOG.EMLTOP)
        if ACTSTR: SUBJECT = "{} for {}".format(ACTSTR, SUBJECT)
        if PgSIG.PGSIG['PPID'] > 1: SUBJECT += " in CPID {}".format(PgSIG.PGSIG['PID'])
        if PgLOG.PGLOG['ERRCNT'] > 0: SUBJECT += " With Error"
        if PgLOG.PGLOG['DSCHECK']:
            # running under a dscheck record: route email through its einfo
            PgDBI.build_customized_email("dscheck", "einfo", "cindex = {}".format(PgLOG.PGLOG['DSCHECK']['cindex']),
                                         SUBJECT, PgOPT.PGOPT['wrnlog'])
        elif PgOPT.PGOPT['UCNTL']:
            # running under an update control record: route email through its einfo
            PgDBI.build_customized_email("dcupdt", "einfo", "cindex = {}".format(PgOPT.PGOPT['UCNTL']['cindex']),
                                         SUBJECT, PgOPT.PGOPT['wrnlog'])
        else:
            PgLOG.pglog(SUBJECT, PgOPT.PGOPT['wrnlog']|PgLOG.SNDEML)

    if PgLOG.PGLOG['DSCHECK']:
        if PgLOG.PGLOG['ERRMSG']:
            PgDBI.record_dscheck_error(PgLOG.PGLOG['ERRMSG'])
        else:
            PgCMD.record_dscheck_status("D")

    if PgOPT.OPTS[PgOPT.PGOPT['CACT']][2]: PgLOG.cmdlog()  # log end time if not getting only action

    PgLOG.pgexit(0)

#
# start action of dsupdt
#
def start_action():
    """Dispatch to the handler matching the current action (PGOPT['CACT'] /
    PGOPT['ACTS'] bit flags), setting the module-wide ALLCNT first."""

    global ALLCNT

    if PgOPT.PGOPT['ACTS']&PgOPT.OPTS['CU'][0]:
        # check dataset update status, under one control index if -CI given
        if 'CI' in PgOPT.params:
            if PgUpdt.cache_update_control(PgOPT.params['CI'][0], 1):
                check_dataset_status()
        else:
            ALLCNT = PgOPT.get_option_count(["ED", "EH"])
            check_dataset_status(0)
    elif PgOPT.PGOPT['ACTS'] == PgOPT.OPTS['DL'][0]:
        # delete: control records (-CI), remote files (-RF), or local files (-LI)
        if 'CI' in PgOPT.params:
            ALLCNT = len(PgOPT.params['CI'])
            delete_control_info()
        elif 'RF' in PgOPT.params:
            ALLCNT = len(PgOPT.params['RF'])
            delete_remote_info()
        else:
            ALLCNT = len(PgOPT.params['LI'])
            delete_local_info()
    elif PgOPT.OPTS[PgOPT.PGOPT['CACT']][0]&PgOPT.OPTS['GA'][0]:
        get_update_info()
    elif PgOPT.PGOPT['CACT'] == 'PC':
        process_update_controls()
    elif PgOPT.PGOPT['ACTS'] == PgOPT.OPTS['SA'][0]:
        # set all: load control/local/remote sections from an input file (-IF)
        if 'IF' not in PgOPT.params:
            PgOPT.action_error("Missing input file via Option -IF")
        if PgOPT.get_input_info(PgOPT.params['IF'], 'DCUPDT'):
            PgUpdt.check_enough_options('SC', PgOPT.OPTS['SC'][0])
            ALLCNT = len(PgOPT.params['CI'])
            set_control_info()
        if PgOPT.get_input_info(PgOPT.params['IF'], 'DLUPDT'):
            PgUpdt.check_enough_options('SL', PgOPT.OPTS['SL'][0])
            ALLCNT = len(PgOPT.params['LI'])
            set_local_info()
        if PgOPT.get_input_info(PgOPT.params['IF'], 'DRUPDT') and PgOPT.params['RF']:
            PgUpdt.check_enough_options('SR', PgOPT.OPTS['SR'][0])
            ALLCNT = len(PgOPT.params['RF']) if 'RF' in PgOPT.params else 0
            set_remote_info()
    elif PgOPT.PGOPT['ACTS'] == PgOPT.OPTS['SC'][0]:
        ALLCNT = len(PgOPT.params['CI'])
        set_control_info()
    elif PgOPT.PGOPT['ACTS'] == PgOPT.OPTS['SL'][0]:
        ALLCNT = len(PgOPT.params['LI'])
        set_local_info()
    elif PgOPT.PGOPT['ACTS'] == PgOPT.OPTS['SR'][0]:
        ALLCNT = len(PgOPT.params['RF'])
        set_remote_info()
    elif PgOPT.PGOPT['ACTS']&PgOPT.OPTS['UF'][0]:
        # update files, under one control index if -CI given
        if 'CI' in PgOPT.params:
            if PgUpdt.cache_update_control(PgOPT.params['CI'][0], 1): dataset_update()
        else:
            ALLCNT = PgOPT.get_option_count(["ED", "EH"])
            dataset_update()
    elif PgOPT.PGOPT['ACTS'] == PgOPT.OPTS['UL'][0]:
        # unlock control and/or local file records
        if 'CI' in PgOPT.params:
            ALLCNT = len(PgOPT.params['CI'])
            unlock_control_info()
        if 'LI' in PgOPT.params:
            ALLCNT = len(PgOPT.params['LI'])
            unlock_update_info()

#
# delete update control records for given dsid and control indices
#
def delete_control_info():
    """Lock and delete each dcupdt record in -CI; zero out the cindex of
    any dlupdt records that referenced a deleted control."""

    s = 's' if ALLCNT > 1 else ''
    PgLOG.pglog("Delete {} update control record{} ...".format(ALLCNT, s), PgLOG.WARNLG)

    delcnt = modcnt = 0
    for i in range(ALLCNT):
        # must hold the lock before deleting; skip records we cannot lock
        cidx = PgLock.lock_update_control(PgOPT.params['CI'][i], 2, PgOPT.PGOPT['extlog'])
        if cidx <= 0: continue
        ccnd = "cindex = {}".format(cidx)
        delcnt += PgDBI.pgdel("dcupdt", ccnd, PgOPT.PGOPT['extlog'])
        # detach local file records from the removed control
        modcnt += PgDBI.pgexec("UPDATE dlupdt SET cindex = 0 WHERE " + ccnd, PgOPT.PGOPT['extlog'])

    PgLOG.pglog("{} of {} update control record{} deleted".format(delcnt, ALLCNT, s), PgOPT.PGOPT['wrnlog'])
    if modcnt > 0:
        s = 's' if modcnt > 1 else ''
        PgLOG.pglog("{} associated local file record{} modified".format(modcnt, s), PgOPT.PGOPT['wrnlog'])

#
# delete local files for given dsid and locfile indices
#
def delete_local_info():
    """Lock and delete each dlupdt record in -LI, cascading the delete to
    its associated drupdt (remote file) records first."""

    s = 's' if ALLCNT > 1 else ''
    PgLOG.pglog("Delete {} Locfile record{} ...".format(ALLCNT, s), PgLOG.WARNLG)

    dcnt = delcnt = 0
    for i in range(ALLCNT):
        lidx = PgOPT.params['LI'][i]
        lcnd = "lindex = {}".format(lidx)
        if PgLock.lock_update(lidx, None, 2, PgOPT.PGOPT['errlog']) <= 0: continue
        cnt = PgDBI.pgget("drupdt", "", lcnd, PgOPT.PGOPT['extlog'])
        if cnt > 0:
            ss = 's' if cnt > 1 else ''
            PgLOG.pglog("Delete {} associated remote file record{} for Locfile index {} ...".format(cnt, ss, lidx), PgLOG.WARNLG)
            dcnt += PgDBI.pgdel("drupdt", lcnd, PgOPT.PGOPT['extlog'])
        delcnt += PgDBI.pgdel("dlupdt", lcnd, PgOPT.PGOPT['extlog'])

    PgLOG.pglog("{} of {} Locfile record{} deleted".format(delcnt, ALLCNT, s), PgOPT.PGOPT['wrnlog'])
    if dcnt > 0:
        s = "s" if (dcnt > 1) else ""
        PgLOG.pglog("{} associated Remote file record{} deleted too".format(dcnt, s), PgOPT.PGOPT['wrnlog'])

#
# delete update remote files for given dsid and remote files/locfile indices
#
def delete_remote_info():
    """Delete drupdt records matched by (-LI, -RF) pairs, further narrowed
    by -DO (dindex) when provided."""

    s = 's' if ALLCNT > 1 else ''
    PgLOG.pglog("Delete {} remote file record{} ...".format(ALLCNT, s), PgLOG.WARNLG)

    PgOPT.validate_multiple_options(ALLCNT, ["LI", "DO"])
    delcnt = 0
    for i in range(ALLCNT):
        lcnd = "lindex = {} AND remotefile = '{}'".format(PgOPT.params['LI'][i], PgOPT.params['RF'][i])
        if 'DO' in PgOPT.params: lcnd += " AND dindex = {}".format(PgOPT.params['DO'][i])
        delcnt += PgDBI.pgdel("drupdt", lcnd, PgOPT.PGOPT['extlog'])

    PgLOG.pglog("{} of {} remote file record{} deleted".format(delcnt, ALLCNT, s), PgOPT.PGOPT['wrnlog'])
#
# get update control information
#
def get_control_info():
    """Query dcupdt for the current dataset and print the records in the
    dsupdt column format, honoring -FN (fields), -ON (order), -FO (format)."""

    tname = "dcupdt"
    hash = PgOPT.TBLHASH[tname]
    PgLOG.pglog("Get update control info of {} from RDADB ...".format(PgOPT.params['DS']), PgLOG.WARNLG)

    lens = fnames = None
    if 'FN' in PgOPT.params: fnames = PgOPT.params['FN']
    fnames = PgDBI.fieldname_string(fnames, PgOPT.PGOPT[tname], PgOPT.PGOPT['dcall'])
    onames = PgOPT.params['ON'] if 'ON' in PgOPT.params else "C"   # default order: control index
    condition = PgUpdt.file_condition(tname) + PgOPT.get_order_string(onames, tname)
    pgrecs = PgDBI.pgmget(tname, "*", condition, PgOPT.PGOPT['extlog'])
    if pgrecs and 'FO' in PgOPT.params: lens = PgUtil.all_column_widths(pgrecs, fnames, hash)
    PgOPT.OUTPUT.write("{}{}{}\n".format(PgOPT.OPTS['DS'][1], PgOPT.params['ES'], PgOPT.params['DS']))
    # under the get-all action write a section header so the output can be re-read by -IF
    if PgOPT.PGOPT['CACT'] == "GA": PgOPT.OUTPUT.write("[{}]\n".format(tname.upper()))
    PgOPT.OUTPUT.write(PgOPT.get_string_titles(fnames, hash, lens) + "\n")
    if pgrecs:
        cnt = PgOPT.print_column_format(pgrecs, fnames, hash, lens)
        s = 's' if cnt > 1 else ''
        PgLOG.pglog("{} update control record{} retrieved".format(cnt, s), PgOPT.PGOPT['wrnlog'])
    else:
        PgLOG.pglog("no update control record retrieved", PgOPT.PGOPT['wrnlog'])

#
# get local file update information
#
def get_local_info():
    """Query dlupdt for the current dataset and print the records in the
    dsupdt column format, honoring -FN, -ON and -FO options."""

    tname = "dlupdt"
    hash = PgOPT.TBLHASH[tname]
    PgLOG.pglog("Get local file update info of {} from RDADB ...".format(PgOPT.params['DS']), PgLOG.WARNLG)

    lens = fnames = None
    if 'FN' in PgOPT.params: fnames = PgOPT.params['FN']
    fnames = PgDBI.fieldname_string(fnames, PgOPT.PGOPT[tname], PgOPT.PGOPT['dlall'])
    onames = PgOPT.params['ON'] if 'ON' in PgOPT.params else "XL"  # default order: execorder, lindex
    condition = PgUpdt.file_condition(tname) + PgOPT.get_order_string(onames, tname)
    pgrecs = PgDBI.pgmget(tname, "*", condition, PgOPT.PGOPT['extlog'])
    if pgrecs and 'FO' in PgOPT.params: lens = PgUtil.all_column_widths(pgrecs, fnames, hash)
    if PgOPT.PGOPT['CACT'] == "GL":
        # standalone action: emit the dataset line; otherwise a section header for -IF re-read
        PgOPT.OUTPUT.write("{}{}{}\n".format(PgOPT.OPTS['DS'][1], PgOPT.params['ES'], PgOPT.params['DS']))
    else:
        PgOPT.OUTPUT.write("[{}]\n".format(tname.upper()))
    PgOPT.OUTPUT.write(PgOPT.get_string_titles(fnames, hash, lens) + "\n")
    if pgrecs:
        cnt = PgOPT.print_column_format(pgrecs, fnames, hash, lens)
        s = 's' if cnt > 1 else ''
        PgLOG.pglog("{} locfile record{} retrieved".format(cnt, s), PgOPT.PGOPT['wrnlog'])
    else:
        PgLOG.pglog("no locfile record retrieved", PgOPT.PGOPT['wrnlog'])

#
# get remote file update information
#
def get_remote_info():
    """Query drupdt for the current dataset and print the records in the
    dsupdt column format, honoring -FN, -ON and -FO options."""

    tname = "drupdt"
    hash = PgOPT.TBLHASH[tname]
    PgLOG.pglog("Get remote file update info of {} from RDADB ...".format(PgOPT.params['DS']), PgLOG.WARNLG)

    lens = fnames = None
    if 'FN' in PgOPT.params: fnames = PgOPT.params['FN']
    fnames = PgDBI.fieldname_string(fnames, PgOPT.PGOPT[tname], PgOPT.PGOPT['drall'])
    onames = PgOPT.params['ON'] if 'ON' in PgOPT.params else "LDF" # default order: lindex, dindex, file
    condition = PgUpdt.file_condition(tname) + PgOPT.get_order_string(onames, tname)
    pgrecs = PgDBI.pgmget(tname, "*", condition, PgOPT.PGOPT['extlog'])
    if pgrecs and 'FO' in PgOPT.params: lens = PgUtil.all_column_widths(pgrecs, fnames, hash)
    if PgOPT.PGOPT['CACT'] == "GR":
        PgOPT.OUTPUT.write("{}{}{}\n".format(PgOPT.OPTS['DS'][1], PgOPT.params['ES'], PgOPT.params['DS']))
    else:
        PgOPT.OUTPUT.write("[{}]\n".format(tname.upper()))
    PgOPT.OUTPUT.write(PgOPT.get_string_titles(fnames, hash, lens) + "\n")
    if pgrecs:
        cnt = PgOPT.print_column_format(pgrecs, fnames, hash, lens)
        s = 's' if cnt > 1 else ''
        PgLOG.pglog("{} remote file record{} retrieved".format(cnt, s), PgOPT.PGOPT['wrnlog'])
    else:
        PgLOG.pglog("no remote file record retrieved", PgOPT.PGOPT['wrnlog'])
#
# add or modify update control information
#
def set_control_info():
    """Add (cindex <= 0) or modify (cindex > 0, locked first) dcupdt records
    from the -CI list and the field options gathered by get_field_keys."""

    tname = 'dcupdt'
    s = 's' if ALLCNT > 1 else ''
    PgLOG.pglog("Set {} update control record{} ...".format(ALLCNT, s), PgLOG.WARNLG)

    addcnt = modcnt = 0
    flds = PgOPT.get_field_keys(tname, None, 'C')
    if not flds: return PgLOG.pglog("Nothing to set for update control!", PgOPT.PGOPT['errlog'])
    PgOPT.validate_multiple_values(tname, ALLCNT, flds)
    fields = PgOPT.get_string_fields(flds, tname)

    for i in range(ALLCNT):
        cidx = PgOPT.params['CI'][i]
        if cidx > 0:
            # existing record: must lock before modifying
            if PgLock.lock_update_control(cidx, 2, PgOPT.PGOPT['errlog']) <= 0: continue
            cnd = "cindex = {}".format(cidx)
            pgrec = PgDBI.pgget(tname, fields, cnd, PgOPT.PGOPT['errlog'])
            if not pgrec: PgOPT.action_error("Error get update control record for " + cnd)
        else:
            pgrec = None

        record = PgOPT.build_record(flds, pgrec, tname, i)
        if record:
            # referential checks before touching the table
            if 'pindex' in record and record['pindex'] and not PgDBI.pgget("dcupdt", "", "cindex = {}".format(record['pindex'])):
                PgOPT.action_error("Parent control Index {} is not in RDADB".format(record['pindex']))
            if 'action' in record and not re.match(r'^({})$'.format(PgOPT.PGOPT['UPDTACTS']), record['action']):
                PgOPT.action_error("Action Name '{}' must be one of dsupdt Actions ({})".format(record['action'], PgOPT.PGOPT['UPDTACTS']))
            if pgrec:
                # updating releases the lock as a side effect (pid/lockhost cleared)
                record['pid'] = 0
                record['lockhost'] = ''
                modcnt += PgDBI.pgupdt(tname, record, cnd, PgOPT.PGOPT['errlog']|PgLOG.DODFLT)
            else:
                record['dsid'] = PgOPT.params['DS']
                if 'specialist' not in record: record['specialist'] = PgOPT.params['LN']
                addcnt += PgDBI.pgadd(tname, record, PgOPT.PGOPT['errlog']|PgLOG.DODFLT)
        elif cidx: # unlock
            PgLock.lock_update_control(cidx, 0, PgOPT.PGOPT['errlog'])

    PgLOG.pglog("{}/{} of {} control record{} added/modified".format(addcnt, modcnt, ALLCNT, s), PgOPT.PGOPT['wrnlog'])

#
# add or modify local file update information
#
def set_local_info():
    """Add (lindex <= 0) or modify (lindex > 0, locked first) dlupdt records
    from the -LI list; -RO reorders execorder via get_next_exec_order."""

    tname = 'dlupdt'
    s = 's' if ALLCNT > 1 else ''
    PgLOG.pglog("Set {} local file record{} ...".format(ALLCNT, s), PgLOG.WARNLG)

    addcnt = modcnt = 0
    flds = PgOPT.get_field_keys(tname, None, 'L')
    if 'RO' in PgOPT.params and 'XO' not in PgOPT.params: flds += 'X'  # reorder implies execorder field
    if not flds: return PgLOG.pglog("Nothing to set for update local file!", PgOPT.PGOPT['errlog'])
    PgOPT.validate_multiple_values(tname, ALLCNT, flds)
    fields = PgOPT.get_string_fields(flds, tname)

    for i in range(ALLCNT):
        lidx = PgOPT.params['LI'][i]
        if lidx > 0:
            if PgLock.lock_update(lidx, None, 2, PgOPT.PGOPT['errlog']) <= 0: continue
            cnd = "lindex = {}".format(lidx)
            pgrec = PgDBI.pgget(tname, fields, cnd, PgOPT.PGOPT['errlog'])
            if not pgrec: PgOPT.action_error("Error get Local file record for " + cnd)
        else:
            pgrec = None

        if 'RO' in PgOPT.params: PgOPT.params['XO'][i] = PgUpdt.get_next_exec_order(PgOPT.params['DS'], 0)
        record = PgOPT.build_record(flds, pgrec, tname, i)
        if record:
            if 'cindex' in record and record['cindex'] and not PgDBI.pgget("dcupdt", "", "cindex = {}".format(record['cindex'])):
                PgOPT.action_error("Update control Index {} is not in RDADB".format(record['cindex']))
            if 'action' in record and not re.match(r'^({})$'.format(PgOPT.PGOPT['ARCHACTS']), record['action']):
                PgOPT.action_error("Action Name '{}' must be one of dsarch Actions ({})".format(record['action'], PgOPT.PGOPT['ARCHACTS']))

            if pgrec:
                # clearing the validation interval also clears the miss date/hour
                if 'VI' in record and not record['VI'] and pgrec['missdate']: record['missdate'] = record['misshour'] = None
                record['pid'] = 0
                # NOTE(review): hostname is cleared with 0 here while set_control_info
                # clears lockhost with '' -- confirm 0 is intended for this column
                record['hostname'] = 0
                modcnt += PgDBI.pgupdt(tname, record, cnd, PgOPT.PGOPT['errlog']|PgLOG.DODFLT)
            else:
                record['dsid'] = PgOPT.params['DS']
                if 'specialist' not in record: record['specialist'] = PgOPT.params['LN']
                if 'execorder' not in record: record['execorder'] = PgUpdt.get_next_exec_order(PgOPT.params['DS'], 1)
                addcnt += PgDBI.pgadd(tname, record, PgOPT.PGOPT['errlog']|PgLOG.DODFLT)
        elif lidx: # unlock
            PgLock.lock_update(lidx, None, 0, PgOPT.PGOPT['errlog'])

    PgLOG.pglog("{}/{} of {} Locfile record{} added/modified".format(addcnt, modcnt, ALLCNT, s), PgOPT.PGOPT['wrnlog'])
#
# add or modify remote file update information
#
def set_remote_info():
    """Add or modify drupdt records keyed by (lindex, remotefile, dindex)
    from the -LI/-RF/-DO option lists."""

    tname = 'drupdt'
    s = 's' if ALLCNT > 1 else ''
    PgLOG.pglog("Set {} update remote file{} ...".format(ALLCNT, s), PgLOG.WARNLG)

    addcnt = modcnt = 0
    flds = PgOPT.get_field_keys(tname)
    if not flds: return PgLOG.pglog("Nothing to set for update remote file!", PgOPT.PGOPT['errlog'])
    PgOPT.validate_multiple_values(tname, ALLCNT, flds)
    fields = PgOPT.get_string_fields(flds, tname)

    for i in range(ALLCNT):
        lidx = PgOPT.params['LI'][i]
        didx = PgOPT.params['DO'][i] if 'DO' in PgOPT.params else 0
        cnd = "lindex = {} AND remotefile = '{}' AND dindex = {}".format(lidx, PgOPT.params['RF'][i], didx)
        pgrec = PgDBI.pgget("drupdt", fields, cnd, PgOPT.PGOPT['errlog'])
        record = PgOPT.build_record(flds, pgrec, tname, i)
        if record:
            # referential check: the owning local file record must exist
            if 'lindex' in record and record['lindex'] and not PgDBI.pgget("dlupdt", "", "lindex = {}".format(record['lindex'])):
                PgOPT.action_error("Local file Index {} is not in RDADB".format(record['lindex']))

            if pgrec:
                modcnt += PgDBI.pgupdt("drupdt", record, cnd, PgOPT.PGOPT['errlog']|PgLOG.DODFLT)
            else:
                record['lindex'] = lidx
                record['dsid'] = PgOPT.params['DS']
                addcnt += PgDBI.pgadd("drupdt", record, PgOPT.PGOPT['errlog']|PgLOG.DODFLT)

    PgLOG.pglog("{}/{} of {} remote file record{} added/modified".format(addcnt, modcnt, ALLCNT, s), PgOPT.PGOPT['wrnlog'])

#
# unlock update records for given locfile indices
#
def unlock_update_info():
    """Unlock each dlupdt record in -LI: normal unlock first, then a forced
    unlock only when the locking host is verified to be down."""

    s = 's' if ALLCNT > 1 else ''
    PgLOG.pglog("Unlock {} update locfile{} ...".format(ALLCNT, s), PgLOG.WARNLG)

    modcnt = 0
    for lidx in PgOPT.params['LI']:
        cnd = "lindex = {}".format(lidx)
        pgrec = PgDBI.pgget("dlupdt", "pid, hostname", cnd, PgOPT.PGOPT['extlog'])
        if not pgrec:
            PgLOG.pglog("{}: Local File Not exists".format(lidx), PgOPT.PGOPT['errlog'])
        elif not pgrec['pid']:
            PgLOG.pglog("{}: Local File Not locked".format(lidx), PgOPT.PGOPT['wrnlog'])
        elif PgLock.lock_update(lidx, None, -1, PgOPT.PGOPT['errlog']) > 0:
            modcnt += 1
            PgLOG.pglog("{}: Local File Unlocked {}/{}".format(lidx, pgrec['pid'], pgrec['hostname']), PgOPT.PGOPT['wrnlog'])
        elif (PgFile.check_host_down(None, pgrec['hostname']) and
              PgLock.lock_update(lidx, None, -2, PgOPT.PGOPT['errlog']) > 0):
            # the lock-holding host is down; it is safe to force the unlock
            modcnt += 1
            PgLOG.pglog("{}: Local File Force unlocked {}/{}".format(lidx, pgrec['pid'], pgrec['hostname']), PgOPT.PGOPT['wrnlog'])
        else:
            PgLOG.pglog("{}: Local File Unable to unlock {}/{}".format(lidx, pgrec['pid'], pgrec['hostname']), PgOPT.PGOPT['wrnlog'])

    PgLOG.pglog("{} of {} local file record{} unlocked from RDADB".format(modcnt, ALLCNT, s), PgLOG.LOGWRN)

#
# unlock update control records for given locfile indices
#
def unlock_control_info():
    """Unlock each dcupdt record in -CI: normal unlock first, then a forced
    unlock only when the locking host is verified to be down."""

    s = 's' if ALLCNT > 1 else ''
    PgLOG.pglog("Unlock {} update control{} ...".format(ALLCNT, s), PgLOG.WARNLG)

    modcnt = 0
    for cidx in PgOPT.params['CI']:
        pgrec = PgDBI.pgget("dcupdt", "pid, lockhost", "cindex = {}".format(cidx), PgOPT.PGOPT['extlog'])
        if not pgrec:
            PgLOG.pglog("{}: Update Control Not exists".format(cidx), PgOPT.PGOPT['errlog'])
        elif not pgrec['pid']:
            PgLOG.pglog("{}: Update Control Not locked".format(cidx), PgOPT.PGOPT['wrnlog'])
        elif PgLock.lock_update_control(cidx, -1, PgOPT.PGOPT['extlog']) > 0:
            modcnt += 1
            PgLOG.pglog("{}: Update Control Unlocked {}/{}".format(cidx, pgrec['pid'], pgrec['lockhost']), PgOPT.PGOPT['wrnlog'])
        elif (PgFile.check_host_down(None, pgrec['lockhost']) and
              PgLock.lock_update_control(cidx, -2, PgOPT.PGOPT['extlog']) > 0):
            modcnt += 1
            PgLOG.pglog("{}: Update Control Force unlocked {}/{}".format(cidx, pgrec['pid'], pgrec['lockhost']), PgOPT.PGOPT['wrnlog'])
        else:
            # fixed typo in user-facing message: was "Undate Control"
            PgLOG.pglog("{}: Update Control Unable to unlock {}/{}".format(cidx, pgrec['pid'], pgrec['lockhost']), PgOPT.PGOPT['wrnlog'])

    PgLOG.pglog("{} of {} update control record{} unlocked from RDADB".format(modcnt, ALLCNT, s), PgLOG.LOGWRN)
#
# get update info of local and remote files owned by login name
#
def get_update_info():
    """Resolve the dataset list (from -DS or a dlupdt query) and print the
    control/local/remote update info for each dataset per the action flags."""

    if 'DS' in PgOPT.params:
        dsids = {'dsid' : [PgOPT.params['DS']]}
        dscnt = 1
    else:
        tname = "dlupdt"
        cnd = PgUpdt.file_condition(tname, None, None, 1)
        if not cnd:
            # no condition given: fall back to records owned by the login name
            PgOPT.set_default_value("SN", PgOPT.params['LN'])
            cnd = PgUpdt.file_condition(tname, None, None, 1)
        dsids = PgDBI.pgmget(tname, "DISTINCT dsid", cnd, PgOPT.PGOPT['extlog'])
        dscnt = len(dsids['dsid']) if dsids else 0
        if dscnt == 0:
            return PgLOG.pglog("NO dataset identified for giving condition", PgOPT.PGOPT['wrnlog'])
        elif dscnt > 1:
            PgLOG.pglog("Get Update Info for {} datasets".format(dscnt), PgOPT.PGOPT['wrnlog'])

    PgOPT.PGOPT['AUTODS'] = dscnt

    for i in range(dscnt):
        PgOPT.params['DS'] = dsids['dsid'][i]
        if PgOPT.PGOPT['ACTS'] == PgOPT.OPTS['GC'][0]:
            get_control_info()
        elif PgOPT.PGOPT['ACTS'] == PgOPT.OPTS['GL'][0]:
            get_local_info()
        elif PgOPT.PGOPT['ACTS'] == PgOPT.OPTS['GR'][0]:
            get_remote_info()
        else:
            # combined get-all action: reset ordering/fields and emit each section
            if 'ON' in PgOPT.params: del PgOPT.params['ON'] # use default order string
            if 'FN' not in PgOPT.params: PgOPT.params['FN'] = 'ALL'
            if PgOPT.PGOPT['ACTS']&PgOPT.OPTS['GC'][0]: get_control_info()
            if PgOPT.PGOPT['ACTS']&PgOPT.OPTS['GL'][0]: get_local_info()
            if PgOPT.PGOPT['ACTS']&PgOPT.OPTS['GR'][0]: get_remote_info()

    if dscnt > 1: PgLOG.pglog("Update Info of {} datasets retrieved".format(dscnt), PgOPT.PGOPT['wrnlog'])

#
# gather due datasets for data update
#
def dataset_update():
    """Drive the file update for every due dataset/local file: resolve dataset
    list, precount files for dscheck, run file_update per local file (possibly
    in child processes), tally results and build the email SUBJECT/TOPMSG.

    NOTE(review): the nesting below was reconstructed from a whitespace-collapsed
    patch -- confirm loop/branch levels against the upstream repository."""

    global SUBJECT, TOPMSG, ACTSTR

    actcnd = "specialist = '{}'".format(PgOPT.params['LN'])
    if PgOPT.PGOPT['ACTS']&PgOPT.OPTS['AF'][0]: actcnd += " AND action IN ('AW', 'AS', 'AQ')"
    (PgOPT.PGOPT['CURDATE'], PgOPT.PGOPT['CURHOUR']) = PgUtil.curdatehour()
    if 'CD' not in PgOPT.params: PgOPT.params['CD'] = PgOPT.PGOPT['CURDATE'] # default to current date
    if 'CH' not in PgOPT.params: PgOPT.params['CH'] = PgOPT.PGOPT['CURHOUR'] # default to current hour
    if ALLCNT > 1 and PgOPT.params['MU']: del PgOPT.params['MU']
    # -CN is superseded by -RD; re-do/re-archive modes drop -MO, otherwise UF defaults it
    if 'CN' in PgOPT.params and 'RD' in PgOPT.params: del PgOPT.params['CN']
    if 'CN' in PgOPT.params or 'RD' in PgOPT.params or 'RA' in PgOPT.params:
        if 'MO' in PgOPT.params: del PgOPT.params['MO']
    elif 'MO' not in PgOPT.params and PgOPT.PGOPT['CACT'] == "UF":
        PgOPT.params['MO'] = -1

    if 'DS' in PgOPT.params:
        dsids = [PgOPT.params['DS']]
        dscnt = 1
    else:
        if 'CI' not in PgOPT.params: actcnd += " AND cindex = 0"
        loccnd = PgUpdt.file_condition('dlupdt', "LQFIXA", None, 1)
        dscnd = actcnd
        if loccnd: dscnd += " AND " + loccnd
        pgrecs = PgDBI.pgmget("dlupdt", "DISTINCT dsid", dscnd, PgOPT.PGOPT['extlog'])
        dsids = pgrecs['dsid'] if pgrecs else []
        dscnt = len(dsids)
    if not dscnt: return PgLOG.pglog("NO dataset is due for update on {} for {}".format(PgOPT.params['CD'], PgOPT.params['LN']), PgOPT.PGOPT['wrnlog'])
    PgOPT.PGOPT['AUTODS'] = dscnt
    actcnd += " ORDER BY execorder, lindex"

    if PgLOG.PGLOG['DSCHECK']:
        # pre-count the files to be processed so dscheck can show progress
        fcnt = 0
        for i in range(dscnt):
            PgOPT.params['DS'] = dsids[i]
            loccnd = PgUpdt.file_condition('dlupdt', "LQFIXA")
            locrecs = PgDBI.pgmget("dlupdt", "*", "{} AND {}".format(loccnd, actcnd), PgOPT.PGOPT['extlog'])
            loccnt = len(locrecs['locfile']) if locrecs else 0
            if loccnt == 0: continue
            for j in range(loccnt):
                locrec = PgUtil.onerecord(locrecs, j)
                # a single -LF overrides the recorded local file name
                if (loccnt == 1 and 'LI' in PgOPT.params and 'LF' in PgOPT.params and
                    len(PgOPT.params['LF']) == 1 and PgOPT.params['LF'][0] != locrec['locfile']):
                    locrec['locfile'] = PgOPT.params['LF'][0]
                fcnt += file_update(locrec, PgLOG.LOGWRN, 1)   # caching mode: count only
        PgCMD.set_dscheck_fcount(fcnt, PgLOG.LOGERR)

    # check and update data for each dataset
    logact = PgOPT.PGOPT['emllog']
    acnt = ucnt = 0
    for i in range(dscnt):
        PgOPT.params['DS'] = dsids[i]
        loccnd = PgUpdt.file_condition('dlupdt', "LQFIXA")
        locrecs = PgDBI.pgmget("dlupdt", "*", "{} AND {}".format(loccnd, actcnd), PgOPT.PGOPT['extlog'])
        loccnt = len(locrecs['locfile']) if locrecs else 0
        if loccnt == 0:
            s = "-UC{}".format(PgOPT.params['CI'][0]) if ('CI' in PgOPT.params and len(PgOPT.params['CI']) == 1) else ""
            PgLOG.pglog("{}{}: no config record of local file found to update for '{}'".format(PgOPT.params['DS'], s, PgOPT.params['LN']), PgOPT.PGOPT['wrnlog'])
            continue
        s = 's' if loccnt > 1 else ''
        PgLOG.pglog("{}: {} for {} update record{}".format(PgOPT.params['DS'], PgOPT.PGOPT['CACT'], loccnt, s), logact)
        logact = PgOPT.PGOPT['emlsep']
        for j in range(loccnt):
            locrec = PgUtil.onerecord(locrecs, j)
            if (loccnt == 1 and 'LI' in PgOPT.params and 'LF' in PgOPT.params and
                len(PgOPT.params['LF']) == 1 and PgOPT.params['LF'][0] != locrec['locfile']):
                locrec['locfile'] = PgOPT.params['LF'][0]
            if locrec['cindex']:
                if 'CI' not in PgOPT.params:
                    # adopt the record's control index and re-evaluate mode options
                    PgOPT.params['CI'] = [locrec['cindex']]
                    PgUpdt.cache_update_control(locrec['cindex'], 0)
                    if 'CN' in PgOPT.params and 'RD' in PgOPT.params: del PgOPT.params['CN']
                    if 'CN' in PgOPT.params or 'RD' in PgOPT.params or 'RA' in PgOPT.params:
                        if 'MO' in PgOPT.params: del PgOPT.params['MO']
                    elif 'MO' not in PgOPT.params and PgOPT.PGOPT['CACT'] == "UF":
                        PgOPT.params['MO'] = -1
                elif locrec['cindex'] != PgOPT.params['CI'][0]:
                    PgLOG.pglog("{}-{}: Skipped due to control index {} mismatches {}".format(PgOPT.params['DS'], locrec['lindex'], locrec['cindex'], PgOPT.params['CI'][0]), PgOPT.PGOPT['emlerr'])
                    continue

            PgOPT.PGOPT['rstat'] = 1 # reset remote download status for each local file
            if PgSIG.PGSIG['MPROC'] > 1: acnt += 1
            fcnt = file_update(locrec, logact)
            if PgSIG.PGSIG['PPID'] > 1:
                if PgOPT.PGOPT['AUTODS'] > 1: PgOPT.PGOPT['AUTODS'] = dscnt = 1
                acnt = ucnt = 0 # reinitialize counts for child process
                break # stop loop in child
            if PgSIG.PGSIG['MPROC'] > 1:
                if fcnt == 0:
                    break # quit
                else:
                    if fcnt > 0: ucnt += 1 # record update count, s is either -1 or 1
                    continue # non-daemon parent
            if 'QE' in PgOPT.params and fcnt <= 0: break

        # per-dataset bookkeeping after processing its local file records
        if PgOPT.PGOPT['vcnt'] > 0:
            renew_internal_version(PgOPT.params['DS'], PgOPT.PGOPT['vcnt'])
            PgOPT.PGOPT['vcnt'] = 0
        if PgSIG.PGSIG['MPROC'] > 1:
            if not PgSIG.PGSIG['QUIT'] and j == loccnt: continue
            break
        if PgOPT.PGOPT['rcnt']:
            if PgOPT.PGOPT['CACT'] == "DR":
                acnt += PgOPT.PGOPT['rcnt']
                ucnt += PgOPT.PGOPT['dcnt']
            s = 's' if PgOPT.PGOPT['rcnt'] > 1 else ''
            if loccnt > 1:
                PgLOG.pglog("{}: {} of {} rfile{} gotten!".format(PgOPT.params['DS'], PgOPT.PGOPT['dcnt'], PgOPT.PGOPT['rcnt'], s), PgOPT.PGOPT['emllog'])
            PgOPT.PGOPT['rcnt'] = PgOPT.PGOPT['dcnt'] = 0
        if PgOPT.PGOPT['lcnt']:
            if PgOPT.PGOPT['CACT'] == "BL" or PgOPT.PGOPT['CACT'] == "PB":
                acnt += PgOPT.PGOPT['lcnt']
                ucnt += PgOPT.PGOPT['bcnt']
            s = 's' if PgOPT.PGOPT['lcnt'] > 1 else ''
            if loccnt > 1 and PgOPT.PGOPT['bcnt'] > 0:
                PgLOG.pglog("{}: {} of {} lfile{} built!".format(PgOPT.params['DS'], PgOPT.PGOPT['bcnt'], PgOPT.PGOPT['lcnt'], s), PgOPT.PGOPT['emllog'])
            PgOPT.PGOPT['lcnt'] = PgOPT.PGOPT['bcnt'] = 0
        if PgOPT.PGOPT['acnt']:
            acnt += PgOPT.PGOPT['acnt']
            ucnt += PgOPT.PGOPT['ucnt']
            s = 's' if PgOPT.PGOPT['acnt'] > 1 else ''
            PgLOG.pglog("{}: {} of {} local file{} archived!".format(PgOPT.params['DS'], PgOPT.PGOPT['ucnt'], PgOPT.PGOPT['acnt'], s),
                        (PgOPT.PGOPT['emlsum'] if dscnt > 1 else PgOPT.PGOPT['emllog']))
            PgOPT.PGOPT['acnt'] = PgOPT.PGOPT['ucnt'] = 0

        if PgSIG.PGSIG['PPID'] > 1: break # stop loop child

    if acnt > 0:
        # compose the email action string and top message from the tallies
        TOPMSG = detail = ""
        if PgSIG.PGSIG['MPROC'] > 1:
            s = 's' if acnt > 1 else ''
            ACTSTR = "{} of {} CPIDs{} for 'dsupdt {}' started".format(ucnt, acnt, s, PgOPT.PGOPT['CACT'])
        else:
            s = 's' if ucnt > 1 else ''
            TOPMSG = ""
            if PgOPT.PGOPT['CACT'] == "DR":
                atype = "remote file{} gotten".format(s)
            elif PgOPT.PGOPT['CACT'] == "BL" or PgOPT.PGOPT['CACT'] == "PB":
                atype = "local file{} built".format(s)
            else:
                atype = "local file{} archived".format(s)
            if PgOPT.PGOPT['rdcnt'] > 0:
                s = 's' if PgOPT.PGOPT['rdcnt'] > 1 else ''
                TOPMSG = "{} remote server file{} downloaded and ".format(PgOPT.PGOPT['rdcnt'], s)
            if PgOPT.PGOPT['udcnt'] > 0:
                if detail: detail += " & "
                detail += "{} Web Online".format(PgOPT.PGOPT['udcnt'])
            if PgOPT.PGOPT['uncnt'] > 0:
                if detail: detail += " & "
                detail += "{} Glade Only".format(PgOPT.PGOPT['uncnt'])
            if PgOPT.PGOPT['uwcnt'] > 0:
                if detail: detail += " & "
                detail += "{} Web".format(PgOPT.PGOPT['uwcnt'])
            if PgOPT.PGOPT['uscnt'] > 0:
                if detail: detail += " & "
                detail += "{} Saved".format(PgOPT.PGOPT['uscnt'])
            if PgOPT.PGOPT['qbcnt'] > 0:
                if detail: detail += " & "
                detail += "{} Quasar Backup".format(PgOPT.PGOPT['qbcnt'])
            if PgOPT.PGOPT['qdcnt'] > 0:
                if detail: detail += " & "
                detail += "{} Quasar Drdata".format(PgOPT.PGOPT['qdcnt'])
            ACTSTR = "{} {}".format(ucnt, atype)

        TOPMSG += ACTSTR
        if detail: TOPMSG += " ({})".format(detail)
        if dscnt > 1:
            PgLOG.pglog("{} datasets: {}".format(dscnt, TOPMSG), PgOPT.PGOPT['emlsum'])
        SUBJECT = "DSUPDT of "
        if PgOPT.PGOPT['AUTODS'] < 2:
            SUBJECT += PgOPT.params['DS'].upper()
        else:
            SUBJECT += "{} Datasets".format(PgOPT.PGOPT['AUTODS'])

    if PgOPT.PGOPT['UCNTL']:
        PgUpdt.reset_control_time()
        if SUBJECT: SUBJECT += "-C{}".format(PgOPT.PGOPT['UCNTL']['cindex'])

# renew internal version number for given dataset
def renew_internal_version(dsid, vcnt):
    """Run 'dsarch SV -NV' to bump the internal version of dsid after vcnt
    files were rearchived, then log the resulting version/DOI if available."""

    s = 's' if vcnt > 1 else ''
    cmd = "dsarch {} SV -NV -DE '{} Data file{} rearchived'".format(dsid, vcnt, s)
    if PgLOG.pgsystem(cmd, PgOPT.PGOPT['emerol'], 5): # 1 + 4
        pgrec = PgDBI.pgget('dsvrsn', '*', "dsid = '{}' and status = 'A'".format(dsid), PgOPT.PGOPT['emerol'])
        if pgrec:
            vmsg = "set to {} for DOI {}".format(pgrec['iversion'], pgrec['doi'])
        else:
            vmsg = 'renewed'

        PgLOG.pglog("{}: {} Data file{} rearchived, Internal version number {}".format(dsid, vcnt, s, vmsg), PgOPT.PGOPT['emlsum'])
        rearchived, Internal version number {}".format(dsid, vcnt, s, vmsg), PgOPT.PGOPT['emlsum'])

#
# cache the total count of files to be archived
#
def count_caching(locrec, locinfo):
    # Number of expected archive operations for one local-file record:
    # serial-pattern expansions of the local file name times the number of
    # due update periods (ALLCNT when multiple end dates were given).

    files = PgUpdt.expand_serial_pattern(locrec['locfile'])
    scnt = len(files) if files else 1

    if ALLCNT > 1:
        ecnt = ALLCNT
    else:
        # cache the temporal info so a later file_update() call can reuse it
        tinfo = TEMPINFO[locrec['lindex']] = get_tempinfo(locrec, locinfo, 0)
        ecnt = len(tinfo['ED']) if tinfo else 1

    return ecnt * scnt

#
# gather/archive due data file for update of each local file
#
def file_update(locrec, logact, caching = 0):
    # Process one dlupdt record: find the matching drupdt (remote file)
    # records, download/build/archive each due local file, then reset the
    # update times.  When caching is non-zero only the expected file count
    # is returned (via count_caching) and nothing is touched.
    # Returns the count of periods skipped as already-archived (retcnt),
    # 0/None on errors or missing prerequisites.

    lfile = locrec['locfile']
    endonly = retcnt = 0
    lindex = locrec['lindex']
    loccnd = "lindex = {}".format(lindex)
    locinfo = "{}-L{}".format(locrec['dsid'], lindex)
    if not lfile:
        if caching:
            return None
        else:
            return PgLOG.pglog(locinfo + ": local file name NOT specified", PgOPT.PGOPT['emlerr'])
    locinfo += "-" + lfile
    # only the owning specialist may update this record
    if locrec['specialist'] != PgOPT.params['LN']:
        if caching:
            return None
        else:
            return PgLOG.pglog("{}: owner '{}', NOT '{}'".format(locinfo, locrec['specialist'], PgOPT.params['LN']), PgOPT.PGOPT['emlerr'])

    if caching: return count_caching(locrec, locinfo)
    tempinfo = TEMPINFO[lindex] if lindex in TEMPINFO else get_tempinfo(locrec, locinfo, 0)
    if not tempinfo: return 0   # simply return if miss temporal info for update

    # gather the associated remote file records
    rmtcnd = loccnd
    rcnd = PgUpdt.file_condition('drupdt', ('D' if 'DO' in PgOPT.params else "RS"), None, 1)
    if rcnd: rmtcnd += " AND " + rcnd
    rmtrecs = PgDBI.pgmget("drupdt", "*", rmtcnd + " ORDER BY dindex, remotefile", PgOPT.PGOPT['extlog'])
    rcnt = len(rmtrecs['remotefile']) if rmtrecs else 0
    if rcnt == 0:
        if rcnd and PgDBI.pgget("drupdt", "", loccnd):
            return PgLOG.pglog("{}: NO remote file record matched for {}".format(locinfo, rcnd), PgOPT.PGOPT['emlerr'])
        # create an empty remote-file record
        rcnt = 1

        rmtrecs = {'lindex' : [lindex], 'dindex' : [0]}
        rflds = ['remotefile', 'serverfile', 'download', 'begintime',
                 'endtime', 'tinterval']
        for rfld in rflds: rmtrecs[rfld] = [None]
    if rcnt == 1:
        # single remote record: allow -RF/-SF command-line overrides
        if 'RF' in PgOPT.params and len(PgOPT.params['RF']) == 1 and not (rmtrecs['remotefile'][0] and PgOPT.params['RF'][0] == rmtrecs['remotefile'][0]):
            rmtrecs['remotefile'][0] = PgOPT.params['RF'][0]
        if 'SF' in PgOPT.params and len(PgOPT.params['SF']) == 1 and not (rmtrecs['serverfile'][0] and PgOPT.params['SF'][0] == rmtrecs['serverfile'][0]):
            rmtrecs['serverfile'][0] = PgOPT.params['SF'][0]
    ecnt = ALLCNT if ALLCNT > 1 else len(tempinfo['ED'])   # should be at least one

    if PgSIG.PGSIG['MPROC'] > 1:
        # multi-process mode: hand this record to a child process
        pname = "updt{}".format(lindex)
        pid = PgSIG.start_child(pname, PgOPT.PGOPT['wrnlog'], 1)   # try to start a child process
        if pid <= 0: return pid   # failed to start a child process
        if PgSIG.PGSIG['PPID'] > 1:
            PgLOG.set_email()   # empty email in child process
            PgOPT.PGOPT['acnt'] = PgOPT.PGOPT['ucnt'] = 0
        else:
            edate = tempinfo['ED'][0]
            ehour = tempinfo['EH'][0]
            lfile = PgUpdt.replace_pattern(locrec['locfile'], edate, ehour, tempinfo['FQ'])
            locinfo = "{}-L{}-{}".format(locrec['dsid'], lindex, lfile)
            if ecnt > 1: locinfo += ", {} Update Periods".format(ecnt)
            PgLOG.pglog("CPID {} for 'dsupdt {}' of {}".format(PgSIG.pname2cpid(pname), PgOPT.PGOPT['CACT'], locinfo), PgOPT.PGOPT['emllog'])
            return 1   # no further action in non-daemon program

    if PgLock.lock_update(lindex, locinfo, 1, PgOPT.PGOPT['emllog']) <= 0: return 0
    PgOPT.PGOPT['lindex'] = lindex
    # command-line options override the record's process/build commands
    tempinfo['prcmd'] = PgOPT.params['PR'][0] if 'PR' in PgOPT.params else locrec['processremote']
    tempinfo['blcmd'] = PgOPT.params['BC'][0] if 'BC' in PgOPT.params else locrec['buildcmd']
    postcnt = -1
    if PgOPT.PGOPT['UCNTL'] and PgOPT.PGOPT['CACT'] == PgOPT.PGOPT['UCNTL']['action']:
        tempinfo['postcmd'] = PgOPT.params['XC'][0] if 'XC' in PgOPT.params else PgOPT.PGOPT['UCNTL']['execcmd']
        if tempinfo['postcmd']: postcnt = 0

    setmiss = 1 if tempinfo['VD'] else 0
    ufile = uinfo = None
    rscnt = ucnt = lcnt = 0

    # loop over the due update periods (end dates/hours)
    for i in range(ecnt):
        if ALLCNT > 1 and i > 0:
            tempinfo = get_tempinfo(locrec, locinfo, i)
            if not tempinfo: break
            edate = tempinfo['ED'][0]
            ehour = tempinfo['EH'][0]
        else:
            edate = tempinfo['ED'][i]
            ehour = tempinfo['EH'][i]
        if 'RE' in PgOPT.params and i and PgUtil.diffdatehour(edate, ehour, tempinfo['edate'], tempinfo['ehour']) <= 0:
            continue
        # periodically refresh gathered metadata while updating many periods
        if ucnt and tempinfo['RS'] == 1 and i%20 == 0: refresh_metadata(locrec['dsid'])
        tempinfo['edate'] = edate
        if ehour != None:
            tempinfo['einfo'] = "end data date:hour {}:{:02}".format(edate, ehour)
            tempinfo['ehour'] = ehour
        else:
            tempinfo['einfo'] = "end data date {}".format(edate)
            tempinfo['ehour'] = None
        if 'GZ' in PgOPT.params: tempinfo['einfo'] += "(UTC)"

        locfiles = PgUpdt.get_local_names(locrec['locfile'], tempinfo)
        lcnt = len(locfiles) if locfiles else 0
        if not lcnt: break
        rmtcnt = acnt = ccnt = ut = 0
        rfiles = rfile = None
        if tempinfo['RS'] == 0 and lcnt > 2: tempinfo['RS'] = 1

        # process each local file of this update period
        for l in range(lcnt):
            if PgLOG.PGLOG['DSCHECK'] and ((l+1)%20) == 0:
                PgCMD.add_dscheck_dcount(20, 0, PgOPT.PGOPT['extlog'])
            lfile = locfiles[l]
            locinfo = "{}-L{}-{}".format(locrec['dsid'], lindex, lfile)
            tempinfo['gotnew'] = tempinfo['archived'] = 0
            # clear the cached info first; file_archive_info() returns a cached copy otherwise
            tempinfo['ainfo'] = None
            tempinfo['ainfo'] = file_archive_info(lfile, locrec, tempinfo)
            if not tempinfo['ainfo']: continue
            if tempinfo['ainfo']['archived'] == tempinfo['ainfo']['archcnt']:
                # all target archive files present already
                ufile = "{} at {} {}".format(lfile, tempinfo['ainfo']['adate'], tempinfo['ainfo']['atime'])
                tempinfo['archived'] = 1
                if 'MO' in PgOPT.params:
                    # NOTE(review): PgOPT.params['MO'] is compared as a scalar here,
                    # unlike most option values accessed as params[...][0] -- confirm
                    if PgOPT.params['MO'] < 0:
                        PgLOG.pglog("{}: {} already for {}".format(locinfo, PgOPT.PGOPT['CACT'], tempinfo['einfo']), PgOPT.PGOPT['emlsum'])
                        if i == 0: PgLOG.pglog("Add Mode option -RA if you want to re-archive", PgOPT.PGOPT['wrnlog'])
                        if 'UT' in PgOPT.params or 'ED' not in PgOPT.params: ut = 1
                        retcnt += 1
                        continue
            else:
                if PgOPT.PGOPT['ACTS']&PgOPT.OPTS['AF'][0]: uinfo = locinfo
            PgLOG.pglog("{}: {} for {}".format(locinfo, PgOPT.PGOPT['CACT'], tempinfo['einfo']), logact)
            if not change_workdir(locrec['workdir'], locinfo, tempinfo['edate'], tempinfo['ehour'], tempinfo['FQ']):
                break
            if PgOPT.PGOPT['ACTS']&PgOPT.OPTS['AF'][0]: PgOPT.PGOPT['acnt'] += 1
            if PgOPT.PGOPT['ACTS']&PgOPT.OPTS['BL'][0]: PgOPT.PGOPT['lcnt'] += 1
            opt = 1 if tempinfo['AQ'] else 65   # 1+64(remove small file)
            linfo = PgFile.check_local_file(lfile, opt, PgOPT.PGOPT['emerol'])
            cnt = -1
            if rmtcnt > 0:
                cnt = rmtcnt
                rfile = rfiles[l]
            else:
                # decide whether remote downloading is needed for this file
                dr = 1 if PgOPT.PGOPT['ACTS']&PgOPT.OPTS['PB'][0] else 0
                if linfo and PgOPT.PGOPT['CACT'] == "BL" and not tempinfo['prcmd']: dr = 0   # skip download for BL only
                if dr:
                    dfiles = None
                    for j in range(rcnt):   # process each remote record
                        pgrec = PgUtil.onerecord(rmtrecs, j)
                        if dfiles and pgrec['remotefile'] == rfile and not PgOPT.PGOPT['mcnt']:
                            continue   # skip
                        rfile = pgrec['remotefile']
                        act = 0 if locrec['action'] == 'AQ' else PgOPT.PGOPT['ACTS']&PgOPT.OPTS['DR'][0]
                        dfiles = download_remote_files(pgrec, lfile, linfo, locrec, locinfo, tempinfo, act)
                        if PgOPT.PGOPT['rstat'] < 0:
                            i = ecnt
                            break
                        if dfiles: rfiles = PgUtil.joinarray(rfiles, dfiles)

                    rmtcnt = len(rfiles) if rfiles else 0
                    if rmtcnt > 0:
                        if lcnt > 1 and rmtcnt != lcnt:
                            PgLOG.pglog("{}: {} files found for {} local files".format(locrec['locinfo'], rmtcnt, lcnt), PgOPT.PGOPT['emlerr'])
                            i = ecnt
                            break
                        cnt = rmtcnt
                        rfile = rfiles[l] if lcnt > 1 else rfiles[rmtcnt-1]   # record the break remote file name
                    else:
                        rfile = None
                        if linfo and PgOPT.PGOPT['rstat'] == 0: PgOPT.PGOPT['rstat'] = 1

            if cnt != 0 and PgOPT.PGOPT['rstat'] > 0:
                if PgOPT.PGOPT['ACTS']&(PgOPT.OPTS['BL'][0]|PgOPT.OPTS['AF'][0]):
                    if cnt < 0 and linfo:
                        if tempinfo['archived'] and PgOPT.PGOPT['CACT'] == "UF" and not tempinfo['gotnew']:
                            if PgOPT.PGOPT['ACTS']&PgOPT.OPTS['AF'][0] and 'RA' not in PgOPT.params:
                                PgLOG.pglog(lfile + ": local file archived already", PgOPT.PGOPT['emllog'])
                                cnt = 0
                        else:
                            if PgOPT.PGOPT['ACTS']&PgOPT.OPTS['BL'][0]:
                                PgLOG.pglog(lfile + ": local file exists already", PgOPT.PGOPT['emllog'])
                            cnt = 1
                    elif rmtcnt == lcnt and lfile == rfile:
                        if PgOPT.PGOPT['ACTS']&PgOPT.OPTS['BL'][0]:
                            PgLOG.pglog(lfile + ": local file same as remote file", PgOPT.PGOPT['emllog'])
                    elif not (PgOPT.PGOPT['ACTS']&PgOPT.OPTS['BL'][0]):
                        PgLOG.pglog(lfile + ": local file not built yet", PgOPT.PGOPT['emlerr'])
                        cnt = 0
                    else:
                        cnt = build_local_file(rfiles, lfile, linfo, locrec, tempinfo, lcnt, l)
                        if cnt and 'lfile' in tempinfo:
                            # the local file name may be rebuilt by an executable pattern
                            lfile = tempinfo['lfile']
                            del tempinfo['lfile']

                if cnt != 0 and (PgOPT.PGOPT['ACTS']&PgOPT.OPTS['AF'][0]):
                    file_status_info(lfile, rfile, tempinfo)
                    cnt = archive_data_file(lfile, locrec, tempinfo, i)
                    if cnt > 0:
                        ucnt += 1
                        if tempinfo['RS'] == 1: rscnt += 1
                        if postcnt > -1: postcnt += 1
                elif cnt > 0:
                    cnt = 0

            if cnt > 0 and PgOPT.PGOPT['rstat'] > 0:
                ccnt += 1
            elif 'UT' in PgOPT.params or tempinfo['archived']:
                ut = 1
                if cnt > 0: acnt += 1

        if PgLOG.PGLOG['DSCHECK']:
            PgCMD.add_dscheck_dcount(lcnt%20, 0, PgOPT.PGOPT['extlog'])
        # optional cleaning of older work files after a fully successful period
        if ccnt == lcnt and (PgOPT.PGOPT['ACTS']&PgOPT.OPTS['CF'][0]) and locrec['cleancmd']:
            if tempinfo['CVD'] and PgUtil.diffdate(edate, tempinfo['CVD']) > 0:
                clean_older_files(locrec['cleancmd'], locrec['workdir'], locinfo, tempinfo['CVD'], locrec['locfile'], rmtrecs, rcnt, tempinfo)
            else:
                if not rfiles and rcnt and locrec['cleancmd'].find(' -RF') > -1:
                    rfiles = get_all_remote_files(rmtrecs, rcnt, tempinfo, edate)
                clean_files(locrec['cleancmd'], edate, ehour, locfiles, rfiles, tempinfo['FQ'])
        # record the progress of this period back into RDADB
        if PgOPT.PGOPT['ACTS']&PgOPT.OPTS['AF'][0] or PgOPT.PGOPT['UCNTL'] and PgOPT.PGOPT['CACT'] == PgOPT.PGOPT['UCNTL']['action']:
            rmonly = 1 if PgOPT.PGOPT['rstat'] > 0 else 0
            if ccnt == lcnt:
                PgUpdt.reset_update_time(locinfo, locrec, tempinfo, ccnt, endonly)
            elif ut:
                PgUpdt.reset_update_time(locinfo, locrec, tempinfo, acnt, endonly)
            else:
                if PgOPT.PGOPT['rstat'] == 0:
                    if tempinfo['VD'] and PgUtil.diffdatehour(edate, ehour, tempinfo['VD'], tempinfo['VH']) < 0:
                        PgUpdt.reset_update_time(locinfo, locrec, tempinfo, 0, endonly)   # skip update
                        PgOPT.PGOPT['rstat'] = 1   # reset remote download status
                elif 'IE' in PgOPT.params:
                    if tempinfo['VD'] and PgUtil.diffdatehour(edate, ehour, tempinfo['VD'], tempinfo['VH']) >= 0:
                        endonly = 1
                    PgUpdt.reset_update_time(locinfo, locrec, tempinfo, 0, endonly)   # skip update
                    PgOPT.PGOPT['rstat'] = 1   # reset remote download status
                if setmiss: setmiss = PgUpdt.set_miss_time(lfile, locrec, tempinfo, rmonly)

        if postcnt > 0:
            # run the post-update command once per successful period
            postcmd = PgUpdt.executable_command(PgUpdt.replace_pattern(tempinfo['postcmd'], edate, ehour, tempinfo['FQ']),
                                                lfile, PgOPT.params['DS'], edate, ehour)
            PgLOG.pgsystem(postcmd, PgOPT.PGOPT['emllog'], 5)
            postcnt = 0
        if rscnt >= PgOPT.PGOPT['RSMAX']:
            refresh_metadata(locrec['dsid'])
            rscnt = 0
        if PgOPT.PGOPT['rstat'] < -1 or PgOPT.PGOPT['rstat'] < 0 and 'QE' in PgOPT.params: break   # unrecoverable errors

    if rscnt > 0: refresh_metadata(locrec['dsid'])
    if ufile and uinfo and ucnt == 0:
        PgLOG.pglog("{}: Last successful update - {}".format(uinfo, ufile), PgOPT.PGOPT['emlsum'])
    PgLock.lock_update(lindex, locinfo, 0, PgOPT.PGOPT['errlog'])
    PgOPT.PGOPT['lindex'] = 0

    return retcnt

#
# refresh the gathered metadata with speed up option -R and -S
#
def refresh_metadata(dsid):
    # Run the scm utility for every web-file type index collected in
    # PGOPT['wtidx'] (index 0 means refresh all), then clear the set.

    sx = "{} -d {} -r".format(PgOPT.PGOPT['scm'], dsid)
    if PgOPT.PGOPT['wtidx']:
        if 0 in PgOPT.PGOPT['wtidx']:
            PgLOG.pgsystem(sx + 'w all', PgOPT.PGOPT['emllog'], 5)
        else:
            for tidx in PgOPT.PGOPT['wtidx']:
                PgLOG.pgsystem("{}w {}".format(sx, tidx), PgOPT.PGOPT['emllog'], 5)
        PgOPT.PGOPT['wtidx'] = {}
#
# retrieve remote files
# act: > 0 - create filenames and get data files physically; 0 - create filenames only
#
def download_remote_files(rmtrec, lfile, linfo, locrec, locinfo, tempinfo, act = 0):
    # Resolve the remote/server file names for one drupdt record and, when
    # act is non-zero, download any file that is missing or newer than the
    # archived copy.  Returns the list of local names of usable downloaded
    # files, or None when nothing new was obtained or an error occurred
    # (PGOPT['rstat'] carries the status: 1 ok, 0 not ready, <0 error).

    emlsum = PgOPT.PGOPT['emlsum'] if PgOPT.PGOPT['CACT'] == "DR" else PgOPT.PGOPT['emllog']
    rfile = rmtrec['remotefile']
    rmtinfo = locinfo
    dfiles = []
    if not rfile:
        rfile = lfile
        rcnt = 1
    if rfile != locrec['locfile']: rmtinfo += "-" + rfile
    if act:
        # pick the download command: -DC option, else record, else local default
        tempinfo['DC'] = (PgOPT.params['DC'][0] if 'DC' in PgOPT.params and PgOPT.params['DC'][0] else
                          (rmtrec['download'] if rmtrec['download'] else locrec['download']))

    rfiles = PgUpdt.get_remote_names(rfile, rmtrec, rmtinfo, tempinfo)
    rcnt = len(rfiles) if rfiles else 0
    if rcnt == 0:
        PgOPT.PGOPT['rstat'] = -2
        return PgLOG.pglog(rmtinfo + ": NO remote file name identified", PgOPT.PGOPT['emlerr'])

    PgOPT.PGOPT['rcnt'] += rcnt   # accumulate remote file counts
    if tempinfo['DC']: tempinfo['DC'] = None

    if act:   # get file names on remote server and create download command
        sfile = rmtrec['serverfile']
        if sfile and sfile != rfile:
            sfiles = PgUpdt.get_remote_names(sfile, rmtrec, rmtinfo, tempinfo)
            scnt = len(sfiles) if sfiles else 0
            if scnt != rcnt:
                PgOPT.PGOPT['rstat'] = -2
                return PgLOG.pglog("{}/{}: {}/{} MISS match file counts".format(rmtinfo, sfile, rcnt, scnt), PgOPT.PGOPT['emlerr'])
        else:
            sfiles = rfiles
            scnt = rcnt

    if tempinfo['AQ']:
        # Quasar backup: the "remote" files live under the dataset web/saved tree
        tstr = tempinfo['AQ']
        if tstr == 'Web':
            rpath = "{}/{}/".format(PgLOG.PGLOG['DSDHOME'], PgOPT.params['DS'])
        else:
            rpath = "{}/{}/{}/".format(PgLOG.PGLOG['DECSHOME'], PgOPT.params['DS'], tempinfo['ST'])
    else:
        tstr = 'Remote'
        rpath = ''

    ks = 1 if 'KS' in PgOPT.params else 0
    PgOPT.PGOPT['mcnt'] = ocnt = ecnt = scnt = dcnt = ncnt = 0
    omsize = PgLOG.PGLOG['MINSIZE']
    if 'VS' in tempinfo and 'VS' not in PgOPT.params: PgLOG.PGLOG['MINSIZE'] = tempinfo['VS']
    for i in range(rcnt):
        rfile = rfiles[i]
        rname = rfile['fname']
        rcmd = rfile['rcmd']
        rinfo = PgFile.check_local_file(rpath + rname, 65, PgOPT.PGOPT['emerol'])   # 65 = 1 + 64
        gotnew = 0
        if not act:
            # name-only mode: count files already at local
            if rinfo:
                dfiles.append(rname)
                dcnt += 1
            else:
                ecnt += 1
                if rfile['amiss']:
                    PgLOG.pglog(rname + ": SKIP for NOT gotten {} file yet".format(tstr), PgOPT.PGOPT['emlerr'])
                    PgOPT.PGOPT['mcnt'] += 1
                elif 'IE' in PgOPT.params:
                    PgLOG.pglog(rname + ": NOT gotten {} file yet".format(tstr), PgOPT.PGOPT['emlerr'])
                    PgOPT.PGOPT['rstat'] = -1
                else:
                    PgLOG.pglog(rname + ": ERROR for NOT gotten {} file yet".format(tstr), PgOPT.PGOPT['emlerr'])
                    PgOPT.PGOPT['rstat'] = -2
                    break
            continue
        elif rinfo and 'RD' not in PgOPT.params:
            # a local copy exists and re-download is not forced
            if not rcmd:
                dfiles.append(rname)
                dcnt += 1
                if tempinfo['archived']:
                    if 'CN' not in PgOPT.params:
                        ocnt += 1
                    elif PgUtil.cmptime(rinfo['date_modified'], rinfo['time_modified'], tempinfo['ainfo']['adate'], tempinfo['ainfo']['atime']) < 1:
                        ocnt += 1
                        PgLOG.pglog("{}: ARCHIVED, NO newer remote file {} found".format(lfile, rname), PgOPT.PGOPT['emllog'])
                continue
            elif 'CN' in PgOPT.params:
                if rfile['ready'] == -1:   # out of check new period already
                    dfiles.append(rname)
                    dcnt += 1
                    if tempinfo['archived']: ocnt += 1
                    continue
            elif PgUtil.cmptime(rinfo['date_modified'], rinfo['time_modified'], rfile['date'], rfile['time']) >= 0:
                dfiles.append(rname)
                dcnt += 1
                if tempinfo['archived']:
                    ocnt += 1
                else:
                    PgLOG.pglog(rname + ": IS local already", PgOPT.PGOPT['emllog'])
                continue

        sfile = sfiles[i]
        sname = sfile['fname']
        sinfo = rinfo if sname == rname else PgFile.check_local_file(sname, 65, PgOPT.PGOPT['emerol'])
        dact = get_download_action(rcmd)
        rdcnt = 1 if re.search(r'(ncftpget|wget) ', dact) else 0
        dcmd = derr = ""
        info0 = cfile = pcmd = bname = None
        ftype = "remote" if sname == rname else "server"
        if sinfo:
            if rcmd:
                if 'RD' in PgOPT.params:
                    # NOTE(review): 'ftype'/'dact' appear literally in this message;
                    # they look like they were meant to be interpolated -- confirm
                    PgLOG.pglog(sname + ": ftype file is local, Try dact again", PgOPT.PGOPT['emllog'])
                elif ('CN' not in PgOPT.params and
                      PgUtil.cmptime(sinfo['date_modified'], sinfo['time_modified'], sfile['date'], sfile['time']) >= 0):
                    rcmd = None   # do not need download again
            else:
                PgLOG.pglog("{}: USE the local copy of {} file for NO download command".format(sname, ftype), PgOPT.PGOPT['emllog'])
        elif not rcmd:
            if tempinfo['archived']:
                ocnt += 1
                PgLOG.pglog("{}: ARCHIVED, NO need get {} file {} again for NO download command".format(lfile, ftype, sname), emlsum)
            else:
                ecnt += 1
                if rfile['amiss']:
                    PgLOG.pglog(rname + ": SKIP missing remote file for NO download command", PgOPT.PGOPT['emlerr'])
                    PgOPT.PGOPT['mcnt'] += 1
                elif 'IE' in PgOPT.params:
                    PgLOG.pglog(rname + ": MISS remote file for NO download command", PgOPT.PGOPT['emlerr'])
                    PgOPT.PGOPT['rstat'] = -1
                else:
                    PgLOG.pglog(rname + ": ERROR missing remote file for NO download command", PgOPT.PGOPT['emlerr'])
                    PgOPT.PGOPT['rstat'] = -2
                    break
            continue

        if rcmd:   # try to download now
            if not sfile['ready']:
                PgOPT.PGOPT['rstat'] = 0
                PgLOG.pglog("{}: {} file NOT Ready yet".format(sname, ftype), PgOPT.PGOPT['emllog'])
                ecnt += 1
                break
            if 'CN' in PgOPT.params:
                # choose the reference file for the newer-file check
                if sinfo:
                    cfile = sname
                elif rinfo:
                    cfile = rname
                    info0 = rinfo
                elif rcnt == 1 and linfo:
                    cfile = lfile
                    info0 = linfo
                elif tempinfo['archived']:
                    cfile = ''

            dcmd = PgUpdt.executable_command(rcmd, sname, PgOPT.params['DS'], sfile['date'], sfile['hour'])
            if tempinfo['AT']:
                stat = check_agetime(dcmd, sname, tempinfo['AT'])
                if stat <= 0:
                    PgOPT.PGOPT['rstat'] = stat
                    ecnt += 1
                    break
            if cfile != None:
                stat = check_newer_file(dcmd, cfile, tempinfo['ainfo'])
                if stat > 0:
                    if cfile != sname:
                        if stat < 3: PgLOG.pglog("{}: Found newer {} file {}".format(cfile, ftype, sname), emlsum)
                    else:
                        if stat < 3: PgLOG.pglog("{}: Found newer {} file".format(cfile, ftype), emlsum)
                    if stat == 2:   # file re-downloaded, reget file info
                        sinfo = PgFile.check_local_file(sname, 64, PgOPT.PGOPT['emerol'])
                    else:   # force download file
                        cfile = None
                else:
                    if stat < 0:
                        if PgOPT.PGOPT['STATUS']:
                            if cfile != sname:
                                PgLOG.pglog("{}: Error check newer {} file {}\n{}".format(cfile, ftype, sname, PgOPT.PGOPT['STATUS']), PgOPT.PGOPT['emlerr'])
                            else:
                                PgLOG.pglog("{}: Error check newer {} file\n{}".format(cfile, ftype, PgOPT.PGOPT['STATUS']), PgOPT.PGOPT['emlerr'])
                        else:
                            if cfile != sname:
                                PgLOG.pglog("{}: Cannot check newer {} file {} via {}".format(cfile, ftype, sname, dcmd), PgOPT.PGOPT['emlsum'])
                            else:
                                PgLOG.pglog("{}: Cannot check newer {} file via {}".format(cfile, ftype, dcmd), PgOPT.PGOPT['emlsum'])

                    if stat < -1:   # unrecoverable error
                        PgOPT.PGOPT['rstat'] = stat
                        ecnt += 1
                        break
                    elif cfile and cfile != sname:
                        PgLOG.pglog("{}: NO newer {} file {} found\n{}".format(cfile, ftype, sname, PgOPT.PGOPT['STATUS']), emlsum)
                    else:
                        PgLOG.pglog("{}: NO newer {} file found\n{}".format(sname, ftype, PgOPT.PGOPT['STATUS']), emlsum)

                    if tempinfo['archived']:
                        ncnt += 1
                        if rcnt == 1: continue
                    if not info0: info0 = sinfo
                    sinfo = None

            if not cfile:
                # keep the old copy as <name>.rd so a failed download can be rolled back
                if op.isfile(sname) and PgLOG.pgsystem("mv -f {} {}.rd".format(sname, sname), PgOPT.PGOPT['emerol'], 4):
                    bname = sname + ".rd"
                    if not info0: info0 = PgFile.check_local_file(bname, 64, PgOPT.PGOPT['emerol'])
                if dcmd.find('wget ') > -1: PgUpdt.slow_web_access(dcmd)
                PgLOG.pgsystem(dcmd, PgOPT.PGOPT['wrnlog'], 257)   # 1 + 256
                derr = PgLOG.PGLOG['SYSERR']
                sinfo = PgFile.check_local_file(sname, 70, PgOPT.PGOPT['emerol'])
                if sinfo:
                    mode = 0o664 if sinfo['isfile'] else 0o775
                    if mode != sinfo['mode']: PgFile.set_local_mode(sname, sinfo['isfile'], mode, sinfo['mode'], sinfo['logname'], PgOPT.PGOPT['emerol'])

                (stat, derr) = PgUpdt.parse_download_error(derr, dact, sinfo)
                if stat < -1:   # unrecoverable error
                    PgLOG.pglog("{}: error {}\n{}".format(sname, dcmd, derr), PgOPT.PGOPT['emlerr'])
                    PgOPT.PGOPT['rstat'] = stat
                    ecnt += 1
                    break
                elif stat > 0 and PgLOG.PGLOG['DSCHECK'] and sinfo:
                    PgCMD.add_dscheck_dcount(0, sinfo['data_size'], PgOPT.PGOPT['extlog'])

        if sinfo:
            if info0:
                # compare the fresh download against the retained old copy
                if info0['data_size'] == sinfo['data_size'] and bname:
                    if PgFile.compare_md5sum(bname, sname, PgOPT.PGOPT['emlsum']):
                        PgLOG.pglog("{}: GOT same size, but different content, {} file via {}".format(sname, ftype, dact), PgOPT.PGOPT['emlsum'])
                        tempinfo['gotnew'] = gotnew = 1
                        PgOPT.PGOPT['rdcnt'] += rdcnt
                        scnt += 1
                    else:
                        PgLOG.pglog("{}: GOT same {} file via {}".format(sname, ftype, dact), emlsum)
                        if rinfo and rname != sname and 'KS' not in PgOPT.params:
                            PgLOG.pgsystem("rm -f " + sname, PgOPT.PGOPT['emllog'], 5)
                            sinfo = None
                        if tempinfo['archived']:
                            ncnt += 1
                else:
                    PgLOG.pglog("{}: GOT different {} file via {}".format(sname, ftype, dact), PgOPT.PGOPT['emlsum'])
                    tempinfo['gotnew'] = gotnew = 1
                    PgOPT.PGOPT['rdcnt'] += rdcnt
                    scnt += 1
                if bname: PgLOG.pgsystem("rm -rf " + bname, PgOPT.PGOPT['emerol'], 4)
            elif rcmd:
                PgLOG.pglog("{}: GOT {} file via {}".format(sname, ftype, dact), emlsum)
                PgOPT.PGOPT['rdcnt'] += rdcnt
                scnt += 1

            PgOPT.PGOPT['dcnt'] += 1
            if tempinfo['prcmd']: pcmd = tempinfo['prcmd']
        elif info0:
            if bname:
                PgLOG.pglog("{}: RETAIN the older {} file".format(sname, ftype), emlsum)
                PgLOG.pgsystem("mv -f {} {}".format(bname, sname), PgOPT.PGOPT['emerol'], 4)
            if tempinfo['prcmd']: pcmd = tempinfo['prcmd']
            sinfo = info0
        elif cfile:
            if tempinfo['archived']:
                ocnt += 1
            elif rcnt == 1:
                if tempinfo['prcmd']: pcmd = tempinfo['prcmd']
                if cfile == sname:
                    sinfo = info0
                elif not rinfo and cfile == lfile:
                    continue
        elif not cfile:
            ecnt += 1
            if sfile['amiss']:
                PgLOG.pglog("{}: SKIP {} file for FAIL {}\n{}".format(sname, ftype, dact, derr), PgOPT.PGOPT['emlsum'])
                PgOPT.PGOPT['mcnt'] += 1
            else:
                PgOPT.PGOPT['rstat'] = 0 if 'IE' in PgOPT.params else -1
                if not derr or derr and derr.find(PgLOG.PGLOG['MISSFILE']) > -1:
                    msg = "{}: NOT Available for {}\n".format(sname, dact)
                    PgLOG.set_email(msg, PgOPT.PGOPT['emlsum'])
                    if derr: PgLOG.pglog(derr, PgOPT.PGOPT['emllog'])
                else:
                    PgLOG.pglog("{}: ERROR {}\n{}".format(sname, dact, derr), PgOPT.PGOPT['emlerr'])
                if PgOPT.PGOPT['rstat'] < 0: break
            continue
        else:
            ecnt += 1
            if sfile['amiss']: PgOPT.PGOPT['mcnt'] += 1
            continue

        if sinfo:
            if rname == sname:
                rinfo = sinfo
            elif not rinfo or gotnew:
                # convert the server file into the expected remote file name
                if rinfo: PgLOG.pgsystem("rm -f " + rname, PgOPT.PGOPT['emerol'], 5)
                if PgFile.convert_files(rname, sname, ks, PgOPT.PGOPT['emerol']):
                    rinfo = PgFile.check_local_file(rname, 64, PgOPT.PGOPT['emerol'])
                else:
                    PgOPT.PGOPT['rstat'] = -1
                    ecnt += 1
                    break

        if not rinfo:
            ecnt += 1
            if sfile['amiss']:
                PgLOG.pglog(rname + ": SKIP missing remote file", PgOPT.PGOPT['emlsum'])
                PgOPT.PGOPT['mcnt'] += 1
            elif 'IE' in PgOPT.params:
                PgLOG.pglog(rname + ": MISS remote file", PgOPT.PGOPT['emlerr'])
                PgOPT.PGOPT['rstat'] = -1
            else:
                PgLOG.pglog(rname + ": ERROR missing remote file", PgOPT.PGOPT['emlerr'])
                PgOPT.PGOPT['rstat'] = -2
                break
            continue

        if pcmd:
            # run the process-remote command against the downloaded file
            pcmd = PgUpdt.executable_command(PgUpdt.replace_pattern(pcmd, rfile['date'], rfile['hour'], tempinfo['FQ']),
                                             rname, PgOPT.params['DS'], rfile['date'], rfile['hour'])
            if not PgLOG.pgsystem(pcmd, PgOPT.PGOPT['emllog'], 259):
                if PgLOG.PGLOG['SYSERR']: PgLOG.pglog(PgLOG.PGLOG['SYSERR'], PgOPT.PGOPT['emlerr'])
                PgOPT.PGOPT['rstat'] = -1
                ecnt += 1
                break
        dfiles.append(rname)
        dcnt += 1

    PgLOG.PGLOG['MINSIZE'] = omsize
    if ncnt == rcnt:
        PgOPT.PGOPT['rstat'] = 0
        if dcnt > 0: dcnt = 0
    elif ecnt > 0:
        s = 's' if rcnt > 1 else ''
        if dcnt > scnt:
            PgLOG.pglog("{}/{} of {} rfile{} obtained/at local".format(scnt, dcnt, rcnt, s), PgOPT.PGOPT['emllog'])
        else:
            PgLOG.pglog("{} of {} rfile{} obtained".format(scnt, rcnt, s), PgOPT.PGOPT['emllog'])
        if dcnt > 0 and ocnt > 0: dcnt = 0
    elif ocnt == rcnt:
        PgOPT.PGOPT['rstat'] = 0

    return dfiles if PgOPT.PGOPT['rstat'] == 1 and dcnt > 0 else None
#
# build up local files
#
def build_local_file(rfiles, lfile, linfo, locrec, tempinfo, lcnt, l):
    """Build local file lfile from the downloaded remote files.

    The file is built either by the record's build command, by an executable
    local-file pattern ('!cmd'), or by tarring/converting the remote files.
    An existing local copy is kept aside as <lfile>.rb and restored if the
    build fails.  Returns 1 on success, 0 on failure.
    """

    emlsum = PgOPT.PGOPT['emlsum'] if (PgOPT.PGOPT['ACTS'] == PgOPT.OPTS['BL'][0]) else PgOPT.PGOPT['emllog']

    if lcnt > 1:
        rcnt = 1
        rmax = l + 1
    else:
        rmax = rcnt = len(rfiles) if rfiles else 0

    rbfile = None
    if linfo:
        if rcnt == 1 and lfile == rfiles[l]: return 1
        # BUGFIX: the backup name must be built BEFORE the move; previously the
        # command was formatted with rbfile still None ("mv -f <lfile> None")
        bfile = lfile + '.rb'
        if PgLOG.pgsystem("mv -f {} {}".format(lfile, bfile), PgOPT.PGOPT['emerol'], 4):
            rbfile = bfile   # backup exists; restore it if the build fails
    else:
        s = op.dirname(lfile)
        if s and not op.isdir(s): PgFile.make_local_directory(s, PgOPT.PGOPT['emllog']|PgLOG.EXITLG)

    # work out the compression extension requested via the -AF archive format
    cext = None
    if locrec['options']:
        ms = re.search(r'-AF\s+([\w\.]+)', locrec['options'], re.I)
        if ms:
            fmt = ms.group(1)
            ms = re.search(r'(\w+)\.TAR(\.|$)', fmt, re.I)
            if ms:   # check compression before tarring
                fmt = ms.group(1)
                ms = re.match(r'^({})$'.format(PgFile.CMPSTR), fmt, re.I)
                if ms: cext = '.' + fmt

    if tempinfo['blcmd']:
        # a build command is configured; run it and validate the result
        blcmd = PgUpdt.executable_command(PgUpdt.replace_pattern(tempinfo['blcmd'], tempinfo['edate'], tempinfo['ehour'], tempinfo['FQ']),
                                          lfile, PgOPT.params['DS'], tempinfo['edate'], tempinfo['ehour'])
        if not PgLOG.pgsystem(blcmd, PgOPT.PGOPT['emllog']) or PgFile.local_file_size(lfile, 2, PgOPT.PGOPT['emerol']) <= 0:
            ret = PgLOG.pglog("{}: error build {}".format(blcmd, lfile), PgOPT.PGOPT['emlerr'])
        else:
            PgOPT.PGOPT['bcnt'] += 1
            ret = 1

        if rbfile:
            if ret:
                PgLOG.pgsystem("rm -rf " + rbfile, PgOPT.PGOPT['emerol'], 4)
            else:
                PgLOG.pglog(lfile + ": RETAIN the older local file", emlsum)
                PgLOG.pgsystem("mv -f {} {}".format(rbfile, lfile), PgOPT.PGOPT['emerol'], 4)
        return ret

    if lfile[0] == '!':   # executable for build up local file name
        blcmd = PgUpdt.executable_command(lfile[1:], None, PgOPT.params['DS'], tempinfo['edate'], tempinfo['ehour'])
        lfile = PgLOG.pgsystem(blcmd, PgOPT.PGOPT['emllog'], 21)
        if lfile and PgFile.local_file_size(lfile, 2, PgOPT.PGOPT['emerol']) > 0:
            tempinfo['lfile'] = lfile   # pass the generated name back to the caller
            return 1
        else:
            return PgLOG.pglog("{}: error build {}".format(blcmd, lfile), PgOPT.PGOPT['emlerr'])

    if rcnt == 0 and not linfo: return 0   # no remote file found to build local file

    ret = 1
    kr = 1 if 'KR' in PgOPT.params else 0
    if rcnt == 1 and not op.isdir(rfiles[l]):
        rfile = rfiles[l]
    else:
        # strip any compression extension: the tar file is compressed last
        ms = re.match(r'^(.+)\.({})$'.format(PgFile.CMPSTR), lfile, re.I)
        rfile = ms.group(1) if ms else lfile
    fd = None
    if tempinfo['AQ']:
        # Quasar backup: write an input-file list instead of tarring
        if not PgOPT.validate_one_infile(rfile, PgOPT.params['DS']): return 0
        fd = open(rfile, 'w')
        fd.write(tempinfo['AQ'] + "File\n")

    for i in range(rmax):
        tfile = rfiles[i]
        if fd:
            fd.write(tfile + "\n")
            continue

        if op.isfile(tfile) and cext and not re.search(r'{}$'.format(cext), tfile, re.I):
            # compress the member file before adding it to the tar file
            ms = re.match(r'^(.+)\.({})$'.format(PgFile.CMPSTR), tfile, re.I)
            if ms: tfile = ms.group(1)
            tfile += cext
            if not PgFile.convert_files(tfile, rfiles[i], kr, PgOPT.PGOPT['emllog']):
                if op.exists(rfile): PgLOG.pgsystem("rm -f " + rfile, PgOPT.PGOPT['emllog'])
                ret = PgLOG.pglog("{}: QUIT converting file from {}".format(rfile, tfile), PgOPT.PGOPT['emllog'])
                break
        # create the tar file on the first member, update it afterwards
        cmd = "tar -{}vf {} {}".format('u' if i else 'c', rfile, tfile)
        ret = PgLOG.pgsystem(cmd, PgOPT.PGOPT['emllog'])
        if not ret: break

    if fd:
        ret = -1   # input list written; negative flags "no size check" below
        fd.close()

    if op.exists(rfile):
        s = "s" if rcnt > 1 else ""
        if tempinfo['AQ']:
            PgLOG.pglog("{}: input file CREATED for backing up {} {} file{}".format(rfile, rcnt, tempinfo['AQ'], s), emlsum)
        else:
            PgLOG.pglog("{}: tar file CREATED from {} file{}".format(rfile, rcnt, s), emlsum)
    else:
        ret = PgLOG.pglog(rfile + ": ERROR creating tar file", PgOPT.PGOPT['emlerr'])

    if ret > 0:
        if lfile != rfile:
            # compress/rename into the final local file name
            ret = PgFile.convert_files(lfile, rfile, kr, PgOPT.PGOPT['emllog'])
            if ret: PgLOG.pglog("{}: BUILT from {}".format(lfile, rfile), emlsum)
        if ret:
            fsize = PgFile.local_file_size(lfile, 3, PgOPT.PGOPT['emerol'])
            if fsize > 0:
                PgOPT.PGOPT['bcnt'] += 1
                if PgLOG.PGLOG['DSCHECK']: PgCMD.add_dscheck_dcount(0, fsize, PgOPT.PGOPT['extlog'])
            else:
                ret = 0

    if rbfile:
        if ret:
            PgLOG.pgsystem("rm -rf " + rbfile, PgOPT.PGOPT['emerol'], 4)
        else:
            PgLOG.pglog(lfile + ": RETAIN the older local file", emlsum)
            PgLOG.pgsystem("mv -f {} {}".format(rbfile, lfile), PgOPT.PGOPT['emerol'], 4)

    return 1 if ret else 0
#
# append data type to options for given type name if not in options
#
def append_data_type(tname, options):
    """Return options with '-<tname> <default type>' appended when the flag is absent."""

    mp = r'(^|\s)-{}(\s|$)'.format(tname)
    if not re.search(mp, options, re.I): options += " -{} {}".format(tname, DEFTYPES[tname])
    return options

#
# get data type from options for given type name, and default one if not in options
#
def get_data_type(tname, options):
    """Return the one-letter type value following '-<tname>' in options, or the default."""

    mp = r'(^|\s)-{}\s+(\w)(\s|$)'.format(tname)
    ms = re.search(mp, options, re.I)
    return ms.group(2) if ms else DEFTYPES[tname]

#
# archive a data file
#
def archive_data_file(lfile, locrec, tempinfo, eidx):
    """Archive local file lfile by invoking 'dsarch' with options built from
    the update record and temporal info.  Skips files that are unchanged
    since the last archive unless re-archiving (-RA) is requested.
    Returns the dsarch exit status (>0 success) or -1 when skipped.
    """

    growing = -1
    if tempinfo['ainfo']:
        ainfo = tempinfo['ainfo']
        if ainfo['vindex']: growing = PgUpdt.is_growing_file(locrec['locfile'], tempinfo['FQ'])
        tempinfo['ainfo'] = None   # clean the archive info recorded earlier
    else:
        ainfo = {'archived' : 0, 'note' : None}   # reference to empty hash

    PgLOG.pglog("{}: start {} for {}".format(lfile, locrec['action'], tempinfo['einfo']), PgOPT.PGOPT['emllog'])

    options = locrec['options'] if locrec['options'] else ""
    act = locrec['action']
    archfile = None
    if locrec['archfile']: archfile = PgUpdt.replace_pattern(locrec['archfile'], tempinfo['edate'], tempinfo['ehour'], tempinfo['FQ'])
    # make sure the data-type option matching the archive action is present
    if act == 'AW':
        if archfile and 'wfile' not in ainfo: ainfo['wfile'] = archfile
        options = append_data_type('WT', options)
    elif act == 'AS':
        if archfile and 'sfile' not in ainfo: ainfo['sfile'] = archfile
        options = append_data_type('ST', options)
    elif act == 'AQ':
        if archfile and 'bfile' not in ainfo: ainfo['bfile'] = archfile
        options = append_data_type('QT', options)

    if tempinfo['archived'] and not ('RA' in PgOPT.params and growing > 0):
        # skip when the archived copy matches by checksum, or by size and time
        if (ainfo['chksm'] and ainfo['chksm'] == PgOPT.PGOPT['chksm'] or
            ainfo['asize'] and ainfo['asize'] == PgOPT.PGOPT['fsize'] and
            PgUtil.cmptime(PgOPT.PGOPT['fdate'], PgOPT.PGOPT['ftime'], ainfo['adate'], ainfo['atime']) >= 0):
            if 'RA' not in PgOPT.params:
                amsg = "{}: ARCHIVED by {}".format(lfile, ainfo['adate'])
                if tempinfo['ehour'] != None: amsg += ":{:02}".format(ainfo['ahour'])
                PgLOG.pglog(amsg, PgOPT.PGOPT['emllog'])
                if eidx == 0: PgLOG.pglog("Add Mode option -RA if you want to re-archive", PgOPT.PGOPT['emllog'])
                return -1
        elif growing == 0:
            growing = -1

    if growing == 0: tempinfo['archived'] = move_archived_file(ainfo, tempinfo['archived'])

    if tempinfo['AQ']:
        ifopt = 'IF'   # Quasar backup passes an input-file list
    else:
        ifopt = 'LF'
    acmd = "dsarch {} {} -{} {}".format(PgOPT.params['DS'], act, ifopt, lfile)
    if 'wfile' in ainfo: acmd += " -WF " + ainfo['wfile']
    if 'sfile' in ainfo: acmd += " -SF " + ainfo['sfile']
    if 'bfile' in ainfo: acmd += " -QF " + ainfo['bfile']
    if PgOPT.PGOPT['chksm']: acmd += " -MC " + PgOPT.PGOPT['chksm']

    if growing > 0 and not re.search(r'(^|\s)-GF(\s|$)', options, re.I): acmd += " -GF"
    if 'MD' in PgOPT.params and not re.search(r'(^|\s)-MD(\s|$)', options, re.I): acmd += " -MD"
    if not re.search(r'(^|\s)-NE(\s|$)', options, re.I): acmd += " -NE"   # no email in dsarch
    if tempinfo['gotnew'] and not re.search(r'(^|\s)-OE(\s|$)', options, re.I): acmd += " -OE"
    if 'VS' in PgOPT.params:
        acmd += " -VS {}".format(PgOPT.params['VS'])
        # FIX: raw string; '\s'/'\d' in a plain string raise invalid-escape warnings
        if 'VS' in tempinfo: options = re.sub(r'-VS\s+\d+(\s+|$)', '', options, flags=re.I)
    if tempinfo['RS'] == 1: acmd += " -RS"

    fnote = None
    if locrec['note'] and not re.search(r'(^|\s)-DE(\s|$)', options, re.I):
        note = build_data_note(ainfo['note'], lfile, locrec, tempinfo)
        if note:
            if re.search(r'(\n|\"|\')', note):   # if found \n or ' or ", create temporary input file
                fnote = PgOPT.params['DS'] + ".note"
                nd = open(fnote, 'w')
                nd.write("DE<:>\n{}<:>\n".format(note))
                nd.close()
                acmd += " -IF " + fnote
            else:
                acmd += " -DE '{}'".format(note)

    if options:
        # FIX: pass count/flags as keywords; positional use is deprecated (3.13+)
        if locrec['cleancmd']: options = re.sub(r'(^-NW\s+|\s+-NW$)', '', options, count=1, flags=re.I)
        acmd += " " + PgUpdt.replace_pattern(options, tempinfo['edate'], tempinfo['ehour'], tempinfo['FQ'])

    ret = PgLOG.pgsystem(acmd, PgOPT.PGOPT['emerol'], 69)   # 1 + 4 + 64
    if fnote: PgLOG.pgsystem("rm -f " + fnote, PgOPT.PGOPT['emerol'], 4)

    tempinfo['ainfo'] = file_archive_info(lfile, locrec, tempinfo)
    # count_update_files updates the global archive counters as a side effect;
    # NOTE(review): its returned summary string is currently unused -- confirm
    note = count_update_files(ainfo, tempinfo['ainfo'], ret, tempinfo['RS'])
    PgLOG.pglog("{}: UPDATED({}) for {}".format(lfile, locrec['action'], tempinfo['einfo']), PgOPT.PGOPT['emlsum'])

    return ret
#
# count files updated
#
def count_update_files(oinfo, ninfo, success, rsopt):
    """Compare old/new archive info, bump the per-type update counters in
    PGOPT and collect web-file type indices for metadata refresh (rsopt 1).
    Returns a short summary string describing what was (re-)archived.
    oinfo/ninfo may be None or sparse dicts; missing keys are tolerated.
    """

    # FIX: guarded access -- callers can pass None or a bare {'archived': 0,
    # 'note': None} dict, which used to raise KeyError/TypeError here
    nrecs = ninfo.get('types', {}) if ninfo else {}
    orecs = oinfo.get('types', {}) if oinfo else {}
    archcnt = ninfo.get('archcnt', 0) if ninfo else 0
    astrs = []
    astr = ""

    for dtype in nrecs:   # renamed from 'type' to avoid shadowing the builtin
        nrec = nrecs[dtype]
        orec = orecs[dtype] if dtype in orecs else None

        if 'sfile' in nrec:
            atype = "Saved {} File".format(PgOPT.STYPE[dtype])
        elif 'bfile' in nrec:
            atype = "Quasar backup {} File".format(PgOPT.BTYPE[dtype])
        else:
            atype = "RDA {} File".format(PgOPT.WTYPE[dtype])
            if rsopt == 1:
                # remember the web-file type index for the metadata refresh
                tidx = nrec['tindex'] if nrec['tindex'] else 0
                PgOPT.PGOPT['wtidx'][tidx] = 1

        if (not orec or
            nrec['data_size'] != orec['data_size'] or
            PgUtil.cmptime(orec['date_modified'], orec['time_modified'], nrec['date_modified'], nrec['time_modified']) or
            not (nrec['checksum'] and orec['checksum'] and nrec['checksum'] == orec['checksum'])):
            if 'sfile' in nrec:
                PgOPT.PGOPT['uscnt'] += 1
            elif 'bfile' in nrec:
                if dtype == 'D': PgOPT.PGOPT['qdcnt'] += 1
                PgOPT.PGOPT['qbcnt'] += 1
            elif dtype == 'D':
                PgOPT.PGOPT['udcnt'] += 1
            elif dtype == 'N':
                PgOPT.PGOPT['uncnt'] += 1
            else:
                PgOPT.PGOPT['uwcnt'] += 1

            astrs.append("{} {}rchived".format(atype, "Re-a" if orec else "A"))
            if PgLOG.PGLOG['DSCHECK']:
                PgCMD.add_dscheck_dcount(0, nrec['data_size'], PgOPT.PGOPT['extlog'])

    if astrs:
        PgOPT.PGOPT['ucnt'] += 1
        if len(astrs) < archcnt:
            if success:
                astr = " Successful, but only "
            else:
                astr = " Partially finished, "
        astr += ', '.join(astrs)
    else:
        if success:
            astr = " Successful, but NO file Re-archived"
        else:
            present = oinfo.get('present') if oinfo else None
            astr = " Failed, NO file {}rchived".format('Re-a' if present == archcnt else "A")

    if astr:
        s = "s" if archcnt > 1 else ""
        astr += " of {} archfile{}".format(archcnt, s)

    return astr
#
# get the temporal info in local and remote file names and the possible values
# between the break update and the current date
# BTW, change to working directory
#
def get_tempinfo(locrec, locinfo, eidx = 0):
    # Build the temporal-control dict for one dlupdt record: parse valid/age/
    # due intervals and the update frequency, then fill 'ED'/'EH' with every
    # end date/hour that is due between the recorded end time and now.
    # Returns None (via PgLOG.pglog) when the update is not due or info is missing.

    # get data end date for update action
    edate = PgOPT.params['ED'][eidx] if ('ED' in PgOPT.params and PgOPT.params['ED'][eidx]) else locrec['enddate']
    if not edate: return PgLOG.pglog(locinfo + ": MISS End Data Date for local update", PgOPT.PGOPT['emlerr'])
    ehour = PgOPT.params['EH'][eidx] if ('EH' in PgOPT.params and PgOPT.params['EH'][eidx] != None) else locrec['endhour']
    if not isinstance(edate, str): edate = str(edate)

    if ehour is None and PgDBI.pgget('drupdt', '', "lindex = {} and tinterval like '%H'".format(locrec['lindex'])):
        return PgLOG.pglog(locinfo + ": MISS End Data Hour for hourly remote update", PgOPT.PGOPT['emlerr'])

    # valid interval: record value first, then the update-control default
    if locrec['validint']:
        val = locrec['validint']
    elif PgOPT.PGOPT['UCNTL'] and PgOPT.PGOPT['UCNTL']['validint']:
        val = PgOPT.PGOPT['UCNTL']['validint']
    else:
        val = None

    tempinfo = {'AT' : None, 'DC' : None, 'ED' : [], 'EH' : [], 'VI' : None,
                'VD' : None, 'VH' : None, 'CVD' : None, 'NX' : None, 'FQ' : None,
                'QU' : None, 'EP' : 0, 'RS' : -1, 'AQ' : None}

    if val: val = PgUpdt.get_control_time(val, "Valid Internal")
    if val:
        tempinfo['VI'] = val
        if ehour is None and val[3]: ehour = 0   # val[3] non-zero means hourly resolution

    val = PgUpdt.get_control_time(locrec['agetime'], "File Age Time")
    if val:
        tempinfo['AT'] = val
        if ehour is None and val[3]: ehour = 0

    frequency = PgOPT.params['FQ'][0] if 'FQ' in PgOPT.params else locrec['frequency']
    if frequency:   # get data update frequency info
        (val, unit) = PgOPT.get_control_frequency(frequency)
        if val:
            tempinfo['FQ'] = val
            tempinfo['QU'] = unit   # update frequency unit of measure
        else:
            locinfo = PgUpdt.replace_pattern(locinfo, edate, ehour)
            return PgLOG.pglog("{}: {}".format(locinfo, unit), PgOPT.PGOPT['emlerr'])
        if locrec['endperiod']: tempinfo['EP'] = locrec['endperiod']
        if val[3] and ehour is None: ehour = 0
        edate = PgUtil.enddate(edate, tempinfo['EP'], unit, tempinfo['FQ'][6])
    elif 'MU' in PgOPT.params or 'CP' in PgOPT.params:
        locinfo = PgUpdt.replace_pattern(locinfo, edate, ehour)
        return PgLOG.pglog(locinfo + ": MISS frequency for Update", PgOPT.PGOPT['emlerr'])

    val = PgUpdt.get_control_time(locrec['nextdue'], "Due Internval")
    if val:
        tempinfo['NX'] = val
        if ehour is None and val[3]: ehour = 0

    # check if allow missing remote file
    if 'MR' in PgOPT.params and PgOPT.params['MR'][0]:
        tempinfo['amiss'] = PgOPT.params['MR'][0]
    elif locrec['missremote']:
        tempinfo['amiss'] = locrec['missremote']
    else:
        tempinfo['amiss'] = 'N'

    options = locrec['options']
    if locrec['action'] == 'AQ':
        # Quasar backup needs to know whether Saved or Web files are backed up
        if options:
            ms = re.search(r'-(ST|WT)\s+(\w)', options)
            if ms:
                if ms.group(1) == 'ST':
                    tempinfo['AQ'] = 'Saved'
                    tempinfo['ST'] = ms.group(2)
                else:
                    tempinfo['AQ'] = 'Web'
            else:
                return PgLOG.pglog("{}: MISS -ST or -WT to backup {}".format(options, locinfo), PgOPT.PGOPT['emlerr'])
        else:
            return PgLOG.pglog("Set -ST or -WT in Options to backup {}".format(locinfo), PgOPT.PGOPT['emlerr'])
    if (options and re.search(r'(^|\s)-GX(\s|$)', options, re.I) and
        not re.search(r'(^|\s)-RS(\s|$)', options, re.I)):
        tempinfo['RS'] = 0   # set to 1 if need pass -RS to dsarch
    ddate = edate
    dhour = ehour
    dcnt = 0
    PgOPT.PGOPT['wtidx'] = {}

    if options:
        ms = re.search(r'-VS\s+(\d+)', options, re.I)
        if ms: tempinfo['VS'] = int(ms.group(1))

    if tempinfo['VI']:
        # compute the valid (oldest-allowed) date/hour from the valid interval
        if tempinfo['VI'][3]:
            (vdate, vhour) = PgUtil.adddatehour(PgOPT.PGOPT['CURDATE'], PgOPT.PGOPT['CURHOUR'], -tempinfo['VI'][0],
                                                -tempinfo['VI'][1], -tempinfo['VI'][2], -tempinfo['VI'][3])
        else:
            vdate = PgUtil.adddate(PgOPT.PGOPT['CURDATE'], -tempinfo['VI'][0], -tempinfo['VI'][1], -tempinfo['VI'][2])
            vhour = PgOPT.PGOPT['CURHOUR']

        if 'CN' in PgOPT.params and locrec['cleancmd']:
            tempinfo['CVD'] = PgUtil.adddate(PgOPT.PGOPT['CURDATE'], -tempinfo['VI'][0], -tempinfo['VI'][1], -(1+tempinfo['VI'][2]))
            tempinfo['setmiss'] = 1
        if PgUtil.diffdatehour(edate, ehour, vdate, vhour) < 0:
            vdate = edate
            vhour = ehour
        if tempinfo['amiss'] == 'N' and locrec['missdate']:
            # dhour reused here as a date difference, not an hour value
            dhour = PgUtil.diffdatehour(vdate, vhour, locrec['missdate'], locrec['misshour'])
            if dhour > 0:
                if dhour > 240:
                    # missing record too old (> 240 hours); clear it
                    record = {'missdate' : None, 'misshour' : None}
                    PgDBI.pgupdt("dlupdt", record, "lindex = {}".format(locrec['lindex']))
                else:
                    vdate = locrec['missdate']
                    vhour = locrec['misshour']

        if vdate and not isinstance(vdate, str): vdate = str(vdate)
        tempinfo['VD'] = vdate
        tempinfo['VH'] = vhour
        if 'ED' not in PgOPT.params and PgUtil.diffdatehour(edate, ehour, vdate, vhour) > 0:
            # pull the end date back inside the valid window, aligned to frequency
            edate = vdate
            if tempinfo['FQ']:
                if tempinfo['EP'] or tempinfo['QU'] == 'M':
                    edate = PgUtil.enddate(edate, tempinfo['EP'], tempinfo['QU'], tempinfo['FQ'][6])
                while True:
                    (udate, uhour) = PgUpdt.addfrequency(edate, ehour, tempinfo['FQ'], -1)
                    if PgUtil.diffdatehour(udate, uhour, vdate, vhour) < 0: break
                    edate = udate
                    ehour = uhour
                if tempinfo['EP'] or tempinfo['QU'] == 'M':
                    edate = PgUtil.enddate(edate, tempinfo['EP'], tempinfo['QU'], tempinfo['FQ'][6])

    # the cutoff time for due updates: current date/hour, minus the due interval
    vdate = PgOPT.params['CD']
    vhour = PgOPT.params['CH']
    if tempinfo['NX']:
        if tempinfo['NX'][3]:
            (udate, uhour) = PgUtil.adddatehour(PgOPT.PGOPT['CURDATE'], vhour, -tempinfo['NX'][0],
                                                -tempinfo['NX'][1], -tempinfo['NX'][2], -tempinfo['NX'][3])
        else:
            udate = PgUtil.adddate(PgOPT.PGOPT['CURDATE'], -tempinfo['NX'][0], -tempinfo['NX'][1], -tempinfo['NX'][2])
            uhour = vhour
        if PgUtil.diffdatehour(udate, uhour, vdate, vhour) <= 0:
            vdate = udate
            vhour = uhour

    if 'CP' in PgOPT.params: (vdate, vhour) = PgUpdt.addfrequency(vdate, vhour, tempinfo['FQ'], 1)

    # collect all due end dates/hours up to the cutoff; -FU forces at least one
    fupdate = 1 if 'FU' in PgOPT.params else 0
    while fupdate or PgUtil.diffdatehour(edate, ehour, vdate, vhour) <= 0:
        tempinfo['ED'].append(edate)
        if ehour != None and tempinfo['QU'] != 'H':
            tempinfo['EH'].append(23)
        else:
            tempinfo['EH'].append(ehour)
        if 'MU' not in PgOPT.params: break
        if tempinfo['RS'] == 0 and dcnt < 3:
            if PgUtil.diffdatehour(edate, ehour, ddate, dhour) >= 0: dcnt += 1
        (edate, ehour) = PgUpdt.addfrequency(edate, ehour, tempinfo['FQ'], 1)
        edate = PgUtil.enddate(edate, tempinfo['EP'], tempinfo['QU'], tempinfo['FQ'][6])
        fupdate = 0

    if tempinfo['RS'] == 0 and dcnt > 2: tempinfo['RS'] = 1
    if not tempinfo['ED']:   # no end time found, update not due yet
        if tempinfo['NX']:
            (udate, uhour) = PgUtil.adddatehour(edate, ehour, tempinfo['NX'][0], tempinfo['NX'][1], tempinfo['NX'][2], tempinfo['NX'][3])
        else:
            udate = edate
            uhour = ehour
        locinfo = PgUpdt.replace_pattern(locinfo, edate, ehour, tempinfo['FQ'])
        vdate = PgOPT.params['CD']
        val = "Update data"
        if tempinfo['NX']: val += " due"
        if uhour is None:
            locinfo += ": {} on {}".format(val, udate)
        else:
            locinfo += ": {} at {}:{:02}".format(val, udate, uhour)
            vdate += ":{:02}".format(PgOPT.params['CH'])

        return PgLOG.pglog("{} NOT due yet by {}".format(locinfo, vdate), PgOPT.PGOPT['emllog'])

    return tempinfo

#
# get archived file info
#
def file_archive_info(lfile, locrec, tempinfo):

    if tempinfo['ainfo'] != None: return tempinfo['ainfo']

    edate = tempinfo['edate']
    ehour = tempinfo['ehour']
    ainfo = {'archcnt' : 0, 'archived' : 0, 'present' : 0, 'vindex' : 0, 'types' : {}, 'note' :
None} + growing = PgUpdt.is_growing_file(locrec['locfile'], tempinfo['FQ']) + if growing: + if tempinfo['NX']: + (udate, uhour) = PgUtil.adddatehour(edate, ehour, tempinfo['NX'][0], tempinfo['NX'][1], tempinfo['NX'][2], tempinfo['NX'][3]) + else: + udate = edate + uhour = ehour + if PgLOG.PGLOG['GMTZ'] and uhour != None: # convert to local times + (udate, uhour) = PgUtil.adddatehour(udate, uhour, 0, 0, 0, -PgLOG.PGLOG['GMTZ']) + + options = locrec['options'] if locrec['options'] else "" + act = locrec['action'] + locrec['gindex'] = PgUpdt.get_group_index(options, edate, ehour, tempinfo['FQ']) + dsid = PgOPT.params['DS'] + gcnd = "gindex = {}".format(locrec['gindex']) + cnd = "dsid = '{}' AND {}".format(dsid, gcnd) + mmiss = 0 + if re.match(r'^A(B|W)$', act): # check existing web files + ainfo['archcnt'] = 1 + ms = re.search(r'(^|\s)-WT\s+(\w)(\s|$)', options, re.I) + type = get_data_type('WT', options) + if locrec['archfile']: + afile = PgUpdt.replace_pattern(locrec['archfile'], edate, ehour, tempinfo['FQ']) + else: + afile = lfile if re.search(r'(^|\s)-KP(\s|$)', lfile, re.I) else op.basename(lfile) + ms =re.search(r'(^|\s)-WP\s+(\S+)', options, re.I) + if ms: + path = PgUpdt.replace_pattern(ms.group(2), edate, ehour, tempinfo['FQ']) + else: + path = PgDBI.get_group_field_path(locrec['gindex'], dsid, 'webpath') + if path: afile = PgLOG.join_paths(path, afile) + + wrec = PgSplit.pgget_wfile(dsid, "*", "{} AND type = '{}' AND wfile = '{}'".format(gcnd, type, afile), PgOPT.PGOPT['extlog']) + if wrec: + ainfo['wfile'] = wrec['wfile'] + adate = ainfo['adate'] = str(wrec['date_modified']) + atime = ainfo['atime'] = str(wrec['time_modified']) + ahour = None + if atime: + ms = re.match(r'^(\d+):', atime) + if ms: ahour = int(ms.group(1)) + ainfo['ahour'] = ahour + ainfo['asize'] = wrec['data_size'] + ainfo['chksm'] = wrec['checksum'] if wrec['checksum'] else '' + ainfo['note'] = wrec['note'] + ainfo['types'][type] = wrec + ainfo['wtype'] = type + if not growing or 
PgUtil.diffdatehour(udate, uhour, adate, ahour) <= 0: ainfo['archived'] += 1 + if wrec['vindex']: ainfo['vindex'] = wrec['vindex'] + ainfo['present'] += 1 + + if act == 'AS': # check existing save files + ainfo['archcnt'] = 1 + type = get_data_type('ST', options) + if locrec['archfile']: + afile = PgUpdt.replace_pattern(locrec['archfile'], edate, ehour, tempinfo['FQ']) + else: + afile = lfile if re.search(r'(^|\s)-KP(\s|$)', options, re.I) else op.basename(lfile) + ms = re.search(r'(^|\s)-SP\s+(\S+)', options, re.I) + if ms: + path = PgUpdt.replace_pattern(ms.group(2), edate, ehour, tempinfo['FQ']) + else: + path = PgDBI.get_group_field_path(locrec['gindex'], PgOPT.params['DS'], 'savedpath') + if path: afile = PgLOG.join_paths(path, afile) + + srec = PgDBI.pgget("sfile", "*", "{} AND type = '{}' AND sfile = '{}'".format(cnd, type, afile), PgOPT.PGOPT['extlog']) + if srec: + ainfo['sfile'] = srec['sfile'] + adate = ainfo['adate'] = str(srec['date_modified']) + atime = ainfo['atime'] = str(srec['time_modified']) + ahour = None + if atime: + ms = re.match(r'^(\d+):', atime) + if ms: ahour = int(ms.group(1)) + ainfo['asize'] = srec['data_size'] + ainfo['chksm'] = srec['checksum'] if srec['checksum'] else '' + ainfo['note'] = srec['note'] + ainfo['types'][type] = srec + ainfo['stype'] = type + if not growing or PgUtil.diffdatehour(udate, uhour, adate, ahour) <= 0: ainfo['archived'] += 1 + if srec['vindex']: ainfo['vindex'] = srec['vindex'] + ainfo['present'] += 1 + + if act == 'AQ': # check existing quasar backup files + ainfo['archcnt'] = 1 + type = get_data_type('QT', options) + if locrec['archfile']: + afile = PgUpdt.replace_pattern(locrec['archfile'], edate, ehour, tempinfo['FQ']) + else: + return PgLOG.pglog(lfile + ": Miss Backup file name via (FA|FileArchived)", PgOPT.PGOPT['emlerr']) + + brec = PgDBI.pgget("bfile", "*", "dsid = '{}' AND type = '{}' AND bfile = '{}'".format(PgOPT.params['DS'], type, afile), PgOPT.PGOPT['extlog']) + if brec: + ainfo['bfile'] = 
brec['bfile'] + adate = ainfo['adate'] = str(brec['date_modified']) + atime = ainfo['atime'] = str(brec['time_modified']) + ahour = None + if atime: + ms = re.match(r'^(\d+):', atime) + if ms: ahour = int(ms.group(1)) + ainfo['asize'] = brec['data_size'] + ainfo['chksm'] = brec['checksum'] if brec['checksum'] else '' + ainfo['note'] = brec['note'] + ainfo['types'][type] = brec + ainfo['btype'] = type + if not growing or PgUtil.diffdatehour(udate, uhour, adate, ahour) <= 0: ainfo['archived'] += 1 + ainfo['present'] += 1 + + if ainfo['archcnt'] == 0: + PgLOG.pglog("{}: unknown archive action {}".format(lfile, act), PgOPT.PGOPT['extlog']) + + return ainfo # always returns a hash reference for archiving info + +# +# build up data note based on temporal info, keep the begin timestamp +# for existing record; change end timestamp only if new data added +# return None if no change for existing note +# +def build_data_note(onote, lfile, locrec, tempinfo): + + note = locrec['note'] + if not note: return onote + + seps = PgOPT.params['PD'] + match = "[^{}]+".format(seps[1]) + edate = tempinfo['edate'] + ehour = tempinfo['ehour'] + + if note[0] == '!': # executable for build up data note + cmd = PgUpdt.executable_command(1, None, None, edate) + if not cmd: return 0 + return PgLOG.pgsystem(cmd, PgOPT.PGOPT['emllog'], 21) + + # repalce generic patterns first + note = PgUpdt.replace_pattern(note, None) # replace generic patterns first + + # get temporal patterns + patterns = re.findall(r'{}({}){}'.format(seps[0], match, seps[1]), note) + pcnt = len(patterns) + if pcnt == 0: return note # no pattern temporal matches + if pcnt > 2: + PgLOG.pglog("{}-{}: TOO many ({}) temporal patterns".format(lfile, note, pcnt), PgOPT.PGOPT['emllog']) + return onote + + if pcnt == 2: # replace start time + if onote: # get start time from existing note + replace = "{}{}{}".format(seps[0], patterns[0], seps[1]) + ms = re.match(r'^(.*){}(.*){}'.format(replace, PgOPT.params['PD'][0]), note) + if ms: + 
init = ms.group(1) + sp = ms.group(2) + ms = re.search(r'{}(.+){}'.format(init, sp), onote) + if ms: + sdate = ms.group(1) + note = re.sub(replace, sdate, note, 1) + elif tempinfo['FQ']: # get start time + (sdate, shour) = PgUpdt.addfrequency(edate, ehour, tempinfo['FQ'], 0) + note = PgUpdt.replace_pattern(note, sdate, shour, None, 1) + + return PgUpdt.replace_pattern(note, edate, ehour) # repalce end time now + +# +# get data file status info +# +def file_status_info(lfile, rfile, tempinfo): + + # check and cache new data info + finfo = PgFile.check_local_file(lfile, 33, PgOPT.PGOPT['wrnlog']) # 33 = 1 + 32 + if not finfo: + PgOPT.PGOPT['chksm'] = '' + PgOPT.PGOPT['fsize'] = 0 + return + + fdate = finfo['date_modified'] + ftime = finfo['time_modified'] + fhour = None + ms = re.match(r'^(\d+):', ftime) + if ms: four = int(ms.group(1)) + PgOPT.PGOPT['fsize'] = finfo['data_size'] + PgOPT.PGOPT['chksm'] = finfo['checksum'] + + if rfile and lfile != rfile: + finfo = PgFile.check_local_file(rfile, 1, PgOPT.PGOPT['wrnlog']) + if finfo and PgUtil.cmptime(finfo['date_modified'], finfo['time_modified'], fdate, ftime) < 0: + fdate = finfo['date_modified'] + ftime = finfo['time_modified'] + ms = re.match(r'^(\d+):', ftime) + if ms: four = int(ms.group(1)) + + PgOPT.PGOPT['fdate'] = fdate + PgOPT.PGOPT['ftime'] = ftime + PgOPT.PGOPT['fhour'] = fhour + + if 'RE' in PgOPT.params: # reset end data/time/hour + if tempinfo['NX']: + if tempinfo['NX'][3]: + (fdate, fhour) = PgUtil.adddatehour(fdate, fhour, -tempinfo['NX'][0], -tempinfo['NX'][1], + -tempinfo['NX'][2], -tempinfo['NX'][3]) + else: + fdate = PgUtil.adddate(fdate, -tempinfo['NX'][0], -tempinfo['NX'][1], -tempinfo['NX'][2]) + + while True: + (edate, ehour) = PgUpdt.addfrequency(tempinfo['edate'], tempinfo['ehour'], tempinfo['FQ'], 1) + edate = PgUtil.enddate(edate, tempinfo['EP'], tempinfo['QU'], tempinfo['FQ'][6]) + if PgUtil.diffdatehour(edate, ehour, fdate, fhour) > 0: break + tempinfo['edate'] = edate + 
tempinfo['ehour'] = ehour + +# +# check if a Server file is aged enough for download +# return 1 if valid, 0 if not aged enough, -1 if cannot check +# +def check_agetime(dcmd, sfile, atime): + + info = PgUpdt.check_server_file(dcmd, 1) + if not info: + sact = get_download_action(dcmd) + (stat, derr) = PgUpdt.parse_download_error(PgOPT.PGOPT['STATUS'], sact) + PgOPT.PGOPT['STATUS'] = derr + PgLOG.pglog("{}: cannot check file age\n{}".format(sfile, PgOPT.PGOPT['STATUS']), PgOPT.PGOPT['emlerr']) + return stat + + ahour = None + if atime[3]: + ms = re.match(r'^(\d+):', info['time_modified']) + if ms: ahour = int(ms.group(1)) + (adate, ahour) = PgUtil.adddatehour(info['date_modified'], ahour, atime[0], atime[1], atime[2], atime[3]) + if PgUtil.diffdatehour(PgOPT.params['CD'], PgOPT.params['CH'], adate, ahour) >= 0: + return 1 + + if ahour is None: + PgLOG.pglog(("{}: original {} file ready by {}\n".format(sfile, info['ftype'], info['date_modified']) + + "but NOT aged enough for retrieving yet by " + PgOPT.params['CD']), PgOPT.PGOPT['emllog']) + else: + PgLOG.pglog(("{}: original {} file ready by {}:{:02}\n".format(sfile, info['ftype'], info['date_modified'], ahour) + + "but NOT aged enough for retrieving yet by {}:{:02}".format(PgOPT.params['CD'], PgOPT.params['CH'])), PgOPT.PGOPT['emllog']) + + return 0 # otherwise server file is not aged enough + +# +# check if a Server file is changed with different size +# return 1 - file changed, 2 - new file retrieved, 3 - force redlownload, +# 0 - no change , -1 - error check, -2 - cannot check +# +def check_newer_file(dcmd, cfile, ainfo): + + if cfile: + finfo = PgFile.check_local_file(cfile, 33, PgOPT.PGOPT['wrnlog']) + if not finfo: return 3 # download if can not check newer + else: + finfo = {'isfile' : 0, 'checksum' : ainfo['chksm'], 'data_size' : ainfo['asize'], + 'date_modified' : ainfo['adate'], 'time_modified' : ainfo['atime']} + + cinfo = PgUpdt.check_server_file(dcmd, 33, cfile) + if not cinfo: + sact = 
get_download_action(dcmd) + (stat, derr) = PgUpdt.parse_download_error(PgOPT.PGOPT['STATUS'], sact) + PgOPT.PGOPT['STATUS'] = derr + return stat + + stat = 2 if cinfo['ftype'] == "WGET" else 1 + if finfo['isfile'] and cfile == cinfo['fname'] and finfo['data_size'] and cinfo['data_size'] and cinfo['data_size'] != finfo['data_size']: + return stat + + PgOPT.PGOPT['STATUS'] = '' + if (finfo['data_size'] != cinfo['data_size'] or 'checksum' not in cinfo or + 'checksum' not in finfo or finfo['checksum'] != cinfo['checksum']): + if 'HO' in PgOPT.params and cinfo['ftype'] == "FTP": + (cdate, ctime) = PgUtil.addhour(cinfo['date_modified'], cinfo['time_modified'], -PgOPT.params['HO'][0]) + else: + cdate = cinfo['date_modified'] + ctime = cinfo['time_modified'] + + if PgUtil.cmptime(cdate, ctime, finfo['date_modified'], finfo['time_modified']) > 0: + msg = "{} Newer {} {}: {} {} {}".format(PgOPT.params['DS'], cinfo['ftype'], cinfo['fname'], cdate, ctime, cinfo['data_size']) + if 'checksum' in cinfo: msg += " " + cinfo['checksum'] + msg += "; {}: ".format(cfile if cfile else "archived") + msg += "{} {} {}".format(finfo['date_modified'], finfo['time_modified'], finfo['data_size']) + if 'checksum' in finfo: msg += " " + finfo['checksum'] + PgLOG.pglog(msg, PgOPT.PGOPT['wrnlog']) + return stat + + if 'adate' in ainfo: + PgOPT.PGOPT['STATUS'] = "archived: {} {}".format(ainfo['adate'], ainfo['atime']) + elif cfile: + PgOPT.PGOPT['STATUS'] += "local copy timestamp: {} {}".format(finfo['date_modified'], finfo['time_modified']) + + if 'note' in cinfo: + PgOPT.PGOPT['STATUS'] += "\n" + cinfo['note'] + + return 0 + +# +# get download action name +# +def get_download_action(dcmd): + + if not dcmd: return "download" + + dact = "DOWNLOAD" + ms = re.search(r'(^|\S\/)tar\s+-(\w+)\s', dcmd) + if ms: + taropt = ms.group(2) + dact = "UNTAR" if taropt.find('x') > -1 else "TAR" + else: + ms = re.match(r'^\s*(\S+)', dcmd) + if ms: + dact = op.basename(ms.group(1)) + if dact == "wc": + ms = 
re.search(r'\|\s*(\S+)', dcmd) + if ms: dact = op.basename(ms.group(1)) + + return dact + +# +# change to working directory if not there yet +# +def change_workdir(wdir, locinfo, edate, ehour, FQ): + + if 'WD' in PgOPT.params and PgOPT.params['WD'][0]: wdir = PgOPT.params['WD'][0] + if not wdir: + return PgLOG.pglog(locinfo + ": MISS working directory", PgOPT.PGOPT['emlerr']) + else: + wdir = PgLOG.replace_environments(wdir) + wdir = PgUpdt.replace_pattern(wdir, edate, ehour, FQ) + if not PgFile.change_local_directory(wdir, PgOPT.PGOPT['emllog']): return 0 + + return 1 + +# +# clean the working copies of remote and local files/directories +# +def clean_files(cleancmd, edate, ehour, lfiles, rfiles, freq): + + lfile = ' '.join(lfiles) if lfiles else '' + cleancmd = PgUpdt.replace_pattern(cleancmd, edate, ehour, freq) + cleancmd = PgUpdt.executable_command(cleancmd, lfile, None, None, None, rfiles) + PgLOG.PGLOG['ERR2STD'] = [PgLOG.PGLOG['MISSFILE']] + PgLOG.pgsystem(cleancmd, PgOPT.PGOPT['emllog'], 5) + PgLOG.PGLOG['ERR2STD'] = [] + +# +# clean files rematching pattern on given date/hour +# +def clean_older_files(cleancmd, workdir, locinfo, edate, locfile, rmtrecs, rcnt, tempinfo): + + rfiles = None + lfiles = PgUpdt.get_local_names(locfile, tempinfo, edate) + change_workdir(workdir, locinfo, edate, tempinfo['ehour'], tempinfo['FQ']) + + if rcnt and cleancmd.find(' -RF') > 0: + rfiles = get_all_remote_files(rmtrecs, rcnt, tempinfo, edate) + clean_files(cleancmd, edate, tempinfo['ehour'], lfiles, rfiles, tempinfo['FQ']) + +# +# get all remote file names for one update period +# +def get_all_remote_files(rmtrecs, rcnt, tempinfo, edate): + + rfiles = [] + for i in range(rcnt): # processs each remote record + rmtrec = PgUtil.onerecord(rmtrecs, i) + file = rmtrec['remotefile'] + if not file: continue + files = PgUpdt.get_remote_names(file, rmtrec, file, tempinfo, edate) + if files: rfiles.extend(files) + + return rfiles + +# +# check remote file status and sed email to 
specialist for irregular update cases +# +def check_dataset_status(): + + if 'CD' in PgOPT.params: + PgOPT.params['CD'] = PgUtil.format_date(PgOPT.params['CD']) # standard format in case not yet + else: + PgOPT.params['CD'] = PgUtil.curdate() # default to current date + + condition = "specialist = '{}'".format(PgOPT.params['LN']) + if 'ED' not in PgOPT.params: condition += " AND enddate < '{}'".format(PgOPT.params['CD']) + if 'DS' in PgOPT.params: condition += " AND dsid = '{}'".format(PgOPT.params['DS']) + s = PgUpdt.file_condition('dlupdt', ('L' if 'LI' in PgOPT.params else "FIXA"), None, 1) + if s: condition += " AND " + s + condition += " ORDER BY dsid, execorder, lindex" + locrecs = PgDBI.pgmget("dlupdt", "*", condition, PgOPT.PGOPT['extlog']) + loccnt = len(locrecs['locfile']) if locrecs else 0 + if not loccnt: return PgLOG.pglog("No Update record found for checking update status on {} for '{}'".format(PgOPT.params['CD'], PgOPT.params['LN']), PgOPT.PGOPT['wrnlog']) + + s = "s" if loccnt > 1 else "" + PgLOG.pglog("Check {} record{} for update status...".format(loccnt, s), PgOPT.PGOPT['wrnlog']) + for i in range(loccnt): + locrec = PgUtil.onerecord(locrecs, i) + if loccnt == 1 and 'LI' in PgOPT.params and 'LF' in PgOPT.params and len(PgOPT.params['LF']) == 1 and PgOPT.params['LF'][0] != locrec['locfile']: + locrec['locfile'] = PgOPT.params['LF'][0] + check_locfile_status(locrec) + + if PgOPT.PGOPT['lcnt'] or PgLOG.PGLOG['ERRMSG']: + if PgOPT.PGOPT['lcnt']: + loccnt = PgOPT.PGOPT['lcnt'] + s = "s" if (loccnt > 1) else "" + SUBJECT = "DSUPDT Status of {} update record{}".format(loccnt, s) + if 'DS' in PgOPT.params: SUBJECT += " for {}".format(PgOPT.params['DS']) + TOPMSG = " ready for update of {} local file{}".format(loccnt, s) + s = "s" if (PgOPT.PGOPT['rcnt'] > 1) else "" + TOPMSG = "{}/{} remote{}{}".format(PgOPT.PGOPT['ucnt'], PgOPT.PGOPT['rcnt'], s, TOPMSG) + else: + PgLOG.pglog("No local file ready for checking {} on {} for {}".format(SUBJECT, 
PgOPT.params['CD'], PgOPT.params['LN']), PgOPT.PGOPT['wrnlog']) + SUBJECT = TOPMSG = None + + if PgOPT.PGOPT['UCNTL']: + PgUpdt.reset_control_time() + if SUBJECT: SUBJECT += "-C{}".format(PgOPT.PGOPT['UCNTL']['cindex']) + +# +# check update status for a given local file +# +def check_locfile_status(locrec): + + loccnd = "lindex = {}".format(locrec['lindex']) + lfile = locrec['locfile'] + locinfo = "{}-L{}".format(locrec['dsid'], locrec['lindex']) + if not lfile: return PgLOG.pglog(locinfo + ": local file name NOT specified", PgOPT.PGOPT['emlerr']) + locinfo += "-" + lfile + tempinfo = get_tempinfo(locrec, locinfo, 0) + if not tempinfo: return 0 # simply return if miss temporal info for update + + rmtcnd = loccnd + rcnd = PgUpdt.file_condition('drupdt', ('D' if 'DO' in PgOPT.params else "RS"), None, 1) + if rcnd: rmtcnd += " AND " + rcnd + rmtrecs = PgDBI.pgmget("drupdt", "*", rmtcnd + " ORDER BY dindex, remotefile", PgOPT.PGOPT['extlog']) + rcnt = len(rmtrecs['remotefile']) if rmtrecs else 0 + if rcnt == 0: + if rcnd and PgDBI.pgget("drupdt", "", loccnd): + return PgLOG.pglog("{}: NO remote file record matched for {}".format(locinfo, rcnd), PgOPT.PGOPT['emlerr']) + rcnt = 1 # create a empty record remote file + rmtrecs = {'lindex' : locrec['lindex'], 'remotefile' : None, 'serverfile' : None} + + if rcnt == 1: + if 'RF' in PgOPT.params and len(PgOPT.params['RF']) == 1 and not (rmtrecs['remotefile'][0] and PgOPT.params['RF'][0] == rmtrecs['remotefile'][0]): + rmtrecs['remotefile'][0] = PgOPT.params['RF'][0] + if 'SF' in PgOPT.params and len(PgOPT.params['SF']) == 1 and not (rmtrecs['serverfile'][0] and PgOPT.params['SF'][0] == rmtrecs['serverfile'][0]): + rmtrecs['serverfile'][0] = PgOPT.params['SF'][0] + + ecnt = len(tempinfo['ED']) + PgOPT.PGOPT['lindex'] = locrec['lindex'] + logact = PgOPT.PGOPT['emllog'] + + retcnt = 0 + for i in range(ecnt): + if ALLCNT > 1 and i > 0: + tempinfo = get_tempinfo(locrec, locinfo, i) + if not tempinfo: break + edate = 
tempinfo['ED'][0] + ehour = tempinfo['EH'][0] + else: + edate = tempinfo['ED'][i] + ehour = tempinfo['EH'][i] + tempinfo['edate'] = edate + if ehour != None: + tempinfo['einfo'] = "end data date:hour {}:{:02}".format(edate, ehour) + tempinfo['ehour'] = ehour + else: + tempinfo['einfo'] = "end data date {}".format(edate) + tempinfo['ehour'] = None + + if 'GZ' in PgOPT.params: tempinfo['einfo'] += "(UTC)" + lfile = PgUpdt.replace_pattern(locrec['locfile'], edate, ehour, tempinfo['FQ']) + locinfo = "{}-L{}-{}".format(locrec['dsid'], locrec['lindex'], lfile) + PgLOG.pglog("{}: Check Update Status for {}".format(locinfo, tempinfo['einfo']), logact) + logact = PgOPT.PGOPT['emlsep'] + PgOPT.PGOPT['lcnt'] += 1 + j = 0 + while j < rcnt: # check each remote record, stop checking if error + pgrec = PgUtil.onerecord(rmtrecs, j) + if not check_remote_status(pgrec, lfile, locrec, locinfo, tempinfo) and 'CA' not in PgOPT.params: + break + j += 1 + if j == 0: break + + PgOPT.PGOPT['lindex'] = 0 + + return (1 if retcnt > 0 else 0) + +# +# check update status for given remote file +# +def check_remote_status(rmtrec, lfile, locrec, locinfo, tempinfo): + + rfile = rmtrec['remotefile'] + rmtinfo = locinfo + if not rfile: + rfile = lfile + rcnt = 1 + + if rfile != locrec['locfile']: rmtinfo += "-" + rfile + tempinfo['DC'] = (PgOPT.params['DC'][0] if ('DC' in PgOPT.params and PgOPT.params['DC'][0]) else + (rmtrec['download'] if rmtrec['download'] else locrec['download'])) + rfiles = PgUpdt.get_remote_names(rfile, rmtrec, rmtinfo, tempinfo) + rcnt = len(rfiles) if rfiles else 0 + if not rcnt: return PgLOG.pglog(rmtinfo + ": NO remote file name identified", PgOPT.PGOPT['emlerr']) + + PgOPT.PGOPT['rcnt'] += rcnt # accumulate remote file counts + if tempinfo['DC']: + PgOPT.PGOPT['PCNT'] = PgUpdt.count_pattern_path(tempinfo['DC']) + tempinfo['DC'] = None + + sfile = rmtrec['serverfile'] + if sfile and sfile != rfile: + sfiles = PgUpdt.get_remote_names(sfile, rmtrec, rmtinfo, tempinfo) + scnt 
= len(sfiles) if sfiles else 0 + if scnt != rcnt: + PgOPT.PGOPT['rstat'] = -2 + return PgLOG.pglog("{}/{}: {}/{} MISS match file counts".format(rmtinfo, sfile, rcnt, scnt), PgOPT.PGOPT['emlerr']) + else: + sfiles = rfiles + scnt = rcnt + + dcnt = 0 + for i in range(rcnt): + rmtinfo = locinfo + rfile = rfiles[i] + if rfile['fname'] != lfile: rmtinfo += "-" + rfile['fname'] + sfile = sfiles[i] + if sfile['fname'] != rfile['fname']: rmtinfo += "-" + sfile['fname'] + rcmd = rfile['rcmd'] + if not rcmd: + return PgLOG.pglog(rmtinfo + ": Missing download command", PgOPT.PGOPT['emlerr']) + elif not sfile['ready']: + PgLOG.pglog(rmtinfo + ": NOT Ready yet for update", PgOPT.PGOPT['emllog']) + break + dcnt += 1 + + return 1 if dcnt else 0 + +# +# process the update control records +# +def process_update_controls(): + + global ALLCNT + ctime = PgUtil.curtime(1) + if not ('CI' in PgOPT.params or 'DS' in PgOPT.params): + PgOPT.set_default_value("SN", PgOPT.params['LN']) + + condition = ("(pid = 0 OR lockhost = '{}') AND cntltime <= '{}'".format(PgLOG.PGLOG['HOSTNAME'], ctime) + + PgOPT.PgOPT.get_hash_condition('dcupdt') + " ORDER BY hostname DESC, cntltime") + pgrecs = PgDBI.pgmget("dcupdt", "*", condition, PgOPT.PGOPT['extlog']) + + ALLCNT = len(pgrecs['cindex']) if pgrecs else 0 + if ALLCNT == 0: + return PgLOG.pglog("No update control record idetified due for process", PgLOG.LOGWRN) + + s = 's' if ALLCNT > 1 else '' + PgLOG.pglog("Process {} update control record{} ...".format(ALLCNT, s), PgLOG.WARNLG) + + pcnt = 0 + for i in range(ALLCNT): + pcnt += process_one_control(PgUtil.onerecord(pgrecs, i)) + if pcnt > 1 and not ('CI' in PgOPT.params or 'DS' in PgOPT.params): break + rmsg = "{} of {} update control{} reprocessed by {}".format(pcnt, ALLCNT, s, PgLOG.PGLOG['CURUID']) + if PgLOG.PGLOG['CURUID'] != PgOPT.params['LN']: rmsg += " for " + PgOPT.params['LN'] + PgLOG.pglog(rmsg, PgOPT.PGOPT['wrnlog']) + +# +# process one update control +# +def process_one_control(pgrec): + + 
cidx = pgrec['cindex'] + cstr = "Control Index {}".format(cidx) + if not pgrec['action']: return PgLOG.pglog(cstr + ": Miss update action", PgOPT.PGOPT['errlog']) + if not (PgOPT.OPTS[pgrec['action']][0]&PgOPT.PGOPT['CNTLACTS']): + return PgLOG.pglog("{}: Invalid dsupdt action '{}'".format(cstr, pgrec['action']), PgOPT.PGOPT['errlog']) + if not pgrec['frequency']: return PgLOG.pglog(cstr + ": Miss update Frequency", PgOPT.PGOPT['errlog']) + if pgrec['pid'] > 0 and PgSIG.check_process(pgrec['pid']): + if 'CI' in PgOPT.params: PgLOG.pglog("{}: Under processing {}/{}".format(cstr, pgrec['pid'], PgLOG.PGLOG['HOSTNAME']), PgOPT.PGOPT['wrnlog']) + return 0 + if pgrec['specialist'] != PgOPT.params['LN']: + return PgLOG.pglog("{}: must be specialist '{}' to process".format(cstr, pgrec['specialist']), PgOPT.PGOPT['errlog']) + if not ('ED' in PgOPT.params or PgOPT.valid_data_time(pgrec, cstr, PgOPT.PGOPT['wrnlog'])): + return 0 + cmd = "dsupdt " + if pgrec['dsid']: cmd += pgrec['dsid'] + ' ' + cmd += "{} -CI {} ".format(pgrec['action'], cidx) + if PgLOG.PGLOG['CURUID'] != PgOPT.params['LN']: cmd += "-LN " + PgOPT.params['LN'] + cmd += "-d -b" + + # make sure it is not locked + if PgLock.lock_update_control(cidx, 0, PgOPT.PGOPT['errlog']) <= 0: return 0 + PgLOG.pglog("{}-{}{}: {}".format(PgLOG.PGLOG['HOSTNAME'], pgrec['specialist'], PgLOG.current_datetime(), cmd), PgLOG.LOGWRN|PgLOG.FRCLOG) + os.system(cmd + " &") + return 1 + +# +# move the previous archived version controlled files +# +def move_archived_file(ainfo, archived): + + stat = 0 + if 'wfile' in ainfo: + type = ainfo['wtype'] + pgrec = ainfo['types'][type] + if pgrec and pgrec['vindex']: + tofile = fromfile = ainfo['wfile'] + ftype = "Web" + ttype = " Saved" + i = 0 + while True: # create tofile name + if i > 0: tofile = "{}.vbu{}".format(fromfile, i) + if not PgDBI.pgget("sfile", "", "dsid = '{}' AND sfile = '{}'".format(PgOPT.params['DS'], tofile), PgOPT.PGOPT['extlog']): + break + i += 1 + stat = 
PgLOG.pgsystem("dsarch {} MV -WF {} -WT {} -SF {} -ST V -KM -TS".format(PgOPT.params['DS'], fromfile, type, tofile), PgOPT.PGOPT['emerol'], 5) + + if stat == 0 and ainfo['sfile']: + type = ainfo['stype'] + pgrec = ainfo['types'][type] + if pgrec and pgrec['vindex']: + fromfile = ainfo['sfile'] + ftype = "Saved" + ttype = '' + i = 0 + while True: # create tofile name + tofile = "{}.vbu{}".format(fromfile, i) + if not PgDBI.pgget("sfile", "", "dsid = '{}' AND sfile = '{}'".format(PgOPT.params['DS'], tofile), PgOPT.PGOPT['extlog']): + break + i += 1 + stat = PgLOG.pgsystem("dsarch {} MV -RF {} -OT {} -SF {} -ST V".format(PgOPT.params['DS'], fromfile, type, tofile), PgOPT.PGOPT['emerol'], 5) + + if stat: + PgOPT.PGOPT['vcnt'] += 1 + if 'NE' in PgOPT.params or 'EE' in PgOPT.params: + if 'NE' in PgOPT.params: del PgOPT.params['NE'] + if 'EE' in PgOPT.params: del PgOPT.params['EE'] + PgOPT.params['SE'] = 1 # email summary at least + PgOPT.PGOPT['emllog'] |= PgLOG.EMEROL + PgLOG.pglog("{}-{}-{}: Found newer version-conrolled {} file; move to{} type V {}".format(PgOPT.params['DS'], type, fromfile, ftype, ttype, tofile), PgOPT.PGOPT['emlsum']) + archived = 0 + + return archived + +# +# call main() to start program +# +if __name__ == "__main__": main() diff --git a/src/rda_python_dsupdt/dsupdt.py b/src/rda_python_dsupdt/dsupdt.py index 074f479..f201f48 100644 --- a/src/rda_python_dsupdt/dsupdt.py +++ b/src/rda_python_dsupdt/dsupdt.py @@ -1,2454 +1,2122 @@ #!/usr/bin/env python3 -# ################################################################################## -# # Title: dsupdt # Author: Zaihua Ji, zji@ucar.edu # Date: 10/10/2020 # 2025-02-05 transferred to package rda_python_dsupdt from # https://github.com/NCAR/rda-utility-programs.git +# 2025-12-08 convert to class DsUpdt # Purpose: python utility program to download remote files, # process downloaded files and create local file, and # archive local files onto RDA Server # save information of web online data files or 
Saved files into RDADB -# # Github: https://github.com/NCAR/rda-python-dsupdt.git -# ################################################################################## -# + import sys import os import re from os import path as op -from rda_python_common import PgLOG -from rda_python_common import PgSIG -from rda_python_common import PgLock -from rda_python_common import PgCMD -from rda_python_common import PgFile -from rda_python_common import PgUtil -from rda_python_common import PgOPT -from rda_python_common import PgDBI -from rda_python_common import PgSplit -from . import PgUpdt - -TEMPINFO = {} -TOPMSG = SUBJECT = ACTSTR = None -ALLCNT = 0 -DEFTYPES = {'WT' : 'D', 'ST' : 'P', 'QT' : 'B'} - -# -# main function to run dsupdt -# -def main(): - - global SUBJECT - PgOPT.parsing_input('dsupdt') - PgUpdt.check_enough_options(PgOPT.PGOPT['CACT'], PgOPT.PGOPT['ACTS']) - start_action() - - if SUBJECT and 'NE' not in PgOPT.params and (PgLOG.PGLOG['ERRCNT'] or 'EE' not in PgOPT.params): - SUBJECT += " on " + PgLOG.PGLOG['HOSTNAME'] - PgLOG.set_email("{}: {}".format(SUBJECT, TOPMSG), PgLOG.EMLTOP) - if ACTSTR: SUBJECT = "{} for {}".format(ACTSTR, SUBJECT) - if PgSIG.PGSIG['PPID'] > 1: SUBJECT += " in CPID {}".format(PgSIG.PGSIG['PID']) - if PgLOG.PGLOG['ERRCNT'] > 0: SUBJECT += " With Error" - if PgLOG.PGLOG['DSCHECK']: - PgDBI.build_customized_email("dscheck", "einfo", "cindex = {}".format(PgLOG.PGLOG['DSCHECK']['cindex']), - SUBJECT, PgOPT.PGOPT['wrnlog']) - elif PgOPT.PGOPT['UCNTL']: - PgDBI.build_customized_email("dcupdt", "einfo", "cindex = {}".format(PgOPT.PGOPT['UCNTL']['cindex']), - SUBJECT, PgOPT.PGOPT['wrnlog']) - else: - PgLOG.pglog(SUBJECT, PgOPT.PGOPT['wrnlog']|PgLOG.SNDEML) - - if PgLOG.PGLOG['DSCHECK']: - if PgLOG.PGLOG['ERRMSG']: - PgDBI.record_dscheck_error(PgLOG.PGLOG['ERRMSG']) +from .pg_updt import PgUpdt + +class DsUpdt(PgUpdt): + def __init__(self): + super().__init__() # initialize parent class + self.TEMPINFO = {} + self.TOPMSG = self.SUBJECT = 
self.ACTSTR = None + self.ALLCNT = 0 + self.DEFTYPES = {'WT' : 'D', 'ST' : 'P', 'QT' : 'B'} + + # main function to run dsupdt + def read_parameters(self): + self.set_help_path(__file__) + aname = 'dsupdt' + self.parsing_input(aname) + self.check_enough_options(self.PGOPT['CACT'], self.PGOPT['ACTS']) + + # start action of dsupdt + def start_action(self): + if self.PGOPT['ACTS']&self.OPTS['CU'][0]: + if 'CI' in self.params: + if self.cache_update_control(self.params['CI'][0], 1): + self.check_dataset_status() + else: + self.ALLCNT = self.get_option_count(["ED", "EH"]) + self.check_dataset_status(0) + elif self.PGOPT['ACTS'] == self.OPTS['DL'][0]: + if 'CI' in self.params: + self.ALLCNT = len(self.params['CI']) + self.delete_control_info() + elif 'RF' in self.params: + self.ALLCNT = len(self.params['RF']) + self.delete_remote_info() + else: + self.ALLCNT = len(self.params['LI']) + self.delete_local_info() + elif self.OPTS[self.PGOPT['CACT']][0]&self.OPTS['GA'][0]: + self.get_update_info() + elif self.PGOPT['CACT'] == 'PC': + self.process_update_controls() + elif self.PGOPT['ACTS'] == self.OPTS['SA'][0]: + if 'IF' not in self.params: + self.action_error("Missing input file via Option -IF") + if self.get_input_info(self.params['IF'], 'DCUPDT'): + self.check_enough_options('SC', self.OPTS['SC'][0]) + self.ALLCNT = len(self.params['CI']) + self.set_control_info() + if self.get_input_info(self.params['IF'], 'DLUPDT'): + self.check_enough_options('SL', self.OPTS['SL'][0]) + self.ALLCNT = len(self.params['LI']) + self.set_local_info() + if self.get_input_info(self.params['IF'], 'DRUPDT') and self.params['RF']: + self.check_enough_options('SR', self.OPTS['SR'][0]) + self.ALLCNT = len(self.params['RF']) if 'RF' in self.params else 0 + self.set_remote_info() + elif self.PGOPT['ACTS'] == self.OPTS['SC'][0]: + self.ALLCNT = len(self.params['CI']) + self.set_control_info() + elif self.PGOPT['ACTS'] == self.OPTS['SL'][0]: + self.ALLCNT = len(self.params['LI']) + 
self.set_local_info() + elif self.PGOPT['ACTS'] == self.OPTS['SR'][0]: + self.ALLCNT = len(self.params['RF']) + self.set_remote_info() + elif self.PGOPT['ACTS']&self.OPTS['UF'][0]: + if 'CI' in self.params: + if self.cache_update_control(self.params['CI'][0], 1): self.dataset_update() + else: + self.ALLCNT = self.get_option_count(["ED", "EH"]) + self.dataset_update() + elif self.PGOPT['ACTS'] == self.OPTS['UL'][0]: + if 'CI' in self.params: + self.ALLCNT = len(self.params['CI']) + self.unlock_control_info() + if 'LI' in self.params: + self.ALLCNT = len(self.params['LI']) + self.unlock_update_info() + if self.SUBJECT and 'NE' not in self.params and (self.PGLOG['ERRCNT'] or 'EE' not in self.params): + self.SUBJECT += " on " + self.PGLOG['HOSTNAME'] + self.set_email("{}: {}".format(self.SUBJECT, self.TOPMSG), self.EMLTOP) + if self.ACTSTR: self.SUBJECT = "{} for {}".format(self.ACTSTR, self.SUBJECT) + if self.PGSIG['PPID'] > 1: self.SUBJECT += " in CPID {}".format(self.PGSIG['PID']) + if self.PGLOG['ERRCNT'] > 0: self.SUBJECT += " With Error" + if self.PGLOG['DSCHECK']: + self.build_customized_email("dscheck", "einfo", "cindex = {}".format(self.PGLOG['DSCHECK']['cindex']), + self.SUBJECT, self.PGOPT['wrnlog']) + elif self.PGOPT['UCNTL']: + self.build_customized_email("dcupdt", "einfo", "cindex = {}".format(self.PGOPT['UCNTL']['cindex']), + self.SUBJECT, self.PGOPT['wrnlog']) + else: + self.pglog(self.SUBJECT, self.PGOPT['wrnlog']|self.SNDEML) + if self.PGLOG['DSCHECK']: + if self.PGLOG['ERRMSG']: + self.record_dscheck_error(self.PGLOG['ERRMSG']) + else: + self.record_dscheck_status("D") + if self.OPTS[self.PGOPT['CACT']][2]: self.cmdlog() # log end time if not getting only action + + # delete update control records for given dsid and control indices + def delete_control_info(self): + s = 's' if self.ALLCNT > 1 else '' + self.pglog("Delete {} update control record{} ...".format(self.ALLCNT, s), self.WARNLG) + delcnt = modcnt = 0 + for i in range(self.ALLCNT): + cidx = 
self.lock_update_control(self.params['CI'][i], 2, self.PGOPT['extlog']) + if cidx <= 0: continue + ccnd = "cindex = {}".format(cidx) + delcnt += self.pgdel("dcupdt", ccnd, self.PGOPT['extlog']) + modcnt += self.pgexec("UPDATE dlupdt SET cindex = 0 WHERE " + ccnd, self.PGOPT['extlog']) + self.pglog("{} of {} update control record{} deleted".format(delcnt, self.ALLCNT, s), self.PGOPT['wrnlog']) + if modcnt > 0: + s = 's' if modcnt > 1 else '' + self.pglog("{} associated local file record{} modified".format(modcnt, s), self.PGOPT['wrnlog']) + + # delete local files for given dsid and locfile indices + def delete_local_info(self): + s = 's' if self.ALLCNT > 1 else '' + self.pglog("Delete {} Locfile record{} ...".format(self.ALLCNT, s), self.WARNLG) + dcnt = delcnt = 0 + for i in range(self.ALLCNT): + lidx = self.params['LI'][i] + lcnd = "lindex = {}".format(lidx) + if self.lock_update(lidx, None, 2, self.PGOPT['errlog']) <= 0: continue + cnt = self.pgget("drupdt", "", lcnd, self.PGOPT['extlog']) + if cnt > 0: + ss = 's' if cnt > 1 else '' + self.pglog("Delete {} associated remote file record{} for Locfile index {} ...".format(cnt, ss, lidx), self.WARNLG) + dcnt += self.pgdel("drupdt", lcnd, self.PGOPT['extlog']) + delcnt += self.pgdel("dlupdt", lcnd, self.PGOPT['extlog']) + self.pglog("{} of {} Locfile record{} deleted".format(delcnt, self.ALLCNT, s), self.PGOPT['wrnlog']) + if dcnt > 0: + s = "s" if (dcnt > 1) else "" + self.pglog("{} associated Remote file record{} deleted too".format(dcnt, s), self.PGOPT['wrnlog']) + + # delete update remote files for given dsid and remote files/locfile indices + def delete_remote_info(self): + s = 's' if self.ALLCNT > 1 else '' + self.pglog("Delete {} remote file record{} ...".format(self.ALLCNT, s), self.WARNLG) + self.validate_multiple_options(self.ALLCNT, ["LI", "DO"]) + delcnt = 0 + for i in range(self.ALLCNT): + lcnd = "lindex = {} AND remotefile = '{}'".format(self.params['LI'][i], self.params['RF'][i]) + if 'DO' in 
self.params: lcnd += " AND dindex = {}".format(self.params['DO'][i]) + delcnt += self.pgdel("drupdt", lcnd, self.PGOPT['extlog']) + self.pglog("{} of {} remote file record{} deleted".format(delcnt, self.ALLCNT, s), self.PGOPT['wrnlog']) + + # get update control information + def get_control_info(self): + tname = "dcupdt" + hash = self.TBLHASH[tname] + self.pglog("Get update control info of {} from RDADB ...".format(self.params['DS']), self.WARNLG) + lens = fnames = None + if 'FN' in self.params: fnames = self.params['FN'] + fnames = self.fieldname_string(fnames, self.PGOPT[tname], self.PGOPT['dcall']) + onames = self.params['ON'] if 'ON' in self.params else "C" + condition = self.file_condition(tname) + self.get_order_string(onames, tname) + pgrecs = self.pgmget(tname, "*", condition, self.PGOPT['extlog']) + if pgrecs and 'FO' in self.params: lens = self.all_column_widths(pgrecs, fnames, hash) + self.OUTPUT.write("{}{}{}\n".format(self.OPTS['DS'][1], self.params['ES'], self.params['DS'])) + if self.PGOPT['CACT'] == "GA": self.OUTPUT.write("[{}]\n".format(tname.upper())) + self.OUTPUT.write(self.get_string_titles(fnames, hash, lens) + "\n") + if pgrecs: + cnt = self.print_column_format(pgrecs, fnames, hash, lens) + s = 's' if cnt > 1 else '' + self.pglog("{} update control record{} retrieved".format(cnt, s), self.PGOPT['wrnlog']) else: - PgCMD.record_dscheck_status("D") - - if PgOPT.OPTS[PgOPT.PGOPT['CACT']][2]: PgLOG.cmdlog() # log end time if not getting only action - - PgLOG.pgexit(0) + self.pglog("no update control record retrieved", self.PGOPT['wrnlog']) -# -# start action of dsupdt -# -def start_action(): - - global ALLCNT - - if PgOPT.PGOPT['ACTS']&PgOPT.OPTS['CU'][0]: - if 'CI' in PgOPT.params: - if PgUpdt.cache_update_control(PgOPT.params['CI'][0], 1): - check_dataset_status() + # get local file update information + def get_local_info(self): + tname = "dlupdt" + hash = self.TBLHASH[tname] + self.pglog("Get local file update info of {} from RDADB 
...".format(self.params['DS']), self.WARNLG) + lens = fnames = None + if 'FN' in self.params: fnames = self.params['FN'] + fnames = self.fieldname_string(fnames, self.PGOPT[tname], self.PGOPT['dlall']) + onames = self.params['ON'] if 'ON' in self.params else "XL" + condition = self.file_condition(tname) + self.get_order_string(onames, tname) + pgrecs = self.pgmget(tname, "*", condition, self.PGOPT['extlog']) + if pgrecs and 'FO' in self.params: lens = self.all_column_widths(pgrecs, fnames, hash) + if self.PGOPT['CACT'] == "GL": + self.OUTPUT.write("{}{}{}\n".format(self.OPTS['DS'][1], self.params['ES'], self.params['DS'])) else: - ALLCNT = PgOPT.get_option_count(["ED", "EH"]) - check_dataset_status(0) - elif PgOPT.PGOPT['ACTS'] == PgOPT.OPTS['DL'][0]: - if 'CI' in PgOPT.params: - ALLCNT = len(PgOPT.params['CI']) - delete_control_info() - elif 'RF' in PgOPT.params: - ALLCNT = len(PgOPT.params['RF']) - delete_remote_info() + self.OUTPUT.write("[{}]\n".format(tname.upper())) + self.OUTPUT.write(self.get_string_titles(fnames, hash, lens) + "\n") + if pgrecs: + cnt = self.print_column_format(pgrecs, fnames, hash, lens) + s = 's' if cnt > 1 else '' + self.pglog("{} locfile record{} retrieved".format(cnt, s), self.PGOPT['wrnlog']) else: - ALLCNT = len(PgOPT.params['LI']) - delete_local_info() - elif PgOPT.OPTS[PgOPT.PGOPT['CACT']][0]&PgOPT.OPTS['GA'][0]: - get_update_info() - elif PgOPT.PGOPT['CACT'] == 'PC': - process_update_controls() - elif PgOPT.PGOPT['ACTS'] == PgOPT.OPTS['SA'][0]: - if 'IF' not in PgOPT.params: - PgOPT.action_error("Missing input file via Option -IF") - if PgOPT.get_input_info(PgOPT.params['IF'], 'DCUPDT'): - PgUpdt.check_enough_options('SC', PgOPT.OPTS['SC'][0]) - ALLCNT = len(PgOPT.params['CI']) - set_control_info() - if PgOPT.get_input_info(PgOPT.params['IF'], 'DLUPDT'): - PgUpdt.check_enough_options('SL', PgOPT.OPTS['SL'][0]) - ALLCNT = len(PgOPT.params['LI']) - set_local_info() - if PgOPT.get_input_info(PgOPT.params['IF'], 'DRUPDT') and 
PgOPT.params['RF']: - PgUpdt.check_enough_options('SR', PgOPT.OPTS['SR'][0]) - ALLCNT = len(PgOPT.params['RF']) if 'RF' in PgOPT.params else 0 - set_remote_info() - elif PgOPT.PGOPT['ACTS'] == PgOPT.OPTS['SC'][0]: - ALLCNT = len(PgOPT.params['CI']) - set_control_info() - elif PgOPT.PGOPT['ACTS'] == PgOPT.OPTS['SL'][0]: - ALLCNT = len(PgOPT.params['LI']) - set_local_info() - elif PgOPT.PGOPT['ACTS'] == PgOPT.OPTS['SR'][0]: - ALLCNT = len(PgOPT.params['RF']) - set_remote_info() - elif PgOPT.PGOPT['ACTS']&PgOPT.OPTS['UF'][0]: - if 'CI' in PgOPT.params: - if PgUpdt.cache_update_control(PgOPT.params['CI'][0], 1): dataset_update() + self.pglog("no locfile record retrieved", self.PGOPT['wrnlog']) + + # get remote file update information + def get_remote_info(self): + tname = "drupdt" + hash = self.TBLHASH[tname] + self.pglog("Get remote file update info of {} from RDADB ...".format(self.params['DS']), self.WARNLG) + lens = fnames = None + if 'FN' in self.params: fnames = self.params['FN'] + fnames = self.fieldname_string(fnames, self.PGOPT[tname], self.PGOPT['drall']) + onames = self.params['ON'] if 'ON' in self.params else "LDF" + condition = self.file_condition(tname) + self.get_order_string(onames, tname) + pgrecs = self.pgmget(tname, "*", condition, self.PGOPT['extlog']) + if pgrecs and 'FO' in self.params: lens = self.all_column_widths(pgrecs, fnames, hash) + if self.PGOPT['CACT'] == "GR": + self.OUTPUT.write("{}{}{}\n".format(self.OPTS['DS'][1], self.params['ES'], self.params['DS'])) else: - ALLCNT = PgOPT.get_option_count(["ED", "EH"]) - dataset_update() - elif PgOPT.PGOPT['ACTS'] == PgOPT.OPTS['UL'][0]: - if 'CI' in PgOPT.params: - ALLCNT = len(PgOPT.params['CI']) - unlock_control_info() - if 'LI' in PgOPT.params: - ALLCNT = len(PgOPT.params['LI']) - unlock_update_info() - -# -# delete update control records for given dsid and control indices -# -def delete_control_info(): - - s = 's' if ALLCNT > 1 else '' - PgLOG.pglog("Delete {} update control record{} 
...".format(ALLCNT, s), PgLOG.WARNLG) - - delcnt = modcnt = 0 - for i in range(ALLCNT): - cidx = PgLock.lock_update_control(PgOPT.params['CI'][i], 2, PgOPT.PGOPT['extlog']) - if cidx <= 0: continue - ccnd = "cindex = {}".format(cidx) - delcnt += PgDBI.pgdel("dcupdt", ccnd, PgOPT.PGOPT['extlog']) - modcnt += PgDBI.pgexec("UPDATE dlupdt SET cindex = 0 WHERE " + ccnd, PgOPT.PGOPT['extlog']) - - PgLOG.pglog("{} of {} update control record{} deleted".format(delcnt, ALLCNT, s), PgOPT.PGOPT['wrnlog']) - if modcnt > 0: - s = 's' if modcnt > 1 else '' - PgLOG.pglog("{} associated local file record{} modified".format(modcnt, s), PgOPT.PGOPT['wrnlog']) - -# -# delete local files for given dsid and locfile indices -# -def delete_local_info(): - - s = 's' if ALLCNT > 1 else '' - PgLOG.pglog("Delete {} Locfile record{} ...".format(ALLCNT, s), PgLOG.WARNLG) - - dcnt = delcnt = 0 - for i in range(ALLCNT): - lidx = PgOPT.params['LI'][i] - lcnd = "lindex = {}".format(lidx) - if PgLock.lock_update(lidx, None, 2, PgOPT.PGOPT['errlog']) <= 0: continue - cnt = PgDBI.pgget("drupdt", "", lcnd, PgOPT.PGOPT['extlog']) - if cnt > 0: - ss = 's' if cnt > 1 else '' - PgLOG.pglog("Delete {} associated remote file record{} for Locfile index {} ...".format(cnt, ss, lidx), PgLOG.WARNLG) - dcnt += PgDBI.pgdel("drupdt", lcnd, PgOPT.PGOPT['extlog']) - delcnt += PgDBI.pgdel("dlupdt", lcnd, PgOPT.PGOPT['extlog']) - - PgLOG.pglog("{} of {} Locfile record{} deleted".format(delcnt, ALLCNT, s), PgOPT.PGOPT['wrnlog']) - if dcnt > 0: - s = "s" if (dcnt > 1) else "" - PgLOG.pglog("{} associated Remote file record{} deleted too".format(dcnt, s), PgOPT.PGOPT['wrnlog']) - -# -# delete update remote files for given dsid and remote files/locfile indices -# -def delete_remote_info(): - - s = 's' if ALLCNT > 1 else '' - PgLOG.pglog("Delete {} remote file record{} ...".format(ALLCNT, s), PgLOG.WARNLG) - - PgOPT.validate_multiple_options(ALLCNT, ["LI", "DO"]) - delcnt = 0 - for i in range(ALLCNT): - lcnd = "lindex = {} 
AND remotefile = '{}'".format(PgOPT.params['LI'][i], PgOPT.params['RF'][i]) - if 'DO' in PgOPT.params: lcnd += " AND dindex = {}".format(PgOPT.params['DO'][i]) - delcnt += PgDBI.pgdel("drupdt", lcnd, PgOPT.PGOPT['extlog']) - - PgLOG.pglog("{} of {} remote file record{} deleted".format(delcnt, ALLCNT, s), PgOPT.PGOPT['wrnlog']) - -# -# get update control information -# -def get_control_info(): - - tname = "dcupdt" - hash = PgOPT.TBLHASH[tname] - PgLOG.pglog("Get update control info of {} from RDADB ...".format(PgOPT.params['DS']), PgLOG.WARNLG) - - lens = fnames = None - if 'FN' in PgOPT.params: fnames = PgOPT.params['FN'] - fnames = PgDBI.fieldname_string(fnames, PgOPT.PGOPT[tname], PgOPT.PGOPT['dcall']) - onames = PgOPT.params['ON'] if 'ON' in PgOPT.params else "C" - condition = PgUpdt.file_condition(tname) + PgOPT.get_order_string(onames, tname) - pgrecs = PgDBI.pgmget(tname, "*", condition, PgOPT.PGOPT['extlog']) - if pgrecs and 'FO' in PgOPT.params: lens = PgUtil.all_column_widths(pgrecs, fnames, hash) - PgOPT.OUTPUT.write("{}{}{}\n".format(PgOPT.OPTS['DS'][1], PgOPT.params['ES'], PgOPT.params['DS'])) - if PgOPT.PGOPT['CACT'] == "GA": PgOPT.OUTPUT.write("[{}]\n".format(tname.upper())) - PgOPT.OUTPUT.write(PgOPT.get_string_titles(fnames, hash, lens) + "\n") - if pgrecs: - cnt = PgOPT.print_column_format(pgrecs, fnames, hash, lens) - s = 's' if cnt > 1 else '' - PgLOG.pglog("{} update control record{} retrieved".format(cnt, s), PgOPT.PGOPT['wrnlog']) - else: - PgLOG.pglog("no update control record retrieved", PgOPT.PGOPT['wrnlog']) - -# -# get local file update information -# -def get_local_info(): - - tname = "dlupdt" - hash = PgOPT.TBLHASH[tname] - PgLOG.pglog("Get local file update info of {} from RDADB ...".format(PgOPT.params['DS']), PgLOG.WARNLG) - - lens = fnames = None - if 'FN' in PgOPT.params: fnames = PgOPT.params['FN'] - fnames = PgDBI.fieldname_string(fnames, PgOPT.PGOPT[tname], PgOPT.PGOPT['dlall']) - onames = PgOPT.params['ON'] if 'ON' in 
PgOPT.params else "XL" - condition = PgUpdt.file_condition(tname) + PgOPT.get_order_string(onames, tname) - pgrecs = PgDBI.pgmget(tname, "*", condition, PgOPT.PGOPT['extlog']) - if pgrecs and 'FO' in PgOPT.params: lens = PgUtil.all_column_widths(pgrecs, fnames, hash) - if PgOPT.PGOPT['CACT'] == "GL": - PgOPT.OUTPUT.write("{}{}{}\n".format(PgOPT.OPTS['DS'][1], PgOPT.params['ES'], PgOPT.params['DS'])) - else: - PgOPT.OUTPUT.write("[{}]\n".format(tname.upper())) - PgOPT.OUTPUT.write(PgOPT.get_string_titles(fnames, hash, lens) + "\n") - if pgrecs: - cnt = PgOPT.print_column_format(pgrecs, fnames, hash, lens) - s = 's' if cnt > 1 else '' - PgLOG.pglog("{} locfile record{} retrieved".format(cnt, s), PgOPT.PGOPT['wrnlog']) - else: - PgLOG.pglog("no locfile record retrieved", PgOPT.PGOPT['wrnlog']) - -# -# get remote file update information -# -def get_remote_info(): - - tname = "drupdt" - hash = PgOPT.TBLHASH[tname] - PgLOG.pglog("Get remote file update info of {} from RDADB ...".format(PgOPT.params['DS']), PgLOG.WARNLG) - - lens = fnames = None - if 'FN' in PgOPT.params: fnames = PgOPT.params['FN'] - fnames = PgDBI.fieldname_string(fnames, PgOPT.PGOPT[tname], PgOPT.PGOPT['drall']) - onames = PgOPT.params['ON'] if 'ON' in PgOPT.params else "LDF" - condition = PgUpdt.file_condition(tname) + PgOPT.get_order_string(onames, tname) - pgrecs = PgDBI.pgmget(tname, "*", condition, PgOPT.PGOPT['extlog']) - if pgrecs and 'FO' in PgOPT.params: lens = PgUtil.all_column_widths(pgrecs, fnames, hash) - if PgOPT.PGOPT['CACT'] == "GR": - PgOPT.OUTPUT.write("{}{}{}\n".format(PgOPT.OPTS['DS'][1], PgOPT.params['ES'], PgOPT.params['DS'])) - else: - PgOPT.OUTPUT.write("[{}]\n".format(tname.upper())) - PgOPT.OUTPUT.write(PgOPT.get_string_titles(fnames, hash, lens) + "\n") - if pgrecs: - cnt = PgOPT.print_column_format(pgrecs, fnames, hash, lens) - s = 's' if cnt > 1 else '' - PgLOG.pglog("{} remote file record{} retrieved".format(cnt, s), PgOPT.PGOPT['wrnlog']) - else: - PgLOG.pglog("no remote 
file record retrieved", PgOPT.PGOPT['wrnlog']) - -# -# add or modify update control information -# -def set_control_info(): - - tname = 'dcupdt' - s = 's' if ALLCNT > 1 else '' - PgLOG.pglog("Set {} update control record{} ...".format(ALLCNT, s), PgLOG.WARNLG) - - addcnt = modcnt = 0 - flds = PgOPT.get_field_keys(tname, None, 'C') - if not flds: return PgLOG.pglog("Nothing to set for update control!", PgOPT.PGOPT['errlog']) - PgOPT.validate_multiple_values(tname, ALLCNT, flds) - fields = PgOPT.get_string_fields(flds, tname) - - for i in range(ALLCNT): - cidx = PgOPT.params['CI'][i] - if cidx > 0: - if PgLock.lock_update_control(cidx, 2, PgOPT.PGOPT['errlog']) <= 0: continue - cnd = "cindex = {}".format(cidx) - pgrec = PgDBI.pgget(tname, fields, cnd, PgOPT.PGOPT['errlog']) - if not pgrec: PgOPT.action_error("Error get update control record for " + cnd) + self.OUTPUT.write("[{}]\n".format(tname.upper())) + self.OUTPUT.write(self.get_string_titles(fnames, hash, lens) + "\n") + if pgrecs: + cnt = self.print_column_format(pgrecs, fnames, hash, lens) + s = 's' if cnt > 1 else '' + self.pglog("{} remote file record{} retrieved".format(cnt, s), self.PGOPT['wrnlog']) else: - pgrec = None - - record = PgOPT.build_record(flds, pgrec, tname, i) - if record: - if 'pindex' in record and record['pindex'] and not PgDBI.pgget("dcupdt", "", "cindex = {}".format(record['pindex'])): - PgOPT.action_error("Parent control Index {} is not in RDADB".format(record['pindex'])) - if 'action' in record and not re.match(r'^({})$'.format(PgOPT.PGOPT['UPDTACTS']), record['action']): - PgOPT.action_error("Action Name '{}' must be one of dsupdt Actions ({})".format(record['action'], PgOPT.PGOPT['UPDTACTS'])) - if pgrec: - record['pid'] = 0 - record['lockhost'] = '' - modcnt += PgDBI.pgupdt(tname, record, cnd, PgOPT.PGOPT['errlog']|PgLOG.DODFLT) + self.pglog("no remote file record retrieved", self.PGOPT['wrnlog']) + + # add or modify update control information + def set_control_info(self): + tname = 
'dcupdt' + s = 's' if self.ALLCNT > 1 else '' + self.pglog("Set {} update control record{} ...".format(self.ALLCNT, s), self.WARNLG) + addcnt = modcnt = 0 + flds = self.get_field_keys(tname, None, 'C') + if not flds: return self.pglog("Nothing to set for update control!", self.PGOPT['errlog']) + self.validate_multiple_values(tname, self.ALLCNT, flds) + fields = self.get_string_fields(flds, tname) + for i in range(self.ALLCNT): + cidx = self.params['CI'][i] + if cidx > 0: + if self.lock_update_control(cidx, 2, self.PGOPT['errlog']) <= 0: continue + cnd = "cindex = {}".format(cidx) + pgrec = self.pgget(tname, fields, cnd, self.PGOPT['errlog']) + if not pgrec: self.action_error("Error get update control record for " + cnd) else: - record['dsid'] = PgOPT.params['DS'] - if 'specialist' not in record: record['specialist'] = PgOPT.params['LN'] - addcnt += PgDBI.pgadd(tname, record, PgOPT.PGOPT['errlog']|PgLOG.DODFLT) - elif cidx: # unlock - PgLock.lock_update_control(cidx, 0, PgOPT.PGOPT['errlog']) - - PgLOG.pglog("{}/{} of {} control record{} added/modified".format(addcnt, modcnt, ALLCNT, s), PgOPT.PGOPT['wrnlog']) - -# -# add or modify local file update information -# -def set_local_info(): - - tname = 'dlupdt' - s = 's' if ALLCNT > 1 else '' - PgLOG.pglog("Set {} local file record{} ...".format(ALLCNT, s), PgLOG.WARNLG) - - addcnt = modcnt = 0 - flds = PgOPT.get_field_keys(tname, None, 'L') - if 'RO' in PgOPT.params and 'XO' not in PgOPT.params: flds += 'X' - if not flds: return PgLOG.pglog("Nothing to set for update local file!", PgOPT.PGOPT['errlog']) - PgOPT.validate_multiple_values(tname, ALLCNT, flds) - fields = PgOPT.get_string_fields(flds, tname) - - for i in range(ALLCNT): - lidx = PgOPT.params['LI'][i] - if lidx > 0: - if PgLock.lock_update(lidx, None, 2, PgOPT.PGOPT['errlog']) <= 0: continue + pgrec = None + record = self.build_record(flds, pgrec, tname, i) + if record: + if 'pindex' in record and record['pindex'] and not self.pgget("dcupdt", "", "cindex = 
{}".format(record['pindex'])): + self.action_error("Parent control Index {} is not in RDADB".format(record['pindex'])) + if 'action' in record and not re.match(r'^({})$'.format(self.PGOPT['UPDTACTS']), record['action']): + self.action_error("Action Name '{}' must be one of dsupdt Actions ({})".format(record['action'], self.PGOPT['UPDTACTS'])) + if pgrec: + record['pid'] = 0 + record['lockhost'] = '' + modcnt += self.pgupdt(tname, record, cnd, self.PGOPT['errlog']|self.DODFLT) + else: + record['dsid'] = self.params['DS'] + if 'specialist' not in record: record['specialist'] = self.params['LN'] + addcnt += self.pgadd(tname, record, self.PGOPT['errlog']|self.DODFLT) + elif cidx: # unlock + self.lock_update_control(cidx, 0, self.PGOPT['errlog']) + self.pglog("{}/{} of {} control record{} added/modified".format(addcnt, modcnt, self.ALLCNT, s), self.PGOPT['wrnlog']) + + # add or modify local file update information + def set_local_info(self): + tname = 'dlupdt' + s = 's' if self.ALLCNT > 1 else '' + self.pglog("Set {} local file record{} ...".format(self.ALLCNT, s), self.WARNLG) + addcnt = modcnt = 0 + flds = self.get_field_keys(tname, None, 'L') + if 'RO' in self.params and 'XO' not in self.params: flds += 'X' + if not flds: return self.pglog("Nothing to set for update local file!", self.PGOPT['errlog']) + self.validate_multiple_values(tname, self.ALLCNT, flds) + fields = self.get_string_fields(flds, tname) + for i in range(self.ALLCNT): + lidx = self.params['LI'][i] + if lidx > 0: + if self.lock_update(lidx, None, 2, self.PGOPT['errlog']) <= 0: continue + cnd = "lindex = {}".format(lidx) + pgrec = self.pgget(tname, fields, cnd, self.PGOPT['errlog']) + if not pgrec: self.action_error("Error get Local file record for " + cnd) + else: + pgrec = None + if 'RO' in self.params: self.params['XO'][i] = self.get_next_exec_order(self.params['DS'], 0) + record = self.build_record(flds, pgrec, tname, i) + if record: + if 'cindex' in record and record['cindex'] and not 
self.pgget("dcupdt", "", "cindex = {}".format(record['cindex'])): + self.action_error("Update control Index {} is not in RDADB".format(record['cindex'])) + if 'action' in record and not re.match(r'^({})$'.format(self.PGOPT['ARCHACTS']), record['action']): + self.action_error("Action Name '{}' must be one of dsarch Actions ({})".format(record['action'], self.PGOPT['ARCHACTS'])) + if pgrec: + if 'VI' in record and not record['VI'] and pgrec['missdate']: record['missdate'] = record['misshour'] = None + record['pid'] = 0 + record['hostname'] = 0 + modcnt += self.pgupdt(tname, record, cnd, self.PGOPT['errlog']|self.DODFLT) + else: + record['dsid'] = self.params['DS'] + if 'specialist' not in record: record['specialist'] = self.params['LN'] + if 'execorder' not in record: record['execorder'] = self.get_next_exec_order(self.params['DS'], 1) + addcnt += self.pgadd(tname, record, self.PGOPT['errlog']|self.DODFLT) + elif lidx: # unlock + self.lock_update(lidx, None, 0, self.PGOPT['errlog']) + self.pglog("{}/{} of {} Locfile record{} added/modified".format(addcnt, modcnt, self.ALLCNT, s), self.PGOPT['wrnlog']) + + # add or modify remote file update information + def set_remote_info(self): + tname = 'drupdt' + s = 's' if self.ALLCNT > 1 else '' + self.pglog("Set {} update remote file{} ...".format(self.ALLCNT, s), self.WARNLG) + addcnt = modcnt = 0 + flds = self.get_field_keys(tname) + if not flds: return self.pglog("Nothing to set for update remote file!", self.PGOPT['errlog']) + self.validate_multiple_values(tname, self.ALLCNT, flds) + fields = self.get_string_fields(flds, tname) + for i in range(self.ALLCNT): + lidx = self.params['LI'][i] + didx = self.params['DO'][i] if 'DO' in self.params else 0 + cnd = "lindex = {} AND remotefile = '{}' AND dindex = {}".format(lidx, self.params['RF'][i], didx) + pgrec = self.pgget("drupdt", fields, cnd, self.PGOPT['errlog']) + record = self.build_record(flds, pgrec, tname, i) + if record: + if 'lindex' in record and record['lindex'] and 
not self.pgget("dlupdt", "", "lindex = {}".format(record['lindex'])): + self.action_error("Local file Index {} is not in RDADB".format(record['lindex'])) + if pgrec: + modcnt += self.pgupdt("drupdt", record, cnd, self.PGOPT['errlog']|self.DODFLT) + else: + record['lindex'] = lidx + record['dsid'] = self.params['DS'] + addcnt += self.pgadd("drupdt", record, self.PGOPT['errlog']|self.DODFLT) + self.pglog("{}/{} of {} remote file record{} added/modified".format(addcnt, modcnt, self.ALLCNT, s), self.PGOPT['wrnlog']) + + # unlock update records for given locfile indices + def unlock_update_info(self): + s = 's' if self.ALLCNT > 1 else '' + self.pglog("Unlock {} update locfile{} ...".format(self.ALLCNT, s), self.WARNLG) + modcnt = 0 + for lidx in self.params['LI']: cnd = "lindex = {}".format(lidx) - pgrec = PgDBI.pgget(tname, fields, cnd, PgOPT.PGOPT['errlog']) - if not pgrec: PgOPT.action_error("Error get Local file record for " + cnd) - else: - pgrec = None - - if 'RO' in PgOPT.params: PgOPT.params['XO'][i] = PgUpdt.get_next_exec_order(PgOPT.params['DS'], 0) - record = PgOPT.build_record(flds, pgrec, tname, i) - if record: - if 'cindex' in record and record['cindex'] and not PgDBI.pgget("dcupdt", "", "cindex = {}".format(record['cindex'])): - PgOPT.action_error("Update control Index {} is not in RDADB".format(record['cindex'])) - if 'action' in record and not re.match(r'^({})$'.format(PgOPT.PGOPT['ARCHACTS']), record['action']): - PgOPT.action_error("Action Name '{}' must be one of dsarch Actions ({})".format(record['action'], PgOPT.PGOPT['ARCHACTS'])) - - if pgrec: - if 'VI' in record and not record['VI'] and pgrec['missdate']: record['missdate'] = record['misshour'] = None - record['pid'] = 0 - record['hostname'] = 0 - modcnt += PgDBI.pgupdt(tname, record, cnd, PgOPT.PGOPT['errlog']|PgLOG.DODFLT) + pgrec = self.pgget("dlupdt", "pid, hostname", cnd, self.PGOPT['extlog']) + if not pgrec: + self.pglog("{}: Local File Not exists".format(lidx), self.PGOPT['errlog']) + 
elif not pgrec['pid']: + self.pglog("{}: Local File Not locked".format(lidx), self.PGOPT['wrnlog']) + elif self.lock_update(lidx, None, -1, self.PGOPT['errlog']) > 0: + modcnt += 1 + self.pglog("{}: Local File Unlocked {}/{}".format(lidx, pgrec['pid'], pgrec['hostname']), self.PGOPT['wrnlog']) + elif (self.check_host_down(None, pgrec['hostname']) and + self.lock_update(lidx, None, -2, self.PGOPT['errlog']) > 0): + modcnt += 1 + self.pglog("{}: Local File Force unlocked {}/{}".format(lidx, pgrec['pid'], pgrec['hostname']), self.PGOPT['wrnlog']) else: - record['dsid'] = PgOPT.params['DS'] - if 'specialist' not in record: record['specialist'] = PgOPT.params['LN'] - if 'execorder' not in record: record['execorder'] = PgUpdt.get_next_exec_order(PgOPT.params['DS'], 1) - addcnt += PgDBI.pgadd(tname, record, PgOPT.PGOPT['errlog']|PgLOG.DODFLT) - elif lidx: # unlock - PgLock.lock_update(lidx, None, 0, PgOPT.PGOPT['errlog']) - - PgLOG.pglog("{}/{} of {} Locfile record{} added/modified".format(addcnt, modcnt, ALLCNT, s), PgOPT.PGOPT['wrnlog']) - -# -# add or modify remote file update information -# -def set_remote_info(): - - tname = 'drupdt' - s = 's' if ALLCNT > 1 else '' - PgLOG.pglog("Set {} update remote file{} ...".format(ALLCNT, s), PgLOG.WARNLG) - - addcnt = modcnt = 0 - flds = PgOPT.get_field_keys(tname) - if not flds: return PgLOG.pglog("Nothing to set for update remote file!", PgOPT.PGOPT['errlog']) - PgOPT.validate_multiple_values(tname, ALLCNT, flds) - fields = PgOPT.get_string_fields(flds, tname) - - for i in range(ALLCNT): - lidx = PgOPT.params['LI'][i] - didx = PgOPT.params['DO'][i] if 'DO' in PgOPT.params else 0 - cnd = "lindex = {} AND remotefile = '{}' AND dindex = {}".format(lidx, PgOPT.params['RF'][i], didx) - pgrec = PgDBI.pgget("drupdt", fields, cnd, PgOPT.PGOPT['errlog']) - record = PgOPT.build_record(flds, pgrec, tname, i) - if record: - if 'lindex' in record and record['lindex'] and not PgDBI.pgget("dlupdt", "", "lindex = 
{}".format(record['lindex'])): - PgOPT.action_error("Local file Index {} is not in RDADB".format(record['lindex'])) - - if pgrec: - modcnt += PgDBI.pgupdt("drupdt", record, cnd, PgOPT.PGOPT['errlog']|PgLOG.DODFLT) + self.pglog("{}: Local File Unable to unlock {}/{}".format(lidx, pgrec['pid'], pgrec['hostname']), self.PGOPT['wrnlog']) + self.pglog("{} of {} local file record{} unlocked from RDADB".format(modcnt, self.ALLCNT, s), self.LOGWRN) + + # unlock update control records for given locfile indices + def unlock_control_info(self): + s = 's' if self.ALLCNT > 1 else '' + self.pglog("Unlock {} update control{} ...".format(self.ALLCNT, s), self.WARNLG) + modcnt = 0 + for cidx in self.params['CI']: + pgrec = self.pgget("dcupdt", "pid, lockhost", "cindex = {}".format(cidx), self.PGOPT['extlog']) + if not pgrec: + self.pglog("{}: Update Control Not exists".format(cidx), self.PGOPT['errlog']) + elif not pgrec['pid']: + self.pglog("{}: Update Control Not locked".format(cidx), self.PGOPT['wrnlog']) + elif self.lock_update_control(cidx, -1, self.PGOPT['extlog']) > 0: + modcnt += 1 + self.pglog("{}: Update Control Unlocked {}/{}".format(cidx, pgrec['pid'], pgrec['lockhost']), self.PGOPT['wrnlog']) + elif (self.check_host_down(None, pgrec['lockhost']) and + self.lock_update_control(cidx, -2, self.PGOPT['extlog']) > 0): + modcnt += 1 + self.pglog("{}: Update Control Force unlocked {}/{}".format(cidx, pgrec['pid'], pgrec['lockhost']), self.PGOPT['wrnlog']) else: - record['lindex'] = lidx - record['dsid'] = PgOPT.params['DS'] - addcnt += PgDBI.pgadd("drupdt", record, PgOPT.PGOPT['errlog']|PgLOG.DODFLT) - - PgLOG.pglog("{}/{} of {} remote file record{} added/modified".format(addcnt, modcnt, ALLCNT, s), PgOPT.PGOPT['wrnlog']) - -# -# unlock update records for given locfile indices -# -def unlock_update_info(): - - s = 's' if ALLCNT > 1 else '' - PgLOG.pglog("Unlock {} update locfile{} ...".format(ALLCNT, s), PgLOG.WARNLG) - - modcnt = 0 - for lidx in PgOPT.params['LI']: - cnd = 
"lindex = {}".format(lidx) - pgrec = PgDBI.pgget("dlupdt", "pid, hostname", cnd, PgOPT.PGOPT['extlog']) - if not pgrec: - PgLOG.pglog("{}: Local File Not exists".format(lidx), PgOPT.PGOPT['errlog']) - elif not pgrec['pid']: - PgLOG.pglog("{}: Local File Not locked".format(lidx), PgOPT.PGOPT['wrnlog']) - elif PgLock.lock_update(lidx, None, -1, PgOPT.PGOPT['errlog']) > 0: - modcnt += 1 - PgLOG.pglog("{}: Local File Unlocked {}/{}".format(lidx, pgrec['pid'], pgrec['hostname']), PgOPT.PGOPT['wrnlog']) - elif (PgFile.check_host_down(None, pgrec['hostname']) and - PgLock.lock_update(lidx, None, -2, PgOPT.PGOPT['errlog']) > 0): - modcnt += 1 - PgLOG.pglog("{}: Local File Force unlocked {}/{}".format(lidx, pgrec['pid'], pgrec['hostname']), PgOPT.PGOPT['wrnlog']) - else: - PgLOG.pglog("{}: Local File Unable to unlock {}/{}".format(lidx, pgrec['pid'], pgrec['hostname']), PgOPT.PGOPT['wrnlog']) - - PgLOG.pglog("{} of {} local file record{} unlocked from RDADB".format(modcnt, ALLCNT, s), PgLOG.LOGWRN) - -# -# unlock update control records for given locfile indices -# -def unlock_control_info(): - - s = 's' if ALLCNT > 1 else '' - PgLOG.pglog("Unlock {} update control{} ...".format(ALLCNT, s), PgLOG.WARNLG) - - modcnt = 0 - for cidx in PgOPT.params['CI']: - pgrec = PgDBI.pgget("dcupdt", "pid, lockhost", "cindex = {}".format(cidx), PgOPT.PGOPT['extlog']) - if not pgrec: - PgLOG.pglog("{}: Update Control Not exists".format(cidx), PgOPT.PGOPT['errlog']) - elif not pgrec['pid']: - PgLOG.pglog("{}: Update Control Not locked".format(cidx), PgOPT.PGOPT['wrnlog']) - elif PgLock.lock_update_control(cidx, -1, PgOPT.PGOPT['extlog']) > 0: - modcnt += 1 - PgLOG.pglog("{}: Update Control Unlocked {}/{}".format(cidx, pgrec['pid'], pgrec['lockhost']), PgOPT.PGOPT['wrnlog']) - elif (PgFile.check_host_down(None, pgrec['lockhost']) and - PgLock.lock_update_control(cidx, -2, PgOPT.PGOPT['extlog']) > 0): - modcnt += 1 - PgLOG.pglog("{}: Update Control Force unlocked {}/{}".format(cidx, 
pgrec['pid'], pgrec['lockhost']), PgOPT.PGOPT['wrnlog']) + self.pglog("{}: Undate Control Unable to unlock {}/{}".format(cidx, pgrec['pid'], pgrec['lockhost']), self.PGOPT['wrnlog']) + self.pglog("{} of {} update control record{} unlocked from RDADB".format(modcnt, self.ALLCNT, s), self.LOGWRN) + + # get update info of local and remote files owned by login name + def get_update_info(self): + if 'DS' in self.params: + dsids = {'dsid' : [self.params['DS']]} + dscnt = 1 else: - PgLOG.pglog("{}: Undate Control Unable to unlock {}/{}".format(cidx, pgrec['pid'], pgrec['lockhost']), PgOPT.PGOPT['wrnlog']) - - PgLOG.pglog("{} of {} update control record{} unlocked from RDADB".format(modcnt, ALLCNT, s), PgLOG.LOGWRN) - -# -# get update info of local and remote files owned by login name -# -def get_update_info(): - - if 'DS' in PgOPT.params: - dsids = {'dsid' : [PgOPT.params['DS']]} - dscnt = 1 - else: - tname = "dlupdt" - cnd = PgUpdt.file_condition(tname, None, None, 1) - if not cnd: - PgOPT.set_default_value("SN", PgOPT.params['LN']) - cnd = PgUpdt.file_condition(tname, None, None, 1) - dsids = PgDBI.pgmget(tname, "DISTINCT dsid", cnd, PgOPT.PGOPT['extlog']) - dscnt = len(dsids['dsid']) if dsids else 0 - if dscnt == 0: - return PgLOG.pglog("NO dataset identified for giving condition", PgOPT.PGOPT['wrnlog']) - elif dscnt > 1: - PgLOG.pglog("Get Update Info for {} datasets".format(dscnt), PgOPT.PGOPT['wrnlog']) - - PgOPT.PGOPT['AUTODS'] = dscnt - - for i in range(dscnt): - PgOPT.params['DS'] = dsids['dsid'][i] - if PgOPT.PGOPT['ACTS'] == PgOPT.OPTS['GC'][0]: - get_control_info() - elif PgOPT.PGOPT['ACTS'] == PgOPT.OPTS['GL'][0]: - get_local_info() - elif PgOPT.PGOPT['ACTS'] == PgOPT.OPTS['GR'][0]: - get_remote_info() + tname = "dlupdt" + cnd = self.file_condition(tname, None, None, 1) + if not cnd: + self.set_default_value("SN", self.params['LN']) + cnd = self.file_condition(tname, None, None, 1) + dsids = self.pgmget(tname, "DISTINCT dsid", cnd, self.PGOPT['extlog']) + 
dscnt = len(dsids['dsid']) if dsids else 0 + if dscnt == 0: + return self.pglog("NO dataset identified for giving condition", self.PGOPT['wrnlog']) + elif dscnt > 1: + self.pglog("Get Update Info for {} datasets".format(dscnt), self.PGOPT['wrnlog']) + self.PGOPT['AUTODS'] = dscnt + for i in range(dscnt): + self.params['DS'] = dsids['dsid'][i] + if self.PGOPT['ACTS'] == self.OPTS['GC'][0]: + self.get_control_info() + elif self.PGOPT['ACTS'] == self.OPTS['GL'][0]: + self.get_local_info() + elif self.PGOPT['ACTS'] == self.OPTS['GR'][0]: + self.get_remote_info() + else: + if 'ON' in self.params: del self.params['ON'] # use default order string + if 'FN' not in self.params: self.params['FN'] = 'ALL' + if self.PGOPT['ACTS']&self.OPTS['GC'][0]: self.get_control_info() + if self.PGOPT['ACTS']&self.OPTS['GL'][0]: self.get_local_info() + if self.PGOPT['ACTS']&self.OPTS['GR'][0]: self.get_remote_info() + if dscnt > 1: self.pglog("Update Info of {} datasets retrieved".format(dscnt), self.PGOPT['wrnlog']) + + # gather due datasets for data update + def dataset_update(self): + actcnd = "specialist = '{}'".format(self.params['LN']) + if self.PGOPT['ACTS']&self.OPTS['AF'][0]: actcnd += " AND action IN ('AW', 'AS', 'AQ')" + (self.PGOPT['CURDATE'], self.PGOPT['CURHOUR']) = self.curdatehour() + if 'CD' not in self.params: self.params['CD'] = self.PGOPT['CURDATE'] # default to current date + if 'CH' not in self.params: self.params['CH'] = self.PGOPT['CURHOUR'] # default to current hour + if self.ALLCNT > 1 and self.params['MU']: del self.params['MU'] + if 'CN' in self.params and 'RD' in self.params: del self.params['CN'] + if 'CN' in self.params or 'RD' in self.params or 'RA' in self.params: + if 'MO' in self.params: del self.params['MO'] + elif 'MO' not in self.params and self.PGOPT['CACT'] == "UF": + self.params['MO'] = -1 + if 'DS' in self.params: + dsids = [self.params['DS']] + dscnt = 1 else: - if 'ON' in PgOPT.params: del PgOPT.params['ON'] # use default order string - if 'FN' 
not in PgOPT.params: PgOPT.params['FN'] = 'ALL' - if PgOPT.PGOPT['ACTS']&PgOPT.OPTS['GC'][0]: get_control_info() - if PgOPT.PGOPT['ACTS']&PgOPT.OPTS['GL'][0]: get_local_info() - if PgOPT.PGOPT['ACTS']&PgOPT.OPTS['GR'][0]: get_remote_info() - - if dscnt > 1: PgLOG.pglog("Update Info of {} datasets retrieved".format(dscnt), PgOPT.PGOPT['wrnlog']) - -# -# gather due datasets for data update -# -def dataset_update(): - - global SUBJECT, TOPMSG, ACTSTR - - actcnd = "specialist = '{}'".format(PgOPT.params['LN']) - if PgOPT.PGOPT['ACTS']&PgOPT.OPTS['AF'][0]: actcnd += " AND action IN ('AW', 'AS', 'AQ')" - (PgOPT.PGOPT['CURDATE'], PgOPT.PGOPT['CURHOUR']) = PgUtil.curdatehour() - if 'CD' not in PgOPT.params: PgOPT.params['CD'] = PgOPT.PGOPT['CURDATE'] # default to current date - if 'CH' not in PgOPT.params: PgOPT.params['CH'] = PgOPT.PGOPT['CURHOUR'] # default to current hour - if ALLCNT > 1 and PgOPT.params['MU']: del PgOPT.params['MU'] - if 'CN' in PgOPT.params and 'RD' in PgOPT.params: del PgOPT.params['CN'] - if 'CN' in PgOPT.params or 'RD' in PgOPT.params or 'RA' in PgOPT.params: - if 'MO' in PgOPT.params: del PgOPT.params['MO'] - elif 'MO' not in PgOPT.params and PgOPT.PGOPT['CACT'] == "UF": - PgOPT.params['MO'] = -1 - - if 'DS' in PgOPT.params: - dsids = [PgOPT.params['DS']] - dscnt = 1 - else: - if 'CI' not in PgOPT.params: actcnd += " AND cindex = 0" - loccnd = PgUpdt.file_condition('dlupdt', "LQFIXA", None, 1) - dscnd = actcnd - if loccnd: dscnd += " AND " + loccnd - pgrecs = PgDBI.pgmget("dlupdt", "DISTINCT dsid", dscnd, PgOPT.PGOPT['extlog']) - dsids = pgrecs['dsid'] if pgrecs else [] - dscnt = len(dsids) - if not dscnt: return PgLOG.pglog("NO dataset is due for update on {} for {}".format(PgOPT.params['CD'], PgOPT.params['LN']), PgOPT.PGOPT['wrnlog']) - PgOPT.PGOPT['AUTODS'] = dscnt - actcnd += " ORDER BY execorder, lindex" - - if PgLOG.PGLOG['DSCHECK']: - fcnt = 0 + if 'CI' not in self.params: actcnd += " AND cindex = 0" + loccnd = 
self.file_condition('dlupdt', "LQFIXA", None, 1) + dscnd = actcnd + if loccnd: dscnd += " AND " + loccnd + pgrecs = self.pgmget("dlupdt", "DISTINCT dsid", dscnd, self.PGOPT['extlog']) + dsids = pgrecs['dsid'] if pgrecs else [] + dscnt = len(dsids) + if not dscnt: return self.pglog("NO dataset is due for update on {} for {}".format(self.params['CD'], self.params['LN']), self.PGOPT['wrnlog']) + self.PGOPT['AUTODS'] = dscnt + actcnd += " ORDER BY execorder, lindex" + if self.PGLOG['DSCHECK']: + fcnt = 0 + for i in range(dscnt): + self.params['DS'] = dsids[i] + loccnd = self.file_condition('dlupdt', "LQFIXA") + locrecs = self.pgmget("dlupdt", "*", "{} AND {}".format(loccnd, actcnd), self.PGOPT['extlog']) + loccnt = len(locrecs['locfile']) if locrecs else 0 + if loccnt == 0: continue + for j in range(loccnt): + locrec = self.onerecord(locrecs, j) + if (loccnt == 1 and 'LI' in self.params and 'LF' in self.params and + len(self.params['LF']) == 1 and self.params['LF'][0] != locrec['locfile']): + locrec['locfile'] = self.params['LF'][0] + fcnt += self.file_update(locrec, self.LOGWRN, 1) + self.set_dscheck_fcount(fcnt, self.LOGERR) + # check and update data for each dataset + logact = self.PGOPT['emllog'] + acnt = ucnt = 0 for i in range(dscnt): - PgOPT.params['DS'] = dsids[i] - loccnd = PgUpdt.file_condition('dlupdt', "LQFIXA") - locrecs = PgDBI.pgmget("dlupdt", "*", "{} AND {}".format(loccnd, actcnd), PgOPT.PGOPT['extlog']) + self.params['DS'] = dsids[i] + loccnd = self.file_condition('dlupdt', "LQFIXA") + locrecs = self.pgmget("dlupdt", "*", "{} AND {}".format(loccnd, actcnd), self.PGOPT['extlog']) loccnt = len(locrecs['locfile']) if locrecs else 0 - if loccnt == 0: continue + if loccnt == 0: + s = "-UC{}".format(self.params['CI'][0]) if ('CI' in self.params and len(self.params['CI']) == 1) else "" + self.pglog("{}{}: no config record of local file found to update for '{}'".format(self.params['DS'], s, self.params['LN']), self.PGOPT['wrnlog']) + continue + s = 's' if 
loccnt > 1 else '' + self.pglog("{}: {} for {} update record{}".format(self.params['DS'], self.PGOPT['CACT'], loccnt, s), logact) + logact = self.PGOPT['emlsep'] for j in range(loccnt): - locrec = PgUtil.onerecord(locrecs, j) - if (loccnt == 1 and 'LI' in PgOPT.params and 'LF' in PgOPT.params and - len(PgOPT.params['LF']) == 1 and PgOPT.params['LF'][0] != locrec['locfile']): - locrec['locfile'] = PgOPT.params['LF'][0] - fcnt += file_update(locrec, PgLOG.LOGWRN, 1) - PgCMD.set_dscheck_fcount(fcnt, PgLOG.LOGERR) - - # check and update data for each dataset - logact = PgOPT.PGOPT['emllog'] - acnt = ucnt = 0 - for i in range(dscnt): - PgOPT.params['DS'] = dsids[i] - loccnd = PgUpdt.file_condition('dlupdt', "LQFIXA") - locrecs = PgDBI.pgmget("dlupdt", "*", "{} AND {}".format(loccnd, actcnd), PgOPT.PGOPT['extlog']) - loccnt = len(locrecs['locfile']) if locrecs else 0 - if loccnt == 0: - s = "-UC{}".format(PgOPT.params['CI'][0]) if ('CI' in PgOPT.params and len(PgOPT.params['CI']) == 1) else "" - PgLOG.pglog("{}{}: no config record of local file found to update for '{}'".format(PgOPT.params['DS'], s, PgOPT.params['LN']), PgOPT.PGOPT['wrnlog']) - continue - s = 's' if loccnt > 1 else '' - PgLOG.pglog("{}: {} for {} update record{}".format(PgOPT.params['DS'], PgOPT.PGOPT['CACT'], loccnt, s), logact) - logact = PgOPT.PGOPT['emlsep'] - for j in range(loccnt): - locrec = PgUtil.onerecord(locrecs, j) - if (loccnt == 1 and 'LI' in PgOPT.params and 'LF' in PgOPT.params and - len(PgOPT.params['LF']) == 1 and PgOPT.params['LF'][0] != locrec['locfile']): - locrec['locfile'] = PgOPT.params['LF'][0] - if locrec['cindex']: - if 'CI' not in PgOPT.params: - PgOPT.params['CI'] = [locrec['cindex']] - PgUpdt.cache_update_control(locrec['cindex'], 0) - if 'CN' in PgOPT.params and 'RD' in PgOPT.params: del PgOPT.params['CN'] - if 'CN' in PgOPT.params or 'RD' in PgOPT.params or 'RA' in PgOPT.params: - if 'MO' in PgOPT.params: del PgOPT.params['MO'] - elif 'MO' not in PgOPT.params and 
PgOPT.PGOPT['CACT'] == "UF": - PgOPT.params['MO'] = -1 - elif locrec['cindex'] != PgOPT.params['CI'][0]: - PgLOG.pglog("{}-{}: Skipped due to control index {} mismatches {}".format(PgOPT.params['DS'], locrec['lindex'], locrec['cindex'], PgOPT.params['CI'][0]), PgOPT.PGOPT['emlerr']) - continue - - PgOPT.PGOPT['rstat'] = 1 # reset remote download status for each local file - if PgSIG.PGSIG['MPROC'] > 1: acnt += 1 - fcnt = file_update(locrec, logact) - if PgSIG.PGSIG['PPID'] > 1: - if PgOPT.PGOPT['AUTODS'] > 1: PgOPT.PGOPT['AUTODS'] = dscnt = 1 - acnt = ucnt = 0 # reinitialize counts for child process - break # stop loop in child - if PgSIG.PGSIG['MPROC'] > 1: - if fcnt == 0: - break # quit + locrec = self.onerecord(locrecs, j) + if (loccnt == 1 and 'LI' in self.params and 'LF' in self.params and + len(self.params['LF']) == 1 and self.params['LF'][0] != locrec['locfile']): + locrec['locfile'] = self.params['LF'][0] + if locrec['cindex']: + if 'CI' not in self.params: + self.params['CI'] = [locrec['cindex']] + self.cache_update_control(locrec['cindex'], 0) + if 'CN' in self.params and 'RD' in self.params: del self.params['CN'] + if 'CN' in self.params or 'RD' in self.params or 'RA' in self.params: + if 'MO' in self.params: del self.params['MO'] + elif 'MO' not in self.params and self.PGOPT['CACT'] == "UF": + self.params['MO'] = -1 + elif locrec['cindex'] != self.params['CI'][0]: + self.pglog("{}-{}: Skipped due to control index {} mismatches {}".format(self.params['DS'], locrec['lindex'], locrec['cindex'], self.params['CI'][0]), self.PGOPT['emlerr']) + continue + self.PGOPT['rstat'] = 1 # reset remote download status for each local file + if self.PGSIG['MPROC'] > 1: acnt += 1 + fcnt = self.file_update(locrec, logact) + if self.PGSIG['PPID'] > 1: + if self.PGOPT['AUTODS'] > 1: self.PGOPT['AUTODS'] = dscnt = 1 + acnt = ucnt = 0 # reinitialize counts for child process + break # stop loop in child + if self.PGSIG['MPROC'] > 1: + if fcnt == 0: + break # quit + else: + if 
fcnt > 0: ucnt += 1 # record update count, s is either -1 or 1 + continue # non-daemon parent + if 'QE' in self.params and fcnt <= 0: break + if self.PGOPT['vcnt'] > 0: + self.renew_internal_version(self.params['DS'], self.PGOPT['vcnt']) + self.PGOPT['vcnt'] = 0 + if self.PGSIG['MPROC'] > 1: + if not self.PGSIG['QUIT'] and j == loccnt: continue + break + if self.PGOPT['rcnt']: + if self.PGOPT['CACT'] == "DR": + acnt += self.PGOPT['rcnt'] + ucnt += self.PGOPT['dcnt'] + s = 's' if self.PGOPT['rcnt'] > 1 else '' + if loccnt > 1: + self.pglog("{}: {} of {} rfile{} gotten!".format(self.params['DS'], self.PGOPT['dcnt'], self.PGOPT['rcnt'], s), self.PGOPT['emllog']) + self.PGOPT['rcnt'] = self.PGOPT['dcnt'] = 0 + if self.PGOPT['lcnt']: + if self.PGOPT['CACT'] == "BL" or self.PGOPT['CACT'] == "PB": + acnt += self.PGOPT['lcnt'] + ucnt += self.PGOPT['bcnt'] + s = 's' if self.PGOPT['lcnt'] > 1 else '' + if loccnt > 1 and self.PGOPT['bcnt'] > 0: + self.pglog("{}: {} of {} lfile{} built!".format(self.params['DS'], self.PGOPT['bcnt'], self.PGOPT['lcnt'], s), self.PGOPT['emllog']) + self.PGOPT['lcnt'] = self.PGOPT['bcnt'] = 0 + if self.PGOPT['acnt']: + acnt += self.PGOPT['acnt'] + ucnt += self.PGOPT['ucnt'] + s = 's' if self.PGOPT['acnt'] > 1 else '' + self.pglog("{}: {} of {} local file{} archived!".format(self.params['DS'], self.PGOPT['ucnt'], self.PGOPT['acnt'], s), + (self.PGOPT['emlsum'] if dscnt > 1 else self.PGOPT['emllog'])) + self.PGOPT['acnt'] = self.PGOPT['ucnt'] = 0 + if self.PGSIG['PPID'] > 1: break # stop loop child + if acnt > 0: + self.TOPMSG = detail = "" + if self.PGSIG['MPROC'] > 1: + s = 's' if acnt > 1 else '' + self.ACTSTR = "{} of {} CPIDs{} for 'dsupdt {}' started".format(ucnt, acnt, s, self.PGOPT['CACT']) + else: + s = 's' if ucnt > 1 else '' + self.TOPMSG = "" + if self.PGOPT['CACT'] == "DR": + atype = "remote file{} gotten".format(s) + elif self.PGOPT['CACT'] == "BL" or self.PGOPT['CACT'] == "PB": + atype = "local file{} built".format(s) else: - if fcnt 
> 0: ucnt += 1 # record update count, s is either -1 or 1 - continue # non-daemon parent - if 'QE' in PgOPT.params and fcnt <= 0: break - - if PgOPT.PGOPT['vcnt'] > 0: - renew_internal_version(PgOPT.params['DS'], PgOPT.PGOPT['vcnt']) - PgOPT.PGOPT['vcnt'] = 0 - if PgSIG.PGSIG['MPROC'] > 1: - if not PgSIG.PGSIG['QUIT'] and j == loccnt: continue - break - if PgOPT.PGOPT['rcnt']: - if PgOPT.PGOPT['CACT'] == "DR": - acnt += PgOPT.PGOPT['rcnt'] - ucnt += PgOPT.PGOPT['dcnt'] - s = 's' if PgOPT.PGOPT['rcnt'] > 1 else '' - if loccnt > 1: - PgLOG.pglog("{}: {} of {} rfile{} gotten!".format(PgOPT.params['DS'], PgOPT.PGOPT['dcnt'], PgOPT.PGOPT['rcnt'], s), PgOPT.PGOPT['emllog']) - PgOPT.PGOPT['rcnt'] = PgOPT.PGOPT['dcnt'] = 0 - if PgOPT.PGOPT['lcnt']: - if PgOPT.PGOPT['CACT'] == "BL" or PgOPT.PGOPT['CACT'] == "PB": - acnt += PgOPT.PGOPT['lcnt'] - ucnt += PgOPT.PGOPT['bcnt'] - s = 's' if PgOPT.PGOPT['lcnt'] > 1 else '' - if loccnt > 1 and PgOPT.PGOPT['bcnt'] > 0: - PgLOG.pglog("{}: {} of {} lfile{} built!".format(PgOPT.params['DS'], PgOPT.PGOPT['bcnt'], PgOPT.PGOPT['lcnt'], s), PgOPT.PGOPT['emllog']) - PgOPT.PGOPT['lcnt'] = PgOPT.PGOPT['bcnt'] = 0 - if PgOPT.PGOPT['acnt']: - acnt += PgOPT.PGOPT['acnt'] - ucnt += PgOPT.PGOPT['ucnt'] - s = 's' if PgOPT.PGOPT['acnt'] > 1 else '' - PgLOG.pglog("{}: {} of {} local file{} archived!".format(PgOPT.params['DS'], PgOPT.PGOPT['ucnt'], PgOPT.PGOPT['acnt'], s), - (PgOPT.PGOPT['emlsum'] if dscnt > 1 else PgOPT.PGOPT['emllog'])) - PgOPT.PGOPT['acnt'] = PgOPT.PGOPT['ucnt'] = 0 - - if PgSIG.PGSIG['PPID'] > 1: break # stop loop child - - if acnt > 0: - TOPMSG = detail = "" - if PgSIG.PGSIG['MPROC'] > 1: - s = 's' if acnt > 1 else '' - ACTSTR = "{} of {} CPIDs{} for 'dsupdt {}' started".format(ucnt, acnt, s, PgOPT.PGOPT['CACT']) - else: - s = 's' if ucnt > 1 else '' - TOPMSG = "" - if PgOPT.PGOPT['CACT'] == "DR": - atype = "remote file{} gotten".format(s) - elif PgOPT.PGOPT['CACT'] == "BL" or PgOPT.PGOPT['CACT'] == "PB": - atype = "local file{} 
built".format(s) + atype = "local file{} archived".format(s) + if self.PGOPT['rdcnt'] > 0: + s = 's' if self.PGOPT['rdcnt'] > 1 else '' + self.TOPMSG = "{} remote server file{} downloaded and ".format(self.PGOPT['rdcnt'], s) + if self.PGOPT['udcnt'] > 0: + if detail: detail += " & " + detail += "{} Web Online".format(self.PGOPT['udcnt']) + if self.PGOPT['uncnt'] > 0: + if detail: detail += " & " + detail += "{} Glade Only".format(self.PGOPT['uncnt']) + if self.PGOPT['uwcnt'] > 0: + if detail: detail += " & " + detail += "{} Web".format(self.PGOPT['uwcnt']) + if self.PGOPT['uscnt'] > 0: + if detail: detail += " & " + detail += "{} Saved".format(self.PGOPT['uscnt']) + if self.PGOPT['qbcnt'] > 0: + if detail: detail += " & " + detail += "{} Quasar Backup".format(self.PGOPT['qbcnt']) + if self.PGOPT['qdcnt'] > 0: + if detail: detail += " & " + detail += "{} Quasar Drdata".format(self.PGOPT['qdcnt']) + self.ACTSTR = "{} {}".format(ucnt, atype) + self.TOPMSG += self.ACTSTR + if detail: self.TOPMSG += " ({})".format(detail) + if dscnt > 1: + self.pglog("{} datasets: {}".format(dscnt, self.TOPMSG), self.PGOPT['emlsum']) + self.SUBJECT = "DSUPDT of " + if self.PGOPT['AUTODS'] < 2: + self.SUBJECT += self.params['DS'].upper() else: - atype = "local file{} archived".format(s) - if PgOPT.PGOPT['rdcnt'] > 0: - s = 's' if PgOPT.PGOPT['rdcnt'] > 1 else '' - TOPMSG = "{} remote server file{} downloaded and ".format(PgOPT.PGOPT['rdcnt'], s) - if PgOPT.PGOPT['udcnt'] > 0: - if detail: detail += " & " - detail += "{} Web Online".format(PgOPT.PGOPT['udcnt']) - if PgOPT.PGOPT['uncnt'] > 0: - if detail: detail += " & " - detail += "{} Glade Only".format(PgOPT.PGOPT['uncnt']) - if PgOPT.PGOPT['uwcnt'] > 0: - if detail: detail += " & " - detail += "{} Web".format(PgOPT.PGOPT['uwcnt']) - if PgOPT.PGOPT['uscnt'] > 0: - if detail: detail += " & " - detail += "{} Saved".format(PgOPT.PGOPT['uscnt']) - if PgOPT.PGOPT['qbcnt'] > 0: - if detail: detail += " & " - detail += "{} Quasar 
Backup".format(PgOPT.PGOPT['qbcnt']) - if PgOPT.PGOPT['qdcnt'] > 0: - if detail: detail += " & " - detail += "{} Quasar Drdata".format(PgOPT.PGOPT['qdcnt']) - ACTSTR = "{} {}".format(ucnt, atype) - - TOPMSG += ACTSTR - if detail: TOPMSG += " ({})".format(detail) - if dscnt > 1: - PgLOG.pglog("{} datasets: {}".format(dscnt, TOPMSG), PgOPT.PGOPT['emlsum']) - SUBJECT = "DSUPDT of " - if PgOPT.PGOPT['AUTODS'] < 2: - SUBJECT += PgOPT.params['DS'].upper() - else: - SUBJECT += "{} Datasets".format(PgOPT.PGOPT['AUTODS']) - - if PgOPT.PGOPT['UCNTL']: - PgUpdt.reset_control_time() - if SUBJECT: SUBJECT += "-C{}".format(PgOPT.PGOPT['UCNTL']['cindex']) - -# renew internal version number for given dataset -def renew_internal_version(dsid, vcnt): - - s = 's' if vcnt > 1 else '' - cmd = "dsarch {} SV -NV -DE '{} Data file{} rearchived'".format(dsid, vcnt, s) - if PgLOG.pgsystem(cmd, PgOPT.PGOPT['emerol'], 5): # 1 + 4 - pgrec = PgDBI.pgget('dsvrsn', '*', "dsid = '{}' and status = 'A'".format(dsid), PgOPT.PGOPT['emerol']) - if pgrec: - vmsg = "set to {} for DOI {}".format(pgrec['iversion'], pgrec['doi']) - else: - vmsg = 'renewed' - - PgLOG.pglog("{}: {} Data file{} rearchived, Internal version number {}".format(dsid, vcnt, s, vmsg), PgOPT.PGOPT['emlsum']) - -# -# cach the total count of files to be archived -# -def count_caching(locrec, locinfo): - - files = PgUpdt.expand_serial_pattern(locrec['locfile']) - scnt = len(files) if files else 1 - - if ALLCNT > 1: - ecnt = ALLCNT - else: - tinfo = TEMPINFO[locrec['lindex']] = get_tempinfo(locrec, locinfo, 0) - ecnt = len(tinfo['ED']) if tinfo else 1 - - return ecnt * scnt - -# -# gather/archive due data file for update of each local file -# -def file_update(locrec, logact, caching = 0): - - lfile = locrec['locfile'] - endonly = retcnt = 0 - lindex = locrec['lindex'] - loccnd = "lindex = {}".format(lindex) - locinfo = "{}-L{}".format(locrec['dsid'], lindex) - if not lfile: - if caching: - return None - else: - return PgLOG.pglog(locinfo 
+ ": local file name NOT specified", PgOPT.PGOPT['emlerr']) - locinfo += "-" + lfile - if locrec['specialist'] != PgOPT.params['LN']: - if caching: - return None - else: - return PgLOG.pglog("{}: owner '{}', NOT '{}'".format(locinfo, locrec['specialist'], PgOPT.params['LN']), PgOPT.PGOPT['emlerr']) - - if caching: return count_caching(locrec, locinfo) - tempinfo = TEMPINFO[lindex] if lindex in TEMPINFO else get_tempinfo(locrec, locinfo, 0) - if not tempinfo: return 0 # simply return if miss temporal info for update - - rmtcnd = loccnd - rcnd = PgUpdt.file_condition('drupdt', ('D' if 'DO' in PgOPT.params else "RS"), None, 1) - if rcnd: rmtcnd += " AND " + rcnd - rmtrecs = PgDBI.pgmget("drupdt", "*", rmtcnd + " ORDER BY dindex, remotefile", PgOPT.PGOPT['extlog']) - rcnt = len(rmtrecs['remotefile']) if rmtrecs else 0 - if rcnt == 0: - if rcnd and PgDBI.pgget("drupdt", "", loccnd): - return PgLOG.pglog("{}: NO remote file record matched for {}".format(locinfo, rcnd), PgOPT.PGOPT['emlerr']) - # create a empty record remote file - rcnt = 1 - - rmtrecs = {'lindex' : [lindex], 'dindex' : [0]} - rflds = ['remotefile', 'serverfile', 'download', 'begintime', 'endtime', 'tinterval'] - for rfld in rflds: rmtrecs[rfld] = [None] - if rcnt == 1: - if 'RF' in PgOPT.params and len(PgOPT.params['RF']) == 1 and not (rmtrecs['remotefile'][0] and PgOPT.params['RF'][0] == rmtrecs['remotefile'][0]): - rmtrecs['remotefile'][0] = PgOPT.params['RF'][0] - if 'SF' in PgOPT.params and len(PgOPT.params['SF']) == 1 and not (rmtrecs['serverfile'][0] and PgOPT.params['SF'][0] == rmtrecs['serverfile'][0]): - rmtrecs['serverfile'][0] = PgOPT.params['SF'][0] - ecnt = ALLCNT if ALLCNT > 1 else len(tempinfo['ED']) # should be at least one - - if PgSIG.PGSIG['MPROC'] > 1: - pname = "updt{}".format(lindex) - pid = PgSIG.start_child(pname, PgOPT.PGOPT['wrnlog'], 1) # try to start a child process - if pid <= 0: return pid # failed to start a child process - if PgSIG.PGSIG['PPID'] > 1: - PgLOG.set_email() # 
empty email in child process - PgOPT.PGOPT['acnt'] = PgOPT.PGOPT['ucnt'] = 0 - else: - edate = tempinfo['ED'][0] - ehour = tempinfo['EH'][0] - lfile = PgUpdt.replace_pattern(locrec['locfile'], edate, ehour, tempinfo['FQ']) - locinfo = "{}-L{}-{}".format(locrec['dsid'], lindex, lfile) - if ecnt > 1: locinfo += ", {} Update Periods".format(ecnt) - PgLOG.pglog("CPID {} for 'dsupdt {}' of {}".format(PgSIG.pname2cpid(pname), PgOPT.PGOPT['CACT'], locinfo), PgOPT.PGOPT['emllog']) - return 1 # no further action in non-daemon program - - if PgLock.lock_update(lindex, locinfo, 1, PgOPT.PGOPT['emllog']) <= 0: return 0 - PgOPT.PGOPT['lindex'] = lindex - tempinfo['prcmd'] = PgOPT.params['PR'][0] if 'PR' in PgOPT.params else locrec['processremote'] - tempinfo['blcmd'] = PgOPT.params['BC'][0] if 'BC' in PgOPT.params else locrec['buildcmd'] - postcnt = -1 - if PgOPT.PGOPT['UCNTL'] and PgOPT.PGOPT['CACT'] == PgOPT.PGOPT['UCNTL']['action']: - tempinfo['postcmd'] = PgOPT.params['XC'][0] if 'XC' in PgOPT.params else PgOPT.PGOPT['UCNTL']['execcmd'] - if tempinfo['postcmd']: postcnt = 0 - - setmiss = 1 if tempinfo['VD'] else 0 - ufile = uinfo = None - rscnt = ucnt = lcnt = 0 - - for i in range(ecnt): - if ALLCNT > 1 and i > 0: - tempinfo = get_tempinfo(locrec, locinfo, i) - if not tempinfo: break - edate = tempinfo['ED'][0] - ehour = tempinfo['EH'][0] - else: - edate = tempinfo['ED'][i] - ehour = tempinfo['EH'][i] - if 'RE' in PgOPT.params and i and PgUtil.diffdatehour(edate, ehour, tempinfo['edate'], tempinfo['ehour']) <= 0: - continue - if ucnt and tempinfo['RS'] == 1 and i%20 == 0: refresh_metadata(locrec['dsid']) - tempinfo['edate'] = edate - if ehour != None: - tempinfo['einfo'] = "end data date:hour {}:{:02}".format(edate, ehour) - tempinfo['ehour'] = ehour + self.SUBJECT += "{} Datasets".format(self.PGOPT['AUTODS']) + if self.PGOPT['UCNTL']: + self.reset_control_time() + if self.SUBJECT: self.SUBJECT += "-C{}".format(self.PGOPT['UCNTL']['cindex']) + + # renew internal version 
number for given dataset + def renew_internal_version(self, dsid, vcnt): + s = 's' if vcnt > 1 else '' + cmd = "dsarch {} SV -NV -DE '{} Data file{} rearchived'".format(dsid, vcnt, s) + if self.pgsystem(cmd, self.PGOPT['emerol'], 5): # 1 + 4 + pgrec = self.pgget('dsvrsn', '*', "dsid = '{}' and status = 'A'".format(dsid), self.PGOPT['emerol']) + if pgrec: + vmsg = "set to {} for DOI {}".format(pgrec['iversion'], pgrec['doi']) + else: + vmsg = 'renewed' + self.pglog("{}: {} Data file{} rearchived, Internal version number {}".format(dsid, vcnt, s, vmsg), self.PGOPT['emlsum']) + + # cach the total count of files to be archived + def count_caching(self, locrec, locinfo): + files = self.expand_serial_pattern(locrec['locfile']) + scnt = len(files) if files else 1 + if self.ALLCNT > 1: + ecnt = self.ALLCNT else: - tempinfo['einfo'] = "end data date {}".format(edate) - tempinfo['ehour'] = None - if 'GZ' in PgOPT.params: tempinfo['einfo'] += "(UTC)" - - locfiles = PgUpdt.get_local_names(locrec['locfile'], tempinfo) - lcnt = len(locfiles) if locfiles else 0 - if not lcnt: break - rmtcnt = acnt = ccnt = ut = 0 - rfiles = rfile = None - if tempinfo['RS'] == 0 and lcnt > 2: tempinfo['RS'] = 1 - - for l in range(lcnt): - if PgLOG.PGLOG['DSCHECK'] and ((l+1)%20) == 0: - PgCMD.add_dscheck_dcount(20, 0, PgOPT.PGOPT['extlog']) - lfile = locfiles[l] - locinfo = "{}-L{}-{}".format(locrec['dsid'], lindex, lfile) - tempinfo['gotnew'] = tempinfo['archived'] = 0 - tempinfo['ainfo'] = None - tempinfo['ainfo'] = file_archive_info(lfile, locrec, tempinfo) - if not tempinfo['ainfo']: continue - if tempinfo['ainfo']['archived'] == tempinfo['ainfo']['archcnt']: - ufile = "{} at {} {}".format(lfile, tempinfo['ainfo']['adate'], tempinfo['ainfo']['atime']) - tempinfo['archived'] = 1 - if 'MO' in PgOPT.params: - if PgOPT.params['MO'] < 0: - PgLOG.pglog("{}: {} already for {}".format(locinfo, PgOPT.PGOPT['CACT'], tempinfo['einfo']), PgOPT.PGOPT['emlsum']) - if i == 0: PgLOG.pglog("Add Mode option -RA 
if you want to re-archive", PgOPT.PGOPT['wrnlog']) - if 'UT' in PgOPT.params or 'ED' not in PgOPT.params: ut = 1 - retcnt += 1 - continue + tinfo = self.TEMPINFO[locrec['lindex']] = self.get_tempinfo(locrec, locinfo, 0) + ecnt = len(tinfo['ED']) if tinfo else 1 + return ecnt * scnt + + # gather/archive due data file for update of each local file + def file_update(self, locrec, logact, caching = 0): + lfile = locrec['locfile'] + endonly = retcnt = 0 + lindex = locrec['lindex'] + loccnd = "lindex = {}".format(lindex) + locinfo = "{}-L{}".format(locrec['dsid'], lindex) + if not lfile: + if caching: + return None else: - if PgOPT.PGOPT['ACTS']&PgOPT.OPTS['AF'][0]: uinfo = locinfo - PgLOG.pglog("{}: {} for {}".format(locinfo, PgOPT.PGOPT['CACT'], tempinfo['einfo']), logact) - if not change_workdir(locrec['workdir'], locinfo, tempinfo['edate'], tempinfo['ehour'], tempinfo['FQ']): - break - if PgOPT.PGOPT['ACTS']&PgOPT.OPTS['AF'][0]: PgOPT.PGOPT['acnt'] += 1 - if PgOPT.PGOPT['ACTS']&PgOPT.OPTS['BL'][0]: PgOPT.PGOPT['lcnt'] += 1 - opt = 1 if tempinfo['AQ'] else 65 # 1+64(remove small file) - linfo = PgFile.check_local_file(lfile, opt, PgOPT.PGOPT['emerol']) - cnt = -1 - if rmtcnt > 0: - cnt = rmtcnt - rfile = rfiles[l] + return self.pglog(locinfo + ": local file name NOT specified", self.PGOPT['emlerr']) + locinfo += "-" + lfile + if locrec['specialist'] != self.params['LN']: + if caching: + return None else: - dr = 1 if PgOPT.PGOPT['ACTS']&PgOPT.OPTS['PB'][0] else 0 - if linfo and PgOPT.PGOPT['CACT'] == "BL" and not tempinfo['prcmd']: dr = 0 # skip download for BL only - if dr: - dfiles = None - for j in range(rcnt): # processs each remote record - pgrec = PgUtil.onerecord(rmtrecs, j) - if dfiles and pgrec['remotefile'] == rfile and not PgOPT.PGOPT['mcnt']: - continue # skip - rfile = pgrec['remotefile'] - act = 0 if locrec['action'] == 'AQ' else PgOPT.PGOPT['ACTS']&PgOPT.OPTS['DR'][0] - dfiles = download_remote_files(pgrec, lfile, linfo, locrec, locinfo, tempinfo, act) - 
if PgOPT.PGOPT['rstat'] < 0: - i = ecnt - break - if dfiles: rfiles = PgUtil.joinarray(rfiles, dfiles) - - rmtcnt = len(rfiles) if rfiles else 0 - if rmtcnt > 0: - if lcnt > 1 and rmtcnt != lcnt: - PgLOG.pglog("{}: {} files found for {} local files".format(locrec['locinfo'], rmtcnt, lcnt), PgOPT.PGOPT['emlerr']) - i = ecnt - break - cnt = rmtcnt - rfile = rfiles[l] if lcnt > 1 else rfiles[rmtcnt-1] # record the break remote file name - else: - rfile = None - if linfo and PgOPT.PGOPT['rstat'] == 0: PgOPT.PGOPT['rstat'] = 1 - - if cnt != 0 and PgOPT.PGOPT['rstat'] > 0: - if PgOPT.PGOPT['ACTS']&(PgOPT.OPTS['BL'][0]|PgOPT.OPTS['AF'][0]): - if cnt < 0 and linfo: - if tempinfo['archived'] and PgOPT.PGOPT['CACT'] == "UF" and not tempinfo['gotnew']: - if PgOPT.PGOPT['ACTS']&PgOPT.OPTS['AF'][0] and 'RA' not in PgOPT.params: - PgLOG.pglog(lfile + ": local file archived already", PgOPT.PGOPT['emllog']) - cnt = 0 - else: - if PgOPT.PGOPT['ACTS']&PgOPT.OPTS['BL'][0]: - PgLOG.pglog(lfile + ": local file exists already", PgOPT.PGOPT['emllog']) - cnt = 1 - elif rmtcnt == lcnt and lfile == rfile: - if PgOPT.PGOPT['ACTS']&PgOPT.OPTS['BL'][0]: - PgLOG.pglog(lfile + ": local file same as remote file", PgOPT.PGOPT['emllog']) - elif not (PgOPT.PGOPT['ACTS']&PgOPT.OPTS['BL'][0]): - PgLOG.pglog(lfile + ": local file not built yet", PgOPT.PGOPT['emlerr']) - cnt = 0 - else: - cnt = build_local_file(rfiles, lfile, linfo, locrec, tempinfo, lcnt, l) - if cnt and 'lfile' in tempinfo: - lfile = tempinfo['lfile'] - del tempinfo['lfile'] - - if cnt != 0 and (PgOPT.PGOPT['ACTS']&PgOPT.OPTS['AF'][0]): - file_status_info(lfile, rfile, tempinfo) - cnt = archive_data_file(lfile, locrec, tempinfo, i) - if cnt > 0: - ucnt += 1 - if tempinfo['RS'] == 1: rscnt += 1 - if postcnt > -1: postcnt += 1 - elif cnt > 0: - cnt = 0 - - if cnt > 0 and PgOPT.PGOPT['rstat'] > 0: - ccnt += 1 - elif 'UT' in PgOPT.params or tempinfo['archived']: - ut = 1 - if cnt > 0: acnt += 1 - - if PgLOG.PGLOG['DSCHECK']: - 
PgCMD.add_dscheck_dcount(lcnt%20, 0, PgOPT.PGOPT['extlog']) - if ccnt == lcnt and (PgOPT.PGOPT['ACTS']&PgOPT.OPTS['CF'][0]) and locrec['cleancmd']: - if tempinfo['CVD'] and PgUtil.diffdate(edate, tempinfo['CVD']) > 0: - clean_older_files(locrec['cleancmd'], locrec['workdir'], locinfo, tempinfo['CVD'], locrec['locfile'], rmtrecs, rcnt, tempinfo) + return self.pglog("{}: owner '{}', NOT '{}'".format(locinfo, locrec['specialist'], self.params['LN']), self.PGOPT['emlerr']) + if caching: return self.count_caching(locrec, locinfo) + tempinfo = self.TEMPINFO[lindex] if lindex in self.TEMPINFO else self.get_tempinfo(locrec, locinfo, 0) + if not tempinfo: return 0 # simply return if miss temporal info for update + rmtcnd = loccnd + rcnd = self.file_condition('drupdt', ('D' if 'DO' in self.params else "RS"), None, 1) + if rcnd: rmtcnd += " AND " + rcnd + rmtrecs = self.pgmget("drupdt", "*", rmtcnd + " ORDER BY dindex, remotefile", self.PGOPT['extlog']) + rcnt = len(rmtrecs['remotefile']) if rmtrecs else 0 + if rcnt == 0: + if rcnd and self.pgget("drupdt", "", loccnd): + return self.pglog("{}: NO remote file record matched for {}".format(locinfo, rcnd), self.PGOPT['emlerr']) + # create a empty record remote file + rcnt = 1 + rmtrecs = {'lindex' : [lindex], 'dindex' : [0]} + rflds = ['remotefile', 'serverfile', 'download', 'begintime', 'endtime', 'tinterval'] + for rfld in rflds: rmtrecs[rfld] = [None] + if rcnt == 1: + if 'RF' in self.params and len(self.params['RF']) == 1 and not (rmtrecs['remotefile'][0] and self.params['RF'][0] == rmtrecs['remotefile'][0]): + rmtrecs['remotefile'][0] = self.params['RF'][0] + if 'SF' in self.params and len(self.params['SF']) == 1 and not (rmtrecs['serverfile'][0] and self.params['SF'][0] == rmtrecs['serverfile'][0]): + rmtrecs['serverfile'][0] = self.params['SF'][0] + ecnt = self.ALLCNT if self.ALLCNT > 1 else len(tempinfo['ED']) # should be at least one + if self.PGSIG['MPROC'] > 1: + pname = "updt{}".format(lindex) + pid = 
self.start_child(pname, self.PGOPT['wrnlog'], 1) # try to start a child process + if pid <= 0: return pid # failed to start a child process + if self.PGSIG['PPID'] > 1: + self.set_email() # empty email in child process + self.PGOPT['acnt'] = self.PGOPT['ucnt'] = 0 else: - if not rfiles and rcnt and locrec['cleancmd'].find(' -RF') > -1: - rfiles = get_all_remote_files(rmtrecs, rcnt, tempinfo, edate) - clean_files(locrec['cleancmd'], edate, ehour, locfiles, rfiles, tempinfo['FQ']) - if PgOPT.PGOPT['ACTS']&PgOPT.OPTS['AF'][0] or PgOPT.PGOPT['UCNTL'] and PgOPT.PGOPT['CACT'] == PgOPT.PGOPT['UCNTL']['action']: - rmonly = 1 if PgOPT.PGOPT['rstat'] > 0 else 0 - if ccnt == lcnt: - PgUpdt.reset_update_time(locinfo, locrec, tempinfo, ccnt, endonly) - elif ut: - PgUpdt.reset_update_time(locinfo, locrec, tempinfo, acnt, endonly) + edate = tempinfo['ED'][0] + ehour = tempinfo['EH'][0] + lfile = self.replace_pattern(locrec['locfile'], edate, ehour, tempinfo['FQ']) + locinfo = "{}-L{}-{}".format(locrec['dsid'], lindex, lfile) + if ecnt > 1: locinfo += ", {} Update Periods".format(ecnt) + self.pglog("CPID {} for 'dsupdt {}' of {}".format(self.pname2cpid(pname), self.PGOPT['CACT'], locinfo), self.PGOPT['emllog']) + return 1 # no further action in non-daemon program + if self.lock_update(lindex, locinfo, 1, self.PGOPT['emllog']) <= 0: return 0 + self.PGOPT['lindex'] = lindex + tempinfo['prcmd'] = self.params['PR'][0] if 'PR' in self.params else locrec['processremote'] + tempinfo['blcmd'] = self.params['BC'][0] if 'BC' in self.params else locrec['buildcmd'] + postcnt = -1 + if self.PGOPT['UCNTL'] and self.PGOPT['CACT'] == self.PGOPT['UCNTL']['action']: + tempinfo['postcmd'] = self.params['XC'][0] if 'XC' in self.params else self.PGOPT['UCNTL']['execcmd'] + if tempinfo['postcmd']: postcnt = 0 + setmiss = 1 if tempinfo['VD'] else 0 + ufile = uinfo = None + rscnt = ucnt = lcnt = 0 + for i in range(ecnt): + if self.ALLCNT > 1 and i > 0: + tempinfo = self.get_tempinfo(locrec, locinfo, i) + 
if not tempinfo: break + edate = tempinfo['ED'][0] + ehour = tempinfo['EH'][0] else: - if PgOPT.PGOPT['rstat'] == 0: - if tempinfo['VD'] and PgUtil.diffdatehour(edate, ehour, tempinfo['VD'], tempinfo['VH']) < 0: - PgUpdt.reset_update_time(locinfo, locrec, tempinfo, 0, endonly) # skip update - PgOPT.PGOPT['rstat'] = 1 # reset remote download status - elif 'IE' in PgOPT.params: - if tempinfo['VD'] and PgUtil.diffdatehour(edate, ehour, tempinfo['VD'], tempinfo['VH']) >= 0: - endonly = 1 - PgUpdt.reset_update_time(locinfo, locrec, tempinfo, 0, endonly) # skip update - PgOPT.PGOPT['rstat'] = 1 # reset remote download status - if setmiss: setmiss = PgUpdt.set_miss_time(lfile, locrec, tempinfo, rmonly) - - if postcnt > 0: - postcmd = PgUpdt.executable_command(PgUpdt.replace_pattern(tempinfo['postcmd'], edate, ehour, tempinfo['FQ']), - lfile, PgOPT.params['DS'], edate, ehour) - PgLOG.pgsystem(postcmd, PgOPT.PGOPT['emllog'], 5) - postcnt = 0 - if rscnt >= PgOPT.PGOPT['RSMAX']: - refresh_metadata(locrec['dsid']) - rscnt = 0 - if PgOPT.PGOPT['rstat'] < -1 or PgOPT.PGOPT['rstat'] < 0 and 'QE' in PgOPT.params: break # unrecoverable errors - - if rscnt > 0: refresh_metadata(locrec['dsid']) - if ufile and uinfo and ucnt == 0: - PgLOG.pglog("{}: Last successful update - {}".format(uinfo, ufile), PgOPT.PGOPT['emlsum']) - PgLock.lock_update(lindex, locinfo, 0, PgOPT.PGOPT['errlog']) - PgOPT.PGOPT['lindex'] = 0 - - return retcnt - -# -# refresh the gathered metadata with speed up option -R and -S -# -def refresh_metadata(dsid): - - sx = "{} -d {} -r".format(PgOPT.PGOPT['scm'], dsid) - if PgOPT.PGOPT['wtidx']: - if 0 in PgOPT.PGOPT['wtidx']: - PgLOG.pgsystem(sx + 'w all', PgOPT.PGOPT['emllog'], 5) - else: - for tidx in PgOPT.PGOPT['wtidx']: - PgLOG.pgsystem("{}w {}".format(sx, tidx), PgOPT.PGOPT['emllog'], 5) - PgOPT.PGOPT['wtidx'] = {} - -# -# retrieve remote files -# act: > 0 - create filenames and get data files physically; 0 - create filenames only -# -def 
download_remote_files(rmtrec, lfile, linfo, locrec, locinfo, tempinfo, act = 0): - - emlsum = PgOPT.PGOPT['emlsum'] if PgOPT.PGOPT['CACT'] == "DR" else PgOPT.PGOPT['emllog'] - rfile = rmtrec['remotefile'] - rmtinfo = locinfo - dfiles = [] - if not rfile: - rfile = lfile - rcnt = 1 - if rfile != locrec['locfile']: rmtinfo += "-" + rfile - if act: - tempinfo['DC'] = (PgOPT.params['DC'][0] if 'DC' in PgOPT.params and PgOPT.params['DC'][0] else - (rmtrec['download'] if rmtrec['download'] else locrec['download'])) - - rfiles = PgUpdt.get_remote_names(rfile, rmtrec, rmtinfo, tempinfo) - rcnt = len(rfiles) if rfiles else 0 - if rcnt == 0: - PgOPT.PGOPT['rstat'] = -2 - return PgLOG.pglog(rmtinfo + ": NO remote file name identified", PgOPT.PGOPT['emlerr']) - - PgOPT.PGOPT['rcnt'] += rcnt # accumulate remote file counts - if tempinfo['DC']: tempinfo['DC'] = None - - if act: # get file names on remote server and create download command - sfile = rmtrec['serverfile'] - if sfile and sfile != rfile: - sfiles = PgUpdt.get_remote_names(sfile, rmtrec, rmtinfo, tempinfo) - scnt = len(sfiles) if sfiles else 0 - if scnt != rcnt: - PgOPT.PGOPT['rstat'] = -2 - return PgLOG.pglog("{}/{}: {}/{} MISS match file counts".format(rmtinfo, sfile, rcnt, scnt), PgOPT.PGOPT['emlerr']) - else: - sfiles = rfiles - scnt = rcnt - - if tempinfo['AQ']: - tstr = tempinfo['AQ'] - if tstr == 'Web': - rpath = "{}/{}/".format(PgLOG.PGLOG['DSDHOME'], PgOPT.params['DS']) - else: - rpath = "{}/{}/{}/".format(PgLOG.PGLOG['DECSHOME'], PgOPT.params['DS'], tempinfo['ST']) - else: - tstr = 'Remote' - rpath = '' - - ks = 1 if 'KS' in PgOPT.params else 0 - PgOPT.PGOPT['mcnt'] = ocnt = ecnt = scnt = dcnt = ncnt = 0 - omsize = PgLOG.PGLOG['MINSIZE'] - if 'VS' in tempinfo and 'VS' not in PgOPT.params: PgLOG.PGLOG['MINSIZE'] = tempinfo['VS'] - for i in range(rcnt): - rfile = rfiles[i] - rname = rfile['fname'] - rcmd = rfile['rcmd'] - rinfo = PgFile.check_local_file(rpath + rname, 65, PgOPT.PGOPT['emerol']) # 65 = 1 + 64 - 
gotnew = 0 - if not act: - if rinfo: - dfiles.append(rname) - dcnt += 1 + edate = tempinfo['ED'][i] + ehour = tempinfo['EH'][i] + if 'RE' in self.params and i and self.diffdatehour(edate, ehour, tempinfo['edate'], tempinfo['ehour']) <= 0: + continue + if ucnt and tempinfo['RS'] == 1 and i%20 == 0: self.refresh_metadata(locrec['dsid']) + tempinfo['edate'] = edate + if ehour != None: + tempinfo['einfo'] = "end data date:hour {}:{:02}".format(edate, ehour) + tempinfo['ehour'] = ehour else: - ecnt += 1 - if rfile['amiss']: - PgLOG.pglog(rname + ": SKIP for NOT gotten {} file yet".format(tstr), PgOPT.PGOPT['emlerr']) - PgOPT.PGOPT['mcnt'] += 1 - elif 'IE' in PgOPT.params: - PgLOG.pglog(rname + ": NOT gotten {} file yet".format(tstr), PgOPT.PGOPT['emlerr']) - PgOPT.PGOPT['rstat'] = -1 + tempinfo['einfo'] = "end data date {}".format(edate) + tempinfo['ehour'] = None + if 'GZ' in self.params: tempinfo['einfo'] += "(UTC)" + locfiles = self.get_local_names(locrec['locfile'], tempinfo) + lcnt = len(locfiles) if locfiles else 0 + if not lcnt: break + rmtcnt = acnt = ccnt = ut = 0 + rfiles = rfile = None + if tempinfo['RS'] == 0 and lcnt > 2: tempinfo['RS'] = 1 + for l in range(lcnt): + if self.PGLOG['DSCHECK'] and ((l+1)%20) == 0: + self.add_dscheck_dcount(20, 0, self.PGOPT['extlog']) + lfile = locfiles[l] + locinfo = "{}-L{}-{}".format(locrec['dsid'], lindex, lfile) + tempinfo['gotnew'] = tempinfo['archived'] = 0 + tempinfo['ainfo'] = None + tempinfo['ainfo'] = self.file_archive_info(lfile, locrec, tempinfo) + if not tempinfo['ainfo']: continue + if tempinfo['ainfo']['archived'] == tempinfo['ainfo']['archcnt']: + ufile = "{} at {} {}".format(lfile, tempinfo['ainfo']['adate'], tempinfo['ainfo']['atime']) + tempinfo['archived'] = 1 + if 'MO' in self.params: + if self.params['MO'] < 0: + self.pglog("{}: {} already for {}".format(locinfo, self.PGOPT['CACT'], tempinfo['einfo']), self.PGOPT['emlsum']) + if i == 0: self.pglog("Add Mode option -RA if you want to re-archive", 
self.PGOPT['wrnlog']) + if 'UT' in self.params or 'ED' not in self.params: ut = 1 + retcnt += 1 + continue else: - PgLOG.pglog(rname + ": ERROR for NOT gotten {} file yet".format(tstr), PgOPT.PGOPT['emlerr']) - PgOPT.PGOPT['rstat'] = -2 + if self.PGOPT['ACTS']&self.OPTS['AF'][0]: uinfo = locinfo + self.pglog("{}: {} for {}".format(locinfo, self.PGOPT['CACT'], tempinfo['einfo']), logact) + if not self.change_workdir(locrec['workdir'], locinfo, tempinfo['edate'], tempinfo['ehour'], tempinfo['FQ']): break - continue - elif rinfo and 'RD' not in PgOPT.params: - if not rcmd: - dfiles.append(rname) - dcnt += 1 - if tempinfo['archived']: - if 'CN' not in PgOPT.params: - ocnt += 1 - elif PgUtil.cmptime(rinfo['date_modified'], rinfo['time_modified'], tempinfo['ainfo']['adate'], tempinfo['ainfo']['atime']) < 1: - ocnt += 1 - PgLOG.pglog("{}: ARCHIVED, NO newer remote file {} found".format(lfile, rname), PgOPT.PGOPT['emllog']) + if self.PGOPT['ACTS']&self.OPTS['AF'][0]: self.PGOPT['acnt'] += 1 + if self.PGOPT['ACTS']&self.OPTS['BL'][0]: self.PGOPT['lcnt'] += 1 + opt = 1 if tempinfo['AQ'] else 65 # 1+64(remove small file) + linfo = self.check_local_file(lfile, opt, self.PGOPT['emerol']) + cnt = -1 + if rmtcnt > 0: + cnt = rmtcnt + rfile = rfiles[l] + else: + dr = 1 if self.PGOPT['ACTS']&self.OPTS['PB'][0] else 0 + if linfo and self.PGOPT['CACT'] == "BL" and not tempinfo['prcmd']: dr = 0 # skip download for BL only + if dr: + dfiles = None + for j in range(rcnt): # processs each remote record + pgrec = self.onerecord(rmtrecs, j) + if dfiles and pgrec['remotefile'] == rfile and not self.PGOPT['mcnt']: + continue # skip + rfile = pgrec['remotefile'] + act = 0 if locrec['action'] == 'AQ' else self.PGOPT['ACTS']&self.OPTS['DR'][0] + dfiles = self.download_remote_files(pgrec, lfile, linfo, locrec, locinfo, tempinfo, act) + if self.PGOPT['rstat'] < 0: + i = ecnt + break + if dfiles: rfiles = self.joinarray(rfiles, dfiles) + rmtcnt = len(rfiles) if rfiles else 0 + if rmtcnt > 0: + if 
lcnt > 1 and rmtcnt != lcnt: + self.pglog("{}: {} files found for {} local files".format(locrec['locinfo'], rmtcnt, lcnt), self.PGOPT['emlerr']) + i = ecnt + break + cnt = rmtcnt + rfile = rfiles[l] if lcnt > 1 else rfiles[rmtcnt-1] # record the break remote file name + else: + rfile = None + if linfo and self.PGOPT['rstat'] == 0: self.PGOPT['rstat'] = 1 + if cnt != 0 and self.PGOPT['rstat'] > 0: + if self.PGOPT['ACTS']&(self.OPTS['BL'][0]|self.OPTS['AF'][0]): + if cnt < 0 and linfo: + if tempinfo['archived'] and self.PGOPT['CACT'] == "UF" and not tempinfo['gotnew']: + if self.PGOPT['ACTS']&self.OPTS['AF'][0] and 'RA' not in self.params: + self.pglog(lfile + ": local file archived already", self.PGOPT['emllog']) + cnt = 0 + else: + if self.PGOPT['ACTS']&self.OPTS['BL'][0]: + self.pglog(lfile + ": local file exists already", self.PGOPT['emllog']) + cnt = 1 + elif rmtcnt == lcnt and lfile == rfile: + if self.PGOPT['ACTS']&self.OPTS['BL'][0]: + self.pglog(lfile + ": local file same as remote file", self.PGOPT['emllog']) + elif not (self.PGOPT['ACTS']&self.OPTS['BL'][0]): + self.pglog(lfile + ": local file not built yet", self.PGOPT['emlerr']) + cnt = 0 + else: + cnt = self.build_local_file(rfiles, lfile, linfo, locrec, tempinfo, lcnt, l) + if cnt and 'lfile' in tempinfo: + lfile = tempinfo['lfile'] + del tempinfo['lfile'] + if cnt != 0 and (self.PGOPT['ACTS']&self.OPTS['AF'][0]): + self.file_status_info(lfile, rfile, tempinfo) + cnt = self.archive_data_file(lfile, locrec, tempinfo, i) + if cnt > 0: + ucnt += 1 + if tempinfo['RS'] == 1: rscnt += 1 + if postcnt > -1: postcnt += 1 + elif cnt > 0: + cnt = 0 + if cnt > 0 and self.PGOPT['rstat'] > 0: + ccnt += 1 + elif 'UT' in self.params or tempinfo['archived']: + ut = 1 + if cnt > 0: acnt += 1 + if self.PGLOG['DSCHECK']: + self.add_dscheck_dcount(lcnt%20, 0, self.PGOPT['extlog']) + if ccnt == lcnt and (self.PGOPT['ACTS']&self.OPTS['CF'][0]) and locrec['cleancmd']: + if tempinfo['CVD'] and self.diffdate(edate, 
tempinfo['CVD']) > 0: + self.clean_older_files(locrec['cleancmd'], locrec['workdir'], locinfo, tempinfo['CVD'], locrec['locfile'], rmtrecs, rcnt, tempinfo) + else: + if not rfiles and rcnt and locrec['cleancmd'].find(' -RF') > -1: + rfiles = self.get_all_remote_files(rmtrecs, rcnt, tempinfo, edate) + self.clean_files(locrec['cleancmd'], edate, ehour, locfiles, rfiles, tempinfo['FQ']) + if self.PGOPT['ACTS']&self.OPTS['AF'][0] or self.PGOPT['UCNTL'] and self.PGOPT['CACT'] == self.PGOPT['UCNTL']['action']: + rmonly = 1 if self.PGOPT['rstat'] > 0 else 0 + if ccnt == lcnt: + self.reset_update_time(locinfo, locrec, tempinfo, ccnt, endonly) + elif ut: + self.reset_update_time(locinfo, locrec, tempinfo, acnt, endonly) + else: + if self.PGOPT['rstat'] == 0: + if tempinfo['VD'] and self.diffdatehour(edate, ehour, tempinfo['VD'], tempinfo['VH']) < 0: + self.reset_update_time(locinfo, locrec, tempinfo, 0, endonly) # skip update + self.PGOPT['rstat'] = 1 # reset remote download status + elif 'IE' in self.params: + if tempinfo['VD'] and self.diffdatehour(edate, ehour, tempinfo['VD'], tempinfo['VH']) >= 0: + endonly = 1 + self.reset_update_time(locinfo, locrec, tempinfo, 0, endonly) # skip update + self.PGOPT['rstat'] = 1 # reset remote download status + if setmiss: setmiss = self.set_miss_time(lfile, locrec, tempinfo, rmonly) + if postcnt > 0: + postcmd = self.executable_command(self.replace_pattern(tempinfo['postcmd'], edate, ehour, tempinfo['FQ']), + lfile, self.params['DS'], edate, ehour) + self.pgsystem(postcmd, self.PGOPT['emllog'], 5) + postcnt = 0 + if rscnt >= self.PGOPT['RSMAX']: + self.refresh_metadata(locrec['dsid']) + rscnt = 0 + if self.PGOPT['rstat'] < -1 or self.PGOPT['rstat'] < 0 and 'QE' in self.params: break # unrecoverable errors + if rscnt > 0: self.refresh_metadata(locrec['dsid']) + if ufile and uinfo and ucnt == 0: + self.pglog("{}: Last successful update - {}".format(uinfo, ufile), self.PGOPT['emlsum']) + self.lock_update(lindex, locinfo, 0, 
self.PGOPT['errlog']) + self.PGOPT['lindex'] = 0 + return retcnt + + # refresh the gathered metadata with speed up option -R and -S + def refresh_metadata(self, dsid): + sx = "{} -d {} -r".format(self.PGOPT['scm'], dsid) + if self.PGOPT['wtidx']: + if 0 in self.PGOPT['wtidx']: + self.pgsystem(sx + 'w all', self.PGOPT['emllog'], 5) + else: + for tidx in self.PGOPT['wtidx']: + self.pgsystem("{}w {}".format(sx, tidx), self.PGOPT['emllog'], 5) + self.PGOPT['wtidx'] = {} + + # retrieve remote files# act: > 0 - create filenames and get data files physically; 0 - create filenames only + def download_remote_files(self, rmtrec, lfile, linfo, locrec, locinfo, tempinfo, act = 0): + emlsum = self.PGOPT['emlsum'] if self.PGOPT['CACT'] == "DR" else self.PGOPT['emllog'] + rfile = rmtrec['remotefile'] + rmtinfo = locinfo + dfiles = [] + if not rfile: + rfile = lfile + rcnt = 1 + if rfile != locrec['locfile']: rmtinfo += "-" + rfile + if act: + tempinfo['DC'] = (self.params['DC'][0] if 'DC' in self.params and self.params['DC'][0] else + (rmtrec['download'] if rmtrec['download'] else locrec['download'])) + rfiles = self.get_remote_names(rfile, rmtrec, rmtinfo, tempinfo) + rcnt = len(rfiles) if rfiles else 0 + if rcnt == 0: + self.PGOPT['rstat'] = -2 + return self.pglog(rmtinfo + ": NO remote file name identified", self.PGOPT['emlerr']) + self.PGOPT['rcnt'] += rcnt # accumulate remote file counts + if tempinfo['DC']: tempinfo['DC'] = None + if act: # get file names on remote server and create download command + sfile = rmtrec['serverfile'] + if sfile and sfile != rfile: + sfiles = self.get_remote_names(sfile, rmtrec, rmtinfo, tempinfo) + scnt = len(sfiles) if sfiles else 0 + if scnt != rcnt: + self.PGOPT['rstat'] = -2 + return self.pglog("{}/{}: {}/{} MISS match file counts".format(rmtinfo, sfile, rcnt, scnt), self.PGOPT['emlerr']) + else: + sfiles = rfiles + scnt = rcnt + if tempinfo['AQ']: + tstr = tempinfo['AQ'] + if tstr == 'Web': + rpath = "{}/{}/".format(self.PGLOG['DSDHOME'], 
self.params['DS']) + else: + rpath = "{}/{}/{}/".format(self.PGLOG['DECSHOME'], self.params['DS'], tempinfo['ST']) + else: + tstr = 'Remote' + rpath = '' + ks = 1 if 'KS' in self.params else 0 + self.PGOPT['mcnt'] = ocnt = ecnt = scnt = dcnt = ncnt = 0 + omsize = self.PGLOG['MINSIZE'] + if 'VS' in tempinfo and 'VS' not in self.params: self.PGLOG['MINSIZE'] = tempinfo['VS'] + for i in range(rcnt): + rfile = rfiles[i] + rname = rfile['fname'] + rcmd = rfile['rcmd'] + rinfo = self.check_local_file(rpath + rname, 65, self.PGOPT['emerol']) # 65 = 1 + 64 + gotnew = 0 + if not act: + if rinfo: + dfiles.append(rname) + dcnt += 1 + else: + ecnt += 1 + if rfile['amiss']: + self.pglog(rname + ": SKIP for NOT gotten {} file yet".format(tstr), self.PGOPT['emlerr']) + self.PGOPT['mcnt'] += 1 + elif 'IE' in self.params: + self.pglog(rname + ": NOT gotten {} file yet".format(tstr), self.PGOPT['emlerr']) + self.PGOPT['rstat'] = -1 + else: + self.pglog(rname + ": ERROR for NOT gotten {} file yet".format(tstr), self.PGOPT['emlerr']) + self.PGOPT['rstat'] = -2 + break continue - elif 'CN' in PgOPT.params: - if rfile['ready'] == -1: # out of check new period already + elif rinfo and 'RD' not in self.params: + if not rcmd: dfiles.append(rname) dcnt += 1 - if tempinfo['archived']: ocnt += 1 + if tempinfo['archived']: + if 'CN' not in self.params: + ocnt += 1 + elif self.cmptime(rinfo['date_modified'], rinfo['time_modified'], tempinfo['ainfo']['adate'], tempinfo['ainfo']['atime']) < 1: + ocnt += 1 + self.pglog("{}: ARCHIVED, NO newer remote file {} found".format(lfile, rname), self.PGOPT['emllog']) + continue + elif 'CN' in self.params: + if rfile['ready'] == -1: # out of check new period already + dfiles.append(rname) + dcnt += 1 + if tempinfo['archived']: ocnt += 1 + continue + elif self.cmptime(rinfo['date_modified'], rinfo['time_modified'], rfile['date'], rfile['time']) >= 0: + dfiles.append(rname) + dcnt += 1 + if tempinfo['archived']: + ocnt += 1 + else: + self.pglog(rname + ": IS 
local already", self.PGOPT['emllog']) continue - elif PgUtil.cmptime(rinfo['date_modified'], rinfo['time_modified'], rfile['date'], rfile['time']) >= 0: - dfiles.append(rname) - dcnt += 1 + sfile = sfiles[i] + sname = sfile['fname'] + sinfo = rinfo if sname == rname else self.check_local_file(sname, 65, self.PGOPT['emerol']) + dact = self.get_download_action(rcmd) + rdcnt = 1 if re.search(r'(ncftpget|wget) ', dact) else 0 + dcmd = derr = "" + info0 = cfile = pcmd = bname = None + ftype = "remote" if sname == rname else "server" + if sinfo: + if rcmd: + if 'RD' in self.params: + self.pglog(sname + ": ftype file is local, Try dact again", self.PGOPT['emllog']) + elif ('CN' not in self.params and + self.cmptime(sinfo['date_modified'], sinfo['time_modified'], sfile['date'], sfile['time']) >= 0): + rcmd = None # do not need download again + else: + self.pglog("{}: USE the local copy of {} file for NO download command".format(sname, ftype), self.PGOPT['emllog']) + elif not rcmd: if tempinfo['archived']: ocnt += 1 + self.pglog("{}: ARCHIVED, NO need get {} file {} again for NO download command".format(lfile, ftype, sname), emlsum) else: - PgLOG.pglog(rname + ": IS local already", PgOPT.PGOPT['emllog']) + ecnt += 1 + if rfile['amiss']: + self.pglog(rname + ": SKIP missing remote file for NO download command", self.PGOPT['emlerr']) + self.PGOPT['mcnt'] += 1 + elif 'IE' in self.params: + self.pglog(rname + ": MISS remote file for NO download command", self.PGOPT['emlerr']) + self.PGOPT['rstat'] = -1 + else: + self.pglog(rname + ": ERROR missing remote file for NO download command", self.PGOPT['emlerr']) + self.PGOPT['rstat'] = -2 + break continue - - sfile = sfiles[i] - sname = sfile['fname'] - sinfo = rinfo if sname == rname else PgFile.check_local_file(sname, 65, PgOPT.PGOPT['emerol']) - dact = get_download_action(rcmd) - rdcnt = 1 if re.search(r'(ncftpget|wget) ', dact) else 0 - dcmd = derr = "" - info0 = cfile = pcmd = bname = None - ftype = "remote" if sname == rname 
else "server" - if sinfo: - if rcmd: - if 'RD' in PgOPT.params: - PgLOG.pglog(sname + ": ftype file is local, Try dact again", PgOPT.PGOPT['emllog']) - elif ('CN' not in PgOPT.params and - PgUtil.cmptime(sinfo['date_modified'], sinfo['time_modified'], sfile['date'], sfile['time']) >= 0): - rcmd = None # do not need download again - else: - PgLOG.pglog("{}: USE the local copy of {} file for NO download command".format(sname, ftype), PgOPT.PGOPT['emllog']) - elif not rcmd: - if tempinfo['archived']: - ocnt += 1 - PgLOG.pglog("{}: ARCHIVED, NO need get {} file {} again for NO download command".format(lfile, ftype, sname), emlsum) - else: - ecnt += 1 - if rfile['amiss']: - PgLOG.pglog(rname + ": SKIP missing remote file for NO download command", PgOPT.PGOPT['emlerr']) - PgOPT.PGOPT['mcnt'] += 1 - elif 'IE' in PgOPT.params: - PgLOG.pglog(rname + ": MISS remote file for NO download command", PgOPT.PGOPT['emlerr']) - PgOPT.PGOPT['rstat'] = -1 - else: - PgLOG.pglog(rname + ": ERROR missing remote file for NO download command", PgOPT.PGOPT['emlerr']) - PgOPT.PGOPT['rstat'] = -2 - break - continue - - if rcmd: # try to download now - if not sfile['ready']: - PgOPT.PGOPT['rstat'] = 0 - PgLOG.pglog("{}: {} file NOT Ready yet".format(sname, ftype), PgOPT.PGOPT['emllog']) - ecnt += 1 - break - if 'CN' in PgOPT.params: - if sinfo: - cfile = sname - elif rinfo: - cfile = rname - info0 = rinfo - elif rcnt == 1 and linfo: - cfile = lfile - info0 = linfo - elif tempinfo['archived']: - cfile = '' - - dcmd = PgUpdt.executable_command(rcmd, sname, PgOPT.params['DS'], sfile['date'], sfile['hour']) - if tempinfo['AT']: - stat = check_agetime(dcmd, sname, tempinfo['AT']) - if stat <= 0: - PgOPT.PGOPT['rstat'] = stat + if rcmd: # try to download now + if not sfile['ready']: + self.PGOPT['rstat'] = 0 + self.pglog("{}: {} file NOT Ready yet".format(sname, ftype), self.PGOPT['emllog']) ecnt += 1 break - if cfile != None: - stat = check_newer_file(dcmd, cfile, tempinfo['ainfo']) - if stat > 0: 
- if cfile != sname: - if stat < 3: PgLOG.pglog("{}: Found newer {} file {}".format(cfile, ftype, sname), emlsum) + if 'CN' in self.params: + if sinfo: + cfile = sname + elif rinfo: + cfile = rname + info0 = rinfo + elif rcnt == 1 and linfo: + cfile = lfile + info0 = linfo + elif tempinfo['archived']: + cfile = '' + dcmd = self.executable_command(rcmd, sname, self.params['DS'], sfile['date'], sfile['hour']) + if tempinfo['AT']: + stat = self.check_agetime(dcmd, sname, tempinfo['AT']) + if stat <= 0: + self.PGOPT['rstat'] = stat + ecnt += 1 + break + if cfile != None: + stat = self.check_newer_file(dcmd, cfile, tempinfo['ainfo']) + if stat > 0: + if cfile != sname: + if stat < 3: self.pglog("{}: Found newer {} file {}".format(cfile, ftype, sname), emlsum) + else: + if stat < 3: self.pglog("{}: Found newer {} file".format(cfile, ftype), emlsum) + if stat == 2: # file redlownloaded, reget file info + sinfo = self.check_local_file(sname, 64, self.PGOPT['emerol']) + else: # force download file + cfile = None else: - if stat < 3: PgLOG.pglog("{}: Found newer {} file".format(cfile, ftype), emlsum) - if stat == 2: # file redlownloaded, reget file info - sinfo = PgFile.check_local_file(sname, 64, PgOPT.PGOPT['emerol']) - else: # force download file - cfile = None - else: - if stat < 0: - if PgOPT.PGOPT['STATUS']: - if cfile != sname: - PgLOG.pglog("{}: Error check newer {} file {}\n{}".format(cfile, ftype, sname, PgOPT.PGOPT['STATUS']), PgOPT.PGOPT['emlerr']) + if stat < 0: + if self.PGOPT['STATUS']: + if cfile != sname: + self.pglog("{}: Error check newer {} file {}\n{}".format(cfile, ftype, sname, self.PGOPT['STATUS']), self.PGOPT['emlerr']) + else: + self.pglog("{}: Error check newer {} file\n{}".format(cfile, ftype, self.PGOPT['STATUS']), self.PGOPT['emlerr']) else: - PgLOG.pglog("{}: Error check newer {} file\n{}".format(cfile, ftype, PgOPT.PGOPT['STATUS']), PgOPT.PGOPT['emlerr']) + if cfile != sname: + self.pglog("{}: Cannot check newer {} file {} via 
{}".format(cfile, ftype, sname, dcmd), self.PGOPT['emlsum']) + else: + self.pglog("{}: Cannot check newer {} file via {}".format(cfile, ftype, dcmd), self.PGOPT['emlsum']) + if stat < -1: # uncrecoverable error + self.PGOPT['rstat'] = stat + ecnt += 1 + break + elif cfile and cfile != sname: + self.pglog("{}: NO newer {} file {} found\n{}".format(cfile, ftype, sname, self.PGOPT['STATUS']), emlsum) else: - if cfile != sname: - PgLOG.pglog("{}: Cannot check newer {} file {} via {}".format(cfile, ftype, sname, dcmd), PgOPT.PGOPT['emlsum']) - else: - PgLOG.pglog("{}: Cannot check newer {} file via {}".format(cfile, ftype, dcmd), PgOPT.PGOPT['emlsum']) - - if stat < -1: # uncrecoverable error - PgOPT.PGOPT['rstat'] = stat - ecnt += 1 - break - elif cfile and cfile != sname: - PgLOG.pglog("{}: NO newer {} file {} found\n{}".format(cfile, ftype, sname, PgOPT.PGOPT['STATUS']), emlsum) + self.pglog("{}: NO newer {} file found\n{}".format(sname, ftype, self.PGOPT['STATUS']), emlsum) + if tempinfo['archived']: + ncnt += 1 + if rcnt == 1: continue + if not info0: info0 = sinfo + sinfo = None + if not cfile: + if op.isfile(sname) and self.pgsystem("mv -f {} {}.rd".format(sname, sname), self.PGOPT['emerol'], 4): + bname = sname + ".rd" + if not info0: info0 = self.check_local_file(bname, 64, self.PGOPT['emerol']) + if dcmd.find('wget ') > -1: self.slow_web_access(dcmd) + self.pgsystem(dcmd, self.PGOPT['wrnlog'], 257) # 1 + 256 + derr = self.PGLOG['SYSERR'] + sinfo = self.check_local_file(sname, 70, self.PGOPT['emerol']) + if sinfo: + mode = 0o664 if sinfo['isfile'] else 0o775 + if mode != sinfo['mode']: self.set_local_mode(sname, sinfo['isfile'], mode, sinfo['mode'], sinfo['logname'], self.PGOPT['emerol']) + (stat, derr) = self.parse_download_error(derr, dact, sinfo) + if stat < -1: # uncrecoverable error + self.pglog("{}: error {}\n{}".format(sname, dcmd, derr), self.PGOPT['emlerr']) + self.PGOPT['rstat'] = stat + ecnt += 1 + break + elif stat > 0 and self.PGLOG['DSCHECK'] and 
sinfo: + self.add_dscheck_dcount(0, sinfo['data_size'], self.PGOPT['extlog']) + if sinfo: + if info0: + if info0['data_size'] == sinfo['data_size'] and bname: + if self.compare_md5sum(bname, sname, self.PGOPT['emlsum']): + self.pglog("{}: GOT same size, but different content, {} file via {}".format(sname, ftype, dact), self.PGOPT['emlsum']) + tempinfo['gotnew'] = gotnew = 1 + self.PGOPT['rdcnt'] += rdcnt + scnt += 1 + else: + self.pglog("{}: GOT same {} file via {}".format(sname, ftype, dact), emlsum) + if rinfo and rname != sname and 'KS' not in self.params: + self.pgsystem("rm -f " + sname, self.PGOPT['emllog'], 5) + sinfo = None + if tempinfo['archived']: + ncnt += 1 else: - PgLOG.pglog("{}: NO newer {} file found\n{}".format(sname, ftype, PgOPT.PGOPT['STATUS']), emlsum) - - if tempinfo['archived']: - ncnt += 1 - if rcnt == 1: continue - if not info0: info0 = sinfo - sinfo = None - - if not cfile: - if op.isfile(sname) and PgLOG.pgsystem("mv -f {} {}.rd".format(sname, sname), PgOPT.PGOPT['emerol'], 4): - bname = sname + ".rd" - if not info0: info0 = PgFile.check_local_file(bname, 64, PgOPT.PGOPT['emerol']) - if dcmd.find('wget ') > -1: PgUpdt.slow_web_access(dcmd) - PgLOG.pgsystem(dcmd, PgOPT.PGOPT['wrnlog'], 257) # 1 + 256 - derr = PgLOG.PGLOG['SYSERR'] - sinfo = PgFile.check_local_file(sname, 70, PgOPT.PGOPT['emerol']) - if sinfo: - mode = 0o664 if sinfo['isfile'] else 0o775 - if mode != sinfo['mode']: PgFile.set_local_mode(sname, sinfo['isfile'], mode, sinfo['mode'], sinfo['logname'], PgOPT.PGOPT['emerol']) - - (stat, derr) = PgUpdt.parse_download_error(derr, dact, sinfo) - if stat < -1: # uncrecoverable error - PgLOG.pglog("{}: error {}\n{}".format(sname, dcmd, derr), PgOPT.PGOPT['emlerr']) - PgOPT.PGOPT['rstat'] = stat - ecnt += 1 - break - elif stat > 0 and PgLOG.PGLOG['DSCHECK'] and sinfo: - PgCMD.add_dscheck_dcount(0, sinfo['data_size'], PgOPT.PGOPT['extlog']) - - if sinfo: - if info0: - if info0['data_size'] == sinfo['data_size'] and bname: - if 
PgFile.compare_md5sum(bname, sname, PgOPT.PGOPT['emlsum']): - PgLOG.pglog("{}: GOT same size, but different content, {} file via {}".format(sname, ftype, dact), PgOPT.PGOPT['emlsum']) + self.pglog("{}: GOT different {} file via {}".format(sname, ftype, dact), self.PGOPT['emlsum']) tempinfo['gotnew'] = gotnew = 1 - PgOPT.PGOPT['rdcnt'] += rdcnt + self.PGOPT['rdcnt'] += rdcnt scnt += 1 - else: - PgLOG.pglog("{}: GOT same {} file via {}".format(sname, ftype, dact), emlsum) - if rinfo and rname != sname and 'KS' not in PgOPT.params: - PgLOG.pgsystem("rm -f " + sname, PgOPT.PGOPT['emllog'], 5) - sinfo = None - if tempinfo['archived']: - ncnt += 1 - else: - PgLOG.pglog("{}: GOT different {} file via {}".format(sname, ftype, dact), PgOPT.PGOPT['emlsum']) - tempinfo['gotnew'] = gotnew = 1 - PgOPT.PGOPT['rdcnt'] += rdcnt + if bname: self.pgsystem("rm -rf " + bname, self.PGOPT['emerol'], 4) + elif rcmd: + self.pglog("{}: GOT {} file via {}".format(sname, ftype, dact), emlsum) + self.PGOPT['rdcnt'] += rdcnt scnt += 1 - if bname: PgLOG.pgsystem("rm -rf " + bname, PgOPT.PGOPT['emerol'], 4) - elif rcmd: - PgLOG.pglog("{}: GOT {} file via {}".format(sname, ftype, dact), emlsum) - PgOPT.PGOPT['rdcnt'] += rdcnt - scnt += 1 - - PgOPT.PGOPT['dcnt'] += 1 - if tempinfo['prcmd']: pcmd = tempinfo['prcmd'] - elif info0: - if bname: - PgLOG.pglog("{}: RETAIN the older {} file".format(sname, ftype), emlsum) - PgLOG.pgsystem("mv -f {} {}".format(bname, sname), PgOPT.PGOPT['emerol'], 4) + self.PGOPT['dcnt'] += 1 if tempinfo['prcmd']: pcmd = tempinfo['prcmd'] - sinfo = info0 - elif cfile: - if tempinfo['archived']: - ocnt += 1 - elif rcnt == 1: + elif info0: + if bname: + self.pglog("{}: RETAIN the older {} file".format(sname, ftype), emlsum) + self.pgsystem("mv -f {} {}".format(bname, sname), self.PGOPT['emerol'], 4) if tempinfo['prcmd']: pcmd = tempinfo['prcmd'] - if cfile == sname: sinfo = info0 - elif not rinfo and cfile == lfile: - continue - elif not cfile: - ecnt += 1 - if 
sfile['amiss']: - PgLOG.pglog("{}: SKIP {} file for FAIL {}\n{}".format(sname, ftype, dact, derr), PgOPT.PGOPT['emlsum']) - PgOPT.PGOPT['mcnt'] += 1 - else: - PgOPT.PGOPT['rstat'] = 0 if 'IE' in PgOPT.params else -1 - if not derr or derr and derr.find(PgLOG.PGLOG['MISSFILE']) > -1: - msg = "{}: NOT Available for {}\n".format(sname, dact) - PgLOG.set_email(msg, PgOPT.PGOPT['emlsum']) - if derr: PgLOG.pglog(derr, PgOPT.PGOPT['emllog']) + elif cfile: + if tempinfo['archived']: + ocnt += 1 + elif rcnt == 1: + if tempinfo['prcmd']: pcmd = tempinfo['prcmd'] + if cfile == sname: + sinfo = info0 + elif not rinfo and cfile == lfile: + continue + elif not cfile: + ecnt += 1 + if sfile['amiss']: + self.pglog("{}: SKIP {} file for FAIL {}\n{}".format(sname, ftype, dact, derr), self.PGOPT['emlsum']) + self.PGOPT['mcnt'] += 1 else: - PgLOG.pglog("{}: ERROR {}\n{}".format(sname, dact, derr), PgOPT.PGOPT['emlerr']) - if PgOPT.PGOPT['rstat'] < 0: break - continue - else: - ecnt += 1 - if sfile['amiss']: PgOPT.PGOPT['mcnt'] += 1 - continue - - if sinfo: - if rname == sname: - rinfo = sinfo - elif not rinfo or gotnew: - if rinfo: PgLOG.pgsystem("rm -f " + rname, PgOPT.PGOPT['emerol'], 5) - if PgFile.convert_files(rname, sname, ks, PgOPT.PGOPT['emerol']): - rinfo = PgFile.check_local_file(rname, 64, PgOPT.PGOPT['emerol']) + self.PGOPT['rstat'] = 0 if 'IE' in self.params else -1 + if not derr or derr and derr.find(self.PGLOG['MISSFILE']) > -1: + msg = "{}: NOT Available for {}\n".format(sname, dact) + self.set_email(msg, self.PGOPT['emlsum']) + if derr: self.pglog(derr, self.PGOPT['emllog']) + else: + self.pglog("{}: ERROR {}\n{}".format(sname, dact, derr), self.PGOPT['emlerr']) + if self.PGOPT['rstat'] < 0: break + continue + else: + ecnt += 1 + if sfile['amiss']: self.PGOPT['mcnt'] += 1 + continue + if sinfo: + if rname == sname: + rinfo = sinfo + elif not rinfo or gotnew: + if rinfo: self.pgsystem("rm -f " + rname, self.PGOPT['emerol'], 5) + if self.convert_files(rname, sname, ks, 
self.PGOPT['emerol']): + rinfo = self.check_local_file(rname, 64, self.PGOPT['emerol']) + else: + self.PGOPT['rstat'] = -1 + ecnt += 1 + break + if not rinfo: + ecnt += 1 + if sfile['amiss']: + self.pglog(rname + ": SKIP missing remote file", self.PGOPT['emlsum']) + self.PGOPT['mcnt'] += 1 + elif 'IE' in self.params: + self.pglog(rname + ": MISS remote file", self.PGOPT['emlerr']) + self.PGOPT['rstat'] = -1 else: - PgOPT.PGOPT['rstat'] = -1 + self.pglog(rname + ": ERROR missing remote file", self.PGOPT['emlerr']) + self.PGOPT['rstat'] = -2 + break + continue + if pcmd: + pcmd = self.executable_command(self.replace_pattern(pcmd, rfile['date'], rfile['hour'], tempinfo['FQ']), + rname, self.params['DS'], rfile['date'], rfile['hour']) + if not self.pgsystem(pcmd, self.PGOPT['emllog'], 259): + if self.PGLOG['SYSERR']: self.pglog(self.PGLOG['SYSERR'], self.PGOPT['emlerr']) + self.PGOPT['rstat'] = -1 ecnt += 1 break - - if not rinfo: - ecnt += 1 - if sfile['amiss']: - PgLOG.pglog(rname + ": SKIP missing remote file", PgOPT.PGOPT['emlsum']) - PgOPT.PGOPT['mcnt'] += 1 - elif 'IE' in PgOPT.params: - PgLOG.pglog(rname + ": MISS remote file", PgOPT.PGOPT['emlerr']) - PgOPT.PGOPT['rstat'] = -1 + dfiles.append(rname) + dcnt += 1 + self.PGLOG['MINSIZE'] = omsize + if ncnt == rcnt: + self.PGOPT['rstat'] = 0 + if dcnt > 0: dcnt = 0 + elif ecnt > 0: + s = 's' if rcnt > 1 else '' + if dcnt > scnt: + self.pglog("{}/{} of {} rfile{} obtained/at local".format(scnt, dcnt, rcnt, s), self.PGOPT['emllog']) else: - PgLOG.pglog(rname + ": ERROR missing remote file", PgOPT.PGOPT['emlerr']) - PgOPT.PGOPT['rstat'] = -2 - break - continue - - if pcmd: - pcmd = PgUpdt.executable_command(PgUpdt.replace_pattern(pcmd, rfile['date'], rfile['hour'], tempinfo['FQ']), - rname, PgOPT.params['DS'], rfile['date'], rfile['hour']) - if not PgLOG.pgsystem(pcmd, PgOPT.PGOPT['emllog'], 259): - if PgLOG.PGLOG['SYSERR']: PgLOG.pglog(PgLOG.PGLOG['SYSERR'], PgOPT.PGOPT['emlerr']) - PgOPT.PGOPT['rstat'] = -1 - ecnt 
+= 1 - break - dfiles.append(rname) - dcnt += 1 - - PgLOG.PGLOG['MINSIZE'] = omsize - if ncnt == rcnt: - PgOPT.PGOPT['rstat'] = 0 - if dcnt > 0: dcnt = 0 - elif ecnt > 0: - s = 's' if rcnt > 1 else '' - if dcnt > scnt: - PgLOG.pglog("{}/{} of {} rfile{} obtained/at local".format(scnt, dcnt, rcnt, s), PgOPT.PGOPT['emllog']) + self.pglog("{} of {} rfile{} obtained".format(scnt, rcnt, s), self.PGOPT['emllog']) + if dcnt > 0 and ocnt > 0: dcnt = 0 + elif ocnt == rcnt: + self.PGOPT['rstat'] = 0 + return dfiles if self.PGOPT['rstat'] == 1 and dcnt > 0 else None + + # build up local files + def build_local_file(self, rfiles, lfile, linfo, locrec, tempinfo, lcnt, l): + emlsum = self.PGOPT['emlsum'] if (self.PGOPT['ACTS'] == self.OPTS['BL'][0]) else self.PGOPT['emllog'] + if lcnt > 1: + rcnt = 1 + rmax = l + 1 else: - PgLOG.pglog("{} of {} rfile{} obtained".format(scnt, rcnt, s), PgOPT.PGOPT['emllog']) - if dcnt > 0 and ocnt > 0: dcnt = 0 - elif ocnt == rcnt: - PgOPT.PGOPT['rstat'] = 0 - - return dfiles if PgOPT.PGOPT['rstat'] == 1 and dcnt > 0 else None - -# -# build up local files -# -def build_local_file(rfiles, lfile, linfo, locrec, tempinfo, lcnt, l): - - emlsum = PgOPT.PGOPT['emlsum'] if (PgOPT.PGOPT['ACTS'] == PgOPT.OPTS['BL'][0]) else PgOPT.PGOPT['emllog'] - - if lcnt > 1: - rcnt = 1 - rmax = l + 1 - else: - rmax = rcnt = len(rfiles) if rfiles else 0 - - rbfile = None - if linfo: - if rcnt == 1 and lfile == rfiles[l]: return 1 - if PgLOG.pgsystem("mv -f {} {}".format(lfile, rbfile), PgOPT.PGOPT['emerol'], 4): - rbfile = lfile + '.rb' - else: - s = op.dirname(lfile) - if s and not op.isdir(s): PgFile.make_local_directory(s, PgOPT.PGOPT['emllog']|PgLOG.EXITLG) - - cext = None - if locrec['options']: - ms = re.search(r'-AF\s+([\w\.]+)', locrec['options'], re.I) - if ms: - fmt = ms.group(1) - ms = re.search(r'(\w+)\.TAR(\.|$)', fmt, re.I) - if ms: # check compression before tarring + rmax = rcnt = len(rfiles) if rfiles else 0 + rbfile = None + if linfo: + if rcnt == 1 
and lfile == rfiles[l]: return 1 + if self.pgsystem("mv -f {} {}".format(lfile, rbfile), self.PGOPT['emerol'], 4): + rbfile = lfile + '.rb' + else: + s = op.dirname(lfile) + if s and not op.isdir(s): self.make_local_directory(s, self.PGOPT['emllog']|self.EXITLG) + cext = None + if locrec['options']: + ms = re.search(r'-AF\s+([\w\.]+)', locrec['options'], re.I) + if ms: fmt = ms.group(1) - ms = re.match(r'^({})$'.format(PgFile.CMPSTR), fmt, re.I) - if ms: cext = '.' + fmt - - if tempinfo['blcmd']: - blcmd = PgUpdt.executable_command(PgUpdt.replace_pattern(tempinfo['blcmd'], tempinfo['edate'], tempinfo['ehour'], tempinfo['FQ']), - lfile, PgOPT.params['DS'], tempinfo['edate'], tempinfo['ehour']) - if not PgLOG.pgsystem(blcmd, PgOPT.PGOPT['emllog']) or PgFile.local_file_size(lfile, 2, PgOPT.PGOPT['emerol']) <= 0: - ret = PgLOG.pglog("{}: error build {}".format(blcmd, lfile), PgOPT.PGOPT['emlerr']) + ms = re.search(r'(\w+)\.TAR(\.|$)', fmt, re.I) + if ms: # check compression before tarring + fmt = ms.group(1) + ms = re.match(r'^({})$'.format(self.CMPSTR), fmt, re.I) + if ms: cext = '.' 
+ fmt + if tempinfo['blcmd']: + blcmd = self.executable_command(self.replace_pattern(tempinfo['blcmd'], tempinfo['edate'], tempinfo['ehour'], tempinfo['FQ']), + lfile, self.params['DS'], tempinfo['edate'], tempinfo['ehour']) + if not self.pgsystem(blcmd, self.PGOPT['emllog']) or self.local_file_size(lfile, 2, self.PGOPT['emerol']) <= 0: + ret = self.pglog("{}: error build {}".format(blcmd, lfile), self.PGOPT['emlerr']) + else: + self.PGOPT['bcnt'] += 1 + ret = 1 + if rbfile: + if ret: + self.pgsystem("rm -rf " + rbfile, self.PGOPT['emerol'], 4) + else: + self.pglog(lfile + ": RETAIN the older local file", emlsum) + self.pgsystem("mv -f {} {}".format(rbfile, lfile), self.PGOPT['emerol'], 4) + return ret + if lfile[0] == '!': # executable for build up local file name + blcmd = self.executable_command(lfile[1:], None, self.params['DS'], tempinfo['edate'], tempinfo['ehour']) + lfile = self.pgsystem(blcmd, self.PGOPT['emllog'], 21) + if lfile and self.local_file_size(lfile, 2, self.PGOPT['emerol']) > 0: + tempinfo['lfile'] = lfile + return 1 + else: + return self.pglog("{}: error build {}".format(blcmd, lfile), self.PGOPT['emlerr']) + if rcnt == 0 and not linfo: return 0 # no remote file found to build local file + ret = 1 + kr = 1 if 'KR' in self.params else 0 + if rcnt == 1 and not op.isdir(rfiles[l]): + rfile = rfiles[l] else: - PgOPT.PGOPT['bcnt'] += 1 - ret = 1 - + ms = re.match(r'^(.+)\.({})$'.format(self.CMPSTR), lfile, re.I) + rfile = ms.group(1) if ms else lfile + fd = None + if tempinfo['AQ']: + if not self.validate_one_infile(rfile, self.params['DS']): return 0 + fd = open(rfile, 'w') + fd.write(tempinfo['AQ'] + "File\n") + for i in range(rmax): + tfile = rfiles[i] + if fd: + fd.write(tfile + "\n") + continue + if op.isfile(tfile) and cext and not re.search(r'{}$'.format(cext), tfile, re.I): + ms = re.match(r'^(.+)\.({})$'.format(self.CMPSTR), tfile, re.I) + if ms: tfile = ms.group(1) + tfile += cext + if not self.convert_files(tfile, rfiles[i], kr, 
self.PGOPT['emllog']): + if op.exists(rfile): self.pgsystem("rm -f " + rfile, self.PGOPT['emllog']) + ret = self.pglog("{}: QUIT converting file from {}".format(rfile, tfile), self.PGOPT['emllog']) + break + cmd = "tar -{}vf {} {}".format('u' if i else 'c', rfile, tfile) + ret = self.pgsystem(cmd, self.PGOPT['emllog']) + if not ret: break + if fd: + ret = -1 + fd.close() + if op.exists(rfile): + s = "s" if rcnt > 1 else "" + if tempinfo['AQ']: + self.pglog("{}: input file CREATED for backing up {} {} file{}".format(rfile, rcnt, tempinfo['AQ'], s), emlsum) + else: + self.pglog("{}: tar file CREATED from {} file{}".format(rfile, rcnt, s), emlsum) + else: + ret = self.pglog(rfile + ": ERROR creating tar file", self.PGOPT['emlerr']) + if ret > 0: + if lfile != rfile: + ret = self.convert_files(lfile, rfile, kr, self.PGOPT['emllog']) + if ret: self.pglog("{}: BUILT from {}".format(lfile, rfile), emlsum) + if ret: + fsize = self.local_file_size(lfile, 3, self.PGOPT['emerol']) + if fsize > 0: + self.PGOPT['bcnt'] += 1 + if self.PGLOG['DSCHECK']: self.add_dscheck_dcount(0, fsize, self.PGOPT['extlog']) + else: + ret = 0 if rbfile: if ret: - PgLOG.pgsystem("rm -rf " + rbfile, PgOPT.PGOPT['emerol'], 4) + self.pgsystem("rm -rf " + rbfile, self.PGOPT['emerol'], 4) else: - PgLOG.pglog(lfile + ": RETAIN the older local file", emlsum) - PgLOG.pgsystem("mv -f {} {}".format(rbfile, lfile), PgOPT.PGOPT['emerol'], 4) - return ret - - if lfile[0] == '!': # executable for build up local file name - blcmd = PgUpdt.executable_command(lfile[1:], None, PgOPT.params['DS'], tempinfo['edate'], tempinfo['ehour']) - lfile = PgLOG.pgsystem(blcmd, PgOPT.PGOPT['emllog'], 21) - if lfile and PgFile.local_file_size(lfile, 2, PgOPT.PGOPT['emerol']) > 0: - tempinfo['lfile'] = lfile - return 1 + self.pglog(lfile + ": RETAIN the older local file", emlsum) + self.pgsystem("mv -f {} {}".format(rbfile, lfile), self.PGOPT['emerol'], 4) + return 1 if ret else 0 + + # append data type to options for given type 
name if not in options + def append_data_type(self, tname, options): + mp = r'(^|\s)-{}(\s|$)'.format(tname) + if not re.search(mp, options, re.I): options += " -{} {}".format(tname, self.DEFTYPES[tname]) + return options + + # get data type from options for given type name, and default one if not in options + def get_data_type(self, tname, options): + mp = r'(^|\s)-{}\s+(\w)(\s|$)'.format(tname) + ms = re.search(mp, options, re.I) + return ms.group(2) if ms else self.DEFTYPES[tname] + + # archive a data file + def archive_data_file(self, lfile, locrec, tempinfo, eidx): + growing = -1 + if tempinfo['ainfo']: + ainfo = tempinfo['ainfo'] + if ainfo['vindex']: growing = self.is_growing_file(locrec['locfile'], tempinfo['FQ']) + tempinfo['ainfo'] = None # clean the archive info recorded earlier else: - return PgLOG.pglog("{}: error build {}".format(blcmd, lfile), PgOPT.PGOPT['emlerr']) - - if rcnt == 0 and not linfo: return 0 # no remote file found to build local file - - ret = 1 - kr = 1 if 'KR' in PgOPT.params else 0 - if rcnt == 1 and not op.isdir(rfiles[l]): - rfile = rfiles[l] - else: - ms = re.match(r'^(.+)\.({})$'.format(PgFile.CMPSTR), lfile, re.I) - rfile = ms.group(1) if ms else lfile - fd = None + ainfo = {'archived' : 0, 'note' : None} # reference to empty hash + self.pglog("{}: start {} for {}".format(lfile, locrec['action'], tempinfo['einfo']), self.PGOPT['emllog']) + options = locrec['options'] if locrec['options'] else "" + act = locrec['action'] + archfile = None + if locrec['archfile']: archfile = self.replace_pattern(locrec['archfile'], tempinfo['edate'], tempinfo['ehour'], tempinfo['FQ']) + if act == 'AW': + if archfile and 'wfile' not in ainfo: ainfo['wfile'] = archfile + options = self.append_data_type('WT', options) + elif act == 'AS': + if archfile and 'sfile' not in ainfo: ainfo['sfile'] = archfile + options = self.append_data_type('ST', options) + elif act == 'AQ': + if archfile and 'bfile' not in ainfo: ainfo['bfile'] = archfile + options = 
self.append_data_type('QT', options) + if tempinfo['archived'] and not ('RA' in self.params and growing > 0): + if (ainfo['chksm'] and ainfo['chksm'] == self.PGOPT['chksm'] or + ainfo['asize'] and ainfo['asize'] == self.PGOPT['fsize'] and + self.cmptime(self.PGOPT['fdate'], self.PGOPT['ftime'], ainfo['adate'], ainfo['atime']) >= 0): + if 'RA' not in self.params: + amsg = "{}: ARCHIVED by {}".format(lfile, ainfo['adate']) + if tempinfo['ehour'] != None: amsg += ":{:02}".format(ainfo['ahour']) + self.pglog(amsg, self.PGOPT['emllog']) + if eidx == 0: self.pglog("Add Mode option -RA if you want to re-archive", self.PGOPT['emllog']) + return -1 + elif growing == 0: + growing = -1 + if growing == 0: tempinfo['archived'] = self.move_archived_file(ainfo, tempinfo['archived']) if tempinfo['AQ']: - if not PgOPT.validate_one_infile(rfile, PgOPT.params['DS']): return 0 - fd = open(rfile, 'w') - fd.write(tempinfo['AQ'] + "File\n") - - for i in range(rmax): - tfile = rfiles[i] - if fd: - fd.write(tfile + "\n") - continue - - if op.isfile(tfile) and cext and not re.search(r'{}$'.format(cext), tfile, re.I): - ms = re.match(r'^(.+)\.({})$'.format(PgFile.CMPSTR), tfile, re.I) - if ms: tfile = ms.group(1) - tfile += cext - if not PgFile.convert_files(tfile, rfiles[i], kr, PgOPT.PGOPT['emllog']): - if op.exists(rfile): PgLOG.pgsystem("rm -f " + rfile, PgOPT.PGOPT['emllog']) - ret = PgLOG.pglog("{}: QUIT converting file from {}".format(rfile, tfile), PgOPT.PGOPT['emllog']) - break - cmd = "tar -{}vf {} {}".format('u' if i else 'c', rfile, tfile) - ret = PgLOG.pgsystem(cmd, PgOPT.PGOPT['emllog']) - if not ret: break - - if fd: - ret = -1 - fd.close() - - if op.exists(rfile): - s = "s" if rcnt > 1 else "" - if tempinfo['AQ']: - PgLOG.pglog("{}: input file CREATED for backing up {} {} file{}".format(rfile, rcnt, tempinfo['AQ'], s), emlsum) - else: - PgLOG.pglog("{}: tar file CREATED from {} file{}".format(rfile, rcnt, s), emlsum) - else: - ret = PgLOG.pglog(rfile + ": ERROR creating tar 
file", PgOPT.PGOPT['emlerr']) - - if ret > 0: - if lfile != rfile: - ret = PgFile.convert_files(lfile, rfile, kr, PgOPT.PGOPT['emllog']) - if ret: PgLOG.pglog("{}: BUILT from {}".format(lfile, rfile), emlsum) - if ret: - fsize = PgFile.local_file_size(lfile, 3, PgOPT.PGOPT['emerol']) - if fsize > 0: - PgOPT.PGOPT['bcnt'] += 1 - if PgLOG.PGLOG['DSCHECK']: PgCMD.add_dscheck_dcount(0, fsize, PgOPT.PGOPT['extlog']) - else: - ret = 0 - - if rbfile: - if ret: - PgLOG.pgsystem("rm -rf " + rbfile, PgOPT.PGOPT['emerol'], 4) + ifopt = 'IF' else: - PgLOG.pglog(lfile + ": RETAIN the older local file", emlsum) - PgLOG.pgsystem("mv -f {} {}".format(rbfile, lfile), PgOPT.PGOPT['emerol'], 4) - - return 1 if ret else 0 - - -# -# append data type to options for given type name if not in options -# -def append_data_type(tname, options): - - mp = r'(^|\s)-{}(\s|$)'.format(tname) - if not re.search(mp, options, re.I): options += " -{} {}".format(tname, DEFTYPES[tname]) - return options - -# -# get data type from options for given type name, and default one if not in options -# -def get_data_type(tname, options): - - mp = r'(^|\s)-{}\s+(\w)(\s|$)'.format(tname) - ms = re.search(mp, options, re.I) - return ms.group(2) if ms else DEFTYPES[tname] - -# -# archive a data file -# -def archive_data_file(lfile, locrec, tempinfo, eidx): - - growing = -1 - if tempinfo['ainfo']: - ainfo = tempinfo['ainfo'] - if ainfo['vindex']: growing = PgUpdt.is_growing_file(locrec['locfile'], tempinfo['FQ']) - tempinfo['ainfo'] = None # clean the archive info recorded earlier - else: - ainfo = {'archived' : 0, 'note' : None} # reference to empty hash - - PgLOG.pglog("{}: start {} for {}".format(lfile, locrec['action'], tempinfo['einfo']), PgOPT.PGOPT['emllog']) - - options = locrec['options'] if locrec['options'] else "" - act = locrec['action'] - archfile = None - if locrec['archfile']: archfile = PgUpdt.replace_pattern(locrec['archfile'], tempinfo['edate'], tempinfo['ehour'], tempinfo['FQ']) - if act == 'AW': 
- if archfile and 'wfile' not in ainfo: ainfo['wfile'] = archfile - options = append_data_type('WT', options) - elif act == 'AS': - if archfile and 'sfile' not in ainfo: ainfo['sfile'] = archfile - options = append_data_type('ST', options) - elif act == 'AQ': - if archfile and 'bfile' not in ainfo: ainfo['bfile'] = archfile - options = append_data_type('QT', options) - - if tempinfo['archived'] and not ('RA' in PgOPT.params and growing > 0): - if (ainfo['chksm'] and ainfo['chksm'] == PgOPT.PGOPT['chksm'] or - ainfo['asize'] and ainfo['asize'] == PgOPT.PGOPT['fsize'] and - PgUtil.cmptime(PgOPT.PGOPT['fdate'], PgOPT.PGOPT['ftime'], ainfo['adate'], ainfo['atime']) >= 0): - if 'RA' not in PgOPT.params: - amsg = "{}: ARCHIVED by {}".format(lfile, ainfo['adate']) - if tempinfo['ehour'] != None: amsg += ":{:02}".format(ainfo['ahour']) - PgLOG.pglog(amsg, PgOPT.PGOPT['emllog']) - if eidx == 0: PgLOG.pglog("Add Mode option -RA if you want to re-archive", PgOPT.PGOPT['emllog']) - return -1 - elif growing == 0: - growing = -1 - - if growing == 0: tempinfo['archived'] = move_archived_file(ainfo, tempinfo['archived']) - - if tempinfo['AQ']: - ifopt = 'IF' - else: - ifopt = 'LF' - acmd = "dsarch {} {} -{} {}".format(PgOPT.params['DS'], act, ifopt, lfile) - if 'wfile' in ainfo: acmd += " -WF " + ainfo['wfile'] - if 'sfile' in ainfo: acmd += " -SF " + ainfo['sfile'] - if 'bfile' in ainfo: acmd += " -QF " + ainfo['bfile'] - if PgOPT.PGOPT['chksm']: acmd += " -MC " + PgOPT.PGOPT['chksm'] - - if growing > 0 and not re.search(r'(^|\s)-GF(\s|$)', options, re.I): acmd += " -GF" - if 'MD' in PgOPT.params and not re.search(r'(^|\s)-MD(\s|$)', options, re.I): acmd += " -MD" - if not re.search(r'(^|\s)-NE(\s|$)', options, re.I): acmd += " -NE" # no email in dsarch - if tempinfo['gotnew'] and not re.search(r'(^|\s)-OE(\s|$)', options, re.I): acmd += " -OE" - if 'VS' in PgOPT.params: - acmd += " -VS {}".format(PgOPT.params['VS']) - if 'VS' in tempinfo: options = re.sub('-VS\s+\d+(\s+|$)', '', 
options, flags=re.I) - if tempinfo['RS'] == 1: acmd += " -RS" - - fnote = None - if locrec['note'] and not re.search(r'(^|\s)-DE(\s|$)', options, re.I): - note = build_data_note(ainfo['note'], lfile, locrec, tempinfo) - if note: - if re.search(r'(\n|\"|\')', note): # if found \n or ' or ", create temporary input file - fnote = PgOPT.params['DS'] + ".note" - nd = open(fnote, 'w') - nd.write("DE<:>\n{}<:>\n".format(note)) - nd.close() - acmd += " -IF " + fnote - else: - acmd += " -DE '{}'".format(note) - - if options: - if locrec['cleancmd']: options = re.sub(r'(^-NW\s+|\s+-NW$)', '', options, 1, re.I) - acmd += " " + PgUpdt.replace_pattern(options, tempinfo['edate'], tempinfo['ehour'], tempinfo['FQ']) - - ret = PgLOG.pgsystem(acmd, PgOPT.PGOPT['emerol'], 69) # 1 + 4 + 64 - if fnote: PgLOG.pgsystem("rm -f " + fnote, PgOPT.PGOPT['emerol'], 4) - - tempinfo['ainfo'] = file_archive_info(lfile, locrec, tempinfo) - note = count_update_files(ainfo, tempinfo['ainfo'], ret, tempinfo['RS']) - PgLOG.pglog("{}: UPDATED({}) for {}".format(lfile, locrec['action'], tempinfo['einfo']), PgOPT.PGOPT['emlsum']) - - return ret - -# -# count files updated -# -def count_update_files(oinfo, ninfo, success, rsopt): - - nrecs = ninfo['types'] if ninfo else {} - orecs = oinfo['types'] if oinfo else {} - astrs = [] - astr = "" - - for type in nrecs: - nrec = nrecs[type] - orec = orecs[type] if type in orecs else None - - if 'sfile' in nrec: - atype = "Saved {} File".format(PgOPT.STYPE[type]) - elif 'bfile' in nrec: - atype = "Quasar backup {} File".format(PgOPT.BTYPE[type]) - else: - atype = "RDA {} File".format(PgOPT.WTYPE[type]) - if rsopt == 1: - tidx = nrec['tindex'] if nrec['tindex'] else 0 - PgOPT.PGOPT['wtidx'][tidx] = 1 + ifopt = 'LF' + acmd = "dsarch {} {} -{} {}".format(self.params['DS'], act, ifopt, lfile) + if 'wfile' in ainfo: acmd += " -WF " + ainfo['wfile'] + if 'sfile' in ainfo: acmd += " -SF " + ainfo['sfile'] + if 'bfile' in ainfo: acmd += " -QF " + ainfo['bfile'] + if 
self.PGOPT['chksm']: acmd += " -MC " + self.PGOPT['chksm'] + if growing > 0 and not re.search(r'(^|\s)-GF(\s|$)', options, re.I): acmd += " -GF" + if 'MD' in self.params and not re.search(r'(^|\s)-MD(\s|$)', options, re.I): acmd += " -MD" + if not re.search(r'(^|\s)-NE(\s|$)', options, re.I): acmd += " -NE" # no email in dsarch + if tempinfo['gotnew'] and not re.search(r'(^|\s)-OE(\s|$)', options, re.I): acmd += " -OE" + if 'VS' in self.params: + acmd += " -VS {}".format(self.params['VS']) + if 'VS' in tempinfo: options = re.sub('-VS\s+\d+(\s+|$)', '', options, flags=re.I) + if tempinfo['RS'] == 1: acmd += " -RS" + fnote = None + if locrec['note'] and not re.search(r'(^|\s)-DE(\s|$)', options, re.I): + note = self.build_data_note(ainfo['note'], lfile, locrec, tempinfo) + if note: + if re.search(r'(\n|\"|\')', note): # if found \n or ' or ", create temporary input file + fnote = self.params['DS'] + ".note" + nd = open(fnote, 'w') + nd.write("DE<:>\n{}<:>\n".format(note)) + nd.close() + acmd += " -IF " + fnote + else: + acmd += " -DE '{}'".format(note) + if options: + if locrec['cleancmd']: options = re.sub(r'(^-NW\s+|\s+-NW$)', '', options, 1, re.I) + acmd += " " + self.replace_pattern(options, tempinfo['edate'], tempinfo['ehour'], tempinfo['FQ']) + ret = self.pgsystem(acmd, self.PGOPT['emerol'], 69) # 1 + 4 + 64 + if fnote: self.pgsystem("rm -f " + fnote, self.PGOPT['emerol'], 4) + tempinfo['ainfo'] = self.file_archive_info(lfile, locrec, tempinfo) + note = self.count_update_files(ainfo, tempinfo['ainfo'], ret, tempinfo['RS']) + self.pglog("{}: UPDATED({}) for {}".format(lfile, locrec['action'], tempinfo['einfo']), self.PGOPT['emlsum']) + return ret - if (not orec or - nrec['data_size'] != orec['data_size'] or - PgUtil.cmptime(orec['date_modified'], orec['time_modified'], nrec['date_modified'], nrec['time_modified']) or - not (nrec['checksum'] and orec['checksum'] and nrec['checksum'] == orec['checksum'])): + # count files updated + def count_update_files(self, 
oinfo, ninfo, success, rsopt): + nrecs = ninfo['types'] if ninfo else {} + orecs = oinfo['types'] if oinfo else {} + astrs = [] + astr = "" + for type in nrecs: + nrec = nrecs[type] + orec = orecs[type] if type in orecs else None if 'sfile' in nrec: - PgOPT.PGOPT['uscnt'] += 1 + atype = "Saved {} File".format(self.STYPE[type]) elif 'bfile' in nrec: - if type == 'D': PgOPT.PGOPT['qdcnt'] += 1 - PgOPT.PGOPT['qbcnt'] += 1 - elif type == 'D': - PgOPT.PGOPT['udcnt'] += 1 - elif type == 'N': - PgOPT.PGOPT['uncnt'] += 1 + atype = "Quasar backup {} File".format(self.BTYPE[type]) else: - PgOPT.PGOPT['uwcnt'] += 1 - - astrs.append("{} {}rchived".format(atype, "Re-a" if orec else "A")) - if PgLOG.PGLOG['DSCHECK']: - PgCMD.add_dscheck_dcount(0, nrec['data_size'], PgOPT.PGOPT['extlog']) - - if astrs: - PgOPT.PGOPT['ucnt'] += 1 - if len(astrs) < ninfo['archcnt']: + atype = "RDA {} File".format(self.WTYPE[type]) + if rsopt == 1: + tidx = nrec['tindex'] if nrec['tindex'] else 0 + self.PGOPT['wtidx'][tidx] = 1 + if (not orec or + nrec['data_size'] != orec['data_size'] or + self.cmptime(orec['date_modified'], orec['time_modified'], nrec['date_modified'], nrec['time_modified']) or + not (nrec['checksum'] and orec['checksum'] and nrec['checksum'] == orec['checksum'])): + if 'sfile' in nrec: + self.PGOPT['uscnt'] += 1 + elif 'bfile' in nrec: + if type == 'D': self.PGOPT['qdcnt'] += 1 + self.PGOPT['qbcnt'] += 1 + elif type == 'D': + self.PGOPT['udcnt'] += 1 + elif type == 'N': + self.PGOPT['uncnt'] += 1 + else: + self.PGOPT['uwcnt'] += 1 + astrs.append("{} {}rchived".format(atype, "Re-a" if orec else "A")) + if self.PGLOG['DSCHECK']: + self.add_dscheck_dcount(0, nrec['data_size'], self.PGOPT['extlog']) + if astrs: + self.PGOPT['ucnt'] += 1 + if len(astrs) < ninfo['archcnt']: + if success: + astr = " Successful, but only " + else: + astr = " Partially finished, " + astr += ', '.join(astrs) + else: if success: - astr = " Successful, but only " + astr = " Successful, but NO file 
Re-archived" else: - astr = " Partially finished, " - astr += ', '.join(astrs) - else: - if success: - astr = " Successful, but NO file Re-archived" + astr = " Failed, NO file {}rchived".format('Re-a' if oinfo['present'] == ninfo['archcnt'] else "A") + if astr: + s = "s" if ninfo['archcnt'] > 1 else "" + astr += " of {} archfile{}".format(ninfo['archcnt'], s) + return astr + + # get the temporal info in local and remote file names and the possible values# between the break update and the current date + # BTW, change to working directory + def get_tempinfo(self, locrec, locinfo, eidx = 0): + # get data end date for update action + edate = self.params['ED'][eidx] if ('ED' in self.params and self.params['ED'][eidx]) else locrec['enddate'] + if not edate: return self.pglog(locinfo + ": MISS End Data Date for local update", self.PGOPT['emlerr']) + ehour = self.params['EH'][eidx] if ('EH' in self.params and self.params['EH'][eidx] != None) else locrec['endhour'] + if not isinstance(edate, str): edate = str(edate) + if ehour is None and self.pgget('drupdt', '', "lindex = {} and tinterval like '%H'".format(locrec['lindex'])): + return self.pglog(locinfo + ": MISS End Data Hour for hourly remote update", self.PGOPT['emlerr']) + if locrec['validint']: + val = locrec['validint'] + elif self.PGOPT['UCNTL'] and self.PGOPT['UCNTL']['validint']: + val = self.PGOPT['UCNTL']['validint'] else: - astr = " Failed, NO file {}rchived".format('Re-a' if oinfo['present'] == ninfo['archcnt'] else "A") - - if astr: - s = "s" if ninfo['archcnt'] > 1 else "" - astr += " of {} archfile{}".format(ninfo['archcnt'], s) - - return astr - -# -# get the temporal info in local and remote file names and the possible values -# between the break update and the current date -# BTW, change to working directory -# -def get_tempinfo(locrec, locinfo, eidx = 0): - - # get data end date for update action - edate = PgOPT.params['ED'][eidx] if ('ED' in PgOPT.params and PgOPT.params['ED'][eidx]) else 
locrec['enddate'] - if not edate: return PgLOG.pglog(locinfo + ": MISS End Data Date for local update", PgOPT.PGOPT['emlerr']) - ehour = PgOPT.params['EH'][eidx] if ('EH' in PgOPT.params and PgOPT.params['EH'][eidx] != None) else locrec['endhour'] - if not isinstance(edate, str): edate = str(edate) - - if ehour is None and PgDBI.pgget('drupdt', '', "lindex = {} and tinterval like '%H'".format(locrec['lindex'])): - return PgLOG.pglog(locinfo + ": MISS End Data Hour for hourly remote update", PgOPT.PGOPT['emlerr']) - - if locrec['validint']: - val = locrec['validint'] - elif PgOPT.PGOPT['UCNTL'] and PgOPT.PGOPT['UCNTL']['validint']: - val = PgOPT.PGOPT['UCNTL']['validint'] - else: - val = None - - tempinfo = {'AT' : None, 'DC' : None, 'ED' : [], 'EH' : [], 'VI' : None, - 'VD' : None, 'VH' : None, 'CVD' : None, 'NX' : None, 'FQ' : None, - 'QU' : None, 'EP' : 0, 'RS' : -1, 'AQ' : None} - - if val: val = PgUpdt.get_control_time(val, "Valid Internal") - if val: - tempinfo['VI'] = val - if ehour is None and val[3]: ehour = 0 - - val = PgUpdt.get_control_time(locrec['agetime'], "File Age Time") - if val: - tempinfo['AT'] = val - if ehour is None and val[3]: ehour = 0 - - frequency = PgOPT.params['FQ'][0] if 'FQ' in PgOPT.params else locrec['frequency'] - if frequency: # get data update frequency info - (val, unit) = PgOPT.get_control_frequency(frequency) + val = None + tempinfo = {'AT' : None, 'DC' : None, 'ED' : [], 'EH' : [], 'VI' : None, + 'VD' : None, 'VH' : None, 'CVD' : None, 'NX' : None, 'FQ' : None, + 'QU' : None, 'EP' : 0, 'RS' : -1, 'AQ' : None} + if val: val = self.get_control_time(val, "Valid Internal") if val: - tempinfo['FQ'] = val - tempinfo['QU'] = unit # update frequency unit of meassure - else: - locinfo = PgUpdt.replace_pattern(locinfo, edate, ehour) - return PgLOG.pglog("{}: {}".format(locinfo, unit), PgOPT.PGOPT['emlerr']) - if locrec['endperiod']: tempinfo['EP'] = locrec['endperiod'] - if val[3] and ehour is None: ehour = 0 - edate = 
PgUtil.enddate(edate, tempinfo['EP'], unit, tempinfo['FQ'][6]) - elif 'MU' in PgOPT.params or 'CP' in PgOPT.params: - locinfo = PgUpdt.replace_pattern(locinfo, edate, ehour) - return PgLOG.pglog(locinfo + ": MISS frequency for Update", PgOPT.PGOPT['emlerr']) - - val = PgUpdt.get_control_time(locrec['nextdue'], "Due Internval") - if val: - tempinfo['NX'] = val - if ehour is None and val[3]: ehour = 0 - - # check if allow missing remote file - if 'MR' in PgOPT.params and PgOPT.params['MR'][0]: - tempinfo['amiss'] = PgOPT.params['MR'][0] - elif locrec['missremote']: - tempinfo['amiss'] = locrec['missremote'] - else: - tempinfo['amiss'] = 'N' - - options = locrec['options'] - if locrec['action'] == 'AQ': - if options: - ms = re.search(r'-(ST|WT)\s+(\w)', options) - if ms: - if ms.group(1) == 'ST': - tempinfo['AQ'] = 'Saved' - tempinfo['ST'] = ms.group(2) - else: - tempinfo['AQ'] = 'Web' + tempinfo['VI'] = val + if ehour is None and val[3]: ehour = 0 + val = self.get_control_time(locrec['agetime'], "File Age Time") + if val: + tempinfo['AT'] = val + if ehour is None and val[3]: ehour = 0 + frequency = self.params['FQ'][0] if 'FQ' in self.params else locrec['frequency'] + if frequency: # get data update frequency info + (val, unit) = self.get_control_frequency(frequency) + if val: + tempinfo['FQ'] = val + tempinfo['QU'] = unit # update frequency unit of meassure else: - return PgLOG.pglog("{}: MISS -ST or -WT to backup {}".format(options, locinfo), PgOPT.PGOPT['emlerr']) - else: - return PgLOG.pglog("Set -ST or -WT in Options to backup {}".format(locinfo), PgOPT.PGOPT['emlerr']) - if (options and re.search(r'(^|\s)-GX(\s|$)', options, re.I) and - not re.search(r'(^|\s)-RS(\s|$)', options, re.I)): - tempinfo['RS'] = 0 # set to 1 if need pass -RS to dsarch - ddate = edate - dhour = ehour - dcnt = 0 - PgOPT.PGOPT['wtidx'] = {} - - if options: - ms = re.search(r'-VS\s+(\d+)', options, re.I) - if ms: tempinfo['VS'] = int(ms.group(1)) - - if tempinfo['VI']: - if 
tempinfo['VI'][3]: - (vdate, vhour) = PgUtil.adddatehour(PgOPT.PGOPT['CURDATE'], PgOPT.PGOPT['CURHOUR'], -tempinfo['VI'][0], - -tempinfo['VI'][1], -tempinfo['VI'][2], -tempinfo['VI'][3]) + locinfo = self.replace_pattern(locinfo, edate, ehour) + return self.pglog("{}: {}".format(locinfo, unit), self.PGOPT['emlerr']) + if locrec['endperiod']: tempinfo['EP'] = locrec['endperiod'] + if val[3] and ehour is None: ehour = 0 + edate = self.enddate(edate, tempinfo['EP'], unit, tempinfo['FQ'][6]) + elif 'MU' in self.params or 'CP' in self.params: + locinfo = self.replace_pattern(locinfo, edate, ehour) + return self.pglog(locinfo + ": MISS frequency for Update", self.PGOPT['emlerr']) + val = self.get_control_time(locrec['nextdue'], "Due Internval") + if val: + tempinfo['NX'] = val + if ehour is None and val[3]: ehour = 0 + # check if allow missing remote file + if 'MR' in self.params and self.params['MR'][0]: + tempinfo['amiss'] = self.params['MR'][0] + elif locrec['missremote']: + tempinfo['amiss'] = locrec['missremote'] else: - vdate = PgUtil.adddate(PgOPT.PGOPT['CURDATE'], -tempinfo['VI'][0], -tempinfo['VI'][1], -tempinfo['VI'][2]) - vhour = PgOPT.PGOPT['CURHOUR'] - - if 'CN' in PgOPT.params and locrec['cleancmd']: - tempinfo['CVD'] = PgUtil.adddate(PgOPT.PGOPT['CURDATE'], -tempinfo['VI'][0], -tempinfo['VI'][1], -(1+tempinfo['VI'][2])) - tempinfo['setmiss'] = 1 - if PgUtil.diffdatehour(edate, ehour, vdate, vhour) < 0: - vdate = edate - vhour = ehour - if tempinfo['amiss'] == 'N' and locrec['missdate']: - dhour = PgUtil.diffdatehour(vdate, vhour, locrec['missdate'], locrec['misshour']) - if dhour > 0: - if dhour > 240: - record = {'missdate' : None, 'misshour' : None} - PgDBI.pgupdt("dlupdt", record, "lindex = {}".format(locrec['lindex'])) + tempinfo['amiss'] = 'N' + options = locrec['options'] + if locrec['action'] == 'AQ': + if options: + ms = re.search(r'-(ST|WT)\s+(\w)', options) + if ms: + if ms.group(1) == 'ST': + tempinfo['AQ'] = 'Saved' + tempinfo['ST'] = 
ms.group(2) + else: + tempinfo['AQ'] = 'Web' else: - vdate = locrec['missdate'] - vhour = locrec['misshour'] - - if vdate and not isinstance(vdate, str): vdate = str(vdate) - tempinfo['VD'] = vdate - tempinfo['VH'] = vhour - if 'ED' not in PgOPT.params and PgUtil.diffdatehour(edate, ehour, vdate, vhour) > 0: - edate = vdate - if tempinfo['FQ']: - if tempinfo['EP'] or tempinfo['QU'] == 'M': - edate = PgUtil.enddate(edate, tempinfo['EP'], tempinfo['QU'], tempinfo['FQ'][6]) - while True: - (udate, uhour) = PgUpdt.addfrequency(edate, ehour, tempinfo['FQ'], -1) - if PgUtil.diffdatehour(udate, uhour, vdate, vhour) < 0: break - edate = udate - ehour = uhour + return self.pglog("{}: MISS -ST or -WT to backup {}".format(options, locinfo), self.PGOPT['emlerr']) + else: + return self.pglog("Set -ST or -WT in Options to backup {}".format(locinfo), self.PGOPT['emlerr']) + if (options and re.search(r'(^|\s)-GX(\s|$)', options, re.I) and + not re.search(r'(^|\s)-RS(\s|$)', options, re.I)): + tempinfo['RS'] = 0 # set to 1 if need pass -RS to dsarch + ddate = edate + dhour = ehour + dcnt = 0 + self.PGOPT['wtidx'] = {} + if options: + ms = re.search(r'-VS\s+(\d+)', options, re.I) + if ms: tempinfo['VS'] = int(ms.group(1)) + if tempinfo['VI']: + if tempinfo['VI'][3]: + (vdate, vhour) = self.adddatehour(self.PGOPT['CURDATE'], self.PGOPT['CURHOUR'], -tempinfo['VI'][0], + -tempinfo['VI'][1], -tempinfo['VI'][2], -tempinfo['VI'][3]) + else: + vdate = self.adddate(self.PGOPT['CURDATE'], -tempinfo['VI'][0], -tempinfo['VI'][1], -tempinfo['VI'][2]) + vhour = self.PGOPT['CURHOUR'] + if 'CN' in self.params and locrec['cleancmd']: + tempinfo['CVD'] = self.adddate(self.PGOPT['CURDATE'], -tempinfo['VI'][0], -tempinfo['VI'][1], -(1+tempinfo['VI'][2])) + tempinfo['setmiss'] = 1 + if self.diffdatehour(edate, ehour, vdate, vhour) < 0: + vdate = edate + vhour = ehour + if tempinfo['amiss'] == 'N' and locrec['missdate']: + dhour = self.diffdatehour(vdate, vhour, locrec['missdate'], locrec['misshour']) + 
if dhour > 0: + if dhour > 240: + record = {'missdate' : None, 'misshour' : None} + self.pgupdt("dlupdt", record, "lindex = {}".format(locrec['lindex'])) + else: + vdate = locrec['missdate'] + vhour = locrec['misshour'] + if vdate and not isinstance(vdate, str): vdate = str(vdate) + tempinfo['VD'] = vdate + tempinfo['VH'] = vhour + if 'ED' not in self.params and self.diffdatehour(edate, ehour, vdate, vhour) > 0: + edate = vdate + if tempinfo['FQ']: if tempinfo['EP'] or tempinfo['QU'] == 'M': - edate = PgUtil.enddate(edate, tempinfo['EP'], tempinfo['QU'], tempinfo['FQ'][6]) - - vdate = PgOPT.params['CD'] - vhour = PgOPT.params['CH'] - if tempinfo['NX']: - if tempinfo['NX'][3]: - (udate, uhour) = PgUtil.adddatehour(PgOPT.PGOPT['CURDATE'], vhour, -tempinfo['NX'][0], - -tempinfo['NX'][1], -tempinfo['NX'][2], -tempinfo['NX'][3]) - else: - udate = PgUtil.adddate(PgOPT.PGOPT['CURDATE'], -tempinfo['NX'][0], -tempinfo['NX'][1], -tempinfo['NX'][2]) - uhour = vhour - if PgUtil.diffdatehour(udate, uhour, vdate, vhour) <= 0: - vdate = udate - vhour = uhour - - if 'CP' in PgOPT.params: (vdate, vhour) = PgUpdt.addfrequency(vdate, vhour, tempinfo['FQ'], 1) - - fupdate = 1 if 'FU' in PgOPT.params else 0 - while fupdate or PgUtil.diffdatehour(edate, ehour, vdate, vhour) <= 0: - tempinfo['ED'].append(edate) - if ehour != None and tempinfo['QU'] != 'H': - tempinfo['EH'].append(23) - else: - tempinfo['EH'].append(ehour) - if 'MU' not in PgOPT.params: break - if tempinfo['RS'] == 0 and dcnt < 3: - if PgUtil.diffdatehour(edate, ehour, ddate, dhour) >= 0: dcnt += 1 - (edate, ehour) = PgUpdt.addfrequency(edate, ehour, tempinfo['FQ'], 1) - edate = PgUtil.enddate(edate, tempinfo['EP'], tempinfo['QU'], tempinfo['FQ'][6]) - fupdate = 0 - - if tempinfo['RS'] == 0 and dcnt > 2: tempinfo['RS'] = 1 - if not tempinfo['ED']: # no end time found, update not due yet - if tempinfo['NX']: - (udate, uhour) = PgUtil.adddatehour(edate, ehour, tempinfo['NX'][0], tempinfo['NX'][1], tempinfo['NX'][2], 
tempinfo['NX'][3]) - else: - udate = edate - uhour = ehour - locinfo = PgUpdt.replace_pattern(locinfo, edate, ehour, tempinfo['FQ']) - vdate = PgOPT.params['CD'] - val = "Update data" - if tempinfo['NX']: val += " due" - if uhour is None: - locinfo += ": {} on {}".format(val, udate) - else: - locinfo += ": {} at {}:{:02}".format(val, udate, uhour) - vdate += ":{:02}".format(PgOPT.params['CH']) - - return PgLOG.pglog("{} NOT due yet by {}".format(locinfo, vdate), PgOPT.PGOPT['emllog']) - - return tempinfo - -# -# get archived file info -# -def file_archive_info(lfile, locrec, tempinfo): - - if tempinfo['ainfo'] != None: return tempinfo['ainfo'] - - edate = tempinfo['edate'] - ehour = tempinfo['ehour'] - ainfo = {'archcnt' : 0, 'archived' : 0, 'present' : 0, 'vindex' : 0, 'types' : {}, 'note' : None} - growing = PgUpdt.is_growing_file(locrec['locfile'], tempinfo['FQ']) - if growing: + edate = self.enddate(edate, tempinfo['EP'], tempinfo['QU'], tempinfo['FQ'][6]) + while True: + (udate, uhour) = self.addfrequency(edate, ehour, tempinfo['FQ'], -1) + if self.diffdatehour(udate, uhour, vdate, vhour) < 0: break + edate = udate + ehour = uhour + if tempinfo['EP'] or tempinfo['QU'] == 'M': + edate = self.enddate(edate, tempinfo['EP'], tempinfo['QU'], tempinfo['FQ'][6]) + vdate = self.params['CD'] + vhour = self.params['CH'] if tempinfo['NX']: - (udate, uhour) = PgUtil.adddatehour(edate, ehour, tempinfo['NX'][0], tempinfo['NX'][1], tempinfo['NX'][2], tempinfo['NX'][3]) - else: - udate = edate - uhour = ehour - if PgLOG.PGLOG['GMTZ'] and uhour != None: # convert to local times - (udate, uhour) = PgUtil.adddatehour(udate, uhour, 0, 0, 0, -PgLOG.PGLOG['GMTZ']) - - options = locrec['options'] if locrec['options'] else "" - act = locrec['action'] - locrec['gindex'] = PgUpdt.get_group_index(options, edate, ehour, tempinfo['FQ']) - dsid = PgOPT.params['DS'] - gcnd = "gindex = {}".format(locrec['gindex']) - cnd = "dsid = '{}' AND {}".format(dsid, gcnd) - mmiss = 0 - if 
re.match(r'^A(B|W)$', act): # check existing web files - ainfo['archcnt'] = 1 - ms = re.search(r'(^|\s)-WT\s+(\w)(\s|$)', options, re.I) - type = get_data_type('WT', options) - if locrec['archfile']: - afile = PgUpdt.replace_pattern(locrec['archfile'], edate, ehour, tempinfo['FQ']) - else: - afile = lfile if re.search(r'(^|\s)-KP(\s|$)', lfile, re.I) else op.basename(lfile) - ms =re.search(r'(^|\s)-WP\s+(\S+)', options, re.I) - if ms: - path = PgUpdt.replace_pattern(ms.group(2), edate, ehour, tempinfo['FQ']) + if tempinfo['NX'][3]: + (udate, uhour) = self.adddatehour(self.PGOPT['CURDATE'], vhour, -tempinfo['NX'][0], + -tempinfo['NX'][1], -tempinfo['NX'][2], -tempinfo['NX'][3]) else: - path = PgDBI.get_group_field_path(locrec['gindex'], dsid, 'webpath') - if path: afile = PgLOG.join_paths(path, afile) - - wrec = PgSplit.pgget_wfile(dsid, "*", "{} AND type = '{}' AND wfile = '{}'".format(gcnd, type, afile), PgOPT.PGOPT['extlog']) - if wrec: - ainfo['wfile'] = wrec['wfile'] - adate = ainfo['adate'] = str(wrec['date_modified']) - atime = ainfo['atime'] = str(wrec['time_modified']) - ahour = None - if atime: - ms = re.match(r'^(\d+):', atime) - if ms: ahour = int(ms.group(1)) - ainfo['ahour'] = ahour - ainfo['asize'] = wrec['data_size'] - ainfo['chksm'] = wrec['checksum'] if wrec['checksum'] else '' - ainfo['note'] = wrec['note'] - ainfo['types'][type] = wrec - ainfo['wtype'] = type - if not growing or PgUtil.diffdatehour(udate, uhour, adate, ahour) <= 0: ainfo['archived'] += 1 - if wrec['vindex']: ainfo['vindex'] = wrec['vindex'] - ainfo['present'] += 1 - - if act == 'AS': # check existing save files - ainfo['archcnt'] = 1 - type = get_data_type('ST', options) - if locrec['archfile']: - afile = PgUpdt.replace_pattern(locrec['archfile'], edate, ehour, tempinfo['FQ']) - else: - afile = lfile if re.search(r'(^|\s)-KP(\s|$)', options, re.I) else op.basename(lfile) - ms = re.search(r'(^|\s)-SP\s+(\S+)', options, re.I) - if ms: - path = PgUpdt.replace_pattern(ms.group(2), 
edate, ehour, tempinfo['FQ']) + udate = self.adddate(self.PGOPT['CURDATE'], -tempinfo['NX'][0], -tempinfo['NX'][1], -tempinfo['NX'][2]) + uhour = vhour + if self.diffdatehour(udate, uhour, vdate, vhour) <= 0: + vdate = udate + vhour = uhour + if 'CP' in self.params: (vdate, vhour) = self.addfrequency(vdate, vhour, tempinfo['FQ'], 1) + fupdate = 1 if 'FU' in self.params else 0 + while fupdate or self.diffdatehour(edate, ehour, vdate, vhour) <= 0: + tempinfo['ED'].append(edate) + if ehour != None and tempinfo['QU'] != 'H': + tempinfo['EH'].append(23) else: - path = PgDBI.get_group_field_path(locrec['gindex'], PgOPT.params['DS'], 'savedpath') - if path: afile = PgLOG.join_paths(path, afile) - - srec = PgDBI.pgget("sfile", "*", "{} AND type = '{}' AND sfile = '{}'".format(cnd, type, afile), PgOPT.PGOPT['extlog']) - if srec: - ainfo['sfile'] = srec['sfile'] - adate = ainfo['adate'] = str(srec['date_modified']) - atime = ainfo['atime'] = str(srec['time_modified']) - ahour = None - if atime: - ms = re.match(r'^(\d+):', atime) - if ms: ahour = int(ms.group(1)) - ainfo['asize'] = srec['data_size'] - ainfo['chksm'] = srec['checksum'] if srec['checksum'] else '' - ainfo['note'] = srec['note'] - ainfo['types'][type] = srec - ainfo['stype'] = type - if not growing or PgUtil.diffdatehour(udate, uhour, adate, ahour) <= 0: ainfo['archived'] += 1 - if srec['vindex']: ainfo['vindex'] = srec['vindex'] - ainfo['present'] += 1 - - if act == 'AQ': # check existing quasar backup files - ainfo['archcnt'] = 1 - type = get_data_type('QT', options) - if locrec['archfile']: - afile = PgUpdt.replace_pattern(locrec['archfile'], edate, ehour, tempinfo['FQ']) - else: - return PgLOG.pglog(lfile + ": Miss Backup file name via (FA|FileArchived)", PgOPT.PGOPT['emlerr']) - - brec = PgDBI.pgget("bfile", "*", "dsid = '{}' AND type = '{}' AND bfile = '{}'".format(PgOPT.params['DS'], type, afile), PgOPT.PGOPT['extlog']) - if brec: - ainfo['bfile'] = brec['bfile'] - adate = ainfo['adate'] = 
str(brec['date_modified']) - atime = ainfo['atime'] = str(brec['time_modified']) - ahour = None - if atime: - ms = re.match(r'^(\d+):', atime) - if ms: ahour = int(ms.group(1)) - ainfo['asize'] = brec['data_size'] - ainfo['chksm'] = brec['checksum'] if brec['checksum'] else '' - ainfo['note'] = brec['note'] - ainfo['types'][type] = brec - ainfo['btype'] = type - if not growing or PgUtil.diffdatehour(udate, uhour, adate, ahour) <= 0: ainfo['archived'] += 1 - ainfo['present'] += 1 - - if ainfo['archcnt'] == 0: - PgLOG.pglog("{}: unknown archive action {}".format(lfile, act), PgOPT.PGOPT['extlog']) - - return ainfo # always returns a hash reference for archiving info - -# -# build up data note based on temporal info, keep the begin timestamp -# for existing record; change end timestamp only if new data added -# return None if no change for existing note -# -def build_data_note(onote, lfile, locrec, tempinfo): - - note = locrec['note'] - if not note: return onote - - seps = PgOPT.params['PD'] - match = "[^{}]+".format(seps[1]) - edate = tempinfo['edate'] - ehour = tempinfo['ehour'] - - if note[0] == '!': # executable for build up data note - cmd = PgUpdt.executable_command(1, None, None, edate) - if not cmd: return 0 - return PgLOG.pgsystem(cmd, PgOPT.PGOPT['emllog'], 21) - - # repalce generic patterns first - note = PgUpdt.replace_pattern(note, None) # replace generic patterns first - - # get temporal patterns - patterns = re.findall(r'{}({}){}'.format(seps[0], match, seps[1]), note) - pcnt = len(patterns) - if pcnt == 0: return note # no pattern temporal matches - if pcnt > 2: - PgLOG.pglog("{}-{}: TOO many ({}) temporal patterns".format(lfile, note, pcnt), PgOPT.PGOPT['emllog']) - return onote - - if pcnt == 2: # replace start time - if onote: # get start time from existing note - replace = "{}{}{}".format(seps[0], patterns[0], seps[1]) - ms = re.match(r'^(.*){}(.*){}'.format(replace, PgOPT.params['PD'][0]), note) - if ms: - init = ms.group(1) - sp = ms.group(2) - 
ms = re.search(r'{}(.+){}'.format(init, sp), onote) + tempinfo['EH'].append(ehour) + if 'MU' not in self.params: break + if tempinfo['RS'] == 0 and dcnt < 3: + if self.diffdatehour(edate, ehour, ddate, dhour) >= 0: dcnt += 1 + (edate, ehour) = self.addfrequency(edate, ehour, tempinfo['FQ'], 1) + edate = self.enddate(edate, tempinfo['EP'], tempinfo['QU'], tempinfo['FQ'][6]) + fupdate = 0 + if tempinfo['RS'] == 0 and dcnt > 2: tempinfo['RS'] = 1 + if not tempinfo['ED']: # no end time found, update not due yet + if tempinfo['NX']: + (udate, uhour) = self.adddatehour(edate, ehour, tempinfo['NX'][0], tempinfo['NX'][1], tempinfo['NX'][2], tempinfo['NX'][3]) + else: + udate = edate + uhour = ehour + locinfo = self.replace_pattern(locinfo, edate, ehour, tempinfo['FQ']) + vdate = self.params['CD'] + val = "Update data" + if tempinfo['NX']: val += " due" + if uhour is None: + locinfo += ": {} on {}".format(val, udate) + else: + locinfo += ": {} at {}:{:02}".format(val, udate, uhour) + vdate += ":{:02}".format(self.params['CH']) + return self.pglog("{} NOT due yet by {}".format(locinfo, vdate), self.PGOPT['emllog']) + return tempinfo + + # get archived file info + def file_archive_info(self, lfile, locrec, tempinfo): + if tempinfo['ainfo'] != None: return tempinfo['ainfo'] + edate = tempinfo['edate'] + ehour = tempinfo['ehour'] + ainfo = {'archcnt' : 0, 'archived' : 0, 'present' : 0, 'vindex' : 0, 'types' : {}, 'note' : None} + growing = self.is_growing_file(locrec['locfile'], tempinfo['FQ']) + if growing: + if tempinfo['NX']: + (udate, uhour) = self.adddatehour(edate, ehour, tempinfo['NX'][0], tempinfo['NX'][1], tempinfo['NX'][2], tempinfo['NX'][3]) + else: + udate = edate + uhour = ehour + if self.PGLOG['GMTZ'] and uhour != None: # convert to local times + (udate, uhour) = self.adddatehour(udate, uhour, 0, 0, 0, -self.PGLOG['GMTZ']) + options = locrec['options'] if locrec['options'] else "" + act = locrec['action'] + locrec['gindex'] = self.get_group_index(options, edate, 
ehour, tempinfo['FQ']) + dsid = self.params['DS'] + gcnd = "gindex = {}".format(locrec['gindex']) + cnd = "dsid = '{}' AND {}".format(dsid, gcnd) + mmiss = 0 + if re.match(r'^A(B|W)$', act): # check existing web files + ainfo['archcnt'] = 1 + ms = re.search(r'(^|\s)-WT\s+(\w)(\s|$)', options, re.I) + type = self.get_data_type('WT', options) + if locrec['archfile']: + afile = self.replace_pattern(locrec['archfile'], edate, ehour, tempinfo['FQ']) + else: + afile = lfile if re.search(r'(^|\s)-KP(\s|$)', lfile, re.I) else op.basename(lfile) + ms =re.search(r'(^|\s)-WP\s+(\S+)', options, re.I) if ms: - sdate = ms.group(1) - note = re.sub(replace, sdate, note, 1) - elif tempinfo['FQ']: # get start time - (sdate, shour) = PgUpdt.addfrequency(edate, ehour, tempinfo['FQ'], 0) - note = PgUpdt.replace_pattern(note, sdate, shour, None, 1) - - return PgUpdt.replace_pattern(note, edate, ehour) # repalce end time now - -# -# get data file status info -# -def file_status_info(lfile, rfile, tempinfo): - - # check and cache new data info - finfo = PgFile.check_local_file(lfile, 33, PgOPT.PGOPT['wrnlog']) # 33 = 1 + 32 - if not finfo: - PgOPT.PGOPT['chksm'] = '' - PgOPT.PGOPT['fsize'] = 0 - return - - fdate = finfo['date_modified'] - ftime = finfo['time_modified'] - fhour = None - ms = re.match(r'^(\d+):', ftime) - if ms: four = int(ms.group(1)) - PgOPT.PGOPT['fsize'] = finfo['data_size'] - PgOPT.PGOPT['chksm'] = finfo['checksum'] - - if rfile and lfile != rfile: - finfo = PgFile.check_local_file(rfile, 1, PgOPT.PGOPT['wrnlog']) - if finfo and PgUtil.cmptime(finfo['date_modified'], finfo['time_modified'], fdate, ftime) < 0: - fdate = finfo['date_modified'] - ftime = finfo['time_modified'] - ms = re.match(r'^(\d+):', ftime) - if ms: four = int(ms.group(1)) - - PgOPT.PGOPT['fdate'] = fdate - PgOPT.PGOPT['ftime'] = ftime - PgOPT.PGOPT['fhour'] = fhour - - if 'RE' in PgOPT.params: # reset end data/time/hour - if tempinfo['NX']: - if tempinfo['NX'][3]: - (fdate, fhour) = 
PgUtil.adddatehour(fdate, fhour, -tempinfo['NX'][0], -tempinfo['NX'][1], - -tempinfo['NX'][2], -tempinfo['NX'][3]) + path = self.replace_pattern(ms.group(2), edate, ehour, tempinfo['FQ']) + else: + path = self.get_group_field_path(locrec['gindex'], dsid, 'webpath') + if path: afile = self.join_paths(path, afile) + wrec = self.pgget_wfile(dsid, "*", "{} AND type = '{}' AND wfile = '{}'".format(gcnd, type, afile), self.PGOPT['extlog']) + if wrec: + ainfo['wfile'] = wrec['wfile'] + adate = ainfo['adate'] = str(wrec['date_modified']) + atime = ainfo['atime'] = str(wrec['time_modified']) + ahour = None + if atime: + ms = re.match(r'^(\d+):', atime) + if ms: ahour = int(ms.group(1)) + ainfo['ahour'] = ahour + ainfo['asize'] = wrec['data_size'] + ainfo['chksm'] = wrec['checksum'] if wrec['checksum'] else '' + ainfo['note'] = wrec['note'] + ainfo['types'][type] = wrec + ainfo['wtype'] = type + if not growing or self.diffdatehour(udate, uhour, adate, ahour) <= 0: ainfo['archived'] += 1 + if wrec['vindex']: ainfo['vindex'] = wrec['vindex'] + ainfo['present'] += 1 + if act == 'AS': # check existing save files + ainfo['archcnt'] = 1 + type = self.get_data_type('ST', options) + if locrec['archfile']: + afile = self.replace_pattern(locrec['archfile'], edate, ehour, tempinfo['FQ']) else: - fdate = PgUtil.adddate(fdate, -tempinfo['NX'][0], -tempinfo['NX'][1], -tempinfo['NX'][2]) - - while True: - (edate, ehour) = PgUpdt.addfrequency(tempinfo['edate'], tempinfo['ehour'], tempinfo['FQ'], 1) - edate = PgUtil.enddate(edate, tempinfo['EP'], tempinfo['QU'], tempinfo['FQ'][6]) - if PgUtil.diffdatehour(edate, ehour, fdate, fhour) > 0: break - tempinfo['edate'] = edate - tempinfo['ehour'] = ehour - -# -# check if a Server file is aged enough for download -# return 1 if valid, 0 if not aged enough, -1 if cannot check -# -def check_agetime(dcmd, sfile, atime): - - info = PgUpdt.check_server_file(dcmd, 1) - if not info: - sact = get_download_action(dcmd) - (stat, derr) = 
PgUpdt.parse_download_error(PgOPT.PGOPT['STATUS'], sact) - PgOPT.PGOPT['STATUS'] = derr - PgLOG.pglog("{}: cannot check file age\n{}".format(sfile, PgOPT.PGOPT['STATUS']), PgOPT.PGOPT['emlerr']) - return stat - - ahour = None - if atime[3]: - ms = re.match(r'^(\d+):', info['time_modified']) - if ms: ahour = int(ms.group(1)) - (adate, ahour) = PgUtil.adddatehour(info['date_modified'], ahour, atime[0], atime[1], atime[2], atime[3]) - if PgUtil.diffdatehour(PgOPT.params['CD'], PgOPT.params['CH'], adate, ahour) >= 0: - return 1 - - if ahour is None: - PgLOG.pglog(("{}: original {} file ready by {}\n".format(sfile, info['ftype'], info['date_modified']) + - "but NOT aged enough for retrieving yet by " + PgOPT.params['CD']), PgOPT.PGOPT['emllog']) - else: - PgLOG.pglog(("{}: original {} file ready by {}:{:02}\n".format(sfile, info['ftype'], info['date_modified'], ahour) + - "but NOT aged enough for retrieving yet by {}:{:02}".format(PgOPT.params['CD'], PgOPT.params['CH'])), PgOPT.PGOPT['emllog']) - - return 0 # otherwise server file is not aged enough - -# -# check if a Server file is changed with different size -# return 1 - file changed, 2 - new file retrieved, 3 - force redlownload, -# 0 - no change , -1 - error check, -2 - cannot check -# -def check_newer_file(dcmd, cfile, ainfo): - - if cfile: - finfo = PgFile.check_local_file(cfile, 33, PgOPT.PGOPT['wrnlog']) - if not finfo: return 3 # download if can not check newer - else: - finfo = {'isfile' : 0, 'checksum' : ainfo['chksm'], 'data_size' : ainfo['asize'], - 'date_modified' : ainfo['adate'], 'time_modified' : ainfo['atime']} - - cinfo = PgUpdt.check_server_file(dcmd, 33, cfile) - if not cinfo: - sact = get_download_action(dcmd) - (stat, derr) = PgUpdt.parse_download_error(PgOPT.PGOPT['STATUS'], sact) - PgOPT.PGOPT['STATUS'] = derr - return stat - - stat = 2 if cinfo['ftype'] == "WGET" else 1 - if finfo['isfile'] and cfile == cinfo['fname'] and finfo['data_size'] and cinfo['data_size'] and cinfo['data_size'] != 
finfo['data_size']: - return stat - - PgOPT.PGOPT['STATUS'] = '' - if (finfo['data_size'] != cinfo['data_size'] or 'checksum' not in cinfo or - 'checksum' not in finfo or finfo['checksum'] != cinfo['checksum']): - if 'HO' in PgOPT.params and cinfo['ftype'] == "FTP": - (cdate, ctime) = PgUtil.addhour(cinfo['date_modified'], cinfo['time_modified'], -PgOPT.params['HO'][0]) + afile = lfile if re.search(r'(^|\s)-KP(\s|$)', options, re.I) else op.basename(lfile) + ms = re.search(r'(^|\s)-SP\s+(\S+)', options, re.I) + if ms: + path = self.replace_pattern(ms.group(2), edate, ehour, tempinfo['FQ']) + else: + path = self.get_group_field_path(locrec['gindex'], self.params['DS'], 'savedpath') + if path: afile = self.join_paths(path, afile) + srec = self.pgget("sfile", "*", "{} AND type = '{}' AND sfile = '{}'".format(cnd, type, afile), self.PGOPT['extlog']) + if srec: + ainfo['sfile'] = srec['sfile'] + adate = ainfo['adate'] = str(srec['date_modified']) + atime = ainfo['atime'] = str(srec['time_modified']) + ahour = None + if atime: + ms = re.match(r'^(\d+):', atime) + if ms: ahour = int(ms.group(1)) + ainfo['asize'] = srec['data_size'] + ainfo['chksm'] = srec['checksum'] if srec['checksum'] else '' + ainfo['note'] = srec['note'] + ainfo['types'][type] = srec + ainfo['stype'] = type + if not growing or self.diffdatehour(udate, uhour, adate, ahour) <= 0: ainfo['archived'] += 1 + if srec['vindex']: ainfo['vindex'] = srec['vindex'] + ainfo['present'] += 1 + if act == 'AQ': # check existing quasar backup files + ainfo['archcnt'] = 1 + type = self.get_data_type('QT', options) + if locrec['archfile']: + afile = self.replace_pattern(locrec['archfile'], edate, ehour, tempinfo['FQ']) + else: + return self.pglog(lfile + ": Miss Backup file name via (FA|FileArchived)", self.PGOPT['emlerr']) + brec = self.pgget("bfile", "*", "dsid = '{}' AND type = '{}' AND bfile = '{}'".format(self.params['DS'], type, afile), self.PGOPT['extlog']) + if brec: + ainfo['bfile'] = brec['bfile'] + adate = 
ainfo['adate'] = str(brec['date_modified']) + atime = ainfo['atime'] = str(brec['time_modified']) + ahour = None + if atime: + ms = re.match(r'^(\d+):', atime) + if ms: ahour = int(ms.group(1)) + ainfo['asize'] = brec['data_size'] + ainfo['chksm'] = brec['checksum'] if brec['checksum'] else '' + ainfo['note'] = brec['note'] + ainfo['types'][type] = brec + ainfo['btype'] = type + if not growing or self.diffdatehour(udate, uhour, adate, ahour) <= 0: ainfo['archived'] += 1 + ainfo['present'] += 1 + if ainfo['archcnt'] == 0: + self.pglog("{}: unknown archive action {}".format(lfile, act), self.PGOPT['extlog']) + return ainfo # always returns a hash reference for archiving info + + # build up data note based on temporal info, keep the begin timestamp# for existing record; change end timestamp only if new data added + # return None if no change for existing note + def build_data_note(self, onote, lfile, locrec, tempinfo): + note = locrec['note'] + if not note: return onote + seps = self.params['PD'] + match = "[^{}]+".format(seps[1]) + edate = tempinfo['edate'] + ehour = tempinfo['ehour'] + if note[0] == '!': # executable for build up data note + cmd = self.executable_command(1, None, None, edate) + if not cmd: return 0 + return self.pgsystem(cmd, self.PGOPT['emllog'], 21) + # repalce generic patterns first + note = self.replace_pattern(note, None) # replace generic patterns first + # get temporal patterns + patterns = re.findall(r'{}({}){}'.format(seps[0], match, seps[1]), note) + pcnt = len(patterns) + if pcnt == 0: return note # no pattern temporal matches + if pcnt > 2: + self.pglog("{}-{}: TOO many ({}) temporal patterns".format(lfile, note, pcnt), self.PGOPT['emllog']) + return onote + if pcnt == 2: # replace start time + if onote: # get start time from existing note + replace = "{}{}{}".format(seps[0], patterns[0], seps[1]) + ms = re.match(r'^(.*){}(.*){}'.format(replace, self.params['PD'][0]), note) + if ms: + init = ms.group(1) + sp = ms.group(2) + ms = 
re.search(r'{}(.+){}'.format(init, sp), onote) + if ms: + sdate = ms.group(1) + note = re.sub(replace, sdate, note, 1) + elif tempinfo['FQ']: # get start time + (sdate, shour) = self.addfrequency(edate, ehour, tempinfo['FQ'], 0) + note = self.replace_pattern(note, sdate, shour, None, 1) + return self.replace_pattern(note, edate, ehour) # repalce end time now + + # get data file status info + def file_status_info(self, lfile, rfile, tempinfo): + # check and cache new data info + finfo = self.check_local_file(lfile, 33, self.PGOPT['wrnlog']) # 33 = 1 + 32 + if not finfo: + self.PGOPT['chksm'] = '' + self.PGOPT['fsize'] = 0 + return + fdate = finfo['date_modified'] + ftime = finfo['time_modified'] + fhour = None + ms = re.match(r'^(\d+):', ftime) + if ms: four = int(ms.group(1)) + self.PGOPT['fsize'] = finfo['data_size'] + self.PGOPT['chksm'] = finfo['checksum'] + if rfile and lfile != rfile: + finfo = self.check_local_file(rfile, 1, self.PGOPT['wrnlog']) + if finfo and self.cmptime(finfo['date_modified'], finfo['time_modified'], fdate, ftime) < 0: + fdate = finfo['date_modified'] + ftime = finfo['time_modified'] + ms = re.match(r'^(\d+):', ftime) + if ms: four = int(ms.group(1)) + self.PGOPT['fdate'] = fdate + self.PGOPT['ftime'] = ftime + self.PGOPT['fhour'] = fhour + if 'RE' in self.params: # reset end data/time/hour + if tempinfo['NX']: + if tempinfo['NX'][3]: + (fdate, fhour) = self.adddatehour(fdate, fhour, -tempinfo['NX'][0], -tempinfo['NX'][1], + -tempinfo['NX'][2], -tempinfo['NX'][3]) + else: + fdate = self.adddate(fdate, -tempinfo['NX'][0], -tempinfo['NX'][1], -tempinfo['NX'][2]) + while True: + (edate, ehour) = self.addfrequency(tempinfo['edate'], tempinfo['ehour'], tempinfo['FQ'], 1) + edate = self.enddate(edate, tempinfo['EP'], tempinfo['QU'], tempinfo['FQ'][6]) + if self.diffdatehour(edate, ehour, fdate, fhour) > 0: break + tempinfo['edate'] = edate + tempinfo['ehour'] = ehour + + # check if a Server file is aged enough for download# return 1 if valid, 0 
if not aged enough, -1 if cannot check + def check_agetime(self, dcmd, sfile, atime): + info = self.check_server_file(dcmd, 1) + if not info: + sact = self.get_download_action(dcmd) + (stat, derr) = self.parse_download_error(self.PGOPT['STATUS'], sact) + self.PGOPT['STATUS'] = derr + self.pglog("{}: cannot check file age\n{}".format(sfile, self.PGOPT['STATUS']), self.PGOPT['emlerr']) + return stat + ahour = None + if atime[3]: + ms = re.match(r'^(\d+):', info['time_modified']) + if ms: ahour = int(ms.group(1)) + (adate, ahour) = self.adddatehour(info['date_modified'], ahour, atime[0], atime[1], atime[2], atime[3]) + if self.diffdatehour(self.params['CD'], self.params['CH'], adate, ahour) >= 0: + return 1 + if ahour is None: + self.pglog(("{}: original {} file ready by {}\n".format(sfile, info['ftype'], info['date_modified']) + + "but NOT aged enough for retrieving yet by " + self.params['CD']), self.PGOPT['emllog']) else: - cdate = cinfo['date_modified'] - ctime = cinfo['time_modified'] - - if PgUtil.cmptime(cdate, ctime, finfo['date_modified'], finfo['time_modified']) > 0: - msg = "{} Newer {} {}: {} {} {}".format(PgOPT.params['DS'], cinfo['ftype'], cinfo['fname'], cdate, ctime, cinfo['data_size']) - if 'checksum' in cinfo: msg += " " + cinfo['checksum'] - msg += "; {}: ".format(cfile if cfile else "archived") - msg += "{} {} {}".format(finfo['date_modified'], finfo['time_modified'], finfo['data_size']) - if 'checksum' in finfo: msg += " " + finfo['checksum'] - PgLOG.pglog(msg, PgOPT.PGOPT['wrnlog']) + self.pglog(("{}: original {} file ready by {}:{:02}\n".format(sfile, info['ftype'], info['date_modified'], ahour) + + "but NOT aged enough for retrieving yet by {}:{:02}".format(self.params['CD'], self.params['CH'])), self.PGOPT['emllog']) + return 0 # otherwise server file is not aged enough + + # check if a Server file is changed with different size# return 1 - file changed, 2 - new file retrieved, 3 - force redlownload, + # 0 - no change , -1 - error check, -2 - 
cannot check + def check_newer_file(self, dcmd, cfile, ainfo): + if cfile: + finfo = self.check_local_file(cfile, 33, self.PGOPT['wrnlog']) + if not finfo: return 3 # download if can not check newer + else: + finfo = {'isfile' : 0, 'checksum' : ainfo['chksm'], 'data_size' : ainfo['asize'], + 'date_modified' : ainfo['adate'], 'time_modified' : ainfo['atime']} + cinfo = self.check_server_file(dcmd, 33, cfile) + if not cinfo: + sact = self.get_download_action(dcmd) + (stat, derr) = self.parse_download_error(self.PGOPT['STATUS'], sact) + self.PGOPT['STATUS'] = derr return stat + stat = 2 if cinfo['ftype'] == "WGET" else 1 + if finfo['isfile'] and cfile == cinfo['fname'] and finfo['data_size'] and cinfo['data_size'] and cinfo['data_size'] != finfo['data_size']: + return stat + self.PGOPT['STATUS'] = '' + if (finfo['data_size'] != cinfo['data_size'] or 'checksum' not in cinfo or + 'checksum' not in finfo or finfo['checksum'] != cinfo['checksum']): + if 'HO' in self.params and cinfo['ftype'] == "FTP": + (cdate, ctime) = self.addhour(cinfo['date_modified'], cinfo['time_modified'], -self.params['HO'][0]) + else: + cdate = cinfo['date_modified'] + ctime = cinfo['time_modified'] + if self.cmptime(cdate, ctime, finfo['date_modified'], finfo['time_modified']) > 0: + msg = "{} Newer {} {}: {} {} {}".format(self.params['DS'], cinfo['ftype'], cinfo['fname'], cdate, ctime, cinfo['data_size']) + if 'checksum' in cinfo: msg += " " + cinfo['checksum'] + msg += "; {}: ".format(cfile if cfile else "archived") + msg += "{} {} {}".format(finfo['date_modified'], finfo['time_modified'], finfo['data_size']) + if 'checksum' in finfo: msg += " " + finfo['checksum'] + self.pglog(msg, self.PGOPT['wrnlog']) + return stat + if 'adate' in ainfo: + self.PGOPT['STATUS'] = "archived: {} {}".format(ainfo['adate'], ainfo['atime']) + elif cfile: + self.PGOPT['STATUS'] += "local copy timestamp: {} {}".format(finfo['date_modified'], finfo['time_modified']) + if 'note' in cinfo: + self.PGOPT['STATUS'] += 
"\n" + cinfo['note'] + return 0 - if 'adate' in ainfo: - PgOPT.PGOPT['STATUS'] = "archived: {} {}".format(ainfo['adate'], ainfo['atime']) - elif cfile: - PgOPT.PGOPT['STATUS'] += "local copy timestamp: {} {}".format(finfo['date_modified'], finfo['time_modified']) - - if 'note' in cinfo: - PgOPT.PGOPT['STATUS'] += "\n" + cinfo['note'] - - return 0 - -# -# get download action name -# -def get_download_action(dcmd): - - if not dcmd: return "download" - - dact = "DOWNLOAD" - ms = re.search(r'(^|\S\/)tar\s+-(\w+)\s', dcmd) - if ms: - taropt = ms.group(2) - dact = "UNTAR" if taropt.find('x') > -1 else "TAR" - else: - ms = re.match(r'^\s*(\S+)', dcmd) + # get download action name + def get_download_action(self, dcmd): + if not dcmd: return "download" + dact = "DOWNLOAD" + ms = re.search(r'(^|\S\/)tar\s+-(\w+)\s', dcmd) if ms: - dact = op.basename(ms.group(1)) - if dact == "wc": - ms = re.search(r'\|\s*(\S+)', dcmd) - if ms: dact = op.basename(ms.group(1)) - - return dact - -# -# change to working directory if not there yet -# -def change_workdir(wdir, locinfo, edate, ehour, FQ): - - if 'WD' in PgOPT.params and PgOPT.params['WD'][0]: wdir = PgOPT.params['WD'][0] - if not wdir: - return PgLOG.pglog(locinfo + ": MISS working directory", PgOPT.PGOPT['emlerr']) - else: - wdir = PgLOG.replace_environments(wdir) - wdir = PgUpdt.replace_pattern(wdir, edate, ehour, FQ) - if not PgFile.change_local_directory(wdir, PgOPT.PGOPT['emllog']): return 0 - - return 1 - -# -# clean the working copies of remote and local files/directories -# -def clean_files(cleancmd, edate, ehour, lfiles, rfiles, freq): - - lfile = ' '.join(lfiles) if lfiles else '' - cleancmd = PgUpdt.replace_pattern(cleancmd, edate, ehour, freq) - cleancmd = PgUpdt.executable_command(cleancmd, lfile, None, None, None, rfiles) - PgLOG.PGLOG['ERR2STD'] = [PgLOG.PGLOG['MISSFILE']] - PgLOG.pgsystem(cleancmd, PgOPT.PGOPT['emllog'], 5) - PgLOG.PGLOG['ERR2STD'] = [] - -# -# clean files rematching pattern on given date/hour -# 
-def clean_older_files(cleancmd, workdir, locinfo, edate, locfile, rmtrecs, rcnt, tempinfo): - - rfiles = None - lfiles = PgUpdt.get_local_names(locfile, tempinfo, edate) - change_workdir(workdir, locinfo, edate, tempinfo['ehour'], tempinfo['FQ']) - - if rcnt and cleancmd.find(' -RF') > 0: - rfiles = get_all_remote_files(rmtrecs, rcnt, tempinfo, edate) - clean_files(cleancmd, edate, tempinfo['ehour'], lfiles, rfiles, tempinfo['FQ']) - -# -# get all remote file names for one update period -# -def get_all_remote_files(rmtrecs, rcnt, tempinfo, edate): - - rfiles = [] - for i in range(rcnt): # processs each remote record - rmtrec = PgUtil.onerecord(rmtrecs, i) - file = rmtrec['remotefile'] - if not file: continue - files = PgUpdt.get_remote_names(file, rmtrec, file, tempinfo, edate) - if files: rfiles.extend(files) - - return rfiles - -# -# check remote file status and sed email to specialist for irregular update cases -# -def check_dataset_status(): - - if 'CD' in PgOPT.params: - PgOPT.params['CD'] = PgUtil.format_date(PgOPT.params['CD']) # standard format in case not yet - else: - PgOPT.params['CD'] = PgUtil.curdate() # default to current date - - condition = "specialist = '{}'".format(PgOPT.params['LN']) - if 'ED' not in PgOPT.params: condition += " AND enddate < '{}'".format(PgOPT.params['CD']) - if 'DS' in PgOPT.params: condition += " AND dsid = '{}'".format(PgOPT.params['DS']) - s = PgUpdt.file_condition('dlupdt', ('L' if 'LI' in PgOPT.params else "FIXA"), None, 1) - if s: condition += " AND " + s - condition += " ORDER BY dsid, execorder, lindex" - locrecs = PgDBI.pgmget("dlupdt", "*", condition, PgOPT.PGOPT['extlog']) - loccnt = len(locrecs['locfile']) if locrecs else 0 - if not loccnt: return PgLOG.pglog("No Update record found for checking update status on {} for '{}'".format(PgOPT.params['CD'], PgOPT.params['LN']), PgOPT.PGOPT['wrnlog']) - - s = "s" if loccnt > 1 else "" - PgLOG.pglog("Check {} record{} for update status...".format(loccnt, s), 
PgOPT.PGOPT['wrnlog']) - for i in range(loccnt): - locrec = PgUtil.onerecord(locrecs, i) - if loccnt == 1 and 'LI' in PgOPT.params and 'LF' in PgOPT.params and len(PgOPT.params['LF']) == 1 and PgOPT.params['LF'][0] != locrec['locfile']: - locrec['locfile'] = PgOPT.params['LF'][0] - check_locfile_status(locrec) - - if PgOPT.PGOPT['lcnt'] or PgLOG.PGLOG['ERRMSG']: - if PgOPT.PGOPT['lcnt']: - loccnt = PgOPT.PGOPT['lcnt'] - s = "s" if (loccnt > 1) else "" - SUBJECT = "DSUPDT Status of {} update record{}".format(loccnt, s) - if 'DS' in PgOPT.params: SUBJECT += " for {}".format(PgOPT.params['DS']) - TOPMSG = " ready for update of {} local file{}".format(loccnt, s) - s = "s" if (PgOPT.PGOPT['rcnt'] > 1) else "" - TOPMSG = "{}/{} remote{}{}".format(PgOPT.PGOPT['ucnt'], PgOPT.PGOPT['rcnt'], s, TOPMSG) - else: - PgLOG.pglog("No local file ready for checking {} on {} for {}".format(SUBJECT, PgOPT.params['CD'], PgOPT.params['LN']), PgOPT.PGOPT['wrnlog']) - SUBJECT = TOPMSG = None - - if PgOPT.PGOPT['UCNTL']: - PgUpdt.reset_control_time() - if SUBJECT: SUBJECT += "-C{}".format(PgOPT.PGOPT['UCNTL']['cindex']) - -# -# check update status for a given local file -# -def check_locfile_status(locrec): - - loccnd = "lindex = {}".format(locrec['lindex']) - lfile = locrec['locfile'] - locinfo = "{}-L{}".format(locrec['dsid'], locrec['lindex']) - if not lfile: return PgLOG.pglog(locinfo + ": local file name NOT specified", PgOPT.PGOPT['emlerr']) - locinfo += "-" + lfile - tempinfo = get_tempinfo(locrec, locinfo, 0) - if not tempinfo: return 0 # simply return if miss temporal info for update - - rmtcnd = loccnd - rcnd = PgUpdt.file_condition('drupdt', ('D' if 'DO' in PgOPT.params else "RS"), None, 1) - if rcnd: rmtcnd += " AND " + rcnd - rmtrecs = PgDBI.pgmget("drupdt", "*", rmtcnd + " ORDER BY dindex, remotefile", PgOPT.PGOPT['extlog']) - rcnt = len(rmtrecs['remotefile']) if rmtrecs else 0 - if rcnt == 0: - if rcnd and PgDBI.pgget("drupdt", "", loccnd): - return PgLOG.pglog("{}: NO 
remote file record matched for {}".format(locinfo, rcnd), PgOPT.PGOPT['emlerr']) - rcnt = 1 # create a empty record remote file - rmtrecs = {'lindex' : locrec['lindex'], 'remotefile' : None, 'serverfile' : None} - - if rcnt == 1: - if 'RF' in PgOPT.params and len(PgOPT.params['RF']) == 1 and not (rmtrecs['remotefile'][0] and PgOPT.params['RF'][0] == rmtrecs['remotefile'][0]): - rmtrecs['remotefile'][0] = PgOPT.params['RF'][0] - if 'SF' in PgOPT.params and len(PgOPT.params['SF']) == 1 and not (rmtrecs['serverfile'][0] and PgOPT.params['SF'][0] == rmtrecs['serverfile'][0]): - rmtrecs['serverfile'][0] = PgOPT.params['SF'][0] - - ecnt = len(tempinfo['ED']) - PgOPT.PGOPT['lindex'] = locrec['lindex'] - logact = PgOPT.PGOPT['emllog'] - - retcnt = 0 - for i in range(ecnt): - if ALLCNT > 1 and i > 0: - tempinfo = get_tempinfo(locrec, locinfo, i) - if not tempinfo: break - edate = tempinfo['ED'][0] - ehour = tempinfo['EH'][0] + taropt = ms.group(2) + dact = "UNTAR" if taropt.find('x') > -1 else "TAR" else: - edate = tempinfo['ED'][i] - ehour = tempinfo['EH'][i] - tempinfo['edate'] = edate - if ehour != None: - tempinfo['einfo'] = "end data date:hour {}:{:02}".format(edate, ehour) - tempinfo['ehour'] = ehour + ms = re.match(r'^\s*(\S+)', dcmd) + if ms: + dact = op.basename(ms.group(1)) + if dact == "wc": + ms = re.search(r'\|\s*(\S+)', dcmd) + if ms: dact = op.basename(ms.group(1)) + return dact + + # change to working directory if not there yet + def change_workdir(self, wdir, locinfo, edate, ehour, FQ): + if 'WD' in self.params and self.params['WD'][0]: wdir = self.params['WD'][0] + if not wdir: + return self.pglog(locinfo + ": MISS working directory", self.PGOPT['emlerr']) else: - tempinfo['einfo'] = "end data date {}".format(edate) - tempinfo['ehour'] = None - - if 'GZ' in PgOPT.params: tempinfo['einfo'] += "(UTC)" - lfile = PgUpdt.replace_pattern(locrec['locfile'], edate, ehour, tempinfo['FQ']) - locinfo = "{}-L{}-{}".format(locrec['dsid'], locrec['lindex'], lfile) - 
PgLOG.pglog("{}: Check Update Status for {}".format(locinfo, tempinfo['einfo']), logact) - logact = PgOPT.PGOPT['emlsep'] - PgOPT.PGOPT['lcnt'] += 1 - j = 0 - while j < rcnt: # check each remote record, stop checking if error - pgrec = PgUtil.onerecord(rmtrecs, j) - if not check_remote_status(pgrec, lfile, locrec, locinfo, tempinfo) and 'CA' not in PgOPT.params: - break - j += 1 - if j == 0: break - - PgOPT.PGOPT['lindex'] = 0 - - return (1 if retcnt > 0 else 0) - -# -# check update status for given remote file -# -def check_remote_status(rmtrec, lfile, locrec, locinfo, tempinfo): - - rfile = rmtrec['remotefile'] - rmtinfo = locinfo - if not rfile: - rfile = lfile - rcnt = 1 - - if rfile != locrec['locfile']: rmtinfo += "-" + rfile - tempinfo['DC'] = (PgOPT.params['DC'][0] if ('DC' in PgOPT.params and PgOPT.params['DC'][0]) else - (rmtrec['download'] if rmtrec['download'] else locrec['download'])) - rfiles = PgUpdt.get_remote_names(rfile, rmtrec, rmtinfo, tempinfo) - rcnt = len(rfiles) if rfiles else 0 - if not rcnt: return PgLOG.pglog(rmtinfo + ": NO remote file name identified", PgOPT.PGOPT['emlerr']) - - PgOPT.PGOPT['rcnt'] += rcnt # accumulate remote file counts - if tempinfo['DC']: - PgOPT.PGOPT['PCNT'] = PgUpdt.count_pattern_path(tempinfo['DC']) - tempinfo['DC'] = None - - sfile = rmtrec['serverfile'] - if sfile and sfile != rfile: - sfiles = PgUpdt.get_remote_names(sfile, rmtrec, rmtinfo, tempinfo) - scnt = len(sfiles) if sfiles else 0 - if scnt != rcnt: - PgOPT.PGOPT['rstat'] = -2 - return PgLOG.pglog("{}/{}: {}/{} MISS match file counts".format(rmtinfo, sfile, rcnt, scnt), PgOPT.PGOPT['emlerr']) - else: - sfiles = rfiles - scnt = rcnt - - dcnt = 0 - for i in range(rcnt): - rmtinfo = locinfo - rfile = rfiles[i] - if rfile['fname'] != lfile: rmtinfo += "-" + rfile['fname'] - sfile = sfiles[i] - if sfile['fname'] != rfile['fname']: rmtinfo += "-" + sfile['fname'] - rcmd = rfile['rcmd'] - if not rcmd: - return PgLOG.pglog(rmtinfo + ": Missing download 
command", PgOPT.PGOPT['emlerr']) - elif not sfile['ready']: - PgLOG.pglog(rmtinfo + ": NOT Ready yet for update", PgOPT.PGOPT['emllog']) - break - dcnt += 1 - - return 1 if dcnt else 0 - -# -# process the update control records -# -def process_update_controls(): - - global ALLCNT - ctime = PgUtil.curtime(1) - if not ('CI' in PgOPT.params or 'DS' in PgOPT.params): - PgOPT.set_default_value("SN", PgOPT.params['LN']) - - condition = ("(pid = 0 OR lockhost = '{}') AND cntltime <= '{}'".format(PgLOG.PGLOG['HOSTNAME'], ctime) + - PgOPT.PgOPT.get_hash_condition('dcupdt') + " ORDER BY hostname DESC, cntltime") - pgrecs = PgDBI.pgmget("dcupdt", "*", condition, PgOPT.PGOPT['extlog']) - - ALLCNT = len(pgrecs['cindex']) if pgrecs else 0 - if ALLCNT == 0: - return PgLOG.pglog("No update control record idetified due for process", PgLOG.LOGWRN) - - s = 's' if ALLCNT > 1 else '' - PgLOG.pglog("Process {} update control record{} ...".format(ALLCNT, s), PgLOG.WARNLG) - - pcnt = 0 - for i in range(ALLCNT): - pcnt += process_one_control(PgUtil.onerecord(pgrecs, i)) - if pcnt > 1 and not ('CI' in PgOPT.params or 'DS' in PgOPT.params): break - rmsg = "{} of {} update control{} reprocessed by {}".format(pcnt, ALLCNT, s, PgLOG.PGLOG['CURUID']) - if PgLOG.PGLOG['CURUID'] != PgOPT.params['LN']: rmsg += " for " + PgOPT.params['LN'] - PgLOG.pglog(rmsg, PgOPT.PGOPT['wrnlog']) - -# -# process one update control -# -def process_one_control(pgrec): - - cidx = pgrec['cindex'] - cstr = "Control Index {}".format(cidx) - if not pgrec['action']: return PgLOG.pglog(cstr + ": Miss update action", PgOPT.PGOPT['errlog']) - if not (PgOPT.OPTS[pgrec['action']][0]&PgOPT.PGOPT['CNTLACTS']): - return PgLOG.pglog("{}: Invalid dsupdt action '{}'".format(cstr, pgrec['action']), PgOPT.PGOPT['errlog']) - if not pgrec['frequency']: return PgLOG.pglog(cstr + ": Miss update Frequency", PgOPT.PGOPT['errlog']) - if pgrec['pid'] > 0 and PgSIG.check_process(pgrec['pid']): - if 'CI' in PgOPT.params: PgLOG.pglog("{}: Under 
processing {}/{}".format(cstr, pgrec['pid'], PgLOG.PGLOG['HOSTNAME']), PgOPT.PGOPT['wrnlog']) - return 0 - if pgrec['specialist'] != PgOPT.params['LN']: - return PgLOG.pglog("{}: must be specialist '{}' to process".format(cstr, pgrec['specialist']), PgOPT.PGOPT['errlog']) - if not ('ED' in PgOPT.params or PgOPT.valid_data_time(pgrec, cstr, PgOPT.PGOPT['wrnlog'])): - return 0 - cmd = "dsupdt " - if pgrec['dsid']: cmd += pgrec['dsid'] + ' ' - cmd += "{} -CI {} ".format(pgrec['action'], cidx) - if PgLOG.PGLOG['CURUID'] != PgOPT.params['LN']: cmd += "-LN " + PgOPT.params['LN'] - cmd += "-d -b" - - # make sure it is not locked - if PgLock.lock_update_control(cidx, 0, PgOPT.PGOPT['errlog']) <= 0: return 0 - PgLOG.pglog("{}-{}{}: {}".format(PgLOG.PGLOG['HOSTNAME'], pgrec['specialist'], PgLOG.current_datetime(), cmd), PgLOG.LOGWRN|PgLOG.FRCLOG) - os.system(cmd + " &") - return 1 - -# -# move the previous archived version controlled files -# -def move_archived_file(ainfo, archived): - - stat = 0 - if 'wfile' in ainfo: - type = ainfo['wtype'] - pgrec = ainfo['types'][type] - if pgrec and pgrec['vindex']: - tofile = fromfile = ainfo['wfile'] - ftype = "Web" - ttype = " Saved" - i = 0 - while True: # create tofile name - if i > 0: tofile = "{}.vbu{}".format(fromfile, i) - if not PgDBI.pgget("sfile", "", "dsid = '{}' AND sfile = '{}'".format(PgOPT.params['DS'], tofile), PgOPT.PGOPT['extlog']): - break - i += 1 - stat = PgLOG.pgsystem("dsarch {} MV -WF {} -WT {} -SF {} -ST V -KM -TS".format(PgOPT.params['DS'], fromfile, type, tofile), PgOPT.PGOPT['emerol'], 5) + wdir = self.replace_environments(wdir) + wdir = self.replace_pattern(wdir, edate, ehour, FQ) + if not self.change_local_directory(wdir, self.PGOPT['emllog']): return 0 + return 1 - if stat == 0 and ainfo['sfile']: - type = ainfo['stype'] - pgrec = ainfo['types'][type] - if pgrec and pgrec['vindex']: - fromfile = ainfo['sfile'] - ftype = "Saved" - ttype = '' - i = 0 - while True: # create tofile name - tofile = 
"{}.vbu{}".format(fromfile, i) - if not PgDBI.pgget("sfile", "", "dsid = '{}' AND sfile = '{}'".format(PgOPT.params['DS'], tofile), PgOPT.PGOPT['extlog']): + # clean the working copies of remote and local files/directories + def clean_files(self, cleancmd, edate, ehour, lfiles, rfiles, freq): + lfile = ' '.join(lfiles) if lfiles else '' + cleancmd = self.replace_pattern(cleancmd, edate, ehour, freq) + cleancmd = self.executable_command(cleancmd, lfile, None, None, None, rfiles) + self.PGLOG['ERR2STD'] = [self.PGLOG['MISSFILE']] + self.pgsystem(cleancmd, self.PGOPT['emllog'], 5) + self.PGLOG['ERR2STD'] = [] + + # clean files rematching pattern on given date/hour + def clean_older_files(self, cleancmd, workdir, locinfo, edate, locfile, rmtrecs, rcnt, tempinfo): + rfiles = None + lfiles = self.get_local_names(locfile, tempinfo, edate) + self.change_workdir(workdir, locinfo, edate, tempinfo['ehour'], tempinfo['FQ']) + if rcnt and cleancmd.find(' -RF') > 0: + rfiles = self.get_all_remote_files(rmtrecs, rcnt, tempinfo, edate) + self.clean_files(cleancmd, edate, tempinfo['ehour'], lfiles, rfiles, tempinfo['FQ']) + + # get all remote file names for one update period + def get_all_remote_files(self, rmtrecs, rcnt, tempinfo, edate): + rfiles = [] + for i in range(rcnt): # processs each remote record + rmtrec = self.onerecord(rmtrecs, i) + file = rmtrec['remotefile'] + if not file: continue + files = self.get_remote_names(file, rmtrec, file, tempinfo, edate) + if files: rfiles.extend(files) + return rfiles + + # check remote file status and sed email to specialist for irregular update cases + def check_dataset_status(self): + if 'CD' in self.params: + self.params['CD'] = self.format_date(self.params['CD']) # standard format in case not yet + else: + self.params['CD'] = self.curdate() # default to current date + condition = "specialist = '{}'".format(self.params['LN']) + if 'ED' not in self.params: condition += " AND enddate < '{}'".format(self.params['CD']) + if 'DS' in 
self.params: condition += " AND dsid = '{}'".format(self.params['DS']) + s = self.file_condition('dlupdt', ('L' if 'LI' in self.params else "FIXA"), None, 1) + if s: condition += " AND " + s + condition += " ORDER BY dsid, execorder, lindex" + locrecs = self.pgmget("dlupdt", "*", condition, self.PGOPT['extlog']) + loccnt = len(locrecs['locfile']) if locrecs else 0 + if not loccnt: return self.pglog("No Update record found for checking update status on {} for '{}'".format(self.params['CD'], self.params['LN']), self.PGOPT['wrnlog']) + s = "s" if loccnt > 1 else "" + self.pglog("Check {} record{} for update status...".format(loccnt, s), self.PGOPT['wrnlog']) + for i in range(loccnt): + locrec = self.onerecord(locrecs, i) + if loccnt == 1 and 'LI' in self.params and 'LF' in self.params and len(self.params['LF']) == 1 and self.params['LF'][0] != locrec['locfile']: + locrec['locfile'] = self.params['LF'][0] + self.check_locfile_status(locrec) + if self.PGOPT['lcnt'] or self.PGLOG['ERRMSG']: + if self.PGOPT['lcnt']: + loccnt = self.PGOPT['lcnt'] + s = "s" if (loccnt > 1) else "" + self.SUBJECT = "DSUPDT Status of {} update record{}".format(loccnt, s) + if 'DS' in self.params: self.SUBJECT += " for {}".format(self.params['DS']) + self.TOPMSG = " ready for update of {} local file{}".format(loccnt, s) + s = "s" if (self.PGOPT['rcnt'] > 1) else "" + self.TOPMSG = "{}/{} remote{}{}".format(self.PGOPT['ucnt'], self.PGOPT['rcnt'], s, self.TOPMSG) + else: + self.pglog("No local file ready for checking {} on {} for {}".format(self.SUBJECT, self.params['CD'], self.params['LN']), self.PGOPT['wrnlog']) + self.SUBJECT = self.TOPMSG = None + if self.PGOPT['UCNTL']: + self.reset_control_time() + if self.SUBJECT: self.SUBJECT += "-C{}".format(self.PGOPT['UCNTL']['cindex']) + + # check update status for a given local file + def check_locfile_status(self, locrec): + loccnd = "lindex = {}".format(locrec['lindex']) + lfile = locrec['locfile'] + locinfo = "{}-L{}".format(locrec['dsid'], 
locrec['lindex']) + if not lfile: return self.pglog(locinfo + ": local file name NOT specified", self.PGOPT['emlerr']) + locinfo += "-" + lfile + tempinfo = self.get_tempinfo(locrec, locinfo, 0) + if not tempinfo: return 0 # simply return if miss temporal info for update + rmtcnd = loccnd + rcnd = self.file_condition('drupdt', ('D' if 'DO' in self.params else "RS"), None, 1) + if rcnd: rmtcnd += " AND " + rcnd + rmtrecs = self.pgmget("drupdt", "*", rmtcnd + " ORDER BY dindex, remotefile", self.PGOPT['extlog']) + rcnt = len(rmtrecs['remotefile']) if rmtrecs else 0 + if rcnt == 0: + if rcnd and self.pgget("drupdt", "", loccnd): + return self.pglog("{}: NO remote file record matched for {}".format(locinfo, rcnd), self.PGOPT['emlerr']) + rcnt = 1 # create a empty record remote file + rmtrecs = {'lindex' : locrec['lindex'], 'remotefile' : None, 'serverfile' : None} + if rcnt == 1: + if 'RF' in self.params and len(self.params['RF']) == 1 and not (rmtrecs['remotefile'][0] and self.params['RF'][0] == rmtrecs['remotefile'][0]): + rmtrecs['remotefile'][0] = self.params['RF'][0] + if 'SF' in self.params and len(self.params['SF']) == 1 and not (rmtrecs['serverfile'][0] and self.params['SF'][0] == rmtrecs['serverfile'][0]): + rmtrecs['serverfile'][0] = self.params['SF'][0] + ecnt = len(tempinfo['ED']) + self.PGOPT['lindex'] = locrec['lindex'] + logact = self.PGOPT['emllog'] + retcnt = 0 + for i in range(ecnt): + if self.ALLCNT > 1 and i > 0: + tempinfo = self.get_tempinfo(locrec, locinfo, i) + if not tempinfo: break + edate = tempinfo['ED'][0] + ehour = tempinfo['EH'][0] + else: + edate = tempinfo['ED'][i] + ehour = tempinfo['EH'][i] + tempinfo['edate'] = edate + if ehour != None: + tempinfo['einfo'] = "end data date:hour {}:{:02}".format(edate, ehour) + tempinfo['ehour'] = ehour + else: + tempinfo['einfo'] = "end data date {}".format(edate) + tempinfo['ehour'] = None + if 'GZ' in self.params: tempinfo['einfo'] += "(UTC)" + lfile = self.replace_pattern(locrec['locfile'], edate, 
ehour, tempinfo['FQ']) + locinfo = "{}-L{}-{}".format(locrec['dsid'], locrec['lindex'], lfile) + self.pglog("{}: Check Update Status for {}".format(locinfo, tempinfo['einfo']), logact) + logact = self.PGOPT['emlsep'] + self.PGOPT['lcnt'] += 1 + j = 0 + while j < rcnt: # check each remote record, stop checking if error + pgrec = self.onerecord(rmtrecs, j) + if not self.check_remote_status(pgrec, lfile, locrec, locinfo, tempinfo) and 'CA' not in self.params: break - i += 1 - stat = PgLOG.pgsystem("dsarch {} MV -RF {} -OT {} -SF {} -ST V".format(PgOPT.params['DS'], fromfile, type, tofile), PgOPT.PGOPT['emerol'], 5) - - if stat: - PgOPT.PGOPT['vcnt'] += 1 - if 'NE' in PgOPT.params or 'EE' in PgOPT.params: - if 'NE' in PgOPT.params: del PgOPT.params['NE'] - if 'EE' in PgOPT.params: del PgOPT.params['EE'] - PgOPT.params['SE'] = 1 # email summary at least - PgOPT.PGOPT['emllog'] |= PgLOG.EMEROL - PgLOG.pglog("{}-{}-{}: Found newer version-conrolled {} file; move to{} type V {}".format(PgOPT.params['DS'], type, fromfile, ftype, ttype, tofile), PgOPT.PGOPT['emlsum']) - archived = 0 + j += 1 + if j == 0: break + self.PGOPT['lindex'] = 0 + return (1 if retcnt > 0 else 0) + + # check update status for given remote file + def check_remote_status(self, rmtrec, lfile, locrec, locinfo, tempinfo): + rfile = rmtrec['remotefile'] + rmtinfo = locinfo + if not rfile: + rfile = lfile + rcnt = 1 + if rfile != locrec['locfile']: rmtinfo += "-" + rfile + tempinfo['DC'] = (self.params['DC'][0] if ('DC' in self.params and self.params['DC'][0]) else + (rmtrec['download'] if rmtrec['download'] else locrec['download'])) + rfiles = self.get_remote_names(rfile, rmtrec, rmtinfo, tempinfo) + rcnt = len(rfiles) if rfiles else 0 + if not rcnt: return self.pglog(rmtinfo + ": NO remote file name identified", self.PGOPT['emlerr']) + self.PGOPT['rcnt'] += rcnt # accumulate remote file counts + if tempinfo['DC']: + self.PGOPT['PCNT'] = self.count_pattern_path(tempinfo['DC']) + tempinfo['DC'] = None + 
sfile = rmtrec['serverfile'] + if sfile and sfile != rfile: + sfiles = self.get_remote_names(sfile, rmtrec, rmtinfo, tempinfo) + scnt = len(sfiles) if sfiles else 0 + if scnt != rcnt: + self.PGOPT['rstat'] = -2 + return self.pglog("{}/{}: {}/{} MISS match file counts".format(rmtinfo, sfile, rcnt, scnt), self.PGOPT['emlerr']) + else: + sfiles = rfiles + scnt = rcnt + dcnt = 0 + for i in range(rcnt): + rmtinfo = locinfo + rfile = rfiles[i] + if rfile['fname'] != lfile: rmtinfo += "-" + rfile['fname'] + sfile = sfiles[i] + if sfile['fname'] != rfile['fname']: rmtinfo += "-" + sfile['fname'] + rcmd = rfile['rcmd'] + if not rcmd: + return self.pglog(rmtinfo + ": Missing download command", self.PGOPT['emlerr']) + elif not sfile['ready']: + self.pglog(rmtinfo + ": NOT Ready yet for update", self.PGOPT['emllog']) + break + dcnt += 1 + return 1 if dcnt else 0 + + # process the update control records + def process_update_controls(self): + ctime = self.curtime(1) + if not ('CI' in self.params or 'DS' in self.params): + self.set_default_value("SN", self.params['LN']) + condition = ("(pid = 0 OR lockhost = '{}') AND cntltime <= '{}'".format(self.PGLOG['HOSTNAME'], ctime) + + self.self.get_hash_condition('dcupdt') + " ORDER BY hostname DESC, cntltime") + pgrecs = self.pgmget("dcupdt", "*", condition, self.PGOPT['extlog']) + self.ALLCNT = len(pgrecs['cindex']) if pgrecs else 0 + if self.ALLCNT == 0: + return self.pglog("No update control record idetified due for process", self.LOGWRN) + s = 's' if self.ALLCNT > 1 else '' + self.pglog("Process {} update control record{} ...".format(self.ALLCNT, s), self.WARNLG) + pcnt = 0 + for i in range(self.ALLCNT): + pcnt += self.process_one_control(self.onerecord(pgrecs, i)) + if pcnt > 1 and not ('CI' in self.params or 'DS' in self.params): break + rmsg = "{} of {} update control{} reprocessed by {}".format(pcnt, self.ALLCNT, s, self.PGLOG['CURUID']) + if self.PGLOG['CURUID'] != self.params['LN']: rmsg += " for " + self.params['LN'] + 
self.pglog(rmsg, self.PGOPT['wrnlog']) + + # process one update control + def process_one_control(self, pgrec): + cidx = pgrec['cindex'] + cstr = "Control Index {}".format(cidx) + if not pgrec['action']: return self.pglog(cstr + ": Miss update action", self.PGOPT['errlog']) + if not (self.OPTS[pgrec['action']][0]&self.PGOPT['CNTLACTS']): + return self.pglog("{}: Invalid dsupdt action '{}'".format(cstr, pgrec['action']), self.PGOPT['errlog']) + if not pgrec['frequency']: return self.pglog(cstr + ": Miss update Frequency", self.PGOPT['errlog']) + if pgrec['pid'] > 0 and self.check_process(pgrec['pid']): + if 'CI' in self.params: self.pglog("{}: Under processing {}/{}".format(cstr, pgrec['pid'], self.PGLOG['HOSTNAME']), self.PGOPT['wrnlog']) + return 0 + if pgrec['specialist'] != self.params['LN']: + return self.pglog("{}: must be specialist '{}' to process".format(cstr, pgrec['specialist']), self.PGOPT['errlog']) + if not ('ED' in self.params or self.valid_data_time(pgrec, cstr, self.PGOPT['wrnlog'])): + return 0 + cmd = "dsupdt " + if pgrec['dsid']: cmd += pgrec['dsid'] + ' ' + cmd += "{} -CI {} ".format(pgrec['action'], cidx) + if self.PGLOG['CURUID'] != self.params['LN']: cmd += "-LN " + self.params['LN'] + cmd += "-d -b" + # make sure it is not locked + if self.lock_update_control(cidx, 0, self.PGOPT['errlog']) <= 0: return 0 + self.pglog("{}-{}{}: {}".format(self.PGLOG['HOSTNAME'], pgrec['specialist'], self.current_datetime(), cmd), self.LOGWRN|self.FRCLOG) + os.system(cmd + " &") + return 1 - return archived + # move the previous archived version controlled files + def move_archived_file(self, ainfo, archived): + stat = 0 + if 'wfile' in ainfo: + type = ainfo['wtype'] + pgrec = ainfo['types'][type] + if pgrec and pgrec['vindex']: + tofile = fromfile = ainfo['wfile'] + ftype = "Web" + ttype = " Saved" + i = 0 + while True: # create tofile name + if i > 0: tofile = "{}.vbu{}".format(fromfile, i) + if not self.pgget("sfile", "", "dsid = '{}' AND sfile = 
'{}'".format(self.params['DS'], tofile), self.PGOPT['extlog']): + break + i += 1 + stat = self.pgsystem("dsarch {} MV -WF {} -WT {} -SF {} -ST V -KM -TS".format(self.params['DS'], fromfile, type, tofile), self.PGOPT['emerol'], 5) + if stat == 0 and ainfo['sfile']: + type = ainfo['stype'] + pgrec = ainfo['types'][type] + if pgrec and pgrec['vindex']: + fromfile = ainfo['sfile'] + ftype = "Saved" + ttype = '' + i = 0 + while True: # create tofile name + tofile = "{}.vbu{}".format(fromfile, i) + if not self.pgget("sfile", "", "dsid = '{}' AND sfile = '{}'".format(self.params['DS'], tofile), self.PGOPT['extlog']): + break + i += 1 + stat = self.pgsystem("dsarch {} MV -RF {} -OT {} -SF {} -ST V".format(self.params['DS'], fromfile, type, tofile), self.PGOPT['emerol'], 5) + if stat: + self.PGOPT['vcnt'] += 1 + if 'NE' in self.params or 'EE' in self.params: + if 'NE' in self.params: del self.params['NE'] + if 'EE' in self.params: del self.params['EE'] + self.params['SE'] = 1 # email summary at least + self.PGOPT['emllog'] |= self.EMEROL + self.pglog("{}-{}-{}: Found newer version-conrolled {} file; move to{} type V {}".format(self.params['DS'], type, fromfile, ftype, ttype, tofile), self.PGOPT['emlsum']) + archived = 0 + return archived + +# main function to excecute this script +def main(): + object = DsUpdt() + object.read_parameters() + object.start_actions() + object.pgexit(0) -# # call main() to start program -# if __name__ == "__main__": main() diff --git a/src/rda_python_dsupdt/pg_updt.py b/src/rda_python_dsupdt/pg_updt.py new file mode 100644 index 0000000..189ac42 --- /dev/null +++ b/src/rda_python_dsupdt/pg_updt.py @@ -0,0 +1,1654 @@ +# +############################################################################### +# +# Title : pg_updt.py +# Author : Zaihua Ji, zji@ucar.edu +# Date : 09/23/2020 +# 2025-02-07 transferred to package rda_python_dsupdt from +# https://github.com/NCAR/rda-shared-libraries.git +# 2025-12-08 transfer to class PgUpdt +# Purpose : 
python library module to help rountinely updates of new data +# for one or multiple datasets +# +# Github : https://github.com/NCAR/rda-python-dsupdt.git +# +############################################################################### +# +import os +import re +import time +from os import path as op +from rda_python_common.pg_cmd import PgCMD +from rda_python_common.pg_opt import PgOPT + +class PgUpdt(PgOPT, PgCMD): + + def __init__(self): + super().__init__() # initialize parent class + self.CORDERS = {} + self.OPTS.update({ + 'DR' : [0x00010, 'DownloadRemote',2], + 'BL' : [0x00020, 'BuildLocal', 2], + 'PB' : [0x00030, 'ProcessBoth', 2], # DR & BL + 'AF' : [0x00040, 'ArchiveFile', 2], + 'CF' : [0x00080, 'CleanFile', 2], + 'UF' : [0x000F0, 'UpdateFile', 2], # DR & BL & AF & CF + 'CU' : [0x00200, 'CheckUpdate', 0], + 'GC' : [0x00400, 'GetControl', 0], + 'GL' : [0x00800, 'GetLocalFile', 0], + 'GR' : [0x01000, 'GetRemoteFile', 0], + 'GA' : [0x01C00, 'GetALL', 0], # GC & GL & GR + 'SC' : [0x02000, 'SetControl', 1], + 'SL' : [0x04000, 'SetLocalFile', 1], + 'SR' : [0x08000, 'SetRemoteFile', 1], + 'SA' : [0x0E000, 'SetALL', 4], # SC & SL & SR + 'DL' : [0x20000, 'Delete', 1], + 'UL' : [0x40000, 'UnLock', 1], + 'AW' : [0, 'AnyWhere'], + 'BG' : [0, 'BackGround'], + 'CA' : [0, 'CheckAll'], + 'CN' : [0, 'CheckNew'], + 'CP' : [0, 'CurrrentPeriod'], + 'EE' : [0, 'ErrorEmail'], # send email when error happens only + 'FO' : [0, 'FormatOutput'], + 'FU' : [0, 'FutureUpdate'], + 'GZ' : [0, 'GMTZone'], + 'HU' : [0, 'HourlyUpdate'], + 'IE' : [0, 'IgnoreError'], + 'KR' : [0, 'KeepRemote'], + 'KS' : [0, 'KeepServer'], + 'LO' : [0, 'LogOn'], + 'MD' : [0, 'PgDataset'], + 'MO' : [0, 'MissedOnly'], + 'MU' : [0, 'MultipleUpdate'], + 'NC' : [0, 'NewControl'], + 'NE' : [0, 'NoEmail'], + 'NL' : [0, 'NewLocfile'], + 'NY' : [0, 'NoLeapYear'], + 'QE' : [0, 'QuitError'], + 'RA' : [0, 'RetryArchive'], + 'RD' : [0, 'RetryDownload'], + 'RE' : [0, 'ResetEndTime'], + 'RO' : [0, 'ResetOrder'], + 'SE' : 
[0, 'SummaryEmail'], # send summary email only + 'UB' : [0, 'UseBeginTime'], + 'UT' : [0, 'UpdateTime'], + 'AO' : [1, 'ActOption', 1], # default to + 'CD' : [1, 'CurrentDate', 256], # used this instead of curdate() + 'CH' : [1, 'CurrentHour', 16], # used this instead of (localtime)[2] + 'DS' : [1, 'Dataset', 0], + 'DV' : [1, 'Divider', 1], # default to <:> + 'ES' : [1, 'EqualSign', 1], # default to <=> + 'FN' : [1, 'FieldNames', 0], + 'LN' : [1, 'LoginName', 1], + 'OF' : [1, 'OutputFile', 0], + 'ON' : [1, 'OrderNames', 0], + 'PL' : [1, 'ProcessLimit', 17], + 'VS' : [1, 'ValidSize', 17], # default to self.PGLOG['MINSIZE'] + 'AN' : [2, 'ActionName', 1], + 'AT' : [2, 'AgeTime', 1], + 'BC' : [2, 'BuildCommand', 1], + 'BP' : [2, 'BatchProcess', 0, ''], + 'BT' : [2, 'BeginTime', 1], + 'CC' : [2, 'CarbonCopy', 0], + 'CI' : [2, 'ControlIndex', 16], + 'CL' : [2, 'CleanCommand', 1], + 'CO' : [2, "ControlOffset", 1], + 'CT' : [2, 'ControlTime', 32+356], + 'DB' : [2, 'Debug', 0], + 'DC' : [2, 'DownloadCommand', 1], + 'DE' : [2, 'Description', 64], + 'DO' : [2, 'DownloadOrder', 16], + 'DT' : [2, 'DataTime', 1+32+256], + 'EC' : [2, 'ErrorControl', 1, "NIQ"], + 'ED' : [2, 'EndDate', 257], + 'EH' : [2, 'EndHour', 33], + 'EP' : [2, 'EndPeriod', 1], + 'ET' : [2, 'EndTime', 33], + 'FA' : [2, 'FileArchived', 0], + 'FQ' : [2, 'Frequency', 1], + 'GP' : [2, 'GenericPattern', 0], + 'HN' : [2, "HostName", 1], + 'HO' : [2, 'HourOffset', 17], + 'ID' : [2, 'ControlID', 0], + 'IF' : [2, 'InputFile', 0], + 'KF' : [2, 'KeepFile', 1, "NRSB"], + 'LF' : [2, 'LocalFile', 0], + 'LI' : [2, 'LocalIndex', 17], + 'MC' : [2, 'EMailControl', 1, "ASNEB"], + 'MR' : [2, 'MissRemote', 128, "NY"], + 'DI' : [2, 'DueInterval', 1], + 'OP' : [2, 'Options', 1], + 'PD' : [2, 'PatternDelimiter', 2], # pattern delimiters, default to ["<", ">"] + 'PI' : [2, 'ParentIndex', 17], + 'PR' : [2, 'ProcessRemote', 1], + 'QS' : [2, 'QSubOptions', 0], + 'RF' : [2, 'RemoteFile', 0], + 'RI' : [2, 'RetryInterval', 1], + 'SB' : [2, 
'SBatchOptions', 1], + 'SF' : [2, 'ServerFile', 0], + 'SN' : [2, 'Specialist', 1], + 'TI' : [2, 'TimeInterval', 1], + 'UC' : [2, 'UpdateControl', 1], + 'VI' : [2, 'ValidInterval', 1], + 'WD' : [2, 'WorkDir', 1], + 'XC' : [2, 'ExecuteCommand', 1], + 'XO' : [2, 'ExecOrder', 16], + }) + self.ALIAS.update({ + 'AN' : ['Action', "AC"], + 'AT' : ['FileAge', "FileAgeTime"], + 'BC' : ['BuildCmd'], + 'BG' : ['b'], + 'BL' : ['BuildLocalfile'], + 'BP' : ['d', 'DelayedMode'], + 'BT' : ['IT', 'InitialTime'], + 'CI' : ['UpdateControlIndex'], + 'CL' : ['CleanFile'], + 'CN' : ['CheckNewFile'], + 'DC' : ['Command', 'Download'], + 'DE' : ['Desc', 'Note', 'FileDesc', 'FileDescription'], + 'DI' : ['NextDue'], + 'DL' : ['RM', 'Remove'], + 'DR' : ['DownloadRemoteFile'], + 'DS' : ['Dsid', 'DatasetID'], + 'DV' : ['Delimiter', 'Separator'], + 'ED' : ['UpdateEndDate'], + 'EH' : ['UpdateEndHour'], + 'EP' : ['EndPeriodDay'], + 'FA' : ['SF', 'WF', 'QF'], + 'FQ' : ['UpdateFrequency'], + 'FU' : ["ForceUpdate"], + 'GC' : ['GetUpdateControl'], + 'GL' : ['GetLocal'], + 'GN' : ['GroupID'], + 'GP' : ['GeneralPattern'], + 'GR' : ['GetRemote'], + 'GZ' : ['GMT', 'GreenwichZone', 'UTC'], + 'HN' : ['HostMachine'], + 'KR' : ['KeepRemoteFile'], + 'KS' : ['KeepServerFile'], + 'LF' : ['LocalFileIndex'], + 'LI' : ['LocIndex', "UpdateIndex"], + 'LO' : ['LoggingOn'], + 'OP' : ['DsarchOption'], + 'NC' : ['NewUpdateControl'], + 'NL' : ['NewLocalFile'], + 'PD' : ['TD', 'TemporalDelimiter'], + 'QE' : ['QuitOnError'], + 'QS' : ['PBSOptions'], + 'RD' : ['Redownlaod'], + 'RO' : ['Reorder'], + 'SB' : ['SlurmOptions'], + 'SC' : ['SetUpdateControl'], + 'SL' : ['SetLocal'], + 'SN' : ['SpecialistName'], + 'SR' : ['SetRemote'], + 'TI' : ['Interval'], + 'UL' : ["UnLockUpdate"], + 'XC' : ['ExecCmd'], + 'XO' : ['ExecuteOrder'] + }) + # single letter short names for option 'FN' (Field Names) to retrieve info + # from RDADB; only the fields can be manipulated by this application are listed + # SHORTNM KEYS(self.OPTS) DBFIELD + 
self.TBLHASH['dlupdt'] = { # condition flag, 0-int, 1-string, -1-exclude + 'L' : ['LI', "lindex", 0], + 'F' : ['LF', "locfile", 1], + 'A' : ['AN', "action", 1], # dsarch action + 'I' : ['CI', "cindex", 0], + 'U' : ['FA', "archfile", 1], + 'X' : ['XO', "execorder", 1], + 'S' : ['SN', "specialist", 1], + 'M' : ['MR', "missremote", 1], + 'W' : ['WD', "workdir", 1], + 'O' : ['OP', "options", 1], + 'C' : ['DC', "download", 1], + 'Q' : ['FQ', "frequency", 1], + 'E' : ['EP', "endperiod", 0], + 'J' : ['ED', "enddate", 1], + 'K' : ['EH', "endhour", 0], + 'N' : ['DI', "nextdue", 1], + 'V' : ['VI', "validint", 1], + 'T' : ['AT', "agetime", 1], + 'R' : ['PR', "processremote", 1], + 'B' : ['BC', "buildcmd", 1], + 'Z' : ['CL', "cleancmd", 1], + 'D' : ['DE', "note", 1], + } + self.TBLHASH['drupdt'] = { + 'L' : ['LI', "lindex", 0], # same as dlupdt.lindex + 'F' : ['RF', "remotefile", 1], + 'D' : ['DO', "dindex", 0], + 'S' : ['SF', "serverfile", 1], + 'C' : ['DC', "download", 1], + 'B' : ['BT', "begintime", 1], + 'E' : ['ET', "endtime", 1], + 'T' : ['TI', "tinterval", 1], + } + self.TBLHASH['dcupdt'] = { + 'C' : ['CI', "cindex", 0], + 'L' : ['ID', "cntlid", 1], + 'N' : ['SN', "specialist", 1], + 'P' : ['PI', "pindex", 0], # if not 0, refer to another dcupdt.cindex + 'A' : ['AN', "action", 1], # dsupdt action + 'F' : ['FQ', "frequency", 1], + 'O' : ['CO', "cntloffset", 1], + 'T' : ['CT', "cntltime", 1], + 'R' : ['RI', "retryint", 1], + 'V' : ['VI', "validint", 1], + 'U' : ['UC', "updtcntl", 1], + 'J' : ['MC', "emailcntl", 1], + 'E' : ['EC', "errorcntl", 1], + 'K' : ['KF', "keepfile", 1], + 'Z' : ['HO', "houroffset", 1], + 'D' : ['DT', "datatime", 1], + 'H' : ['HN', "hostname", 1], + 'Q' : ['QS', "qoptions", 1], + 'Y' : ['CC', "emails", 1], + 'X' : ['XC', "execcmd", 1], + } + # global info to be used by the whole application + self.PGOPT['updated'] = 0 + self.PGOPT['AUTODS'] = 0 + self.PGOPT['CNTLACTS'] = self.OPTS['UF'][0]|self.OPTS['CU'][0] + self.PGOPT['UPDTACTS'] = 
"AF|BL|CF|CU|DR|PB|UF" + self.PGOPT['ARCHACTS'] = "AW|AS|AQ" + self.PGOPT['DTIMES'] = {} + self.PGOPT['UCNTL'] = {} + #default fields for getting info + self.PGOPT['dlupdt'] = "LFAXIUCOQJNVWRZ" + self.PGOPT['drupdt'] = "LFDSCBET" + self.PGOPT['dcupdt'] = "CLNPAFOTRVUJEKZ" + #all fields for getting info + self.PGOPT['dlall'] = "LFAXIUCOQEJKNVTWMRBZSD" + self.PGOPT['drall'] = self.PGOPT['drupdt'] + self.PGOPT['dcall'] = "CLNPAFOTRVUJEKZDHSQYX" + # remote file download status + # 0 error download, but continue for further download + # 1 successful full/partial download, continue for build local files + # < 0 error download, stop + self.PGOPT['rstat'] = 1 # default to successful download + # counts + self.PGOPT['PCNT'] = 1 + self.PGOPT['vcnt'] = self.PGOPT['rcnt'] = self.PGOPT['dcnt'] = self.PGOPT['lcnt'] = 0 + self.PGOPT['bcnt'] = self.PGOPT['acnt'] = self.PGOPT['mcnt'] = 0 + self.PGOPT['ucnt'] = self.PGOPT['upcnt'] = self.PGOPT['ubcnt'] = self.PGOPT['uhcnt'] = 0 + self.PGOPT['uscnt'] = self.PGOPT['qbcnt'] = self.PGOPT['qdcnt'] = 0 + self.PGOPT['uwcnt'] = self.PGOPT['udcnt'] = self.PGOPT['uncnt'] = self.PGOPT['rdcnt'] = 0 + self.PGOPT['lindex'] = 0 # the current lindex is under updating + self.WSLOWS = { + 'nomads.ncep.noaa.gov' : 8 + } + # set default parameters + self.params['PD'] = ["<" , ">"] # temporal pattern delimiters + self.params['PL'] = 1 # max number of child processes allowed + + # get file contion + def file_condition(self, tname, include = None, exclude = None, nodsid = 0): + condition = "" + hash = self.TBLHASH[tname] + noand = 1 if nodsid else 0 + if not hash: self.pglog(tname + ": not defined in self.TBLHASH", self.PGOPT['extlog']) + for key in hash: + if include and include.find(key) < 0: continue + if exclude and exclude.find(key) > -1: continue + type = hash[key][2] + if type < 0: continue # exclude + opt = hash[key][0] + if opt not in self.params: continue + fld = hash[key][1] + condition += self.get_field_condition(fld, self.params[opt], type, 
noand) + noand = 0 + if not nodsid: + condition = "dsid = '{}'{}".format(self.params['DS'], condition) + return condition + + # check if enough information entered on command line and/or input file + # for given action(s) + def check_enough_options(self, cact, acts): + errmsg = [ + "Miss dataset number per -DS(-Dataset)", + "Miss local file names per -LF(-LocalFile)", + "Miss remote file names per -RF(-RemoteFile)", + "Miss local Index per -LI(-LocalIndex)", + "Miss Control Index per -CI(-ControlIndex)", + "Process one Update Control Index at a time", + ] + erridx = -1 + lcnt = ccnt = 0 + if 'LI' in self.params: lcnt = self.validate_lindices(cact) + if 'CI' in self.params or 'ID' in self.params: ccnt = self.validate_cindices(cact) + if self.OPTS[cact][2] == 1: + if acts&self.OPTS['SC'][0]: + if 'CI' not in self.params: erridx = 4 + elif cact == 'DL' or cact == 'UL': + if not ('LI' in self.params or 'CI' in self.params): erridx = 3 + elif 'LI' not in self.params: + erridx = 3 + elif acts&self.OPTS['SR'][0] and 'RF' not in self.params: + erridx = 2 + if erridx < 0: + if (lcnt + ccnt) > 0: + if 'DS' not in self.params: + erridx = 0 + elif lcnt > 0 and cact == 'SL' and 'LF' not in self.params: + erridx = 1 + elif self.OPTS[cact][2] == 2: + if 'CI' in self.params and len(self.params['CI']) > 1: + erridx = 5 + if erridx >= 0: self.action_error(errmsg[erridx], cact) + self.set_uid("dsupdt") # set uid before any action + if 'VS' in self.params: # minimal size for a file to be valid for archive + self.PGLOG['MINSIZE'] = int(self.params['VS']) + if 'BP' in self.params: + if 'PL' in self.params: self.params['PL'] = 1 + if 'CI' in self.params: + oidx = self.params['CI'][0] + otype = 'C' + elif 'LI' in self.params: + oidx = self.params['LI'][0] + otype = 'L' + else: + oidx = 0 + otype = '' + # set command line Batch options + self.set_batch_options(self.params, 2, 1) + self.init_dscheck(oidx, otype, "dsupdt", self.get_dsupdt_dataset(), + cact, "" if 'AW' in self.params else 
self.PGLOG['CURDIR'], self.params['LN'], + self.params['BP'], self.PGOPT['extlog']) + if 'NY' in self.params: self.PGLOG['NOLEAP'] = 1 + if 'NE' in self.params: + self.PGLOG['LOGMASK'] &= ~self.EMLALL # turn off all email acts + else: + if 'SE' in self.params: self.PGOPT['emllog'] |= self.EMEROL + if 'CC' in self.params and (self.PGOPT['ACTS']&self.OPTS['SC'][2]) == 2: self.add_carbon_copy(self.params['CC']) + if self.PGOPT['ACTS']&self.OPTS['UF'][0]: + plimit = self.params['PL'] if 'PL' in self.params else 1 + logon = self.params['LO'] if 'LO' in self.params else 1 + self.start_none_daemon('dsupdt', self.PGOPT['CACT'], self.params['LN'], plimit, 120, logon) + else: + self.start_none_daemon('dsupdt', self.PGOPT['CACT'], self.params['LN'], 1, 120, 1) + if self.PGSIG['MPROC'] > 1: + self.PGOPT['emllog'] |= self.FRCLOG + self.PGOPT['wrnlog'] |= self.FRCLOG + + # get the associated dataset id + def get_dsupdt_dataset(self): + if 'DS' in self.params: return self.params['DS'] + if 'CI' in self.params and self.params['CI'][0]: + pgrec = self.pgget("dcupdt", "dsid", "cindex = {}".format(self.params['CI'][0]), self.PGOPT['extlog']) + if pgrec: return pgrec['dsid'] + if 'LI' in self.params and self.params['LI'][0]: + pgrec = self.pgget("dlupdt", "dsid", "lindex = {}".format(self.params['LI'][0]), self.PGOPT['extlog']) + if pgrec: return pgrec['dsid'] + return None + + # replace the temoral patterns in given fname with date/hour + # return pattern array only if not date + def replace_pattern(self, fname, date, hour = None, intv = None, limit = 0, bdate = None, bhour = None): + if not fname: return None + if date and not isinstance(date, str): date = str(date) + if bdate and not isinstance(bdate, str): bdate = str(bdate) + seps = self.params['PD'] + match = r"[^{}]+".format(seps[1]) + patterns = re.findall(r'{}([^{}]+){}'.format(seps[0], seps[1], seps[1]), fname) + pcnt = len(patterns) + if pcnt == 0: return fname # return original name if no pattern + if limit and pcnt > 
        # --- continuation of replace_pattern(): the fragment below completes the
        # preceding wrapped line (presumably "if pcnt > limit: pcnt = limit") ---
        limit: pcnt = limit
        # temporal pattern regexps keyed by type letter:
        #   b=begin-of-period, c=current date, d=trailing digits, m=month fraction,
        #   n=period sequence number, p=generic pattern index, s=serial, w=weekday
        mps = {'b' : r'^B(.+)B$', 'c': r'^C(.+)C$', 'd' : r'(\d+)$', 'm' : r'^M([NC])M$',
               'n' : r'^N(H+|D+)N$', 'p' : r'^P(\d+)$', 's' : r'^S[\d:]+S$', 'w' : r'^W(.+)W$'}
        for i in range(pcnt):
            pattern = patterns[i]
            replace = "{}{}{}".format(seps[0], pattern, seps[1])  # pattern including its delimiters
            d = None
            domatch = 1
            ms = re.match(mps['p'], pattern, re.I)
            if ms: # generic pattern matches; substitute the value given via option -GP
                pidx = int(ms.group(1))
                pattern = self.params['GP'][pidx] if 'GP' in self.params else None
                if not pattern: self.pglog("{}: MISS value per option -GP for matching general pattern '{}'".format(fname, replace), self.PGOPT['extlog'])
                domatch = 1
            if domatch:
                ms = re.match(mps['c'], pattern, re.I) # current date
                if ms:
                    pattern = ms.group(1)
                    d = self.params['CD']
                    h = self.params['CH']
                    domatch = 0
            # serial patterns are expanded elsewhere; nothing to do without a date
            if domatch and (not date or re.match(mps['s'], pattern, re.I)): continue
            if domatch:
                ms = re.match(mps['m'], pattern, re.I)
                if ms: # month-fraction pattern
                    pattern = ms.group(1)
                    if intv and len(intv) == 7 and intv[6] and re.search(mps['d'], date):
                        ms = re.search(mps['d'], date)
                        d = ms.group(1)
                        # NOTE(review): d is a str here, so "d >= 28" raises TypeError
                        # on python3 and "d*intv" multiplies a str by a list; probably
                        # meant int(d) and intv[6] -- confirm before relying on this path
                        d = (intv[6] - 1) if d >= 28 else int(d*intv/30)
                        if pattern == "C":
                            pattern = chr(65 + d) # upper case, chr(65) is A
                        elif pattern == "c":
                            pattern = chr(97 + d) # lower case, chr(97) is a
                        else:
                            pattern = d + 1 # numeric, start from 1
                        d = None
                        domatch = 0
                    else:
                        self.pglog("{}: MISS month fraction for '{}'".format(fname, replace), self.PGOPT['emllog'])
            if domatch:
                ms = re.match(mps['n'], pattern, re.I)
                if ms: # sequence number since period begin time, zero-padded
                    pattern = ms.group(1)
                    if not bdate: (bdate, bhour) = self.addfrequency(date, hour, intv, 0)
                    plen = len(pattern)
                    if re.match(r'^D', pattern):
                        diff = self.diffdate(date, bdate)
                    else:
                        diff = self.diffdatehour(date, hour, bdate, bhour)
                    pattern = "{:0{}}".format(diff, plen)  # pad to the pattern's width
                    domatch = 0
            if domatch:
                ms = re.match(mps['b'], pattern, re.I)
                if ms: # begin-of-period pattern
                    pattern = ms.group(1)
                    d = date
                elif 'UB' in self.params:
                    d = date
                if d and intv: # beginning time of update period
                    if bdate:
                        d = bdate
                        h = bhour
                    else:
                        (d, h) = self.addfrequency(d, hour, intv, 0)
                else:
                    ms = re.match(mps['w'], pattern, re.I)
                    if ms: # back to the nearest Wed
                        pattern = ms.group(1)
                        wd = self.get_weekday(date)
                        if wd < 3:
                            wd += 4
                        else:
                            wd -= 3
                        d = self.adddate(date, 0, 0, -wd) if (wd > 0) else date
                    else:
                        d = date
                        h = hour
            if d: pattern = self.format_datehour(d, h, pattern)
            fname = re.sub(replace, pattern, fname, 1)
        return fname

    # get next display order of an archived data file of given dataset (and group);
    # dsid = None/'' reinitializes the cache; next truthy seeds the cache from the
    # current max dlupdt.execorder for the dataset
    def get_next_exec_order(self, dsid, next):
        if not dsid:
            self.CORDERS = {} # reinitialize cached display orders
            return
        if dsid not in self.CORDERS:
            # NOTE(review): if next is falsy while dsid is uncached, the increment
            # below raises KeyError -- confirm callers always pass next truthy here
            if next:
                pgrec = self.pgget("dlupdt", "max(execorder) max_order", "dsid = '{}'".format(dsid), self.PGOPT['extlog'])
                self.CORDERS[dsid] = pgrec['max_order'] if pgrec else 0
        self.CORDERS[dsid] += 1
        return self.CORDERS[dsid]

    # execute specialist specified command: expands environment variables and the
    # placeholders __FN__/__FNAME__/__FILENAME__ or -LF/-RF/-SF (file name),
    # -RF (remote file names), -DS (dataset id), -ED/-EH (end date/hour),
    # -SN/-LN (local name) and -LI (locfile index);
    # returns None for an empty or commented-out (leading '#') command
    def executable_command(self, cmd, file, dsid, edate, ehour, rfiles = None):
        if not cmd or re.match(r'^#', cmd): return None
        if re.search(r'\$', cmd): cmd = self.replace_environments(cmd, None, self.PGOPT['emlerr'])
        if file:
            ms = re.search(r'__(FN|FNAME|FILENAME)__', cmd)
            if ms:
                cmd = re.sub(r'__{}__'.format(ms.group(1)), file, cmd)
            elif re.search(r'(-LF|-RF|-SF)', cmd):
                ms = re.search(r'(-LF|-RF|-SF)', cmd)
                cmd = re.sub(ms.group(1), file, cmd)
            elif re.search(r'/$', cmd):
                cmd += file
                if re.search(r'(^|\s|\||\S/)msrcp\s', cmd):
                    # NOTE(review): appends the literal string " file", not the file
                    # variable -- looks like a translation slip; confirm intent
                    cmd += " file"
                elif re.search(r'(^|\s|\||\S/)(cp|mv)\s', cmd):
                    cmd += " ."
            elif cmd.find(file) < 0 and re.search(r'(^|\s|\||\S/)(rm\s|tar\s.+\.tar$)', cmd):
                # NOTE(review): literal " file" again -- see note above
                cmd += " file"
        if re.search(r'-RF', cmd):
            names = []
            if rfiles:
                for rfile in rfiles:
                    if isinstance(rfile, dict):
                        names.append(rfile['fname'])
                    else:
                        names.append(rfile)
            name = ' '.join(names)
            cmd = re.sub(r'-RF', name, cmd, 1)
        if re.search(r'-DS', cmd):
            name = dsid if dsid else ""
            cmd = re.sub(r'-DS', name, cmd, 1)
        if edate and re.search(r'-ED', cmd):
            name = str(edate) if edate else ""
            cmd = re.sub('-ED', name, cmd, 1)
        if re.search(r'-EH', cmd):
            name = str(ehour) if ehour != None else ''
            cmd = re.sub(r'-EH', name, cmd, 1)
        ms = re.search(r'(-SN|-LN)', cmd)
        if ms:
            cmd = re.sub(ms.group(1), self.params['LN'], cmd, 1)
        if re.search(r'-LI', cmd):
            name = str(self.PGOPT['lindex']) if self.PGOPT['lindex'] else ''
            cmd = re.sub(r'-LI', name, cmd, 1)
        return cmd

    # get the local file names for one update period; lfile starting with '!' is run
    # as a command expected to print '::'-separated names, otherwise serial and
    # temporal patterns in lfile are expanded; returns a list, 0 or None
    def get_local_names(self, lfile, tempinfo, edate = None):
        locfiles = []
        ehour = tempinfo['ehour']
        if not edate: edate = tempinfo['edate']
        if lfile[0] == '!': # executable for build up local file names
            cmd = self.executable_command(lfile[1:], None, self.params['DS'], edate, ehour)
            if not cmd: return 0
            buf = self.pgsystem(cmd, self.PGOPT['wrnlog'], 21)
            if not buf: return self.pglog(lfile + ": NO local filename returned", self.PGOPT['emlerr'])
            locfiles = re.split('::', buf)
        else:
            lfiles = self.expand_serial_pattern(lfile)
            lcnt = len(lfiles)
            for i in range(lcnt):
                locfiles.append(self.replace_pattern(lfiles[i], edate, ehour, tempinfo['FQ']))
        return locfiles if locfiles else None

    # expand a serial pattern S<start>:<end>[:<step>]S (inside the configured
    # delimiter pair) into one file name per index value
    # (definition continues in the next chunk)
    def expand_serial_pattern(self, fname):
        if not fname: return None
        seps = self.params['PD']  # pattern delimiter pair
        ms = re.search(r'{}S(\d[\d:]+\d)S{}'.format(seps[0], seps[1]), fname)
        if not ms: return [fname]
        rep = "{}S{}S{}".format(seps[0], ms.group(1), seps[1])
        mcs = re.split(':', ms.group(1))
        tlen = len(mcs[0])  # zero-pad width taken from the start index
        idx = [0]*3
        # --- continuation of expand_serial_pattern() ---
        idx[0] = int(mcs[0])  # start index
        idx[1] = int(mcs[1])  # end index (inclusive)
        idx[2] = int(mcs[2]) if len(mcs) > 2 else 1  # optional step, default 1
        fns = []
        i = idx[0]
        while i <= idx[1]:
            val = "{:0{}}".format(i, tlen)
            fn = re.sub(rep, val, fname, 1)
            fns.append(fn)
            i += idx[2]
        return fns

    # get the remote file names; rfile starting with '!' is run as a command
    # expected to print '::'-separated names, otherwise serial/temporal patterns
    # are expanded for every time interval of the period; returns a list or None
    def get_remote_names(self, rfile, rmtrec, rmtinfo, tempinfo, edate = None):
        rmtfiles = []
        if not edate: edate = tempinfo['edate']
        if rfile[0] == '!': # executable for build up remote file names
            cmd = self.executable_command(rfile[1:], None, self.params['DS'], edate, tempinfo['ehour'])
            if not cmd: return None
            rfile = self.pgsystem(cmd, self.PGOPT['wrnlog'], 21)
            if not rfile: return self.pglog(rmtinfo + ": NO remote filename returned", self.PGOPT['emlerr'])
            rmtfiles = re.split('::', rfile)
        else:
            rfiles = self.expand_serial_pattern(rfile)
            rcnt = len(rfiles)
            for i in range(rcnt):
                rmtfiles.extend(self.replace_remote_pattern_times(rfiles[i], rmtrec, rmtinfo, tempinfo, edate))
        return rmtfiles if rmtfiles else None

    # get and replace pattern dates/hours for remote files; builds one file dict
    # (via one_remote_filename) per remote time interval covered by the period
    # (definition continues in the next chunk)
    def replace_remote_pattern_times(self, rfile, rmtrec, rmtinfo, tempinfo, edate = None):
        rfiles = []
        if not edate: edate = tempinfo['edate']
        ehour = tempinfo['ehour']
        freq = tempinfo['FQ']  # update frequency list; see addfrequency()
        (bdate, bhour) = self.addfrequency(edate, ehour, freq, 0)  # period begin time
        funit = tempinfo['QU'] if tempinfo['QU'] else None
        tintv = rmtrec['tinterval'] if rmtrec['tinterval'] else None
        if not tintv:
            if rmtrec['dindex'] and funit:
                if self.need_time_interval(rfile, freq): return []
            rfiles = [self.one_remote_filename(rfile, edate, ehour, tempinfo, None, bdate, bhour)]
            return rfiles
        elif not funit:
            self.pglog("{}: MISS Update Frequency for given time interval '{}'".format(rmtinfo, tintv), self.PGOPT['emlerr'])
            return []
        ms = re.match(r'^(\d*)([YMWDH])$', tintv)
        if ms:
            val = int(ms.group(1)) if len(ms.group(1)) > 0 else 1
            unit = ms.group(2)
            if unit == 'W': val *= 7  # weeks handled as days
        else:
            self.pglog("{}: time interval '{}' NOT in (Y,M,W,D,H)".format(rmtinfo, tintv), self.PGOPT['emlerr'])
            return []
        # check if multiple data periods
        i = 0 # not single period
        if unit == 'H':
            if freq[3] and freq[3] <= val: i = 1
        elif unit == 'D' or unit == 'W':
            if freq[3] or freq[2] and freq[2] <= val: i = 1
        elif unit == 'M':
            if freq[3] or freq[2] or freq[1] and freq[1] <= val: i = 1
        elif unit == 'Y':
            if not freq[0] or freq[0] <= val: i = 1
        if i == 1: # one interval covers the whole period; a single remote file
            rfiles = [self.one_remote_filename(rfile, edate, ehour, tempinfo, None, bdate, bhour)]
            return rfiles
        date = edate
        hour = ehour
        # set ending date/hour for multiple data periods
        max = self.replace_pattern(rmtrec['endtime'], date, 0) if rmtrec['endtime'] else 0
        if max:
            ms = re.match(r'^(\d+-\d+-\d+)', max)
            if ms: # endtime expanded to an absolute date[:hour]
                edate = ms.group(1)
                ms = re.search(r':(\d+)', max)
                if ms: ehour = int(ms.group(1))
                max = 0
            else:
                if freq[1] and max.find(':') > -1:
                    maxs = re.split(':', max)
                    if len(maxs) == 12: # one offset per calendar month
                        mn = 1
                        ms = re.match(r'^(\d+)-(\d+)', bdate)
                        if ms: mn = int(ms.group(2))
                        max = int(maxs[mn - 1])
                    else: # use the first one
                        max = int(maxs[0])
        if max:
            if unit == 'H':
                (edate, ehour) = self.adddatehour(bdate, bhour, 0, 0, 0, max)
            elif unit == 'Y':
                edate = self.adddate(bdate, max, 0, 0)
            elif unit == 'M':
                edate = self.adddate(bdate, 0, max, 0)
            elif unit == 'W' or unit == 'D':
                edate = self.adddate(bdate, 0, 0, max)
        # set beginning date/hour for multiple data periods
        min = self.replace_pattern(rmtrec['begintime'], date, 0) if rmtrec['begintime'] else 0
        if min:
            ms = re.match(r'^(\d+-\d+-\d+)', min)
            if ms: # begintime expanded to an absolute date[:hour]
                date = ms.group(1)
                ms = re.search(r':(\d+)', min)
                if ms:
                    hour = int(ms.group(1))
                else:
                    hour = 0
                min = 0
            else:
                date = bdate
                hour = bhour
                if freq[1] and min.find(':') > -1:
                    mins = re.split(':', min)
                    if len(mins) == 12: # one offset per calendar month
                        mn = 1
                        ms = re.match(r'^(\d+)-(\d+)', date)
                        if ms: mn = int(ms.group(2))
                        min = int(mins[mn-1])
                    else: # use the first one
min = int(mins[0]) + else: + date = bdate + hour = bhour + + if min and not isinstance(min, int): min = int(min) + gotintv = 0 + intv = [0]*4 + if unit == 'Y': + intv[0] = val + gotintv += 1 + if min: date = self.adddate(date, min, 0, 0) + elif unit == 'M': + intv[1] = val + gotintv += 1 + if min: + date = self.adddate(date, 0, min, 0) + else: + date = self.enddate(date, 0, 'M') + elif unit == 'W' or unit == 'D': + intv[2] = val + gotintv += 1 + if min: date = self.adddate(date, 0, 0, min) + elif unit == 'H': + intv[3] = val + gotintv += 1 + if hour is None or not freq[3]: + ehour = 23 + hour = 0 + if min: (date, hour) = self.adddatehour(date, hour, 0, 0, 0, min) + if not gotintv: + self.pglog("{}: error process time internal '{}'".format(rmtinfo, tintv), self.PGOPT['emlerr']) + return [] + rfiles = [] + i = 0 + while self.diffdatehour(date, hour, edate, ehour) <= 0: + rfiles.append(self.one_remote_filename(rfile, date, hour, tempinfo, intv, bdate, bhour)) + (date, hour) = self.adddatehour(date, hour, intv[0], intv[1], intv[2], intv[3]) + return rfiles + + # get one hash array for a single remote file name + def one_remote_filename(self, fname, date, hour, tempinfo, intv, bdate, bhour): + if tempinfo['NX']: + (udate, uhour) = self.adddatehour(date, hour, tempinfo['NX'][0], tempinfo['NX'][1], tempinfo['NX'][2], tempinfo['NX'][3]) + else: + udate = date, + uhour = hour + if 'CP' in self.params: + (vdate, vhour) = self.addfrequency(self.PGOPT['CURDATE'], self.PGOPT['CURHOUR'], tempinfo['FQ'], 1) + else: + vdate = self.PGOPT['CURDATE'] + vhour = self.PGOPT['CURHOUR'] + rfile = {} + if intv is None: intv = tempinfo['FQ'] + rfile['fname'] = self.replace_pattern(fname, date, hour, intv, 0, bdate, bhour) + if 'FU' in self.params or self.diffdatehour(udate, uhour, vdate, vhour) <= 0: + if tempinfo['VD'] and self.diffdatehour(date, hour, tempinfo['VD'], tempinfo['VH']) < 0: + rfile['ready'] = -1 + else: + rfile['ready'] = 1 + else: + rfile['ready'] = 0 + rfile['amiss'] = 1 
if (tempinfo['amiss'] == 'Y') else 0 + rfile['date'] = date + rfile['hour'] = hour + if hour is None: + rfile['time'] = "23:59:59" + else: + rfile['time'] = "{:02}:00:00".format(hour) + if tempinfo['DC']: + rfile['rcmd'] = self.replace_pattern(tempinfo['DC'], date, hour, intv, 0, bdate, bhour) + else: + rfile['rcmd'] = None + return rfile + + # record the date/hour for missing data + def set_miss_time(self, lfile, locrec, tempinfo, rmonly = 0): + setmiss = 1 + mdate = mhour = None + pgrec = {} + if rmonly: + if(not locrec['missdate'] or + self.diffdatehour(tempinfo['edate'], tempinfo['ehour'], locrec['missdate'], locrec['misshour'])): + return setmiss # do not remove if miss times not match + elif self.diffdatehour(tempinfo['edate'], tempinfo['ehour'], tempinfo['VD'], tempinfo['VH']) >= 0: + mdate = tempinfo['edate'] + if tempinfo['ehour'] is not None: mhour = tempinfo['ehour'] + setmiss = 0 + if locrec['missdate']: + if not mdate: + pgrec['missdate'] = pgrec['misshour'] = None + elif (self.diffdatehour(mdate, mhour, locrec['missdate'], locrec['misshour']) and + self.diffdatehour(locrec['missdate'], locrec['misshour'], tempinfo['VD'], tempinfo['VH']) < 0): + pgrec['missdate'] = mdate + pgrec['misshour'] = mhour + elif mdate: + pgrec['missdate'] = mdate + pgrec['misshour'] = mhour + if not pgrec: + if locrec['misshour']: + if mhour is None or mhour != locrec['misshour']: + pgrec['misshour'] = mhour + elif mhour is not None: + pgrec['misshour'] = mhour + if pgrec: self.pgupdt("dlupdt", pgrec, "lindex = {}".format(locrec['lindex']), self.PGOPT['extlog']) + return setmiss + + # reset next data end/update times + def reset_update_time(self, locinfo, locrec, tempinfo, arccnt, endonly): + gx = 1 if re.search(r'(^|\s)-GX(\s|$)', locrec['options'], re.I) else 0 + date = tempinfo['edate'] + hour = tempinfo['ehour'] + if not gx and ('UT' in self.params or arccnt > 0): + pgrec = self.get_period_record(locrec['gindex'], self.params['DS'], locinfo) + if pgrec: + ehour = None + 
                # --- continuation of reset_update_time() ---
                if hour != None:
                    ms = re.match(r'^(\d+):', str(pgrec['time_end']))
                    if ms: ehour = int(ms.group(1))
                diff = self.diffdatehour(date, hour, pgrec['date_end'], ehour)
                if 'UT' in self.params or diff > 0:
                    # extend/change the dataset period via the sdp utility
                    sdpcmd = "sdp -d {} -g {} -ed {}".format(self.params['DS'][2:], pgrec['gindex'], date)
                    if hour != None: sdpcmd += " -et {:02}:59:59".format(hour)
                    if self.pgsystem(sdpcmd, self.MSGLOG, 32):
                        einfo = "{}".format(date)
                        if hour != None: einfo += ":{:02}".format(hour)
                        self.pglog("{}: data archive period {} to {}".format(locinfo, ("EXTENDED" if diff > 0 else "CHANGED"), einfo), self.PGOPT['emllog'])
        if not tempinfo['FQ'] or endonly and arccnt < 1: return
        if self.diffdatehour(date, hour, self.params['CD'], self.params['CH']) <= 0:
            (date, hour) = self.addfrequency(date, hour, tempinfo['FQ'], 1)
            date = self.enddate(date, tempinfo['EP'], tempinfo['QU'], tempinfo['FQ'][6])
        if 'UT' in self.params or not locrec['enddate'] or self.diffdatehour(date, hour, locrec['enddate'], locrec['endhour']) > 0:
            record = {'enddate' : date}
            if hour != None:
                record['endhour'] = hour
                einfo = "end data date:hour {}:{:02}".format(date, hour)
            else:
                einfo = "end data date {}".format(date)
            if 'GZ' in self.params: einfo += "(UTC)"
            if tempinfo['NX']:
                (date, hour) = self.adddatehour(date, hour, tempinfo['NX'][0], tempinfo['NX'][1], tempinfo['NX'][2], tempinfo['NX'][3])
            if(locrec['enddate'] and
               self.pgupdt("dlupdt", record, "lindex = {}".format(locrec['lindex']), self.PGOPT['extlog'])):
                self.pglog("{}: {} {} for NEXT update".format(locinfo, ("set" if arccnt > 0 else "SKIP to"), einfo), self.PGOPT['emllog'])
                if self.PGOPT['UCNTL']: self.reset_data_time(tempinfo['QU'], tempinfo['edate'], tempinfo['ehour'], locrec['lindex'])
            else:
                self.pglog("{}: {} for NEXT update".format(locinfo, einfo), self.PGOPT['emllog'])
        else:
            if locrec['endhour'] != None:
                einfo = "end data date:hour {}:{:02}".format(locrec['enddate'], locrec['endhour'])
            else:
                einfo = "end data date {}".format(locrec['enddate'])
            if 'GZ' in self.params: einfo += "(UTC)"
            self.pglog("{}: ALREADY set {} for NEXT update".format(locinfo, einfo), self.PGOPT['emllog'])
            if self.PGOPT['UCNTL']: self.reset_data_time(tempinfo['QU'], tempinfo['edate'], tempinfo['ehour'], locrec['lindex'])

    # get period record for sub group; walks up to the parent group when the
    # group itself has no dsperiod record; returns None on a placeholder
    # '0000-00-00' end date
    def get_period_record(self, gindex, dsid, locinfo):
        pgrec = self.pgget("dsperiod", "gindex, date_end, time_end, dorder",
                           "dsid = '{}' AND gindex = {} ORDER BY dorder".format(dsid, gindex), self.PGOPT['extlog'])
        if not pgrec and gindex:
            pgrec = self.pgget("dsgroup", "pindex", "dsid = '{}' AND gindex = {}".format(dsid, gindex), self.PGOPT['extlog'])
            if pgrec: pgrec = self.get_period_record(pgrec['pindex'], dsid, locinfo)
        if pgrec and pgrec['date_end'] and pgrec['date_end'] == "0000-00-00":
            self.pglog(locinfo + ": dsperiod.date_end set as '0000-00-00' by 'gatherxml'", self.PGOPT['emlerr'])
            pgrec = None
        return pgrec

    # check if need time interval for remote/server file: returns 1 (and logs)
    # when the file-name patterns are finer-grained than the update frequency
    def need_time_interval(self, fname, freq):
        units = self.temporal_pattern_units(fname, self.params['PD'])
        if not units: return 0 # no temporal pattern found in file name
        funit = punit = None
        if freq[2] > 0:
            if 'H' in units:
                punit = "Hourly"
                funit = "Daily"
        elif freq[1] > 0:
            if 'H' in units:
                punit = "Hourly"
            elif 'D' in units:
                punit = "Daily"
            if punit: funit = "Monthly"
        elif freq[0] > 0:
            if 'H' in units:
                punit = "Hourly"
            elif 'D' in units:
                punit = "Daily"
            elif 'M' in units:
                punit = "Monthly"
            if punit: funit = "Yearly"
        if punit:
            self.pglog("{}: Remote File Name seems defined at {} Time Interval for {} Update, ".format(fname, punit, funit) +
                       "specify the Time Interval in remote file record to continue", self.PGOPT['emllog'])
            return 1
        else:
            return 0

    # check if local file is a growing one: 1 when its temporal patterns are no
    # finer than the update frequency (the same file is appended each update)
    def is_growing_file(self, fname, freq):
        units = self.temporal_pattern_units(fname, self.params['PD'])
        if not units: return 1 # no temporal pattern found in file name
        if freq[3] > 0:
            if 'H' in units: return 0
        elif freq[2] > 0:
            if 'H' in units or 'D' in units: return 0
        elif freq[1] > 0:
            if 'H' in units or 'D' in units or 'M' in units and not freq[6]: return 0
        elif freq[0] > 0:
            return 0
        return 1

    # add update frequency to date/hour
    # opt = -1 - minus, 0 - begin time, 1 - add (default)
    # intv holds [years, months, days, hours, ...] with intv[6] a month fraction
    def addfrequency(self, date, hour, intv, opt = 1):
        if date and not isinstance(date, str): date = str(date)
        if not intv: return (date, hour)
        freq = intv.copy()  # copy so the caller's list is not negated below
        if opt == 0: # get begin time of next period
            if freq[3]:
                if freq[3] == 1: return (date, hour)
                (date, hour) = self.adddatehour(date, hour, 0, 0, 0, 1) # add one hour
            else:
                if freq[2] == 1: return (date, hour)
                date = self.adddate(date, 0, 0, 1) # add one day
        if opt < 1: # negative frequency for minus
            flen = len(freq)
            for i in range(flen):
                if freq[i]: freq[i] = -freq[i]
        if freq[6]: # add fraction month
            date = self.addmonth(date, freq[1], freq[6])
        elif hour != None: # add date/hour
            (date, hour) = self.adddatehour(date, hour, freq[0], freq[1], freq[2], freq[3])
        else: # add date only
            date = self.adddate(date, freq[0], freq[1], freq[2])
        return (date, hour)

    # send a customized email if built during specialist's process; clears the
    # cached note after a successful send
    def send_updated_email(self, lindex, locinfo):
        pgrec = self.pgget("dlupdt", "emnote", "lindex = {}".format(lindex), self.LOGERR)
        if not (pgrec and pgrec['emnote']): return # no customized email info to send
        if not self.send_customized_email(locinfo, pgrec['emnote'], self.PGOPT['emllog']): return
        self.pgexec("update dlupdt set emnote = null where lindex = {}".format(lindex), self.LOGERR) # empty email after sent

    # validate given local indices; returns the count of zero (new-record) indices
    def validate_lindices(self, cact):
        if (self.OPTS['LI'][2]&8) == 8: return 0 # already validated
        zcnt = 0
        lcnt = len(self.params['LI'])
        i = 0
        while i < lcnt:
            val = self.params['LI'][i]
            if val:
                if isinstance(val, int):
                    self.params['LI'][i] = val
                else:
                    if re.match(r'^(!|<|>|<>)$', val): break  # condition sign found
                    self.params['LI'][i] = int(val)
            else:
                self.params['LI'][i] = 0
            i += 1
        if i >= lcnt: # normal locfile index given
            for i in range(lcnt):
                val = self.params['LI'][i]
                if not val:
                    if cact == "SL":
                        if 'NL' not in self.params: self.action_error("Mode option -NL to add new local file record")
                        zcnt += 1
                    elif cact == "SR":
                        # NOTE(review): "/n" in the message below is probably a typo
                        # for the newline escape "\n" -- confirm before changing
                        self.action_error("Local File Index 0 is not allowed/n" +
                                          "Use Action SL with Mode option -NL to add new record")
                    continue
                if i > 0 and val == self.params['LI'][i-1]: continue
                pgrec = self.pgget("dlupdt", "dsid, specialist", "lindex = {}".format(val), self.PGOPT['extlog'])
                if not pgrec:
                    self.action_error("Locfile Index {} is not in RDADB".format(val))
                elif self.OPTS[self.PGOPT['CACT']][2] > 0:
                    if pgrec['specialist'] == self.PGLOG['CURUID']:
                        self.params['MD'] = 1
                    else:
                        self.validate_dsowner("dsupdt", pgrec['dsid'])
        else: # found none-equal condition sign
            pgrec = self.pgmget("dlupdt", "DISTINCT lindex", self.get_field_condition("lindex", self.params['LI'], 0, 1), self.PGOPT['extlog'])
            if not pgrec: self.action_error("No update record matches given Locfile Index condition")
            self.params['LI'] = pgrec['lindex']
        self.OPTS['LI'][2] |= 8 # set validated flag
        return zcnt

    # validate given control indices; returns the count of zero (new-record) indices
    def validate_cindices(self, cact):
        if (self.OPTS['CI'][2] & 8) == 8: return 0 # already validated
        zcnt = 0
        if 'CI' in self.params:
            ccnt = len(self.params['CI'])
            i = 0
            while i < ccnt:
                val = self.params['CI'][i]
                if val:
                    if isinstance(val, int):
                        self.params['CI'][i] = val
                    else:
                        if re.match(r'^(!|<|>|<>)$', val): break  # condition sign found
                        self.params['CI'][i] = int(val)
                else:
                    self.params['CI'][i] = 0
                i += 1
            if i >= ccnt: # normal locfile index given
                for i in range(ccnt):
                    val = self.params['CI'][i]
                    if not val:
                        if cact == 'SC':
                            if 'NC' in self.params:
                                self.params['CI'][i] = 0
                                zcnt += 1
                            else:
                                self.action_error("Mode option -NC to add new update control record")
                        continue
                    if i > 0 and val == self.params['CI'][i-1]: continue
                    pgrec = self.pgget("dcupdt", "dsid, specialist", "cindex = {}".format(val), self.PGOPT['extlog'])
                    if not pgrec:
                        self.action_error("Control Index {} is not in RDADB".format(val))
                    elif self.OPTS[self.PGOPT['CACT']][2] > 0:
                        if pgrec['specialist'] == self.PGLOG['CURUID']:
                            self.params['MD'] = 1
                        else:
                            self.validate_dsowner("dsupdt", pgrec['dsid'])
            else: # found none-equal condition sign
                pgrec = self.pgmget("dcupdt", "DISTINCT cindex", self.get_field_condition("cindex", self.params['CI'], 0, 1), self.PGOPT['extlog'])
                if not pgrec: self.action_error("No update control record matches given Index condition")
                self.params['CI'] = pgrec['cindex']
            if len(self.params['CI']) > 1 and self.PGOPT['ACTS']&self.PGOPT['CNTLACTS']:
                self.action_error("Process one Update Control each time")
        elif 'ID' in self.params:
            # NOTE(review): zcnt is passed by value, so increments inside
            # cid2cindex() are not reflected here -- confirm intended behavior
            self.params['CI'] = self.cid2cindex(cact, self.params['ID'], zcnt)
        self.OPTS['CI'][2] |= 8 # set validated flag
        return zcnt

    # get control index array from given control IDs
    def cid2cindex(self, cact, cntlids, zcnt):
        count = len(cntlids) if cntlids else 0
        if count == 0: return None
        i = 0
        while i < count:
            val = cntlids[i]
            if val and (re.match(r'^(!|<|>|<>)$', val) or val.find('%') > -1): break
            i += 1
        if i >= count: # normal control id given
            indices = [0]*count
            for i in range(count):
                val = cntlids[i]
                if not val:
                    continue
                elif i and (val == cntlids[i-1]):
                    indices[i] = indices[i-1]  # duplicate of previous id
                    continue
                else:
                    pgrec = self.pgget("dcupdt", "cindex", "cntlid = '{}'".format(val), self.PGOPT['extlog'])
                    if pgrec: indices[i] = pgrec['cindex']
                if not indices[i]:
                    if cact == "SC":
                        if 'NC' in self.params:
                            indices[i] = 0
                            zcnt += 1
                        else:
                            self.action_error("Control ID {} is not in RDADB,\n".format(val) +
                                              "Use Mode Option -NC (-NewControl) to add new Control", cact)
                    else:
                        self.action_error("Control ID '{}' is not in RDADB".format(val), cact)
            return indices
        else: # found wildcard and/or none-equal condition sign
            pgrec = self.pgmget("dcupdt", "DISTINCT cindex", self.get_field_condition("cntlid", cntlids, 1, 1), self.PGOPT['extlog'])
            if not pgrec: self.action_error("No Control matches given Control ID condition")
            return pgrec['cindex']

    # check remote server file information by dissecting the download command
    # (rdacp, cp/mv, tar, ncftpget, wget, or a generic command); returns a file
    # info dict (with 'ftype' set) or None, leaving any error in PGOPT['STATUS']
    def check_server_file(self, dcmd, opt, cfile):
        sfile = info = type = None
        self.PGLOG['SYSERR'] = self.PGOPT['STATUS'] = ''
        docheck = 1
        copt = opt|256
        ms = re.search(r'(^|\s|\||\S/)rdacp\s+(.+)$', dcmd)
        if ms:
            buf = ms.group(2)
            type = "RDACP"
            docheck = 0
            ms = re.match(r'^(-\w+)', buf)
            while ms:
                flg = ms.group(1)
                # NOTE(review): .format(flg) on a pattern with no placeholder is a
                # no-op, and sfile/target below take ms.group(1) (the option flag)
                # rather than m.group(1) (its value) -- this branch looks broken
                buf = re.sub('^-\w+\s+'.format(flg), '', buf, 1) # remove options
                if flg != "-r": # no option value
                    m = re.match(r'^(\S+)\s', buf)
                    if not m: break
                    if flg == "-f":
                        sfile = ms.group(1)
                    elif flg == "-fh":
                        target = ms.group(1)
                    buf = re.sub(r'^\S\s+', '', buf, 1) # remove values
                ms = re.match(r'^(-\w+)', buf)
            if not sfile:
                ms = re.match(r'^(\S+)', buf)
                if ms: sfile = ms.group(1)
            info = self.check_rda_file(sfile, target, copt)
        if docheck:
            ms = re.search(r'(^|\s|\||\S/)(mv|cp)\s+(.+)$', dcmd)
            if ms:
                sfile = ms.group(3)
                type = "COPY" if ms.group(2) == "cp" else "MOVE"
                docheck = 0
                ms = re.match(r'^(-\w+\s+)', sfile)
                while ms:
                    sfile = re.sub(r'^-\w+\s+', '', sfile, 1) # remove options
                    ms = re.match(r'^(-\w+\s+)', sfile)
                ms = re.match(r'^(\S+)\s', sfile)
                if ms: sfile = ms.group(1)
                info = self.check_local_file(sfile, copt)
        if docheck:
            ms = re.search(r'(^|\s|\||\S/)tar\s+(-\w+)\s+(\S+\.tar)\s+(\S+)$', dcmd)
            if ms:
                sfile = ms.group(4)
                target = ms.group(3)
                type = "UNTAR" if ms.group(2).find('x') > -1 else "TAR"
                docheck = 0
                info = self.check_tar_file(sfile, target, copt)
        if docheck:
            ms = re.search(r'(^|\s|\||\S/)ncftpget\s(.*)(ftp://\S+)', dcmd, re.I)
            if ms:
                sfile = ms.group(3)
                buf = ms.group(2)
                type = "FTP"
                docheck = 0
                user = pswd = None
                if buf:
                    ms = re.search(r'(-u\s+|--user=)(\S+)', buf)
                    if ms: user = ms.group(2)
                    ms = re.search(r'(-p\s+|--password=)(\S+)', buf)
                    if ms: pswd = ms.group(2)
                info = self.check_ftp_file(sfile, copt, user, pswd)
        if docheck:
            ms = re.search(r'(^|\s|\||\S/)wget(\s.*)https{0,1}://(\S+)', dcmd, re.I)
            if ms:
                obuf = ms.group(2)
                wbuf = ms.group(3)
                sfile = op.basename(wbuf)
                self.slow_web_access(wbuf)  # throttle known slow sites
                type = "WGET"
                docheck = 0
                # force timestamping so an unchanged server file is not re-fetched
                if not obuf or not re.search(r'\s-N\s', obuf): dcmd = re.sub(r'wget', 'wget -N', dcmd, 1)
                flg = 0
                if cfile and sfile != cfile:
                    if self.pgsystem("cp -p {} {}".format(cfile, sfile), self.PGOPT['emerol'], 4): flg = 1
                buf = self.pgsystem(dcmd, self.PGOPT['wrnlog'], 16+32)
                info = self.check_local_file(sfile, opt, self.PGOPT['wrnlog'])
                if buf:
                    if not info: self.PGOPT['STATUS'] = buf
                    if re.search(r'Saving to:\s', buf):
                        flg = 0  # a new file was actually downloaded; keep it
                    elif not re.search(r'(Server file no newer|not modified on server)', buf):
                        if info: info['note'] = "{}:\n{}".format(dcmd, buf)
                else:
                    if info: info['note'] = dcmd + ": Failed checking new file"
                if flg: self.pgsystem("rm -rf " + sfile, self.PGOPT['emerol'], 4)
        if docheck:
            # fallback: treat the first existing non-option argument as the file
            ms = re.match(r'^(\S+)\s+(.+)$', dcmd)
            if ms:
                buf = ms.group(2)
                type = op.basename(ms.group(1)).upper()
                files = re.split(' ', buf)
                for file in files:
                    if re.match(r'^-\w+', file) or not op.exists(file) or cfile and file == cfile: continue
                    info = self.check_local_file(file, copt)
                    if info:
                        info['data_size'] = 0
                        break
                sfile = file
        if info:
            info['ftype'] = type
        else:
            if not self.PGOPT['STATUS']: self.PGOPT['STATUS'] = self.PGLOG['SYSERR']
            if not sfile: self.pglog(dcmd + ": NO enough information in command to check file info", self.PGOPT['errlog'])
        return info

    # check and sleep if given web site needs to be slowed down for accessing
    def slow_web_access(self, wbuf):
        for wsite in self.WSLOWS:
            if wbuf.find(wsite) > -1:
                time.sleep(self.WSLOWS[wsite])

    # check remote server/file status information
    # return 1 if exists; 0 missed, -1 with error, -2 command not supported yet
    # an error message is stored in self.PGOPT['STATUS'] if not success
    def check_server_status(self, dcmd):
        self.PGOPT['STATUS'] = ''
        target = None
        ms = re.search(r'(^|\s|\||\S/)rdacp\s+(.+)$', dcmd)
        if ms:
            buf = ms.group(2)
            ms = re.search(r'-fh\s+(\S+)', buf)
            if ms: target = ms.group(1)
            ms = re.search(r'-f\s+(\S+)', buf)
            if ms:
                fname = ms.group(1)
            else:
                ms = re.match(r'^(-\w+)', buf)
                while ms:
                    flg = ms.group(1)
                    buf = re.sub(r'^-\w+\s+', '', buf, 1) # remove options
                    if flg != "-r": # no option value
                        if not re.match(r'^\S+\s', buf): break
                        buf = re.sub(r'^\S+\s+', '', buf, 1) # remove values
                    ms = re.match(r'^(-\w+)', buf)
                ms = re.match(r'^(\S+)', buf)
                if ms: fname = ms.group(1)
            # NOTE(review): fname is unbound here when neither branch assigned it
            if not fname:
                self.PGOPT['STATUS'] = dcmd + ": MISS from-file per option -f"
                return -1
            if not target:
                return self.check_local_status(fname)
            else:
                return self.check_remote_status(target, fname)
        ms = re.search(r'(^|\s|\||\S/)(mv|cp|tar|cnvgrib|grabbufr|pb2nc)\s+(.+)$', dcmd)
        if ms:
            buf = ms.group(2)   # the command word
            fname = ms.group(3) # its arguments
            ms = re.match(r'^(-\w+\s+)', fname)
            while ms:
                fname = re.sub(r'^-\w+\s+', '', fname, 1) # remove options
                ms = re.match(r'^(-\w+\s+)', fname)
            ms = re.match(r'^(\S+)\s+(\S*)', fname)
            if ms:
                fname = ms.group(1)
                if buf == 'tar': target = ms.group(2)
            if target:
                return self.check_tar_status(fname, target)
            else:
                return self.check_local_status(fname)
        ms = re.search(r'(^|\s|\||\S/)ncftpget\s(.*)(ftp://[^/]+)(/\S+)', dcmd, re.I)
        if ms:
            buf = ms.group(2)
            target = ms.group(3)
            fname = ms.group(4)
            user = pswd = None
            if buf:
                ms = re.search(r'(-u\s+|--user=)(\S+)', buf)
                if ms: user = ms.group(2)
                ms = re.search(r'(-p\s+|--password=)(\S+)', buf)
                if ms: pswd = ms.group(2)
            return self.check_ftp_status(target, fname, user, pswd)
        ms = re.search(r'(^|\s|\||\S/)wget\s(.*)(https{0,1}://[^/]+)(/\S+)', dcmd, re.I)
        if ms:
            buf = ms.group(2)
            target = ms.group(3)
            fname = ms.group(4)
            user = pswd = None
            if buf:
                ms = re.search(r'(-u\s+|--user=|--http-user=)(\S+)', buf)
                if ms: user = ms.group(2)
                ms = re.search(r'(-p\s+|--password=|--http-passwd=)(\S+)', buf)
                if ms: pswd = ms.group(2)
            return self.check_wget_status(target, fname, user, pswd)
        ms = re.match(r'^\s*(\S+)', dcmd)
        if ms and self.valid_command(ms.group(1)):
            return 0
        else:
            self.PGOPT['STATUS'] = dcmd + ": Invalid command"
            return -2

    # check status for remote server/file via wget --spider; walks up the parent
    # directories when the file itself is missing
    # return 1 if exists; 0/-1 missed (parent found / not found), -2 with error
    # (definition continues in the next chunk)
    def check_wget_status(self, server, fname, user, pswd):
        cmd = "wget --spider --no-check-certificate "
        if user or pswd:
            # NOTE(review): this early return makes the credential handling just
            # below unreachable dead code -- confirm whether authenticated checks
            # were meant to be supported
            self.PGOPT['STATUS'] = "{}{}: {}".format(server, fname, self.PGLOG['MISSFILE'])
            return -1
        if user: cmd += "--user={} ".format(user)
        if pswd: cmd += "--password={} ".format(pswd)
        cmd += server
        pname = None
        i = 0
        while True:
            msg = self.pgsystem(cmd + fname, self.LOGWRN, 48) # 16+32
            if msg:
                if msg.find('Remote file exists') > -1:
                    if pname:
                        self.PGOPT['STATUS'] = "{}{}: {}".format(server, pname, self.PGLOG['MISSFILE'])
                        return (-1 if i > self.PGOPT['PCNT'] else 0)
                    else:
                        return 1
                elif msg.find('unable to resolve host address') > -1:
                    self.PGOPT['STATUS'] = server + ": Server Un-accessible"
                    return -2
                elif msg.find('Remote file does not exist') < 0:
                    self.PGOPT['STATUS'] = "{}{}: Error check status:\n{}".format(cmd, fname, msg)
                    return -2
            pname = fname
            fname = op.dirname(pname)  # retry with the parent directory
            if not fname or fname == "/":
                self.PGOPT['STATUS'] = "{}{}: {}".format(server, pname, self.PGLOG['MISSFILE'])
                return -1
            fname += "/"
+ i += 1 + + # check status for remote server/file via check_ftp_file() + # return self.SUCCESS if file exist and self.FAILURE otherwise. + # file status message is returned via reference string of $status + def check_ftp_status(self, server, fname, user, pswd): + cmd = "ncftpls " + if user: cmd += "-u {} ".format(user) + if pswd: cmd += "-p {} ".format(pswd) + cmd += server + pname = None + i = 0 + while True: + msg = self.pgsystem(cmd + fname, self.LOGWRN, 272) # 16+256 + if self.PGLOG['SYSERR']: + if self.PGLOG['SYSERR'].find('unknown host') > -1: + self.PGOPT['STATUS'] = server + ": Server Un-accessible" + return -2 + elif self.PGLOG['SYSERR'].find('Failed to change directory') < 0: + self.PGOPT['STATUS'] = "{}{}: Error check status:\n{}".format(server, fname, self.PGLOG['SYSERR']) + return -2 + elif not msg: + self.PGOPT['STATUS'] = "{}{}: {}".format(server, fname, self.PGLOG['MISSFILE']) + return -1 if i >= self.PGOPT['PCNT'] else 0 + elif pname: + self.PGOPT['STATUS'] = "{}{}: {}".format(server, pname, self.PGLOG['MISSFILE']) + return -1 if i > self.PGOPT['PCNT'] else 0 + else: + return 1 + pname = fname + fname = op.dirname(pname) + if not fname or fname == "/": + self.PGOPT['STATUS'] = "{}{}: {}".format(server, pname, self.PGLOG['MISSFILE']) + return -1 + i += 1 + + # check remote server status + def check_remote_status(self, host, fname): + pname = None + i = 0 + while True: + msg = self.pgsystem("{}-sync {}".format(host, fname), self.LOGWRN, 272) # 16+256 + if msg: + for line in re.split('\n', msg): + info = self.remote_file_stat(line, 0) + if info: + if pname: + self.PGOPT['STATUS'] = "{}-{}: {}".format(host, pname. 
self.PGLOG['MISSFILE']) + return -1 if i > self.PGOPT['PCNT'] else 0 + else: + return 1 + if self.PGLOG['SYSERR'] and self.PGLOG['SYSERR'].find(self.PGLOG['MISSFILE']) < 0: + self.PGOPT['STATUS'] = "{}-sync {}: Error check status:\n{}".format(host, fname, self.PGLOG['SYSERR']) + return -2 + pname = fname + fname = op.dirname(pname) + if not fname or fname == "/": + self.PGOPT['STATUS'] = "{}-{}: {}".format(host, pname, self.PGLOG['MISSFILE']) + return -1 + i += 1 + + # check local disk status + def check_local_status(self, fname): + pname = None + i = 0 + while True: + if op.exists(fname): + if pname: + self.PGOPT['STATUS'] = "{}: {}".format(pname, self.PGLOG['MISSFILE']) + return -1 if i > self.PGOPT['PCNT'] else 0 + else: + return 1 + if self.PGLOG['SYSERR'] and self.PGLOG['SYSERR'].find(self.PGLOG['MISSFILE']) < 0: + self.PGOPT['STATUS'] = "{}: Error check status:\n{}".format(fname, self.PGLOG['SYSERR']) + return -2 + pname = fname + fname = op.dirname(pname) + if not fname or fname == "/": + self.PGOPT['STATUS'] = "{}: {}".format(pname, self.PGLOG['MISSFILE']) + return -1 + i += 1 + + # check tar file status + def check_tar_status(self, fname, target): + stat = self.check_local_status(fname) + if stat < 1: return stat + msg = self.pgsystem("tar -tvf {} {}".format(fname, target), self.LOGWRN, 272) # 16+256 + if msg: + for line in re.split('\n', msg): + if self.tar_file_stat(line, 0): return 1 + if not self.PGLOG['SYSERR'] or self.PGLOG['SYSERR'].find('Not found in archive') > -1: + self.PGOPT['STATUS'] = "{}: Not found in tar file {}".format(target, fname) + return 0 + else: + self.PGOPT['STATUS'] = "{}: Error check tar file {}:\n{}".format(target, fname, self.PGLOG['SYSERR']) + return -1 + + # count directories with temoral patterns in given path + def count_pattern_path(self, dcmd): + getpath = 1 + ms = re.search(r'(^|\s|\||\S/)rdacp\s+(.+)$', dcmd) + if ms: + path = ms.group(2) + getpath = 0 + ms = re.search(r'-f\s+(\S+)', path) + if ms: + path = ms.group(1) 
+ else: + ms = re.match(r'^(-\w+)', path) + while ms: + flg = ms.group(1) + path = re.sub(r'^-\w+\s+', '', path, 1) # remove options + if flg != "-r": # no option value + ms = re.match(r'^(\S+)\s', path) + if not ms: break + path = re.sub(r'^\S+\s+', '', path, 1) # remove values + ms = re.match(r'^(-\w+)', path) + ms = re.match(r'^(\S+)', path) + if ms: path = ms.group(1) + if not path: return self.pglog(dcmd + ": MISS from-file per option -f", self.PGOPT['emlerr']) + if getpath: + ms = re.search(r'(^|\s|\||\S/)(mv|cp|tar|cnvgrib|grabbufr|pb2nc)\s+(.+)$', dcmd) + if ms: + path = ms.group(3) + getpath = 0 + ms = re.match(r'^-\w+\s', path) + while ms: + path = re.sub(r'^-\w+\s+', '', path, 1) # remove options + ms = re.match(r'^-\w+\s', path) + ms = re.match(r'^(\S+)\s+(\S*)', path) + if ms: path = ms.group(1) + if getpath: + ms = re.search(r'(^|\s|\||\S/)(ncftpget|wget)\s(.*)(ftp|http|https)://[^/]+(/\S+)', dcmd, re.I) + if ms: path = ms.group(5) + if not path: return self.pglog(dcmd + ": Unkown command to count pattern path", self.PGOPT['emlerr']) + pcnt = path.find(self.params['PD'][0]) + if pcnt > 0: + path = path[pcnt:] + p = re.findall(r'/', path) + pcnt = len(p) + 1 + else: + pcnt = 1 + return pcnt + + # check error message for download action + def parse_download_error(self, err, act, sinfo = None): + derr = '' + stat = 0 + if sinfo: + if sinfo['data_size'] == 0: + derr = ", empty file" + if err: derr += ' ' + err + elif sinfo['data_size'] < self.PGLOG['MINSIZE']: + derr = ", small file({}B)".format(sinfo['data_size']) + if err: derr += ' ' + err + else: + stat = 1 + elif err: + derr = err + if (err.find('command not found') > -1 or + err.find('403 Forbidden') > -1): + stat = -2 + elif (act == "wget" and err.find('404 Not Found') > -1 or + act == "UNTAR" and err.find('Not found in archive') > -1 or + act == "ncftpget" and err.find('Failed to open file') > -1 or + err.find(self.PGLOG['MISSFILE']) > -1): + derr = self.PGLOG['MISSFILE'] + else: + stat = -1 + 
# NOTE(review): continuation — final return of parse_download_error()
    return (stat, derr)

# cache update control information
def cache_update_control(self, cidx, dolock = 0):
    """Load dcupdt control record `cidx` into PGOPT['UCNTL'] and map its
    flag columns (updtcntl/emailcntl/errorcntl/keepfile/...) onto
    self.params; optionally lock the record when `dolock` is true.

    Returns SUCCESS on success, FAILURE (or the pglog() result for a
    missing record) otherwise.
    """
    cstr = "C{}".format(cidx)
    pgrec = self.pgget("dcupdt", "*", "cindex = {}".format(cidx), self.PGOPT['emlerr'])
    if not pgrec: return self.pglog(cstr + ": update control record NOT in RDADB", self.PGOPT['errlog'])
    if pgrec['dsid']:
        # adopt the control's dataset if none was given; otherwise they must match
        if 'DS' not in self.params: self.params['DS'] = pgrec['dsid']
        cstr = "{}-{}".format(self.params['DS'], cstr)
        if self.params['DS'] != pgrec['dsid']:
            return self.pglog("{}: Control dataset {} NOT match".format(cstr, pgrec['dsid']), self.PGOPT['emlerr'])
    if pgrec['hostname'] and not self.valid_control_host(cstr, pgrec['hostname'], self.PGOPT['emlerr']): return self.FAILURE
    if not ('ED' in self.params or self.valid_data_time(pgrec, cstr, self.PGOPT['emlerr'])): return self.FAILURE
    if dolock and self.lock_update_control(cidx, 1, self.PGOPT['emlerr']) <= 0: return self.FAILURE
    if self.PGLOG['DSCHECK']: self.set_dscheck_attribute("oindex", cidx)
    if pgrec['updtcntl']:
        # each one-letter flag in updtcntl switches on the matching option
        if pgrec['updtcntl'].find('A') > -1: self.params['CA'] = 1
        if pgrec['updtcntl'].find('B') > -1: self.params['UB'] = 1
        if pgrec['updtcntl'].find('C') > -1: self.params['CP'] = 1
        if pgrec['updtcntl'].find('E') > -1: self.params['RE'] = 1
        if pgrec['updtcntl'].find('F') > -1: self.params['FU'] = 1
        if pgrec['updtcntl'].find('G') > -1:
            self.params['GZ'] = 1
            self.PGLOG['GMTZ'] = self.diffgmthour()
        if pgrec['updtcntl'].find('M') > -1: self.params['MU'] = 1
        if pgrec['updtcntl'].find('N') > -1: self.params['CN'] = 1
        if pgrec['updtcntl'].find('O') > -1: self.params['MO'] = 1
        if pgrec['updtcntl'].find('Y') > -1: self.PGLOG['NOLEAP'] = self.params['NY'] = 1
        if pgrec['updtcntl'].find('Z') > -1 and 'VS' not in self.params:
            self.PGLOG['MINSIZE'] = self.params['VS'] = 0
    if pgrec['emailcntl'] != 'A':
        # email control column: N/S/E/B select which notification options to set
        if pgrec['emailcntl'] == "N":
            self.params['NE'] = 1
            self.PGLOG['LOGMASK'] &= ~self.EMLALL # turn off all email acts
        # NOTE(review): condition continues past the chunk boundary
        elif \
# NOTE(review): continuation of cache_update_control(); the first line below
# completes the `elif` condition split at the chunk boundary.
             pgrec['emailcntl'] == "S":
            self.params['SE'] = 1
            self.PGOPT['emllog'] |= self.EMEROL
        elif pgrec['emailcntl'] == "E":
            self.params['EE'] = 1
        elif pgrec['emailcntl'] == "B":
            # both summary and error email options
            self.params['SE'] = 1
            self.params['EE'] = 1
            self.PGOPT['emllog'] |= self.EMEROL
    if pgrec['errorcntl'] != 'N':
        # error control column: I or Q select the matching option
        if pgrec['errorcntl'] == "I":
            self.params['IE'] = 1
        elif pgrec['errorcntl'] == "Q":
            self.params['QE'] = 1
    if pgrec['keepfile'] != 'N':
        # keepfile column: S sets KS, R sets KR, B sets both
        if pgrec['keepfile'] == "S":
            self.params['KS'] = 1
        elif pgrec['keepfile'] == "R":
            self.params['KR'] = 1
        elif pgrec['keepfile'] == "B":
            self.params['KR'] = 1
            self.params['KS'] = 1
    # command-line values take precedence over the control record's
    if pgrec['houroffset'] and 'HO' not in self.params: self.params['HO'] = [pgrec['houroffset']]
    if pgrec['emails'] and 'CC' not in self.params: self.add_carbon_copy(pgrec['emails'], 1)
    self.cache_data_time(cidx)
    self.PGOPT['UCNTL'] = pgrec
    return self.SUCCESS

# cache date time info
def cache_data_time(self, cidx):
    """Cache the end data time ("YYYY-MM-DD HH:59:59") of every dlupdt
    record under control index `cidx` into PGOPT['DTIMES'], keyed by lindex."""
    pgrecs = self.pgmget("dlupdt", "lindex, enddate, endhour", "cindex = {}".format(cidx), self.PGOPT['emlerr'])
    cnt = len(pgrecs['lindex']) if pgrecs else 0
    for i in range(cnt):
        if not pgrecs['enddate'][i]: continue
        # default to end of day when endhour is unset
        dhour = pgrecs['endhour'][i] if (pgrecs['endhour'][i] is not None) else 23
        self.PGOPT['DTIMES'][pgrecs['lindex'][i]] = "{} {:02}:59:59".format(pgrecs['enddate'][i], dhour)

# check if valid host to process update control
def valid_control_host(self, cstr, hosts, logact):
    """Validate the current host against the control's `hosts` spec; a
    leading '!' inverts the rule (listed hosts are excluded).  Returns
    SUCCESS when allowed, otherwise the pglog() result.

    NOTE(review): the current host name is used as a regex pattern against
    `hosts` via re.search — confirm host names never contain regex
    metacharacters that could mis-match.
    """
    host = self.get_host(1)
    if hosts:
        if re.search(host, hosts, re.I):
            # host is listed: an exclusion list ('!...') forbids it
            if hosts[0] == '!':
                return self.pglog("{}: CANNOT be processed on {}".format(cstr, hosts[1:]), logact)
        elif not re.match(r'^!', hosts):
            # host not listed and the list is inclusive: not allowed
            return self.pglog("{}-{}: MUST be processed on {}".format(host, cstr, hosts), logact)
    return self.SUCCESS

# reset updated data time
def reset_data_time(self, qu, ddate, dhour, lidx):
    """Refresh dcupdt.chktime for the cached update control and, when the
    earliest cached data time advances, dcupdt.datatime as well; `qu` is
    the frequency unit, `ddate`/`dhour` the newly updated data date/hour
    for local-file index `lidx`."""
    pgrec = self.PGOPT['UCNTL']
    record = {'chktime' : int(time.time())}
    if ddate:
        # NOTE(review): expression continues past the chunk boundary
        if dhour is None: dhour = 0 if qu == \
# NOTE(review): continuation of reset_data_time(); the first line below
# completes the conditional expression split at the chunk boundary.
                                            'H' else 23
        dtime = "{} {:02}:59:59".format(ddate, dhour)
        if lidx not in self.PGOPT['DTIMES'] or self.pgcmp(self.PGOPT['DTIMES'][lidx], dtime) < 0:
            self.PGOPT['DTIMES'][lidx] = dtime
        # get earliest data time
        for ltime in self.PGOPT['DTIMES'].values():
            if self.pgcmp(ltime, dtime) < 0: dtime = ltime
        if not pgrec['datatime'] or self.pgcmp(pgrec['datatime'], dtime) < 0:
            self.PGOPT['UCNTL']['datatime'] = record['datatime'] = dtime
    if self.pgupdt("dcupdt", record, "cindex = {}".format(pgrec['cindex']), self.PGOPT['extlog']) and 'datatime' in record:
        self.pglog("{}-C{}: Data time updated to {}".format(self.params['DS'], pgrec['cindex'], dtime), self.PGOPT['emllog'])

# adjust control time according to the control offset
def adjust_control_time(self, cntltime, freq, unit, offset, curtime):
    """Return the next control time strictly after `curtime`.

    The control offset (if any) is removed, the time is snapped down to
    the frequency boundary (hour multiple for unit 'H', period end date
    otherwise), the offset is re-applied, then the frequency `freq`
    (7-element list, see get_control_time) is added repeatedly until the
    result passes `curtime`.
    """
    if offset:
        ofreq = self.get_control_time(offset, "Control Offset")
        if ofreq: # remove control offset
            nfreq = ofreq.copy()
            for i in range(6):
                if nfreq[i]: nfreq[i] = -nfreq[i]
            cntltime = self.adddatetime(cntltime, nfreq[0], nfreq[1], nfreq[2], nfreq[3], nfreq[4], nfreq[5], nfreq[6])
    else:
        ofreq = None
    (cdate, ctime) = re.split(' ', cntltime)
    if unit == "H":
        # round the hour down to a multiple of the hourly frequency
        hr = 0
        if ctime:
            ms = re.match(r'^(\d+)', ctime)
            if ms: hr = int(int(ms.group(1))/freq[3])*freq[3]
        else:
            i = 0   # NOTE(review): dead assignment; `i` is unused in this branch
        cntltime = "{} {:02}:00:00".format(cdate, hr)
    else:
        # snap the date to the period boundary (weeks keep day-of-week offset)
        cdate = self.enddate(cdate, (0 if unit == "W" else 1), unit, freq[6])
        cntltime = "{} 00:00:00".format(cdate)
    if ofreq: cntltime = self.adddatetime(cntltime, ofreq[0], ofreq[1], ofreq[2], ofreq[3], ofreq[4], ofreq[5], ofreq[6]) # add control offset
    while self.pgcmp(cntltime, curtime) <= 0:
        cntltime = self.adddatetime(cntltime, freq[0], freq[1], freq[2], freq[3], freq[4], freq[5], freq[6])
    return cntltime

# reset control time
def reset_control_time(self):
    """Advance dcupdt.cntltime for the cached update control to the next
    scheduled time (or an earlier retry time after errors) and clear the
    lock pid."""
    pgrec = self.PGOPT['UCNTL']
    cstr = "{}-C{}".format(self.params['DS'], pgrec['cindex'])
    # temporarily zero the GMT offset; restored right after the current
    # time is taken (see the continuation of this method)
    gmt = self.PGLOG['GMTZ']
    self.PGLOG['GMTZ'] = 0
# NOTE(review): continuation of reset_control_time() begun in the previous
# chunk; `pgrec`, `cstr` and `gmt` are defined there.
    curtime = self.curtime(1)
    self.PGLOG['GMTZ'] = gmt   # restore the saved GMT offset
    (freq, unit) = self.get_control_frequency(pgrec['frequency'])
    # on failure get_control_frequency() returns the error text in `unit`
    if not freq: return self.pglog("{}: {}".format(cstr, unit), self.PGOPT['emlerr'])
    cntltime = self.check_datetime(pgrec['cntltime'], curtime)
    nexttime = self.adjust_control_time(cntltime, freq, unit, pgrec['cntloffset'], curtime)
    if self.PGLOG['ERRCNT']:
        # errors occurred: allow an earlier retry time based on retryint
        cfreq = self.get_control_time(pgrec['retryint'], "Retry Interval")
        if cfreq:
            while self.pgcmp(cntltime, curtime) <= 0:
                cntltime = self.adddatetime(cntltime, cfreq[0], cfreq[1], cfreq[2], cfreq[3], cfreq[4], cfreq[5], cfreq[6])
            if self.pgcmp(cntltime, nexttime) < 0: nexttime = cntltime
    record = {}
    cstr += ": Next Control Time "
    if not pgrec['cntltime'] or self.pgcmp(nexttime, pgrec['cntltime']) > 0:
        record['cntltime'] = nexttime
        cstr += "set to {}".format(nexttime)
        if self.PGLOG['ERRCNT']: cstr += " to retry"
    else:
        cstr += "already set to {}".format(pgrec['cntltime'])
    cstr += " for Action {}({})".format(self.PGOPT['CACT'], self.OPTS[self.PGOPT['CACT']][1])
    record['pid'] = 0   # always release the lock pid
    if self.pgupdt("dcupdt", record, "cindex = {}".format(pgrec['cindex']), self.PGOPT['extlog']):
        self.pglog(cstr, self.PGOPT['emllog'])

# get array information of individual controlling time
def get_control_time(self, val, type):
    """Parse a control-time spec such as '1Y2M3D4H' into a 7-element list
    [years, months, days, hours, 'N' units (presumably minutes — confirm),
    seconds, flag(unset here)]; 'W' adds weeks as 7-day increments.

    Returns 0 for an empty/zero spec, or the pglog() result for a
    fractional or all-zero spec.

    NOTE(review): parameter name `type` shadows the builtin.
    """
    if not val or val == '0': return 0
    if re.search(r'/(\d+)$', val):
        return self.pglog("{}: '{}' NOT support Fraction".format(val, type), self.PGOPT['emlerr'])
    ctimes = [0]*7 # initialize control times
    ms = re.search(r'(\d+)Y', val, re.I)
    if ms: ctimes[0] = int(ms.group(1))
    ms = re.search(r'(\d+)M', val, re.I)
    if ms: ctimes[1] = int(ms.group(1))
    ms = re.search(r'(\d+)D', val, re.I)
    if ms: ctimes[2] = int(ms.group(1))
    ms = re.search(r'(\d+)W', val, re.I)
    if ms: ctimes[2] += 7*int(ms.group(1))
    ms = re.search(r'(\d+)H', val, re.I)
    if ms: ctimes[3] = int(ms.group(1))
    ms = re.search(r'(\d+)N', val, re.I)
    # NOTE(review): assignment continues past the chunk boundary
    if ms: ctimes[4] = \
# NOTE(review): continuation of get_control_time(); the first line below
# completes the assignment split at the chunk boundary.
                       int(ms.group(1))
    ms = re.search(r'(\d+)S', val, re.I)
    if ms: ctimes[5] = int(ms.group(1))
    # at least one component must be positive for a valid spec
    for ctime in ctimes:
        if ctime > 0: return ctimes
    return self.pglog("{}: invalid '{}', must be (Y,M,W,D,H,N,S)".format(val, type), self.PGOPT['emlerr'])

# get group index from given option string
def get_group_index(self, option, edate, ehour, freq):
    """Resolve a dataset group index from `option`: -GI supplies the index
    directly (after temporal-pattern substitution with edate/ehour/freq);
    -GN supplies a group name (optionally single-quoted) looked up in the
    dsgroup table.  Returns 0 when nothing matches.
    """
    ms = re.search(r'-GI\s+(\S+)', option, re.I)
    if ms: return int(self.replace_pattern(ms.group(1), edate, ehour, freq))
    ms = re.search(r'-GN\s+(.*)$', option, re.I)
    if ms:
        grp = ms.group(1)
        if grp[0] == "'":
            # quoted group name: take the text up to the closing quote
            grp = grp[1:]
            idx = grp.find("'")
            grp = grp[:idx]
        else:
            # unquoted: take the first whitespace-delimited token
            ms = re.match(r'^(\S+)', grp)
            if ms: grp = ms.group(1)
        pgrec = self.pgget("dsgroup", "gindex", "dsid = '{}' AND grpid = '{}'".format(self.params['DS'], self.replace_pattern(grp, edate, ehour, freq)), self.PGOPT['extlog'])
        if pgrec: return pgrec['gindex']
    return 0
# NOTE(review): trailing unified-diff hunk for tests/test_dsupdt.py — patch
# metadata belonging to the enclosing diff file; reproduced as found.
diff --git a/tests/test_dsupdt.py b/tests/test_dsupdt.py
index 8736284..f4b12b2 100644
--- a/tests/test_dsupdt.py
+++ b/tests/test_dsupdt.py
@@ -1,6 +1,6 @@
-# test_hello_world.py
-
+# test_dsupdt.py
 import pytest
 def test_dsupdt():
-    pass
+    import rda_python_dsupdt.pg_updt
+    import rda_python_dsupdt.dsupdt