Skip to content

Commit

Permalink
Fix permission error for ORTModule lock file (microsoft#7814)
Browse files Browse the repository at this point in the history
  • Loading branch information
Thiago Crepaldi authored May 26, 2021
1 parent c487824 commit c5ea590
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 13 deletions.
32 changes: 27 additions & 5 deletions orttraining/orttraining/python/training/ortmodule/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,27 +11,49 @@
################################################################################
ONNX_OPSET_VERSION = 12
MINIMUM_TORCH_VERSION_STR = '1.8.1'

# Use one of the available directories as Torch CPP extension in the following order:
# 1) Path listed in the TORCH_EXTENSIONS_DIR environment variable
# 2) Default Python package dir
# 3) <Home directory>/.cache
home_dir = os.path.expanduser("~")
python_package_dir = os.path.dirname(__file__)
torch_extensions_dir = os.environ.get('TORCH_EXTENSIONS_DIR')

# Default location (inside the package) and the fallback used when the package
# directory is not writable (e.g. a system-wide, read-only installation).
TORCH_CPP_BUILD_DIR = os.path.join(python_package_dir, 'torch_inline_extensions')
TORCH_CPP_BUILD_DIR_BACKUP = os.path.join(home_dir, '.cache', 'torch_ort_extensions')

if torch_extensions_dir is not None and os.access(torch_extensions_dir, os.X_OK | os.W_OK):
    # An explicit, usable user override always wins.
    TORCH_CPP_BUILD_DIR = torch_extensions_dir
elif not os.access(python_package_dir, os.X_OK | os.W_OK):
    if os.access(home_dir, os.X_OK | os.W_OK):
        TORCH_CPP_BUILD_DIR = TORCH_CPP_BUILD_DIR_BACKUP
    else:
        extra_message = ''
        if torch_extensions_dir:
            extra_message = 'or the path pointed by the TORCH_EXTENSIONS_DIR environment variable '
        # Build ONE message string: PermissionError is an OSError subclass, and
        # two positional arguments would be interpreted as (errno, strerror).
        raise PermissionError(
            'ORTModule could not find a writable directory to cache its internal files. '
            f'Make {python_package_dir} or {home_dir} {extra_message}writable and try again.')

# Check whether Torch C++ extension compilation was aborted in previous runs.
# A freshly created build directory cannot contain a lock file, so the lock
# check only runs when the directory already existed.
if not os.path.exists(TORCH_CPP_BUILD_DIR):
    os.makedirs(TORCH_CPP_BUILD_DIR, exist_ok=True)
elif os.path.exists(os.path.join(TORCH_CPP_BUILD_DIR, 'lock')):
    # Only warn: the lock may belong to another process that is compiling the
    # extension right now, so it is not deleted here.
    print("WARNING: ORTModule detected PyTorch CPP extension's lock file during initialization, "
          "which can cause unexpected hangs. "
          f"Delete {os.path.join(TORCH_CPP_BUILD_DIR,'lock')} if a hang occurs.")

# Verify proper PyTorch is installed before proceeding to ONNX Runtime initialization
try:
    import torch
except ImportError as e:
    # Raise a real exception type (raising a bare string is itself a TypeError)
    # and chain the original import failure for easier debugging.
    raise ImportError(
        f'PyTorch {MINIMUM_TORCH_VERSION_STR} must be installed in order to run ONNX Runtime ORTModule frontend!') from e

# The version check lives outside the try block so that its RuntimeError
# reaches the user instead of being swallowed by the import-failure handler.
# Local build suffixes such as '+cu111' are stripped before comparing.
torch_version = version.parse(torch.__version__.split('+')[0])
minimum_torch_version = version.parse(MINIMUM_TORCH_VERSION_STR)
if torch_version < minimum_torch_version:
    raise RuntimeError(
        f'ONNX Runtime ORTModule frontend requires PyTorch version greater or equal to {MINIMUM_TORCH_VERSION_STR}, '
        f'but version {torch.__version__} was found instead.')

# ORTModule must be loaded only after all validation passes
from .ortmodule import ORTModule
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ stages:

- task: CmdLine@2
displayName: 'Build Python Documentation'
condition: ne(variables['PythonVersion'], '3.9') # tensorflow not available on python 3.9
condition: and(succeeded(), ne(variables['PythonVersion'], '3.9')) # tensorflow not available on python 3.9
inputs:
script: |
mkdir -p $HOME/.onnx
Expand All @@ -137,7 +137,7 @@ stages:

- task: CopyFiles@2
displayName: 'Copy Python Documentation to: $(Build.ArtifactStagingDirectory)'
condition: ne(variables['PythonVersion'], '3.9') # tensorflow not available on python 3.9
condition: and(succeeded(), ne(variables['PythonVersion'], '3.9')) # tensorflow not available on python 3.9
inputs:
SourceFolder: '$(Build.BinariesDirectory)/docs/inference/html'
Contents: '**'
Expand Down Expand Up @@ -431,7 +431,7 @@ stages:

- task: CmdLine@2
displayName: 'Build Python Documentation'
condition: ne(variables['PythonVersion'], '3.9') # tensorflow not available on python 3.9
condition: and(succeeded(), ne(variables['PythonVersion'], '3.9')) # tensorflow not available on python 3.9
inputs:
script: |
mkdir -p $HOME/.onnx
Expand All @@ -447,7 +447,7 @@ stages:

- task: CopyFiles@2
displayName: 'Copy Python Documentation to: $(Build.ArtifactStagingDirectory)'
condition: ne(variables['PythonVersion'], '3.9') # tensorflow not available on python 3.9
condition: and(succeeded(), ne(variables['PythonVersion'], '3.9')) # tensorflow not available on python 3.9
inputs:
SourceFolder: '$(Build.BinariesDirectory)/docs/training/html'
Contents: '**'
Expand Down Expand Up @@ -588,7 +588,7 @@ stages:

- task: CmdLine@2
displayName: 'Build Python Documentation'
condition: ne(variables['PythonVersion'], '3.9') # tensorflow not available on python 3.9
condition: and(succeeded(), ne(variables['PythonVersion'], '3.9')) # tensorflow not available on python 3.9
inputs:
script: |
mkdir -p $HOME/.onnx
Expand All @@ -606,7 +606,7 @@ stages:

- task: CopyFiles@2
displayName: 'Copy Python Documentation to: $(Build.ArtifactStagingDirectory)'
condition: ne(variables['PythonVersion'], '3.9') # tensorflow not available on python 3.9
condition: and(succeeded(), ne(variables['PythonVersion'], '3.9')) # tensorflow not available on python 3.9
inputs:
SourceFolder: '$(Build.BinariesDirectory)/docs/training/html'
Contents: '**'
Expand Down Expand Up @@ -761,7 +761,7 @@ stages:

- task: CmdLine@2
displayName: 'Build Python Documentation'
condition: ne(variables['PythonVersion'], '3.9') # tensorflow not available on python 3.9
condition: and(succeeded(), ne(variables['PythonVersion'], '3.9')) # tensorflow not available on python 3.9
inputs:
script: |
mkdir -p $HOME/.onnx
Expand All @@ -779,7 +779,7 @@ stages:

- task: CopyFiles@2
displayName: 'Copy Python Documentation to: $(Build.ArtifactStagingDirectory)'
condition: ne(variables['PythonVersion'], '3.9') # tensorflow not available on python 3.9
condition: and(succeeded(), ne(variables['PythonVersion'], '3.9')) # tensorflow not available on python 3.9
inputs:
SourceFolder: '$(Build.BinariesDirectory)/docs/training/html'
Contents: '**'
Expand Down

0 comments on commit c5ea590

Please sign in to comment.