Description
Hi!
I am currently working through your vignette to learn the package, and I ran into an error when calling mod.train().
I am running this on my institute's HPC cluster (Linux-based, with 40x NVIDIA RTX GPUs).
The full output I got is below (a short diagnostic snippet follows at the end of this report):
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Set SLURM handle signals.
RuntimeError Traceback (most recent call last)
Input In [22], in <cell line: 1>()
----> 1 mod.train()

File /opt/anaconda/lib/python3.9/site-packages/cell2fate/_cell2fate_DynamicalModel.py:164, in Cell2fate_DynamicalModel.train(self, max_epochs, batch_size, train_size, lr, **kwargs)
161 kwargs["train_size"] = train_size
162 kwargs["lr"] = lr
--> 164 super().train(**kwargs)

File /opt/anaconda/lib/python3.9/site-packages/scvi/model/base/_pyromixin.py:146, in PyroSviTrainMixin.train(self, max_epochs, use_gpu, train_size, validation_size, batch_size, early_stopping, lr, training_plan, plan_kwargs, **trainer_kwargs)
136 trainer_kwargs["callbacks"].append(PyroJitGuideWarmup())
138 runner = TrainRunner(
139 self,
140 training_plan=training_plan,
(...)
144 **trainer_kwargs,
145 )
--> 146 return runner()

File /opt/anaconda/lib/python3.9/site-packages/scvi/train/_trainrunner.py:74, in TrainRunner.__call__(self)
71 if hasattr(self.data_splitter, "n_val"):
72 self.training_plan.n_obs_validation = self.data_splitter.n_val
---> 74 self.trainer.fit(self.training_plan, self.data_splitter)
75 self._update_history()
77 # data splitter only gets these attrs after fit

File /opt/anaconda/lib/python3.9/site-packages/scvi/train/_trainer.py:186, in Trainer.fit(self, *args, **kwargs)
180 if isinstance(args[0], PyroTrainingPlan):
181 warnings.filterwarnings(
182 action="ignore",
183 category=UserWarning,
184 message="`LightningModule.configure_optimizers` returned `None`",
185 )
--> 186 super().fit(*args, **kwargs)

File /opt/anaconda/lib/python3.9/site-packages/pytorch_lightning/trainer/trainer.py:740, in Trainer.fit(self, model, train_dataloaders, val_dataloaders, datamodule, train_dataloader, ckpt_path)
735 rank_zero_deprecation(
736 "`trainer.fit(train_dataloader)` is deprecated in v1.4 and will be removed in v1.6."
737 " Use `trainer.fit(train_dataloaders)` instead. HINT: added 's'"
738 )
739 train_dataloaders = train_dataloader
--> 740 self._call_and_handle_interrupt(
741 self._fit_impl, model, train_dataloaders, val_dataloaders, datamodule, ckpt_path
742 )

File /opt/anaconda/lib/python3.9/site-packages/pytorch_lightning/trainer/trainer.py:685, in Trainer._call_and_handle_interrupt(self, trainer_fn, *args, **kwargs)
675 r"""
676 Error handling, intended to be used only for main trainer function entry points (fit, validate, test, predict)
677 as all errors should funnel through them
(...)
682 **kwargs: keyword arguments to be passed to `trainer_fn`
683 """
684 try:
--> 685 return trainer_fn(*args, **kwargs)
686 # TODO: treat KeyboardInterrupt as BaseException (delete the code below) in v1.7
687 except KeyboardInterrupt as exception:

File /opt/anaconda/lib/python3.9/site-packages/pytorch_lightning/trainer/trainer.py:777, in Trainer._fit_impl(self, model, train_dataloaders, val_dataloaders, datamodule, ckpt_path)
775 # TODO: ckpt_path only in v1.7
776 ckpt_path = ckpt_path or self.resume_from_checkpoint
--> 777 self._run(model, ckpt_path=ckpt_path)
779 assert self.state.stopped
780 self.training = False

File /opt/anaconda/lib/python3.9/site-packages/pytorch_lightning/trainer/trainer.py:1199, in Trainer._run(self, model, ckpt_path)
1196 self.checkpoint_connector.resume_end()
1198 # dispatch `start_training` or `start_evaluating` or `start_predicting`
-> 1199 self._dispatch()
1201 # plugin will finalized fitting (e.g. ddp_spawn will load trained model)
1202 self._post_dispatch()

File /opt/anaconda/lib/python3.9/site-packages/pytorch_lightning/trainer/trainer.py:1279, in Trainer._dispatch(self)
1277 self.training_type_plugin.start_predicting(self)
1278 else:
-> 1279 self.training_type_plugin.start_training(self)

File /opt/anaconda/lib/python3.9/site-packages/pytorch_lightning/plugins/training_type/training_type_plugin.py:202, in TrainingTypePlugin.start_training(self, trainer)
200 def start_training(self, trainer: "pl.Trainer") -> None:
201 # double dispatch to initiate the training loop
--> 202 self._results = trainer.run_stage()

File /opt/anaconda/lib/python3.9/site-packages/pytorch_lightning/trainer/trainer.py:1289, in Trainer.run_stage(self)
1287 if self.predicting:
1288 return self._run_predict()
-> 1289 return self._run_train()

File /opt/anaconda/lib/python3.9/site-packages/pytorch_lightning/trainer/trainer.py:1319, in Trainer._run_train(self)
1317 self.fit_loop.trainer = self
1318 with torch.autograd.set_detect_anomaly(self._detect_anomaly):
-> 1319 self.fit_loop.run()

File /opt/anaconda/lib/python3.9/site-packages/pytorch_lightning/loops/base.py:145, in Loop.run(self, *args, **kwargs)
143 try:
144 self.on_advance_start(*args, **kwargs)
--> 145 self.advance(*args, **kwargs)
146 self.on_advance_end()
147 self.restarting = False

File /opt/anaconda/lib/python3.9/site-packages/pytorch_lightning/loops/fit_loop.py:234, in FitLoop.advance(self)
231 data_fetcher = self.trainer._data_connector.get_profiled_dataloader(dataloader)
233 with self.trainer.profiler.profile("run_training_epoch"):
--> 234 self.epoch_loop.run(data_fetcher)
236 # the global step is manually decreased here due to backwards compatibility with existing loggers
237 # as they expect that the same step is used when logging epoch end metrics even when the batch loop has
238 # finished. this means the attribute does not exactly track the number of optimizer steps applied.
239 # TODO(@carmocca): deprecate and rename so users don't get confused
240 self.global_step -= 1

File /opt/anaconda/lib/python3.9/site-packages/pytorch_lightning/loops/base.py:145, in Loop.run(self, *args, **kwargs)
143 try:
144 self.on_advance_start(*args, **kwargs)
--> 145 self.advance(*args, **kwargs)
146 self.on_advance_end()
147 self.restarting = False

File /opt/anaconda/lib/python3.9/site-packages/pytorch_lightning/loops/epoch/training_epoch_loop.py:193, in TrainingEpochLoop.advance(self, *args, **kwargs)
190 self.batch_progress.increment_started()
192 with self.trainer.profiler.profile("run_training_batch"):
--> 193 batch_output = self.batch_loop.run(batch, batch_idx)
195 self.batch_progress.increment_processed()
197 # update non-plateau LR schedulers
198 # update epoch-interval ones only when we are at the end of training epoch

File /opt/anaconda/lib/python3.9/site-packages/pytorch_lightning/loops/base.py:145, in Loop.run(self, *args, **kwargs)
143 try:
144 self.on_advance_start(*args, **kwargs)
--> 145 self.advance(*args, **kwargs)
146 self.on_advance_end()
147 self.restarting = False

File /opt/anaconda/lib/python3.9/site-packages/pytorch_lightning/loops/batch/training_batch_loop.py:90, in TrainingBatchLoop.advance(self, batch, batch_idx)
88 outputs = self.optimizer_loop.run(split_batch, optimizers, batch_idx)
89 else:
---> 90 outputs = self.manual_loop.run(split_batch, batch_idx)
91 if outputs:
92 # automatic: can be empty if all optimizers skip their batches
93 # manual: #9052 added support for raising `StopIteration` in the `training_step`. If that happens,
94 # then `advance` doesn't finish and an empty dict is returned
95 self._outputs.append(outputs)

File /opt/anaconda/lib/python3.9/site-packages/pytorch_lightning/loops/base.py:145, in Loop.run(self, *args, **kwargs)
143 try:
144 self.on_advance_start(*args, **kwargs)
--> 145 self.advance(*args, **kwargs)
146 self.on_advance_end()
147 self.restarting = False

File /opt/anaconda/lib/python3.9/site-packages/pytorch_lightning/loops/optimization/manual_loop.py:111, in ManualOptimization.advance(self, batch, batch_idx)
109 lightning_module._current_fx_name = "training_step"
110 with self.trainer.profiler.profile("training_step"):
--> 111 training_step_output = self.trainer.accelerator.training_step(step_kwargs)
112 self.trainer.training_type_plugin.post_training_step()
114 del step_kwargs

File /opt/anaconda/lib/python3.9/site-packages/pytorch_lightning/accelerators/accelerator.py:219, in Accelerator.training_step(self, step_kwargs)
214 """The actual training step.
215
216 See :meth:`~pytorch_lightning.core.lightning.LightningModule.training_step` for more details
217 """
218 with self.precision_plugin.train_step_context():
--> 219 return self.training_type_plugin.training_step(*step_kwargs.values())

File /opt/anaconda/lib/python3.9/site-packages/pytorch_lightning/plugins/training_type/training_type_plugin.py:213, in TrainingTypePlugin.training_step(self, *args, **kwargs)
212 def training_step(self, *args, **kwargs):
--> 213 return self.model.training_step(*args, **kwargs)

File /opt/anaconda/lib/python3.9/site-packages/scvi/train/_trainingplans.py:741, in PyroTrainingPlan.training_step(self, batch, batch_idx)
739 kwargs.update({"kl_weight": self.kl_weight})
740 # pytorch lightning requires a Tensor object for loss
--> 741 loss = torch.Tensor([self.svi.step(*args, **kwargs)])
743 return {"loss": loss}

File /opt/anaconda/lib/python3.9/site-packages/pyro/infer/svi.py:145, in SVI.step(self, *args, **kwargs)
143 # get loss and compute gradients
144 with poutine.trace(param_only=True) as param_capture:
--> 145 loss = self.loss_and_grads(self.model, self.guide, *args, **kwargs)
147 params = set(
148 site["value"].unconstrained() for site in param_capture.trace.nodes.values()
149 )
151 # actually perform gradient steps
152 # torch.optim objects gets instantiated for any params that haven't been seen yet

File /opt/anaconda/lib/python3.9/site-packages/pyro/infer/trace_elbo.py:140, in Trace_ELBO.loss_and_grads(self, model, guide, *args, **kwargs)
138 loss = 0.0
139 # grab a trace from the generator
--> 140 for model_trace, guide_trace in self._get_traces(model, guide, args, kwargs):
141 loss_particle, surrogate_loss_particle = self._differentiable_loss_particle(
142 model_trace, guide_trace
143 )
144 loss += loss_particle / self.num_particles

File /opt/anaconda/lib/python3.9/site-packages/pyro/infer/elbo.py:237, in ELBO._get_traces(self, model, guide, args, kwargs)
235 else:
236 for i in range(self.num_particles):
--> 237 yield self._get_trace(model, guide, args, kwargs)

File /opt/anaconda/lib/python3.9/site-packages/pyro/infer/trace_elbo.py:57, in Trace_ELBO._get_trace(self, model, guide, args, kwargs)
52 def _get_trace(self, model, guide, args, kwargs):
53 """
54 Returns a single trace from the guide, and the model that is run
55 against it.
56 """
---> 57 model_trace, guide_trace = get_importance_trace(
58 "flat", self.max_plate_nesting, model, guide, args, kwargs
59 )
60 if is_validation_enabled():
61 check_if_enumerated(guide_trace)

File /opt/anaconda/lib/python3.9/site-packages/pyro/infer/enum.py:75, in get_importance_trace(graph_type, max_plate_nesting, model, guide, args, kwargs, detach)
72 guide_trace = prune_subsample_sites(guide_trace)
73 model_trace = prune_subsample_sites(model_trace)
---> 75 model_trace.compute_log_prob()
76 guide_trace.compute_score_parts()
77 if is_validation_enabled():

File /opt/anaconda/lib/python3.9/site-packages/pyro/poutine/trace_struct.py:230, in Trace.compute_log_prob(self, site_filter)
228 if "log_prob" not in site:
229 try:
--> 230 log_p = site["fn"].log_prob(
231 site["value"], *site["args"], **site["kwargs"]
232 )
233 except ValueError as e:
234 _, exc_value, traceback = sys.exc_info()

File /opt/anaconda/lib/python3.9/site-packages/torch/distributions/gamma.py:71, in Gamma.log_prob(self, value)
67 if self._validate_args:
68 self._validate_sample(value)
69 return (self.concentration * torch.log(self.rate) +
70 (self.concentration - 1) * torch.log(value) -
---> 71 self.rate * value - torch.lgamma(self.concentration))

RuntimeError: nvrtc: error: invalid value for --gpu-architecture (-arch)
#define POS_INFINITY __int_as_float(0x7f800000)
#define INFINITY POS_INFINITY
#define NEG_INFINITY __int_as_float(0xff800000)
#define NAN __int_as_float(0x7fffffff)

typedef long long int int64_t;
typedef unsigned int uint32_t;
typedef signed char int8_t;
typedef unsigned char uint8_t; // NOTE: this MUST be "unsigned char"! "char" is equivalent to "signed char"
typedef short int16_t;
static_assert(sizeof(int64_t) == 8, "expected size does not match");
static_assert(sizeof(uint32_t) == 4, "expected size does not match");
static_assert(sizeof(int8_t) == 1, "expected size does not match");
constexpr int num_threads = 128;
constexpr int thread_work_size = 4; // TODO: make template substitution once we decide where those vars live
constexpr int block_work_size = thread_work_size * num_threads;
//TODO use _assert_fail, because assert is disabled in non-debug builds
#define ERROR_UNSUPPORTED_CAST assert(false);

namespace std {

using ::signbit;
using ::isfinite;
using ::isinf;
using ::isnan;

using ::abs;
using ::acos;
using ::acosf;
using ::asin;
using ::asinf;
using ::atan;
using ::atanf;
using ::atan2;
using ::atan2f;
using ::ceil;
using ::ceilf;
using ::cos;
using ::cosf;
using ::cosh;
using ::coshf;
using ::exp;
using ::expf;
using ::fabs;
using ::fabsf;
using ::floor;
using ::floorf;
using ::fmod;
using ::fmodf;
using ::frexp;
using ::frexpf;
using ::ldexp;
using ::ldexpf;
using ::log;
using ::logf;
using ::log10;
using ::log10f;
using ::modf;
using ::modff;
using ::pow;
using ::powf;
using ::sin;
using ::sinf;
using ::sinh;
using ::sinhf;
using ::sqrt;
using ::sqrtf;
using ::tan;
using ::tanf;
using ::tanh;
using ::tanhf;
using ::acosh;
using ::acoshf;
using ::asinh;
using ::asinhf;
using ::atanh;
using ::atanhf;
using ::cbrt;
using ::cbrtf;
using ::copysign;
using ::copysignf;
using ::erf;
using ::erff;
using ::erfc;
using ::erfcf;
using ::exp2;
using ::exp2f;
using ::expm1;
using ::expm1f;
using ::fdim;
using ::fdimf;
using ::fmaf;
using ::fma;
using ::fmax;
using ::fmaxf;
using ::fmin;
using ::fminf;
using ::hypot;
using ::hypotf;
using ::ilogb;
using ::ilogbf;
using ::lgamma;
using ::lgammaf;
using ::llrint;
using ::llrintf;
using ::llround;
using ::llroundf;
using ::log1p;
using ::log1pf;
using ::log2;
using ::log2f;
using ::logb;
using ::logbf;
using ::lrint;
using ::lrintf;
using ::lround;
using ::lroundf;
using ::nan;
using ::nanf;
using ::nearbyint;
using ::nearbyintf;
using ::nextafter;
using ::nextafterf;
using ::remainder;
using ::remainderf;
using ::remquo;
using ::remquof;
using ::rint;
using ::rintf;
using ::round;
using ::roundf;
using ::scalbln;
using ::scalblnf;
using ::scalbn;
using ::scalbnf;
using ::tgamma;
using ::tgammaf;
using ::trunc;
using ::truncf;
} // namespace std
// NB: Order matters for this macro; it is relied upon in
// promoteTypesLookup and the serialization format.
// Note, some types have ctype as void because we don't support them in codegen
#define AT_FORALL_SCALAR_TYPES_WITH_COMPLEX(_) \
  _(uint8_t, Byte) /* 0 */ \
  _(int8_t, Char) /* 1 */ \
  _(int16_t, Short) /* 2 */ \
  _(int, Int) /* 3 */ \
  _(int64_t, Long) /* 4 */ \
  _(at::Half, Half) /* 5 */ \
  _(float, Float) /* 6 */ \
  _(double, Double) /* 7 */ \
  _(std::complex<at::Half>, ComplexHalf) /* 8 */ \
  _(std::complex<float>, ComplexFloat) /* 9 */ \
  _(std::complex<double>, ComplexDouble) /* 10 */ \
  _(bool, Bool) /* 11 */ \
  _(void, QInt8) /* 12 */ \
  _(void, QUInt8) /* 13 */ \
  _(void, QInt32) /* 14 */ \
  _(at::BFloat16, BFloat16) /* 15 */

#define AT_FORALL_SCALAR_TYPES_WITH_COMPLEX_EXCEPT_COMPLEX_HALF(_) \
  _(uint8_t, Byte) \
  _(int8_t, Char) \
  _(int16_t, Short) \
  _(int, Int) \
  _(int64_t, Long) \
  _(at::Half, Half) \
  _(float, Float) \
  _(double, Double) \
  _(std::complex<float>, ComplexFloat) \
  _(std::complex<double>, ComplexDouble) \
  _(bool, Bool) \
  _(at::BFloat16, BFloat16)

enum class ScalarType : int8_t {
#define DEFINE_ENUM(_1, n) n,
AT_FORALL_SCALAR_TYPES_WITH_COMPLEX(DEFINE_ENUM)
#undef DEFINE_ENUM
Undefined,
NumOptions
};

template <typename T, int size>
struct Array {
T data[size];

__device__ T operator[](int i) const {
return data[i];
}
__device__ T& operator[](int i) {
return data[i];
}
Array() = default;
Array(const Array&) = default;
Array& operator=(const Array&) = default;
};

template <typename scalar_t>
__device__ inline scalar_t load(char* base_ptr, uint32_t offset) {
    return *(reinterpret_cast<scalar_t*>(base_ptr) + offset);
}

template <typename scalar_t>
__device__ inline void store(scalar_t value, char *base_ptr, uint32_t offset) {
    *(reinterpret_cast<scalar_t *>(base_ptr) + offset) = value;
}

// aligned vector generates vectorized load/store on CUDA
template<typename scalar_t, int vec_size>
struct alignas(sizeof(scalar_t) * vec_size) aligned_vector {
scalar_t val[vec_size];
};

template <typename T>
T lgamma_kernel(T a) { return lgamma(a); }
// TODO: setup grid-stride loop
extern "C" __global__
void lgamma_kernel_vectorized4_kernel(
const int N,
Array<char*, 1+1> data,
float scalar_val) //[1+1],
{
constexpr int vec_size = 4;
int remaining = N - block_work_size * blockIdx.x;
auto thread_idx = threadIdx.x;
int idx = blockIdx.x;
float arg0[4];
float out[4];

if (remaining < block_work_size) {
    #pragma unroll
    for (int j = 0; j < thread_work_size; j++){
        if (thread_idx >= remaining) {
            break;
        }
        int linear_idx = thread_idx + block_work_size * idx;
        arg0[j] = load<float>(data[1], linear_idx);
        thread_idx += num_threads;
    }
    #pragma unroll
    for (int j = 0; j < thread_work_size; j++) {
        if ((threadIdx.x + j*num_threads) < remaining) {
            out[j] = lgamma_kernel<float>(arg0[j]);
        }
    }
    thread_idx = threadIdx.x;
    #pragma unroll
    for (int j = 0; j < thread_work_size; j++) {
        if (thread_idx >= remaining) {
            break;
        }
        int linear_idx = thread_idx + block_work_size * idx;
        store<float>(out[j], data[0], linear_idx);
        thread_idx += num_threads;
    }
} else {
    static constexpr int loop_size = thread_work_size / vec_size;
    // actual loading
    using vec_t_input = aligned_vector<float, vec_size>;
    vec_t_input * vec0 = reinterpret_cast<vec_t_input *>(data[0+1]) + block_work_size / vec_size * idx;
    #pragma unroll
    for (int i = 0; i < loop_size; i++){
        vec_t_input v;
        v = vec0[thread_idx];
        #pragma unroll
        for (int j = 0; j < vec_size; j++){
            arg0[vec_size * i + j] = v.val[j];
        }
        thread_idx += num_threads;
    }
    #pragma unroll
    for (int j = 0; j < thread_work_size; j++) {
        out[j] = lgamma_kernel<float>(arg0[j]);
    }
    using vec_t_output = aligned_vector<float, vec_size>;
    vec_t_output * to_ = reinterpret_cast<vec_t_output *>(data[0]) + block_work_size / vec_size * idx;
    int thread_idx = threadIdx.x;
    #pragma unroll
    for (int i = 0; i < loop_size; i++){
        vec_t_output v;
        #pragma unroll
        for (int j = 0; j < vec_size; j++){
            v.val[j] = out[vec_size * i + j];
        }
        to_[thread_idx] = v;
        thread_idx += num_threads;
    }
}
}
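For what it's worth, the "nvrtc: error: invalid value for --gpu-architecture (-arch)" line usually means that the PyTorch/CUDA build in this environment does not recognise the compute capability of the RTX cards on the cluster (a newer GPU paired with a PyTorch build compiled against an older CUDA toolkit), so the JIT-compiled lgamma kernel used by Gamma.log_prob fails to compile. Below is a small diagnostic sketch, plain PyTorch and nothing cell2fate-specific (the tensor sizes are arbitrary), that can be run in the same environment to check whether the problem reproduces outside the package:

import torch

# Versions of PyTorch and the CUDA toolkit it was built against
print("torch:", torch.__version__, "| built for CUDA:", torch.version.cuda)

# The GPU that SLURM exposed to the job and its compute capability
print("device:", torch.cuda.get_device_name(0))
print("compute capability:", torch.cuda.get_device_capability(0))

# Architectures this PyTorch build was compiled for; a device capability
# newer than anything listed here is a strong hint the build is too old
# for the card.
print("supported arch list:", torch.cuda.get_arch_list())

# Minimal reproduction of the failing code path: lgamma on a CUDA tensor,
# and Gamma.log_prob, which is where the traceback above ends up.
x = torch.rand(16, device="cuda") + 0.1
print(torch.lgamma(x))
print(torch.distributions.Gamma(concentration=x, rate=x).log_prob(x))

If the torch.lgamma call alone raises the same RuntimeError, the issue lies with the PyTorch/CUDA installation on the cluster rather than with cell2fate, and installing a PyTorch build whose CUDA version supports the card would be the first thing to try. As a stop-gap, the PyroSviTrainMixin.train signature in the traceback shows a use_gpu argument that is forwarded from mod.train(), so mod.train(use_gpu=False) may run on CPU, at the cost of speed.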