diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 9a02f5990..c1ecf43e3 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -294,6 +294,28 @@ windows-x86_64 test mgpu:
     - os/windows
     - gpu/2x-A5000
 
+
+linux-x86_64 test warp-init:
+  stage: test
+  needs: [linux-x86_64 build]
+  extends:
+    - .omni_nvks_gpu
+    - .save_test_report_artifact
+  before_script:
+    - echo -e "\\e[0Ksection_start:`date +%s`:install_dependencies[collapsed=true]\\r\\e[0KInstalling dependencies"
+    - df -h
+    # Move compiled binaries out of platform-specific directory
+    - mv warp/bin/linux-x86_64/warp.so warp/bin/
+    - mv warp/bin/linux-x86_64/warp-clang.so warp/bin/
+    - tools/packman/packman install -l _build/target-deps/python python ${DEFAULT_PYTHON}-linux-x86_64
+    - $PYTHON -m venv _venv
+    - source _venv/bin/activate
+    - python -m pip install --upgrade pip
+    - python -m pip install -e .
+    - echo -e "\\e[0Ksection_end:`date +%s`:install_dependencies\\r\\e[0K"
+  script:
+    - python -m warp.tests --junit-report-xml rspec.xml -s autodetect --disable-process-pooling --disable-concurrent-futures --level test -p 'test_implicit_init.py'
+
 # The only purpose of this job is to make sure documentation can be built on Windows.
 # The output does not get published anywhere, but the website can be viewed in the
 # artifacts.
diff --git a/warp/context.py b/warp/context.py
index 9b160206f..c758e8ff2 100644
--- a/warp/context.py
+++ b/warp/context.py
@@ -347,7 +347,7 @@ def __repr__(self):
 def call_builtin(func: Function, *params) -> Tuple[bool, Any]:
     uses_non_warp_array_type = False
 
-    warp.context.init()
+    init()
 
     # Retrieve the built-in function from Warp's dll.
     c_func = getattr(warp.context.runtime.core, func.mangled_name)
@@ -3632,6 +3632,9 @@ def is_mempool_access_supported(target_device: Devicelike, peer_device: Deviceli
 
     init()
 
+    target_device = runtime.get_device(target_device)
+    peer_device = runtime.get_device(peer_device)
+
     return target_device.is_mempool_supported and is_peer_access_supported(target_device, peer_device)
 
 
diff --git a/warp/tests/test_implicit_init.py b/warp/tests/test_implicit_init.py
new file mode 100644
index 000000000..e9daef584
--- /dev/null
+++ b/warp/tests/test_implicit_init.py
@@ -0,0 +1,354 @@
+# Copyright (c) 2024 NVIDIA CORPORATION.  All rights reserved.
+# NVIDIA CORPORATION and its licensors retain all intellectual property
+# and proprietary rights in and to this software, related documentation
+# and any modifications thereto.  Any use, reproduction, disclosure or
+# distribution of this software and related documentation without an express
+# license agreement from NVIDIA CORPORATION is strictly prohibited.
+
+import unittest
+
+import warp as wp
+from warp.tests.unittest_utils import *
+
+#   Array Initialization
+# ------------------------------------------------------------------------------
+
+
+def test_array_from_data(test, device):
+    wp.array((1.0, 2.0, 3.0), dtype=float)
+
+
+class TestImplicitInitArrayFromData(unittest.TestCase):
+    pass
+
+
+add_function_test(
+    TestImplicitInitArrayFromData,
+    "test_array_from_data",
+    test_array_from_data,
+    check_output=False,
+)
+
+
+def test_array_from_ptr(test, device):
+    wp.array(ptr=0, shape=(123,), dtype=float)
+
+
+class TestImplicitInitArrayFromPtr(unittest.TestCase):
+    pass
+
+
+add_function_test(
+    TestImplicitInitArrayFromPtr,
+    "test_array_from_ptr",
+    test_array_from_ptr,
+    check_output=False,
+)
+
+
+#   Builtin Call
+# ------------------------------------------------------------------------------
+
+
+def test_builtin_call(test, device):
+    wp.sin(1.23)
+
+
+class TestImplicitInitBuiltinCall(unittest.TestCase):
+    pass
+
+
+add_function_test(
+    TestImplicitInitBuiltinCall,
+    "test_builtin_call",
+    test_builtin_call,
+    check_output=False,
+)
+
+
+#   Devices
+# ------------------------------------------------------------------------------
+
+
+def test_get_cuda_device_count(test, device):
+    wp.get_cuda_device_count()
+
+
+class TestImplicitInitGetCudaDeviceCount(unittest.TestCase):
+    pass
+
+
+add_function_test(
+    TestImplicitInitGetCudaDeviceCount,
+    "test_get_cuda_device_count",
+    test_get_cuda_device_count,
+    check_output=False,
+)
+
+
+def test_get_cuda_devices(test, device):
+    wp.get_cuda_devices()
+
+
+class TestImplicitInitGetCudaDevices(unittest.TestCase):
+    pass
+
+
+add_function_test(
+    TestImplicitInitGetCudaDevices,
+    "test_get_cuda_devices",
+    test_get_cuda_devices,
+    check_output=False,
+)
+
+
+def test_get_device(test, device):
+    wp.get_device("cpu")
+
+
+class TestImplicitInitGetDevice(unittest.TestCase):
+    pass
+
+
+add_function_test(
+    TestImplicitInitGetDevice,
+    "test_get_device",
+    test_get_device,
+    check_output=False,
+)
+
+
+def test_get_devices(test, device):
+    wp.get_devices()
+
+
+class TestImplicitInitGetDevices(unittest.TestCase):
+    pass
+
+
+add_function_test(
+    TestImplicitInitGetDevices,
+    "test_get_devices",
+    test_get_devices,
+    check_output=False,
+)
+
+
+def test_get_preferred_device(test, device):
+    wp.get_preferred_device()
+
+
+class TestImplicitInitGetPreferredDevice(unittest.TestCase):
+    pass
+
+
+add_function_test(
+    TestImplicitInitGetPreferredDevice,
+    "test_get_preferred_device",
+    test_get_preferred_device,
+    check_output=False,
+)
+
+
+def test_is_cpu_available(test, device):
+    wp.is_cpu_available()
+
+
+class TestImplicitInitIsCpuAvailable(unittest.TestCase):
+    pass
+
+
+add_function_test(
+    TestImplicitInitIsCpuAvailable,
+    "test_is_cpu_available",
+    test_is_cpu_available,
+    check_output=False,
+)
+
+
+def test_is_cuda_available(test, device):
+    wp.is_cuda_available()
+
+
+class TestImplicitInitIsCudaAvailable(unittest.TestCase):
+    pass
+
+
+add_function_test(
+    TestImplicitInitIsCudaAvailable,
+    "test_is_cuda_available",
+    test_is_cuda_available,
+    check_output=False,
+)
+
+
+def test_is_device_available(test, device):
+    wp.is_device_available("cpu")
+
+
+class TestImplicitInitIsDeviceAvailable(unittest.TestCase):
+    pass
+
+
+add_function_test(
+    TestImplicitInitIsDeviceAvailable,
+    "test_is_device_available",
+    test_is_device_available,
+    check_output=False,
+)
+
+
+def test_set_device(test, device):
+    wp.set_device("cpu")
+
+
+class TestImplicitInitSetDevice(unittest.TestCase):
+    pass
+
+
+add_function_test(
+    TestImplicitInitSetDevice,
+    "test_set_device",
+    test_set_device,
+    check_output=False,
+)
+
+
+#   Launch
+# ------------------------------------------------------------------------------
+
+
+@wp.kernel
+def launch_kernel():
+    pass
+
+
+def test_launch(test, device):
+    wp.launch(launch_kernel, dim=1)
+
+
+class TestImplicitInitLaunch(unittest.TestCase):
+    pass
+
+
+add_function_test(
+    TestImplicitInitLaunch,
+    "test_launch",
+    test_launch,
+    check_output=False,
+)
+
+
+#   Mempool
+# ------------------------------------------------------------------------------
+
+
+def test_is_mempool_enabled(test, device):
+    wp.is_mempool_enabled("cpu")
+
+
+class TestImplicitInitIsMempoolEnabled(unittest.TestCase):
+    pass
+
+
+add_function_test(
+    TestImplicitInitIsMempoolEnabled,
+    "test_is_mempool_enabled",
+    test_is_mempool_enabled,
+    check_output=False,
+)
+
+
+def test_is_mempool_supported(test, device):
+    wp.is_mempool_supported("cpu")
+
+
+class TestImplicitInitIsMempoolSupported(unittest.TestCase):
+    pass
+
+
+add_function_test(
+    TestImplicitInitIsMempoolSupported,
+    "test_is_mempool_supported",
+    test_is_mempool_supported,
+    check_output=False,
+)
+
+
+#   Mempool Access
+# ------------------------------------------------------------------------------
+
+
+def test_is_mempool_access_enabled(test, device):
+    wp.is_mempool_access_enabled("cpu", "cpu")
+
+
+class TestImplicitInitIsMempoolAccessEnabled(unittest.TestCase):
+    pass
+
+
+add_function_test(
+    TestImplicitInitIsMempoolAccessEnabled,
+    "test_is_mempool_access_enabled",
+    test_is_mempool_access_enabled,
+    check_output=False,
+)
+
+
+def test_is_mempool_access_supported(test, device):
+    wp.is_mempool_access_supported("cpu", "cpu")
+
+
+class TestImplicitInitIsMempoolAccessSupported(unittest.TestCase):
+    pass
+
+
+add_function_test(
+    TestImplicitInitIsMempoolAccessSupported,
+    "test_is_mempool_access_supported",
+    test_is_mempool_access_supported,
+    check_output=False,
+)
+
+
+#   Peer Access
+# ------------------------------------------------------------------------------
+
+
+def test_is_peer_access_enabled(test, device):
+    wp.is_peer_access_enabled("cpu", "cpu")
+
+
+class TestImplicitInitIsPeerAccessEnabled(unittest.TestCase):
+    pass
+
+
+add_function_test(
+    TestImplicitInitIsPeerAccessEnabled,
+    "test_is_peer_access_enabled",
+    test_is_peer_access_enabled,
+    check_output=False,
+)
+
+
+def test_is_peer_access_supported(test, device):
+    wp.is_peer_access_supported("cpu", "cpu")
+
+
+class TestImplicitInitIsPeerAccessSupported(unittest.TestCase):
+    pass
+
+
+add_function_test(
+    TestImplicitInitIsPeerAccessSupported,
+    "test_is_peer_access_supported",
+    test_is_peer_access_supported,
+    check_output=False,
+)
+
+
+if __name__ == "__main__":
+    # Do not clear the kernel cache or call anything that would initialize Warp
+    # since these tests are specifically aiming to catch issues where Warp isn't
+    # correctly initialized upon calling certain public APIs.
+    unittest.main(verbosity=2, failfast=True)
diff --git a/warp/thirdparty/unittest_parallel.py b/warp/thirdparty/unittest_parallel.py
index 79ac956bd..acf7eff0b 100644
--- a/warp/thirdparty/unittest_parallel.py
+++ b/warp/thirdparty/unittest_parallel.py
@@ -554,9 +554,6 @@ def initialize_test_process(lock, shared_index, args, temp_dir):
             wp.config.kernel_cache_dir = cache_root_dir
 
             wp.build.clear_kernel_cache()
-        else:
-            # Initialize Warp is if hasn't been initialized already
-            wp.init()
 
 
 if __name__ == "__main__":  # pragma: no cover
diff --git a/warp/types.py b/warp/types.py
index 7946c2427..d277d96c0 100644
--- a/warp/types.py
+++ b/warp/types.py
@@ -1650,9 +1650,10 @@ def _init_from_data(self, data, dtype, shape, device, copy, pinned):
             try:
                 # Performance note: try first, ask questions later
                 device = warp.context.runtime.get_device(device)
-            except:
-                warp.context.init()
-                raise
+            except Exception:
+                # Fallback to using the public API for retrieving the device,
+                # which takes care of initializing Warp if needed.
+                device = warp.context.get_device(device)
 
             if device.is_cuda:
                 desc = data.__cuda_array_interface__
@@ -1779,9 +1780,10 @@ def _init_from_data(self, data, dtype, shape, device, copy, pinned):
         try:
             # Performance note: try first, ask questions later
             device = warp.context.runtime.get_device(device)
-        except:
-            warp.context.init()
-            raise
+        except Exception:
+            # Fallback to using the public API for retrieving the device,
+            # which takes care of initializing Warp if needed.
+            device = warp.context.get_device(device)
 
         if device.is_cpu and not copy and not pinned:
             # reference numpy memory directly
@@ -1805,9 +1807,10 @@ def _init_from_ptr(self, ptr, dtype, shape, strides, capacity, device, pinned, d
         try:
             # Performance note: try first, ask questions later
             device = warp.context.runtime.get_device(device)
-        except:
-            warp.context.init()
-            raise
+        except Exception:
+            # Fallback to using the public API for retrieving the device,
+            # which takes care of initializing Warp if needed.
+            device = warp.context.get_device(device)
 
         check_array_shape(shape)
         ndim = len(shape)
@@ -1852,9 +1855,10 @@ def _init_new(self, dtype, shape, strides, device, pinned):
         try:
             # Performance note: try first, ask questions later
             device = warp.context.runtime.get_device(device)
-        except:
-            warp.context.init()
-            raise
+        except Exception:
+            # Fallback to using the public API for retrieving the device,
+            # which takes care of initializing Warp if needed.
+            device = warp.context.get_device(device)
 
         check_array_shape(shape)
         ndim = len(shape)