Skip to content

Commit

Permalink
feat: HandTrackingService now allows selecting any suitable video input device
Browse files Browse the repository at this point in the history
Loading branch information
AlmasB committed Jan 24, 2025
1 parent 0adee8e commit 878533a
Show file tree
Hide file tree
Showing 4 changed files with 201 additions and 86 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,111 +6,93 @@

package com.almasb.fxgl.intelligence.gesturerecog

import com.almasb.fxgl.core.EngineService
import com.almasb.fxgl.core.concurrent.Async
import com.almasb.fxgl.core.util.EmptyRunnable
import com.almasb.fxgl.intelligence.WebAPI
import com.almasb.fxgl.intelligence.WebAPIService
import com.almasb.fxgl.logging.Logger
import com.almasb.fxgl.net.ws.LocalWebSocketServer
import com.almasb.fxgl.net.ws.VideoInputDeviceInfo
import javafx.geometry.Point3D
import org.openqa.selenium.WebDriver
import org.openqa.selenium.chrome.ChromeDriver
import org.openqa.selenium.chrome.ChromeOptions
import java.util.function.Consumer

/**
* Service that provides access to hand tracking.
*
* @author Almas Baim (https://github.com/AlmasB)
*/
class HandTrackingService : EngineService() {
class HandTrackingService : WebAPIService(
LocalWebSocketServer("HandTrackingServer", WebAPI.GESTURE_RECOGNITION_PORT),
WebAPI.GESTURE_RECOGNITION_API
) {

private val log = Logger.get(HandTrackingService::class.java)
private val server = LocalWebSocketServer("HandTrackingServer", WebAPI.GESTURE_RECOGNITION_PORT)

private var webDriver: WebDriver? = null
private val videoInputDevices = arrayListOf<VideoInputDeviceInfo>()

private val handDataHandlers = arrayListOf<Consumer<Hand>>()

var onMediaDeviceDetectionCompleted: Runnable = EmptyRunnable

val videoDevices: List<VideoInputDeviceInfo>
get() = videoInputDevices.toList()

val landmarksView: HandLandmarksView by lazy {
HandLandmarksView().also { addInputHandler(it) }
}

override fun onInit() {
server.addMessageHandler { message ->
try {
val rawData = message.split(",").filter { it.isNotEmpty() }
// kind: audioinput, videoinput, audiooutput
private fun onMediaDeviceDetected(kind: String, label: String, deviceID: String) {
log.debug("New media device detected: $kind,$label,$deviceID")

val id = rawData[0].toInt()
val points = ArrayList<Point3D>()
if (kind == "videoinput") {
videoInputDevices += VideoInputDeviceInfo(label, deviceID)
}
}

var i = 1
while (i < rawData.size) {
val x = rawData[i + 0].toDouble()
val y = rawData[i + 1].toDouble()
val z = rawData[i + 2].toDouble()
// Invoked via RPC from the JS side once every media device has been reported.
// The user-supplied callback is re-dispatched onto the JavaFX thread.
private fun onMediaDeviceDetectionComplete() {
Async.startAsyncFX {
onMediaDeviceDetectionCompleted.run()
}
}

points.add(Point3D(x, y, z))
private fun initService() {
log.debug("initService()")

i += 3
}
setReady()
}

Async.startAsyncFX {
handDataHandlers.forEach { it.accept(Hand(id, points)) }
}
private fun onHandInput(message: String) {
try {
val rawData = message.split(",").filter { it.isNotEmpty() }

} catch (e: Exception) {
log.warning("Failed to parse message.", e)
}
}
val id = rawData[0].toInt()
val points = ArrayList<Point3D>()

server.start()
}
var i = 1
while (i < rawData.size) {
val x = rawData[i + 0].toDouble()
val y = rawData[i + 1].toDouble()
val z = rawData[i + 2].toDouble()

/**
* Starts this service in a background thread.
* Can be called after stop() to restart the service.
* If the service has already started, then calls stop() and restarts it.
*/
fun start() {
Async.startAsync {
try {
if (webDriver != null) {
stop()
}

val options = ChromeOptions()
options.addArguments("--headless=new")

// for modules
options.addArguments("--allow-file-access-from-files")
// for webcam, audio input
options.addArguments("--use-fake-ui-for-media-stream")

webDriver = ChromeDriver(options)
webDriver!!.get(WebAPI.GESTURE_RECOGNITION_API.toExternalForm())

// we are ready to use the web api service
} catch (e: Exception) {
log.warning("Failed to start Chrome web driver. Ensure Chrome is installed in default location")
log.warning("Error data", e)
points.add(Point3D(x, y, z))

i += 3
}
}
}

/**
* Stops this service.
* No-op if it has not started via start() before.
*/
fun stop() {
try {
if (webDriver != null) {
webDriver!!.quit()
webDriver = null
Async.startAsyncFX {
handDataHandlers.forEach { it.accept(Hand(id, points)) }
}

} catch (e: Exception) {
log.warning("Failed to quit web driver", e)
log.warning("Failed to parse message.", e)
}
}

/**
* Add input handler for hand tracking data.
* Input handlers are called on the JavaFX thread.
*/
fun addInputHandler(handler: Consumer<Hand>) {
handDataHandlers += handler
}
Expand All @@ -119,8 +101,9 @@ class HandTrackingService : EngineService() {
handDataHandlers -= handler
}

override fun onExit() {
stop()
server.stop()
/**
 * Asks the web side (via rpcRun "setVideoInputDevice") to switch the camera
 * stream to [videoDevice]. The switch is asynchronous; tracking data resumes
 * once the JS side has acquired the new stream.
 */
fun setVideoDevice(videoDevice: VideoInputDeviceInfo) {
log.debug("setting video device = $videoDevice")

rpcRun("setVideoInputDevice", videoDevice.id)
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/*
* FXGL - JavaFX Game Library. The MIT License (MIT).
* Copyright (c) AlmasB ([email protected]).
* See LICENSE for details.
*/

package com.almasb.fxgl.net.ws

/**
 * Models a media device entry as reported by the MediaDevices Web API:
 * https://developer.mozilla.org/en-US/docs/Web/API/MediaDevices/getUserMedia
 *
 * @author Almas Baim (https://github.com/AlmasB)
 */
open class MediaDeviceInfo
internal constructor(
    // one of: audioinput, videoinput, audiooutput
    val kind: String,
    // human-readable name; may be empty if the page lacks permission
    val label: String,
    // browser-assigned device identifier
    val id: String
)

class AudioInputDeviceInfo(label: String, id: String)
    : MediaDeviceInfo("audioinput", label, id) {

    // prefer the human-readable label, fall back to the raw device id
    override fun toString(): String = "(audio)" + label.ifEmpty { id }
}

class VideoInputDeviceInfo(label: String, id: String)
    : MediaDeviceInfo("videoinput", label, id) {

    // prefer the human-readable label, fall back to the raw device id
    override fun toString(): String = "(video)" + label.ifEmpty { id }
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@
<body>
<h2>Hand tracking service</h2>

<section id="demos" class="invisible">
<section id="demos" class="invisible" hidden>
<div id="liveView" class="videoView">
<div style="position: relative;">
<video id="webcam" autoplay playsinline></video>
<video id="webcam" autoplay></video>
</div>
</div>
</section>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,36 @@ import {
} from "https://cdn.jsdelivr.net/npm/@mediapipe/[email protected]";

let handLandmarker = undefined;
let nextVideoDeviceId = "";
let isReady = false;

const socket = new WebSocket('ws://localhost:55560');

socket.addEventListener('open', function (event) {
createHandLandmarker();
});

// Routes RPC function-call messages from the Kotlin server to local functions.
// Fix: use the shared SEPARATOR constant instead of duplicating the magic
// string '*,,*' (rpcRun below already builds messages with SEPARATOR).
socket.addEventListener('message', function (event) {
    let message = event.data;

    if (message.startsWith(FUNCTION_CALL_TAG)) {
        let func = message.substring(FUNCTION_CALL_TAG.length);
        let tokens = func.split(SEPARATOR);
        let funcName = tokens[0];

        if (funcName === "setVideoInputDevice") {
            let deviceId = tokens[1];

            // TODO: window["functionName"](arguments);

            setVideoInputDevice(deviceId);
        }
    }
});

const video = document.getElementById("webcam");
let lastVideoTime = -1;

// Before we can use HandLandmarker class we must wait for it to finish
// loading. Machine Learning models can be large and take a moment to
// get everything needed to run.
Expand All @@ -37,21 +64,29 @@ const createHandLandmarker = async () => {
numHands: 2
});

enableCam();
};
createHandLandmarker();
checkMediaDevices();
enableWebcam();

/********************************************************************
// Continuously grab image from webcam stream and detect it.
********************************************************************/
rpcRun("initService");
};

const video = document.getElementById("webcam");
let lastVideoTime = -1;
// Enumerates all media devices known to the browser and reports each one to
// the server via rpcRun, followed by a completion notification.
// Enumeration failures are deliberately ignored (best-effort detection).
function checkMediaDevices() {
    navigator.mediaDevices
        .enumerateDevices()
        .then((deviceList) => {
            for (const d of deviceList) {
                rpcRun("onMediaDeviceDetected", `${d.kind}`, `${d.label}`, `${d.deviceId}`);
            }

            rpcRun("onMediaDeviceDetectionComplete");
        })
        .catch((err) => {
            // ignore `${err.name}: ${err.message}`
        });
}

// Enable the live webcam view and start detection.
function enableCam() {

// getUsermedia parameters.
// Attaches the default webcam stream to the <video> element and starts the
// prediction loop once the first frame is available.
function enableWebcam() {
    const constraints = { video: true };

    // Activate the webcam stream.
    navigator.mediaDevices.getUserMedia(constraints).then((stream) => {
        video.srcObject = stream;
        video.addEventListener("loadeddata", predictWebcam);

        isReady = true;
    });
}

// Switches the live camera stream to the device with the given id.
// Marks the pipeline not-ready while the swap is in progress so
// predictWebcam() does not read from a stopped stream.
// Fix: guard against video.srcObject being null (no stream attached yet),
// which previously threw on getVideoTracks().
function setVideoInputDevice(selectedDeviceId) {
    nextVideoDeviceId = selectedDeviceId;
    isReady = false;

    const constraints = {
        video: {
            deviceId: nextVideoDeviceId,
        },
    };

    // Activate the webcam stream.
    navigator.mediaDevices.getUserMedia(constraints).then((stream) => {
        // stop previous tracks, if any stream was already active
        if (video.srcObject) {
            video.srcObject.getVideoTracks().forEach((track) => {
                track.stop();
            });
        }

        // set new stream
        video.srcObject = stream;
        video.addEventListener("loadeddata", predictWebcam);

        console.log("set stream: " + stream);
        isReady = true;
    });
}

async function predictWebcam() {
if (!isReady)
return;

let startTimeMs = performance.now();
if (lastVideoTime !== video.currentTime) {
lastVideoTime = video.currentTime;
Expand All @@ -80,7 +146,7 @@ async function predictWebcam() {
data += point.x + "," + point.y + "," + point.z + ",";
});

socket.send(data);
rpcRun("onHandInput", data);

id++;
}
Expand All @@ -90,3 +156,27 @@ async function predictWebcam() {
// Call this function again to keep predicting when the browser is ready.
window.requestAnimationFrame(predictWebcam);
}

// the below is a copy-paste since this js file is a module but ../rpc-common.js is not
// include ../rpc-common.js

const SEPARATOR = "*,,*";
const FUNCTION_CALL_TAG = "F_CALL:";
const FUNCTION_RETURN_TAG = "F_RETURN:";

// Sends a function-call message to the server:
// F_CALL:<funcName>*,,*<arg0>*,,*<arg1>*,,*...  (trailing separator included)
function rpcRun(funcName, ...args) {
    const argsString = args.map((a) => a + SEPARATOR).join("");

    socket.send(`${FUNCTION_CALL_TAG}${funcName}${SEPARATOR}${argsString}`);
}

// Placeholder for returning RPC results back to the server; not implemented yet.
function rpcReturn(funcName) {
// TODO: unique id?
//socket.send(`${FUNCTION_RETURN_TAG}${funcName}.F_RESULT:${names}`);
}

0 comments on commit 878533a

Please sign in to comment.