microsoft · metastableB · Jul 18, 2019 · Jul 18, 2019 · Jul 18, 2019 · Jul 18, 2019
diff --git a/Applications/KeywordSpotting/MXChip-SRNN/README.md b/Applications/KeywordSpotting/MXChip-SRNN/README.md
@@ -0,0 +1,46 @@
+# S-RNN for Speech Command Detection
+
+Here we demonstrate how SRNN can be used to deploy a key-word spotting model on
+the [Azure IoT Dev-Kit](https://microsoft.github.io/azure-iot-developer-kit/)
+powered by the Cortex-M4. The model provided is based on the [Speech Commands Dataset](https://ai.googleblog.com/2017/08/launching-speech-commands-dataset.html). It is trained to recognise commands in the set: `[go, no, on, up, bed, cat, dog, off, one, six, two, yes]`. When no keyword is detected, the screen will print 'Noise'.
+
+Unit testing and benchmarking code that was used to develop this implementation of S-RNN is provided in the `tests` directory and can be used for debugging purposes.
+
+## Instructions for Deployment 
+
+1. Follow instructions [here](https://github.com/VSChina/devkit-mbedos5-getstarted) 
+to set-up the MXChip environment. Verify that the set-up is
+working properly by burning the `GettingStarted` example mentioned there. 
+2. Clone the EdgeML repository. Let this repository lie in `$EDGEML_HOME`.
+3. Change directory to `devkit-mbedos5-getstarted` cloned in step 1.
+   ```
+      cd devkit-mbedos5-getstarted/
+   ```
+4. Remove the provided `GetStarted` example and replace it with
+   `$EDGEML_HOME/Applications/KeywordSpotting/MXChip-SRNN/`
+   ```
+      rm -r GetStarted
+      cp -r $EDGEML_HOME/Applications/KeywordSpotting/MXChip-SRNN ./
+   ```
+5. Open `devkit-mbedos5-getstarted/.mbedignore` in your favourite text editor
+   and append the following lines:
+   ```
+      MXChip-SRNN/test/*
+   ```
+6. Copy the provided build profile file, `develop_custom.json` into the
+   `mbed-os` profiles folder:
+   ```
+      cp MXChip-SRNN/develop_custom.json mbed-os/tools/profiles/
+   ```
+7. Compile using:
+   ```
+      mbed compile --profile develop_custom
+   ```
+
+8. Upload to MXChip IoT DevKit:
+  - Connect the MXChip IoT DevKit with your machine via USB.
+  - You will find a removable USB Mass Storage disk named AZ3166.
+  - Copy the
+  `.\BUILD\AZ3166\GCC_ARM-DEVELOP_CUSTOM\devkit-mbedos5-getstarted.bin` into this disk.
+  - The device will reboot and run the application. 
+
diff --git a/Applications/KeywordSpotting/MXChip-SRNN/WakeWord-MXChip.cpp b/Applications/KeywordSpotting/MXChip-SRNN/WakeWord-MXChip.cpp
@@ -0,0 +1,125 @@
+#include "src/lib/sfastrnn_pipeline/sfastrnnpipeline.h"
+#include <Arduino.h>
+#include <AudioClassV2.h>
+#include "src/models/model.h"
+#include "src/lib/utils/circularq.h"
+
+// Model parameters and their initializers. They are only declared here; the
+// definitions live in another translation unit (presumably the generated
+// model sources next to src/models/model.h -- confirm).
+extern struct FastRNNParams fastrnnParams0;
+extern struct FastRNNParams fastrnnParams1;
+extern struct FCParams fcParams;
+extern void initFastRNN0();
+extern void initFastRNN1();
+extern void initFC();
+// Label names, indexed by the predicted class id (see prediction_callback).
+extern const char *labelInvArr[];
+
+// Majority voting over recent predictions: votingQ (backed by
+// votingContainer) holds up to VOTE_WIN_LEN class ids and votingFrequence
+// counts how often each label occurs among them. A label is printed once
+// its count reaches VOTE_MAJORITY.
+#define VOTE_WIN_LEN 10
+#define VOTE_MAJORITY 5
+FIFOCircularQ votingQ;
+static int votingContainer[VOTE_WIN_LEN];
+static int votingFrequence[NUM_LABELS];
+
+// Single-slot hand-off between recordCallback() (producer) and loop()
+// (consumer). recordCallback() reads raw bytes into readBuffer, copies the
+// first channel into transfer_buffer and then publishes the sample count in
+// transfer_buffer_curr_len. loop() consumes the samples and resets the count
+// to 0, which marks the slot as free again.
+#define TRANSFER_BUFFER_MAX_LEN 128
+static AudioClass& Audio = AudioClass::getInstance();
+char readBuffer[AUDIO_CHUNK_SIZE];
+// volatile: written from the audio record callback while loop() polls it, so
+// every access must really hit memory.
+static volatile int transfer_buffer_curr_len = 0;
+static int16_t transfer_buffer[TRANSFER_BUFFER_MAX_LEN];
+
+// Record callback registered with the audio driver in start_record(). Reads
+// the freshly recorded chunk, keeps only the first channel and hands it to
+// loop() through transfer_buffer / transfer_buffer_curr_len.
+void recordCallback(void) {
+    const int bytesRead = Audio.readFromRecordBuffer(readBuffer, AUDIO_CHUNK_SIZE);
+    // Samples are 16 bit (two bytes each) and the two channels are
+    // interleaved, so the number of first-channel samples is a quarter of
+    // the byte count (both divisions truncate).
+    const int monoSamples = (bytesRead / 2) / 2;
+    if (monoSamples > TRANSFER_BUFFER_MAX_LEN)
+        error("Transfer buffer too small");
+    if (transfer_buffer_curr_len != 0) {
+        // loop() has not consumed the previous chunk yet: drop this one.
+        Serial.printf("Error: Transfer buffer not empty. %d dropped\n", monoSamples);
+        return;
+    }
+    // View the raw bytes as 16 bit samples and copy every other one,
+    // skipping the second channel.
+    const int16_t *interleaved = (int16_t*)readBuffer;
+    for (int n = 0; n < monoSamples; ++n)
+        transfer_buffer[n] = interleaved[2 * n];
+    // Publish the chunk; a non-zero length marks the buffer as full.
+    transfer_buffer_curr_len = monoSamples;
+}
+
+// Configures the audio driver's recording format.
+void init_record(){
+    // 16000 Hz sampling at 16 bit resolution. The rest of the code is
+    // hardcoded for these values. Don't change.
+    const unsigned int sampleRateHz = 16000U;
+    const unsigned int bitsPerSample = 16U;
+    Audio.format(sampleRateHz, bitsPerSample);
+}
+
+// Starts audio capture; recordCallback is registered as the driver callback.
+void start_record(){
+    Audio.startRecord(recordCallback);
+}
+
+// Called by the S-RNN pipeline with the score vector of one prediction
+// (registered through sfastrnn2p_init in setup()).
+//
+// vec: scores, one per class.
+// len: number of entries in vec.
+//
+// The winning class id is pushed into the voting queue, the per-label counts
+// are updated for the entering and the leaving vote, and the label is printed
+// on the screen once it holds at least VOTE_MAJORITY votes.
+void prediction_callback(float *vec, int len){
+    int arg = argmax(vec, len);
+    // arg indexes votingFrequence and labelInvArr below; ignore anything
+    // outside the label range instead of touching memory out of bounds.
+    if (arg < 0 || arg >= NUM_LABELS)
+        return;
+    int oldarg = *(int*)q_oldest(&votingQ);
+    // Defensive: fall back to label 0 if the stored vote is not a valid id.
+    if (oldarg >= NUM_LABELS || oldarg < 0)
+        oldarg = 0;
+    votingFrequence[arg]++;
+    votingFrequence[oldarg]--;
+    q_force_enqueue(&votingQ, &arg);
+    if (votingFrequence[arg] >= VOTE_MAJORITY){
+        char str[20];
+        // Bounded formatting: a long label is truncated, never overflows str.
+        snprintf(str, sizeof(str), "Pred: %s (%d)", labelInvArr[arg], arg);
+        Screen.print(str, false);
+    }
+}
+
+
+// One-time initialization: voting state, serial/display, model parameters,
+// the S-RNN pipeline and finally audio capture.
+void setup(){
+    // Voting queue over the int storage in votingContainer.
+    q_init(&votingQ, votingContainer, VOTE_WIN_LEN, cb_write_int, cb_read_int);
+    // NOTE(review): label 0 starts with 5 votes -- presumably to balance the
+    // zero-initialized queue contents; confirm the intended value.
+    votingFrequence[0] = 5;
+
+    Serial.begin(115200);
+    Screen.init();
+    delay(500);
+
+    // Populate the model parameter structs.
+    initFastRNN0();
+    initFastRNN1();
+    initFC();
+    delay(500);
+    Screen.clean();
+
+    // Wire the two FastRNN layers, the FC layer and the prediction callback
+    // into the pipeline.
+    unsigned initStatus = sfastrnn2p_init(&fastrnnParams0,
+        &fastrnnParams1, &fcParams, prediction_callback);
+    Serial.printf("Return code: %d (init)\n", initStatus);
+    if (initStatus != 0) {
+        error("Shallow FastRNN initialization failed (code %d)", initStatus);
+        // Never continue with a pipeline that failed to initialize.
+        while(1);
+    }
+
+    init_record();
+    delay(500);
+    Serial.println();
+    Serial.println("Ready");
+    Screen.print(0, "Ready");
+    delay(500);
+    start_record();
+}
+
+// Main polling loop: waits until recordCallback() has published a chunk in
+// transfer_buffer, pushes it into the S-RNN pipeline and then marks the
+// buffer as free again by resetting transfer_buffer_curr_len to 0.
+void loop(){
+    // Running total of pushed samples. Unsigned so that wrap-around on long
+    // runs is well defined (a signed counter overflows after ~37 hours at
+    // 16 kHz). The reported seconds restart from 0 after a wrap.
+    static unsigned int count = 0;
+    while (1){
+        if (transfer_buffer_curr_len == 0){
+            // For a 16, 16 fastRNN model, this can be pushed
+            // 6ms without causing errors.
+            wait_ms(5);
+            continue;
+        }
+        unsigned ret = sfastrnn2p_add_new_samples(transfer_buffer,
+            transfer_buffer_curr_len);
+        if(ret != 0){
+            Serial.printf("Error pushing to interface %d\n", ret);
+        }
+        count += transfer_buffer_curr_len;
+        // Progress report every 128000 samples (8 seconds at 16 kHz).
+        if(count % (128 * 1000) == 0)
+            Serial.printf("Pushed %u seconds\n", (count/16000));
+        // Hand the buffer back to recordCallback().
+        transfer_buffer_curr_len = 0;
+    }
+}
+
+// Debug helper: writes the given C string to the serial port, followed by a
+// line break.
+void printStr(char *msg){
+    Serial.println(msg);
+}
diff --git a/Applications/KeywordSpotting/MXChip-SRNN/develop_custom.json b/Applications/KeywordSpotting/MXChip-SRNN/develop_custom.json
@@ -0,0 +1,48 @@
+{
+    "GCC_ARM": {
+        "common": ["-c", "-Wall", "-Wextra",
+                   "-Wno-unused-parameter", "-Wno-missing-field-initializers",
+                   "-fmessage-length=0", "-fno-exceptions", "-fno-builtin",
+                   "-ffunction-sections", "-fdata-sections", "-funsigned-char",
+                   "-MMD", "-fno-delete-null-pointer-checks",
+                   "-fomit-frame-pointer", "-Os", "-mfloat-abi=hard", "-mfpu=fpv4-sp-d16",
+                   "-mcpu=cortex-m4", "-mfloat-abi=softfp", "-mfpu=fpv4-sp-d16", "-Lstatic",
+                   "-DDEBUG_MODE", "-DNFFT_512",  "-DFFT_F32", "-DNORMALIZE_FEAT"],
+        "asm": ["-x", "assembler-with-cpp"],
+        "c": ["-std=gnu99"],
+        "cxx": ["-std=gnu++11", "-fno-rtti", "-Wvla"],
+        "ld": ["-Wl,--gc-sections", "-Wl,--wrap,main", "-Wl,--wrap,_malloc_r",
+               "-Wl,--wrap,_free_r", "-Wl,--wrap,_realloc_r",
+               "-Wl,--wrap,_calloc_r", "-Wl,--wrap,exit", "-Wl,--wrap,atexit",
+               "-Wl,-n",
+               "-u _printf_float"]
+    },
+    "ARM": {
+        "common": ["-c", "--gnu", "-Otime", "--split_sections",
+                   "--apcs=interwork", "--brief_diagnostics", "--restrict",
+                   "--multibyte_chars", "-O3"],
+        "asm": [],
+        "c": ["--md", "--no_depend_system_headers", "--c99", "-D__ASSERT_MSG"],
+        "cxx": ["--cpp", "--no_rtti", "--no_vla"],
+        "ld": []
+    },
+    "uARM": {
+        "common": ["-c", "--gnu", "-Otime", "--split_sections",
+                   "--apcs=interwork", "--brief_diagnostics", "--restrict",
+                   "--multibyte_chars", "-O3", "-D__MICROLIB",
+                   "--library_type=microlib", "-DMBED_RTOS_SINGLE_THREAD"],
+        "asm": [],
+        "c": ["--md", "--no_depend_system_headers", "--c99", "-D__ASSERT_MSG"],
+        "cxx": ["--cpp", "--no_rtti", "--no_vla"],
+        "ld": ["--library_type=microlib"]
+    },
+    "IAR": {
+        "common": [
+            "--no_wrap_diagnostics", "-e",
+            "--diag_suppress=Pa050,Pa084,Pa093,Pa082", "-Oh"],
+        "asm": [],
+        "c": ["--vla"],
+        "cxx": ["--guard_calls", "--no_static_destruction"],
+        "ld": ["--skip_dynamic_initialization", "--threaded_lib"]
+    }
+}
diff --git a/Applications/KeywordSpotting/MXChip-SRNN/src/debug_mode/debugmethods.h b/Applications/KeywordSpotting/MXChip-SRNN/src/debug_mode/debugmethods.h
@@ -0,0 +1,18 @@
+#pragma once
+
+#include <arm_math.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Debug print hooks with C linkage so that both the C library sources and
+// the C++ application can call them. Only the declarations live here.
+// NOTE(review): the meaning of the trailing int/float arguments (presumably
+// element count and a scale factor) is not visible from this header --
+// confirm against the implementations.
+void printFloatArrF32(float32_t *, int, float);
+void printFloatArrQ31(q31_t *, int, float);
+void printIntArr(int32_t *, int, int);
+void printHexQ31(q31_t);
+void printInt32(int32_t);
+void printVoid(void *);
+// Prints a NUL-terminated string.
+void printStr(char *);
+void printFloatAddr(float *);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/Applications/KeywordSpotting/MXChip-SRNN/src/lib/algorithms/fastrnn.c b/Applications/KeywordSpotting/MXChip-SRNN/src/lib/algorithms/fastrnn.c
@@ -0,0 +1,43 @@
+#include "fastrnn.h"
+
+// Writes the concatenation [h, x] into dst: the first statesLen floats come
+// from h, the following featLen floats from x. dst must have room for
+// statesLen + featLen floats.
+void combineHX(const struct FastRNNParams *fastrnnParams, const float *h,
+        const float *x, float *dst){
+    const unsigned nStates = fastrnnParams->statesLen;
+    const unsigned nFeats = fastrnnParams->featLen;
+    memcpy(dst, h, nStates * sizeof(float));
+    memcpy(dst + nStates, x, nFeats * sizeof(float));
+}
+
+// Performs a single FastRNN time step:
+//     result_h = alpha * sigmoid(W [h, x] + b) + beta * h
+// (assuming the helpermath routines update their first vector argument in
+// place, as their use here suggests).
+//
+// x:        features of the current time step (featLen floats).
+// input_h:  previous hidden state (statesLen floats); not modified.
+// result_h: receives the new hidden state; may be the same buffer as
+//           input_h, since input_h is copied before anything is written.
+void FastRNNStep(const struct FastRNNParams *fastrnnParams, const float *x,
+        const float *input_h, float *result_h){
+    const unsigned nStates = fastrnnParams->statesLen;
+    const unsigned nFeats = fastrnnParams->featLen;
+
+    // Working copy of the previous state; it gets scaled by beta below.
+    float prevState[nStates];
+    memcpy(prevState, input_h, nStates * sizeof(float));
+
+    // concat = [h, x]
+    float concat[nStates + nFeats];
+    combineHX(fastrnnParams, prevState, x, concat);
+
+    // update = W [h, x]
+    float update[nStates];
+    matrixVectorMul(fastrnnParams->W, nStates, nStates + nFeats, concat,
+            update);
+    // update = update + b
+    vectorVectorAdd(update, fastrnnParams->b, nStates);
+    // Non-linearity (only sigmoid is supported at the moment)
+    vsigmoid(update, nStates);
+    // update = alpha * update, prevState = beta * prevState
+    scalarVectorMul(update, nStates, fastrnnParams->alpha);
+    scalarVectorMul(prevState, nStates, fastrnnParams->beta);
+    // update = update + prevState
+    vectorVectorAdd(update, prevState, nStates);
+
+    memcpy(result_h, update, nStates * sizeof(float));
+}
+
+
+// Runs the FastRNN over a whole input sequence.
+//
+// x:        timeSteps * featLen floats; time step t starts at
+//           x[t * featLen].
+// result_h: statesLen floats; zeroed first and then updated in place by
+//           every time step, so it holds the final hidden state on return.
+void FastRNNInference(const struct FastRNNParams *fastrnnParams,
+        const float x[], float* result_h){
+    // Unsigned indices match the unsigned length fields of FastRNNParams.
+    for (unsigned i = 0; i < fastrnnParams->statesLen; i++){
+        result_h[i] = 0;
+    }
+    for (unsigned t = 0; t < fastrnnParams->timeSteps; t++){
+        // FastRNNStep takes const float *, so x is passed without a cast.
+        FastRNNStep(fastrnnParams, &x[t * fastrnnParams->featLen],
+                result_h, result_h);
+    }
+}
+
+
diff --git a/Applications/KeywordSpotting/MXChip-SRNN/src/lib/algorithms/fastrnn.h b/Applications/KeywordSpotting/MXChip-SRNN/src/lib/algorithms/fastrnn.h
@@ -0,0 +1,39 @@
+#pragma once
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <math.h>
+#include "../utils/helpermath.h"
+#include <string.h>
+
+// Parameters of one FastRNN layer. The field order is part of the interface:
+// instances are defined elsewhere, so do not reorder.
+struct FastRNNParams {
+    // h~ = W1.h + W2.x + b
+    // The projection matrices W1 and W2 concatenated along axis=1, [W1, W2],
+    // in row major order. Will be of dimension [n_hid, (n_hid + n_inp)] in
+    // numpy.
+    float* W;
+    // The bias vector. Of dimension [n_hidden]
+    float *b;
+    // Alpha and beta for FastRNN (sigmoided version): alpha scales the
+    // sigmoid output, beta scales the previous hidden state.
+    float alpha; float beta;
+    // Number of time steps FastRNNInference iterates over.
+    unsigned timeSteps;
+    // Number of input features per time step (n_inp).
+    unsigned featLen;
+    // Size of the hidden state (n_hid).
+    unsigned statesLen;
+};
+
+// FastRNNInference: runs the FastRNN over a full input sequence.
+// fastrnnParams: Pointer to an instance of FastRNNParams
+// x: Input data. Should be of shape [numtime_steps, num_feats] flattened to
+//    1-D. That is, the ith time step will be the VECTOR x[i * num_feats]
+// result_h: hidden-state(h) stored in this vector (statesLen floats). It is
+//    reset to zero before the first time step.
+void FastRNNInference(const struct FastRNNParams *fastrnnParams, const float x[],
+        float *result_h);
+// FastRNNStep: performs one time step. Reads the previous hidden state from
+// input_h and the current features from x, and writes the new hidden state
+// to result_h. input_h and result_h may point to the same buffer.
+void FastRNNStep(const struct FastRNNParams *fastrnnParams, const float *x,
+        const float *input_h, float *result_h);
+// combineHX: copies h (statesLen floats) followed by x (featLen floats) into
+// dst, which must hold statesLen + featLen floats.
+void combineHX(const struct FastRNNParams *fastrnnParams, const float *h,
+        const float *x, float *dst);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/Applications/KeywordSpotting/MXChip-SRNN/src/lib/algorithms/fc.c b/Applications/KeywordSpotting/MXChip-SRNN/src/lib/algorithms/fc.c
@@ -0,0 +1,9 @@
+#include "fc.h"
+
+// Fully connected layer: result = W x + B. The raw outputs are returned;
+// nonLinearity is accepted for interface compatibility but currently unused.
+void FCInference(const struct FCParams* fcParams, const float x[],
+	float* result, unsigned nonLinearity){
+	const unsigned nOut = fcParams->outputDim;
+	const unsigned nIn = fcParams->inputDim;
+	// result = W x, with W of dimension [nOut, nIn]
+	matrixVectorMul(fcParams->W, nOut, nIn, x, result);
+	// result = result + B
+	vectorVectorAdd(result, fcParams->B, nOut);
+}
+
diff --git a/Applications/KeywordSpotting/MXChip-SRNN/src/lib/algorithms/fc.h b/Applications/KeywordSpotting/MXChip-SRNN/src/lib/algorithms/fc.h
@@ -0,0 +1,39 @@
+#pragma once
+
+#ifdef __cplusplus
+extern "C" {
+#endif 
+
+#include <math.h>
+#include "../utils/helpermath.h"
+
+/* An instance of FCParams needs to be defined elsewhere. This will hold the
+ * model matrices. You pass a pointer to this FCParams instance here to perform
+ * your computations.
+ */
+
+// Parameters of one fully connected layer. The field order is part of the
+// interface: instances are defined elsewhere, so do not reorder.
+struct FCParams{
+	// Let x be a vector of size n = input dim
+	// and let the hidden dim (or output dim) be m.
+	// Then FC computes Wx + B where W is m x n and B has m entries.
+	// No softmax or other non-linearity is applied by FCInference.
+	float *W;
+	float *B;
+	unsigned inputDim;
+	unsigned outputDim;
+};
+
+//
+// Computes the fully connected layer output, result = W x + B.
+//
+// nonLinearity is meant to select a non-linearity applied to the outputs.
+// This feature is not implemented as of now and the raw outputs are returned.
+// 
+// params: The FCParams struct instance containing the parameters for the
+//		current layer.
+//	x: Input data (inputDim floats).
+//	result: Float array to hold the FC result (outputDim floats).
+//	nonLinearity: Choice of non-linearity to use. Currently not implemented.
+void FCInference(const struct FCParams* params, const float x[], float* result, 
+		unsigned nonLinearity);
+
+#ifdef __cplusplus
+}
+#endif