Skip to content

[WIP] MXChip Wakeword detection with SRNN #107

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 22 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions Applications/KeywordSpotting/MXChip-SRNN/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# S-RNN for Speech Command Detection

Here we demonstrate how SRNN can be used to deploy a key-word spotting model on
the [Azure IoT Dev-Kit](https://microsoft.github.io/azure-iot-developer-kit/)
powered by the Cortex-M4. The model provided is based on the [Speech Commands Dataset](https://ai.googleblog.com/2017/08/launching-speech-commands-dataset.html). It is trained to recognise commands in the set: `[go, no, on, up, bed, cat, dog, off, one, six, two, yes]`. When no keyword is detected, the screen will print 'Noise'.

Unit testing and benchmarking code that was used to develop this implementation of S-RNN is provided in the `tests` directory and can be used for debugging purposes.

## Instructions for Deployment

1. Follow instructions [here](https://github.com/VSChina/devkit-mbedos5-getstarted)
to set-up the MXChip environment. Verify that the set-up is
working properly by burning the `GettingStarted` example mentioned there.
2. Clone the EdgeML repository. Let this repository lie in `$EDGEML_HOME`.
3. Change directory to `devkit-mbedos5-getstarted` cloned in step 1.
```
cd devkit-mbedos5-getstarted/
```
3. Remove the provided `GetStarted` example and replace it with
`$EDGEML_HOME/Applications/KeywordSpotting/MXChip-SRNN/`
```
rm -r GetStarted
cp -r $EDGEML_HOME/Applications/KeywordSpotting/MXChip-SRNN ./
```
4. Open `devkit-mbedos5-getstarted/.mbedignore` in your favourite text editor
and append the following lines:
```
MXChip-SRNN/test/*
```
5. Copy the provided build profile file, `develop_custom.json` into the
`mbed-os` profiles folder:
```
cp MXChip-SRNN/develop_custom.json mbed-os/tools/profiles/
```
6. Compile using:
```
mbed compile --profile develop_custom
```

7. Upload to MXChip IoT DevKit:
- Connect the MXChip IoT DevKit with your machine via USB.
- You will find a removable USB Mass Storage disk named AZ3166.
- Copy the
`.\BUILD\AZ3166\GCC_ARM-DEVELOP_CUSTOM\devkit-mbedos5-getstarted.bin` into this disk.
- The device will reboot and run the application.

125 changes: 125 additions & 0 deletions Applications/KeywordSpotting/MXChip-SRNN/WakeWord-MXChip.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
#include "src/lib/sfastrnn_pipeline/sfastrnnpipeline.h"
#include <Arduino.h>
#include <AudioClassV2.h>
#include "src/models/model.h"
#include "src/lib/utils/circularq.h"

// Model parameters and their initialisers. They are only declared here; the
// definitions live in another translation unit (presumably the one behind
// src/models/model.h -- confirm).
// Parameters of the two FastRNN layers and the final fully connected layer,
// passed to sfastrnn2p_init() in setup().
extern struct FastRNNParams fastrnnParams0;
extern struct FastRNNParams fastrnnParams1;
extern struct FCParams fcParams;
// Initialisers called once from setup() before the pipeline is created.
extern void initFastRNN0();
extern void initFastRNN1();
extern void initFC();
// Maps a predicted label index to the string printed on the screen by
// prediction_callback().
extern const char *labelInvArr[];
// Majority voting over recent predictions. prediction_callback() pushes every
// new label index into votingQ (backed by votingContainer) and keeps
// per-label counts in votingFrequence; a label is shown once its count
// reaches VOTE_MAJORITY.
// Capacity of the voting queue (passed to q_init() in setup()).
#define VOTE_WIN_LEN 10
// Count a label must reach before it is printed.
#define VOTE_MAJORITY 5
FIFOCircularQ votingQ;
static int votingContainer[VOTE_WIN_LEN];
static int votingFrequence[NUM_LABELS];

// Hand-off buffer between the audio callback and loop():
//  - recordCallback() reads raw bytes into readBuffer, keeps one 16-bit
//    sample out of every interleaved pair, stores them in transfer_buffer
//    and then sets transfer_buffer_curr_len to the number of samples stored.
//  - loop() waits for transfer_buffer_curr_len != 0, feeds the samples to the
//    pipeline and resets the length to 0 to mark the buffer free again.
//  - If the callback fires while the length is still non-zero, the new chunk
//    is dropped.
// TRANSFER_BUFFER_MAX_LEN is the capacity in samples; recordCallback() calls
// error() if a chunk yields more than this (presumably AUDIO_CHUNK_SIZE / 4
// is expected to be <= 128 -- confirm against AudioClassV2).
// NOTE(review): transfer_buffer_curr_len is shared between the callback and
// loop() without volatile/atomic qualification -- confirm the callback's
// execution context and whether that is sufficient.
#define TRANSFER_BUFFER_MAX_LEN 128
static AudioClass& Audio = AudioClass::getInstance();
char readBuffer[AUDIO_CHUNK_SIZE];
static int transfer_buffer_curr_len = 0;
static int16_t transfer_buffer[TRANSFER_BUFFER_MAX_LEN];

void recordCallback(void) {
int length = Audio.readFromRecordBuffer(readBuffer, AUDIO_CHUNK_SIZE);
// We are 16bit (short) and not 8bit (char). Hence actual number of samples
// is half. Further, we need to ignore the second channel in the
// audio (interleaved with the first channel).
length = length / 2;
length = length - (length % 2);
length = length / 2;
if(length > TRANSFER_BUFFER_MAX_LEN)
error("Transfer buffer too small");
// Convert to 16 bit samples
int16_t *tempAudio = (int16_t*)readBuffer;
if (transfer_buffer_curr_len != 0) {
Serial.printf("Error: Transfer buffer not empty. %d dropped\n", length);
return;
}
// Drop every other sample (the second channel) while copying
for(int i = 0; i < length; i++)
transfer_buffer[i] = tempAudio[2 * i];
transfer_buffer_curr_len = length;
}

// Configures the audio driver's format before recording is started.
// recordCallback() and loop() assume 16-bit samples at 16000 Hz (the byte
// arithmetic in the callback and the seconds computation in loop()), so
// these values must stay in sync with them.
void init_record(){
// Sampling rate 16000Hz @ 16 bit resolution
// This is hardcoded in the code. Don't change.
Audio.format(16000U, 16U);
}

// Starts audio capture and registers recordCallback() with the audio driver.
void start_record(){
Audio.startRecord(recordCallback);
}

// Callback handed to sfastrnn2p_init(); receives one score vector per
// prediction made by the pipeline.
//   vec: scores, one per label (len entries).
//   len: number of entries in vec.
// The arg-max label is pushed into the voting queue and the per-label counts
// are updated; when the label's count reaches VOTE_MAJORITY it is printed on
// the screen.
void prediction_callback(float *vec, int len){
  int arg = argmax(vec, len);
  // arg indexes both votingFrequence[NUM_LABELS] and labelInvArr; refuse
  // anything outside the label range rather than reading/writing out of
  // bounds (e.g. if len ever exceeds NUM_LABELS).
  if (arg < 0 || arg >= NUM_LABELS)
    return;
  // The entry about to be evicted from the window loses its vote. Values
  // that are not valid labels are accounted to label 0.
  int oldarg = *(int*)q_oldest(&votingQ);
  if (oldarg >= NUM_LABELS || oldarg < 0)
    oldarg = 0;
  votingFrequence[arg]++;
  votingFrequence[oldarg]--;
  q_force_enqueue(&votingQ, &arg);
  if (votingFrequence[arg] >= VOTE_MAJORITY){
    char str[20];
    // Bounded formatting: a long label is truncated instead of overflowing
    // the stack buffer.
    snprintf(str, sizeof(str), "Pred: %s (%d)", labelInvArr[arg], arg);
    Screen.print(str, false);
  }
}


// Arduino entry point, run once at boot: prepares the voting state, serial
// port and screen, loads the model parameters, creates the inference
// pipeline and finally starts audio capture.
void setup(){
  // Voting window state.
  // NOTE(review): label 0 starts with a count of 5 while the window holds
  // VOTE_WIN_LEN (10) entries -- confirm this seed is intentional.
  q_init(&votingQ, votingContainer, VOTE_WIN_LEN, cb_write_int, cb_read_int);
  votingFrequence[0] = 5;

  // Peripherals.
  Serial.begin(115200);
  Screen.init();
  delay(500);

  // Model parameters.
  initFastRNN0();
  initFastRNN1();
  initFC();
  delay(500);
  Screen.clean();

  // Inference pipeline; predictions are delivered to prediction_callback().
  unsigned ret = sfastrnn2p_init(&fastrnnParams0,
      &fastrnnParams1, &fcParams, prediction_callback);
  Serial.printf("Return code: %d (init)\n", ret);
  if (ret != 0) {
    error("Shallow FastRNN initialization failed (code %d)", ret);
    // Never continue with a half-initialised pipeline.
    while (1);
  }

  // Audio capture is started last, once everything else is ready.
  init_record();
  delay(500);
  Serial.println();
  Serial.println("Ready");
  Screen.print(0, "Ready");
  delay(500);
  start_record();
}

// Arduino main loop: polls the transfer buffer filled by recordCallback()
// and pushes every available chunk of samples into the inference pipeline.
// Never returns.
void loop(){
  while (1){
    if (transfer_buffer_curr_len == 0){
      // Nothing to consume yet; sleep briefly and poll again.
      // For a 16, 16 fastRNN model, this can be pushed
      // 6ms without causing errors.
      // (The original `rtos:wait_ms(5)` declared a goto label named `rtos`
      // and then called the global wait_ms(); the stray label is removed.)
      wait_ms(5);
      continue;
    }
    unsigned ret = sfastrnn2p_add_new_samples(transfer_buffer,
        transfer_buffer_curr_len);
    if(ret != 0){
      Serial.printf("Error pushing to interface %d\n", ret);
    }
    // Running total of samples pushed, used only for the progress log.
    // Unsigned so that wrap-around is well defined; a signed int would
    // overflow (undefined behaviour) after roughly 37 hours at 16 kHz.
    static unsigned count = 0;
    count += transfer_buffer_curr_len;
    if(count % (128 * 1000) == 0)
      Serial.printf("Pushed %u seconds\n", (count/16000));
    // Mark the buffer as free so recordCallback() can refill it.
    transfer_buffer_curr_len = 0;
  }
}

// Prints the given C string followed by a newline on the serial port.
// NOTE(review): a printStr(char *) prototype with C linkage appears in the
// debug-print header; confirm that header is visible here so the linkage of
// this definition matches.
void printStr(char *a){
Serial.println(a);
}
48 changes: 48 additions & 0 deletions Applications/KeywordSpotting/MXChip-SRNN/develop_custom.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
{
"GCC_ARM": {
"common": ["-c", "-Wall", "-Wextra",
"-Wno-unused-parameter", "-Wno-missing-field-initializers",
"-fmessage-length=0", "-fno-exceptions", "-fno-builtin",
"-ffunction-sections", "-fdata-sections", "-funsigned-char",
"-MMD", "-fno-delete-null-pointer-checks",
"-fomit-frame-pointer", "-Os", "-mfloat-abi=hard", "-mfpu=fpv4-sp-d16",
"-mcpu=cortex-m4", "-mfloat-abi=softfp", "-mfpu=fpv4-sp-d16", "-Lstatic",
"-DDEBUG_MODE", "-DNFFT_512", "-DFFT_F32", "-DNORMALIZE_FEAT"],
"asm": ["-x", "assembler-with-cpp"],
"c": ["-std=gnu99"],
"cxx": ["-std=gnu++11", "-fno-rtti", "-Wvla"],
"ld": ["-Wl,--gc-sections", "-Wl,--wrap,main", "-Wl,--wrap,_malloc_r",
"-Wl,--wrap,_free_r", "-Wl,--wrap,_realloc_r",
"-Wl,--wrap,_calloc_r", "-Wl,--wrap,exit", "-Wl,--wrap,atexit",
"-Wl,-n",
"-u _printf_float"]
},
"ARM": {
"common": ["-c", "--gnu", "-Otime", "--split_sections",
"--apcs=interwork", "--brief_diagnostics", "--restrict",
"--multibyte_chars", "-O3"],
"asm": [],
"c": ["--md", "--no_depend_system_headers", "--c99", "-D__ASSERT_MSG"],
"cxx": ["--cpp", "--no_rtti", "--no_vla"],
"ld": []
},
"uARM": {
"common": ["-c", "--gnu", "-Otime", "--split_sections",
"--apcs=interwork", "--brief_diagnostics", "--restrict",
"--multibyte_chars", "-O3", "-D__MICROLIB",
"--library_type=microlib", "-DMBED_RTOS_SINGLE_THREAD"],
"asm": [],
"c": ["--md", "--no_depend_system_headers", "--c99", "-D__ASSERT_MSG"],
"cxx": ["--cpp", "--no_rtti", "--no_vla"],
"ld": ["--library_type=microlib"]
},
"IAR": {
"common": [
"--no_wrap_diagnostics", "-e",
"--diag_suppress=Pa050,Pa084,Pa093,Pa082", "-Oh"],
"asm": [],
"c": ["--vla"],
"cxx": ["--guard_calls", "--no_static_destruction"],
"ld": ["--skip_dynamic_initialization", "--threaded_lib"]
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#include <arm_math.h>

#ifdef __cplusplus
extern "C" {
#endif

/* Debug print helpers. Only the prototypes are visible here; the notes on
 * argument meaning below are assumptions to confirm against the
 * implementations. */
/* Print an array of float32_t / q31_t values. The int is presumably the
 * element count and the float presumably a scale factor -- TODO confirm. */
void printFloatArrF32(float32_t *, int, float);
void printFloatArrQ31(q31_t *, int, float);
/* Print an array of int32_t values; the meaning of the two int arguments is
 * not visible here (presumably length and a scale/offset) -- TODO confirm. */
void printIntArr(int32_t *, int, int);
/* Print a single value or pointer. */
void printHexQ31(q31_t);
void printInt32(int32_t);
void printVoid(void *);
void printStr(char *);
void printFloatAddr(float *);

#ifdef __cplusplus
}
#endif
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#include "fastrnn.h"

// Builds the concatenated vector [h, x] in dst.
//   fastrnnParams: supplies statesLen (length of h) and featLen (length of x).
//   h:   hidden state, statesLen floats.
//   x:   input features, featLen floats.
//   dst: output, must have room for statesLen + featLen floats.
void combineHX(const struct FastRNNParams *fastrnnParams, const float *h,
    const float *x, float *dst){
  const unsigned nStates = fastrnnParams->statesLen;
  const unsigned nFeats = fastrnnParams->featLen;
  // First the hidden state, then the features directly behind it.
  memcpy(dst, h, nStates * sizeof(float));
  memcpy(dst + nStates, x, nFeats * sizeof(float));
}

// Performs a single FastRNN time step.
//   fastrnnParams: layer parameters (W, b, alpha, beta, statesLen, featLen).
//   x:        input features for this step, featLen floats.
//   input_h:  hidden state before the step, statesLen floats.
//   result_h: receives the hidden state after the step, statesLen floats.
// input_h is copied before result_h is written, so both may point to the
// same array (FastRNNInference relies on this).
void FastRNNStep(const struct FastRNNParams *fastrnnParams, const float *x,
    const float *input_h, float *result_h){
  const unsigned nStates = fastrnnParams->statesLen;
  const unsigned nFeats = fastrnnParams->featLen;
  const size_t stateBytes = nStates * sizeof(float);

  float prevState[nStates];
  float candidate[nStates];
  float stacked[nStates + nFeats];

  // Private copy of the incoming state: it is scaled below, and the caller's
  // input is const.
  memcpy(prevState, input_h, stateBytes);

  // stacked = [h, x]
  combineHX(fastrnnParams, prevState, x, stacked);
  // candidate = W [h, x]
  matrixVectorMul(fastrnnParams->W, nStates, nStates + nFeats, stacked,
      candidate);
  // candidate = candidate + b
  vectorVectorAdd(candidate, fastrnnParams->b, nStates);
  // Non-linearity (currently only sigmoid)
  vsigmoid(candidate, nStates);
  // Scale the candidate by alpha and the previous state by beta, then add
  // the scaled previous state into the candidate.
  scalarVectorMul(candidate, nStates, fastrnnParams->alpha);
  scalarVectorMul(prevState, nStates, fastrnnParams->beta);
  vectorVectorAdd(candidate, prevState, nStates);

  memcpy(result_h, candidate, stateBytes);
}


// Runs the FastRNN over a whole input sequence.
//   fastrnnParams: layer parameters; timeSteps, featLen and statesLen give
//                  the sequence length, features per step and state size.
//   x:        input, timeSteps * featLen floats; step t starts at
//             x[t * featLen].
//   result_h: receives the final hidden state, statesLen floats. It is
//             zeroed first and then updated in place once per time step.
void FastRNNInference(const struct FastRNNParams *fastrnnParams,
    const float x[], float* result_h){
  // The struct fields are unsigned; use unsigned indices so the loop
  // conditions do not mix signed and unsigned operands.
  const unsigned statesLen = fastrnnParams->statesLen;
  const unsigned featLen = fastrnnParams->featLen;
  const unsigned timeSteps = fastrnnParams->timeSteps;

  // Start from an all-zero hidden state.
  for (unsigned i = 0; i < statesLen; i++){
    result_h[i] = 0;
  }
  // FastRNNStep takes const float *x, so no cast (and no loss of const) is
  // needed. It copies its input state before writing, so passing result_h
  // as both input and output is safe.
  for (unsigned t = 0; t < timeSteps; t++){
    FastRNNStep(fastrnnParams, &x[t * featLen], result_h, result_h);
  }
}


Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#pragma once

#ifdef __cplusplus
extern "C" {
#endif

#include <math.h>
#include "../utils/helpermath.h"
#include <string.h>

struct FastRNNParams {
// h~ = W1.h + W2.x + b
// The projection matrix W1 and W2 concatenated along axis=1, [W1, W2]
// in row major order. Will be of dimension [n_hid, (n_hid + n_inp)] in
// numpy.
float* W;
// The bias vector. Of dimension [n_hidden]
float *b;
// Alpha and beta for FastRNN (sigmoided version)
float alpha; float beta;
unsigned timeSteps;
unsigned featLen;
unsigned statesLen;
};

// Runs the FastRNN over a full input sequence.
// fastrnnParams: Pointer to an instance of FastRNNParams
// x: Input data. Should be of shape [numtime_steps, num_feats] flattened to
// 1-D. That is, the ith time step will be the VECTOR starting at
// x[i * num_feats]
// result_h: hidden-state(h) stored in this vector. It is reset to zero
// before the first time step, so any previous contents are ignored.
void FastRNNInference(const struct FastRNNParams *fastrnnParams, const float x[],
float *result_h);
// Performs a single time step: reads the features x (featLen floats) and the
// state input_h (statesLen floats) and writes the new state to result_h.
// input_h and result_h may point to the same array.
void FastRNNStep(const struct FastRNNParams *fastrnnParams, const float *x,
const float *input_h, float *result_h);
// Writes the concatenation [h, x] into dst, which must have room for
// statesLen + featLen floats.
void combineHX(const struct FastRNNParams *fastrnnParams, const float *h,
const float *x, float *dst);

#ifdef __cplusplus
}
#endif
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "fc.h"

// Fully connected layer: result = W x + B.
//   fcParams:     layer parameters (W, B, inputDim, outputDim).
//   x:            input vector, inputDim floats.
//   result:       output vector, outputDim floats.
//   nonLinearity: currently ignored; the raw outputs are returned.
void FCInference(const struct FCParams* fcParams, const float x[],
    float* result, unsigned nonLinearity){
  const unsigned outDim = fcParams->outputDim;
  const unsigned inDim = fcParams->inputDim;
  // Not implemented yet; referenced only to make the intent explicit.
  (void)nonLinearity;
  // result = W x
  matrixVectorMul(fcParams->W, outDim, inDim, x, result);
  // result = result + B
  vectorVectorAdd(result, fcParams->B, outDim);
}

39 changes: 39 additions & 0 deletions Applications/KeywordSpotting/MXChip-SRNN/src/lib/algorithms/fc.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#pragma once

#ifdef __cplusplus
extern "C" {
#endif

#include <math.h>
#include "../utils/helpermath.h"

/* An instance of FCParams needs to be defined elsewhere. This will hold the
 * model matrices. You pass a pointer to this FCParams instance here to perform
 * your computations.
 */

// Parameters of one fully connected layer.
struct FCParams{
// Let x be a vector of size n = inputDim,
// and let the hidden dim (or output dim) be m = outputDim.
// FCInference then passes W (with m and n as its dimensions) and x to a
// matrix-vector multiply helper and adds B to the result; it makes no
// softmax call, and its non-linearity argument is not used.
float *W;
// Bias vector, outputDim floats.
float *B;
unsigned inputDim;
unsigned outputDim;
};

//
// Applies the fully connected layer described by params to x and stores the
// raw outputs in result.
//
// nonLinearity is meant to select a non-linearity to apply to the outputs.
// This feature is not implemented as of now and the raw outputs are returned.
//
// params: The FCParams struct instance containing the parameters for the
// current layer.
// x: Input data (inputDim floats).
// result: Float array to hold the FC result (outputDim floats).
// nonLinearity: Choice of non-linearity to use. Currently not implemented.
void FCInference(const struct FCParams* params, const float x[], float* result,
unsigned nonLinearity);

#ifdef __cplusplus
}
#endif
Loading