From 607270191d6cc5b395f0ee648da883ea18f38138 Mon Sep 17 00:00:00 2001 From: iory Date: Sat, 11 Jun 2022 21:14:47 +0900 Subject: [PATCH 01/22] [respeaker_ros] Publish audio_info --- respeaker_ros/scripts/respeaker_node.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/respeaker_ros/scripts/respeaker_node.py b/respeaker_ros/scripts/respeaker_node.py index d9ec870aa..339f77b32 100644 --- a/respeaker_ros/scripts/respeaker_node.py +++ b/respeaker_ros/scripts/respeaker_node.py @@ -16,6 +16,7 @@ import sys import time from audio_common_msgs.msg import AudioData +from audio_common_msgs.msg import AudioInfo from geometry_msgs.msg import PoseStamped from std_msgs.msg import Bool, Int32, ColorRGBA from dynamic_reconfigure.server import Server @@ -333,6 +334,8 @@ def __init__(self): self.pub_doa_raw = rospy.Publisher("sound_direction", Int32, queue_size=1, latch=True) self.pub_doa = rospy.Publisher("sound_localization", PoseStamped, queue_size=1, latch=True) self.pub_audio = rospy.Publisher("audio", AudioData, queue_size=10) + self.pub_audio_info = rospy.Publisher("audio_info", AudioInfo, + queue_size=1, latch=True) self.pub_speech_audio = rospy.Publisher("speech_audio", AudioData, queue_size=10) # init config self.config = None @@ -348,6 +351,14 @@ def __init__(self): self.timer_led = None self.sub_led = rospy.Subscriber("status_led", ColorRGBA, self.on_status_led) + info_msg = AudioInfo( + channels=self.n_channel, + sample_rate=self.respeaker_audio.rate, + sample_format='S16LE', + bitrate=self.respeaker_audio.rate * self.respeaker_audio.bitdepth, + coding_format='WAVE') + self.pub_audio_info.publish(info_msg) + def on_shutdown(self): self.info_timer.shutdown() try: From 3f08b747e6774784d897ee63f26f87027b6657d3 Mon Sep 17 00:00:00 2001 From: iory Date: Sat, 11 Jun 2022 21:19:00 +0900 Subject: [PATCH 02/22] [respeaker_ros] Enable speech to text for multi channel. 
--- respeaker_ros/scripts/speech_to_text.py | 32 ++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/respeaker_ros/scripts/speech_to_text.py b/respeaker_ros/scripts/speech_to_text.py index 0974b2f65..0063ac197 100644 --- a/respeaker_ros/scripts/speech_to_text.py +++ b/respeaker_ros/scripts/speech_to_text.py @@ -2,6 +2,8 @@ # -*- coding: utf-8 -*- # Author: Yuki Furuta +from __future__ import division + import actionlib import rospy try: @@ -9,8 +11,10 @@ except ImportError as e: raise ImportError(str(e) + '\nplease try "pip install speechrecognition"') +import numpy as np from actionlib_msgs.msg import GoalStatus, GoalStatusArray from audio_common_msgs.msg import AudioData +from audio_common_msgs.msg import AudioInfo from sound_play.msg import SoundRequest, SoundRequestAction, SoundRequestGoal from speech_recognition_msgs.msg import SpeechRecognitionCandidates @@ -18,8 +22,26 @@ class SpeechToText(object): def __init__(self): # format of input audio data - self.sample_rate = rospy.get_param("~sample_rate", 16000) - self.sample_width = rospy.get_param("~sample_width", 2) + if rospy.get_param('~audio_info', None): + rospy.loginfo('Extract audio info params from {}'.format( + rospy.get_param('~audio_info'))) + audio_info_msg = rospy.wait_for_message( + rospy.get_param('~audio_info'), AudioInfo) + self.sample_rate = audio_info_msg.sample_rate + self.sample_width = audio_info_msg.bitrate // self.sample_rate // 8 + self.channels = audio_info_msg.channels + else: + self.sample_rate = rospy.get_param("~sample_rate", 16000) + self.sample_width = rospy.get_param("~sample_width", 2) + self.channels = rospy.get_param("~channels", 1) + if self.sample_width == 2: + self.dtype = 'int16' + elif self.sample_width == 4: + self.dtype = 'int32' + else: + raise NotImplementedError('sample_width {} is not supported' + .format(self.sample_width)) + self.target_channel = rospy.get_param("~target_channel", 0) # language of STT service self.language = 
rospy.get_param("~language", "ja-JP") # ignore voice input while the robot is speaking @@ -78,7 +100,11 @@ def audio_cb(self, msg): if self.is_canceling: rospy.loginfo("Speech is cancelled") return - data = SR.AudioData(msg.data, self.sample_rate, self.sample_width) + + data = SR.AudioData( + np.frombuffer(msg.data, dtype=self.dtype)[ + self.target_channel::self.channels].tobytes(), + self.sample_rate, self.sample_width) try: rospy.loginfo("Waiting for result %d" % len(data.get_raw_data())) result = self.recognizer.recognize_google( From 04f6e1ad39c1ae297e8b57a003d3a51c01d714e1 Mon Sep 17 00:00:00 2001 From: iory Date: Sat, 11 Jun 2022 21:30:30 +0900 Subject: [PATCH 03/22] [respeaker_ros] Add audio_info arg in sample --- respeaker_ros/launch/sample_respeaker.launch | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/respeaker_ros/launch/sample_respeaker.launch b/respeaker_ros/launch/sample_respeaker.launch index 31d083608..6741fd7c5 100644 --- a/respeaker_ros/launch/sample_respeaker.launch +++ b/respeaker_ros/launch/sample_respeaker.launch @@ -13,6 +13,8 @@ + + + respawn="true" respawn_delay="10" > + + publish_multichannel: $(arg publish_multichannel) + + - + + audio_info: $(arg audio_info) language: $(arg language) self_cancellation: $(arg self_cancellation) tts_tolerance: 0.5 From 5bbed143d06d5eb5d972fe96d152315cf06e3e16 Mon Sep 17 00:00:00 2001 From: iory Date: Sat, 11 Jun 2022 21:33:53 +0900 Subject: [PATCH 04/22] [respeaker_ros] Add publish_multichannel option for publishing multi channel audio --- respeaker_ros/scripts/respeaker_node.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/respeaker_ros/scripts/respeaker_node.py b/respeaker_ros/scripts/respeaker_node.py index 339f77b32..a734dcb17 100644 --- a/respeaker_ros/scripts/respeaker_node.py +++ b/respeaker_ros/scripts/respeaker_node.py @@ -233,7 +233,9 @@ def close(self): class RespeakerAudio(object): - def __init__(self, on_audio, 
channel=0, suppress_error=True): + def __init__(self, on_audio, channel=0, suppress_error=True, + publish_multichannel=False): + self.publish_multichannel = publish_multichannel self.on_audio = on_audio with ignore_stderr(enable=suppress_error): self.pyaudio = pyaudio.PyAudio() @@ -266,7 +268,6 @@ def __init__(self, on_audio, channel=0, suppress_error=True): if self.channels != 6: rospy.logwarn("%d channel is found for respeaker" % self.channels) rospy.logwarn("You may have to update firmware.") - self.channel = min(self.channels - 1, max(0, self.channel)) self.stream = self.pyaudio.open( input=True, start=False, @@ -296,7 +297,10 @@ def stream_callback(self, in_data, frame_count, time_info, status): data = np.frombuffer(in_data, dtype=np.int16) chunk_per_channel = int(len(data) / self.channels) data = np.reshape(data, (chunk_per_channel, self.channels)) - chan_data = data[:, self.channel] + if self.publish_multichannel is True: + chan_data = data + else: + chan_data = data[:, self.channel] # invoke callback self.on_audio(chan_data.tobytes()) return None, pyaudio.paContinue @@ -318,6 +322,7 @@ def __init__(self): self.doa_xy_offset = rospy.get_param("~doa_xy_offset", 0.0) self.doa_yaw_offset = rospy.get_param("~doa_yaw_offset", 90.0) self.speech_prefetch = rospy.get_param("~speech_prefetch", 0.5) + self.publish_multichannel = rospy.get_param("~publish_multichannel", False) self.speech_continuation = rospy.get_param("~speech_continuation", 0.5) self.speech_max_duration = rospy.get_param("~speech_max_duration", 7.0) self.speech_min_duration = rospy.get_param("~speech_min_duration", 0.1) @@ -341,9 +346,16 @@ def __init__(self): self.config = None self.dyn_srv = Server(RespeakerConfig, self.on_config) # start - self.respeaker_audio = RespeakerAudio(self.on_audio, suppress_error=suppress_pyaudio_error) + self.respeaker_audio = RespeakerAudio(self.on_audio, suppress_error=suppress_pyaudio_error, + publish_multichannel=self.publish_multichannel) + self.n_channel = 1 + if 
self.publish_multichannel: + self.n_channel = self.respeaker_audio.channels self.speech_prefetch_bytes = int( - self.speech_prefetch * self.respeaker_audio.rate * self.respeaker_audio.bitdepth / 8.0) + self.n_channel + * self.speech_prefetch + * self.respeaker_audio.rate + * self.respeaker_audio.bitdepth / 8.0) self.speech_prefetch_buffer = b"" self.respeaker_audio.start() self.info_timer = rospy.Timer(rospy.Duration(1.0 / self.update_rate), @@ -446,7 +458,7 @@ def on_timer(self, event): self.speech_audio_buffer = b"" self.is_speeching = False duration = 8.0 * len(buf) * self.respeaker_audio.bitwidth - duration = duration / self.respeaker_audio.rate / self.respeaker_audio.bitdepth + duration = duration / self.respeaker_audio.rate / self.respeaker_audio.bitdepth / self.n_channel rospy.loginfo("Speech detected for %.3f seconds" % duration) if self.speech_min_duration <= duration < self.speech_max_duration: From 334017735fba3b2f257d6239b5626862541acfee Mon Sep 17 00:00:00 2001 From: iory Date: Sun, 12 Jun 2022 11:41:09 +0900 Subject: [PATCH 05/22] [respeaker_ros] Restore audio input to use argment's audio --- respeaker_ros/launch/sample_respeaker.launch | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/respeaker_ros/launch/sample_respeaker.launch b/respeaker_ros/launch/sample_respeaker.launch index 6741fd7c5..33cb8c25c 100644 --- a/respeaker_ros/launch/sample_respeaker.launch +++ b/respeaker_ros/launch/sample_respeaker.launch @@ -33,7 +33,7 @@ - + audio_info: $(arg audio_info) From d662b2c5bcd9a6f237ec1d4da0a85a481300e10e Mon Sep 17 00:00:00 2001 From: iory Date: Mon, 13 Jun 2022 16:21:04 +0900 Subject: [PATCH 06/22] [respeaker_ros] Add publish_multichannel option to fix launch file error. 
--- respeaker_ros/launch/sample_respeaker.launch | 2 ++ 1 file changed, 2 insertions(+) diff --git a/respeaker_ros/launch/sample_respeaker.launch b/respeaker_ros/launch/sample_respeaker.launch index 33cb8c25c..d3e0badae 100644 --- a/respeaker_ros/launch/sample_respeaker.launch +++ b/respeaker_ros/launch/sample_respeaker.launch @@ -15,6 +15,8 @@ + + Date: Mon, 13 Jun 2022 16:30:11 +0900 Subject: [PATCH 07/22] [respeaker_ros] Add roslaunch_add_file_check to test launch file format --- respeaker_ros/CMakeLists.txt | 2 ++ respeaker_ros/package.xml | 1 + respeaker_ros/test/sample_respeaker.test | 1 + 3 files changed, 4 insertions(+) diff --git a/respeaker_ros/CMakeLists.txt b/respeaker_ros/CMakeLists.txt index 390a82e0f..b4009cd1a 100644 --- a/respeaker_ros/CMakeLists.txt +++ b/respeaker_ros/CMakeLists.txt @@ -35,7 +35,9 @@ catkin_install_python(PROGRAMS ${PYTHON_SCRIPTS} if(CATKIN_ENABLE_TESTING) find_package(rostest REQUIRED) + find_package(roslaunch REQUIRED) add_rostest(test/sample_respeaker.test DEPENDENCIES ${PROJECT_NAME}_generate_virtualenv ) + roslaunch_add_file_check(launch/sample_respeaker.launch) endif() diff --git a/respeaker_ros/package.xml b/respeaker_ros/package.xml index ac83b898a..be16789a2 100644 --- a/respeaker_ros/package.xml +++ b/respeaker_ros/package.xml @@ -17,6 +17,7 @@ flac geometry_msgs std_msgs + sound_play speech_recognition_msgs tf python-numpy diff --git a/respeaker_ros/test/sample_respeaker.test b/respeaker_ros/test/sample_respeaker.test index 5d51c220c..61f10fb7b 100644 --- a/respeaker_ros/test/sample_respeaker.test +++ b/respeaker_ros/test/sample_respeaker.test @@ -3,6 +3,7 @@ + From b85bea5f02363da4da358d6d45918d1e6f8ff2a7 Mon Sep 17 00:00:00 2001 From: iory Date: Mon, 13 Jun 2022 16:41:55 +0900 Subject: [PATCH 08/22] [respeaker_ros] Set audio_info topic name and check length of it. 
--- respeaker_ros/scripts/speech_to_text.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/respeaker_ros/scripts/speech_to_text.py b/respeaker_ros/scripts/speech_to_text.py index 0063ac197..32a8bef7d 100644 --- a/respeaker_ros/scripts/speech_to_text.py +++ b/respeaker_ros/scripts/speech_to_text.py @@ -22,11 +22,12 @@ class SpeechToText(object): def __init__(self): # format of input audio data - if rospy.get_param('~audio_info', None): + audio_info_topic_name = rospy.get_param('~audio_info', '') + if len(audio_info_topic_name) > 0: rospy.loginfo('Extract audio info params from {}'.format( - rospy.get_param('~audio_info'))) + audio_info_topic_name)) audio_info_msg = rospy.wait_for_message( - rospy.get_param('~audio_info'), AudioInfo) + audio_info_topic_name, AudioInfo) self.sample_rate = audio_info_msg.sample_rate self.sample_width = audio_info_msg.bitrate // self.sample_rate // 8 self.channels = audio_info_msg.channels From 8bad18342aa31c443b85824514a6a566526a6eed Mon Sep 17 00:00:00 2001 From: iory Date: Mon, 13 Jun 2022 18:11:58 +0900 Subject: [PATCH 09/22] [respeaker_ros] Add comment for why we add publish_multichannel option. --- respeaker_ros/scripts/respeaker_node.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/respeaker_ros/scripts/respeaker_node.py b/respeaker_ros/scripts/respeaker_node.py index a734dcb17..fba98b517 100644 --- a/respeaker_ros/scripts/respeaker_node.py +++ b/respeaker_ros/scripts/respeaker_node.py @@ -350,6 +350,12 @@ def __init__(self): publish_multichannel=self.publish_multichannel) self.n_channel = 1 if self.publish_multichannel: + # The respeaker has 4 or 6 microphones. + # Multiple microphones can be used for + # beam forming (strengthening the sound in a specific direction) + # and sound localization (the respeaker outputs the azimuth + # direction, but the multichannel can estimate + # the elevation direction). etc. 
self.n_channel = self.respeaker_audio.channels self.speech_prefetch_bytes = int( self.n_channel From bb8d365452a920112a4a473f14e7966346ec7893 Mon Sep 17 00:00:00 2001 From: iory Date: Mon, 13 Jun 2022 23:10:47 +0900 Subject: [PATCH 10/22] [respeaker_ros] Remove publish_multichannel option and publish raw multichannel audio as default. --- respeaker_ros/launch/sample_respeaker.launch | 5 -- respeaker_ros/scripts/respeaker_node.py | 74 +++++++++++++------- 2 files changed, 49 insertions(+), 30 deletions(-) diff --git a/respeaker_ros/launch/sample_respeaker.launch b/respeaker_ros/launch/sample_respeaker.launch index d3e0badae..e2c43c557 100644 --- a/respeaker_ros/launch/sample_respeaker.launch +++ b/respeaker_ros/launch/sample_respeaker.launch @@ -15,8 +15,6 @@ - - - - publish_multichannel: $(arg publish_multichannel) - 1: + # The respeaker has 4 microphones. + # Multiple microphones can be used for + # beam forming (strengthening the sound in a specific direction) + # and sound localization (the respeaker outputs the azimuth + # direction, but the multichannel can estimate + # the elevation direction). etc. + + # Channel 0: processed audio for ASR + # Channel 1: mic1 raw data + # Channel 2: mic2 raw data + # Channel 3: mic3 raw data + # Channel 4: mic4 raw data + # Channel 5: merged playback + # (self.n_channel - 2) = 4 channels are multiple microphones. 
+ self.pub_audio_raw = rospy.Publisher("audio_raw", AudioData, + queue_size=10) + self.pub_audio_merged_playback = rospy.Publisher( + "audio_merged_playback", AudioData, + queue_size=10) + info_raw_msg = AudioInfo( + channels=self.n_channel - 2, + sample_rate=self.respeaker_audio.rate, + sample_format='S16LE', + bitrate=(self.respeaker_audio.rate * + self.respeaker_audio.bitdepth), + coding_format='WAVE') + self.pub_audio_raw_info.publish(info_raw_msg) + def on_shutdown(self): self.info_timer.shutdown() try: @@ -415,13 +432,20 @@ def on_status_led(self, msg): oneshot=True) def on_audio(self, data): - self.pub_audio.publish(AudioData(data=data)) + # take processed audio for ASR. + processed_data = data[:, 0].tobytes() + self.pub_audio.publish(AudioData(data=processed_data)) + if self.n_channel > 1: + self.pub_audio_raw.publish( + AudioData(data=data[:, 1:5].reshape(-1).tobytes())) + self.pub_audio_merged_playback.publish( + AudioData(data=data[:, 5].tobytes())) if self.is_speeching: if len(self.speech_audio_buffer) == 0: self.speech_audio_buffer = self.speech_prefetch_buffer - self.speech_audio_buffer += data + self.speech_audio_buffer += processed_data else: - self.speech_prefetch_buffer += data + self.speech_prefetch_buffer += processed_data self.speech_prefetch_buffer = self.speech_prefetch_buffer[-self.speech_prefetch_bytes:] def on_timer(self, event): From 6ed3f115c1e6bffeb42e3714a1e9fec9aee5f9fe Mon Sep 17 00:00:00 2001 From: iory Date: Mon, 13 Jun 2022 23:19:55 +0900 Subject: [PATCH 11/22] [respeaker_ros] Publish spech_audio_raw --- respeaker_ros/scripts/respeaker_node.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/respeaker_ros/scripts/respeaker_node.py b/respeaker_ros/scripts/respeaker_node.py index e0dd92729..e8a1a322d 100644 --- a/respeaker_ros/scripts/respeaker_node.py +++ b/respeaker_ros/scripts/respeaker_node.py @@ -394,6 +394,16 @@ def __init__(self): coding_format='WAVE') 
self.pub_audio_raw_info.publish(info_raw_msg) + self.speech_audio_raw_buffer = b"" + self.speech_raw_prefetch_buffer = b"" + self.pub_speech_audio_raw = rospy.Publisher( + "speech_audio_raw", AudioData, queue_size=10) + self.speech_raw_prefetch_bytes = int( + self.n_channel - 2 + * self.speech_prefetch + * self.respeaker_audio.rate + * self.respeaker_audio.bitdepth / 8.0) + def on_shutdown(self): self.info_timer.shutdown() try: @@ -436,17 +446,26 @@ def on_audio(self, data): processed_data = data[:, 0].tobytes() self.pub_audio.publish(AudioData(data=processed_data)) if self.n_channel > 1: + raw_audio_data = data[:, 1:5].reshape(-1).tobytes() self.pub_audio_raw.publish( - AudioData(data=data[:, 1:5].reshape(-1).tobytes())) + AudioData(data=raw_audio_data)) self.pub_audio_merged_playback.publish( AudioData(data=data[:, 5].tobytes())) if self.is_speeching: if len(self.speech_audio_buffer) == 0: self.speech_audio_buffer = self.speech_prefetch_buffer + if self.n_channel > 1: + self.speech_audio_raw_buffer = self.speech_raw_prefetch_buffer self.speech_audio_buffer += processed_data + if self.n_channel > 1: + self.speech_audio_raw_buffer += raw_audio_data else: self.speech_prefetch_buffer += processed_data self.speech_prefetch_buffer = self.speech_prefetch_buffer[-self.speech_prefetch_bytes:] + if self.n_channel > 1: + self.speech_raw_prefetch_buffer += raw_audio_data + self.speech_raw_prefetch_buffer = self.speech_raw_prefetch_buffer[ + -self.speech_raw_prefetch_bytes:] def on_timer(self, event): stamp = event.current_real or rospy.Time.now() From d56eed7f37458e3b553e72936f9a7a30fc08f9e3 Mon Sep 17 00:00:00 2001 From: iory Date: Tue, 14 Jun 2022 03:14:28 +0900 Subject: [PATCH 12/22] [respeaker_ros] Add comment to know more defails --- respeaker_ros/scripts/respeaker_node.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/respeaker_ros/scripts/respeaker_node.py b/respeaker_ros/scripts/respeaker_node.py index e8a1a322d..4ef07653c 100644 --- 
a/respeaker_ros/scripts/respeaker_node.py +++ b/respeaker_ros/scripts/respeaker_node.py @@ -379,6 +379,8 @@ def __init__(self): # Channel 3: mic3 raw data # Channel 4: mic4 raw data # Channel 5: merged playback + # For more detail, please see + # https://wiki.seeedstudio.com/ReSpeaker_Mic_Array_v2.0/ # (self.n_channel - 2) = 4 channels are multiple microphones. self.pub_audio_raw = rospy.Publisher("audio_raw", AudioData, queue_size=10) From 129a58133d7ef46ad02017f3c1c56dfddf888f62 Mon Sep 17 00:00:00 2001 From: iory Date: Tue, 14 Jun 2022 03:14:40 +0900 Subject: [PATCH 13/22] [respeaker_ros] Publish speech audio raw --- respeaker_ros/scripts/respeaker_node.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/respeaker_ros/scripts/respeaker_node.py b/respeaker_ros/scripts/respeaker_node.py index 4ef07653c..ed431e271 100644 --- a/respeaker_ros/scripts/respeaker_node.py +++ b/respeaker_ros/scripts/respeaker_node.py @@ -507,13 +507,15 @@ def on_timer(self, event): elif self.is_speeching: buf = self.speech_audio_buffer self.speech_audio_buffer = b"" + buf_raw = self.speech_audio_raw_buffer + self.speech_audio_raw_buffer = b"" self.is_speeching = False duration = 8.0 * len(buf) * self.respeaker_audio.bitwidth duration = duration / self.respeaker_audio.rate / self.respeaker_audio.bitdepth / self.n_channel rospy.loginfo("Speech detected for %.3f seconds" % duration) if self.speech_min_duration <= duration < self.speech_max_duration: - self.pub_speech_audio.publish(AudioData(data=buf)) + self.pub_speech_audio_raw.publish(AudioData(data=buf_raw)) if __name__ == '__main__': From cd490fb688c589924ed8ca97ad3c70f81bfd0d46 Mon Sep 17 00:00:00 2001 From: iory Date: Tue, 14 Jun 2022 03:15:16 +0900 Subject: [PATCH 14/22] [respeaker_ros] Add parameters for respaker ros --- respeaker_ros/README.md | 91 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) diff --git a/respeaker_ros/README.md b/respeaker_ros/README.md index e42ba1202..780d47e3b 
100644 --- a/respeaker_ros/README.md +++ b/respeaker_ros/README.md @@ -92,6 +92,97 @@ A ROS Package for Respeaker Mic Array a: 0.3" ``` +## Parameters for respeaker_node.py + + - ### Publishing topics + + - `~audio` (`audio_common_msgs/AudioData`) + + Processed audio for ASR. 1 channel. + + - `~audio_info` (`audio_common_msgs/AudioInfo`) + + Audio info with respect to `~audio`. + + - `~audio_raw` (`audio_common_msgs/AudioData`) + + Micarray audio data has 4-channels. Maybe you need to update respeaker firmware. + + If the firmware isn't supported, this will not be output. + + - `~audio_info_raw` (`audio_common_msgs/AudioInfo`) + + Audio info with respect to `~audio_raw`. + + If the firmware isn't supported, this will not be output. + + - `~speech_audio` (`audio_common_msgs/AudioData`) + + Audio data while a person is speaking using the VAD function. + + - `~speech_audio_raw` (`audio_common_msgs/AudioData`) + + Audio data has 4-channels while a person is speaking using the VAD function. + + If the firmware isn't supported, this will not be output. + + - `~audio_merged_playback` (`audio_common_msgs/AudioData`) + + Data that combines the sound of mic and speaker. + + If the firmware isn't supported, this will not be output. + + For more detail, please see https://wiki.seeedstudio.com/ReSpeaker_Mic_Array_v2.0/ + + - `~is_speeching` (`std_msgs/Bool`) + + Using VAD function, publish whether someone is speaking. + + - `~sound_direction` (`std_msgs/Int32`) + + Direction of sound. + + - `~sound_localization` (`geometry_msgs/PoseStamped`) + + Localized Sound Direction. The value of the position in the estimated direction with `~doa_offset` as the radius is obtained. + + - ### Parameters + + - `~update_rate` (`Double`, default: `10.0`) + + Publishing info data such as `~is_speeching`, `~sound_direction`, `~sound_localization`, `~speech_audio` and `~speech_audio_raw`. + + - `~sensor_frame_id` (`String`, default: `respeaker_base`) + + Frame id. 
+ + - `~doa_xy_offset` (`Double`, default: `0.0`) + + Radius of the estimated sound direction: the distance from the sensor at which the `sound_localization` pose is placed. + + - `~doa_yaw_offset` (`Double`, default: `90.0`) + + Estimated DoA angle offset. + + - `~speech_prefetch` (`Double`, default: `0.5`) + + Time to represent how long speech is pre-stored in buffer. + + - `~speech_continuation` (`Double`, default: `0.5`) + + If the time between the current time and the time when the speech is stopped is shorter than this time, + it is assumed that someone is speaking. + + - `~speech_max_duration` (`Double`, default: `7.0`) + + - `~speech_min_duration` (`Double`, default: `0.1`) + + If the speaking interval is within these times, `~speech_audio` and `~speech_audio_raw` will be published. + + - `~suppress_pyaudio_error` (`Bool`, default: `True`) + + If this value is `True`, suppress error from pyaudio. + ## Use cases ### Voice Recognition From d6b1476dc532cacc54700c15cb8fb9e3168d5621 Mon Sep 17 00:00:00 2001 From: iory Date: Tue, 14 Jun 2022 03:19:16 +0900 Subject: [PATCH 15/22] [respeaker_ros] Fixed publishing audio topic's namespace --- respeaker_ros/README.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/respeaker_ros/README.md b/respeaker_ros/README.md index 780d47e3b..1a5d54ce4 100644 --- a/respeaker_ros/README.md +++ b/respeaker_ros/README.md @@ -96,37 +96,37 @@ A ROS Package for Respeaker Mic Array - ### Publishing topics - - `~audio` (`audio_common_msgs/AudioData`) + - `audio` (`audio_common_msgs/AudioData`) Processed audio for ASR. 1 channel. - - `~audio_info` (`audio_common_msgs/AudioInfo`) + - `audio_info` (`audio_common_msgs/AudioInfo`) Audio info with respect to `~audio`. - - `~audio_raw` (`audio_common_msgs/AudioData`) + - `audio_raw` (`audio_common_msgs/AudioData`) Micarray audio data has 4-channels. Maybe you need to update respeaker firmware. If the firmware isn't supported, this will not be output. 
- - `~audio_info_raw` (`audio_common_msgs/AudioInfo`) + - `audio_info_raw` (`audio_common_msgs/AudioInfo`) Audio info with respect to `~audio_raw`. If the firmware isn't supported, this will not be output. - - `~speech_audio` (`audio_common_msgs/AudioData`) + - `speech_audio` (`audio_common_msgs/AudioData`) Audio data while a person is speaking using the VAD function. - - `~speech_audio_raw` (`audio_common_msgs/AudioData`) + - `speech_audio_raw` (`audio_common_msgs/AudioData`) Audio data has 4-channels while a person is speaking using the VAD function. If the firmware isn't supported, this will not be output. - - `~audio_merged_playback` (`audio_common_msgs/AudioData`) + - `audio_merged_playback` (`audio_common_msgs/AudioData`) Data that combines the sound of mic and speaker. From 9bb766f43270ed37dda1c14f2d2e712981a01b51 Mon Sep 17 00:00:00 2001 From: iory Date: Tue, 14 Jun 2022 03:31:55 +0900 Subject: [PATCH 16/22] [respeaker_ros] Add parameters for speech_to_text --- respeaker_ros/README.md | 54 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/respeaker_ros/README.md b/respeaker_ros/README.md index 1a5d54ce4..247168ba5 100644 --- a/respeaker_ros/README.md +++ b/respeaker_ros/README.md @@ -183,6 +183,60 @@ A ROS Package for Respeaker Mic Array If this value is `True`, suppress error from pyaudio. +## Parameters for speech_to_text.py + + - ### Publishing topics + + - `~speech_to_text` (`speech_recognition_msgs/SpeechRecognitionCandidates`) + + Recognized text. + + - ### Subscribing topics + + - `audio` (`audio_common_msgs/AudioData`) + + Input audio. + + - ### Parameters + + - `~audio_info` (`String`, default: ``) + + audio_info (`audio_common_msgs/AudioInfo`) topic. If this value is specified, `~sample_rate`, `~sample_width` and `~channels` parameters are obtained from the topic. + + - `~sample_rate` (`Int`, default: `16000`) + + Sampling rate. + + - `~sample_width` (`Int`, default: `2`) + + Sample width in bytes.
+ + - `~channels` (`Int`, default: `1`) + + Number of channels. + + - `~target_channel` (`Int`, default: `0`) + + Index of the channel used for recognition. + + - `~language` (`String`, default: `ja-JP`) + + language of speech to text service. For English users, you can specify `en-US`. + + - `~self_cancellation` (`Bool`, default: `True`) + + ignore voice input while the robot is speaking. + + - `~tts_tolerance` (`Double`, default: `1.0`) + + time to assume as SPEAKING after tts service is finished. + + - `~tts_action_names` (`List[String]`, default: `['sound_play']`) + + If `~self_cancellation` is `True`, this value will be used. + + When the actions are active, do nothing with the callback that subscribes to `audio`. + ## Use cases ### Voice Recognition From 5b6a3761dd5e81a971dd3b2baaa0aa6b031980e2 Mon Sep 17 00:00:00 2001 From: iory Date: Tue, 14 Jun 2022 12:10:58 +0900 Subject: [PATCH 17/22] [respeaker_ros] Avoid AudioInfo import for backward compatibility --- respeaker_ros/scripts/respeaker_node.py | 45 +++++++++++++++---------- respeaker_ros/scripts/speech_to_text.py | 15 ++++++++- 2 files changed, 41 insertions(+), 19 deletions(-) diff --git a/respeaker_ros/scripts/respeaker_node.py b/respeaker_ros/scripts/respeaker_node.py index ed431e271..0fde85c40 100644 --- a/respeaker_ros/scripts/respeaker_node.py +++ b/respeaker_ros/scripts/respeaker_node.py @@ -16,7 +16,13 @@ import sys import time from audio_common_msgs.msg import AudioData -from audio_common_msgs.msg import AudioInfo +enable_audio_info = True +try: + from audio_common_msgs.msg import AudioInfo +except Exception as e: + rospy.logwarn('audio_common_msgs/AudioInfo message is not exists.' 
+ ' AudioInfo message will not be published.') + enable_audio_info = False from geometry_msgs.msg import PoseStamped from std_msgs.msg import Bool, Int32, ColorRGBA from dynamic_reconfigure.server import Server @@ -332,8 +338,9 @@ def __init__(self): self.pub_doa_raw = rospy.Publisher("sound_direction", Int32, queue_size=1, latch=True) self.pub_doa = rospy.Publisher("sound_localization", PoseStamped, queue_size=1, latch=True) self.pub_audio = rospy.Publisher("audio", AudioData, queue_size=10) - self.pub_audio_info = rospy.Publisher("audio_info", AudioInfo, - queue_size=1, latch=True) + if enable_audio_info is True: + self.pub_audio_info = rospy.Publisher("audio_info", AudioInfo, + queue_size=1, latch=True) self.pub_audio_raw_info = rospy.Publisher("audio_info_raw", AudioInfo, queue_size=1, latch=True) self.pub_speech_audio = rospy.Publisher("speech_audio", AudioData, queue_size=10) @@ -357,13 +364,14 @@ def __init__(self): self.sub_led = rospy.Subscriber("status_led", ColorRGBA, self.on_status_led) # processed audio for ASR - info_msg = AudioInfo( - channels=1, - sample_rate=self.respeaker_audio.rate, - sample_format='S16LE', - bitrate=self.respeaker_audio.rate * self.respeaker_audio.bitdepth, - coding_format='WAVE') - self.pub_audio_info.publish(info_msg) + if enable_audio_info is True: + info_msg = AudioInfo( + channels=1, + sample_rate=self.respeaker_audio.rate, + sample_format='S16LE', + bitrate=self.respeaker_audio.rate * self.respeaker_audio.bitdepth, + coding_format='WAVE') + self.pub_audio_info.publish(info_msg) if self.n_channel > 1: # The respeaker has 4 microphones. 
@@ -387,14 +395,15 @@ def __init__(self): self.pub_audio_merged_playback = rospy.Publisher( "audio_merged_playback", AudioData, queue_size=10) - info_raw_msg = AudioInfo( - channels=self.n_channel - 2, - sample_rate=self.respeaker_audio.rate, - sample_format='S16LE', - bitrate=(self.respeaker_audio.rate * - self.respeaker_audio.bitdepth), - coding_format='WAVE') - self.pub_audio_raw_info.publish(info_raw_msg) + if enable_audio_info is True: + info_raw_msg = AudioInfo( + channels=self.n_channel - 2, + sample_rate=self.respeaker_audio.rate, + sample_format='S16LE', + bitrate=(self.respeaker_audio.rate * + self.respeaker_audio.bitdepth), + coding_format='WAVE') + self.pub_audio_raw_info.publish(info_raw_msg) self.speech_audio_raw_buffer = b"" self.speech_raw_prefetch_buffer = b"" diff --git a/respeaker_ros/scripts/speech_to_text.py b/respeaker_ros/scripts/speech_to_text.py index 32a8bef7d..6765e2e04 100644 --- a/respeaker_ros/scripts/speech_to_text.py +++ b/respeaker_ros/scripts/speech_to_text.py @@ -4,6 +4,8 @@ from __future__ import division +import sys + import actionlib import rospy try: @@ -14,7 +16,13 @@ import numpy as np from actionlib_msgs.msg import GoalStatus, GoalStatusArray from audio_common_msgs.msg import AudioData -from audio_common_msgs.msg import AudioInfo +enable_audio_info = True +try: + from audio_common_msgs.msg import AudioInfo +except Exception as e: + rospy.logwarn('audio_common_msgs/AudioInfo message is not exists.' + ' AudioInfo message will not be published.') + enable_audio_info = False from sound_play.msg import SoundRequest, SoundRequestAction, SoundRequestGoal from speech_recognition_msgs.msg import SpeechRecognitionCandidates @@ -24,6 +32,11 @@ def __init__(self): # format of input audio data audio_info_topic_name = rospy.get_param('~audio_info', '') if len(audio_info_topic_name) > 0: + if enable_audio_info is False: + rospy.logerr( + 'audio_common_msgs/AudioInfo message is not exists.' 
+ ' Giving ~audio_info is not valid in your environment.') + sys.exit(1) rospy.loginfo('Extract audio info params from {}'.format( audio_info_topic_name)) audio_info_msg = rospy.wait_for_message( From 6ac8a4055807a0e26adf4bea0134bb593a68e3b3 Mon Sep 17 00:00:00 2001 From: iory Date: Tue, 14 Jun 2022 12:43:59 +0900 Subject: [PATCH 18/22] [respeaker_ros] Fixed bytes calculation 'self.n_channe - 2' -> '(self.n_channel - 2)' --- respeaker_ros/scripts/respeaker_node.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/respeaker_ros/scripts/respeaker_node.py b/respeaker_ros/scripts/respeaker_node.py index 0fde85c40..261598444 100644 --- a/respeaker_ros/scripts/respeaker_node.py +++ b/respeaker_ros/scripts/respeaker_node.py @@ -410,7 +410,7 @@ def __init__(self): self.pub_speech_audio_raw = rospy.Publisher( "speech_audio_raw", AudioData, queue_size=10) self.speech_raw_prefetch_bytes = int( - self.n_channel - 2 + (self.n_channel - 2) * self.speech_prefetch * self.respeaker_audio.rate * self.respeaker_audio.bitdepth / 8.0) From defbba81a96e752f80970aa76af0ed267ae90808 Mon Sep 17 00:00:00 2001 From: Shingo Kitagawa Date: Wed, 1 Mar 2023 22:25:46 +0900 Subject: [PATCH 19/22] use sub and not delete --- rostwitter/scripts/tweet_image_server.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/rostwitter/scripts/tweet_image_server.py b/rostwitter/scripts/tweet_image_server.py index ea5e18c63..88e9a93d4 100755 --- a/rostwitter/scripts/tweet_image_server.py +++ b/rostwitter/scripts/tweet_image_server.py @@ -70,7 +70,7 @@ def _execute_cb(self, goal): self.image_topic_name = goal.image_topic_name with self.lock: self.img[self.image_topic_name] = None - self.sub = rospy.Subscriber( + sub = rospy.Subscriber( self.image_topic_name, Image, self._image_cb) @@ -130,8 +130,7 @@ def _execute_cb(self, goal): else: rospy.logerr('cannot subscribe image: {}'.format(self.image_topic_name)) ret = self.api.post_update(goal.text) - self.sub.unregister() - del 
self.sub + sub.unregister() else: ret = self.api.post_update(goal.text) From 1e494d8611cf09335178bc30cdd4e59004650756 Mon Sep 17 00:00:00 2001 From: Shingo Kitagawa Date: Sat, 25 Mar 2023 23:43:26 +0900 Subject: [PATCH 20/22] fix typo in dialogflow_client.py --- dialogflow_task_executive/node_scripts/dialogflow_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dialogflow_task_executive/node_scripts/dialogflow_client.py b/dialogflow_task_executive/node_scripts/dialogflow_client.py index 98b99c15e..d8dbeb08a 100644 --- a/dialogflow_task_executive/node_scripts/dialogflow_client.py +++ b/dialogflow_task_executive/node_scripts/dialogflow_client.py @@ -271,7 +271,7 @@ def speak_result(self, result): volume=self.volume) # for japanese or utf-8 languages - if self.language == 'ja-JP' and sys.version <= 2: + if self.language == 'ja-JP' and sys.version_info.major <= 2: msg.arg = result.fulfillment_text.encode('utf-8') else: msg.arg = result.fulfillment_text From 37d2218fdd5d694bbe9685040ef175e85550a5c9 Mon Sep 17 00:00:00 2001 From: Shingo Kitagawa Date: Sun, 26 Mar 2023 00:10:49 +0900 Subject: [PATCH 21/22] not use map for python3 in task_executive --- dialogflow_task_executive/node_scripts/task_executive.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dialogflow_task_executive/node_scripts/task_executive.py b/dialogflow_task_executive/node_scripts/task_executive.py index 686327a55..457371d0a 100644 --- a/dialogflow_task_executive/node_scripts/task_executive.py +++ b/dialogflow_task_executive/node_scripts/task_executive.py @@ -82,8 +82,7 @@ def _list_apps_cb(self, msg): @property def running_apps(self): - return map(lambda a: a.name, - self._latest_msg.running_apps) + return [a.name for a in self._latest_msg.running_apps] @property def available_apps(self): From 9167080489e9dba0ff940feb53a59f7adee0b541 Mon Sep 17 00:00:00 2001 From: Shingo Kitagawa Date: Mon, 27 Mar 2023 02:16:05 +0900 Subject: [PATCH 22/22] chmod -x because 
of catkin_virtualenv --- dialogflow_task_executive/node_scripts/sample_app_print.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 dialogflow_task_executive/node_scripts/sample_app_print.py diff --git a/dialogflow_task_executive/node_scripts/sample_app_print.py b/dialogflow_task_executive/node_scripts/sample_app_print.py old mode 100755 new mode 100644