From 6a5e6ab81fcf1c3c304c8390a722644c3bf88491 Mon Sep 17 00:00:00 2001 From: Yoshiki Obinata Date: Mon, 22 Nov 2021 20:17:21 +0900 Subject: [PATCH 1/7] [voice_text] Dynamically load voice_text library --- 3rdparty/voice_text/.gitignore | 1 - 3rdparty/voice_text/CMakeLists.txt | 63 +---- 3rdparty/voice_text/README.md | 20 +- 3rdparty/voice_text/include/vt_handler.h | 107 ++++++++ 3rdparty/voice_text/include/vt_jpn.h | 294 +++++++++++++++++++++ 3rdparty/voice_text/include/vtapi.h | 262 ++++++++++++++++++ 3rdparty/voice_text/src/dummy/vt_dummy.cpp | 13 - 3rdparty/voice_text/src/dummy/vt_dummy.h | 50 ---- 3rdparty/voice_text/src/voice_text.cpp | 90 +++++++ 3rdparty/voice_text/src/voice_text.cpp.in | 178 ------------- 3rdparty/voice_text/src/vt_handler.cpp | 245 +++++++++++++++++ 11 files changed, 1028 insertions(+), 295 deletions(-) delete mode 100644 3rdparty/voice_text/.gitignore create mode 100644 3rdparty/voice_text/include/vt_handler.h create mode 100644 3rdparty/voice_text/include/vt_jpn.h create mode 100644 3rdparty/voice_text/include/vtapi.h delete mode 100644 3rdparty/voice_text/src/dummy/vt_dummy.cpp delete mode 100644 3rdparty/voice_text/src/dummy/vt_dummy.h create mode 100644 3rdparty/voice_text/src/voice_text.cpp delete mode 100644 3rdparty/voice_text/src/voice_text.cpp.in create mode 100644 3rdparty/voice_text/src/vt_handler.cpp diff --git a/3rdparty/voice_text/.gitignore b/3rdparty/voice_text/.gitignore deleted file mode 100644 index 46bf7136a..000000000 --- a/3rdparty/voice_text/.gitignore +++ /dev/null @@ -1 +0,0 @@ -src/voice_text.cpp diff --git a/3rdparty/voice_text/CMakeLists.txt b/3rdparty/voice_text/CMakeLists.txt index ef07dcceb..13cf10648 100644 --- a/3rdparty/voice_text/CMakeLists.txt +++ b/3rdparty/voice_text/CMakeLists.txt @@ -5,7 +5,8 @@ find_package(catkin REQUIRED COMPONENTS roscpp message_generation) -find_package(Boost REQUIRED COMPONENTS filesystem) +find_package(Boost REQUIRED COMPONENTS + filesystem) 
generate_dynamic_reconfigure_options( cfg/VoiceText.cfg @@ -20,57 +21,19 @@ generate_messages() catkin_package(CATKIN_DEPENDS message_runtime) -file(GLOB VT_ROOT /usr/vt/*/*) -if(NOT VT_ROOT) - message(WARNING "VoiceText directory should be /usr/vt/*/* (e.g., /usr/vt/sayaka/M16) but is not found") - set(VT_ROOT /usr/vt/sayaka/M16) # default value for following configure_file -else() - if("${CMAKE_SIZEOF_VOID_P}" STREQUAL "4") - set(VT_LIB_PATH_OLD ${VT_ROOT}/bin/x86_32/RAMIO/libvt_jpn.so) # e.g., /usr/vt/sayaka/M16/bin/x86_32/RAMIO/libvt_jpn.so - set(VT_LIB_PATH_NEW ${VT_ROOT}/bin/LINUX32_GLIBC3/RAMIO/libvt_jpn.so) # e.g., /usr/vt/risa/H16/bin/LINUX32_GLIBC3/RAMIO/libvt_jpn.so - elseif("${CMAKE_SIZEOF_VOID_P}" STREQUAL "8") - set(VT_LIB_PATH_OLD ${VT_ROOT}/bin/x86_64/RAMIO/libvt_jpn.so) # e.g., /usr/vt/sayaka/M16/bin/x86_64/RAMIO/libvt_jpn.so - set(VT_LIB_PATH_NEW ${VT_ROOT}/bin/LINUX64_GLIBC3/RAMIO/libvt_jpn.so) # e.g., /usr/vt/risa/H16/bin/LINUX64_GLIBC3/RAMIO/libvt_jpn.so - endif() - if(EXISTS ${VT_LIB_PATH_OLD}) - set(VT_LIB_PATH ${VT_LIB_PATH_OLD}) - else() - if(EXISTS ${VT_LIB_PATH_NEW}) - set(VT_LIB_PATH ${VT_LIB_PATH_NEW}) - endif() - endif() - if(VT_LIB_PATH) - message(WARNING "VoiceText library is found at ${VT_LIB_PATH}") - else() - message(WARNING "VoiceText library is not found at ${VT_LIB_PATH_OLD} or ${VT_LIB_PATH_NEW}") - endif() -endif() -configure_file(src/voice_text.cpp.in ${PROJECT_SOURCE_DIR}/src/voice_text.cpp) - - include_directories( - ${Boost_INCLUDE_DIRS} - ${catkin_INCLUDE_DIRS} - ) - add_executable(voice_text src/voice_text.cpp) - add_dependencies(voice_text ${PROJECT_NAME}_generate_messages_cpp ${PROJECT_NAME}_gencfg) - set_target_properties(voice_text PROPERTIES COMPILE_FLAGS -D_REENTRANT) +include_directories( + include + ${Boost_INCLUDE_DIRS} + ${catkin_INCLUDE_DIRS} +) -if(NOT VT_LIB_PATH) - message(WARNING "Building dummy library") - add_library(vt_dummy src/dummy/vt_dummy.cpp) - set_target_properties(vt_dummy PROPERTIES 
LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}) - set_target_properties(vt_dummy PROPERTIES LIBRARY_OUTPUT_NAME vt_jpn) - set_target_properties(voice_text PROPERTIES COMPILE_FLAGS -DUSE_DUMMY_INCLUDE) - set(VT_LIB_PATH ${PROJECT_BINARY_DIR}/libvt_jpn.so) -endif() +add_executable(voice_text src/voice_text.cpp src/vt_handler.cpp) +add_dependencies(voice_text ${PROJECT_NAME}_generate_messages_cpp ${PROJECT_NAME}_gencfg) +set_target_properties(voice_text PROPERTIES COMPILE_FLAGS -D_REENTRANT) - target_link_libraries(voice_text - ${catkin_LIBRARIES} - ${VT_LIB_PATH} -lm -lpthread - ) -if(NOT EXISTS ${VT_LIB_PATH}) - add_dependencies(voice_text vt_dummy) -endif() +target_link_libraries(voice_text + ${catkin_LIBRARIES} -lm -lpthread -ldl +) install(TARGETS voice_text # do not install vt_dummy target, that should be installed from voice_text library ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} diff --git a/3rdparty/voice_text/README.md b/3rdparty/voice_text/README.md index ed1159ecf..20f1da8ac 100644 --- a/3rdparty/voice_text/README.md +++ b/3rdparty/voice_text/README.md @@ -5,9 +5,23 @@ ROS Interface for HOYA VoiceText Speech Synthesis Engine ## Installation -1. Install VoiceText SDK -2. Put license file -3. Build this package +### 1. Install VoiceText SDK +#### If you have the VoiceText SDK installer binary, follow the official guide and install both the engine and the SDK. +#### If you don't have the SDK installer binary but have the VoiceText API binary, follow the guide below. +1. Install the VoiceText Engine by following the official guide +2. Copy the VoiceText API binaries to the VoiceText binary directory + The VoiceText API package includes binary libraries and a header file. You have to copy them to the specific directories by executing the following commands. + ```bash + cd /path_to_api_package_directory # e.g.
cd ~/Downloads/RS_VTAPI_SDK_Linux_4.3.0.2/20201113_VTAPI4.3.0.2_LINUX + cd bin/x64 # cd to bin/x86 instead if your system has an x86 architecture + # Assuming the VoiceText engine's talker is hikari and its type is D16. If yours differ, use the appropriate directory. + sudo cp -a * /usr/vt/hikari/D16/bin # Don't forget -a, so that symbolic links are not broken. + cd ../../include/ + sudo mkdir /usr/vt/hikari/D16/inc # the directory must be named inc, not include + sudo cp vtapi.h /usr/vt/hikari/D16/inc + ``` +### 2. Put license file +### 3. Build this package ```bash cd /path/to/catkin_workspace diff --git a/3rdparty/voice_text/include/vt_handler.h b/3rdparty/voice_text/include/vt_handler.h new file mode 100644 index 000000000..291b9d12e --- /dev/null +++ b/3rdparty/voice_text/include/vt_handler.h @@ -0,0 +1,107 @@ +/* + * vt_handler.h + * Author: Yoshiki Obinata + */ + +#ifndef VT_HANDLER_H_ +#define VT_HANDLER_H_ + +#include +#include +#include +#include + +// logging +#include + +#include "vt_jpn.h" +#include "vtapi.h" + +#define VT_ROOT "/usr/vt/*/*" +#define PATH_MAX 1024 + +#if __x86_64__ || __ppc64__ +#define ENV64 +#else +#define ENV32 +#endif + +typedef enum VT_TYPE{ +NO_VT, +VT_SDK, +VT_API +} VT_Types; + +namespace fs = boost::filesystem; + +class VTHandler{ + public: + VTHandler(const std::string license_path, const std::string db_path); + ~VTHandler(); + bool VTH_TextToFile(const int pitch, const int speed, const int volume, const int pause, + const std::string text, const std::string wave_path); + + private: + void* dl_handle; + VT_Types vt_type; + + // define handle + bool LoadSym(); + + // Load symbols + // Related to VoiceText SDK + std::vector VTSDK_func_ = { + "VT_LOADTTS_JPN", + "VT_UNLOADTTS_JPN", + "VT_GetTTSInfo_JPN", + "VT_TextToFile_JPN" + }; + + // Related to ReadSpeaker API + std::vector VTAPI_func_ = { + "VTAPI_Init", + "VTAPI_CreateHandle", + "VTAPI_SetLicenseFolder", + "VTAPI_GetEngine", + "VTAPI_SetEngineHandle", + "VTAPI_SetAttr", + "VTAPI_SetOutputFile",
"VTAPI_TextToFile", + "VTAPI_GetLastErrorInfo", + "VTAPI_ReleaseHandle", + "VTAPI_UnloadEngine", + "VTAPI_Exit" + }; + + // symbol map + std::map VTSDK_s_map_; + std::map VTAPI_s_map_; + + // Load Functions + // Related to VoiceText SDK + short (*VT_LOADTTS_JPN)(HWND, int, char*, char*); + void (*VT_UNLOADTTS_JPN)(int); + int (*VT_GetTTSInfo_JPN)(int, char*, void*, int); + short (*VT_TextToFile_JPN)(int, char*, char*, int, int, int, int, int, int, int); + + // Related to ReadSpeaker API + int (*VTAPI_Init)(char*); + VTAPI_HANDLE (*VTAPI_CreateHandle)(); + void (*VTAPI_SetLicenseFolder)(char*); + VTAPI_ENGINE_HANDLE (*VTAPI_GetEngine)(char*, char*); + int (*VTAPI_SetEngineHandle)(VTAPI_HANDLE, VTAPI_ENGINE_HANDLE); + int (*VTAPI_SetAttr)(VTAPI_HANDLE, int, int); + int (*VTAPI_SetOutputFile)(VTAPI_HANDLE, char*, int); + int (*VTAPI_TextToFile)(VTAPI_HANDLE, void*, int, int); + VTAPI_ERRS_INFO* (*VTAPI_GetLastErrorInfo)(VTAPI_HANDLE); + void (*VTAPI_ReleaseHandle)(VTAPI_HANDLE); + int (*VTAPI_UnloadEngine)(VTAPI_ENGINE_HANDLE); + void (*VTAPI_Exit)(); + + // ReadSpeaker API handler + VTAPI_HANDLE hVTAPI; + VTAPI_ENGINE_HANDLE hEngine; +}; + + +#endif // VT_HANDLER_H_ diff --git a/3rdparty/voice_text/include/vt_jpn.h b/3rdparty/voice_text/include/vt_jpn.h new file mode 100644 index 000000000..6a8000d23 --- /dev/null +++ b/3rdparty/voice_text/include/vt_jpn.h @@ -0,0 +1,294 @@ +/* +* Copyright (c) 2004 Voiceware Co., Ltd., All rights reserved. 
+* +* VoiceText +*/ + +#ifndef VT_JPN_H +#define VT_JPN_H + +#if defined(__cplusplus) + extern "C" { +#endif + +#if !defined(VT_BASIC_DEFINE) + #if defined(WIN32) + #if !defined(_DllMode) + #define _DllMode(_type_) __declspec( dllimport ) _type_ + #endif + #else + #if !defined(_DllMode) + #define _DllMode(_type_) extern _type_ + #endif + typedef int HWND; + #endif +#endif + + + + +/*===========================================================================*/ +/* Text format (used in texttype) */ +#if !defined(VT_BASIC_DEFINE) + #if !defined(VT_TEXT_FMT_PLAIN_TEXT) + #define VT_TEXT_FMT_PLAIN_TEXT 0 + #endif + + #if !defined(VT_TEXT_FMT_JEITA) + #define VT_TEXT_FMT_JEITA 4 + #endif + + #if !defined(VT_TEXT_FMT_JEITA_PLUS) + #define VT_TEXT_FMT_JEITA_PLUS 6 + #endif +#endif + + + +/*===========================================================================*/ +/* LOAD & UNLOAD */ +#if !defined(VT_BASIC_DEFINE) + /* Return Value */ + #define VT_LOADTTS_SUCCESS 0 + #define VT_LOADTTS_ERROR_CONFLICT_DBPATH 1 + #define VT_LOADTTS_ERROR_TTS_STRUCTURE 2 + #define VT_LOADTTS_ERROR_TAGGER 3 + #define VT_LOADTTS_ERROR_BREAK_INDEX 4 + #define VT_LOADTTS_ERROR_TPP_DICT 5 + #define VT_LOADTTS_ERROR_TABLE 6 + #define VT_LOADTTS_ERROR_UNIT_INDEX 7 + #define VT_LOADTTS_ERROR_PROSODY_DB 8 + #define VT_LOADTTS_ERROR_PCM_DB 9 + #define VT_LOADTTS_ERROR_PM_DB 10 + #define VT_LOADTTS_ERROR_UNKNOWN 11 +#endif + +_DllMode(short) VT_LOADTTS_JPN(HWND hWnd, int nSpeakerID, char *db_path, char *licensefile); +_DllMode(void) VT_UNLOADTTS_JPN(int nSpeakerID); + + + +/*===========================================================================*/ +/* Load/Unload UserDict API */ +#if !defined(VT_BASIC_DEFINE) + /* Return Value */ + #define VT_LOAD_USERDICT_SUCCESS (1) + #define VT_LOAD_USERDICT_ERROR_INVALID_INDEX (-1) + #define VT_LOAD_USERDICT_ERROR_INDEX_BUSY (-2) + #define VT_LOAD_USERDICT_ERROR_LOAD_FAIL (-3) + #define VT_LOAD_USERDICT_ERROR_UNKNOWN (-4) + + #define 
VT_UNLOAD_USERDICT_SUCCESS (1) + #define VT_UNLOAD_USERDICT_ERROR_NULL_INDEX (-1) + #define VT_UNLOAD_USERDICT_ERROR_INVALID_INDEX (-2) + #define VT_UNLOAD_USERDICT_ERROR_UNKNOWN (-3) +#endif + +_DllMode(short) VT_LOAD_UserDict_JPN(int dictidx, char *filename); +_DllMode(short) VT_UNLOAD_UserDict_JPN(int dictidx); + + + +/*===========================================================================*/ +/* SOUND CARD API */ +#if !defined(VT_BASIC_DEFINE) + /* Return Value */ + #define VT_PLAY_API_SUCCESS (1) + #define VT_PLAY_API_ERROR_CREATE_THREAD (-1) + #define VT_PLAY_API_ERROR_NULL_TEXT (-2) + #define VT_PLAY_API_ERROR_EMPTY_TEXT (-3) + #define VT_PLAY_API_ERROR_DB_NOT_LOADED (-4) + #define VT_PLAY_API_ERROR_INITPLAY (-5) + #define VT_PLAY_API_ERROR_UNKNOWN (-6) +#endif + +#if defined(WIN32) + _DllMode(short) VT_PLAYTTS_JPN(HWND hcaller, UINT umsg, char *text_buff, int nSpeakerID, int pitch, int speed, int volume, int pause, int dictidx, int texttype); + _DllMode(void) VT_STOPTTS_JPN(void); + _DllMode(void) VT_RESTARTTTS_JPN(void); + _DllMode(void) VT_PAUSETTS_JPN(void); +#endif + + + +/*===========================================================================*/ +/* FILE WRITE API */ +#if !defined(VT_BASIC_DEFINE) + /* Return Value */ + #define VT_FILE_API_SUCCESS (1) + #define VT_FILE_API_ERROR_INVALID_FORMAT (-1) + #define VT_FILE_API_ERROR_CREATE_THREAD (-2) + #define VT_FILE_API_ERROR_NULL_TEXT (-3) + #define VT_FILE_API_ERROR_EMPTY_TEXT (-4) + #define VT_FILE_API_ERROR_DB_NOT_LOADED (-5) + #define VT_FILE_API_ERROR_OUT_FILE_OPEN (-6) + #define VT_FILE_API_ERROR_UNKNOWN (-7) + + /* Audio Format */ + enum { + VT_FILE_API_FMT_S16PCM = 0, + VT_FILE_API_FMT_ALAW = 1, + VT_FILE_API_FMT_MULAW = 2, + VT_FILE_API_FMT_DADPCM = 3, + VT_FILE_API_FMT_S16PCM_WAVE = 4, + VT_FILE_API_FMT_U08PCM_WAVE = 5, + // VT_FILE_API_FMT_IMA_WAVE = 6, /* not supported! 
*/ + VT_FILE_API_FMT_ALAW_WAVE = 7, + VT_FILE_API_FMT_MULAW_WAVE = 8, + VT_FILE_API_FMT_MULAW_AU = 9, + }; +#endif + +_DllMode(short) VT_TextToFile_JPN(int fmt, char *tts_text, char *filename, int nSpeakerID, int pitch, int speed, int volume, int pause, int dictidx, int texttype); + + + +/*===========================================================================*/ +/* BUFFER I/O API */ +#if !defined(VT_BASIC_DEFINE) + /* Return Value */ + #define VT_BUFFER_API_PROCESSING (0) + #define VT_BUFFER_API_DONE (1) + #define VT_BUFFER_API_ERROR_INVALID_FORMAT (-1) + #define VT_BUFFER_API_ERROR_CREATE_THREAD (-2) + #define VT_BUFFER_API_ERROR_NULL_TEXT (-3) + #define VT_BUFFER_API_ERROR_EMPTY_TEXT (-4) + #define VT_BUFFER_API_ERROR_NULL_BUFFER (-5) + #define VT_BUFFER_API_ERROR_DB_NOT_LOADED (-6) + #define VT_BUFFER_API_ERROR_THREAD_BUSY (-7) + #define VT_BUFFER_API_ERROR_ABNORMAL_CONDITION (-8) + #define VT_BUFFER_API_ERROR_UNKNOWN (-9) + + /* Audio Format */ + enum { + VT_BUFFER_API_FMT_S16PCM = VT_FILE_API_FMT_S16PCM, + VT_BUFFER_API_FMT_ALAW = VT_FILE_API_FMT_ALAW, + VT_BUFFER_API_FMT_MULAW = VT_FILE_API_FMT_MULAW, + VT_BUFFER_API_FMT_DADPCM = VT_FILE_API_FMT_DADPCM, + }; +#endif + +_DllMode(int) VT_TextToBuffer_JPN(int fmt, char *tts_text, char *output_buff, int *output_len, int flag, int nThreadID, int nSpeakerID, int pitch, int speed, int volume, int pause, int dictidx, int texttype); + + + +/*===========================================================================*/ +/* CONFIGURE API */ +_DllMode(void) VT_SetPitchSpeedVolumePause_JPN(int pitch, int speed, int volume, int pause, int nSpeakerID); +_DllMode(void) VT_SetCommaPause_JPN(int pause, int nSpeakerID); + + + +/*=========================================================================== +SYNOPSIS + int VT_GetTTSInfo_JPN(int request, char *licensefile, void *value, int valuesize); + +PARAMETERS + request + VT_BUILD_DATE (char*): library build date + VT_VERIFY_CODE (int *): verification result(licensefile is 
required) + VT_MAX_CHANNEL (int *): max no. of possible channels(licensefile is required) + VT_DB_DIRECTORY (char*): default root DB fold name + VT_LOAD_SUCCESS_CODE (int *): return value, when db loading is success + VT_MAX_SPEAKER (int *): max no. of speaker ( >= 0 ) + VT_DEF_SPEAKER (int *): default speaker id ( >= 0 && < max no. of speaker ) + VT_CODEPAGE (int *): supported ansi codepage (WIN32 only) + VT_DB_ACCESS_MODE (int *): file or ram i/o ? (file:0, ram:1) + VT_FIXED_POINT_SUPPORT (int *): fixed point simulated or not? (float:0, fixed:1) + VT_SAMPLING_FREQUENCY (int *): current sampling frequency (8000, 11025, 16000 ) + VT_MAX_PITCH_RATE (int *): max value of pitch rate (%) + VT_DEF_PITCH_RATE (int *): default value of pitch rate (%) + VT_MIN_PITCH_RATE (int *): min value of pitch rate (%) + VT_MAX_SPEED_RATE (int *): max value of speed rate (%) + VT_DEF_SPEED_RATE (int *): default value of speed rate (%) + VT_MIN_SPEED_RATE (int *): min value of speed rate (%) + VT_MAX_VOLUME (int *): max value of volume (%) + VT_DEF_VOLUME (int *): default value of volume (%) + VT_MIN_VOLUME (int *): min value of volume (%) + VT_MAX_SENT_PAUSE (int *): max value of sentence pause (msec) + VT_DEF_SENT_PAUSE (int *): default value of sentence pause (msec) + VT_MIN_SENT_PAUSE (int *): min value of sentence pause (msec) + VT_DB_BUILD_DATE (char*): embedded db build date (for embedded engine only) + VT_MAX_COMMA_PAUSE (int *): max value of comma pause (msec) + VT_DEF_COMMA_PAUSE (int *): default value of comma pause (msec) + VT_MIN_COMMA_PAUSE (int *): min value of comma pause (msec) + + licensefile + if NULL, use default licensefile. + + value + VT_DB_DIRECTORY and VT_BUILD_DATE requests are (char *), and any other request is (int *) + + valuesize + maximum length of value in characters + +RETURN VALUE + On success, zero(VT_INFO_SUCCESS) is returned. 
+ On error, the return value depends on the operation: + VT_INFO_ERROR_NOT_SUPPORTED_REQUEST (1) + VT_INFO_ERROR_INVALID_REQUEST (2) + VT_INFO_ERROR_NULL_VALUE (3) + VT_INFO_ERROR_SHORT_LENGTH_VALUE (4) + VT_INFO_ERROR_UNKNOWN (5) +===========================================================================*/ + +#if !defined(VT_BASIC_DEFINE) + /* Return Value */ + #define VT_INFO_SUCCESS (0) + #define VT_INFO_ERROR_NOT_SUPPORTED_REQUEST (1) + #define VT_INFO_ERROR_INVALID_REQUEST (2) + #define VT_INFO_ERROR_NULL_VALUE (3) + #define VT_INFO_ERROR_SHORT_LENGTH_VALUE (4) + #define VT_INFO_ERROR_UNKNOWN (5) + + /* Request */ + enum + { + VT_BUILD_DATE = 0, + VT_VERIFY_CODE = 1, + VT_MAX_CHANNEL = 2, + VT_DB_DIRECTORY = 3, + VT_LOAD_SUCCESS_CODE = 4, + VT_MAX_SPEAKER = 5, + VT_DEF_SPEAKER = 6, + VT_CODEPAGE = 7, + VT_DB_ACCESS_MODE = 8, + VT_FIXED_POINT_SUPPORT = 9, + VT_SAMPLING_FREQUENCY = 10, + VT_MAX_PITCH_RATE = 11, + VT_DEF_PITCH_RATE = 12, + VT_MIN_PITCH_RATE = 13, + VT_MAX_SPEED_RATE = 14, + VT_DEF_SPEED_RATE = 15, + VT_MIN_SPEED_RATE = 16, + VT_MAX_VOLUME = 17, + VT_DEF_VOLUME = 18, + VT_MIN_VOLUME = 19, + VT_MAX_SENT_PAUSE = 20, + VT_DEF_SENT_PAUSE = 21, + VT_MIN_SENT_PAUSE = 22, + VT_DB_BUILD_DATE = 23, + VT_MAX_COMMA_PAUSE = 24, + VT_DEF_COMMA_PAUSE = 25, + VT_MIN_COMMA_PAUSE = 26, + VT_MAX_SYMBOL_OPEN_PAUSE = 27, + VT_DEF_SYMBOL_OPEN_PAUSE = 28, + VT_MIN_SYMBOL_OPEN_PAUSE = 29, + VT_MAX_SYMBOL_CLOSE_PAUSE = 30, + VT_DEF_SYMBOL_CLOSE_PAUSE = 31, + VT_MIN_SYMBOL_CLOSE_PAUSE = 32, + }; +#endif + +_DllMode(int) VT_GetTTSInfo_JPN(int request, char *licensefile, void *value, int valuesize); + + +#if !defined(VT_BASIC_DEFINE) + #define VT_BASIC_DEFINE +#endif + +#if defined(__cplusplus) + } +#endif + +#endif /* VT_JPN_H */ diff --git a/3rdparty/voice_text/include/vtapi.h b/3rdparty/voice_text/include/vtapi.h new file mode 100644 index 000000000..9996df717 --- /dev/null +++ b/3rdparty/voice_text/include/vtapi.h @@ -0,0 +1,262 @@ +/* + * Copyright (c) 2000-2019 
ReadSpeaker + * All Rights Reserved. + */ + +#ifndef _VTAPI_H_ +#define _VTAPI_H_ + +#if defined(__cplusplus) +extern "C" +{ +#endif + + +#if !defined(VTAPI_decl) +# if defined(WIN32) || defined(WINCE) +# define VTAPI_decl __declspec( dllexport ) +# define _CRTDBG_MAP_ALLOC +# include +# if defined(WINCE) +# else +# include +# endif + #else + #define VTAPI_decl extern + #endif +#endif + + +#if defined(WIN32) || defined(WINCE) +#pragma warning(disable:4996) +#include +#else +#if !defined(HWND_DEFINE) +#define HWND_DEFINE +typedef int HWND; +#endif +#if !defined(DWORD_DEFINE) +#define DWORD_DEFINE +typedef unsigned long DWORD; +#endif +#if !defined(UINT_DEFINE) +#define UINT_DEFINE +typedef unsigned int UINT; +#endif +#endif + +typedef struct VOICE_INFO* VTAPI_HANDLE; +typedef struct ENGINE_INFO* VTAPI_ENGINE_HANDLE; +#if 1 //defined(USE_NEW_USERDICT_MANAGER) +typedef struct USERDICT_INFO* VTAPI_USERDICT_HANDLE; +#endif + +#define VTAPI_VERSION "4.3.0.2" + +typedef enum OUTPUTFORMAT +{ + FORMAT_16PCM = 0, + FORMAT_8PCM = 1, + FORMAT_ALAW_PCM = 2, + FORMAT_MULAW_PCM = 3, + FORMAT_ADPCM_PCM = 4, + FORMAT_16PCM_WAV = 5, + FORMAT_8PCM_WAV = 6, + FORMAT_ALAW_WAV = 7, + FORMAT_MULAW_WAV = 8, + + FORMAT_MAX +} Output_Format; + +typedef enum TEXTTYPES +{ + TEXT_FORMAT_DEFAULT = 0, // multibyte + TEXT_FORMAT_UTF8 = 2 +} Text_Types; + +typedef enum AUDIOEVENTTYPES +{ + AUDIO_EVENT_NO_EVENTS = 0, + AUDIO_EVENT_START_INPUT_STREAM = 1, + AUDIO_EVENT_END_INPUT_STREAM = 2, + AUDIO_EVENT_VOICE_CHANGE = 4, + AUDIO_EVENT_TTS_MARK = 8, + AUDIO_EVENT_WORD_BOUNDARY = 16, + AUDIO_EVENT_SENTENCE_BOUNDARY = 32, + AUDIO_EVENT_BUFFER = 64, + AUDIO_EVENT_BUFFER_END = 128, + AUDIO_EVENT_FAILURE = 256, + AUDIO_EVENT_READY_PLAY = 512, + AUDIO_EVENT_ALL = 1023 +} Audio_Event_Types; + +typedef enum ATTRFLAGS +{ + ATTR_PITCH = 0, + ATTR_SPEED = 1, + ATTR_VOLUME = 2, + ATTR_PAUSE = 3, + ATTR_COMMAPAUSE = 5 +} Attr_Flags; + +typedef enum PRIORITYTYPES +{ + PRIOR_DEFAULT = 0, + PRIOR_OVERLAP = 1, + 
PRIOR_ALERT = 2, +} Priority_Types; + +#define MAX_STR_LEN 256 +#define MAX_ERR_MSG 512 + +typedef struct _audio_format +{ + int nChannel; + int nBits; + int nSampling; +} VTAPI_AUDIO_FORMAT; + +typedef struct _audio_info +{ + VTAPI_AUDIO_FORMAT audiofmt; + int nDevNo; +} VTAPI_AUDIO_INFO; + +typedef struct +{ + int nId; + int nTextStartOffset; // Text position of Word or Sentence or Mark + int nTextEndOffset; // Text position of Word or Sentence or Mark + char szStrValue[MAX_STR_LEN]; // Name of Mark + + int nOffsetInStream; // Buffer position of Mark + int nOffsetInBuffer; // Buffer position of Mark + + int nError; // Error // VTAPI4 Version over 4.2.2.x + + int nBufferSize; // Buffer size of Word or FrameBuffer + char* pFrameBuffer; // FrameBuffer +} VTAPI_AUDIO_EVENT; + + +typedef struct _vtapi_errs +{ + int nErr; + char szMsg[MAX_ERR_MSG]; +} VTAPI_ERRS_INFO; + + +#define VTAPI_SUCCESS 0 + +#define VTAPI_INVALID_PARAM_ERROR -1 +#define VTAPI_INVALID_VALUE_ERROR -2 +#define VTAPI_INCORRECT_SET_ERROR -3 +#define VTAPI_INVALID_ID_OR_THREAD_ERROR -4 +#define VTAPI_USERDICT_ERROR -5 +#define VTAPI_SSML_SYNTAX_ERROR -6 +#define VTAPI_MEMORY_ALLOC_ERROR -7 +#define VTAPI_NOT_OPENBUFFER_ERROR -8 +#define VTAPI_VTSSML_INVALID_ERROR -9 +#define VTAPI_INVALID_ENGINE_ERROR -10 +#define VTAPI_OVER_CHANNEL_ERROR -11 +#define VTAPI_NODATA_ERROR -12 +#define VTAPI_NOT_SUPPORTED_ERROR -13 + +#define VTAPI_DLL_VTPLAY_NOT_LINKING -21 +#define VTAPI_DLL_VTSSML_NOT_LINKING -22 +#define VTAPI_DLL_VTSAVE_NOT_LINKING -23 +#define VTAPI_DLL_VTCONV_NOT_LINKING -24 +#define VTAPI_DLL_VTEFFECT_NOT_LINKING -25 + + + +// API for VTAPI + +// VTAPI_INIT Return Value : You can check by XOR the lower values. 
+// If all Library links succeed, 0 or 0x11000 +#define VTAPI_LIB_VTPLAY_NOT_LINKING 0x1 +#define VTAPI_LIB_VTSAVE_NOT_LINKING 0x10 +#define VTAPI_LIB_VTCONV_NOT_LINKING 0x100 +#define VTAPI_LIB_VTSSML_NOT_LINKING 0x1000 + +VTAPI_decl int VTAPI_Init(char *szWorkingDir); // It is automatically Initiated when using other functions. +VTAPI_decl void VTAPI_Exit(); +VTAPI_decl void VTAPI_SetLicenseFolder(char *szPath); + +// API for VTAPI-Voice +VTAPI_decl VTAPI_HANDLE VTAPI_CreateHandle(); +VTAPI_decl void VTAPI_ReleaseHandle(VTAPI_HANDLE hVTAPI); + + +VTAPI_decl int VTAPI_SetEngineHandle(VTAPI_HANDLE hVTAPI, VTAPI_ENGINE_HANDLE hEngine); +VTAPI_decl VTAPI_ENGINE_HANDLE VTAPI_GetEngineHandle(VTAPI_HANDLE hVTAPI); + +VTAPI_decl int VTAPI_SetOutputAudio(VTAPI_HANDLE hVTAPI, VTAPI_AUDIO_INFO stAudioInfo); +VTAPI_decl int VTAPI_SetOutputFile(VTAPI_HANDLE hVTAPI, char *pszFileName, int nOutputFormat); +VTAPI_decl int VTAPI_SetOutputBuffer(VTAPI_HANDLE hVTAPI, int nOutputFormat); + +VTAPI_decl int VTAPI_SetAttr(VTAPI_HANDLE hVTAPI, int nFlag, int nValue); +VTAPI_decl int VTAPI_GetAttr(VTAPI_HANDLE hVTAPI, int nFlag, int *nValue); + + +#if !defined(__VTAPI_DEFINED_CALLBACK_FUNCTION__) +#define __VTAPI_DEFINED_CALLBACK_FUNCTION__ +typedef void (*LPPEventProc)(VTAPI_HANDLE hVTAPI, int nEventType, void *pParam); +#endif + +VTAPI_decl int VTAPI_SetEvent(VTAPI_HANDLE hVTAPI, void *pEventProc, int nEventTypes); + +#if defined(WIN32) || defined(WINCE) +// API for Audio Output (VTAPI_Speak - File Output included) +VTAPI_decl int VTAPI_Speak(VTAPI_HANDLE hVTAPI, void *pszText, int nTextLength, int nTextType); +VTAPI_decl int VTAPI_Stop(VTAPI_HANDLE hVTAPI); +VTAPI_decl int VTAPI_Pause(VTAPI_HANDLE hVTAPI); +VTAPI_decl int VTAPI_Resume(VTAPI_HANDLE hVTAPI); +VTAPI_decl int VTAPI_SetPriority(VTAPI_HANDLE hVTAPI, int nPrior); +#endif + +// API for FileSave +VTAPI_decl int VTAPI_TextToFile(VTAPI_HANDLE hVTAPI, void *pszText, int nTextLength, int nTextType); + +// API for Buffer +typedef 
enum BUFSIZE_VALUE +{ + VAL_DEFAULT = -1, + VAL_ONEBUF = 0, +} BUFSIZE_VALUE; +VTAPI_decl int VTAPI_BufferOpen(VTAPI_HANDLE hVTAPI, void *pszText, int nTextLength, int nTextType, int nBufferSize); +VTAPI_decl int VTAPI_GetBuffer(VTAPI_HANDLE hVTAPI, char** pFrameBuffer); +VTAPI_decl void VTAPI_FreeBuffer(char* ptr); +VTAPI_decl int VTAPI_BufferClose(VTAPI_HANDLE hVTAPI); + +VTAPI_decl VTAPI_ERRS_INFO *VTAPI_GetLastErrorInfo(VTAPI_HANDLE hVTAPI); + +VTAPI_decl int VTAPI_SetUserData(VTAPI_HANDLE hVTAPI, void *ptr); +VTAPI_decl void *VTAPI_GetUserData(VTAPI_HANDLE hVTAPI); + +VTAPI_decl void VTAPI_GetVersion(char output[100]); + +// API for tts engine +VTAPI_decl VTAPI_ENGINE_HANDLE VTAPI_GetEngine(char *pszSpeaker, char *pszType) ; +// support for not installed engines +VTAPI_decl VTAPI_ENGINE_HANDLE VTAPI_AddNewEngineInfo(char *pszSpeaker, char *pszType, int nSpeakerID, char *pszDBPath, char *pszLang, char *pszGender, int nCodePage, char *pszISOCode, char *pszVendor, int nSamplingRate); + +VTAPI_decl int VTAPI_GetEngineInfoFieldEx(VTAPI_ENGINE_HANDLE hEngine, char *pszSpeaker, char *pszType,char *pszDllPath, char *pszDBPath, char *pszLang, char *pszGender, char *pszVersion, int *nSampling, int *nChannel, char *pszISOCode); + +VTAPI_decl int VTAPI_LoadEngine(VTAPI_ENGINE_HANDLE hEngine); +VTAPI_decl int VTAPI_UnloadEngine(VTAPI_ENGINE_HANDLE hEngine); +VTAPI_decl int VTAPI_GetEngineVersion(VTAPI_ENGINE_HANDLE hEngine, char output[100]); + +#if 1 //defined(USE_NEW_USERDICT_MANAGER) +VTAPI_decl VTAPI_USERDICT_HANDLE VTAPI_CreateUserDictHandle(char* pszDicFile); +VTAPI_decl void VTAPI_ReleaseUserDictHandle(VTAPI_USERDICT_HANDLE hUSERDICT); +VTAPI_decl int VTAPI_SetUserDictHandle(VTAPI_HANDLE hVTAPI, VTAPI_USERDICT_HANDLE hUSERDICT); +VTAPI_decl VTAPI_USERDICT_HANDLE VTAPI_GetUserDictHandle(VTAPI_HANDLE hVTAPI); +VTAPI_decl int VTAPI_GetUserDictInfo(VTAPI_USERDICT_HANDLE hUSERDICT, char* pszDicFile); +#endif + +#if defined(__cplusplus) +} +#endif + +#endif /* _VTAPI_H_ 
*/ diff --git a/3rdparty/voice_text/src/dummy/vt_dummy.cpp b/3rdparty/voice_text/src/dummy/vt_dummy.cpp deleted file mode 100644 index 0add52d66..000000000 --- a/3rdparty/voice_text/src/dummy/vt_dummy.cpp +++ /dev/null @@ -1,13 +0,0 @@ -#include "vt_dummy.h" -#include - -void VT_UNLOADTTS_JPN(int) {}; -int VT_LOADTTS_JPN(int, int, char*, char*) { - fprintf(stderr, "LOADING DUMMY VT_LOADTTS_JPN\n"); - fprintf(stderr, "You need to install voice_text application\n"); - fprintf(stderr, "exiting....\n"); - return -1; -}; -int VT_TextToFile_JPN(int, char *, char *, int, int, int, int, int, int, int) {}; - -void VT_GetTTSInfo_JPN(int , char *, void *, int) {}; diff --git a/3rdparty/voice_text/src/dummy/vt_dummy.h b/3rdparty/voice_text/src/dummy/vt_dummy.h deleted file mode 100644 index 3d7742967..000000000 --- a/3rdparty/voice_text/src/dummy/vt_dummy.h +++ /dev/null @@ -1,50 +0,0 @@ -#ifndef __VT_DUMMY_H__ -#define __VT_DUMMY_H__ -extern "C" { - void VT_UNLOADTTS_JPN(int); - int VT_LOADTTS_JPN(int, int, char*, char*); - int VT_TextToFile_JPN(int, char *, char *, int, int, int, int, int, int, int); - void VT_GetTTSInfo_JPN(int , char *, void *, int); - int VT_LOADTTS_SUCCESS = 0; - int VT_FILE_API_SUCCESS = 0; - int VT_FILE_API_FMT_S16PCM_WAVE = 4; // https://pastebin.com/9LeCr2HN -} - -enum - { - VT_BUILD_DATE = 0, - VT_VERIFY_CODE = 1, - VT_MAX_CHANNEL = 2, - VT_DB_DIRECTORY = 3, - VT_LOAD_SUCCESS_CODE = 4, - VT_MAX_SPEAKER = 5, - VT_DEF_SPEAKER = 6, - VT_CODEPAGE = 7, - VT_DB_ACCESS_MODE = 8, - VT_FIXED_POINT_SUPPORT = 9, - VT_SAMPLING_FREQUENCY = 10, - VT_MAX_PITCH_RATE = 11, - VT_DEF_PITCH_RATE = 12, - VT_MIN_PITCH_RATE = 13, - VT_MAX_SPEED_RATE = 14, - VT_DEF_SPEED_RATE = 15, - VT_MIN_SPEED_RATE = 16, - VT_MAX_VOLUME = 17, - VT_DEF_VOLUME = 18, - VT_MIN_VOLUME = 19, - VT_MAX_SENT_PAUSE = 20, - VT_DEF_SENT_PAUSE = 21, - VT_MIN_SENT_PAUSE = 22, - VT_DB_BUILD_DATE = 23, - VT_MAX_COMMA_PAUSE = 24, - VT_DEF_COMMA_PAUSE = 25, - VT_MIN_COMMA_PAUSE = 26, - 
VT_MAX_SYMBOL_OPEN_PAUSE = 27, - VT_DEF_SYMBOL_OPEN_PAUSE = 28, - VT_MIN_SYMBOL_OPEN_PAUSE = 29, - VT_MAX_SYMBOL_CLOSE_PAUSE = 30, - VT_DEF_SYMBOL_CLOSE_PAUSE = 31, - VT_MIN_SYMBOL_CLOSE_PAUSE = 32, - }; - -#endif //__VT_DUMMY_H__ diff --git a/3rdparty/voice_text/src/voice_text.cpp b/3rdparty/voice_text/src/voice_text.cpp new file mode 100644 index 000000000..c86bc21db --- /dev/null +++ b/3rdparty/voice_text/src/voice_text.cpp @@ -0,0 +1,90 @@ +/* + * voice_text.cpp + * Author: Yuki Furuta , Yoshiki Obinata + */ + +#include +#include +#include +#include + +#include +#include +#include + +// ROS +#include +#include +#include +#include + +#include "vt_handler.h" + +namespace fs = boost::filesystem; + +class VoiceText { +public: + typedef voice_text::VoiceTextConfig Config; + + VoiceText() : nh_(), pnh_("~"), db_path_(""), license_path_(""), dyn_srv_(pnh_){ + pnh_.param("db_path", db_path_, ""); + pnh_.setParam("db_path", db_path_); // for backward compatibility (db_path is usually set previously) + pnh_.param("license_path", license_path_, ""); + + dynamic_reconfigure::Server::CallbackType f = + boost::bind(&VoiceText::config_callback, this, _1, _2); + dyn_srv_.setCallback(f); + + h_vt_ = (new VTHandler(license_path_, db_path_)); + + srv_ = nh_.advertiseService("text_to_speech", &VoiceText::text_to_speech, this); + ROS_INFO("Advertised service text_to_speech\n"); + } + + ~VoiceText(){ + delete h_vt_; + } + + void config_callback(Config &config, uint32_t level) { + boost::mutex::scoped_lock lock(mutex_); + config_ = config; + } + + bool text_to_speech(voice_text::TextToSpeech::Request &req, + voice_text::TextToSpeech::Response &res) { + boost::mutex::scoped_lock lock(mutex_); + // load text from file + if (!fs::exists(fs::path(req.text_path))) { + ROS_ERROR_STREAM("text file " << req.text_path << " not found"); + res.ok = false; + return true; + } + std::ifstream ifs(req.text_path.c_str()); + std::string text = "", line = ""; + while (ifs && std::getline(ifs, line)) { 
+ text += line; + } + res.ok = h_vt_->VTH_TextToFile(config_.pitch, config_.speed, config_.volume, config_.pause, + text, req.wave_path); + return true; + } + + ros::NodeHandle nh_, pnh_; + boost::mutex mutex_; + dynamic_reconfigure::Server dyn_srv_; + Config config_; + ros::ServiceServer srv_; + bool initialized_; + std::string db_path_, license_path_; + VTHandler* h_vt_; +}; + +int main(int argc, char** argv) { + ros::init(argc, argv, "voice_text"); + + VoiceText vt; + + ros::spin(); + + return 0; +} diff --git a/3rdparty/voice_text/src/voice_text.cpp.in b/3rdparty/voice_text/src/voice_text.cpp.in deleted file mode 100644 index f52955098..000000000 --- a/3rdparty/voice_text/src/voice_text.cpp.in +++ /dev/null @@ -1,178 +0,0 @@ -/* - * voice_text_server.cpp - * Author: Yuki Furuta - */ - -#include -#include -#include -#include - -#include -#include - -// ROS -#include -#include -#include -#include - -// VoiceText -#ifdef USE_DUMMY_INCLUDE -#include "dummy/vt_dummy.h" -#else -#include "@VT_ROOT@/inc/vt_jpn.h" -#endif - -#define PATH_MAX 1024 - -namespace fs = boost::filesystem; - - -class VoiceText { -public: - typedef voice_text::VoiceTextConfig Config; - - VoiceText() : nh_(), pnh_("~"), db_path_(""), license_path_(""), dyn_srv_(pnh_) { - pnh_.param("db_path", db_path_, "@VT_ROOT@"); - pnh_.setParam("db_path", db_path_); // for backward compatibility (db_path is usually set previously) - pnh_.param("license_path", license_path_, ""); - - dynamic_reconfigure::Server::CallbackType f = - boost::bind(&VoiceText::config_callback, this, _1, _2); - dyn_srv_.setCallback(f); - } - - ~VoiceText() { - if (initialized_) { - VT_UNLOADTTS_JPN(-1); - } - } - - void config_callback(Config &config, uint32_t level) { - boost::mutex::scoped_lock lock(mutex_); - config_ = config; - } - - bool initialize() { - // initialize voice text - int ret = -1; - char* db_path_char = (char*)calloc(std::strlen(db_path_.c_str())+1, sizeof(char)); - std::strcpy(db_path_char, db_path_.c_str()); - 
char* license_path_char = NULL; - if (!license_path_.empty()) { - license_path_char = (char*)calloc(std::strlen(license_path_.c_str())+1, sizeof(char)); - std::strcpy(license_path_char, license_path_.c_str()); - } - ret = VT_LOADTTS_JPN((int)NULL, -1, db_path_char, license_path_char); - free(db_path_char); - if (!license_path_.empty()) free(license_path_char); - if (ret != VT_LOADTTS_SUCCESS) { - ROS_FATAL("Failed to load TTS engine (code %d)", ret); - if (ret == -1) { - ROS_FATAL("You must install voice_text library before building this library"); - } - return false; - } - VT_GetTTSInfo_JPN(VT_VERIFY_CODE, NULL, &ret, sizeof(int)); - if (ret != 0) { - ROS_FATAL_STREAM("Verification failed (VT_VERIFY_CODE " << ret << ")"); - return false; - } - - // Print voice text info - char szTmp[PATH_MAX]; - char szTmp2[PATH_MAX]; - VT_GetTTSInfo_JPN(VT_BUILD_DATE, NULL, szTmp, PATH_MAX); - sprintf(szTmp2, "VT_GetTTSInfo_JPN(VT_BUILD_DATE = %s\n", szTmp); - ROS_INFO_STREAM(szTmp2); - VT_GetTTSInfo_JPN(VT_VERIFY_CODE, NULL, &ret, sizeof(int)); - sprintf(szTmp2, "VT_GetTTSInfo_JPN(VT_VERIFY_CODE = %d\n", ret); - ROS_INFO_STREAM(szTmp2); - VT_GetTTSInfo_JPN(VT_MAX_CHANNEL, NULL, &ret, sizeof(int)); - sprintf(szTmp2, "VT_GetTTSInfo_JPN(VT_MAX_CHANNEL = %d\n", ret); - ROS_INFO_STREAM(szTmp2); - VT_GetTTSInfo_JPN(VT_DB_DIRECTORY, NULL, szTmp, PATH_MAX); - sprintf(szTmp2, "VT_GetTTSInfo_JPN(VT_DB_DIRECTORY = %s\n", szTmp); - ROS_INFO_STREAM(szTmp2); - VT_GetTTSInfo_JPN(VT_DEF_SPEAKER, NULL, &ret, sizeof(int)); - sprintf(szTmp2, "VT_GetTTSInfo_JPN(VT_DEF_SPEAKER = %d\n", ret); - ROS_INFO_STREAM(szTmp2); - VT_GetTTSInfo_JPN(VT_CODEPAGE, NULL, &ret, sizeof(int)); - sprintf(szTmp2, "VT_GetTTSInfo_JPN(VT_CODEPAGE = %d\n", ret); - ROS_INFO_STREAM(szTmp2); - - // advertise service - srv_ = nh_.advertiseService("text_to_speech", &VoiceText::text_to_speech, this); - - ROS_INFO_STREAM("Advertised service text_to_speech"); - - return true; - } - - bool 
text_to_speech(voice_text::TextToSpeech::Request &req, - voice_text::TextToSpeech::Response &res) { - boost::mutex::scoped_lock lock(mutex_); - // load text from file - if (!fs::exists(fs::path(req.text_path))) { - ROS_ERROR_STREAM("text file " << req.text_path << " not found"); - res.ok = false; - return true; - } - std::ifstream ifs(req.text_path.c_str()); - std::string text = "", line = ""; - while (ifs && std::getline(ifs, line)) { - text += line; - } - char* text_char = (char*)calloc(std::strlen(text.c_str())+1, sizeof(char)); - std::strcpy(text_char, text.c_str()); - - char* wave_char = (char*)calloc(std::strlen(req.wave_path.c_str())+1, sizeof(char)); - std::strcpy(wave_char, req.wave_path.c_str()); - - int ret = VT_TextToFile_JPN(VT_FILE_API_FMT_S16PCM_WAVE, - text_char, - wave_char, - -1, - config_.pitch, - config_.speed, - config_.volume, - config_.pause, - -1, -1); - - ROS_INFO_STREAM("voice text wave file is outputted to " << wave_char); - - free(text_char); - free(wave_char); - - if (ret != VT_FILE_API_SUCCESS) { - ROS_ERROR("Failed to execute tts: (code: %d)", ret); - res.ok = false; - return true; - } - - res.ok = true; - return true; - } - - ros::NodeHandle nh_, pnh_; - boost::mutex mutex_; - dynamic_reconfigure::Server dyn_srv_; - Config config_; - ros::ServiceServer srv_; - bool initialized_; - std::string db_path_, license_path_; -}; - -int main(int argc, char** argv) { - ros::init(argc, argv, "voice_text"); - - VoiceText vt; - if (!vt.initialize()) { - return 1; - }; - - ros::spin(); - - return 0; -} diff --git a/3rdparty/voice_text/src/vt_handler.cpp b/3rdparty/voice_text/src/vt_handler.cpp new file mode 100644 index 000000000..b59c227a8 --- /dev/null +++ b/3rdparty/voice_text/src/vt_handler.cpp @@ -0,0 +1,245 @@ +/* + * vt_handler.cpp + * Author: Yoshiki Obinata + */ + +#include "vt_handler.h" + +VTHandler::VTHandler(const std::string license_path, const std::string db_path){ + glob_t sdk_old_gbuf_, sdk_new_gbuf_, api_gbuf_; + char *lib_file_; 
+ char *dl_err_, *db_path_char_, *license_path_char_; + bool sym_status_; + int ret_; + + // Locate libraries +#ifdef ENV64 + glob(("/usr/vt/*/*/bin/x86_64/RAMIO/libvt_jpn.so"), 0, NULL, &sdk_old_gbuf_); // e.g., /usr/vt/sayaka/M16/bin/x86_64/RAMIO/libvt_jpn.so + glob(("/usr/vt/*/*/bin/LINUX64_GLIBC3/RAMIO/libvt_jpn.so"), 0, NULL, &sdk_new_gbuf_); // e.g., /usr/vt/risa/H16/bin/LINUX64_GLIBC3/RAMIO/libvt_jpn.so + glob(("/usr/vt/*/*/bin/libvtapi.so"), 0, NULL, &api_gbuf_); // e.g., /usr/vt/hikari/D16/bin/libvtapi.so +#elif ENV32 + glob(("/usr/vt/*/*/bin/x86_32/RAMIO/libvt_jpn.so"), 0, NULL, &sdk_old_gbuf_); // e.g., /usr/vt/sayaka/M16/bin/x86_32/RAMIO/libvt_jpn.so + glob(("/usr/vt/*/*/bin/LINUX32_GLIBC3/RAMIO/libvt_jpn.so"), 0, NULL, &sdk_new_gbuf_); // e.g., /usr/vt/risa/H16/bin/LINUX32_GLIBC3/RAMIO/libvt_jpn.so + glob(("/usr/vt/*/*/bin/libvtapi.so"), 0, NULL, &api_gbuf_); // e.g., /usr/vt/hikari/D16/bin/libvtapi.so +#endif + + if(sdk_old_gbuf_.gl_pathc > 0){ + this->vt_type = VT_SDK; + strcpy(lib_file_, sdk_old_gbuf_.gl_pathv[0]); + }else if(sdk_new_gbuf_.gl_pathc > 0){ + this->vt_type = VT_SDK; + strcpy(lib_file_, sdk_new_gbuf_.gl_pathv[0]); + }else if(api_gbuf_.gl_pathc > 0){ + this->vt_type = VT_API; + strcpy(lib_file_, api_gbuf_.gl_pathv[0]); + }else{ + this->vt_type = NO_VT; + } + + globfree(&sdk_old_gbuf_); + globfree(&sdk_new_gbuf_); + globfree(&api_gbuf_); + + // Load libraries + if(this->vt_type != NO_VT){ + ROS_INFO("Opening %s \n", lib_file_); + this->dl_handle = dlopen(lib_file_, RTLD_NOW); + if(this->dl_handle == NULL){ + dl_err_ = dlerror(); + ROS_FATAL_STREAM("Error occured when opening VoiceText or ReadSpeaker libraries " << + dl_err_); + return; + } + }else{ + ROS_FATAL("No Voice Text or Read Speaker libraries have found\n"); + return; + } + + // Load symbols + sym_status_ = LoadSym(); + if(!sym_status_){ + return; + } + + // Initialize VT Handler + // db_path is for backward compatibility + db_path_char_ = 
(char*)calloc(std::strlen(db_path.c_str())+1, sizeof(char)); + std::strcpy(db_path_char_, db_path.c_str()); + + // Load license file + license_path_char_ = NULL; + if(!license_path.empty()){ + license_path_char_ = (char*)calloc(std::strlen(license_path.c_str())+1, sizeof(char)); + std::strcpy(license_path_char_, license_path.c_str()); + }else{ + ROS_FATAL("Please set license file\n"); + return; + } + + // Load license file + if(this->vt_type == VT_SDK){ + ret_ = VT_LOADTTS_JPN((int)NULL, -1, db_path_char_, license_path_char_); + if(ret_ != VT_LOADTTS_SUCCESS){ + ROS_FATAL("Failed to load TTS engine (code %d)\n", ret_); + return; + } + VT_GetTTSInfo_JPN(VT_VERIFY_CODE, NULL, &ret_, sizeof(int)); + if (ret_ != 0) { + ROS_FATAL_STREAM("Verification failed (VT_VERIFY_CODE " << ret_ << ")"); + return; + } + }else if(this->vt_type == VT_API){ + fs::path p_ = lib_file_; + std::vector elements_; + std::string lib_path_, speaker_, type_; + char *lib_path_char_, *speaker_char_, *type_char_; + + // Get ReadSpeaker library directory + lib_path_ = p_.parent_path().string(); + lib_path_char_ = (char*)calloc(std::strlen(lib_path_.c_str())+1, sizeof(char)); + std::strcpy(lib_path_char_, lib_path_.c_str()); + + // Get speaker and type + for(auto& part_ : p_){ + elements_.push_back(part_.string()); + } + speaker_ = elements_.at(3); + speaker_char_ = (char*)calloc(std::strlen(speaker_.c_str())+1, sizeof(char)); + std::strcpy(speaker_char_, speaker_.c_str()); + type_ = elements_.at(4); + type_char_ = (char*)calloc(std::strlen(type_.c_str())+1, sizeof(char)); + std::strcpy(type_char_, type_.c_str()); + + VTAPI_Init(lib_path_char_); + this->hVTAPI = VTAPI_CreateHandle(); + if(this->hVTAPI == 0){ + ROS_ERROR("VoiceText API ERROR when creating API handler. 
: %s\n", VTAPI_GetLastErrorInfo(0)->szMsg); + return; + } + VTAPI_SetLicenseFolder(license_path_char_); + // Load engine + this->hEngine = VTAPI_GetEngine(speaker_char_, type_char_); + ret_ = VTAPI_SetEngineHandle(this->hVTAPI, this->hEngine); + if(ret_ < VTAPI_SUCCESS){ + ROS_ERROR("VoiceText API ERROR when creating engine handler. CODE: %d, MESSAGE: %s \n", ret_, VTAPI_GetLastErrorInfo(this->hVTAPI)->szMsg); + return; + } + + free(lib_path_char_); + free(speaker_char_); + free(type_char_); + } + + free(db_path_char_); + if (!license_path.empty()) free(license_path_char_); + +} + +VTHandler::~VTHandler(){ + if(this->dl_handle != NULL){ + // TODO release handle before close dl + dlclose(this->dl_handle); + } +} + +bool VTHandler::LoadSym(){ + const char* dl_err_; + if(vt_type == VT_SDK){ + ROS_INFO("Found VoiceText SDK\n"); + // load symbol + for(auto& itr: VTSDK_func_){ + VTSDK_s_map_[itr] = dlsym(this->dl_handle, itr); + dl_err_ = dlerror(); + if(dl_err_ != NULL){ + ROS_FATAL_STREAM("Error occured when loading ReadSpeaker libraries " + << dl_err_); + dlclose(this->dl_handle); + return false; + break; + } + } + // cast + VT_LOADTTS_JPN = reinterpret_cast(VTSDK_s_map_.at("VT_LOADTTS_JPN")); + VT_UNLOADTTS_JPN = reinterpret_cast(VTSDK_s_map_.at("VT_UNLOADTTS_JPN")); + VT_GetTTSInfo_JPN = reinterpret_cast(VTSDK_s_map_.at("VT_GetTTSInfo_JPN")); + VT_TextToFile_JPN = reinterpret_cast(VTSDK_s_map_.at("VT_TextToFile_JPN")); + }else if(vt_type == VT_API){ + ROS_INFO("Found ReadSpeaker API\n"); + // load symbol + for(auto& itr: VTAPI_func_){ + VTAPI_s_map_[itr] = dlsym(this->dl_handle, itr); + dl_err_ = dlerror(); + if(dl_err_ != NULL){ + ROS_FATAL_STREAM("Error occured when loading ReadSpeaker libraries " + << dl_err_); + dlclose(this->dl_handle); + return false; + break; + } + } + // cast + VTAPI_Init = reinterpret_cast(VTAPI_s_map_.at("VTAPI_Init")); + VTAPI_CreateHandle = reinterpret_cast(VTAPI_s_map_.at("VTAPI_CreateHandle")); + VTAPI_SetLicenseFolder = 
reinterpret_cast(VTAPI_s_map_.at("VTAPI_SetLicenseFolder")); + VTAPI_GetEngine = reinterpret_cast(VTAPI_s_map_.at("VTAPI_GetEngine")); + VTAPI_SetEngineHandle = reinterpret_cast(VTAPI_s_map_.at("VTAPI_SetEngineHandle")); + VTAPI_SetAttr = reinterpret_cast(VTAPI_s_map_.at("VTAPI_SetAttr")); + VTAPI_SetOutputFile = reinterpret_cast(VTAPI_s_map_.at("VTAPI_SetOutputFile")); + VTAPI_TextToFile = reinterpret_cast(VTAPI_s_map_.at("VTAPI_TextToFile")); + VTAPI_GetLastErrorInfo = reinterpret_cast(VTAPI_s_map_.at("VTAPI_GetLastErrorInfo")); + VTAPI_ReleaseHandle = reinterpret_cast(VTAPI_s_map_.at("VTAPI_ReleaseHandle")); + VTAPI_UnloadEngine = reinterpret_cast(VTAPI_s_map_.at("VTAPI_UnloadEngine")); + VTAPI_Exit = reinterpret_cast(VTAPI_s_map_.at("VTAPI_Exit")); + } + return true; +} + +bool VTHandler::VTH_TextToFile(const int pitch, const int speed, const int volume, const int pause, + const std::string text, const std::string wave_path){ + char *text_char_, *wave_path_char_; + int ret_; + bool success_ = true; + + text_char_ = (char*)calloc(std::strlen(text.c_str())+1, sizeof(char)); + std::strcpy(text_char_, text.c_str()); + wave_path_char_ = (char*)calloc(std::strlen(wave_path.c_str())+1, sizeof(char)); + std::strcpy(wave_path_char_, wave_path.c_str()); + + if(this->vt_type == VT_SDK){ + ret_ = VT_TextToFile_JPN(VT_FILE_API_FMT_S16PCM_WAVE, + text_char_, + wave_path_char_, + -1, + pitch, + speed, + volume, + pause, + -1, -1); + if(ret_ != VT_FILE_API_SUCCESS){ + ROS_ERROR("[VoiceText SDK] Failed to execute TTS (code %d)\n", ret_); + success_ = false; + } + }else if(this->vt_type == VT_API){ + VTAPI_SetAttr(this->hVTAPI, ATTR_PITCH, pitch); + VTAPI_SetAttr(this->hVTAPI, ATTR_SPEED, speed); + VTAPI_SetAttr(this->hVTAPI, ATTR_VOLUME, volume); + VTAPI_SetAttr(this->hVTAPI, ATTR_PAUSE, pause); + ret_ = VTAPI_SetOutputFile(this->hVTAPI, wave_path_char_, FORMAT_16PCM_WAV); + if(ret_ != VTAPI_SUCCESS){ + ROS_ERROR("[ReadSpeaker API] ERROR when executing VTAPI_SetOutputFile. 
STATUS: %s\n", + VTAPI_GetLastErrorInfo(this->hVTAPI)->szMsg); + success_ = false; + } + ret_ = VTAPI_TextToFile(this->hVTAPI, text_char_, -1, TEXT_FORMAT_DEFAULT); + if(ret_ != VTAPI_SUCCESS){ + ROS_ERROR("[ReadSpeaker API] ERROR when executing VTAPI_TextToFile. STATUS: %s\n", + VTAPI_GetLastErrorInfo(this->hVTAPI)->szMsg); + success_ = false; + } + + free(text_char_); + free(wave_path_char_); + + return success_; + } +} From 6eb73263fd9da4b0161e7345ce658d21aabc0699 Mon Sep 17 00:00:00 2001 From: Yoshiki Obinata Date: Tue, 8 Nov 2022 11:13:19 +0900 Subject: [PATCH 2/7] [voice_text] Fix logging style --- 3rdparty/voice_text/src/vt_handler.cpp | 30 +++++++++++++------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/3rdparty/voice_text/src/vt_handler.cpp b/3rdparty/voice_text/src/vt_handler.cpp index b59c227a8..56e0d670e 100644 --- a/3rdparty/voice_text/src/vt_handler.cpp +++ b/3rdparty/voice_text/src/vt_handler.cpp @@ -16,12 +16,11 @@ VTHandler::VTHandler(const std::string license_path, const std::string db_path){ #ifdef ENV64 glob(("/usr/vt/*/*/bin/x86_64/RAMIO/libvt_jpn.so"), 0, NULL, &sdk_old_gbuf_); // e.g., /usr/vt/sayaka/M16/bin/x86_64/RAMIO/libvt_jpn.so glob(("/usr/vt/*/*/bin/LINUX64_GLIBC3/RAMIO/libvt_jpn.so"), 0, NULL, &sdk_new_gbuf_); // e.g., /usr/vt/risa/H16/bin/LINUX64_GLIBC3/RAMIO/libvt_jpn.so - glob(("/usr/vt/*/*/bin/libvtapi.so"), 0, NULL, &api_gbuf_); // e.g., /usr/vt/hikari/D16/bin/libvtapi.so #elif ENV32 glob(("/usr/vt/*/*/bin/x86_32/RAMIO/libvt_jpn.so"), 0, NULL, &sdk_old_gbuf_); // e.g., /usr/vt/sayaka/M16/bin/x86_32/RAMIO/libvt_jpn.so glob(("/usr/vt/*/*/bin/LINUX32_GLIBC3/RAMIO/libvt_jpn.so"), 0, NULL, &sdk_new_gbuf_); // e.g., /usr/vt/risa/H16/bin/LINUX32_GLIBC3/RAMIO/libvt_jpn.so - glob(("/usr/vt/*/*/bin/libvtapi.so"), 0, NULL, &api_gbuf_); // e.g., /usr/vt/hikari/D16/bin/libvtapi.so #endif + glob(("/usr/vt/*/*/bin/libvtapi.so"), 0, NULL, &api_gbuf_); // e.g., /usr/vt/hikari/D16/bin/libvtapi.so if(sdk_old_gbuf_.gl_pathc 
> 0){ this->vt_type = VT_SDK; @@ -42,7 +41,7 @@ VTHandler::VTHandler(const std::string license_path, const std::string db_path){ // Load libraries if(this->vt_type != NO_VT){ - ROS_INFO("Opening %s \n", lib_file_); + ROS_INFO("Opening %s ", lib_file_); this->dl_handle = dlopen(lib_file_, RTLD_NOW); if(this->dl_handle == NULL){ dl_err_ = dlerror(); @@ -51,7 +50,7 @@ VTHandler::VTHandler(const std::string license_path, const std::string db_path){ return; } }else{ - ROS_FATAL("No Voice Text or Read Speaker libraries have found\n"); + ROS_FATAL("No VoiceText or ReadSpeaker libraries have found"); return; } @@ -72,7 +71,7 @@ VTHandler::VTHandler(const std::string license_path, const std::string db_path){ license_path_char_ = (char*)calloc(std::strlen(license_path.c_str())+1, sizeof(char)); std::strcpy(license_path_char_, license_path.c_str()); }else{ - ROS_FATAL("Please set license file\n"); + ROS_FATAL("Please set license file"); return; } @@ -80,12 +79,12 @@ VTHandler::VTHandler(const std::string license_path, const std::string db_path){ if(this->vt_type == VT_SDK){ ret_ = VT_LOADTTS_JPN((int)NULL, -1, db_path_char_, license_path_char_); if(ret_ != VT_LOADTTS_SUCCESS){ - ROS_FATAL("Failed to load TTS engine (code %d)\n", ret_); + ROS_FATAL("[VoiceText SDK] Failed to load TTS engine (code %d)", ret_); return; } VT_GetTTSInfo_JPN(VT_VERIFY_CODE, NULL, &ret_, sizeof(int)); if (ret_ != 0) { - ROS_FATAL_STREAM("Verification failed (VT_VERIFY_CODE " << ret_ << ")"); + ROS_FATAL_STREAM("[VoiceText SDK] Verification failed (VT_VERIFY_CODE " << ret_ << ")"); return; } }else if(this->vt_type == VT_API){ @@ -113,7 +112,7 @@ VTHandler::VTHandler(const std::string license_path, const std::string db_path){ VTAPI_Init(lib_path_char_); this->hVTAPI = VTAPI_CreateHandle(); if(this->hVTAPI == 0){ - ROS_ERROR("VoiceText API ERROR when creating API handler. : %s\n", VTAPI_GetLastErrorInfo(0)->szMsg); + ROS_ERROR("[ReadSpeaker API] Failed to create API handler. 
: %s", VTAPI_GetLastErrorInfo(0)->szMsg); return; } VTAPI_SetLicenseFolder(license_path_char_); @@ -121,7 +120,8 @@ VTHandler::VTHandler(const std::string license_path, const std::string db_path){ this->hEngine = VTAPI_GetEngine(speaker_char_, type_char_); ret_ = VTAPI_SetEngineHandle(this->hVTAPI, this->hEngine); if(ret_ < VTAPI_SUCCESS){ - ROS_ERROR("VoiceText API ERROR when creating engine handler. CODE: %d, MESSAGE: %s \n", ret_, VTAPI_GetLastErrorInfo(this->hVTAPI)->szMsg); + ROS_ERROR("[ReadSpeaker API] Failed to create engine handler. CODE: %d, MESSAGE: %s ", + ret_, VTAPI_GetLastErrorInfo(this->hVTAPI)->szMsg); return; } @@ -145,13 +145,13 @@ VTHandler::~VTHandler(){ bool VTHandler::LoadSym(){ const char* dl_err_; if(vt_type == VT_SDK){ - ROS_INFO("Found VoiceText SDK\n"); + ROS_INFO("Found VoiceText SDK"); // load symbol for(auto& itr: VTSDK_func_){ VTSDK_s_map_[itr] = dlsym(this->dl_handle, itr); dl_err_ = dlerror(); if(dl_err_ != NULL){ - ROS_FATAL_STREAM("Error occured when loading ReadSpeaker libraries " + ROS_FATAL_STREAM("Error occured when loading VoiceText libraries " << dl_err_); dlclose(this->dl_handle); return false; @@ -164,7 +164,7 @@ bool VTHandler::LoadSym(){ VT_GetTTSInfo_JPN = reinterpret_cast(VTSDK_s_map_.at("VT_GetTTSInfo_JPN")); VT_TextToFile_JPN = reinterpret_cast(VTSDK_s_map_.at("VT_TextToFile_JPN")); }else if(vt_type == VT_API){ - ROS_INFO("Found ReadSpeaker API\n"); + ROS_INFO("Found ReadSpeaker API"); // load symbol for(auto& itr: VTAPI_func_){ VTAPI_s_map_[itr] = dlsym(this->dl_handle, itr); @@ -216,7 +216,7 @@ bool VTHandler::VTH_TextToFile(const int pitch, const int speed, const int volum pause, -1, -1); if(ret_ != VT_FILE_API_SUCCESS){ - ROS_ERROR("[VoiceText SDK] Failed to execute TTS (code %d)\n", ret_); + ROS_ERROR("[VoiceText SDK] Failed to execute TTS (code %d)", ret_); success_ = false; } }else if(this->vt_type == VT_API){ @@ -226,13 +226,13 @@ bool VTHandler::VTH_TextToFile(const int pitch, const int speed, const int 
volum VTAPI_SetAttr(this->hVTAPI, ATTR_PAUSE, pause); ret_ = VTAPI_SetOutputFile(this->hVTAPI, wave_path_char_, FORMAT_16PCM_WAV); if(ret_ != VTAPI_SUCCESS){ - ROS_ERROR("[ReadSpeaker API] ERROR when executing VTAPI_SetOutputFile. STATUS: %s\n", + ROS_ERROR("[ReadSpeaker API] ERROR when executing VTAPI_SetOutputFile. STATUS: %s", VTAPI_GetLastErrorInfo(this->hVTAPI)->szMsg); success_ = false; } ret_ = VTAPI_TextToFile(this->hVTAPI, text_char_, -1, TEXT_FORMAT_DEFAULT); if(ret_ != VTAPI_SUCCESS){ - ROS_ERROR("[ReadSpeaker API] ERROR when executing VTAPI_TextToFile. STATUS: %s\n", + ROS_ERROR("[ReadSpeaker API] ERROR when executing VTAPI_TextToFile. STATUS: %s", VTAPI_GetLastErrorInfo(this->hVTAPI)->szMsg); success_ = false; } From 5cca12e62d1b4e7a652577d1dfe88dcd99308086 Mon Sep 17 00:00:00 2001 From: Yoshiki Obinata Date: Tue, 8 Nov 2022 11:13:42 +0900 Subject: [PATCH 3/7] [voice_text] Update README --- 3rdparty/voice_text/README.md | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/3rdparty/voice_text/README.md b/3rdparty/voice_text/README.md index 20f1da8ac..bea0daf47 100644 --- a/3rdparty/voice_text/README.md +++ b/3rdparty/voice_text/README.md @@ -7,7 +7,7 @@ ROS Interface for HOYA VoiceText Speech Synthesis Engine ### 1. Install VoiceText SDK #### If you have voicetext sdk install binary, please follow the official guide and install both engine and SDK -#### If you don't have the sdk install binary but have voice text API binary, please follow the guide below. +#### If you don't have the sdk install binary but have ReadSpeaker API binary, please follow the guide below. 1. Install VoiceText Engine by official guide 2. Copy VoiceText API binaries to VoiceText binary directory VoiceText API package includes binary libraries and header file. You have to copy those of them to specific directory by executing following commands. 
@@ -16,9 +16,6 @@ ROS Interface for HOYA VoiceText Speech Synthesis Engine cd bin/x64 # You have to cd x86 if your system is x86 architecture # Assuming VoiceText engine's talker is hikari, type is D16. If it is different, please set appropriate directory. sudo cp -a * /usr/vt/hikari/D16/bin # Don't forget to add -a not to break symbolic link. - cd ../../include/ - sudo mkdir /usr/vt/hikari/D16/inc # not include, but inc - sudo cp vtapi.h /usr/vt/hikari/D16/inc ``` ### 2. Put license file ### 3. Build this package From 477712c64441e13dc75fd53c88f981b8a4779a64 Mon Sep 17 00:00:00 2001 From: Yoshiki Obinata Date: Tue, 8 Nov 2022 13:50:05 +0900 Subject: [PATCH 4/7] [voice_text] Set C++11, remove unnecessary headers --- 3rdparty/voice_text/CMakeLists.txt | 4 ++++ 3rdparty/voice_text/include/vt_handler.h | 4 +--- 3rdparty/voice_text/src/voice_text.cpp | 4 ---- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/3rdparty/voice_text/CMakeLists.txt b/3rdparty/voice_text/CMakeLists.txt index 13cf10648..5812fe5d2 100644 --- a/3rdparty/voice_text/CMakeLists.txt +++ b/3rdparty/voice_text/CMakeLists.txt @@ -1,5 +1,9 @@ cmake_minimum_required(VERSION 2.8.3) project(voice_text) + +set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-write-strings") + find_package(catkin REQUIRED COMPONENTS dynamic_reconfigure roscpp diff --git a/3rdparty/voice_text/include/vt_handler.h b/3rdparty/voice_text/include/vt_handler.h index 291b9d12e..f52b20dd3 100644 --- a/3rdparty/voice_text/include/vt_handler.h +++ b/3rdparty/voice_text/include/vt_handler.h @@ -6,6 +6,7 @@ #ifndef VT_HANDLER_H_ #define VT_HANDLER_H_ +#include #include #include #include @@ -17,9 +18,6 @@ #include "vt_jpn.h" #include "vtapi.h" -#define VT_ROOT "/usr/vt/*/*" -#define PATH_MAX 1024 - #if __x86_64__ || __ppc64__ #define ENV64 #else diff --git a/3rdparty/voice_text/src/voice_text.cpp b/3rdparty/voice_text/src/voice_text.cpp index c86bc21db..7a1ea5165 100644 --- 
a/3rdparty/voice_text/src/voice_text.cpp +++ b/3rdparty/voice_text/src/voice_text.cpp @@ -4,11 +4,7 @@ */ #include -#include -#include -#include -#include #include #include From c0450ee357aaf05425c85f96470f16d4240c2dd7 Mon Sep 17 00:00:00 2001 From: Yoshiki Obinata Date: Tue, 8 Nov 2022 17:42:19 +0900 Subject: [PATCH 5/7] [voice_text] Skip indigo test, for requiring cmake>=3. It can be installed with apt install cmake3. Fix logging content --- .github/workflows/config.yml | 2 +- 3rdparty/voice_text/CMakeLists.txt | 2 +- 3rdparty/voice_text/src/vt_handler.cpp | 12 ++++++------ 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.github/workflows/config.yml b/.github/workflows/config.yml index 22334c8e4..afe132dd5 100644 --- a/.github/workflows/config.yml +++ b/.github/workflows/config.yml @@ -20,7 +20,7 @@ jobs: ROS_PARALLEL_TEST_JOBS: "-j8" CATKIN_PARALLEL_JOBS: "-i" NOT_TEST_INSTALL : true - BEFORE_SCRIPT : "for name in dialogflow_task_executive ros_speech_recognition sesame_ros ffha libsiftfast nlopt julius julius_ros downward assimp_devel; do echo \\$name; find $GITHUB_WORKSPACE -iname \\$name -exec touch {}/CATKIN_IGNORE \\; ; ls -al \\$(find -iname \\$name)/; done" # Skip large packagses / Skip packages that could not build on indigo dialogflow_task_executive/ros_speech_recognition/sesame_ros + BEFORE_SCRIPT : "for name in dialogflow_task_executive ros_speech_recognition sesame_ros ffha libsiftfast nlopt julius julius_ros downward assimp_devel voice_text; do echo \\$name; find $GITHUB_WORKSPACE -iname \\$name -exec touch {}/CATKIN_IGNORE \\; ; ls -al \\$(find -iname \\$name)/; done" # Skip large packagses / Skip packages that could not build on indigo dialogflow_task_executive/ros_speech_recognition/sesame_ros - ROS_DISTRO: kinetic CONTAINER: ubuntu:16.04 ROS_PARALLEL_TEST_JOBS: "-j8" diff --git a/3rdparty/voice_text/CMakeLists.txt b/3rdparty/voice_text/CMakeLists.txt index 5812fe5d2..74441c5a4 100644 --- a/3rdparty/voice_text/CMakeLists.txt +++ 
b/3rdparty/voice_text/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 2.8.3) +cmake_minimum_required(VERSION 3.5.1) project(voice_text) set(CMAKE_CXX_STANDARD 11) diff --git a/3rdparty/voice_text/src/vt_handler.cpp b/3rdparty/voice_text/src/vt_handler.cpp index 56e0d670e..b9225e15a 100644 --- a/3rdparty/voice_text/src/vt_handler.cpp +++ b/3rdparty/voice_text/src/vt_handler.cpp @@ -112,7 +112,7 @@ VTHandler::VTHandler(const std::string license_path, const std::string db_path){ VTAPI_Init(lib_path_char_); this->hVTAPI = VTAPI_CreateHandle(); if(this->hVTAPI == 0){ - ROS_ERROR("[ReadSpeaker API] Failed to create API handler. : %s", VTAPI_GetLastErrorInfo(0)->szMsg); + ROS_FATAL("[ReadSpeaker API] Failed to create API handler. STATUS: %s", VTAPI_GetLastErrorInfo(0)->szMsg); return; } VTAPI_SetLicenseFolder(license_path_char_); @@ -120,7 +120,7 @@ VTHandler::VTHandler(const std::string license_path, const std::string db_path){ this->hEngine = VTAPI_GetEngine(speaker_char_, type_char_); ret_ = VTAPI_SetEngineHandle(this->hVTAPI, this->hEngine); if(ret_ < VTAPI_SUCCESS){ - ROS_ERROR("[ReadSpeaker API] Failed to create engine handler. CODE: %d, MESSAGE: %s ", + ROS_FATAL("[ReadSpeaker API] Failed to create engine handler. CODE: %d, MESSAGE: %s ", ret_, VTAPI_GetLastErrorInfo(this->hVTAPI)->szMsg); return; } @@ -151,7 +151,7 @@ bool VTHandler::LoadSym(){ VTSDK_s_map_[itr] = dlsym(this->dl_handle, itr); dl_err_ = dlerror(); if(dl_err_ != NULL){ - ROS_FATAL_STREAM("Error occured when loading VoiceText libraries " + ROS_FATAL_STREAM("Failed to load VoiceText libraries. STATUS: " << dl_err_); dlclose(this->dl_handle); return false; @@ -170,7 +170,7 @@ bool VTHandler::LoadSym(){ VTAPI_s_map_[itr] = dlsym(this->dl_handle, itr); dl_err_ = dlerror(); if(dl_err_ != NULL){ - ROS_FATAL_STREAM("Error occured when loading ReadSpeaker libraries " + ROS_FATAL_STREAM("Failed to load ReadSpeaker libraries. 
STATUS: " << dl_err_); dlclose(this->dl_handle); return false; @@ -226,13 +226,13 @@ bool VTHandler::VTH_TextToFile(const int pitch, const int speed, const int volum VTAPI_SetAttr(this->hVTAPI, ATTR_PAUSE, pause); ret_ = VTAPI_SetOutputFile(this->hVTAPI, wave_path_char_, FORMAT_16PCM_WAV); if(ret_ != VTAPI_SUCCESS){ - ROS_ERROR("[ReadSpeaker API] ERROR when executing VTAPI_SetOutputFile. STATUS: %s", + ROS_ERROR("[ReadSpeaker API] Failed to execute VTAPI_SetOutputFile. STATUS: %s", VTAPI_GetLastErrorInfo(this->hVTAPI)->szMsg); success_ = false; } ret_ = VTAPI_TextToFile(this->hVTAPI, text_char_, -1, TEXT_FORMAT_DEFAULT); if(ret_ != VTAPI_SUCCESS){ - ROS_ERROR("[ReadSpeaker API] ERROR when executing VTAPI_TextToFile. STATUS: %s", + ROS_ERROR("[ReadSpeaker API] Failed to execute VTAPI_TextToFile. STATUS: %s", VTAPI_GetLastErrorInfo(this->hVTAPI)->szMsg); success_ = false; } From 1cbc0155d535043e28700b6d3947009818eb545d Mon Sep 17 00:00:00 2001 From: Yoshiki Obinata Date: Tue, 8 Nov 2022 19:31:04 +0900 Subject: [PATCH 6/7] [voice_text] update README, drop support of not-supported architecture --- 3rdparty/voice_text/package.xml | 4 +++- 3rdparty/voice_text/src/vt_handler.cpp | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/3rdparty/voice_text/package.xml b/3rdparty/voice_text/package.xml index 2fd9e303a..b9df97f21 100644 --- a/3rdparty/voice_text/package.xml +++ b/3rdparty/voice_text/package.xml @@ -1,14 +1,16 @@ voice_text 2.1.24 - voice_text (www.voicetext.jp) + The ROS package for VoiceText, ReadSpeaker (https://readspeaker.jp/) Kei Okada + Yoshiki Obinata HOYA License http://ros.org/wiki/voice_text Kei Okada + Yoshiki Obinata catkin roscpp diff --git a/3rdparty/voice_text/src/vt_handler.cpp b/3rdparty/voice_text/src/vt_handler.cpp index b9225e15a..9508cd439 100644 --- a/3rdparty/voice_text/src/vt_handler.cpp +++ b/3rdparty/voice_text/src/vt_handler.cpp @@ -19,6 +19,9 @@ VTHandler::VTHandler(const std::string license_path, const std::string 
db_path){ #elif ENV32 glob(("/usr/vt/*/*/bin/x86_32/RAMIO/libvt_jpn.so"), 0, NULL, &sdk_old_gbuf_); // e.g., /usr/vt/sayaka/M16/bin/x86_32/RAMIO/libvt_jpn.so glob(("/usr/vt/*/*/bin/LINUX32_GLIBC3/RAMIO/libvt_jpn.so"), 0, NULL, &sdk_new_gbuf_); // e.g., /usr/vt/risa/H16/bin/LINUX32_GLIBC3/RAMIO/libvt_jpn.so +#else + ROS_FATAL("Your architecture is not supported"); + return; #endif glob(("/usr/vt/*/*/bin/libvtapi.so"), 0, NULL, &api_gbuf_); // e.g., /usr/vt/hikari/D16/bin/libvtapi.so From f1cfb76aa8771a77bf6b93d4bb3b32b9b5dd2697 Mon Sep 17 00:00:00 2001 From: Yoshiki Obinata Date: Sun, 8 Jan 2023 12:28:57 +0900 Subject: [PATCH 7/7] fetch example --- 3rdparty/voice_text/launch/voice_text.launch | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/3rdparty/voice_text/launch/voice_text.launch b/3rdparty/voice_text/launch/voice_text.launch index 95e7d781f..54413b5a5 100644 --- a/3rdparty/voice_text/launch/voice_text.launch +++ b/3rdparty/voice_text/launch/voice_text.launch @@ -14,7 +14,8 @@ machine="$(arg voice_text_machine)"> - # db_path: /usr/vt/sayaka/M16 # Commented out to support other speakers than SAYAKA + db_path: /usr/vt/sayaka/M16 # Commented out to support other speakers than SAYAKA + license_path: /usr/vt/sayaka/M16/data-common/verify/verification.txt pitch: 100 speed: 100 volume: 100