Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update webrtcvad_ros and Add silero_vad_ros #456

Open
wants to merge 19 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions silero_vad_ros/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
cmake_minimum_required(VERSION 2.8.3)
project(silero_vad_ros)

# The virtualenv below is created with the python3.8 interpreter. When that
# command cannot be found, warn and stop processing this CMakeLists.txt
# instead of raising a configuration error.
find_program(EXIST_PYTHON3 "python3.8")
if(EXIST_PYTHON3)
  message(STATUS "python3.8 command found. continue building...")
else()
  message(WARNING "python3.8 command not found. exit without building")
  return()
endif()

find_package(catkin REQUIRED COMPONENTS catkin_virtualenv)

# Create the package virtualenv with python3.8, without system packages and
# with the venv check disabled.
catkin_generate_virtualenv(
  PYTHON_INTERPRETER python3.8
  USE_SYSTEM_PACKAGES FALSE
  CHECK_VENV FALSE
)

catkin_package()

# Install the node script into the package's bin destination.
catkin_install_python(
  PROGRAMS
    node_scripts/vad.py
  DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
)
21 changes: 21 additions & 0 deletions silero_vad_ros/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# silero_vad_ros

This package provides VAD (Voice Activity Detection) code. It subscribes to an audio topic and publishes a flag indicating whether speech is currently detected by the VAD.
This package uses [silero-vad](https://github.com/snakers4/silero-vad).

## How to build

```bash
catkin build silero_vad_ros
```

## Example

Please make sure your PC has a microphone.
And then launch.

```bash
roslaunch silero_vad_ros sample.launch
```

And please talk to the microphone.
53 changes: 53 additions & 0 deletions silero_vad_ros/launch/sample.launch
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
<launch>
<!-- Sample launch file: starts audio capture, the silero_vad_ros node, -->
<!-- respeaker_ros speech_to_text and a node that prints the result. -->

<!-- Launch arguments forwarded to the silero_vad_ros node's private parameters below. -->
<!-- NOTE(review): the *_duration values are presumably in seconds; confirm in webrtcvad_ros.vad_core. -->
<arg name="vad_threshold" default="0.5" />
<arg name="vad_minimum_duration" default="0.6" />
<arg name="vad_maximum_duration" default="10.0" />
<arg name="vad_audio_timeout_duration" default="0.5" />

<!-- Include audio_capture's capture.launch with its format argument set to wave. -->
<include file="$(find audio_capture)/launch/capture.launch">
<arg name="format" value="wave" />
</include>

<!-- VAD node. Its audio_data and audio_info names are remapped to /audio/audio and -->
<!-- /audio/audio_info, and it is restarted automatically if it exits (respawn). -->
<node
name="silero_vad_ros"
pkg="silero_vad_ros"
type="vad.py"
output="screen"
respawn="true"
>
<rosparam subst_value="true">
threshold: $(arg vad_threshold)
minimum_duration: $(arg vad_minimum_duration)
maximum_duration: $(arg vad_maximum_duration)
audio_timeout_duration: $(arg vad_audio_timeout_duration)
</rosparam>
<remap from="audio_data" to="/audio/audio" />
<remap from="audio_info" to="/audio/audio_info" />
</node>

<!-- Speech recognition node; its audio input is remapped to /silero_vad_ros/speech_audio. -->
<!-- NOTE(review): the key tts_actions_names and the entry speed_play_jp look like possible -->
<!-- typos (tts_action_names / sound_play_jp?); confirm against respeaker_ros speech_to_text.py. -->
<node
name="speech_to_text"
pkg="respeaker_ros"
type="speech_to_text.py"
output="log"
>
<remap from="audio" to="/silero_vad_ros/speech_audio" />
<rosparam subst_value="true">
language: ja-JP
self_cancellation: true
tts_actions_names:
- sound_play
- speed_play_jp
- robotsound
- robotsound_jp
tts_tolerance: 1.0
</rosparam>
</node>

<!-- Helper node taken from the webrtcvad_ros package; presumably prints the -->
<!-- speech_to_text result to the screen (its source is not shown here). -->
<node
name="print_stt_result"
pkg="webrtcvad_ros"
type="print_stt_result.py"
output="screen"
/>
</launch>
43 changes: 43 additions & 0 deletions silero_vad_ros/node_scripts/vad.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#!/usr/bin/env python

import struct

import numpy as np
import rospy
import torch

from webrtcvad_ros.vad_core import VADBaseNode


class SileroVADROS(VADBaseNode):
    """VAD node that scores audio chunks with the Silero VAD model.

    The model is fetched through ``torch.hub`` from ``snakers4/silero-vad``.
    Each chunk passed to ``_get_vad_confidence`` is interpreted as int16
    samples, scaled by 1/32768 into float32 and handed to the model.

    ROS parameters:
        ~force_reload (default: True): forwarded to ``torch.hub.load``.
            ``True`` downloads the hub repository again on every start;
            set it to ``false`` to reuse the local torch.hub cache.
    """

    def __init__(self):
        # The default keeps the previous hard-coded behaviour
        # (force_reload=True).
        force_reload = rospy.get_param("~force_reload", True)

        model_vad, _ = torch.hub.load(
            repo_or_dir="snakers4/silero-vad",
            model="silero_vad",
            force_reload=force_reload,
        )
        # Set before the base-class constructor runs, so the model already
        # exists if the base class starts delivering chunks right away
        # (assumption: VADBaseNode sets up its subscribers in __init__).
        self.model_vad = model_vad

        super(SileroVADROS, self).__init__(chunk_size=1536)

        rospy.loginfo("Initialized.")

    def _get_vad_confidence(self, chunk, sampling_rate):
        """Score one audio chunk with the Silero VAD model.

        Args:
            chunk: bytes-like buffer read as a sequence of int16 samples.
            sampling_rate: sampling rate passed through to the model.

        Returns:
            The model's scalar output converted to a Python number via
            ``.item()`` (presumably a speech probability; see silero-vad).
        """
        audio_chunk = np.frombuffer(chunk, np.int16).astype("float32")
        # Always normalise. The scaling used to be gated on
        # ``np.abs(int16_samples).max() > 0``, but int16 abs() wraps -32768
        # back to -32768, so a chunk made only of 0 / -32768 samples skipped
        # the scaling. Scaling an all-zero chunk is a no-op, so no gate is
        # needed. ``1.0`` keeps the factor a float even under Python 2
        # integer division.
        audio_chunk *= 1.0 / 32768
        audio_chunk = audio_chunk.squeeze()
        return self.model_vad(torch.from_numpy(audio_chunk), sampling_rate).item()


def main():
    """Initialise the ``silero_vad_ros`` ROS node, create the VAD node object and spin."""
    rospy.init_node("silero_vad_ros")
    # Hold a reference to the node object while rospy.spin() is running.
    vad_node = SileroVADROS()  # noqa: F841
    rospy.spin()


if __name__ == "__main__":
    main()
27 changes: 27 additions & 0 deletions silero_vad_ros/package.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
<?xml version="1.0"?>
<package format="2">
<name>silero_vad_ros</name>
<version>2.1.24</version>
<description>This package provides a wrapper node for silero_vad. It subscribes to an audio topic and
publishes a flag indicating whether speech is currently detected by the VAD.</description>

<maintainer email="[email protected]">Kei Okada</maintainer>
<maintainer email="[email protected]">Koki Shinjo</maintainer>

<author email="[email protected]">Koki Shinjo</author>

<license>BSD</license>

<!-- Build tool. -->
<buildtool_depend>catkin</buildtool_depend>

<!-- Needed at build time: CMakeLists.txt calls catkin_generate_virtualenv(). -->
<build_depend>catkin_virtualenv</build_depend>

<!-- Runtime dependencies. node_scripts/vad.py imports rospy and webrtcvad_ros.vad_core directly. -->
<!-- NOTE(review): vad.py does not import webrtcvad or audio_common_msgs itself; presumably they -->
<!-- are needed through webrtcvad_ros.vad_core. Confirm. -->
<!-- NOTE(review): launch/sample.launch also references audio_capture and respeaker_ros, which are -->
<!-- not declared here; consider whether they should be. -->
<exec_depend>audio_common_msgs</exec_depend>
<exec_depend>rospy</exec_depend>
<exec_depend>python-webrtcvad-pip</exec_depend>
<exec_depend>webrtcvad_ros</exec_depend>

<!-- Exports the pip requirements file (requirements.txt) for the package virtualenv. -->
<export>
<pip_requirements>requirements.txt</pip_requirements>
</export>
</package>
3 changes: 3 additions & 0 deletions silero_vad_ros/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
torch
torchaudio
numpy
2 changes: 2 additions & 0 deletions webrtcvad_ros/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,6 @@ project(webrtcvad_ros)

# Locate catkin; no additional catkin components are requested.
find_package(catkin REQUIRED)

# Process this package's setup.py so its Python modules are set up by catkin
# (node_scripts/vad.py imports webrtcvad_ros.vad_core; setup.py itself is not
# shown here). Kept ahead of catkin_package().
catkin_python_setup()

# Declare the catkin package; no exported dependencies, libraries or include
# directories are listed.
catkin_package()
3 changes: 2 additions & 1 deletion webrtcvad_ros/README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# webrtcvad_ros

This package provides a wrapper node for [webrtcvad](https://github.com/wiseman/py-webrtcvad). It subscribes an audio topic and publish a flag if curretly speeched or not with VAD.
This package provides VAD (Voice Activity Detection) code. It subscribes to an audio topic and publishes a flag indicating whether speech is currently detected by the VAD.
This package uses [webrtcvad](https://github.com/wiseman/py-webrtcvad).

## Prerequisites

Expand Down
14 changes: 7 additions & 7 deletions webrtcvad_ros/launch/sample.launch
Original file line number Diff line number Diff line change
@@ -1,27 +1,27 @@
<launch>
<include file="$(find audio_capture)/launch/capture.launch">
<arg name="format" value="wave" />
<arg name="format" value="wave"/>
</include>

<node
name="webrtcvad_ros"
pkg="webrtcvad_ros"
type="webrtcvad_ros.py"
type="vad.py"
output="screen"
>
>
<rosparam>
aggressiveness: 1
</rosparam>
<remap from="audio_data" to="/audio/audio" />
<remap from="audio_info" to="/audio/audio_info" />
<remap from="audio_data" to="/audio/audio"/>
<remap from="audio_info" to="/audio/audio_info"/>
</node>

<node
name="speech_to_text"
pkg="respeaker_ros"
type="speech_to_text.py"
output="log"
>
>
<remap from="audio" to="/webrtcvad_ros/speech_audio"/>
<rosparam subst_value="true">
language: ja-JP
Expand All @@ -35,5 +35,5 @@
pkg="webrtcvad_ros"
type="print_stt_result.py"
output="screen"
/>
/>
</launch>
30 changes: 30 additions & 0 deletions webrtcvad_ros/node_scripts/vad.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#!/usr/bin/env python

import rospy
import webrtcvad

from webrtcvad_ros.vad_core import VADBaseNode


class WebRTCVADROS(VADBaseNode):
    """VAD node backed by ``webrtcvad``.

    ROS parameters:
        ~aggressiveness (default: 1): converted with ``int()`` and passed
            to ``webrtcvad.Vad``.
    """

    def __init__(self):
        mode = int(rospy.get_param('~aggressiveness', 1))
        self._vad = webrtcvad.Vad(mode)

        super(WebRTCVADROS, self).__init__(chunk_size=480)

    def _get_vad_confidence(self, chunk, sampling_rate):
        """Return 1.0 when webrtcvad classifies ``chunk`` as speech, otherwise 0.0."""
        if self._vad.is_speech(chunk, sampling_rate):
            return 1.0
        return 0.0


def main():
    """Initialise the ``webrtcvad_ros`` ROS node, create the VAD node object and spin."""
    rospy.init_node('webrtcvad_ros')
    # Hold a reference to the node object while rospy.spin() is running.
    vad_node = WebRTCVADROS()  # noqa: F841
    rospy.spin()


if __name__ == '__main__':
    main()
59 changes: 0 additions & 59 deletions webrtcvad_ros/node_scripts/webrtcvad_ros.py

This file was deleted.

Empty file.
Loading
Loading