Feature Introduction:
Record audio and obtain RAW audio format data in real-time. Use WebSocket to upload the data to the server and get real-time speech recognition results. For details, refer to the document: Using AudioCapturer to Develop Audio Recording Function (ArkTS). For more detailed interface information, please refer to the API documentation: AudioCapturer (API 8+) and @ohos.net.webSocket (WebSocket Connection).
Key Knowledge Points:
- Familiar with using AudioCapturer to record audio and obtain RAW format data in real-time.
- Familiar with using WebSocket to upload audio data and obtain recognition results.
- Familiar with the dynamic application method for sensitive permissions. The sensitive permission in this project is MICROPHONE.
- For how to build a real-time speech recognition service, please refer to my other article: “Recognizes with Such High Accuracy, Real-time Speech Recognition Service”.
Environment:
- API 9
- DevEco Studio 4.0 Release
- Windows 11
- Stage Model
- ArkTS Language
Required Permissions:
- ohos.permission.MICROPHONE
Effect Diagram:

Core Code:
src/main/ets/utils/Permission.ets is a utility for dynamic permission application:
import bundleManager from '@ohos.bundle.bundleManager';
import abilityAccessCtrl, { Permissions } from '@ohos.abilityAccessCtrl';
/**
 * Queries the grant status of a single permission for this application.
 * @param permission Permission name to check.
 * @returns The grant status; PERMISSION_DENIED when the token lookup or the
 *   status check fails, so callers never observe an undefined result.
 */
async function checkAccessToken(permission: Permissions): Promise<abilityAccessCtrl.GrantStatus> {
  let atManager = abilityAccessCtrl.createAtManager();
  // Fix: initialize to DENIED so a failed lookup cannot return undefined.
  let grantStatus: abilityAccessCtrl.GrantStatus = abilityAccessCtrl.GrantStatus.PERMISSION_DENIED;
  // Get the application's accessTokenID
  let tokenId: number = 0;
  try {
    let bundleInfo: bundleManager.BundleInfo = await bundleManager.getBundleInfoForSelf(bundleManager.BundleFlag.GET_BUNDLE_INFO_WITH_APPLICATION);
    let appInfo: bundleManager.ApplicationInfo = bundleInfo.appInfo;
    tokenId = appInfo.accessTokenId;
  } catch (err) {
    console.error(`getBundleInfoForSelf failed, code is ${err.code}, message is ${err.message}`);
    // Fix: without a valid token the status check below is meaningless
    // (tokenId was previously left undefined) — report denied instead.
    return grantStatus;
  }
  // Verify if the application has been granted the permission
  try {
    grantStatus = await atManager.checkAccessToken(tokenId, permission);
  } catch (err) {
    console.error(`checkAccessToken failed, code is ${err.code}, message is ${err.message}`);
  }
  return grantStatus;
}
/**
 * Checks whether the given permission has been granted to this application.
 * @param permission Permission name to check.
 * @returns true when the permission is granted, false otherwise.
 */
export async function checkPermissions(permission: Permissions): Promise<boolean> {
  const status = await checkAccessToken(permission);
  return status === abilityAccessCtrl.GrantStatus.PERMISSION_GRANTED;
}
src/main/ets/utils/Recorder.ets is a recording utility class for audio recording and data acquisition:
import audio from '@ohos.multimedia.audio';
import { delay } from './Utils';
/**
 * Recording utility: captures RAW PCM audio (16 kHz, mono, S16LE) and delivers
 * the data to a caller-supplied callback in fixed-size chunks.
 */
export default class AudioCapturer {
  // Underlying system capturer; stays undefined until the async creation in
  // the constructor completes (or forever, if creation fails).
  private audioCapturer: audio.AudioCapturer | undefined = undefined;
  // Loop flag for the read loop in start(); cleared by stop().
  private isRecording: boolean = false;
  private audioStreamInfo: audio.AudioStreamInfo = {
    samplingRate: audio.AudioSamplingRate.SAMPLE_RATE_16000, // Audio sampling rate
    channels: audio.AudioChannel.CHANNEL_1, // Number of recording channels
    sampleFormat: audio.AudioSampleFormat.SAMPLE_FORMAT_S16LE,
    encodingType: audio.AudioEncodingType.ENCODING_TYPE_RAW // Audio encoding type
  };
  private audioCapturerInfo: audio.AudioCapturerInfo = {
    // Use SOURCE_TYPE_VOICE_RECOGNITION for noise reduction. If the device does not support it, use SOURCE_TYPE_MIC
    source: audio.SourceType.SOURCE_TYPE_VOICE_RECOGNITION,
    capturerFlags: 0 // Audio capturer flags
  };
  private audioCapturerOptions: audio.AudioCapturerOptions = {
    streamInfo: this.audioStreamInfo,
    capturerInfo: this.audioCapturerInfo
  };

  // Initialization: create the capturer asynchronously. start()/stop()/release()
  // guard against the capturer still being undefined when they are called.
  constructor() {
    // Create AudioCapturer instance
    audio.createAudioCapturer(this.audioCapturerOptions, (err, capturer) => {
      if (err) {
        console.error(`Failed to create audio capturer, error code: ${err.code}, error message: ${err.message}`);
        return;
      }
      this.audioCapturer = capturer;
      console.info('Audio capturer created successfully');
    });
  }

  /**
   * Starts audio capture and keeps reading fixed-size buffers until stop()
   * clears the recording flag.
   * @param callback Receives (state, data): STATE_RUNNING with a data chunk,
   *   STATE_STOPPED when the loop ends, STATE_INVALID when capture cannot start.
   */
  async start(callback: (state: number, data?: ArrayBuffer) => void) {
    // Fix: creation is asynchronous, so the capturer may not exist yet;
    // the original dereferenced this.audioCapturer.state unguarded.
    if (this.audioCapturer === undefined) {
      console.error('Audio capturer is not initialized');
      callback(audio.AudioState.STATE_INVALID);
      return;
    }
    // Only start capture if state is one of STATE_PREPARED, STATE_PAUSED, or STATE_STOPPED
    const validStates = [audio.AudioState.STATE_PREPARED, audio.AudioState.STATE_PAUSED, audio.AudioState.STATE_STOPPED];
    if (validStates.indexOf(this.audioCapturer.state) === -1) {
      console.error('Failed to start recording');
      callback(audio.AudioState.STATE_INVALID);
      return;
    }
    // Start capture
    await this.audioCapturer.start();
    this.isRecording = true;
    // 1920 bytes = 60 ms of 16 kHz mono 16-bit PCM (16000 * 2 bytes * 0.06 s).
    const bufferSize = 1920;
    while (this.isRecording) {
      let buffer = await this.audioCapturer.read(bufferSize, true);
      if (buffer === undefined) {
        console.error('Failed to read audio data');
      } else {
        callback(audio.AudioState.STATE_RUNNING, buffer);
      }
    }
    callback(audio.AudioState.STATE_STOPPED);
  }

  /** Stops an in-progress capture; no-op when the capturer is idle or absent. */
  async stop() {
    this.isRecording = false;
    // Fix: guard against the capturer never having been created.
    if (this.audioCapturer === undefined) {
      return;
    }
    // Only stop if state is RUNNING or PAUSED
    if (this.audioCapturer.state !== audio.AudioState.STATE_RUNNING && this.audioCapturer.state !== audio.AudioState.STATE_PAUSED) {
      console.warn('Capturer is not running or paused');
      return;
    }
    // Give the read loop in start() time to finish its last read.
    await delay(200);
    // Stop capture
    await this.audioCapturer.stop();
    if (this.audioCapturer.state.valueOf() === audio.AudioState.STATE_STOPPED) {
      console.info('Recording stopped');
    } else {
      console.error('Failed to stop recording');
    }
  }

  /** Destroys the capturer and releases system resources. */
  async release() {
    // Fix: guard against the capturer never having been created.
    if (this.audioCapturer === undefined) {
      return;
    }
    // Do not release if state is RELEASED or NEW
    if (this.audioCapturer.state === audio.AudioState.STATE_RELEASED || this.audioCapturer.state === audio.AudioState.STATE_NEW) {
      return;
    }
    // Release resources
    await this.audioCapturer.release();
  }
}
Additional utility functions in src/main/ets/utils/Utils.ets (used for sleep waiting):
/**
 * Sleep helper: resolves after the given number of milliseconds.
 * @param milliseconds How long to wait before the promise fulfills.
 * @returns A promise that fulfills (with no value) once the timeout elapses.
 */
export function delay(milliseconds: number): Promise<void> {
  return new Promise<void>((resolve) => {
    setTimeout(resolve, milliseconds);
  });
}
Add required permissions in src/main/module.json5 (under the module field). For field descriptions, add them to the respective string.json:
"requestPermissions": [
{
"name": "ohos.permission.MICROPHONE",
"reason": "$string:record_reason",
"usedScene": {
"abilities": [
"EntryAbility"
],
"when": "always"
}
}
]
Page code:
import abilityAccessCtrl, { Permissions } from '@ohos.abilityAccessCtrl';
import common from '@ohos.app.ability.common';
import webSocket from '@ohos.net.webSocket';
import AudioCapturer from '../utils/Recorder';
import promptAction from '@ohos.promptAction';
import { checkPermissions } from '../utils/Permission';
import audio from '@ohos.multimedia.audio';
// Permissions requiring dynamic application (runtime request); this sample
// only needs the microphone.
const permissions: Array<Permissions> = ['ohos.permission.MICROPHONE'];
// Get application context; used by requestPermissionsFromUser to show the
// system permission dialog.
const context = getContext(this) as common.UIAbilityContext;
// Demo page: a press-and-hold button records audio and streams it over a
// WebSocket to a speech-recognition server; results are rendered live.
@Entry
@Component
struct Index {
// Button label, switched between idle and recording states.
@State recordBtnText: string = 'Press to Record';
// Combined recognition text (offline + online) shown in the result Text.
@State speechResult: string = '';
// Accumulated final ("2pass-offline") recognition segments.
private offlineResult = '';
// Interim ("online") recognition text; cleared once an offline segment arrives.
private onlineResult = '';
// Speech recognition WebSocket address
private asrWebSocketUrl = "ws://192.168.0.100:10095";
// Audio capturer instance
private audioCapturer?: AudioCapturer;
// WebSocket instance
// NOTE(review): untyped field — presumably webSocket.WebSocket; it is only
// assigned in setWebSocketCallback(). Confirm and annotate.
private ws;
// Called when page is displayed
// Lazily creates the audio capturer if the microphone permission is already
// granted; otherwise starts the runtime permission request flow.
async onPageShow() {
// Check permission status
let promise = checkPermissions(permissions[0]);
promise.then((result) => {
if (result) {
// Initialize audio capturer if not already initialized
if (this.audioCapturer == null) {
this.audioCapturer = new AudioCapturer();
}
} else {
this.reqPermissionsAndRecord(permissions);
}
});
}
// Called when page is hidden
// Releases the capturer's system resources when leaving the page.
async onPageHide() {
if (this.audioCapturer != null) {
this.audioCapturer.release();
}
}
// UI: recognition text anchored at the top, press-to-talk button at the bottom.
build() {
Row() {
RelativeContainer() {
// Display recognition result
Text(this.speechResult)
.id("resultText")
.width('95%')
.maxLines(10)
.fontSize(18)
.margin({ top: 10 })
.alignRules({
top: { anchor: '__container__', align: VerticalAlign.Top },
middle: { anchor: '__container__', align: HorizontalAlign.Center }
});
// Recording button
// Press-and-hold: TouchType.Down starts recording, TouchType.Up stops it.
Button(this.recordBtnText)
.width('90%')
.id("recordBtn")
.margin({ bottom: 10 })
.alignRules({
bottom: { anchor: '__container__', align: VerticalAlign.Bottom },
middle: { anchor: '__container__', align: HorizontalAlign.Center }
})
.onTouch((event) => {
switch (event.type) {
case TouchType.Down:
console.info('Button pressed');
// Check permission status
let promise = checkPermissions(permissions[0]);
promise.then((result) => {
if (result) {
// Start recording
this.startRecord();
this.recordBtnText = 'Recording...';
} else {
// Request permissions and record
this.reqPermissionsAndRecord(permissions);
}
});
break;
case TouchType.Up:
console.info('Button released');
// Stop recording
this.stopRecord();
this.recordBtnText = 'Press to Record';
break;
}
});
}
.height('100%')
.width('100%');
}
.height('100%');
}
// Start recording
// Connects the WebSocket, sends the recognition configuration, then streams
// audio chunks from the capturer as they are read.
startRecord() {
this.setWebSocketCallback();
this.ws.connect(this.asrWebSocketUrl, (err) => {
if (!err) {
console.log("WebSocket connection successful");
// Send initial configuration
let jsonData = '{"mode": "2pass", "chunk_size": [5, 10, 5], "chunk_interval": 10, ' +
'"wav_name": "HarmonyOS", "is_speaking": true, "itn": false}';
this.ws.send(jsonData);
// Start audio capture
// NOTE(review): this.audioCapturer may still be undefined here if onPageShow
// did not create it (e.g. permission only just granted) — confirm and guard.
this.audioCapturer.start((state, data) => {
if (state === audio.AudioState.STATE_STOPPED) {
console.info('Recording ended');
// Send stop signal to server
let jsonData = '{"is_speaking": false}';
this.ws.send(jsonData);
} else if (state === audio.AudioState.STATE_RUNNING) {
// Send audio data
this.ws.send(data, (err) => {
if (err) {
console.log("Failed to send audio data: " + JSON.stringify(err));
}
});
}
});
} else {
console.log("WebSocket connection failed: " + JSON.stringify(err));
}
});
}
// Stop recording
// Only stops the capturer; the WebSocket is closed later, when the server
// sends a message with is_final set (see setWebSocketCallback).
stopRecord() {
if (this.audioCapturer != null) {
this.audioCapturer.stop();
}
}
// Set WebSocket event callbacks
// Creates a fresh WebSocket instance and wires message/close/error handlers.
setWebSocketCallback() {
// Create WebSocket instance
this.ws = webSocket.createWebSocket();
// Handle incoming messages
// Each message is JSON with at least: mode ('2pass-offline' for final
// segments, otherwise interim), text (recognized fragment), is_final.
this.ws.on('message', (err, value: string) => {
console.log("Received WebSocket message: " + value);
let result = JSON.parse(value);
let is_final = result['is_final'];
let mode = result['mode'];
let text = result['text'];
if (mode === '2pass-offline') {
// Final segment: fold into the stable result and drop interim text.
this.offlineResult += text;
this.onlineResult = '';
} else {
this.onlineResult += text;
}
this.speechResult = this.offlineResult + this.onlineResult;
// Close connection if final result is received
if (is_final) {
this.ws.close();
}
});
// Handle connection closed
this.ws.on('close', () => {
console.log("WebSocket connection closed");
});
// Handle errors
this.ws.on('error', (err) => {
console.log("WebSocket error: " + JSON.stringify(err));
});
}
// Request permissions and start recording
// Shows the system permission dialog; on grant, creates the capturer (the
// user must press the record button again to actually start recording).
reqPermissionsAndRecord(permissions: Array<Permissions>): void {
let atManager = abilityAccessCtrl.createAtManager();
// Request permissions from user
atManager.requestPermissionsFromUser(context, permissions).then((data) => {
let grantStatus: Array<number> = data.authResults;
let length: number = grantStatus.length;
for (let i = 0; i < length; i++) {
// authResults[i] === 0 means the i-th requested permission was granted.
if (grantStatus[i] === 0) {
// Permission granted
console.info('Permission granted');
if (this.audioCapturer == null) {
this.audioCapturer = new AudioCapturer();
}
} else {
promptAction.showToast({ message: 'Permission denied, required for recording' });
return;
}
}
}).catch((err) => {
console.error(`Permission request failed: code ${err.code}, message: ${err.message}`);
});
}
}