近年来语音识别发展迅速,也带动了人工智能的发展。曾经渴望自己做一个机器人,但是无奈心有余而力不足。经过多年的积累,作为小白的我也能站在巨人的肩膀上玩一下机器人了。
准备工作:树莓派,音频模块,stm32单片机,百度语音识别接口,喇叭。
整体思路:
1. 由于树莓派没有ADC模块,所以这里借助于stm32的ADC模块来实现将语音信号转换成数字信号,然后通过串口传输到树莓派中,树莓派将数据组装成wave文件,便于语音识别。
2. 通过http协议将组装的语音文件上传到百度语音识别平台进行识别。文档说明(免费调用)
3. 根据识别结果做出相应的处理。
4. 对于需要播放语音时,根据百度语音合成接口合成语音然后使用mplayer播放出来。mplayer安装参考 博客
部分代码:
将音频转换成wave文件
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
|
#include <stdio.h> #include <stdlib.h> #include <stdint.h> #include <wiringPi.h> #include <wiringSerial.h> #include <unistd.h> #include <fcntl.h> #include <sys/types.h> #include <sys/ioctl.h> #include "listen.h" //gcc -o uart uart.c -lwiringPi typedef struct WAV_Format WAVHEADER; #define MAX_LISTEN_SIZES 1024*70 //定义接收数据的大小 #define bty 460800//串口的波特率 struct listen*listenUart() { int fd,file; char buff,buff2; struct listen*liste=( struct listen*) malloc ( sizeof ( struct listen)); unsigned short size; unsigned short *music,temp=0; unsigned short max=0,min=0; char *result=NULL; //存储最后的返回值 int index=0,i=0; char stop=1; WAVHEADER wavHead; music=(unsigned short *) malloc (MAX_LISTEN_SIZES*2); result=( char *) malloc (MAX_LISTEN_SIZES*2+ sizeof (WAVHEADER)); if (wiringPiSetup() < 0) return NULL; if ((fd = serialOpen ( "/dev/ttyAMA0" ,bty))<0) { return NULL; printf ( "serial err " ); } //file=open("abc.wav", O_RDWR|O_CREAT); printf ( "oepn success " ); //serialPrintf(fd,"Hello World!!!"); //需要对音频信号作出处理,当大于或者阈值时开始统计,知道录制完成 int countTotal=0; int countNumber= 1000; //统计个数 int countMax=2860; //最大值 int countMin=2840; //最小值 int startCount=1; while (1) { if (index==MAX_LISTEN_SIZES) { break ; } buff=serialGetchar(fd); buff2=serialGetchar(fd); if ((buff2&0x0F0)!=0) { buff2=serialGetchar(fd); } else { size=buff2; size=size<<8; size=(size&0xFF00)|(buff&0xFF); music[index]=size; if (startCount==1) { countTotal=countTotal+size; if (index>=countNumber) { int temp=countTotal/(countNumber+1); if (temp>countMax||temp<countMin) { startCount=0; //开始录音 printf ( ":::::%d " ,temp); index++; } else { printf ( "temp:%d " ,temp); index=0; } countTotal=0; } else { index++; } } else { index++; } } } serialClose(fd); printf ( "end " ); //对音频进行放大处理 max=music[0]; min=music[0]; for (i=i;i<MAX_LISTEN_SIZES;i++){ temp=music[i]; if (temp>max) { max=temp; } if (temp<min) { min=temp; } } size=max-min; for (i=0;i<MAX_LISTEN_SIZES;i++) { music[i]=(unsigned short )((music[i]-min)*1.0*6000/size); } 
wavHead.ChunkID=0x46464952; /* "RIFF" */ wavHead.ChunkSize= sizeof (wavHead)+MAX_LISTEN_SIZES*2 -8; /* 36 + Subchunk2Size */ wavHead.Format=0x45564157; /* "WAVE" */ wavHead.Subchunk1ID=0x20746D66; /* "fmt " */ wavHead.Subchunk1Size=0x10; /* 16 for PCM */ wavHead.AudioFormat=0x01; /* PCM = 1*/ wavHead.NumChannels=0x01; /* Mono = 1, Stereo = 2, etc. */ wavHead.SampleRate=0x3E80; /* 8000, 44100, etc. */ wavHead.ByteRate=0x7D00; /* = SampleRate * NumChannels * BitsPerSample/8 */ wavHead.BlockAlign=0x02; /* = NumChannels * BitsPerSample/8 */ wavHead.BitsPerSample=0x10; /* 8bits, 16bits, etc. */ wavHead.Subchunk2ID=0x61746164; /* "data" */ wavHead.Subchunk2Size=MAX_LISTEN_SIZES*2; /* data size */ //返回数据赋值 memcpy (result,( char *)&wavHead, sizeof (WAVHEADER)); memcpy (result+ sizeof (WAVHEADER),( char *)music,MAX_LISTEN_SIZES*2); liste->length= sizeof (WAVHEADER)+MAX_LISTEN_SIZES*2; liste->data=result; return liste; } |
将音频识别成文字
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
|
#include "convertText.h"
#include <stdlib.h>
#include <string.h>

/* Accumulates the HTTP response body of the current recognition request. */
static Buffer *listen_buff2 = NULL;

/*
 * libcurl write callback: appends the received chunk to listen_buff2.
 * The chunk holds size * nmemb bytes; returning that same count tells libcurl
 * the whole chunk was consumed.
 */
size_t listen_getData2(void *ptr, size_t size, size_t nmemb, void *stream)
{
    size_t total = size * nmemb;

    (void)stream;
    appendBuffer(listen_buff2, ptr, total);
    return total;
}

/*
 * Records one utterance via listenUart(), posts it base64-encoded to the
 * speech recognition endpoint and copies the first recognition result into
 * result2.
 *
 * result2: caller-provided output buffer for the recognized text.
 *          NOTE(review): the copy is unbounded because the interface carries
 *          no buffer size; the caller must supply a buffer large enough for
 *          the longest expected result.
 *
 * Returns  0 on success,
 *         -1 if the recording could not be captured or memory ran out,
 *         -2 if no access token could be obtained,
 *         -3 if the HTTP request failed or the service reported an error /
 *            returned an unexpected response.
 */
int listenText(char *result2)
{
    struct listen *lsn = NULL;
    char *base = NULL;
    char *token = NULL;
    char *jsonParam = NULL;
    char *tempresult = NULL;
    cJSON *root = NULL;
    cJSON *json = NULL;
    cJSON *item = NULL;
    cJSON *errCode = NULL;
    cJSON *first = NULL;
    CURL *curl = NULL;
    struct curl_slist *headers = NULL;
    CURLcode res;
    char *apiurl = "http://vop.baidu.com/server_api";
    int fileLength;
    int rc;

    /* NOTE(review): the Buffer from a previous call is not released here;
     * no release function for Buffer is visible in this listing. */
    listen_buff2 = initBuffer();
    if (listen_buff2 == NULL) {
        return -1;
    }

    lsn = listenUart();
    if (lsn == NULL) {
        return -1;
    }
    fileLength = lsn->length;

    /*
     * Base64 emits 4 characters per started group of 3 input bytes, plus one
     * byte for the terminator. The buffer is zero-filled so the string is
     * terminated even if base64_encode does not write a terminator itself.
     */
    base = calloc(1, ((size_t)fileLength + 2) / 3 * 4 + 1);
    if (base != NULL) {
        base64_encode(lsn->data, lsn->length, base);
    }
    free(lsn->data);
    free(lsn);
    if (base == NULL) {
        return -1;
    }

    rc = -2;
    if (initToken() != 1) {
        goto cleanup;
    }
    token = getToken();
    if (token == NULL) {
        goto cleanup;
    }

    /* Build the JSON request body. */
    rc = -3;
    root = cJSON_CreateObject();
    if (root == NULL) {
        goto cleanup;
    }
    cJSON_AddItemToObject(root, "format", cJSON_CreateString("wav"));
    cJSON_AddItemToObject(root, "rate", cJSON_CreateString("16000"));
    cJSON_AddItemToObject(root, "channel", cJSON_CreateString("1"));
    cJSON_AddItemToObject(root, "cuid", cJSON_CreateString("34-68-95-91-77-43"));
    cJSON_AddItemToObject(root, "token", cJSON_CreateString(token));
    cJSON_AddItemToObject(root, "dev_pid", cJSON_CreateString("1537"));
    cJSON_AddItemToObject(root, "speech", cJSON_CreateString(base));
    cJSON_AddItemToObject(root, "len", cJSON_CreateNumber(fileLength));

    /* cJSON_CreateString keeps its own copy, so the encoded audio can go. */
    free(base);
    base = NULL;

    jsonParam = cJSON_PrintUnformatted(root);
    if (jsonParam == NULL) {
        goto cleanup;
    }

    /* Send the request. */
    curl = curl_easy_init();
    if (curl == NULL) {
        goto cleanup;
    }
    headers = curl_slist_append(headers, "Content-Type:application/json");
    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, headers);
    curl_easy_setopt(curl, CURLOPT_URL, apiurl);
    curl_easy_setopt(curl, CURLOPT_TIMEOUT, 60L);
    curl_easy_setopt(curl, CURLOPT_POST, 1L);
    curl_easy_setopt(curl, CURLOPT_POSTFIELDS, jsonParam);
    curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE, (long)strlen(jsonParam));
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, listen_getData2);
    curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
    res = curl_easy_perform(curl);
    if (res != CURLE_OK) {
        goto cleanup;
    }

    /* The response buffer is not NUL-terminated; copy it into a C string. */
    tempresult = malloc(listen_buff2->length + 1);
    if (tempresult == NULL) {
        rc = -1;
        goto cleanup;
    }
    memcpy(tempresult, listen_buff2->buff, listen_buff2->length);
    tempresult[listen_buff2->length] = 0;

    json = cJSON_Parse(tempresult);
    if (json == NULL) {
        goto cleanup;
    }
    errCode = cJSON_GetObjectItem(json, "err_no");
    if (errCode == NULL || errCode->valueint != 0) {
        goto cleanup;
    }
    item = cJSON_GetObjectItem(json, "result");
    first = (item != NULL) ? cJSON_GetArrayItem(item, 0) : NULL;
    if (first == NULL || first->valuestring == NULL) {
        goto cleanup;
    }
    strcpy(result2, first->valuestring);
    rc = 0;

cleanup:
    cJSON_Delete(json);
    free(tempresult);
    if (curl != NULL) {
        curl_easy_cleanup(curl);
    }
    curl_slist_free_all(headers);
    cJSON_Delete(root);
    free(jsonParam);
    free(token);
    free(base);
    return rc;
}
主程序
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
|
#include<stdio.h> #include<string.h> #include "convertText.h" #include "mp3.h" #include "led.h" #include "say.h" //gcc -o robot robot.o mp3.o Buffer.o base64.o token.o cJSON.o listen.o convertText.o led.o say.o -lcurl -lm -lwiringPi -lmad void sayChina( char *china) { int resp=initSay(china); printf ( "resp:%d " ,resp); if (resp==1) { int tte=playData( "temp.mp3" ); printf ( "tte:%d " ,tte); } } int main() { char text[100]={0}; sayChina( "你好,我是小志,有什么可以为你服务" ); while (1) { printf ( ";;;;;;;;" ); int code= listenText(text); if (code==0) { printf ( "result:%s " ,text); if ( strstr (text, "播放音乐," )!=NULL|| strstr (text, "打开音乐," )!=NULL) { sayChina( "正在为你打开音乐" ); musicPlayFile( "mu.mp3" ); } if ( strstr (text, "打开灯," )!=NULL|| strstr (text, "打开," )!=NULL) { sayChina( "好的" ); printf ( "正在打开" ); ledOn(); } if ( strstr (text, "关闭灯," )!=NULL|| strstr (text, "关闭," )!=NULL|| strstr (text, "完毕," )!=NULL) { sayChina( "好的" ); printf ( "正在关闭" ); ledOff(); } if ( strstr (text, "你叫什么" )!=NULL|| strstr (text, "你叫什么名字" )!=NULL|| strstr (text, "名字" )!=NULL) { sayChina( "我叫小志" ); } if ( strstr (text, "今天天气咋样" )!=NULL|| strstr (text, "天气" )!=NULL) { sayChina( "外面在下雨,有点冷" ); } if ( strstr (text, "中午好" )!=NULL|| strstr (text, "中午" )!=NULL) { sayChina( "好什么啊,我还没吃饭呢" ); } if ( strstr (text, "你多大了" )!=NULL|| strstr (text, "今年几岁" )!=NULL|| strstr (text, "几岁" )!=NULL) { sayChina( "我才出生,还没满月" ); } } else { printf ( "error " ); } } return 0; } |
这里只是贴出来部分程序,所有代码请查看 链接 希望能和大家一起交流下心得。
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持服务器之家。
原文链接:https://blog.csdn.net/telrob/article/details/80089684