之前本人写过ffmpeg录制系统声音的博客,但是用到的设备名称叫做virtual-audio-capturer,需要事先安装一个软件,ffmpeg才能找到这个设备,很不方便; 今天用windows api采集声卡声音,进行声卡数据抓取,然后放入ffmpeg进行编码。 关于声卡的数据采集api,可以参看下面博客: 声卡数据采集 本人从声卡中获取到的格式是: 采样率:48000 采样位数:32 通道数:双通道 最终编码时,编码后的格式为AV_SAMPLE_FMT_FLTP(平面格式),代码如下: av_opt_set_channel_layout(m_pAudioConvertCtx, "in_channel_layout", AV_CH_LAYOUT_STEREO, 0); av_opt_set_channel_layout(m_pAudioConvertCtx, "out_channel_layout", AV_CH_LAYOUT_STEREO, 0); av_opt_set_int(m_pAudioConvertCtx, "in_sample_rate", m_formatex.Format.nSamplesPerSec, 0); av_opt_set_int(m_pAudioConvertCtx, "out_sample_rate", 48000, 0); av_opt_set_sample_fmt(m_pAudioConvertCtx, "in_sample_fmt", AV_SAMPLE_FMT_S32, 0); av_opt_set_sample_fmt(m_pAudioConvertCtx, "out_sample_fmt", AV_SAMPLE_FMT_FLTP, 0); 相应的采样转换代码如下: uint8_t *audio_buf[2] = { 0 }; audio_buf[0] = (uint8_t *)frame_mic_encode->data[0]; audio_buf[1] = (uint8_t *)frame_mic_encode->data[1]; int nb = swr_convert(m_pAudioConvertCtx, audio_buf, num_frames_to_read, (const uint8_t**)&p_audio_data, num_frames_to_read); 其中p_audio_data为从声卡中获取的数据buffer,num_frames_to_read为数据长度(以每个采样为单位) 由于编码格式是平面格式,所以定义了audio_buf[2]。 如果系统未播放任何声音,则num_frames_to_read为0,这种情况,本人尚未处理。本人给出的例子是系统中播放一段音乐时的处理。 main函数如下所示: #include <iostream> #include "GetSystemAudio.h" int main() { CGetSystemAudio cCGetSystemAudio; cCGetSystemAudio.SetSavePath("E:\\learn\\ffmpeg\\FfmpegTest\\x64\\Release"); cCGetSystemAudio.StartCapture(); Sleep(30000); cCGetSystemAudio.StopCapture(); return 0; } 可以看出,录了30秒。 GetSystemAudio.h的内容如下: #pragma once #include <string> #include <combaseapi.h> #include <mmdeviceapi.h> #include <audioclient.h> #ifdef __cplusplus extern "C" { #endif #include "libavcodec/avcodec.h" #include "libavformat/avformat.h" #include "libswscale/swscale.h" #include "libswresample/swresample.h" #include "libavdevice/avdevice.h" #include "libavutil/audio_fifo.h" #include "libavutil/avutil.h" #include "libavutil/fifo.h" #include "libavutil/frame.h" #include "libavutil/imgutils.h" #include
"libavfilter/avfilter.h" #include "libavfilter/buffersink.h" #include "libavfilter/buffersrc.h" #pragma comment(lib, "avcodec.lib") #pragma comment(lib, "avformat.lib") #pragma comment(lib, "avutil.lib") #pragma comment(lib, "avdevice.lib") #pragma comment(lib, "avfilter.lib") #pragma comment(lib, "postproc.lib") #pragma comment(lib, "swresample.lib") #pragma comment(lib, "swscale.lib") #ifdef __cplusplus }; #endif class CGetSystemAudio { public: CGetSystemAudio(); ~CGetSystemAudio(); public: void SetSavePath(std::string strPath); int StartCapture(); void StopCapture(); int OpenOutPut(); private: static DWORD WINAPI AudioSystemCaptureProc(LPVOID lpParam); void AudioSystemCapture(); static DWORD WINAPI AudioSystemWriteProc(LPVOID lpParam); void AudioSystemWrite(); HRESULT IsFormatSupported(IAudioClient *audioClient); private: std::string m_strRecordPath; bool m_bRecord; IAudioClient *pAudioClient = nullptr; IAudioCaptureClient *pAudioCaptureClient = nullptr; WAVEFORMATEXTENSIBLE m_formatex; HANDLE m_hAudioSystemCapture = NULL; HANDLE m_hAudioSystemWrite = NULL; AVFormatContext *m_pFormatCtx_Out = NULL; AVFormatContext*m_pFormatCtx_AudioSystem = NULL; AVCodecContext*m_pCodecEncodeCtx_Audio = NULL; AVCodec*m_pCodecEncode_Audio = NULL; SwrContext *m_pAudioConvertCtx = NULL; AVAudioFifo *m_pAudioFifo = NULL; CRITICAL_SECTION m_csAudioSystemSection; }; GetSystemAudio.cpp的内容如下: #include "GetSystemAudio.h" #include <iostream> #include <fstream> #include <thread> #define DEFAULT_SAMPLE_RATE 48000 // 默认采样率:48kHz #define DEFAULT_BITS_PER_SAMPLE 16 // 默认位深:16bit #define DEFAULT_CHANNELS 1 // 默认音频通道数:1 #define DEFAULT_AUDIO_PACKET_INTERVAL 10 // 默认音频包发送间隔:10ms HRESULT CreateDeviceEnumerator(IMMDeviceEnumerator **enumerator) { CoInitializeEx(nullptr, COINIT_MULTITHREADED); return CoCreateInstance(__uuidof(MMDeviceEnumerator), NULL, CLSCTX_ALL, __uuidof(IMMDeviceEnumerator), reinterpret_cast<void **>(enumerator)); } HRESULT CreateDevice(IMMDeviceEnumerator *enumerator, IMMDevice 
**device) { EDataFlow enDataFlow = eRender;// 表示获取扬声器的audio_endpoint ERole enRole = eConsole; return enumerator->GetDefaultAudioEndpoint(enDataFlow, enRole, device); } HRESULT CreateAudioClient(IMMDevice *device, IAudioClient **audioClient) { return device->Activate(__uuidof(IAudioClient), CLSCTX_ALL, NULL, (void **)audioClient); } HRESULT CGetSystemAudio::IsFormatSupported(IAudioClient *audioClient) { WAVEFORMATEX *format = &m_formatex.Format; format->nSamplesPerSec = DEFAULT_SAMPLE_RATE; format->wBitsPerSample = DEFAULT_BITS_PER_SAMPLE; format->nChannels = DEFAULT_CHANNELS; WAVEFORMATEX *closestMatch = nullptr; HRESULT hr = audioClient->IsFormatSupported(AUDCLNT_SHAREMODE_SHARED, format, &closestMatch); if (hr == AUDCLNT_E_UNSUPPORTED_FORMAT) // 0x88890008 { if (closestMatch != nullptr) // 如果找不到最相近的格式,closestMatch可能为nullptr { format->nSamplesPerSec = closestMatch->nSamplesPerSec; format->wBitsPerSample = closestMatch->wBitsPerSample; format->nChannels = closestMatch->nChannels; return S_OK; } } return hr; } HRESULT GetPreferFormat(IAudioClient *audioClient, WAVEFORMATEXTENSIBLE *formatex) { WAVEFORMATEX *format = nullptr; HRESULT hr = audioClient->GetMixFormat(&format); if (FAILED(hr)) { return hr; } formatex->Format.nSamplesPerSec = format->nSamplesPerSec; formatex->Format.wBitsPerSample = format->wBitsPerSample; formatex->Format.nChannels = format->nChannels; return hr; } HRESULT InitAudioClient(IAudioClient *audioClient, WAVEFORMATEXTENSIBLE *formatex) { AUDCLNT_SHAREMODE shareMode = AUDCLNT_SHAREMODE_SHARED; // share Audio Engine with other applications DWORD streamFlags = AUDCLNT_STREAMFLAGS_LOOPBACK; // loopback speaker streamFlags |= AUDCLNT_STREAMFLAGS_AUTOCONVERTPCM; // A channel matrixer and a sample // rate converter are inserted streamFlags |= AUDCLNT_STREAMFLAGS_SRC_DEFAULT_QUALITY; // a sample rate converter // with better quality than // the default conversion but // with a higher performance // cost is used REFERENCE_TIME hnsBufferDuration = 0; 
WAVEFORMATEX *format = &formatex->Format; format->wFormatTag = WAVE_FORMAT_EXTENSIBLE; format->nBlockAlign = (format->wBitsPerSample >> 3) * format->nChannels; format->nAvgBytesPerSec = format->nBlockAlign * format->nSamplesPerSec; format->cbSize = sizeof(WAVEFORMATEXTENSIBLE) - sizeof(WAVEFORMATEX); formatex->Samples.wValidBitsPerSample = format->wBitsPerSample; formatex->dwChannelMask = format->nChannels == 1 ? KSAUDIO_SPEAKER_MONO : KSAUDIO_SPEAKER_STEREO; formatex->SubFormat = KSDATAFORMAT_SUBTYPE_PCM; return audioClient->Initialize(shareMode, streamFlags, hnsBufferDuration, 0, format, nullptr); } HRESULT CreateAudioCaptureClient(IAudioClient *audioClient, IAudioCaptureClient **audioCaptureClient) { HRESULT hr = audioClient->GetService(IID_PPV_ARGS(audioCaptureClient)); if (FAILED(hr)) { *audioCaptureClient = nullptr; } return hr; } DWORD WINAPI CGetSystemAudio::AudioSystemCaptureProc(LPVOID lpParam) { CGetSystemAudio *pCGetSystemAudio = (CGetSystemAudio *)lpParam; if (pCGetSystemAudio != NULL) { pCGetSystemAudio->AudioSystemCapture(); } return 0; } void CGetSystemAudio::AudioSystemCapture() { HRESULT hr = S_OK; UINT32 num_success = 0; BYTE *p_audio_data = nullptr; UINT32 num_frames_to_read = 0; DWORD dw_flag = 0; UINT32 num_frames_in_next_packet = 0; UINT32 num_loop = 0; pAudioClient->Start(); int ret = 0; int AudioFrameIndex_mic = 1; while (m_bRecord) { std::this_thread::sleep_for(std::chrono::milliseconds(0)); while (true) { hr = pAudioCaptureClient->GetNextPacketSize(&num_frames_in_next_packet); if (FAILED(hr)) { throw std::exception(); } if (num_frames_in_next_packet == 0) { break; } hr = pAudioCaptureClient->GetBuffer(&p_audio_data, &num_frames_to_read, &dw_flag, nullptr, nullptr); if (FAILED(hr)) { throw std::exception(); } AVFrame *frame_mic_encode = NULL; frame_mic_encode = av_frame_alloc(); frame_mic_encode->nb_samples = m_pCodecEncodeCtx_Audio->frame_size; frame_mic_encode->channel_layout = m_pCodecEncodeCtx_Audio->channel_layout; 
frame_mic_encode->format = m_pCodecEncodeCtx_Audio->sample_fmt; frame_mic_encode->sample_rate = m_pCodecEncodeCtx_Audio->sample_rate; av_frame_get_buffer(frame_mic_encode, 0); int iDelaySamples = 0; AVPacket pkt_out_mic = { 0 }; pkt_out_mic.data = NULL; pkt_out_mic.size = 0; //uint8_t *audio_buf = NULL; uint8_t *audio_buf[2] = { 0 }; audio_buf[0] = (uint8_t *)frame_mic_encode->data[0]; audio_buf[1] = (uint8_t *)frame_mic_encode->data[1]; int nb = swr_convert(m_pAudioConvertCtx, audio_buf, num_frames_to_read, (const uint8_t**)&p_audio_data, num_frames_to_read); int buf_space = av_audio_fifo_space(m_pAudioFifo); if (buf_space >= frame_mic_encode->nb_samples) { //AudioSection EnterCriticalSection(&m_csAudioSystemSection); ret = av_audio_fifo_write(m_pAudioFifo, (void **)frame_mic_encode->data, num_frames_to_read); LeaveCriticalSection(&m_csAudioSystemSection); } hr = pAudioCaptureClient->ReleaseBuffer(num_frames_to_read); if (FAILED(hr)) { throw std::exception(); } num_loop++; } } pAudioClient->Stop(); } DWORD WINAPI CGetSystemAudio::AudioSystemWriteProc(LPVOID lpParam) { CGetSystemAudio *pCGetSystemAudio = (CGetSystemAudio *)lpParam; if (pCGetSystemAudio != NULL) { pCGetSystemAudio->AudioSystemWrite(); } return 0; } void CGetSystemAudio::AudioSystemWrite() { int ret = 0; int AudioFrameIndex_mic = 1; AVFrame *frame_audio_system = NULL; frame_audio_system = av_frame_alloc(); while (m_bRecord) { if (av_audio_fifo_size(m_pAudioFifo) >= (m_pFormatCtx_Out->streams[0]->codecpar->frame_size > 0 ? m_pFormatCtx_Out->streams[0]->codecpar->frame_size : 1024)) { frame_audio_system->nb_samples = m_pFormatCtx_Out->streams[0]->codecpar->frame_size > 0 ? 
m_pFormatCtx_Out->streams[0]->codecpar->frame_size : 1024; frame_audio_system->channel_layout = m_pFormatCtx_Out->streams[0]->codecpar->channel_layout; frame_audio_system->format = m_pFormatCtx_Out->streams[0]->codecpar->format; frame_audio_system->sample_rate = m_pFormatCtx_Out->streams[0]->codecpar->sample_rate; av_frame_get_buffer(frame_audio_system, 0); EnterCriticalSection(&m_csAudioSystemSection); int readcount = av_audio_fifo_read(m_pAudioFifo, (void **)frame_audio_system->data, (m_pFormatCtx_Out->streams[0]->codecpar->frame_size > 0 ? m_pFormatCtx_Out->streams[0]->codecpar->frame_size : 1024)); LeaveCriticalSection(&m_csAudioSystemSection); AVPacket pkt_out_mic = { 0 }; pkt_out_mic.data = NULL; pkt_out_mic.size = 0; ret = avcodec_send_frame(m_pCodecEncodeCtx_Audio, frame_audio_system); ret = avcodec_receive_packet(m_pCodecEncodeCtx_Audio, &pkt_out_mic); pkt_out_mic.stream_index = 0; pkt_out_mic.pts = AudioFrameIndex_mic * readcount; pkt_out_mic.dts = AudioFrameIndex_mic * readcount; pkt_out_mic.duration = readcount; av_write_frame(m_pFormatCtx_Out, &pkt_out_mic); av_packet_unref(&pkt_out_mic); AudioFrameIndex_mic++; } else { Sleep(1); if (!m_bRecord) { break; } } } Sleep(100); av_frame_free(&frame_audio_system); av_write_trailer(m_pFormatCtx_Out); avio_close(m_pFormatCtx_Out->pb); } CGetSystemAudio::CGetSystemAudio() { m_bRecord = false; m_hAudioSystemCapture = NULL; InitializeCriticalSection(&m_csAudioSystemSection); } CGetSystemAudio::~CGetSystemAudio() { DeleteCriticalSection(&m_csAudioSystemSection); } int CGetSystemAudio::OpenOutPut() { std::string strFileName = m_strRecordPath; int iRet = -1; AVStream *pAudioStream = NULL; do { std::string strFileName = m_strRecordPath; strFileName += "system_audio"; strFileName += ".mp4"; const char *outFileName = strFileName.c_str(); avformat_alloc_output_context2(&m_pFormatCtx_Out, NULL, NULL, outFileName); { pAudioStream = avformat_new_stream(m_pFormatCtx_Out, NULL); m_pCodecEncode_Audio = (AVCodec 
*)avcodec_find_encoder(m_pFormatCtx_Out->oformat->audio_codec); m_pCodecEncodeCtx_Audio = avcodec_alloc_context3(m_pCodecEncode_Audio); if (!m_pCodecEncodeCtx_Audio) { break; } //pCodecEncodeCtx_Audio->codec_id = pFormatCtx_Out->oformat->audio_codec; m_pCodecEncodeCtx_Audio->sample_fmt = m_pCodecEncode_Audio->sample_fmts ? m_pCodecEncode_Audio->sample_fmts[0] : AV_SAMPLE_FMT_FLTP; m_pCodecEncodeCtx_Audio->bit_rate = 64000; m_pCodecEncodeCtx_Audio->sample_rate = 48000; m_pCodecEncodeCtx_Audio->channel_layout = AV_CH_LAYOUT_STEREO; m_pCodecEncodeCtx_Audio->channels = av_get_channel_layout_nb_channels(m_pCodecEncodeCtx_Audio->channel_layout); AVRational timeBase; timeBase.den = m_pCodecEncodeCtx_Audio->sample_rate; timeBase.num = 1; pAudioStream->time_base = timeBase; if (avcodec_open2(m_pCodecEncodeCtx_Audio, m_pCodecEncode_Audio, 0) < 0) { //编码器打开失败,退出程序 break; } } if (!(m_pFormatCtx_Out->oformat->flags & AVFMT_NOFILE)) { if (avio_open(&m_pFormatCtx_Out->pb, outFileName, AVIO_FLAG_WRITE) < 0) { break; } } avcodec_parameters_from_context(pAudioStream->codecpar, m_pCodecEncodeCtx_Audio); if (avformat_write_header(m_pFormatCtx_Out, NULL) < 0) { break; } iRet = 0; } while (0); if (iRet != 0) { if (m_pCodecEncodeCtx_Audio != NULL) { avcodec_free_context(&m_pCodecEncodeCtx_Audio); m_pCodecEncodeCtx_Audio = NULL; } if (m_pFormatCtx_Out != NULL) { avformat_free_context(m_pFormatCtx_Out); m_pFormatCtx_Out = NULL; } } return iRet; } void CGetSystemAudio::SetSavePath(std::string strPath) { m_strRecordPath = strPath; if (!m_strRecordPath.empty()) { if (m_strRecordPath[m_strRecordPath.length() - 1] != '\\') { m_strRecordPath = m_strRecordPath + "\\"; } } } int CGetSystemAudio::StartCapture() { int iRet = -1; do { iRet = OpenOutPut(); if (iRet < 0) { break; } IMMDeviceEnumerator *pDeviceEnumerator = nullptr; IMMDevice *pDevice = nullptr; std::unique_ptr<std::thread> capture_thread = nullptr; std::string input_str; HRESULT hr; hr = CreateDeviceEnumerator(&pDeviceEnumerator); if 
(FAILED(hr)) { break; } hr = CreateDevice(pDeviceEnumerator, &pDevice); if (FAILED(hr)) { break; } hr = CreateAudioClient(pDevice, &pAudioClient); if (FAILED(hr)) { break; } hr = IsFormatSupported(pAudioClient); if (FAILED(hr)) { hr = GetPreferFormat(pAudioClient, &m_formatex); if (FAILED(hr)) { break; } } hr = InitAudioClient(pAudioClient, &m_formatex); if (FAILED(hr)) { break; } hr = CreateAudioCaptureClient(pAudioClient, &pAudioCaptureClient); if (FAILED(hr)) { break; } m_pAudioConvertCtx = swr_alloc(); av_opt_set_channel_layout(m_pAudioConvertCtx, "in_channel_layout", AV_CH_LAYOUT_STEREO, 0); av_opt_set_channel_layout(m_pAudioConvertCtx, "out_channel_layout", AV_CH_LAYOUT_STEREO, 0); av_opt_set_int(m_pAudioConvertCtx, "in_sample_rate", m_formatex.Format.nSamplesPerSec, 0); av_opt_set_int(m_pAudioConvertCtx, "out_sample_rate", 48000, 0); av_opt_set_sample_fmt(m_pAudioConvertCtx, "in_sample_fmt", AV_SAMPLE_FMT_S32, 0); av_opt_set_sample_fmt(m_pAudioConvertCtx, "out_sample_fmt", AV_SAMPLE_FMT_FLTP, 0); iRet = swr_init(m_pAudioConvertCtx); if (NULL == m_pAudioFifo) { m_pAudioFifo = av_audio_fifo_alloc((AVSampleFormat)m_pFormatCtx_Out->streams[0]->codecpar->format, m_pFormatCtx_Out->streams[0]->codecpar->channels, 3000 * 1024); } m_bRecord = true; m_hAudioSystemCapture = CreateThread(NULL, 0, AudioSystemCaptureProc, this, 0, NULL); m_hAudioSystemWrite = CreateThread(NULL, 0, AudioSystemWriteProc, this, 0, NULL); iRet = 0; } while (0); return 0; } void CGetSystemAudio::StopCapture() { m_bRecord = false; Sleep(1000); WaitForSingleObject(m_hAudioSystemCapture, INFINITE); CloseHandle(m_hAudioSystemCapture); m_hAudioSystemCapture = NULL; }
标签:声音,audio,ffmpeg,format,hr,frame,录制,NULL,mic From: https://www.cnblogs.com/kn-zheng/p/17865350.html