一、内容
Simpleaudio:支持numpy数组播放
sounddevice 和 soundfile:支持播放和录制包含音频信号的 NumPy 数组
pydub:必须预先安装至少一个来自(simpleaudio、pyaudio、ffplay 和 avplay)的音频播放包。可以查看音频信息(时长,分贝)
pyaudio 和 wave:录制和连续音频流
moviepy:视频音频剪辑处理
二、pyaudio录音和播放
1. 播放
import pyaudio
import wave

filename = 'path-to_file.wav'

# Number of frames read from the file per iteration.
chunk = 1024

# Open the source WAV file for reading.
af = wave.open(filename, 'rb')

# Create an interface to PortAudio.
pa = pyaudio.PyAudio()

# Open an output stream configured from the WAV header.
# output=True means the stream plays audio
# (input=True would be used for recording instead).
stream = pa.open(format=pa.get_format_from_width(af.getsampwidth()),
                 channels=af.getnchannels(),
                 rate=af.getframerate(),
                 output=True)

# Play the sound chunk by chunk.  readframes() returns *bytes*, so the
# end of the file is the empty bytes object b'' — the original compared
# against the str '' which never matches and would loop forever.
rd_data = af.readframes(chunk)
while rd_data:
    stream.write(rd_data)
    rd_data = af.readframes(chunk)

# Close and terminate the stream, PortAudio and the input file.
stream.stop_stream()
stream.close()
pa.terminate()
af.close()
2. 录制
import pyaudio
import wave

# Record in chunks of 1024 samples per buffer.
chunk = 1024
# 16 bits per sample.
sample_format = pyaudio.paInt16
channels = 2
# Sampling rate in Hz.  NOTE(review): 44400 is an unusual rate — the
# CD-quality standard is 44100; confirm this value is intentional.
smpl_rt = 44400
# Recording duration in seconds.
seconds = 4
filename = "path_of_file.wav"

# Create an interface to PortAudio and open an input stream.
pa = pyaudio.PyAudio()
stream = pa.open(format=sample_format, channels=channels,
                 rate=smpl_rt, input=True,
                 frames_per_buffer=chunk)
print('Recording...')

# Store raw buffers for `seconds` seconds
# (the original comment incorrectly said 8 seconds).
frames = []
for i in range(0, int(smpl_rt / chunk * seconds)):
    data = stream.read(chunk)
    frames.append(data)

# Stop and close the stream.
stream.stop_stream()
stream.close()
# Terminate the PortAudio interface.
pa.terminate()
print('Done !!! ')

# Save the recorded data in WAV format; the context manager
# guarantees the file is closed even if a write fails.
with wave.open(filename, 'wb') as sf:
    sf.setnchannels(channels)
    sf.setsampwidth(pa.get_sample_size(sample_format))
    sf.setframerate(smpl_rt)
    sf.writeframes(b''.join(frames))
三、pydub调整音量
1、调整音量大小
读取文件
# Load an audio file with pydub and adjust its volume.
from pydub import AudioSegment
from pydub.playback import play  # playback helper

audio = AudioSegment.from_mp3('./1.mp3')
# Alternative loaders for other formats:
# tape = AudioSegment.from_wav('path_to_myfile.wav')
# tape = AudioSegment.from_file('path_to_myfile.wav', format='wav')

audio_big = audio + 6    # boost volume by 6 dB
audio_small = audio - 3  # attenuate volume by 3 dB

play(audio)
2 、音频切片与合并
获取录音的某个时间段,以毫秒为单位。
# Slice boundaries, in milliseconds (pydub segments index by ms).
start = 10000
end = 20000
# Slice out the [start, end) portion of the recording.
audio_temp = audio[start:end]
# '+' on two AudioSegments concatenates them.
audio_temp = audio_temp + audio_temp
# Show the duration in seconds — a bare expression would be silently
# discarded when run as a script, so print it.
print(audio_temp.duration_seconds)
# Export to a real file name (the original exported to the literal
# name ".wav", which creates a hidden/odd file).
audio_temp.export("audio_temp.wav", format="wav")
四、sounddevice播放numpy
import soundfile as sf
import sounddevice as sd

# Path of the file to play (the original referenced an undefined
# variable `song`).
song = 'path_to_file.wav'

# Read the audio into a float32 NumPy array plus its sample rate.
array, smp_rt = sf.read(song, dtype='float32')
# Start playback and block until it finishes.
sd.play(array, smp_rt)
status = sd.wait()
sd.stop()  # no-op after wait(); kept for parity with the original
五、python去除静音
去除语音中静音段,ffmpeg和librosa均有提供功能,但是其去除静音段有一定局限性,只能去掉文件两端的静音段,而对于文件中间存在的静音段则无法去除,在此基础上,去除语音文件中任意地方的静音段
求取语音的mfcc参数,并选取其含有能量信息的mfcc0作为端点检测的输入特征
对mfcc0特征进行中值滤波,平滑mfcc0参数
确定静音帧阈值,即认为超过多少帧的间隔,我们认为是静音段
去除筛选出的静音段
在原文件中剔除静音段,得到端点检测后的语音序列
import librosa
import soundfile as sf
import numpy as np
from scipy.signal import medfilt
#忽略警告
import warnings
warnings.filterwarnings('ignore')
def frame2Time(frameNum, framelen, inc, fs):
    """Convert frame indices to time stamps in seconds.

    frameNum -- number of frames
    framelen -- frame length in samples
    inc      -- hop size in samples
    fs       -- sampling rate in Hz

    Returns an array with the time (s) of each frame's center.
    """
    # Center sample of every frame: index * hop + half a frame length.
    centers = np.arange(frameNum) * inc + framelen / 2
    # Divide by the sampling rate to get seconds.
    return centers / fs
def slience(filename):
    """Remove internal silent segments from a speech file.

    Endpoint detection based on the 0th MFCC coefficient (which carries
    the energy information): frames whose median-filtered mfcc0 falls
    below a threshold are marked silent, runs longer than
    `frame_threshold` frames are cut out, and the remaining audio is
    concatenated.

    Returns the cleaned sample sequence, or the loaded signal `y`
    unchanged when no long-enough silent run is found.

    NOTE(review): the source had lost all indentation; this body is a
    reconstruction of the intended control flow — verify against the
    original article.
    """
    # Silent runs of at most this many frames are left untouched; e.g.
    # a 12-frame silent run is removed, an 8-frame one is kept.
    frame_threshold = 10

    # Load at 16 kHz and compute MFCCs (frame 1024 samples, hop 512).
    y, sr = librosa.load(filename, sr=16000)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=24,
                                 win_length=1024, hop_length=512, n_fft=1024)
    # Median-filter mfcc0 (window 9) to smooth out spikes.
    pic = medfilt(mfccs[0, :], 9)

    # Scan for runs of frames below the threshold (90% of the minimum).
    start = 0
    end = 0
    points = []
    min_data = min(pic) * 0.9
    for i in range(pic.shape[0]):
        if pic[i] < min_data and start == 0:
            start = i
        if pic[i] < min_data and start != 0:
            end = i
        elif pic[i] > min_data and start != 0:
            points.append([start, end])
            start = 0
    # Handle a file whose very end is inside a silent run.
    if pic[-1] < min_data and start != 0:
        points.append([start, end])
        start = 0

    # Keep only the runs longer than frame_threshold frames.
    distances = [p for p in points if p[1] - p[0] > frame_threshold]

    # No long silent run detected: return the signal unchanged.
    if len(distances) == 0:
        return y

    # Cut the silent runs out, converting frame indices to sample
    # indices (hop 512, frame length 1024).
    slience_data = []
    for i in range(len(distances)):
        if i == 0:
            start, end = distances[i]
            if start == 1:
                # Silence starts at the very beginning: keep nothing.
                internal_clean = y[0:0]
            else:
                # First sample of the starting frame...
                start = (start - 1) * 512
                # ...and last sample of the ending frame.
                end = (end - 1) * 512 + 1024
                internal_clean = y[0:start - 1]
        else:
            # Keep the audio between the previous run and this one.
            _, end = distances[i - 1]
            start, _ = distances[i]
            start = (start - 1) * 512
            end = (end - 1) * 512 + 1024
            internal_clean = y[end + 1:start]
        slience_data.extend(internal_clean)

    # Append whatever audio follows the last silent run (this also
    # covers the case where the file ends in silence: the tail slice
    # is then empty).
    _, end = distances[-1]
    end = (end - 1) * 512 + 1024
    slience_data.extend(y[end:len(y)])
    return slience_data
六、python实时静音检测
可使用 py-webrtcvad(WebRTC VAD 的 Python 封装)进行实时语音活动/静音检测。
七、python视频转音频
from moviepy.editor import *

# Path of the source video (the original passed an undefined name `x`).
x = 'input_video.mp4'
videofile = VideoFileClip(x)
# Extract the audio track and save it as MP3.
audio = videofile.audio
audio.write_audiofile('x.mp3')
# Release the underlying file readers.
audio.close()
videofile.close()
References
https://www.moonapi.com/news/2810.html
https://blog.csdn.net/weixin_38468077/article/details/121677376
到了这里,关于Python音频处理,录制播放的文章就介绍完了。如果您还想了解更多内容,请在右上角搜索TOY模板网以前的文章或继续浏览下面的相关文章,希望大家以后多多支持TOY模板网!