四.代码 4-1 音量 import math
import wave
import pylab as pl
import numpy as np
# method 1: absSum
def calVolume(waveData, frameSize, overLap):
wlen = len(waveData)
step = frameSize - overLap
frameNum = int(math.ceil(wlen * 1.0 / step))
volume = np.zeros((frameNum, 1))
for i in range(frameNum):
curFrame = waveData[np.arange(i * step, min(i * step + frameSize, wlen))]
curFrame = curFrame - np.median(curFrame) # zero-justified
volume[i] = np.sum(np.abs(curFrame))
return volume
# method 2: 10 times log10 of square sum
def calVolumeDB(waveData, frameSize, overLap):
wlen = len(waveData)
step = frameSize - overLap
frameNum = int(math.ceil(wlen * 1.0 / step))
volume = np.zeros((frameNum, 1))
for i in range(frameNum):
curFrame = waveData[np.arange(i * step, min(i * step + frameSize, wlen))]
curFrame = curFrame - np.mean(curFrame) # zero-justified
volume[i] = 10 * np.log10(np.sum(curFrame * curFrame))
return volume
# ============ test the algorithm =============
# read wave file and get parameters.
fw = wave.open('sunday.wav', 'r')
params = fw.getparams()
print(params)
nchannels, sampwidth, framerate, nframes = params[:4]
strData = fw.readframes(nframes)
waveData = np.frombuffer(strData, dtype=np.int16)
waveData = waveData * 1.0 / max(abs(waveData)) # normalization
fw.close()
# calculate volume
frameSize = 256
overLap = 128
volume11 = calVolume(waveData, frameSize, overLap)
volume12 = calVolumeDB(waveData, frameSize, overLap)
# plot the wave
time = np.arange(0, nframes) * (1.0 / framerate)
time2 = np.arange(0, len(volume11)) * (frameSize - overLap) * 1.0 / framerate
pl.figure(figsize=(15, 9))
pl.subplot(311)
pl.plot(time, waveData)
pl.ylabel("Amplitude")
pl.subplot(312)
pl.plot(time2, volume11)
pl.ylabel("absSum")
pl.subplot(313)
pl.plot(time2, volume12, c="g")
pl.ylabel("Decibel(dB)")
pl.xlabel("time (seconds)")
pl.show()
4-2 音高 import wave
import numpy as np
import pylab as pl
# ============ test the algorithm =============
# read wave file and get parameters.
fw = wave.open('Ring08.wav', 'rb')
params = fw.getparams()
print(params)
nchannels, sampwidth, framerate, nframes = params[:4]
strData = fw.readframes(nframes)
# waveData = np.fromstring(strData, dtype=np.int16)
waveData = np.frombuffer(strData, np.int16)
waveData = waveData * 1.0 / max(abs(waveData)) # normalization
fw.close()
# plot the wave
time = np.arange(0, len(waveData)) * (1.0 / framerate)
index1 = 10000.0 / framerate
index2 = 10512.0 / framerate
index3 = 15000.0 / framerate
index4 = 15512.0 / framerate
pl.subplot(311)
pl.plot(time, waveData)
pl.plot([index1, index1], [-1, 1], 'r')
pl.plot([index2, index2], [-1, 1], 'r')
pl.plot([index3, index3], [-1, 1], 'g')
pl.plot([index4, index4], [-1, 1], 'g')
pl.xlabel("time (seconds)")
pl.ylabel("Amplitude")
pl.subplot(312)
pl.plot(np.arange(512), waveData[10000:10512], 'r')
pl.plot([59, 59], [-1, 1], 'b')
pl.plot([169, 169], [-1, 1], 'b')
print(1 / ((169 - 59) * 1.0 / framerate))
pl.xlabel("index in 1 frame")
pl.ylabel("Amplitude")
pl.subplot(313)
pl.plot(np.arange(512), waveData[15000:15512], 'g')
pl.xlabel("index in 1 frame")
pl.ylabel("Amplitude")
pl.show()
4-3 波形频谱 import librosa
import librosa.display
import matplotlib.pyplot as plt
# Load a wav file
y, sr = librosa.load('PCM.wav', sr=None)
# extract mel spectrogram feature
melspec = librosa.feature.melspectrogram(y, sr, n_fft=1024, hop_length=512, n_mels=128)
# convert to log scale
logmelspec = librosa.power_to_db(melspec)
plt.figure(figsize=(15, 10))
# plot a wavform
plt.subplot(2, 1, 1)
librosa.display.waveshow(y, sr)
plt.title('Beat wavform')
# plot mel spectrogram
plt.subplot(2, 1, 2)
librosa.display.specshow(logmelspec, sr=sr, x_axis='time', y_axis='mel')
plt.title('Mel spectrogram')
plt.tight_layout() # 保证图不重叠
plt.show()
4-4 PCM import numpy as np
import librosa
import matplotlib.pyplot as plt
def PCM_encode(x):
n = len(x)
out = np.zeros((n, 8))
for i in range(n):
# 符号位
if x[i] > 0:
out[i, 0] = 1
else:
out[i, 0] = 0
# 数据位
if abs(x[i]) < 32:
out[i, 1], out[i, 2], out[i, 3], step, st = 0, 0, 0, 2, 0
elif abs(x[i]) < 64:
out[i, 1], out[i, 2], out[i, 3], step, st = 0, 0, 1, 2, 32
elif abs(x[i]) < 128:
out[i, 1], out[i, 2], out[i, 3], step, st = 0, 1, 0, 4, 64
elif abs(x[i]) < 256:
out[i, 1], out[i, 2], out[i, 3], step, st = 0, 1, 1, 8, 128
elif abs(x[i]) < 512:
out[i, 1], out[i, 2], out[i, 3], step, st = 1, 0, 0, 16, 256
elif abs(x[i]) < 1024:
out[i, 1], out[i, 2], out[i, 3], step, st = 1, 0, 1, 32, 512
elif abs(x[i]) < 2048:
out[i, 1], out[i, 2], out[i, 3], step, st = 1, 1, 0, 64, 1024
else:
out[i, 1], out[i, 2], out[i, 3], step, st = 1, 1, 1, 128, 2048
if abs(x[i]) >= 4096:
out[i, 1:] = np.array([1, 1, 1, 1, 1, 1])
else:
tmp = bin(int((abs(x[i]) - st) / step)).replace('0b', '')
tmp = '0' * (4 - len(tmp)) + tmp
t = [int(k) for k in tmp]
out[i, 4:] = t
return out.reshape(8 * n)
def PCM_decode(ins, v):
inn = ins.reshape(len(ins) // 8, 8)
slot = np.array([0, 32, 64, 128, 256, 512, 1024, 2048])
step = np.array([2, 2, 4, 8, 16, 32, 64, 128])
out = np.zeros(len(ins) // 8)
for i in range(inn.shape[0]):
sgn = 2 * inn[i, 0] - 1
tmp = int(inn[i, 1] * 4 + inn[i, 2] * 2 + inn[i, 3])
st = slot[tmp]
dt = (inn[i, 4] * 8 + inn[i, 5] * 4 + inn[i, 6] * 2 + inn[i, 7]) * step[tmp] + 0.5 * step[tmp]
out[i] = sgn * (st + dt) / 4096 * v
return out
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
data, sr = librosa.load('Ring08.wav')
sxx = np.array(list(map(int, data * 4096)))
y = PCM_encode(sxx)
yy = PCM_decode(y, 1)
plt.subplot(3, 1, 1)
plt.plot(data)
plt.title('编码前')
plt.subplot(3, 1, 2)
plt.plot(yy)
plt.title('解码后')
plt.subplot(3, 1, 3)
plt.plot(yy - data)
plt.title('误差')
plt.show()
|