From 3884906cb896cdc8f03d9d65ca41d42a44178a17 Mon Sep 17 00:00:00 2001 From: king-king-king <52349759+king-king-king@users.noreply.github.com> Date: Sun, 29 May 2022 21:56:19 +0800 Subject: [PATCH] Add files via upload --- 10911831_3.py | 49 + AVOC.py | 149 ++ autoVOC-new.py | 153 ++ autoVOC.py | 148 ++ cut.py | 17 + data-number.py | 66 + fft.py | 20 + lsxbbh.py | 32 + lxml+plot(1).py | 50 + mel.py | 197 ++ melm.py | 52 + mfcc.py | 21 + multi classification.py | 20 + png-jpg.py | 16 + renamefile.py | 17 + resize.py | 27 + t.py | 13 + thinkdsp.py | 1631 +++++++++++++++++ thinkplot.py | 838 +++++++++ total.py | 339 ++++ two classification.py | 16 + xbbh.py | 46 + ...06\346\263\242\345\275\242\345\233\276.py" | 40 + ...47\224\237\346\210\220mfcc\345\233\276.py" | 44 + ...ft\345\243\260\350\260\261\345\233\276.py" | 41 + ...37\351\242\221\345\237\237\345\233\276.py" | 108 ++ "\346\227\266\351\225\277.py" | 8 + ...55\350\260\261\345\200\222\350\260\261.py" | 76 + "\346\263\242\345\275\242\345\233\276.py" | 18 + ...6-\346\237\261\347\212\266\345\233\276.py" | 19 + "\351\245\274\345\233\276.py" | 19 + 31 files changed, 4290 insertions(+) create mode 100644 10911831_3.py create mode 100644 AVOC.py create mode 100644 autoVOC-new.py create mode 100644 autoVOC.py create mode 100644 cut.py create mode 100644 data-number.py create mode 100644 fft.py create mode 100644 lsxbbh.py create mode 100644 lxml+plot(1).py create mode 100644 mel.py create mode 100644 melm.py create mode 100644 mfcc.py create mode 100644 multi classification.py create mode 100644 png-jpg.py create mode 100644 renamefile.py create mode 100644 resize.py create mode 100644 t.py create mode 100644 thinkdsp.py create mode 100644 thinkplot.py create mode 100644 total.py create mode 100644 two classification.py create mode 100644 xbbh.py create mode 100644 "\346\211\271\351\207\217\345\244\204\347\220\206\346\263\242\345\275\242\345\233\276.py" create mode 100644 
"\346\211\271\351\207\217\347\224\237\346\210\220mfcc\345\233\276.py" create mode 100644 "\346\211\271\351\207\217\347\224\237\346\210\220stft\345\243\260\350\260\261\345\233\276.py" create mode 100644 "\346\227\266\345\237\237\351\242\221\345\237\237\345\233\276.py" create mode 100644 "\346\227\266\351\225\277.py" create mode 100644 "\346\227\266\351\242\221\350\260\261\357\274\214\350\257\255\350\260\261\345\233\276\357\274\214mel\350\257\255\350\260\261\345\200\222\350\260\261.py" create mode 100644 "\346\263\242\345\275\242\345\233\276.py" create mode 100644 "\347\224\273\345\233\276-\346\237\261\347\212\266\345\233\276.py" create mode 100644 "\351\245\274\345\233\276.py" diff --git a/10911831_3.py b/10911831_3.py new file mode 100644 index 0000000..2c1d9f5 --- /dev/null +++ b/10911831_3.py @@ -0,0 +1,49 @@ +# -*- coding: UTF-8 -*- +import wave +import numpy as np +import matplotlib.pyplot as plt + +# 打开wav文件 ,open返回一个的是一个Wave_read类的实例,通过调用它的方法读取WAV文件的格式和数据。 +f = wave.open(r"D:\CloudMusic\ss/000005.wav","rb") +# 读取格式信息 +# 一次性返回所有的WAV文件的格式信息,它返回的是一个组元(tuple):声道数, 量化位数(byte单位), 采 +# 样频率, 采样点数, 压缩类型, 压缩类型的描述。wave模块只支持非压缩的数据,因此可以忽略最后两个信息 +params = f.getparams() +[nchannels, sampwidth, framerate, nframes] = params[:4] +# 读取波形数据 +# 读取声音数据,传递一个参数指定需要读取的长度(以取样点为单位) +str_data = f.readframes(nframes) +f.close() +# 将波形数据转换成数组 +# 需要根据声道数和量化单位,将读取的二进制数据转换为一个可以计算的数组 +wave_data = np.fromstring(str_data,dtype = np.short) +# 将wave_data数组改为2列,行数自动匹配。在修改shape的属性时,需使得数组的总长度不变。 +wave_data.shape = -1,2 +# 转置数据 +wave_data = wave_data.T +# 通过取样点数和取样频率计算出每个取样的时间。 +time=np.arange(0,nframes/2)/framerate +# print(params) +plt.figure(1) +# time 也是一个数组,与wave_data[0]或wave_data[1]配对形成系列点坐标 +plt.subplot(211) +plt.plot(time,wave_data[0]) +plt.xlabel("time/s") +plt.title('Wave') + + +N=44100 +start=0 +# 开始采样位置 +df = framerate/(N-1) +# 分辨率 +freq = [df*n for n in range(0,N)] +# N个元素 +wave_data2=wave_data[0][start:start+N] +c=np.fft.fft(wave_data2)*2/N +# 常规显示采样频率一半的频谱 +plt.subplot(212) 
+plt.plot(freq[:round(len(freq)/2)],abs(c[:round(len(c)/2)]),'r') +plt.title('Freq') +plt.xlabel("Freq/Hz") +plt.show() diff --git a/AVOC.py b/AVOC.py new file mode 100644 index 0000000..621b707 --- /dev/null +++ b/AVOC.py @@ -0,0 +1,149 @@ +from lxml import etree + + +class GEN_Annotations: + def __init__(self, filename, imgpath): + self.root = etree.Element("annotation") + + child1 = etree.SubElement(self.root, "folder") + child1.text = "ss" + + child2 = etree.SubElement(self.root, "filename") + child2.text = filename + + child3 = etree.SubElement(self.root,"path") + child3.text = imgpath + + child4 = etree.SubElement(self.root, "source") + + # child4 = etree.SubElement(child3, "annotation") + # child4.text = "PASCAL VOC2007" + child5 = etree.SubElement(child4, "database") + child5.text = "Unknown" + # + # child6 = etree.SubElement(child3, "image") + # child6.text = "flickr" + # child7 = etree.SubElement(child3, "flickrid") + # child7.text = "35435" + + def set_size(self, witdh, height, channel): + size = etree.SubElement(self.root, "size") + widthn = etree.SubElement(size, "width") + widthn.text = str(witdh) + heightn = etree.SubElement(size, "height") + heightn.text = str(height) + channeln = etree.SubElement(size, "depth") + channeln.text = str(channel) + + def set_segmented(self,seg=0): + segmented = etree.SubElement(self.root,"segmented") + segmented.text = str(seg) + + + def savefile(self, filename): + tree = etree.ElementTree(self.root) + tree.write(filename, pretty_print=True, xml_declaration=False, encoding='utf-8') + + def add_pic_attr(self, label, xmin, ymin, xmax, ymax): + object = etree.SubElement(self.root, "object") + + namen = etree.SubElement(object, "name") + namen.text = label + + pose = etree.SubElement(object,"pose") + pose.text = "Unspecified" + + truncated = etree.SubElement(object,"truncated") + truncated.text = "0" + + difficult = etree.SubElement(object,"difficult") + difficult.text = "0" + + bndbox = etree.SubElement(object, "bndbox") + 
xminn = etree.SubElement(bndbox, "xmin") + xminn.text = str(xmin) + yminn = etree.SubElement(bndbox, "ymin") + yminn.text = str(ymin) + xmaxn = etree.SubElement(bndbox, "xmax") + xmaxn.text = str(xmax) + ymaxn = etree.SubElement(bndbox, "ymax") + ymaxn.text = str(ymax) + + +import os +import cv2 + + +def getFileList(dir, Filelist, ext=None): + """ + 获取文件夹及其子文件夹中文件列表 + 输入 dir:文件夹根目录 + 输入 ext: 扩展名 + 返回: 文件路径列表 + """ + newDir = dir + if os.path.isfile(dir): + if ext is None: + Filelist.append(dir) + else: + if ext in dir[-3:]: + Filelist.append(dir) + + elif os.path.isdir(dir): + for s in os.listdir(dir): + newDir = os.path.join(dir, s) + getFileList(newDir, Filelist, ext) + + return Filelist + + +# org_img_folder = './org' + +# 检索文件 +# imglist = getFileList(org_img_folder, [], 'jpg') +# print('本次执行检索到 ' + str(len(imglist)) + ' 张图像\n') + +# for imgpath in imglist: +# imgname = os.path.splitext(os.path.basename(imgpath))[0] +# img = cv2.imread(imgpath, cv2.IMREAD_COLOR) + # 对每幅图像执行相关操作 + +if __name__ == '__main__': + org_img_folder = r'.\标注文件\mfcc\ss' + + # 检索文件 + imglist = getFileList(org_img_folder, [], 'png') + print('本次执行检索到 ' + str(len(imglist)) + ' 张图像\n') + # + # filename = imglist[0] + # name = filename.split('\\') + # # print(name) + # anno = GEN_Annotations(name[4],filename) + # anno.set_size(800, 550, 3) + # anno.set_segmented() + # for i in range(1): + # xmin = i + 1 + # ymin = i + 1 + # xmax = i + 799 + # ymax = i + 549 + # anno.add_pic_attr("Snoring", xmin, ymin, xmax, ymax) + # filename_saved = filename.split('.') + # # print(filename_saved) + # anno.savefile('.'+filename_saved[1]+".xml") + + for imagepath in imglist: + filename = imagepath + name = filename.split('\\') + # print(name) + anno = GEN_Annotations(name[4], filename) + anno.set_size(800, 550, 3) + for i in range(1): + xmin = i + 99 + ymin = i + 64 + xmax = i + 724 + ymax = i + 493 + anno.add_pic_attr("Snoring", xmin, ymin, xmax, ymax) + # filename_saved = filename.split('.') + 
filename_saved=name[4].split('.') + path=r'E:\语音处理\频谱\VOC\mfcc/ss/' + anno.savefile(path + filename_saved[0] + ".xml") \ No newline at end of file diff --git a/autoVOC-new.py b/autoVOC-new.py new file mode 100644 index 0000000..e645c28 --- /dev/null +++ b/autoVOC-new.py @@ -0,0 +1,153 @@ +from lxml import etree + + +class GEN_Annotations: + def __init__(self, filename): + self.root = etree.Element("annotation") + + child1 = etree.SubElement(self.root, "folder") + child1.text = "folder" + + child2 = etree.SubElement(self.root, "filename") + child2.text = filename + + # child3 = etree.SubElement(self.root,"path") + # child3.text = imgpath + + child4 = etree.SubElement(self.root, "source") + + # child4 = etree.SubElement(child3, "annotation") + # child4.text = "PASCAL VOC2007" + child5 = etree.SubElement(child4, "database") + child5.text = "Unknown" + # + # child6 = etree.SubElement(child3, "image") + # child6.text = "flickr" + # child7 = etree.SubElement(child3, "flickrid") + # child7.text = "35435" + + def set_size(self, witdh, height, channel): + size = etree.SubElement(self.root, "size") + widthn = etree.SubElement(size, "width") + widthn.text = str(witdh) + heightn = etree.SubElement(size, "height") + heightn.text = str(height) + channeln = etree.SubElement(size, "depth") + channeln.text = str(channel) + + def set_segmented(self,seg=0): + segmented = etree.SubElement(self.root,"segmented") + segmented.text = str(seg) + + + def savefile(self, filename): + tree = etree.ElementTree(self.root) + tree.write(filename, pretty_print=True, xml_declaration=False, encoding='utf-8') + + def add_pic_attr(self, label, xmin, ymin, xmax, ymax): + object = etree.SubElement(self.root, "object") + + namen = etree.SubElement(object, "name") + namen.text = label + + pose = etree.SubElement(object,"pose") + pose.text = "Unspecified" + + truncated = etree.SubElement(object,"truncated") + truncated.text = "0" + + difficult = etree.SubElement(object,"difficult") + difficult.text = "0" + 
+ bndbox = etree.SubElement(object, "bndbox") + xminn = etree.SubElement(bndbox, "xmin") + xminn.text = str(xmin) + yminn = etree.SubElement(bndbox, "ymin") + yminn.text = str(ymin) + xmaxn = etree.SubElement(bndbox, "xmax") + xmaxn.text = str(xmax) + ymaxn = etree.SubElement(bndbox, "ymax") + ymaxn.text = str(ymax) + + +import os +import cv2 + + +def getFileList(dir, Filelist, ext=None): + """ + 获取文件夹及其子文件夹中文件列表 + 输入 dir:文件夹根目录 + 输入 ext: 扩展名 + 返回: 文件路径列表 + """ + newDir = dir + if os.path.isfile(dir): + if ext is None: + Filelist.append(dir) + else: + if ext in dir[-3:]: + Filelist.append(dir) + + elif os.path.isdir(dir): + for s in os.listdir(dir): + newDir = os.path.join(dir, s) + getFileList(newDir, Filelist, ext) + + return Filelist + + +# org_img_folder = './org' + +# 检索文件 +# imglist = getFileList(org_img_folder, [], 'jpg') +# print('本次执行检索到 ' + str(len(imglist)) + ' 张图像\n') + +# for imgpath in imglist: +# imgname = os.path.splitext(os.path.basename(imgpath))[0] +# img = cv2.imread(imgpath, cv2.IMREAD_COLOR) + # 对每幅图像执行相关操作 + +if __name__ == '__main__': + org_img_folder = r'.\标注文件\wave\ss' + + # 检索文件 + imglist = getFileList(org_img_folder, [], 'jpg') + print('本次执行检索到 ' + str(len(imglist)) + ' 张图像\n') + # + # filename = imglist[0] + # name = filename.split('\\') + # # print(name) + # anno = GEN_Annotations(name[4],filename) + # anno.set_size(800, 550, 3) + # anno.set_segmented() + # for i in range(1): + # xmin = i + 1 + # ymin = i + 1 + # xmax = i + 799 + # ymax = i + 549 + # anno.add_pic_attr("Snoring", xmin, ymin, xmax, ymax) + # filename_saved = filename.split('.') + # # print(filename_saved) + # anno.savefile('.'+filename_saved[1]+".xml") + + for imagepath in imglist: + filename = imagepath + name = filename.split('\\') + # print(name) + anno = GEN_Annotations(name[4]) + anno.set_size(800, 550, 3) + anno.set_segmented() + for i in range(1): + xmin = 105 + ymin = 72 + xmax = 718 + ymax = 486 + # xmin = 99 + # ymin = 64 + # xmax = 724 + # ymax = 493 + 
anno.add_pic_attr("Snoring", xmin, ymin, xmax, ymax) + filename_saved = name[4].split('.') + path = r'E:\语音处理\频谱\anno\wave/' + anno.savefile(path + filename_saved[0] + ".xml") \ No newline at end of file diff --git a/autoVOC.py b/autoVOC.py new file mode 100644 index 0000000..4c0039d --- /dev/null +++ b/autoVOC.py @@ -0,0 +1,148 @@ +from lxml import etree + + +class GEN_Annotations: + def __init__(self, filename, imgpath): + self.root = etree.Element("annotation") + + child1 = etree.SubElement(self.root, "folder") + child1.text = "ss" + + child2 = etree.SubElement(self.root, "filename") + child2.text = filename + + child3 = etree.SubElement(self.root,"path") + child3.text = imgpath + + child4 = etree.SubElement(self.root, "source") + + # child4 = etree.SubElement(child3, "annotation") + # child4.text = "PASCAL VOC2007" + child5 = etree.SubElement(child4, "database") + child5.text = "Unknown" + # + # child6 = etree.SubElement(child3, "image") + # child6.text = "flickr" + # child7 = etree.SubElement(child3, "flickrid") + # child7.text = "35435" + + def set_size(self, witdh, height, channel): + size = etree.SubElement(self.root, "size") + widthn = etree.SubElement(size, "width") + widthn.text = str(witdh) + heightn = etree.SubElement(size, "height") + heightn.text = str(height) + channeln = etree.SubElement(size, "depth") + channeln.text = str(channel) + + def set_segmented(self,seg=0): + segmented = etree.SubElement(self.root,"segmented") + segmented.text = str(seg) + + + def savefile(self, filename): + tree = etree.ElementTree(self.root) + tree.write(filename, pretty_print=True, xml_declaration=False, encoding='utf-8') + + def add_pic_attr(self, label, xmin, ymin, xmax, ymax): + object = etree.SubElement(self.root, "object") + + namen = etree.SubElement(object, "name") + namen.text = label + + pose = etree.SubElement(object,"pose") + pose.text = "Unspecified" + + truncated = etree.SubElement(object,"truncated") + truncated.text = "0" + + difficult = 
etree.SubElement(object,"difficult") + difficult.text = "0" + + bndbox = etree.SubElement(object, "bndbox") + xminn = etree.SubElement(bndbox, "xmin") + xminn.text = str(xmin) + yminn = etree.SubElement(bndbox, "ymin") + yminn.text = str(ymin) + xmaxn = etree.SubElement(bndbox, "xmax") + xmaxn.text = str(xmax) + ymaxn = etree.SubElement(bndbox, "ymax") + ymaxn.text = str(ymax) + + +import os +import cv2 + + +def getFileList(dir, Filelist, ext=None): + """ + 获取文件夹及其子文件夹中文件列表 + 输入 dir:文件夹根目录 + 输入 ext: 扩展名 + 返回: 文件路径列表 + """ + newDir = dir + if os.path.isfile(dir): + if ext is None: + Filelist.append(dir) + else: + if ext in dir[-3:]: + Filelist.append(dir) + + elif os.path.isdir(dir): + for s in os.listdir(dir): + newDir = os.path.join(dir, s) + getFileList(newDir, Filelist, ext) + + return Filelist + + +# org_img_folder = './org' + +# 检索文件 +# imglist = getFileList(org_img_folder, [], 'jpg') +# print('本次执行检索到 ' + str(len(imglist)) + ' 张图像\n') + +# for imgpath in imglist: +# imgname = os.path.splitext(os.path.basename(imgpath))[0] +# img = cv2.imread(imgpath, cv2.IMREAD_COLOR) + # 对每幅图像执行相关操作 + +if __name__ == '__main__': + org_img_folder = r'D:\snoring-dataset\Snoring Dataset\音频数据\标注文件\mfcc\no/' + files=os.listdir(org_img_folder) + # 检索文件 + imglist = getFileList(org_img_folder, [], 'png') + print('本次执行检索到 ' + str(len(imglist)) + ' 张图像\n') + # + # filename = imglist[0] + # name = filename.split('\\') + # # print(name) + # anno = GEN_Annotations(name[4],filename) + # anno.set_size(800, 550, 3) + # anno.set_segmented() + # for i in range(1): + # xmin = i + 1 + # ymin = i + 1 + # xmax = i + 799 + # ymax = i + 549 + # anno.add_pic_attr("Snoring", xmin, ymin, xmax, ymax) + # filename_saved = filename.split('.') + # # print(filename_saved) + # anno.savefile('.'+filename_saved[1]+".xml") + + for i,img in enumerate(org_img_folder): + filename = os.path.splitext(img)[0] + filetype = os.path.splitext(img)[1] + name = filename.split('\\') + # print(name) + anno = 
GEN_Annotations(name[4], filename) + anno.set_size(800, 550, 3) + for i in range(1): + xmin = i + 99 + ymin = i + 64 + xmax = i + 724 + ymax = i + 493 + anno.add_pic_attr("No Snoring", xmin, ymin, xmax, ymax) + filename_saved = filename.split('.') + anno.savefile('.' + filename_saved[1] + ".xml") \ No newline at end of file diff --git a/cut.py b/cut.py new file mode 100644 index 0000000..81f68cb --- /dev/null +++ b/cut.py @@ -0,0 +1,17 @@ +import os +from PIL import Image +import numpy as np + +rootimgs = 'D:\paper\\3low_light_image\compare_lowlighr_enchace\enhancement_image\MBLLEN\\' +targetroot = 'D:\paper\\3low_light_image\compare_lowlighr_enchace\enhancement_image\\' +savdir = 'D:\paper\\3low_light_image\compare_lowlighr_enchace\enhancement_image\\' +file_imgs = os.listdir(rootimgs) + +for file_img in file_imgs: + imgpath = rootimgs + file_img + targetimg = targetroot + file_img + image = Image.open(imgpath) # 用PIL中的Image.open打开图像 + image_arr = np.array(image) # 转化成numpy数组 + image_tar = image_arr[:, int(image_arr.shape[1] / 3):int(2 * image_arr.shape[1] / 3), :] + im = Image.fromarray(image_tar) + im.save(targetimg) \ No newline at end of file diff --git a/data-number.py b/data-number.py new file mode 100644 index 0000000..41736cd --- /dev/null +++ b/data-number.py @@ -0,0 +1,66 @@ +import matplotlib.pyplot as plt +import os +from urllib import request, parse +import json +# 有道翻译:中文→英文 +def fy(i): + req_url = 'http://fanyi.youdao.com/translate' # 创建连接接口 + # 创建要提交的数据 + Form_Date = {} + Form_Date['i'] = i + Form_Date['doctype'] = 'json' + Form_Date['form'] = 'AUTO' + Form_Date['to'] = 'AUTO' + Form_Date['smartresult'] = 'dict' + Form_Date['client'] = 'fanyideskweb' + Form_Date['salt'] = '1526995097962' + Form_Date['sign'] = '8e4c4765b52229e1f3ad2e633af89c76' + Form_Date['version'] = '2.1' + Form_Date['keyform'] = 'fanyi.web' + Form_Date['action'] = 'FY_BY_REALTIME' + Form_Date['typoResult'] = 'false' + + data = parse.urlencode(Form_Date).encode('utf-8') # 数据转换 + 
response = request.urlopen(req_url, data) # 提交数据并解析 + html = response.read().decode('utf-8') # 服务器返回结果读取 + # print(html) + # 可以看出html是一个json格式 + translate_results = json.loads(html) # 以json格式载入 + translate_results = translate_results['translateResult'][0][0]['tgt'] # json格式调取 + # print(translate_results) # 输出结果 + return translate_results; # 返回结果 +# +# +# res = fy('this is a dog') +# print(res) # 这是一只狗 + + +plt.style.use("seaborn") +no_snore_path='D:/snoring-dataset/no-snore/' +no_snore_path_dir=os.listdir(no_snore_path) +no_snore_num=0 +count=0 +no_snore_typrname=[] +for i,filename in enumerate(no_snore_path_dir): + oldname=filename + print(oldname) + newname = filename[6:] + newname=fy(newname) + print(newname) + Oldname=os.path.join(no_snore_path,oldname) + Newname=os.path.join(no_snore_path,newname) + os.rename(Oldname,Newname) + # no_snore_typrname.append(newname) + count+=1 +no_snore_num=count +print(no_snore_num) +# print(no_snore_typrname) +# print(no_snore_num) +# newnames=[] +# for i,file in enumerate(no_snore_typrname): +# oldname=file +# newname=file[6:] +# newname=fy(newname) +# newnames.append(newname) +# os.rename(oldname,newname) +# print(newnames) \ No newline at end of file diff --git a/fft.py b/fft.py new file mode 100644 index 0000000..6d84a85 --- /dev/null +++ b/fft.py @@ -0,0 +1,20 @@ +import librosa +import librosa.display +import matplotlib.pyplot as plt +import pywt +# 读取音频文件 +filepath = 'D:\snoring-dataset\Snoring Dataset/' +filename = filepath + '000000.wav' +x, sr = librosa.load(filename, sr=None) # x--音频时间序列(一维数组) ; sr--音频的采样率 + +# STFT处理绘制声谱图 + +X = librosa.stft(x) +Xdb = librosa.amplitude_to_db(abs(X)) # X--二维数组数据 + +plt.figure(figsize=(5, 5)) +librosa.display.specshow(Xdb, sr=sr, x_axis='time', y_axis='log') +plt.colorbar() +plt.title('STFT transform processing audio signal') +plt.show() + diff --git a/lsxbbh.py b/lsxbbh.py new file mode 100644 index 0000000..2b360db --- /dev/null +++ b/lsxbbh.py @@ -0,0 +1,32 @@ +import pywt +import 
matplotlib.pyplot as plt +import numpy as np +# 小波 +sampling_rate = 1024 +t = np.arange(0, 1.0, 1.0 / sampling_rate) +f1 = 100 +f2 = 200 +f3 = 300 +f4 = 400 +data = np.piecewise(t, [t < 1, t < 0.8, t < 0.5, t < 0.3], + [lambda t: 400*np.sin(2 * np.pi * f4 * t), + lambda t: 300*np.sin(2 * np.pi * f3 * t), + lambda t: 200*np.sin(2 * np.pi * f2 * t), + lambda t: 100*np.sin(2 * np.pi * f1 * t)]) +wavename = 'cgau8' +totalscal = 256 +fc = pywt.central_frequency(wavename) +cparam = 2 * fc * totalscal +scales = cparam / np.arange(totalscal, 1, -1) +[cwtmatr, frequencies] = pywt.cwt(data, scales, wavename, 1.0 / sampling_rate) +plt.figure(figsize=(8, 4)) +plt.subplot(211) +plt.plot(t, data) +plt.xlabel("t(s)") +plt.title('shipinpu', fontsize=20) +plt.subplot(212) +plt.contourf(t, frequencies, abs(cwtmatr)) +plt.ylabel(u"prinv(Hz)") +plt.xlabel(u"t(s)") +plt.subplots_adjust(hspace=0.4) +plt.show() \ No newline at end of file diff --git a/lxml+plot(1).py b/lxml+plot(1).py new file mode 100644 index 0000000..f8592c4 --- /dev/null +++ b/lxml+plot(1).py @@ -0,0 +1,50 @@ +from lxml import etree +with open(r'C:\Users\c9347\Desktop\hehehe\temp.xml', 'r', encoding='utf-8') as f: + str = f.read() +# print(str) +xml=etree.fromstring(str) +# xml=etree.XML(str) +# xml=etree.HTML(str) +xml=etree.parse(r'C:\Users\c9347\Desktop\hehehe\temp.xml') + +print(etree.tostring(xml).decode('utf-8')) +name=xml.xpath('/annotation/object/name/text()') +print(name) + +# import matplotlib.pyplot as plt +# # # 一个figure(画布)上,可以有多个区域axes(坐标系), +# # # 我们在每个坐标系上绘图,也就是说每个axes(坐标系)中,都有axis(坐标轴)。 +# # # 如果绘制一个简单的小图形,我们可以不设置figure对象,使用默认创建的figure对象, +# # # 当然我们也可以显示创建figure对象。如果一张figure画布上,需要绘制多个图形。 +# # # 那么就必须显示的创建figure对象,然后得到每个位置上的axes对象,进行对应位置上的图形绘制。 +# # # 定义fig +# # fig = plt.figure() +# # # 建立子图 +# # ax = fig.subplots(2,2) # 2*2 +# # fig, ax = plt.subplots(2,2) +# # # 第一个图为 +# # ax[0,0].plot([1,2,5], [3,4,8],label='a') +# # # 第二个图为 +# # ax[0,1].plot([1,2], [3,4]) +# # # 第三个图为 +# # ax[1,1].plot([1,2], 
[3,4]) +# # # 第四个图为 +# # ax[1,0].plot([1,2], [3,4]) +# # x1 = [0, 1, 2, 3] +# # y1 = [3, 7, 5, 9] +# # x2 = [0, 1, 2, 3] +# # y2 = [6, 2, 13, 10] +# # +# # ax[0,0].plot(x1, y1,label='b') +# # ax[0,0].plot(x2, y2,label='c') +# # ax[0,0].xticks([0,2,4,6,8]) +# # plt.show() +# counts=[1,5,4,7,5] +# labels=['a','b','c','d','e'] +# fig,ax=plt.subplots() +# # ax.pie(counts,labels=labels,colors=['red','blue','red','blue','red']) +# # plt.show() +# ax.barh(labels,counts) +# for index,item in enumerate(counts): +# ax.text(item+1,index,str(item)) +# plt.show() \ No newline at end of file diff --git a/mel.py b/mel.py new file mode 100644 index 0000000..4eb861f --- /dev/null +++ b/mel.py @@ -0,0 +1,197 @@ +import IPython +import cv2 +import IPython.display + +import librosa +import librosa.display + +from fastai.vision import * + +import os + +DATA = 'D:\CloudMusic/' +# CSV_TRN_CURATED = DATA + 'train_curated.csv' # 训练数据集:文件名,标签 +# TRN_CURATED = DATA + 'train_curated' # 训练数据的图片位置。 + +# Mel-spectrogram Dataset +PREPROCESSED = os.path.join(DATA) # 生成数据的保存位置 +MELS_TRN_CURATED = os.path.join(PREPROCESSED, 'mels_train_curated.pkl') # 结果保存文件,图片保存成pkl. 
+ + +def read_audio(conf, pathname, trim_long_data): + """ + librosa 是音频处理库,conf.sampling_rate 为采样率 44100 + :param conf: + :param pathname: + :param trim_long_data: + :return: + """ + y, sr = librosa.load(pathname, sr=conf.sampling_rate) # 将音频文件加载为浮点时​​间系列。 + # trim silence + if 0 < len(y): # workaround: 0 length causes error + y, _ = librosa.effects.trim(y) # trim, top_db=default(60) + # make it unified length to conf.samples + if len(y) > conf.samples: # long enough 88200 + if trim_long_data: + y = y[0:0 + conf.samples] + else: # pad blank + padding = conf.samples - len(y) # add padding at both ends 不够的话就补充。 + offset = padding // 2 + y = np.pad(y, (offset, conf.samples - len(y) - offset), conf.padmode) + return y + + +def audio_to_melspectrogram(conf, audio): + """ + 计算一个梅尔频谱系数图 + :param conf: + :param audio: + :return: + """ + spectrogram = librosa.feature.melspectrogram(audio, + sr=conf.sampling_rate, + n_mels=conf.n_mels, + hop_length=conf.hop_length, + n_fft=conf.n_fft, + fmin=conf.fmin, + fmax=conf.fmax) + spectrogram = librosa.power_to_db(spectrogram) # 转化频谱系数单位 + spectrogram = spectrogram.astype(np.float32) + return spectrogram + + +def show_melspectrogram(conf, mels, title='Log-frequency power spectrogram'): + """ + + :param conf: + :param mels: + :param title: + :return: + """ + librosa.display.specshow(mels, x_axis='time', y_axis='mel', + sr=conf.sampling_rate, hop_length=conf.hop_length, + fmin=conf.fmin, fmax=conf.fmax) + plt.colorbar(format='%+2.0f dB') + plt.title(title) + plt.show() + + +def read_as_melspectrogram(conf, pathname, trim_long_data, debug_display=False): + """ + :param conf: + :param pathname: + :param trim_long_data: + :param debug_display: + :return: + """ + x = read_audio(conf, pathname, trim_long_data) + mels = audio_to_melspectrogram(conf, x) + if debug_display: + IPython.display.display(IPython.display.Audio(x, rate=conf.sampling_rate)) + show_melspectrogram(conf, mels) + return mels + + +def mono_to_color(X, mean=None, std=None, 
norm_max=None, norm_min=None, eps=1e-6): + """ + + :param X: + :param mean: + :param std: + :param norm_max: + :param norm_min: + :param eps: + :return: + """ + # Stack X as [X,X,X] + X = np.stack([X, X, X], axis=-1) + + # Standardize + mean = mean or X.mean() + X = X - mean + std = std or X.std() + Xstd = X / (std + eps) + _min, _max = Xstd.min(), Xstd.max() + norm_max = norm_max or _max + norm_min = norm_min or _min + if (_max - _min) > eps: + # Normalize to [0, 255] + V = Xstd + V[V < norm_min] = norm_min + V[V > norm_max] = norm_max + V = 255 * (V - norm_min) / (norm_max - norm_min) + V = V.astype(np.uint8) + else: + # Just zero + V = np.zeros_like(Xstd, dtype=np.uint8) + return V + + +def convert_wav_to_image(df, source): + """ + ## 转化WAV文件为图片,返回包含图片的list。 + :param df: + :param source: + :return: + """ + X = [] + for i, row in df.iterrows(): + wav_path = os.path.join(source, str(row.fname)) # WAV文件路径 + print(wav_path) + x = read_as_melspectrogram(conf, wav_path, trim_long_data=False) # 读取图像并转化成数组 + x_color = mono_to_color(x) # 转化为三维图像 + X.append(x_color) + return X + + +def save_as_pkl_binary(obj, filename): + """Save object as pickle binary file. 
+ Thanks to https://stackoverflow.com/questions/19201290/how-to-save-a-dictionary-to-a-file/32216025 + """ + with open(filename, 'wb') as f: + pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL) + + +def convert_dataset(df, source_folder, filename): + """ + 转化WAV文件为图片,并保存image。 + :param df: + :param source_folder: + :param filename: + :return: + """ + X = convert_wav_to_image(df, source=source_folder) + save_as_pkl_binary(X, filename) + print(f'Created {filename}') + return X + + +class conf: + sampling_rate = 44100 + duration = 2 # sec + hop_length = 347 * duration # to make time steps 128 + fmin = 20 + fmax = sampling_rate // 2 + n_mels = 128 + n_fft = n_mels * 20 + padmode = 'constant' + samples = sampling_rate * duration + + +def get_default_conf(): + return conf + + +def main(): + trn_curated_df = pd.read_csv(CSV_TRN_CURATED) + + # 获取配置参数 + conf = get_default_conf() + + # 转化数据集 128xN (N/128)*2=时长。 + convert_dataset(trn_curated_df, TRN_CURATED, MELS_TRN_CURATED); + # convert_dataset(test_df, TEST, MELS_TEST); + + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/melm.py b/melm.py new file mode 100644 index 0000000..f3e19ad --- /dev/null +++ b/melm.py @@ -0,0 +1,52 @@ +import matplotlib.pyplot as plt +import librosa +import librosa.display +import numpy as np +import sys + + +# 读取音频wav文件 +audio_path = r"D:\CloudMusic\no/000000.wav" +y, sr = librosa.load(audio_path, sr=None, mono=True) +""" +:param + path 音频路径 + sr 采样率(默认22050,但是有重采样的功能) + mono 设置为true是单通道,否则是双通道 + offset 音频读取的时间 + duration 获取音频的时长 + +:returns + y : 音频的信号值,类型是ndarray + sr : 采样率 +""" +############################################################################### + +################################################################################ +# 03 使用librosa获取mel谱图 +n_mels = 64 +n_frames = 5 +n_fft = 1024 +hop_length = 512 +power = 2.0 + +mel_spectrogram = librosa.feature.melspectrogram(y=y, + sr=sr, + n_fft=n_fft, + hop_length=hop_length, + n_mels=n_mels, + power=power) + 
+# librosa.display.specshow(librosa.power_to_db(mel_spectrogram, ref=np.max), +# y_axis='mel', fmax=8000, x_axis='time') +# plt.colorbar(format='%+2.0f dB') +################################################################################## + +# 04 将mel谱图转换为log mel谱图 +log_mel_spectrogram = 20.0 / power * np.log10(np.maximum(mel_spectrogram, sys.float_info.epsilon)) +librosa.display.specshow(librosa.power_to_db(log_mel_spectrogram, ref=np.max), + y_axis='mel', fmax=8000, x_axis='time') +# plt.colorbar(format='%+2.0f dB') +################################################################################## + +plt.show() diff --git a/mfcc.py b/mfcc.py new file mode 100644 index 0000000..462b563 --- /dev/null +++ b/mfcc.py @@ -0,0 +1,21 @@ +import matplotlib.pyplot as plt +import librosa +import librosa.display +plt.style.use('seaborn') + +y, sr = librosa.load('D:\CloudMusic/no/000000.wav', sr=16000) +# 提取 mel spectrogram feature +melspec = librosa.feature.melspectrogram(y, sr, n_fft=1024, hop_length=512, n_mels=128) +logmelspec = librosa.power_to_db(melspec) # 转换为对数刻度 +# 绘制 mel 频谱图 +plt.figure() +librosa.display.specshow(logmelspec, sr=sr, x_axis='time', y_axis='mel') +plt.colorbar(format='%+2.0f dB') # 右边的色度条 +plt.title('Beat wavform') +plt.show() + +# mfccs = librosa.feature.mfcc(y,) +# plt.figure() +# librosa.display.specshow(mfccs,) +# plt.title('mfcc') +# plt.show() diff --git a/multi classification.py b/multi classification.py new file mode 100644 index 0000000..ec77df8 --- /dev/null +++ b/multi classification.py @@ -0,0 +1,20 @@ +# coding=utf-8 +import os, random, shutil +def moveFile(fileDir): + pathDir = os.listdir(fileDir) # 取图片的原始路径 + filenumber = len(pathDir) + picknumber = int(filenumber * ratio) # 按照rate比例从文件夹中取一定数量图片 + sample = random.sample(pathDir, picknumber) # 随机选取picknumber数量的样本图片 + for name in sample: + shutil.move(os.path.join(fileDir, name), os.path.join(tarDir, name)) + return +if __name__ == '__main__': + ori_path = 'D:/snoring-dataset/Snoring 
Dataset/309-no-snoring-1' # 最开始train的文件夹路径 + split_Dir = 'D:/snoring-dataset/Snoring Dataset/test-309' # 移动到新的文件夹路径 + ratio = 0.1 # 抽取比例 + for firstPath in os.listdir(ori_path): + fileDir = os.path.join(ori_path, firstPath) # 原图片文件夹路径 + tarDir = os.path.join(split_Dir, firstPath) # val下子文件夹名字 + if not os.path.exists(tarDir): # 如果val下没有子文件夹,就创建 + os.makedirs(tarDir) + moveFile(fileDir) # 从每个子类别开始逐个划分 \ No newline at end of file diff --git a/png-jpg.py b/png-jpg.py new file mode 100644 index 0000000..349615e --- /dev/null +++ b/png-jpg.py @@ -0,0 +1,16 @@ +import os +# png文件路径 +png_path=r'E:\语音处理\频谱\标注文件\stft\no' +# jpg_path=r'E:\语音处理\频谱' +files=os.listdir(png_path) +k=0 +for i,file in enumerate(files): + filename=os.path.splitext(file)[0] + filetype=os.path.splitext(file)[1] + if filetype=='.png': + old_name=os.path.join(png_path,file) + new_name=os.path.join(png_path,filename+'.jpg') + os.rename(old_name,new_name) + # print(old_name,new_name) + k+=1 +print(k) \ No newline at end of file diff --git a/renamefile.py b/renamefile.py new file mode 100644 index 0000000..0c1a412 --- /dev/null +++ b/renamefile.py @@ -0,0 +1,17 @@ +import os +path=r'D:\snoring-dataset\40test\402 - Mouse click' +filedir=os.listdir(path) +count=0 +for i,file in enumerate(filedir): + # 分割文件的文件名和扩展名 + filename=os.path.splitext(file)[0] + filetype=os.path.splitext(file)[1] + # 判断文件类型 + if filetype=='.ogg': + if count%10==0: + print(count) + oldname=os.path.join(path,file) + newname=os.path.join(path,str(count+3000).zfill(6)+'.wav') + os.rename(oldname,newname) + count+=1 +print(count) diff --git a/resize.py b/resize.py new file mode 100644 index 0000000..67528c2 --- /dev/null +++ b/resize.py @@ -0,0 +1,27 @@ +# encoding:utf-8 + +# 用于重设图片大小,主要用来遇到图片大小限制时缩放图片 + +import cv2 + +if __name__ == '__main__': + img = cv2.imread('D:\snoring-dataset\Snoring Dataset\音频数据\标注文件\mfcc\ss/000000.png') + cv2.imshow('resize before', img) + # 直接指定目标图片大小 + img = cv2.resize(img, (416, 416)) + + # 按比例缩小,例如缩小2倍 + # 原图高 
def random_seed(x):
    """Seed the stdlib ``random`` module and NumPy's global generator.

    x: int seed, applied to both generators
    """
    for seed_fn in (random.seed, np.random.seed):
        seed_fn(x)
def read_wave(filename="sound.wav"):
    """Reads a wave file.

    Only the first channel of a multi-channel file is kept, and the
    result is normalized before it is returned.

    filename: string
    returns: Wave
    raises: ValueError if the sample width is not 1, 2, 3 or 4 bytes
    """
    # the context manager closes the file even if one of the reads fails
    with open_wave(filename, "r") as fp:
        nchannels = fp.getnchannels()
        nframes = fp.getnframes()
        sampwidth = fp.getsampwidth()
        framerate = fp.getframerate()
        z_str = fp.readframes(nframes)

    if sampwidth not in (1, 2, 3, 4):
        raise ValueError("sampwidth %d unknown" % sampwidth)

    if sampwidth == 1:
        # 8-bit PCM is stored unsigned with the zero level at 128
        ys = np.frombuffer(z_str, dtype=np.uint8).astype(np.int16) - 128
    elif sampwidth == 3:
        # 24-bit little-endian: only the top byte carries the sign, so the
        # bytes are combined unsigned and the result is sign-extended
        raw = np.frombuffer(z_str, dtype=np.uint8).astype(np.int32)
        ys = raw[0::3] | (raw[1::3] << 8) | (raw[2::3] << 16)
        ys -= (ys & 0x800000) << 1
    else:
        # WAV samples are little-endian regardless of the host byte order
        ys = np.frombuffer(z_str, dtype="<i%d" % sampwidth)

    # samples are interleaved by channel; just pull out the first channel
    if nchannels > 1:
        ys = ys[::nchannels]

    wave = Wave(ys, framerate=framerate)
    wave.normalize()
    return wave
def find_index(x, xs):
    """Find the index corresponding to a given value in an array.

    Assumes the values in xs are equally spaced, so the index is found
    by linear interpolation between the first and last element.

    x: value to look up
    xs: sequence of equally spaced values
    returns: int index (0 when xs spans a single value)
    """
    n = len(xs)
    start = xs[0]
    end = xs[-1]
    if end == start:
        # degenerate span (e.g. one element): avoid dividing by zero
        return 0
    i = round((n - 1) * (x - start) / (end - start))
    return int(i)
def peaks(self):
    """Pairs each amplitude with its frequency, largest amplitude first.

    returns: list of (amplitude, frequency) pairs in descending order
    """
    return sorted(zip(self.amps, self.fs), reverse=True)
def low_pass(self, cutoff, factor=0):
    """Scale every component whose absolute frequency exceeds the cutoff.

    Modifies self.hs in place.

    cutoff: frequency in Hz
    factor: what to multiply the magnitude by
    """
    above_cutoff = abs(self.fs) > cutoff
    self.hs[above_cutoff] *= factor
def pink_filter(self, beta=1):
    """Apply a filter that would make white noise pink.

    Divides each component of self.hs, in place, by |f| ** (beta / 2).

    beta: exponent of the pink noise
    """
    # use |f| so the negative frequencies of a full spectrum do not turn
    # into NaN when raised to a fractional power
    denom = np.abs(self.fs) ** (beta / 2.0)
    # leave the first component unscaled (it is the f == 0 term for the
    # spectra built by make_spectrum)
    denom[0] = 1
    self.hs /= denom
def estimate_slope(self, low=1, high=-12000):
    """Fits a line to log cumulative power versus log frequency.

    low: int index where the fitted range starts
    high: int index where the fitted range ends
    returns: slope, inter, r2, p, stderr
    """
    fit_range = slice(low, high)
    log_fs = np.log(self.fs[fit_range])
    log_cs = np.log(self.cs[fit_range])
    return scipy.stats.linregress(log_fs, log_cs)
def plot(self, high=None, **options):
    """Make a pseudocolor plot.

    high: highest frequency component to plot
    options: keyword arguments passed on to thinkplot.pcolor
    """
    fs = self.frequencies()
    i = None if high is None else find_index(high, fs)
    fs = fs[:i]
    ts = self.times()

    # make the array; the builtin float replaces the np.float alias,
    # which no longer exists in current NumPy
    size = len(fs), len(ts)
    array = np.zeros(size, dtype=float)

    # copy amplitude from each spectrum into a column of the array
    for j, t in enumerate(ts):
        spectrum = self.spec_map[t]
        array[:, j] = spectrum.amps[:i]

    thinkplot.pcolor(ts, fs, array, **options)
@property
def duration(self):
    """Length of the wave in seconds (property).

    returns: float, number of samples divided by the framerate
    """
    n_samples = len(self.ys)
    return n_samples / self.framerate
def max_diff(self, other):
    """Largest absolute sample-by-sample difference between two waves.

    Both waves must have the same framerate and the same length.

    other: Wave
    returns: float
    """
    assert self.framerate == other.framerate
    assert len(self) == len(other)

    deltas = np.abs(self.ys - other.ys)
    return np.max(deltas)
def hamming(self):
    """Apply a Hamming window to the wave.

    Replaces self.ys with the windowed samples.
    """
    # build a new array rather than using *=, which raises a casting
    # error when ys holds integer samples
    self.ys = self.ys * np.hamming(len(self.ys))
def find_index(self, t):
    """Find the index corresponding to a given time.

    Interpolates linearly between the first and last timestamp.

    t: float time in seconds
    returns: int index (0 when the wave spans a single instant)
    """
    n = len(self)
    start = self.start
    end = self.end
    if end == start:
        # one-sample wave: avoid dividing by zero
        return 0
    i = round((n - 1) * (t - start) / (end - start))
    return int(i)
def get_xfactor(self, options):
    """Removes "xfactor" from the options and returns it.

    options: dict of plotting options, modified in place
    returns: the value stored under "xfactor", or 1 if it is absent
    """
    return options.pop("xfactor", 1)
def normalize(ys, amp=1.0):
    """Normalizes a wave array so the maximum amplitude is +amp or -amp.

    An all-zero array is returned as zeros rather than divided by zero.

    ys: wave array
    amp: max amplitude (pos or neg) in result
    returns: wave array
    """
    ys = np.asanyarray(ys)
    # largest magnitude, computed by NumPy instead of iterating the array
    # with the Python builtins
    peak = np.max(np.abs(ys))
    if peak == 0:
        # silence has nothing to scale; multiplying keeps a float result
        return ys * 0.0
    return amp * ys / peak
def apodize(ys, framerate, denom=20, duration=0.1):
    """Tapers the amplitude at the beginning and end of the signal.

    Tapers either the given duration of time or the given
    fraction of the total duration, whichever is less. The taper never
    covers more than half of the signal on each side.

    ys: wave array
    framerate: int frames per second
    denom: float fraction of the segment to taper
    duration: float duration of the taper in seconds
    returns: wave array
    """
    n = len(ys)

    # a fixed fraction of the segment
    k1 = n // denom

    # a fixed duration of time
    k2 = int(duration * framerate)

    # cap at n // 2 so the flat middle never gets a negative length, and
    # force an int because a float denom makes k1 a float
    k = int(min(k1, k2, n // 2))

    w1 = np.linspace(0, 1, k)
    w2 = np.ones(n - 2 * k)
    w3 = np.linspace(1, 0, k)

    window = np.concatenate((w1, w2, w3))
    return ys * window
def infer_framerate(ts):
    """Derives the framerate from the spacing of the first two times.

    Assumes that the ts are equally spaced.

    ts: sequence of times in seconds
    returns: frames per second
    """
    # TODO: confirm that this is never used and remove it
    first, second = ts[0], ts[1]
    return 1.0 / (second - first)
class ComplexSinusoid(Sinusoid):
    """Represents a complex exponential signal."""

    def evaluate(self, ts):
        """Evaluates amp * exp(1j * phase) at the given times.

        ts: float array of times
        returns: complex wave array
        """
        times = np.asarray(ts)
        angular_freq = PI2 * self.freq
        phases = angular_freq * times + self.offset
        return self.amp * np.exp(1j * phases)
class TriangleSignal(Sinusoid):
    """Represents a triangle signal."""

    def evaluate(self, ts):
        """Evaluates the signal at the given times.

        The fractional part of the cycle count is folded around 0.5,
        then unbiased and normalized to the signal's amplitude.

        ts: float array of times
        returns: float wave array
        """
        times = np.asarray(ts)
        cycles = self.freq * times + self.offset / PI2
        frac = np.modf(cycles)[0]
        folded = np.abs(frac - 0.5)
        return normalize(unbias(folded), self.amp)
+ start: float frequency in Hz + end: float frequency in Hz + amp: float amplitude, 1.0 is nominal max + """ + self.start = start + self.end = end + self.amp = amp + + @property + def period(self): + """Period of the signal in seconds. + raises: ValueError, always; a chirp has no fixed period + """ + raise ValueError("Non-periodic signal.") + + def evaluate(self, ts): + """Evaluates the signal at the given times. + ts: float array of times + returns: float wave array + """ + freqs = np.linspace(self.start, self.end, len(ts) - 1) + return self._evaluate(ts, freqs) + + def _evaluate(self, ts, freqs): + """Helper function that evaluates the signal. + ts: float array of times + freqs: float array of frequencies during each interval + """ + dts = np.diff(ts) + dps = PI2 * freqs * dts + phases = np.cumsum(dps) + phases = np.insert(phases, 0, 0) + ys = self.amp * np.cos(phases) + return ys + + +class ExpoChirp(Chirp): + """Represents a signal with varying frequency.""" + + def evaluate(self, ts): + """Evaluates the signal at the given times. + ts: float array of times + returns: float wave array + """ + start, end = np.log10(self.start), np.log10(self.end) + freqs = np.logspace(start, end, len(ts) - 1) + return self._evaluate(ts, freqs) + + +class SilentSignal(Signal): + """Represents silence.""" + + def evaluate(self, ts): + """Evaluates the signal at the given times. + ts: float array of times + returns: float wave array + """ + return np.zeros(len(ts)) + + +class Impulses(Signal): + """Represents a train of impulses.""" + + def __init__(self, locations, amps=1): + self.locations = np.asanyarray(locations) + self.amps = amps + + def evaluate(self, ts): + """Evaluates the signal at the given times. 
+ ts: float array of times + returns: float wave array + """ + ys = np.zeros(len(ts)) + indices = np.searchsorted(ts, self.locations) + ys[indices] = self.amps + return ys + + +class _Noise(Signal): + """Represents a noise signal (abstract parent class).""" + + def __init__(self, amp=1.0): + """Initializes a noise signal. + amp: float amplitude, 1.0 is nominal max + """ + self.amp = amp + + @property + def period(self): + """Period of the signal in seconds. + raises: ValueError, always; noise is not periodic + """ + raise ValueError("Non-periodic signal.") + + +class UncorrelatedUniformNoise(_Noise): + """Represents uncorrelated uniform noise.""" + + def evaluate(self, ts): + """Evaluates the signal at the given times. + ts: float array of times + returns: float wave array + """ + ys = np.random.uniform(-self.amp, self.amp, len(ts)) + return ys + + +class UncorrelatedGaussianNoise(_Noise): + """Represents uncorrelated gaussian noise.""" + + def evaluate(self, ts): + """Evaluates the signal at the given times. + ts: float array of times + returns: float wave array + """ + ys = np.random.normal(0, self.amp, len(ts)) + return ys + + +class BrownianNoise(_Noise): + """Represents Brownian noise, aka red noise.""" + + def evaluate(self, ts): + """Evaluates the signal at the given times. + Computes Brownian noise by taking the cumulative sum of + a uniform random series. + ts: float array of times + returns: float wave array + """ + dys = np.random.uniform(-1, 1, len(ts)) + # ys = scipy.integrate.cumtrapz(dys, ts) + ys = np.cumsum(dys) + ys = normalize(unbias(ys), self.amp) + return ys + + +class PinkNoise(_Noise): + """Represents pink noise.""" + + def __init__(self, amp=1.0, beta=1.0): + """Initializes a pink noise signal. + amp: float amplitude, 1.0 is nominal max; beta: float, forwarded to pink_filter by make_wave + """ + self.amp = amp + self.beta = beta + + def make_wave(self, duration=1, start=0, framerate=11025): + """Makes a Wave object. 
+ duration: float seconds + start: float seconds + framerate: int frames per second + returns: Wave + """ + signal = UncorrelatedUniformNoise() + wave = signal.make_wave(duration, start, framerate) + spectrum = wave.make_spectrum() + + spectrum.pink_filter(beta=self.beta) + + wave2 = spectrum.make_wave() + wave2.unbias() + wave2.normalize(self.amp) + return wave2 + + +def rest(duration): + """Makes a rest of the given duration. + duration: float seconds + returns: Wave + """ + signal = SilentSignal() + wave = signal.make_wave(duration) + return wave + + +def make_note(midi_num, duration, sig_cons=CosSignal, framerate=11025): + """Make a MIDI note with the given duration. + midi_num: int MIDI note number + duration: float seconds + sig_cons: Signal constructor function + framerate: int frames per second + returns: Wave + """ + freq = midi_to_freq(midi_num) + signal = sig_cons(freq) + wave = signal.make_wave(duration, framerate=framerate) + wave.apodize() + return wave + + +def make_chord(midi_nums, duration, sig_cons=CosSignal, framerate=11025): + """Make a chord with the given duration. + midi_nums: sequence of int MIDI note numbers + duration: float seconds + sig_cons: Signal constructor function + framerate: int frames per second + returns: Wave + """ + freqs = [midi_to_freq(num) for num in midi_nums] + signal = sum(sig_cons(freq) for freq in freqs) + wave = signal.make_wave(duration, framerate=framerate) + wave.apodize() + return wave + + +def midi_to_freq(midi_num): + """Converts MIDI note number to frequency. + midi_num: int MIDI note number + returns: float frequency in Hz + """ + x = (midi_num - 69) / 12.0 + freq = 440.0 * 2 ** x + return freq + + +def sin_wave(freq, duration=1, offset=0): + """Makes a sine wave with the given parameters. 
+ freq: float cycles per second + duration: float seconds + offset: float radians + returns: Wave + """ + signal = SinSignal(freq, offset=offset) + wave = signal.make_wave(duration) + return wave + + +def cos_wave(freq, duration=1, offset=0): + """Makes a cosine wave with the given parameters. + freq: float cycles per second + duration: float seconds + offset: float radians + returns: Wave + """ + signal = CosSignal(freq, offset=offset) + wave = signal.make_wave(duration) + return wave + + +def mag(a): + """Computes the magnitude of a numpy array. + a: numpy array + returns: float + """ + return np.sqrt(np.dot(a, a)) + + +def zero_pad(array, n): + """Extends an array with zeros. + array: numpy array + n: length of result + returns: new NumPy array + """ + res = np.zeros(n) + res[: len(array)] = array + return res + + +def main(): + + cos_basis = cos_wave(440) + sin_basis = sin_wave(440) + + wave = cos_wave(440, offset=math.pi / 2) + cos_cov = cos_basis.cov(wave) + sin_cov = sin_basis.cov(wave) + print(cos_cov, sin_cov, mag((cos_cov, sin_cov))) + return + + wfile = WavFileWriter() + for sig_cons in [ + SinSignal, + TriangleSignal, + SawtoothSignal, + GlottalSignal, + ParabolicSignal, + SquareSignal, + ]: + print(sig_cons) + sig = sig_cons(440) + wave = sig.make_wave(1) + wave.apodize() + wfile.write(wave) + wfile.close() + return + + signal = GlottalSignal(440) + signal.plot() + pyplot.show() + return + + wfile = WavFileWriter() + for m in range(60, 0, -1): + wfile.write(make_note(m, 0.25)) + wfile.close() + return + + wave1 = make_note(69, 1) + wave2 = make_chord([69, 72, 76], 1) + wave = wave1 | wave2 + + wfile = WavFileWriter() + wfile.write(wave) + wfile.close() + return + + sig1 = CosSignal(freq=440) + sig2 = CosSignal(freq=523.25) + sig3 = CosSignal(freq=660) + sig4 = CosSignal(freq=880) + sig5 = CosSignal(freq=987) + sig = sig1 + sig2 + sig3 + sig4 + + # wave = Wave(sig, duration=0.02) + # wave.plot() + + wave = sig.make_wave(duration=1) + # wave.normalize() 
+ + wfile = WavFileWriter(wave) + wfile.write() + wfile.close() + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/thinkplot.py b/thinkplot.py new file mode 100644 index 0000000..aceeffb --- /dev/null +++ b/thinkplot.py @@ -0,0 +1,838 @@ +from __future__ import print_function + +import math +import matplotlib +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd + +import warnings + +# customize some matplotlib attributes +#matplotlib.rc('figure', figsize=(4, 3)) + +#matplotlib.rc('font', size=14.0) +#matplotlib.rc('axes', labelsize=22.0, titlesize=22.0) +#matplotlib.rc('legend', fontsize=20.0) + +#matplotlib.rc('xtick.major', size=6.0) +#matplotlib.rc('xtick.minor', size=3.0) + +#matplotlib.rc('ytick.major', size=6.0) +#matplotlib.rc('ytick.minor', size=3.0) + + +class _Brewer(object): + """Encapsulates a nice sequence of colors. + Shades of blue that look good in color and can be distinguished + in grayscale (up to a point). + Borrowed from http://colorbrewer2.org/ + """ + color_iter = None + + colors = ['#f7fbff', '#deebf7', '#c6dbef', + '#9ecae1', '#6baed6', '#4292c6', + '#2171b5','#08519c','#08306b'][::-1] + + # lists that indicate which colors to use depending on how many are used + which_colors = [[], + [1], + [1, 3], + [0, 2, 4], + [0, 2, 4, 6], + [0, 2, 3, 5, 6], + [0, 2, 3, 4, 5, 6], + [0, 1, 2, 3, 4, 5, 6], + [0, 1, 2, 3, 4, 5, 6, 7], + [0, 1, 2, 3, 4, 5, 6, 7, 8], + ] + + current_figure = None + + @classmethod + def Colors(cls): + """Returns the list of colors. + """ + return cls.colors + + @classmethod + def ColorGenerator(cls, num): + """Returns an iterator of color strings. 
+ num: how many colors will be used + """ + for i in cls.which_colors[num]: + yield cls.colors[i] + return + + @classmethod + def InitIter(cls, num): + """Initializes the color iterator with the given number of colors.""" + cls.color_iter = cls.ColorGenerator(num) + fig = plt.gcf() + cls.current_figure = fig + + @classmethod + def ClearIter(cls): + """Sets the color iterator to None.""" + cls.color_iter = None + cls.current_figure = None + + @classmethod + def GetIter(cls, num): + """Gets the color iterator.""" + fig = plt.gcf() + if fig != cls.current_figure: + cls.InitIter(num) + cls.current_figure = fig + + if cls.color_iter is None: + cls.InitIter(num) + + return cls.color_iter + + +def _UnderrideColor(options): + """If color is not in the options, chooses a color. + """ + if 'color' in options: + return options + + # get the current color iterator; if there is none, init one + color_iter = _Brewer.GetIter(5) + + try: + options['color'] = next(color_iter) + except StopIteration: + # if you run out of colors, initialize the color iterator + # and try again + warnings.warn('Ran out of colors. Starting over.') + _Brewer.ClearIter() + _UnderrideColor(options) + + return options + + +def PrePlot(num=None, rows=None, cols=None): + """Takes hints about what's coming. 
+ num: number of lines that will be plotted + rows: number of rows of subplots + cols: number of columns of subplots + """ + if num: + _Brewer.InitIter(num) + + if rows is None and cols is None: + return + + if rows is not None and cols is None: + cols = 1 + + if cols is not None and rows is None: + rows = 1 + + # resize the image, depending on the number of rows and cols + size_map = {(1, 1): (8, 6), + (1, 2): (12, 6), + (1, 3): (12, 6), + (1, 4): (12, 5), + (1, 5): (12, 4), + (2, 2): (10, 10), + (2, 3): (16, 10), + (3, 1): (8, 10), + (4, 1): (8, 12), + } + + if (rows, cols) in size_map: + fig = plt.gcf() + fig.set_size_inches(*size_map[rows, cols]) + + # create the first subplot + if rows > 1 or cols > 1: + ax = plt.subplot(rows, cols, 1) + global SUBPLOT_ROWS, SUBPLOT_COLS + SUBPLOT_ROWS = rows + SUBPLOT_COLS = cols + else: + ax = plt.gca() + + return ax + + +def SubPlot(plot_number, rows=None, cols=None, **options): + """Configures the number of subplots and changes the current plot. + rows: int + cols: int + plot_number: int + options: passed to subplot + """ + rows = rows or SUBPLOT_ROWS + cols = cols or SUBPLOT_COLS + return plt.subplot(rows, cols, plot_number, **options) + + +def _Underride(d, **options): + """Add key-value pairs to d only if key is not in d. + If d is None, create a new dictionary. + d: dictionary + options: keyword args to add to d + """ + if d is None: + d = {} + + for key, val in options.items(): + d.setdefault(key, val) + + return d + + +def Clf(): + """Clears the figure and any hints that have been set.""" + global LOC + LOC = None + _Brewer.ClearIter() + plt.clf() + fig = plt.gcf() + fig.set_size_inches(8, 6) + + +def Figure(**options): + """Sets options for the current figure.""" + _Underride(options, figsize=(6, 8)) + plt.figure(**options) + + +def Plot(obj, ys=None, style='', **options): + """Plots a line. 
+ Args: + obj: sequence of x values, or Series, or anything with Render() + ys: sequence of y values + style: style string passed along to plt.plot + options: keyword args passed to plt.plot + """ + options = _UnderrideColor(options) + label = getattr(obj, 'label', '_nolegend_') + options = _Underride(options, linewidth=3, alpha=0.7, label=label) + + xs = obj + if ys is None: + if hasattr(obj, 'Render'): + xs, ys = obj.Render() + if isinstance(obj, pd.Series): + ys = obj.values + xs = obj.index + + if ys is None: + plt.plot(xs, style, **options) + else: + plt.plot(xs, ys, style, **options) + + +def Vlines(xs, y1, y2, **options): + """Plots a set of vertical lines. + Args: + xs: sequence of x values + y1: sequence of y values + y2: sequence of y values + options: keyword args passed to plt.vlines + """ + options = _UnderrideColor(options) + options = _Underride(options, linewidth=1, alpha=0.5) + plt.vlines(xs, y1, y2, **options) + + +def Hlines(ys, x1, x2, **options): + """Plots a set of horizontal lines. + Args: + ys: sequence of y values + x1: sequence of x values + x2: sequence of x values + options: keyword args passed to plt.vlines + """ + options = _UnderrideColor(options) + options = _Underride(options, linewidth=1, alpha=0.5) + plt.hlines(ys, x1, x2, **options) + + +def axvline(x, **options): + """Plots a vertical line. + Args: + x: x location + options: keyword args passed to plt.axvline + """ + options = _UnderrideColor(options) + options = _Underride(options, linewidth=1, alpha=0.5) + plt.axvline(x, **options) + + +def axhline(y, **options): + """Plots a horizontal line. + Args: + y: y location + options: keyword args passed to plt.axhline + """ + options = _UnderrideColor(options) + options = _Underride(options, linewidth=1, alpha=0.5) + plt.axhline(y, **options) + + +def tight_layout(**options): + """Adjust subplots to minimize padding and margins. 
+ """ + options = _Underride(options, + wspace=0.1, hspace=0.1, + left=0, right=1, + bottom=0, top=1) + plt.tight_layout() + plt.subplots_adjust(**options) + + +def FillBetween(xs, y1, y2=None, where=None, **options): + """Fills the space between two lines. + Args: + xs: sequence of x values + y1: sequence of y values + y2: sequence of y values + where: sequence of boolean + options: keyword args passed to plt.fill_between + """ + options = _UnderrideColor(options) + options = _Underride(options, linewidth=0, alpha=0.5) + plt.fill_between(xs, y1, y2, where, **options) + + +def Bar(xs, ys, **options): + """Plots a line. + Args: + xs: sequence of x values + ys: sequence of y values + options: keyword args passed to plt.bar + """ + options = _UnderrideColor(options) + options = _Underride(options, linewidth=0, alpha=0.6) + plt.bar(xs, ys, **options) + + +def Scatter(xs, ys=None, **options): + """Makes a scatter plot. + xs: x values + ys: y values + options: options passed to plt.scatter + """ + options = _Underride(options, color='blue', alpha=0.2, + s=30, edgecolors='none') + + if ys is None and isinstance(xs, pd.Series): + ys = xs.values + xs = xs.index + + plt.scatter(xs, ys, **options) + + +def HexBin(xs, ys, **options): + """Makes a scatter plot. + xs: x values + ys: y values + options: options passed to plt.scatter + """ + options = _Underride(options, cmap=matplotlib.cm.Blues) + plt.hexbin(xs, ys, **options) + + +def Pdf(pdf, **options): + """Plots a Pdf, Pmf, or Hist as a line. + Args: + pdf: Pdf, Pmf, or Hist object + options: keyword args passed to plt.plot + """ + low, high = options.pop('low', None), options.pop('high', None) + n = options.pop('n', 101) + xs, ps = pdf.Render(low=low, high=high, n=n) + options = _Underride(options, label=pdf.label) + Plot(xs, ps, **options) + + +def Pdfs(pdfs, **options): + """Plots a sequence of PDFs. + Options are passed along for all PDFs. If you want different + options for each pdf, make multiple calls to Pdf. 
+ Args: + pdfs: sequence of PDF objects + options: keyword args passed to plt.plot + """ + for pdf in pdfs: + Pdf(pdf, **options) + + +def Hist(hist, **options): + """Plots a Pmf or Hist with a bar plot. + The default width of the bars is based on the minimum difference + between values in the Hist. If that's too small, you can override + it by providing a width keyword argument, in the same units + as the values. + Args: + hist: Hist or Pmf object + options: keyword args passed to plt.bar + """ + # find the minimum distance between adjacent values + xs, ys = hist.Render() + + # see if the values support arithmetic + try: + xs[0] - xs[0] + except TypeError: + # if not, replace values with numbers + labels = [str(x) for x in xs] + xs = np.arange(len(xs)) + plt.xticks(xs+0.5, labels) + + if 'width' not in options: + try: + options['width'] = 0.9 * np.diff(xs).min() + except TypeError: + warnings.warn("Hist: Can't compute bar width automatically." + "Check for non-numeric types in Hist." + "Or try providing width option." + ) + + options = _Underride(options, label=hist.label) + options = _Underride(options, align='center') + if options['align'] == 'left': + options['align'] = 'edge' + elif options['align'] == 'right': + options['align'] = 'edge' + options['width'] *= -1 + + Bar(xs, ys, **options) + + +def Hists(hists, **options): + """Plots two histograms as interleaved bar plots. + Options are passed along for all PMFs. If you want different + options for each pmf, make multiple calls to Pmf. + Args: + hists: list of two Hist or Pmf objects + options: keyword args passed to plt.plot + """ + for hist in hists: + Hist(hist, **options) + + +def Pmf(pmf, **options): + """Plots a Pmf or Hist as a line. 
+ Args: + pmf: Hist or Pmf object + options: keyword args passed to plt.plot + """ + xs, ys = pmf.Render() + low, high = min(xs), max(xs) + + width = options.pop('width', None) + if width is None: + try: + width = np.diff(xs).min() + except TypeError: + warnings.warn("Pmf: Can't compute bar width automatically." + "Check for non-numeric types in Pmf." + "Or try providing width option.") + points = [] + + lastx = np.nan + lasty = 0 + for x, y in zip(xs, ys): + if (x - lastx) > 1e-5: + points.append((lastx, 0)) + points.append((x, 0)) + + points.append((x, lasty)) + points.append((x, y)) + points.append((x+width, y)) + + lastx = x + width + lasty = y + points.append((lastx, 0)) + pxs, pys = zip(*points) + + align = options.pop('align', 'center') + if align == 'center': + pxs = np.array(pxs) - width/2.0 + if align == 'right': + pxs = np.array(pxs) - width + + options = _Underride(options, label=pmf.label) + Plot(pxs, pys, **options) + + +def Pmfs(pmfs, **options): + """Plots a sequence of PMFs. + Options are passed along for all PMFs. If you want different + options for each pmf, make multiple calls to Pmf. + Args: + pmfs: sequence of PMF objects + options: keyword args passed to plt.plot + """ + for pmf in pmfs: + Pmf(pmf, **options) + + +def Diff(t): + """Compute the differences between adjacent elements in a sequence. + Args: + t: sequence of number + Returns: + sequence of differences (length one less than t) + """ + diffs = [t[i+1] - t[i] for i in range(len(t)-1)] + return diffs + + +def Cdf(cdf, complement=False, transform=None, **options): + """Plots a CDF as a line. + Args: + cdf: Cdf object + complement: boolean, whether to plot the complementary CDF + transform: string, one of 'exponential', 'pareto', 'weibull', 'gumbel' + options: keyword args passed to plt.plot + Returns: + dictionary with the scale options that should be passed to + Config, Show or Save. 
+ """ + xs, ps = cdf.Render() + xs = np.asarray(xs) + ps = np.asarray(ps) + + scale = dict(xscale='linear', yscale='linear') + + for s in ['xscale', 'yscale']: + if s in options: + scale[s] = options.pop(s) + + if transform == 'exponential': + complement = True + scale['yscale'] = 'log' + + if transform == 'pareto': + complement = True + scale['yscale'] = 'log' + scale['xscale'] = 'log' + + if complement: + ps = [1.0-p for p in ps] + + if transform == 'weibull': + xs = np.delete(xs, -1) + ps = np.delete(ps, -1) + ps = [-math.log(1.0-p) for p in ps] + scale['xscale'] = 'log' + scale['yscale'] = 'log' + + if transform == 'gumbel': + xs = np.delete(xs, 0) + ps = np.delete(ps, 0) + ps = [-math.log(p) for p in ps] + scale['yscale'] = 'log' + + options = _Underride(options, label=cdf.label) + Plot(xs, ps, **options) + return scale + + +def Cdfs(cdfs, complement=False, transform=None, **options): + """Plots a sequence of CDFs. + cdfs: sequence of CDF objects + complement: boolean, whether to plot the complementary CDF + transform: string, one of 'exponential', 'pareto', 'weibull', 'gumbel' + options: keyword args passed to plt.plot + """ + for cdf in cdfs: + Cdf(cdf, complement, transform, **options) + + +def Contour(obj, pcolor=False, contour=True, imshow=False, **options): + """Makes a contour plot. 
+ d: map from (x, y) to z, or object that provides GetDict + pcolor: boolean, whether to make a pseudocolor plot + contour: boolean, whether to make a contour plot + imshow: boolean, whether to use plt.imshow + options: keyword args passed to plt.pcolor and/or plt.contour + """ + try: + d = obj.GetDict() + except AttributeError: + d = obj + + _Underride(options, linewidth=3, cmap=matplotlib.cm.Blues) + + xs, ys = zip(*d.keys()) + xs = sorted(set(xs)) + ys = sorted(set(ys)) + + X, Y = np.meshgrid(xs, ys) + func = lambda x, y: d.get((x, y), 0) + func = np.vectorize(func) + Z = func(X, Y) + + x_formatter = matplotlib.ticker.ScalarFormatter(useOffset=False) + axes = plt.gca() + axes.xaxis.set_major_formatter(x_formatter) + + if pcolor: + plt.pcolormesh(X, Y, Z, **options) + if contour: + cs = plt.contour(X, Y, Z, **options) + plt.clabel(cs, inline=1, fontsize=10) + if imshow: + extent = xs[0], xs[-1], ys[0], ys[-1] + plt.imshow(Z, extent=extent, **options) + + +def Pcolor(xs, ys, zs, pcolor=True, contour=False, **options): + """Makes a pseudocolor plot. + xs: + ys: + zs: + pcolor: boolean, whether to make a pseudocolor plot + contour: boolean, whether to make a contour plot + options: keyword args passed to plt.pcolor and/or plt.contour + """ + _Underride(options, linewidth=3, cmap=matplotlib.cm.Blues) + + X, Y = np.meshgrid(xs, ys) + Z = zs + + x_formatter = matplotlib.ticker.ScalarFormatter(useOffset=False) + axes = plt.gca() + axes.xaxis.set_major_formatter(x_formatter) + + if pcolor: + plt.pcolormesh(X, Y, Z, **options) + + if contour: + cs = plt.contour(X, Y, Z, **options) + plt.clabel(cs, inline=1, fontsize=10) + + +def Text(x, y, s, **options): + """Puts text in a figure. 
+ x: number + y: number + s: string + options: keyword args passed to plt.text + """ + options = _Underride(options, + fontsize=16, + verticalalignment='top', + horizontalalignment='left') + plt.text(x, y, s, **options) + + +LEGEND = True +LOC = None + +def Config(**options): + """Configures the plot. + Pulls options out of the option dictionary and passes them to + the corresponding plt functions. + """ + names = ['title', 'xlabel', 'ylabel', 'xscale', 'yscale', + 'xticks', 'yticks', 'axis', 'xlim', 'ylim'] + + for name in names: + if name in options: + getattr(plt, name)(options[name]) + + global LEGEND + LEGEND = options.get('legend', LEGEND) + + # see if there are any elements with labels; + # if not, don't draw a legend + ax = plt.gca() + handles, labels = ax.get_legend_handles_labels() + + if LEGEND and len(labels) > 0: + global LOC + LOC = options.get('loc', LOC) + frameon = options.get('frameon', True) + + try: + plt.legend(loc=LOC, frameon=frameon) + except UserWarning: + pass + + # x and y ticklabels can be made invisible + val = options.get('xticklabels', None) + if val is not None: + if val == 'invisible': + ax = plt.gca() + labels = ax.get_xticklabels() + plt.setp(labels, visible=False) + + val = options.get('yticklabels', None) + if val is not None: + if val == 'invisible': + ax = plt.gca() + labels = ax.get_yticklabels() + plt.setp(labels, visible=False) + +def set_font_size(title_size=16, label_size=16, ticklabel_size=14, legend_size=14): + """Set font sizes for the title, labels, ticklabels, and legend. + """ + def set_text_size(texts, size): + for text in texts: + text.set_size(size) + + ax = plt.gca() + + # TODO: Make this function more robust if any of these elements + # is missing. 
+ + # title + ax.title.set_size(title_size) + + # x axis + ax.xaxis.label.set_size(label_size) + set_text_size(ax.xaxis.get_ticklabels(), ticklabel_size) + + # y axis + ax.yaxis.label.set_size(label_size) + set_text_size(ax.yaxis.get_ticklabels(), ticklabel_size) + + # legend + legend = ax.get_legend() + if legend is not None: + set_text_size(legend.texts, legend_size) + + +def bigger_text(): + sizes = dict(title_size=16, label_size=16, ticklabel_size=14, legend_size=14) + set_font_size(**sizes) + + +def Show(**options): + """Shows the plot. + For options, see Config. + options: keyword args used to invoke various plt functions + """ + clf = options.pop('clf', True) + Config(**options) + plt.show() + if clf: + Clf() + + +def Plotly(**options): + """Shows the plot. + For options, see Config. + options: keyword args used to invoke various plt functions + """ + clf = options.pop('clf', True) + Config(**options) + import plotly.plotly as plotly + url = plotly.plot_mpl(plt.gcf()) + if clf: + Clf() + return url + + +def Save(root=None, formats=None, **options): + """Saves the plot in the given formats and clears the figure. + For options, see Config. + Note: With a capital S, this is the original save, maintained for + compatibility. New code should use save(), which works better + with my newer code, especially in Jupyter notebooks. 
+ Args: + root: string filename root + formats: list of string formats + options: keyword args used to invoke various plt functions + """ + clf = options.pop('clf', True) + + save_options = {} + for option in ['bbox_inches', 'pad_inches']: + if option in options: + save_options[option] = options.pop(option) + + # TODO: falling Config inside Save was probably a mistake, but removing + # it will require some work + Config(**options) + + if formats is None: + formats = ['pdf', 'png'] + + try: + formats.remove('plotly') + Plotly(clf=False) + except ValueError: + pass + + if root: + for fmt in formats: + SaveFormat(root, fmt, **save_options) + if clf: + Clf() + + +def save(root, formats=None, **options): + """Saves the plot in the given formats and clears the figure. + For options, see plt.savefig. + Args: + root: string filename root + formats: list of string formats + options: keyword args passed to plt.savefig + """ + if formats is None: + formats = ['pdf', 'png'] + + try: + formats.remove('plotly') + Plotly(clf=False) + except ValueError: + pass + + for fmt in formats: + SaveFormat(root, fmt, **options) + + +def SaveFormat(root, fmt='eps', **options): + """Writes the current figure to a file in the given format. 
+ Args: + root: string filename root + fmt: string format + """ + _Underride(options, dpi=300) + filename = '%s.%s' % (root, fmt) + print('Writing', filename) + plt.savefig(filename, format=fmt, **options) + + +# provide aliases for calling functions with lower-case names +preplot = PrePlot +subplot = SubPlot +clf = Clf +figure = Figure +plot = Plot +vlines = Vlines +hlines = Hlines +fill_between = FillBetween +text = Text +scatter = Scatter +pmf = Pmf +pmfs = Pmfs +hist = Hist +hists = Hists +diff = Diff +cdf = Cdf +cdfs = Cdfs +contour = Contour +pcolor = Pcolor +config = Config +show = Show + + +def main(): + color_iter = _Brewer.ColorGenerator(7) + for color in color_iter: + print(color) + + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/total.py b/total.py new file mode 100644 index 0000000..136967f --- /dev/null +++ b/total.py @@ -0,0 +1,339 @@ +import os +import glob +import shutil + +from PIL import Image +import random + +from lxml import etree + +# 所有路径以'/'结尾 +your_voc_path = 'C:/Users/c9347/Desktop/voc/' # voc数据集路径 +yolo_path = 'D:/labels/positive/' # 原始数据集路径 +yolo_filtered_path = 'D:/labes_29/' # 过滤后的yolo数据集路径 +# trainval_percent = 0.9 +# train_percent = 0.9 +threshold = 200 # 图片数量阈值,选取图片数量大于350的类别 +# threshold_cls = 15 # 类别阈值,选取最多的前15类数据 + +labels = ['road roller', 'bar deposits', 'piece deposits', 'brick', 'earth vehicles', 'tower', 'digger', 'bulldozer', + 'drill', 'crane', 'concrete truck', 'mixer', 'concrete simple house', 'simple house', 'green cover', + 'black cover', 'blue enclosure', 'grey enclosure', 'color enclosure', 'building', 'groove', 'big building', + 'building frame', 'scaffold', 'vehicle', 'grave mound', 'garbage', 'crushed stones', 'bricks', 'greenhouse', + 'site shanty', 'woodpile', 'fuel tank', 'big truck', 'car', 'boxcar', 'small truck', 'van car', + 'watering car', 'tutu', 'crane closed', 'Agricultural Tricycles', 'bus', 'pickup', 'large cement pipes', + 'middle cement pipes', 'small cement pipes', 
'thin steel pipe', 'crude steel pipe', 'big stell pipe', 'slab', + 'U-steel', 'road leveling machine'] +# 建立所需文件夹 +# os.path.exists(path)——检验指定的对象是否存在。是True,否则False. +# os.makedirs(path[, mode]) 递归文件夹创建函数。 +if not os.path.exists(your_voc_path + 'VOCdevkit/VOC2007/Annotations/'): + os.makedirs(your_voc_path + 'VOCdevkit/VOC2007/Annotations/') +if not os.path.exists(your_voc_path + 'VOCdevkit/VOC2007/ImageSets/Layout'): + os.makedirs(your_voc_path + 'VOCdevkit/VOC2007/ImageSets/Layout') +if not os.path.exists(your_voc_path + 'VOCdevkit/VOC2007/ImageSets/Main'): + os.makedirs(your_voc_path + 'VOCdevkit/VOC2007/ImageSets/Main') +if not os.path.exists(your_voc_path + 'VOCdevkit/VOC2007/ImageSets/Segmentation'): + os.makedirs(your_voc_path + 'VOCdevkit/VOC2007/ImageSets/Segmentation') +if not os.path.exists(your_voc_path + 'VOCdevkit/VOC2007/JPEGImages'): + os.makedirs(your_voc_path + 'VOCdevkit/VOC2007/JPEGImages') +if not os.path.exists(your_voc_path + 'VOCdevkit/VOC2007/labels'): + os.makedirs(your_voc_path + 'VOCdevkit/VOC2007/labels') +if not os.path.exists(your_voc_path + 'VOCdevkit_filtered/VOC2007/Annotations/'): + os.makedirs(your_voc_path + 'VOCdevkit_filtered/VOC2007/Annotations/') +if not os.path.exists(your_voc_path + 'VOCdevkit_filtered/VOC2007/ImageSets/Layout'): + os.makedirs(your_voc_path + 'VOCdevkit_filtered/VOC2007/ImageSets/Layout') +if not os.path.exists(your_voc_path + 'VOCdevkit_filtered/VOC2007/ImageSets/Main'): + os.makedirs(your_voc_path + 'VOCdevkit_filtered/VOC2007/ImageSets/Main') +if not os.path.exists(your_voc_path + 'VOCdevkit_filtered/VOC2007/ImageSets/Segmentation'): + os.makedirs(your_voc_path + 'VOCdevkit_filtered/VOC2007/ImageSets/Segmentation') +if not os.path.exists(your_voc_path + 'VOCdevkit_filtered/VOC2007/JPEGImages'): + os.makedirs(your_voc_path + 'VOCdevkit_filtered/VOC2007/JPEGImages') +if not os.path.exists(your_voc_path + 'VOCdevkit_filtered/VOC2007/labels'): + os.makedirs(your_voc_path + 
'VOCdevkit_filtered/VOC2007/labels') +if not os.path.exists(yolo_filtered_path): + os.makedirs(yolo_filtered_path) +# 数据重命名 +# os.listdir(path)——列出path目录下所有的文件和目录名。 +listdir = os.listdir(yolo_path) +count = 0 +for i, file in enumerate(listdir): + if i % 100 == 0: + print(i) + # os.path.splitext(path) + # 分离文件名与扩展名;默认返回(fname,fextension)元组,可做分片操作>>> os.path.splitext('c:\\csv\\test.csv') + # ('c:\\csv\\test', '.csv') + filename = os.path.splitext(file)[0] # 文件名 + filetype = os.path.splitext(file)[1] # 文件扩展名 + if filetype == '.txt': + continue + # os.path.join(path1[, path2[, ...]]) + # 将多个路径组合后返回,第一个绝对路径之前的参数将被忽略。>>> os.path.join('c:\\', 'csv', 'test.csv') + # 'c:\\csv\\test.csv' + + Olddir = os.path.join(yolo_path, file) + Newdir = os.path.join(yolo_path, str(count).zfill(6) + '.jpg') + Oldanno = os.path.join(yolo_path, filename + '.txt') + Newanno = os.path.join(yolo_path, str(count).zfill(6) + '.txt') + # os.rename(src, dst) 重命名文件或目录,从 src 到 dst + os.rename(Olddir, Newdir) + os.rename(Oldanno, Newanno) + shutil.copyfile(Newdir, your_voc_path + 'VOCdevkit/VOC2007/JPEGImages/' + str(count).zfill(6) + '.jpg') + count += 1 + +# 生成voc格式数据集 +voc_xml = your_voc_path + 'VOCdevkit/VOC2007/Annotations/' + +# 匹配文件路径下的所有jpg文件,并返回列表 +img_glob = glob.glob(yolo_path + '*.jpg') + +img_base_names = [] + +for img in img_glob: + # os.path.basename:取文件的后缀名 + img_base_names.append(os.path.basename(img)) + +img_pre_name = [] + +for img in img_base_names: + # os.path.splitext:将文件按照后缀切分为两块 + temp1, temp2 = os.path.splitext(img) + img_pre_name.append(temp1) + print(f'imgpre:{len(img_pre_name)}') +for i, img in enumerate(img_pre_name): + if i % 100 == 0: + print(i) + with open(voc_xml + img + '.xml', 'w') as xml_files: + image = Image.open(yolo_path + img + '.jpg') + img_w, img_h = image.size + xml_files.write('\n') + xml_files.write(' folder\n') + xml_files.write(f' {img}.jpg\n') + xml_files.write(' \n') + xml_files.write(' Unknown\n') + xml_files.write(' \n') + xml_files.write(' \n') + 
xml_files.write(f' {img_w}\n') + xml_files.write(f' {img_h}\n') + xml_files.write(f' 3\n') + xml_files.write(' \n') + xml_files.write(' 0\n') + with open(yolo_path + img + '.txt', 'r') as f: + # 以列表形式返回每一行 + lines = f.read().splitlines() + for each_line in lines: + line = each_line.split(' ') + xml_files.write(' \n') + xml_files.write(f' {labels[int(line[0])]}\n') + xml_files.write(' Unspecified\n') + xml_files.write(' 0\n') + xml_files.write(' 0\n') + xml_files.write(' \n') + center_x = round(float(line[1]) * img_w) + center_y = round(float(line[2]) * img_h) + bbox_w = round(float(line[3]) * img_w) + bbox_h = round(float(line[4]) * img_h) + xmin = str(int(center_x - bbox_w / 2)) + ymin = str(int(center_y - bbox_h / 2)) + xmax = str(int(center_x + bbox_w / 2)) + ymax = str(int(center_y + bbox_h / 2)) + xml_files.write(f' {xmin}\n') + xml_files.write(f' {ymin}\n') + xml_files.write(f' {xmax}\n') + xml_files.write(f' {ymax}\n') + xml_files.write(' \n') + xml_files.write(' \n') + xml_files.write('') + +# # 划分数据集 +# xmlfilepath = your_voc_path + 'VOCdevkit/VOC2007/Annotations/' +# txtsavepath = your_voc_path + 'VOCdevkit/VOC2007/ImgSets/Main/' +# total_xml = os.listdir(xmlfilepath) +# +# num = len(total_xml) +# list = range(num) +# tv = int(num * trainval_percent) +# tr = int(tv * train_percent) +# trainval = random.sample(list, tv) +# train = random.sample(trainval, tr) +# +# ftrainval = open(your_voc_path + 'VOCdevkit/VOC2007/ImageSets/Main/trainval.txt', 'w') +# ftest = open(your_voc_path + 'VOCdevkit/VOC2007/ImageSets/Main/test.txt', 'w') +# ftrain = open(your_voc_path + 'VOCdevkit/VOC2007/ImageSets/Main/train.txt', 'w') +# fval = open(your_voc_path + 'VOCdevkit/VOC2007/ImageSets/Main/val.txt', 'w') +# +# for i in list: +# name = total_xml[i][:-4] + '\n' +# if i in trainval: +# ftrainval.write(name) +# if i in train: +# ftrain.write(name) +# else: +# fval.write(name) +# else: +# ftest.write(name) +# +# ftrainval.close() +# ftrain.close() +# fval.close() +# 
ftest.close() + +# 数据筛选 +print('根据阈值计算筛选后的类别') +path = your_voc_path + 'VOCdevkit/VOC2007/Annotations/' +listdir = os.listdir(path) +count = [0 for i in range(53)] # 每个类别的图片数 +for file in listdir: + with open(path + file, "r") as f: + text = f.read() + text = etree.fromstring(text) + name = text.xpath('/annotation/object/name/text()') + if name[0] not in ['tutu', 'car', 'garbage', 'van car', 'Agricultural Tricycles', 'pickup']: + count[int(labels.index(name[0]))] += 1 +new_labels = [] # 筛选后的类别标签 +new_cls = [] # 筛选后的类别编号 + + +# 根据类别阈值筛选 +def maxk(arraylist, k): # 返回最大的前k个数据的索引,k为类别阈值 + maxlist = [] + maxlist_id = [i for i in range(0, k)] + m = [maxlist, maxlist_id] + for i in maxlist_id: + maxlist.append(arraylist[i]) + for i in range(k, len(arraylist)): # 对目标数组之后的数字 + if arraylist[i] > min(maxlist): + mm = maxlist.index(min(maxlist)) + del m[0][mm] + del m[1][mm] + m[0].append(arraylist[i]) + m[1].append(i) + return maxlist_id + + +# kmax_list = maxk(count, threshold_cls) +# for i in kmax_list: +# new_cls.append(i) +# new_labels.append(labels[i]) +# 根据图片数量阈值筛选 +for index, i in enumerate(count): + if int(i) > threshold: + new_cls.append(index) + new_labels.append(labels[index]) + +img_glob = glob.glob(yolo_path + '*.jpg') +img_base_names = [] +for img in img_glob: + # os.path.basename:取文件的后缀名 + img_base_names.append(os.path.basename(img)) +print('开始筛选数据') +img_pre_name = [] +count = 0 +for img in img_base_names: + # os.path.splitext:将文件按照后缀切分为两块 + temp1, temp2 = os.path.splitext(img) + img_pre_name.append(temp1) + +print('清空筛选后的文件夹') +voc_filter_xml = your_voc_path + 'VOCdevkit_filtered/VOC2007/Annotations/' +listxml = os.listdir(voc_filter_xml) +# 清空筛选后的文件夹 +for file in listxml: + os.remove(voc_filter_xml + file) +voc_filter_img = your_voc_path + 'VOCdevkit_filtered/VOC2007/JPEGImages/' +listimg = os.listdir(voc_filter_img) +for file in listimg: + os.remove(voc_filter_img + file) +print('写入筛选后的数据') +for i, img in enumerate(img_pre_name): + if i % 100 == 0: + 
print(i) + with open(yolo_path + img + '.txt', 'r') as f: + # 以列表形式返回每一行 + lines = f.read().splitlines() + line = lines[0].split(' ') + if int(line[0]) in new_cls: + # 生成筛选后的的yolo格式数据集 + newcls = new_cls.index(int(line[0])) + newanno_txt = line + newanno_txt[0] = str(newcls) + newtxt = ' '.join(newanno_txt) + with open(yolo_filtered_path + str(count).zfill(6) + '.txt', 'w') as f: + f.write(newtxt) + shutil.copyfile(yolo_path + img + '.jpg', yolo_filtered_path + str(count).zfill(6) + '.jpg') + # 生成筛选后的的xml格式数据集 + shutil.copyfile(yolo_path + img + '.jpg', + voc_filter_img + str(count).zfill(6) + '.jpg') + with open(voc_filter_xml + str(count).zfill(6) + '.xml', 'w') as xml_files: + image = Image.open(yolo_path + img + '.jpg') + img_w, img_h = image.size + xml_files.write('\n') + xml_files.write(' folder\n') + xml_files.write(f' {img}.jpg\n') + xml_files.write(' \n') + xml_files.write(' Unknown\n') + xml_files.write(' \n') + xml_files.write(' \n') + xml_files.write(f' {img_w}\n') + xml_files.write(f' {img_h}\n') + xml_files.write(f' 3\n') + xml_files.write(' \n') + xml_files.write(' 0\n') + for each_line in lines: + line = each_line.split(' ') + xml_files.write(' \n') + xml_files.write(f' {new_labels[newcls]}\n') + xml_files.write(' Unspecified\n') + xml_files.write(' 0\n') + xml_files.write(' 0\n') + xml_files.write(' \n') + center_x = round(float(line[1]) * img_w) + center_y = round(float(line[2]) * img_h) + bbox_w = round(float(line[3]) * img_w) + bbox_h = round(float(line[4]) * img_h) + xmin = str(int(center_x - bbox_w / 2)) + ymin = str(int(center_y - bbox_h / 2)) + xmax = str(int(center_x + bbox_w / 2)) + ymax = str(int(center_y + bbox_h / 2)) + xml_files.write(f' {xmin}\n') + xml_files.write(f' {ymin}\n') + xml_files.write(f' {xmax}\n') + xml_files.write(f' {ymax}\n') + xml_files.write(' \n') + xml_files.write(' \n') + xml_files.write('') + count += 1 + +xmlfilepath = your_voc_path + 'VOCdevkit_filtered/VOC2007/Annotations/' +txtsavepath = your_voc_path + 
'VOCdevkit_filtered/VOC2007/ImgSets/Main/' +total_xml = os.listdir(xmlfilepath) + +# #划分数据集 +# num = len(total_xml) +# list = range(num) +# tv = int(num * trainval_percent) +# tr = int(tv * train_percent) +# trainval = random.sample(list, tv) +# train = random.sample(trainval, tr) +# +# ftrainval = open(your_voc_path + 'VOCdevkit_filtered/VOC2007/ImageSets/Main/trainval.txt', 'w') +# ftest = open(your_voc_path + 'VOCdevkit_filtered/VOC2007/ImageSets/Main/test.txt', 'w') +# ftrain = open(your_voc_path + 'VOCdevkit_filtered/VOC2007/ImageSets/Main/train.txt', 'w') +# fval = open(your_voc_path + 'VOCdevkit_filtered/VOC2007/ImageSets/Main/val.txt', 'w') +# +# for i in list: +# name = total_xml[i][:-4] + '\n' +# if i in trainval: +# ftrainval.write(name) +# if i in train: +# ftrain.write(name) +# else: +# fval.write(name) +# else: +# ftest.write(name) +# +# ftrainval.close() +# ftrain.close() +# fval.close() +# ftest.close() +print('数据筛选完成') diff --git a/two classification.py b/two classification.py new file mode 100644 index 0000000..79b92ff --- /dev/null +++ b/two classification.py @@ -0,0 +1,16 @@ +import os, random, shutil +def moveFile(fileDir, tarDir): + pathDir = os.listdir(fileDir) # 取图片的原始路径 + filenumber = len(pathDir) + rate = 0.1 # 自定义抽取图片的比例,比方说100张抽10张,那就是0.1 + picknumber = int(filenumber * rate) # 按照rate比例从文件夹中取一定数量图片 + sample = random.sample(pathDir, picknumber) # 随机选取picknumber数量的样本图片 + print(sample) + for name in sample: + shutil.move(fileDir + name, tarDir + "\\" + name) + return + +if __name__ == '__main__': + fileDir = r"D:\snoring-dataset\Snoring Dataset\图像数据\mfcc-dataset\train\ss/" # 源图片文件夹路径 + tarDir = r'D:\snoring-dataset\Snoring Dataset\图像数据\mfcc-dataset\test\ss/' # 移动到新的文件夹路径 + moveFile(fileDir, tarDir) \ No newline at end of file diff --git a/xbbh.py b/xbbh.py new file mode 100644 index 0000000..6371951 --- /dev/null +++ b/xbbh.py @@ -0,0 +1,46 @@ +import matplotlib.pyplot as plt +import librosa.display +import os +# 批量重命名 
+vpath='D:\CloudMusic/ss' +mps_dir=os.listdir(vpath) +count=0 +for i,file in enumerate(mps_dir): + print(count) + filename=os.path.splitext(file)[0] + filetype=os.path.splitext(file)[1] + if filetype=='.wav': + olddir=os.path.join(vpath,file) + newdir=os.path.join(vpath,str(count).zfill(6)+'.wav') + os.rename(olddir, newdir) + count+=1 + + +# # 批量转图片-波形图 +# vpath='D:\CloudMusic/ss' +# mps_dir=os.listdir(vpath) +# count=0 +# for i,file in enumerate(mps_dir): +# filename=os.path.splitext(file)[0] +# filetype=os.path.splitext(file)[1] +# audio_path=vpath+'/'+file +# print(audio_path,file,filetype,filename) +# if filetype=='.wav': +# music,sr=librosa.load(audio_path) +# plt.figure(figsize=(4,4)) +# librosa.display.waveplot(music,sr=sr) +# plt.savefig(vpath+'/'+filename) +# # # plt.show() + +# # 音乐文件载入 +# path='D:\CloudMusic' +# filename='1.wav' +# audio_path = path+'/'+filename +# music, sr = librosa.load(audio_path) +# +# # 宽高比为14:5的图 +# plt.figure(figsize=(224, 224)) +# librosa.display.waveplot(music, sr=sr) +# plt.savefig('D:\CloudMusic/1.jpg') +# # 显示图 +# plt.show() diff --git "a/\346\211\271\351\207\217\345\244\204\347\220\206\346\263\242\345\275\242\345\233\276.py" "b/\346\211\271\351\207\217\345\244\204\347\220\206\346\263\242\345\275\242\345\233\276.py" new file mode 100644 index 0000000..3d06121 --- /dev/null +++ "b/\346\211\271\351\207\217\345\244\204\347\220\206\346\263\242\345\275\242\345\233\276.py" @@ -0,0 +1,40 @@ +import pyworld +import librosa +import librosa.display +from IPython.display import Audio +import numpy as np +from matplotlib import pyplot as plt +import math +import os +import matplotlib.pyplot as plt +# 图片风格 +# plt.style.use('seaborn') +# 存放文件夹路径 +path=r'D:\snoring-dataset\Snoring Dataset\音频数据\0/' +# 获取文件列表 +waveforms=os.listdir(path) +pngpath=r'D:\snoring-dataset\Snoring Dataset\音频数据\波形图\no/' +names=[] + +count=0 +# 批量处理wav文件 +for i,file in enumerate(waveforms): + file_name=os.path.splitext(file)[0] + file_type=os.path.splitext(file)[1] 
+ filename=path+file + if count%10==0: + print(count) + # names.append(filename) + # 生成波形图 + x, fs = librosa.load(filename, sr=16000) # librosa load输出的waveform 是 float32 + x = x.astype(np.double) # 格式转换 + fftlen = pyworld.get_cheaptrick_fft_size(fs) # 自动计算适合的fftlen + # plt.figure() + # plt.figure(figsize=(26, 13), dpi=32) + plt.figure(figsize=(16, 11), dpi=50) + librosa.display.waveplot(x, sr=fs,) + # 保存生成的波形图 + plt.savefig(pngpath+file_name+'.png') + # plt.show() + count+=1 +# print(names) \ No newline at end of file diff --git "a/\346\211\271\351\207\217\347\224\237\346\210\220mfcc\345\233\276.py" "b/\346\211\271\351\207\217\347\224\237\346\210\220mfcc\345\233\276.py" new file mode 100644 index 0000000..ff32e20 --- /dev/null +++ "b/\346\211\271\351\207\217\347\224\237\346\210\220mfcc\345\233\276.py" @@ -0,0 +1,44 @@ +import pyworld +import librosa +import librosa.display +from IPython.display import Audio +import numpy as np +from matplotlib import pyplot as plt +import math +import os +import matplotlib.pyplot as plt +# 图片风格 +# plt.style.use('seaborn') +# 存放文件夹路径 +path=r'D:\snoring-dataset\Snoring Dataset\音频数据\1-snoring sounds/' +# 获取文件列表 +waveforms=os.listdir(path) +pngpath=r'D:\snoring-dataset\Snoring Dataset\音频数据\mfcc\ss/' +names=[] + +count=0 +# 批量处理wav文件 +for i,file in enumerate(waveforms): + file_name=os.path.splitext(file)[0] + file_type=os.path.splitext(file)[1] + filename=path+file + print(count) + # names.append(filename) + # 生成stft声谱图 + y, sr = librosa.load(filename, sr=16000) # librosa load输出的waveform 是 float32 + # x = x.astype(np.double) # 格式转换 + # fftlen = pyworld.get_cheaptrick_fft_size(fs) # 自动计算适合的fftlen + melspec = librosa.feature.melspectrogram(y, sr, n_fft=1024, hop_length=512, n_mels=128) + logmelspec = librosa.power_to_db(melspec) # 转换为对数刻度 + plt.figure(figsize=(16, 11), dpi=50) + librosa.display.specshow(logmelspec, sr=sr) + # 保存生成的波形图 + plt.savefig(pngpath+file_name+'.png') + # plt.show() + count+=1 +# print(names) + + +# 绘制 mel 频谱图 
+plt.figure() + diff --git "a/\346\211\271\351\207\217\347\224\237\346\210\220stft\345\243\260\350\260\261\345\233\276.py" "b/\346\211\271\351\207\217\347\224\237\346\210\220stft\345\243\260\350\260\261\345\233\276.py" new file mode 100644 index 0000000..2f4f5a6 --- /dev/null +++ "b/\346\211\271\351\207\217\347\224\237\346\210\220stft\345\243\260\350\260\261\345\233\276.py" @@ -0,0 +1,41 @@ +import pyworld +import librosa +import librosa.display +from IPython.display import Audio +import numpy as np +from matplotlib import pyplot as plt +import math +import os +import matplotlib.pyplot as plt +# 图片风格 +# plt.style.use('seaborn') +# 存放文件夹路径 +path=r'D:\snoring-dataset\Snoring Dataset\音频数据\1-snoring sounds/' +# 获取文件列表 +waveforms=os.listdir(path) +pngpath=r'D:\snoring-dataset\Snoring Dataset\音频数据\stft\ss/' +names=[] + +count=0 +# 批量处理wav文件 +for i,file in enumerate(waveforms): + file_name=os.path.splitext(file)[0] + file_type=os.path.splitext(file)[1] + filename=path+file + print(file_name) + # names.append(filename) + # 生成stft声谱图 + # if int(file_name) in [898,899,900,901,902,903,904,905,906,907]: + # print("pass") + # continue + x, fs = librosa.load(filename, sr=16000) # librosa load输出的waveform 是 float32 + x = x.astype(np.double) # 格式转换 + fftlen = pyworld.get_cheaptrick_fft_size(fs) # 自动计算适合的fftlen + S = librosa.stft(x, n_fft=fftlen) + plt.figure(figsize=(16, 11), dpi=50) + librosa.display.specshow(np.log(np.abs(S)), sr=fs) + # 保存生成的波形图 + plt.savefig(pngpath+file_name+'.png') + # plt.show() + count+=1 +# print(names) diff --git "a/\346\227\266\345\237\237\351\242\221\345\237\237\345\233\276.py" "b/\346\227\266\345\237\237\351\242\221\345\237\237\345\233\276.py" new file mode 100644 index 0000000..f288082 --- /dev/null +++ "b/\346\227\266\345\237\237\351\242\221\345\237\237\345\233\276.py" @@ -0,0 +1,108 @@ +import wave +import pyaudio +import pylab +import numpy as np +import matplotlib.pyplot as plt + + +def get_framerate(wavefile): + ''' + 输入文件路径,获取帧率 + ''' + wf = 
wave.open(wavfile, "rb") # 打开wav + p = pyaudio.PyAudio() # 创建PyAudio对象 + params = wf.getparams() # 参数获取 + nchannels, sampwidth, framerate, nframes = params[:4] + return framerate + + +def get_nframes(wavefile): + ''' + 输入文件路径,获取帧数 + ''' + wf = wave.open(wavfile, "rb") # 打开wav + p = pyaudio.PyAudio() # 创建PyAudio对象 + params = wf.getparams() # 参数获取 + nchannels, sampwidth, framerate, nframes = params[:4] + return nframes + + +def get_wavedata(wavfile): + ''' + 输入文件路径,获取处理好的 N-2 左右声部数组 + ''' + #####1.读入wave文件 + wf = wave.open(wavfile, "rb") # 打开wav + p = pyaudio.PyAudio() # 创建PyAudio对象 + params = wf.getparams() # 参数获取 + nchannels, sampwidth, framerate, nframes = params[:4] + stream = p.open(format=p.get_format_from_width(sampwidth), + channels=nchannels, + rate=framerate, + output=True) # 创建输出流 + # 读取完整的帧数据到str_data中,这是一个string类型的数据 + str_data = wf.readframes(nframes) + wf.close() # 关闭wave + + #####2.将波形数据转换为数组 + # N-1 一维数组,右声道接着左声道 + wave_data = np.frombuffer(str_data, dtype=np.short) + # 2-N N维数组 + wave_data.shape = -1, 2 + # 将数组转置为 N-2 目标数组 + wave_data = wave_data.T + return wave_data + + +def plot_timedomain(wavfile): + ''' + 画出时域图 + ''' + wave_data = get_wavedata(wavfile) # 获取处理好的wave数据 + framerate = get_framerate(wavfile) # 获取帧率 + nframes = get_nframes(wavfile) # 获取帧数 + + #####3.构建横坐标 + time = np.arange(0, nframes) * (1.0 / framerate) + + #####4.画图 + pylab.figure(figsize=(40, 10)) + pylab.subplot(211) + pylab.plot(time, wave_data[0]) # 第一幅图:左声道 + pylab.subplot(212) + pylab.plot(time, wave_data[1], c="g") # 第二幅图:右声道 + pylab.xlabel("time (seconds)") + pylab.show() + return None + + +def plot_freqdomain(start, fft_size, wavfile): + ''' + 画出频域图 + ''' + waveData = get_wavedata(wavfile) # 获取wave数据 + framerate = get_framerate(wavfile) # 获取帧率数据 + + #### 1.取出所需部分进行傅里叶变换,并得到幅值 + # rfft,对称保留一半,结果为 fft_size/2-1 维复数数组 + fft_y1 = np.fft.rfft(waveData[0][start:start + fft_size - 1]) / fft_size # 左声部 + fft_y2 = np.fft.rfft(waveData[1][start:start + fft_size - 1]) / fft_size # 右声部 
+ + #### 2.计算频域图x值 + # 最小值为0Hz,最大值一般设为采样频率的一半 + freqs = np.linspace(0, framerate / 2, fft_size / 2) + + #### 3.画图 + plt.figure(figsize=(20, 10)) + pylab.subplot(211) + plt.plot(freqs, np.abs(fft_y1)) + pylab.xlabel("frequence(Hz)") + pylab.subplot(212) + plt.plot(freqs, np.abs(fft_y2), c='g') + pylab.xlabel("frequence(Hz)") + plt.show() + + +wavfile='D:\CloudMusic\ss/000005.wav' +plot_timedomain(wavfile=wavfile) +plot_freqdomain(10000,4000,wavfile) diff --git "a/\346\227\266\351\225\277.py" "b/\346\227\266\351\225\277.py" new file mode 100644 index 0000000..e79c899 --- /dev/null +++ "b/\346\227\266\351\225\277.py" @@ -0,0 +1,8 @@ +import contextlib +import wave +file_path = r"D:\snoring-dataset\Snoring Dataset\1_1.wav" +with contextlib.closing(wave.open(file_path, 'r')) as f: + frames = f.getnframes() + rate = f.getframerate() + wav_length = frames / float(rate) + print("音频长度:",wav_length,"秒") diff --git "a/\346\227\266\351\242\221\350\260\261\357\274\214\350\257\255\350\260\261\345\233\276\357\274\214mel\350\257\255\350\260\261\345\200\222\350\260\261.py" "b/\346\227\266\351\242\221\350\260\261\357\274\214\350\257\255\350\260\261\345\233\276\357\274\214mel\350\257\255\350\260\261\345\200\222\350\260\261.py" new file mode 100644 index 0000000..1b257a9 --- /dev/null +++ "b/\346\227\266\351\242\221\350\260\261\357\274\214\350\257\255\350\260\261\345\233\276\357\274\214mel\350\257\255\350\260\261\345\200\222\350\260\261.py" @@ -0,0 +1,76 @@ +import matplotlib +import pyworld +import librosa +import librosa.display +from IPython.display import Audio +import numpy as np +from matplotlib import pyplot as plt +import math +# plt.style.use('seaborn-white') +# plt.style.use('seaborn') +# 波形图 +x, fs = librosa.load("D:\snoring-dataset\Snoring Dataset\音频数据/0-non-snoring sounds/000869.wav", sr=16000) #librosa load输出的waveform 是 float32 +x = x.astype(np.double) # 格式转换 + +fftlen = pyworld.get_cheaptrick_fft_size(fs)#自动计算适合的fftlen +# 波形图 +# plt.figure(figsize=(26,13),dpi=32) +# # 
plt.figure() +# librosa.display.waveplot(x, sr=fs,x_axis=None,) +# # plt.savefig('D:\snoring-dataset\Snoring Dataset/000000-0.png') +# plt.show() +# Audio(x, rate=fs) +# 生成语谱图 +# plt.figure() +# plt.specgram(x,NFFT=fftlen, Fs=fs,noverlap=fftlen*1/4, window=np.hanning(fftlen)) +# # plt.ylabel('Frequency') +# # plt.xlabel('Time(s)') +# # plt.title('specgram') +# plt.show() +#功率谱图 +# D = librosa.amplitude_to_db(librosa.stft(x), ref=np.max)#20log|x| +# plt.figure() +# # librosa.display.specshow(D, sr=fs, hop_length=fftlen*1/4,y_axis='linear') +# librosa.display.specshow(D, sr=fs,hop_length=fftlen*1/4) +# # plt.colorbar(format='%+2.0f dB') +# # plt.title('Linear-frequency power spectrogram') +# plt.show() + +# STFT时频图 +# S = librosa.stft(x,n_fft=fftlen) # 幅值 +# plt.figure() +# # librosa.display.specshow(np.log(np.abs(S)), sr=fs,hop_length=fftlen/4) +# librosa.display.specshow(np.log(np.abs(S)), sr=fs) +# # plt.colorbar() +# # plt.title('STFT') +# plt.savefig('1') +# plt.show() + + +# mel spectrogram 梅尔语谱图 +# melspec = librosa.feature.melspectrogram(x, sr=fs, n_fft=fftlen, n_mels=128) #(128,856) +# logmelspec = librosa.power_to_db(melspec)# (128,856) +# plt.figure() +# # librosa.display.specshow(logmelspec, sr=fs, x_axis='time', y_axis='mel') +# librosa.display.specshow(logmelspec, sr=fs) +# # plt.title('log melspectrogram') +# plt.show() + +# MFCC +y, sr = librosa.load('D:\snoring-dataset\Snoring Dataset/1_0.wav', sr=16000) +# 提取 mel spectrogram feature +# melspec = librosa.feature.melspectrogram(y, sr, n_fft=1024, hop_length=512, n_mels=128) +melspec = librosa.feature.melspectrogram(y, sr, n_fft=1024, hop_length=512, n_mels=128) +logmelspec = librosa.power_to_db(melspec) # 转换为对数刻度 +# 绘制 mel 频谱图 +plt.figure() +librosa.display.specshow(logmelspec, sr=sr) +# librosa.display.specshow(logmelspec, sr=sr, x_axis='time', y_axis='mel') +# plt.colorbar(format='%+2.0f dB') # 右边的色度条 +# plt.title('Beat wavform') +plt.show() + + + + + diff --git 
"a/\346\263\242\345\275\242\345\233\276.py" "b/\346\263\242\345\275\242\345\233\276.py" new file mode 100644 index 0000000..07d3778 --- /dev/null +++ "b/\346\263\242\345\275\242\345\233\276.py" @@ -0,0 +1,18 @@ +import pyworld +import librosa +import librosa.display +from IPython.display import Audio +import numpy as np +from matplotlib import pyplot as plt +# plt.style.use('seaborn-white') +# plt.style.use('seaborn') +# 波形图 +x, fs = librosa.load("D:\snoring-dataset\Snoring Dataset/1_0.wav", sr=16000) #librosa load输出的waveform 是 float32 +x = x.astype(np.double) # 格式转换 +fftlen = pyworld.get_cheaptrick_fft_size(fs)#自动计算适合的fftlen +# 波形图 +plt.figure(figsize=(16,11),dpi=50) +# plt.figure() +librosa.display.waveplot(x, sr=fs,x_axis=None,) +plt.savefig('D:\snoring-dataset\Snoring Dataset/1_0-1.png') +plt.show() \ No newline at end of file diff --git "a/\347\224\273\345\233\276-\346\237\261\347\212\266\345\233\276.py" "b/\347\224\273\345\233\276-\346\237\261\347\212\266\345\233\276.py" new file mode 100644 index 0000000..42469d2 --- /dev/null +++ "b/\347\224\273\345\233\276-\346\237\261\347\212\266\345\233\276.py" @@ -0,0 +1,19 @@ +import os +import matplotlib.pyplot as plt +plt.style.use('seaborn') +type=['Snoring-kaggle','No-Snoring-kaggle','Snoring-ESC50','No-Snoring-ESC50'] +num=[500,500,40,40] +plt.figure() +x_ticks = range(len(type)) +# plt.bar(x_ticks, num, color=['b','r','g','y','c','m','y','k','c','g','b']) +plt.bar(x_ticks, num,color=['cornflowerblue','cornflowerblue','c','c']) +# 修改x刻度 +plt.xticks(x_ticks, type) +# 添加标题 +plt.title("Snoring-Dataset") +# 添加网格显示 +plt.grid(linestyle="--", alpha=0.7) +# plt.legend(loc='upper center', fontsize=15, ncol=2) +# 4、显示图像 +plt.savefig('数量柱状图.png') +plt.show() \ No newline at end of file diff --git "a/\351\245\274\345\233\276.py" "b/\351\245\274\345\233\276.py" new file mode 100644 index 0000000..142b591 --- /dev/null +++ "b/\351\245\274\345\233\276.py" @@ -0,0 +1,19 @@ +import os +import matplotlib.pyplot as plt 
+plt.style.use('seaborn') +type=['Snoring-kaggle','No-Snoring-kaggle','Snoring-ESC50','No-Snoring-ESC50'] +num=[500,500,40,40] +plt.figure() +# 2、创建画布 + + +# 3、绘制饼图 +plt.pie(num, labels=type, colors=['dodgerblue','red','springgreen','y'], autopct="%1.2f%%") + +# 显示图例 +plt.legend() + +plt.axis('equal') +plt.savefig('饼图.png') +# 4、显示图像 +plt.show()