-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 3884906
Showing
31 changed files
with
4,290 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
# -*- coding: UTF-8 -*- | ||
import wave | ||
import numpy as np | ||
import matplotlib.pyplot as plt | ||
|
||
# Plot the waveform of the first channel of a WAV file together with the
# magnitude spectrum of (at most) its first N samples.

# wave.open returns a Wave_read instance whose methods expose the file's format
# information and sample data; the with-block closes it even if reading fails.
with wave.open(r"D:\CloudMusic\ss/000005.wav", "rb") as f:
    # getparams() returns a tuple: channel count, sample width (in bytes),
    # frame rate, frame count, compression type, compression description.
    # Only the first four are used here.
    params = f.getparams()
    nchannels, sampwidth, framerate, nframes = params[:4]
    # Read the raw sample bytes; the argument is the number of frames to read.
    str_data = f.readframes(nframes)

# The decoding below interprets the bytes as 16-bit samples; fail loudly for
# any other sample width instead of silently plotting garbage.
if sampwidth != 2:
    raise ValueError(
        "only 16-bit PCM WAV files are supported, got sampwidth=%d" % sampwidth
    )

# Convert the raw bytes to little-endian 16-bit integers. np.frombuffer
# replaces the deprecated binary mode of np.fromstring.
wave_data = np.frombuffer(str_data, dtype="<i2")
# Frames are interleaved per channel: reshape to one column per channel using
# the channel count from the header (not a hard-coded 2), then transpose so
# that wave_data[k] is the sample sequence of channel k.
wave_data = wave_data.reshape(-1, nchannels).T
# Time stamp of every frame, derived from the number of frames actually
# decoded and the frame rate, so it always matches len(wave_data[0]).
time = np.arange(wave_data.shape[1]) / framerate
# print(params)
plt.figure(1)
# time pairs with wave_data[0] to form the (x, y) points of the waveform.
plt.subplot(211)
plt.plot(time, wave_data[0])
plt.xlabel("time/s")
plt.title('Wave')

N = 44100  # maximum number of samples fed to the FFT
start = 0  # index of the first sample of the analysed window
wave_data2 = wave_data[0][start:start + N]
# The slice is shorter than N when the recording has fewer samples; size
# everything from the real length so the x and y arrays always agree.
n_fft = len(wave_data2)
if n_fft == 0:
    raise ValueError("no samples available at offset %d" % start)
# Frequency resolution: the spacing between DFT bins is framerate / n_fft.
df = framerate / n_fft
freq = np.arange(n_fft) * df
c = np.fft.fft(wave_data2) * 2 / n_fft
# Conventionally only the first half of the spectrum (up to half the sampling
# frequency) is displayed.
half = n_fft // 2
plt.subplot(212)
plt.plot(freq[:half], abs(c[:half]), 'r')
plt.title('Freq')
plt.xlabel("Freq/Hz")
plt.show()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,149 @@ | ||
from lxml import etree | ||
|
||
|
||
class GEN_Annotations:
    """Build an ``<annotation>`` XML tree (PASCAL VOC-style layout) for one image.

    The tree is kept in ``self.root``. The ``set_*`` / ``add_*`` methods append
    further child elements and ``savefile`` writes the tree to disk.
    """

    def __init__(self, filename, imgpath, folder="ss"):
        """Create the root element with folder, filename, path and source.

        Args:
            filename: Text of the ``<filename>`` element.
            imgpath: Text of the ``<path>`` element.
            folder: Text of the ``<folder>`` element; defaults to ``"ss"``,
                the value that used to be hard-coded.
        """
        self.root = etree.Element("annotation")
        self._add_text_child(self.root, "folder", folder)
        self._add_text_child(self.root, "filename", filename)
        self._add_text_child(self.root, "path", imgpath)
        source = etree.SubElement(self.root, "source")
        self._add_text_child(source, "database", "Unknown")

    @staticmethod
    def _add_text_child(parent, tag, text):
        """Append ``<tag>text</tag>`` to ``parent`` and return the new element."""
        child = etree.SubElement(parent, tag)
        child.text = text
        return child

    def set_size(self, witdh, height, channel):
        """Append a ``<size>`` element with width, height and depth children.

        Args:
            witdh: Image width (the parameter keeps its original spelling so
                existing keyword callers continue to work).
            height: Image height.
            channel: Written as the ``<depth>`` element.
        """
        size = etree.SubElement(self.root, "size")
        self._add_text_child(size, "width", str(witdh))
        self._add_text_child(size, "height", str(height))
        self._add_text_child(size, "depth", str(channel))

    def set_segmented(self, seg=0):
        """Append a ``<segmented>`` element holding ``str(seg)``."""
        self._add_text_child(self.root, "segmented", str(seg))

    def savefile(self, filename):
        """Write the tree to ``filename`` as pretty-printed UTF-8 XML without
        an XML declaration."""
        tree = etree.ElementTree(self.root)
        tree.write(filename, pretty_print=True, xml_declaration=False, encoding='utf-8')

    def add_pic_attr(self, label, xmin, ymin, xmax, ymax):
        """Append one ``<object>`` element describing a labelled bounding box.

        Args:
            label: Text of the object's ``<name>`` element.
            xmin, ymin, xmax, ymax: Box coordinates, stored via ``str()``
                under ``<bndbox>``.
        """
        # Named ``obj`` rather than ``object`` so the builtin is not shadowed.
        obj = etree.SubElement(self.root, "object")
        self._add_text_child(obj, "name", label)
        self._add_text_child(obj, "pose", "Unspecified")
        self._add_text_child(obj, "truncated", "0")
        self._add_text_child(obj, "difficult", "0")

        bndbox = etree.SubElement(obj, "bndbox")
        self._add_text_child(bndbox, "xmin", str(xmin))
        self._add_text_child(bndbox, "ymin", str(ymin))
        self._add_text_child(bndbox, "xmax", str(xmax))
        self._add_text_child(bndbox, "ymax", str(ymax))
|
||
|
||
import os | ||
import cv2 | ||
|
||
|
||
def getFileList(dir, Filelist, ext=None):
    """Collect the files found at or below ``dir`` into ``Filelist``.

    Args:
        dir: Path of a file or of a directory that is searched recursively.
        Filelist: List the matching paths are appended to (mutated in place).
        ext: Extension to keep, with or without the leading dot (for example
            ``'png'`` or ``'.png'``). ``None`` or an empty string keeps every
            file. The comparison is case-sensitive.

    Returns:
        The same ``Filelist`` object that was passed in.
    """
    if os.path.isfile(dir):
        # Compare the real extension instead of testing whether ``ext`` is a
        # substring of the last three characters: that test could never match
        # a four-letter extension and accepted names such as "epng".
        if not ext or os.path.splitext(dir)[1] == '.' + ext.lstrip('.'):
            Filelist.append(dir)
    elif os.path.isdir(dir):
        for entry in os.listdir(dir):
            getFileList(os.path.join(dir, entry), Filelist, ext)

    return Filelist
|
||
|
||
# org_img_folder = './org' | ||
|
||
# 检索文件 | ||
# imglist = getFileList(org_img_folder, [], 'jpg') | ||
# print('本次执行检索到 ' + str(len(imglist)) + ' 张图像\n') | ||
|
||
# for imgpath in imglist: | ||
# imgname = os.path.splitext(os.path.basename(imgpath))[0] | ||
# img = cv2.imread(imgpath, cv2.IMREAD_COLOR) | ||
# 对每幅图像执行相关操作 | ||
|
||
if __name__ == '__main__':
    # Folder that is searched (recursively) for the images to annotate.
    org_img_folder = r'.\标注文件\mfcc\ss'
    # Folder that receives one XML annotation file per image.
    path = r'E:\语音处理\频谱\VOC\mfcc/ss/'

    # Collect the image files.
    imglist = getFileList(org_img_folder, [], 'png')
    print('本次执行检索到 ' + str(len(imglist)) + ' 张图像\n')

    for imagepath in imglist:
        # Take the file name from the path itself rather than from a fixed
        # position in a backslash split, which only worked for files sitting
        # exactly four levels deep in a Windows-style path.
        image_name = os.path.basename(imagepath)
        anno = GEN_Annotations(image_name, imagepath)
        anno.set_size(800, 550, 3)
        # Every image gets the same fixed bounding box.
        anno.add_pic_attr("Snoring", 99, 64, 724, 493)
        # splitext removes only the final extension, so names containing
        # extra dots are no longer truncated at the first dot.
        stem = os.path.splitext(image_name)[0]
        anno.savefile(os.path.join(path, stem + ".xml"))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,153 @@ | ||
from lxml import etree | ||
|
||
|
||
class GEN_Annotations:
    """Build an ``<annotation>`` XML tree (PASCAL VOC-style layout) for one image.

    The tree is kept in ``self.root``. The ``set_*`` / ``add_*`` methods append
    further child elements and ``savefile`` writes the tree to disk. This
    variant does not emit a ``<path>`` element.
    """

    def __init__(self, filename, folder="folder"):
        """Create the root element with folder, filename and source children.

        Args:
            filename: Text of the ``<filename>`` element.
            folder: Text of the ``<folder>`` element; defaults to
                ``"folder"``, the value that used to be hard-coded.
        """
        self.root = etree.Element("annotation")
        self._add_text_child(self.root, "folder", folder)
        self._add_text_child(self.root, "filename", filename)
        source = etree.SubElement(self.root, "source")
        self._add_text_child(source, "database", "Unknown")

    @staticmethod
    def _add_text_child(parent, tag, text):
        """Append ``<tag>text</tag>`` to ``parent`` and return the new element."""
        child = etree.SubElement(parent, tag)
        child.text = text
        return child

    def set_size(self, witdh, height, channel):
        """Append a ``<size>`` element with width, height and depth children.

        Args:
            witdh: Image width (the parameter keeps its original spelling so
                existing keyword callers continue to work).
            height: Image height.
            channel: Written as the ``<depth>`` element.
        """
        size = etree.SubElement(self.root, "size")
        self._add_text_child(size, "width", str(witdh))
        self._add_text_child(size, "height", str(height))
        self._add_text_child(size, "depth", str(channel))

    def set_segmented(self, seg=0):
        """Append a ``<segmented>`` element holding ``str(seg)``."""
        self._add_text_child(self.root, "segmented", str(seg))

    def savefile(self, filename):
        """Write the tree to ``filename`` as pretty-printed UTF-8 XML without
        an XML declaration."""
        tree = etree.ElementTree(self.root)
        tree.write(filename, pretty_print=True, xml_declaration=False, encoding='utf-8')

    def add_pic_attr(self, label, xmin, ymin, xmax, ymax):
        """Append one ``<object>`` element describing a labelled bounding box.

        Args:
            label: Text of the object's ``<name>`` element.
            xmin, ymin, xmax, ymax: Box coordinates, stored via ``str()``
                under ``<bndbox>``.
        """
        # Named ``obj`` rather than ``object`` so the builtin is not shadowed.
        obj = etree.SubElement(self.root, "object")
        self._add_text_child(obj, "name", label)
        self._add_text_child(obj, "pose", "Unspecified")
        self._add_text_child(obj, "truncated", "0")
        self._add_text_child(obj, "difficult", "0")

        bndbox = etree.SubElement(obj, "bndbox")
        self._add_text_child(bndbox, "xmin", str(xmin))
        self._add_text_child(bndbox, "ymin", str(ymin))
        self._add_text_child(bndbox, "xmax", str(xmax))
        self._add_text_child(bndbox, "ymax", str(ymax))
|
||
|
||
import os | ||
import cv2 | ||
|
||
|
||
def getFileList(dir, Filelist, ext=None):
    """Collect the files found at or below ``dir`` into ``Filelist``.

    Args:
        dir: Path of a file or of a directory that is searched recursively.
        Filelist: List the matching paths are appended to (mutated in place).
        ext: Extension to keep, with or without the leading dot (for example
            ``'jpg'`` or ``'.jpg'``). ``None`` or an empty string keeps every
            file. The comparison is case-sensitive.

    Returns:
        The same ``Filelist`` object that was passed in.
    """
    if os.path.isfile(dir):
        # Compare the real extension instead of testing whether ``ext`` is a
        # substring of the last three characters: that test could never match
        # a four-letter extension and accepted names such as "xjpg".
        if not ext or os.path.splitext(dir)[1] == '.' + ext.lstrip('.'):
            Filelist.append(dir)
    elif os.path.isdir(dir):
        for entry in os.listdir(dir):
            getFileList(os.path.join(dir, entry), Filelist, ext)

    return Filelist
|
||
|
||
# org_img_folder = './org' | ||
|
||
# 检索文件 | ||
# imglist = getFileList(org_img_folder, [], 'jpg') | ||
# print('本次执行检索到 ' + str(len(imglist)) + ' 张图像\n') | ||
|
||
# for imgpath in imglist: | ||
# imgname = os.path.splitext(os.path.basename(imgpath))[0] | ||
# img = cv2.imread(imgpath, cv2.IMREAD_COLOR) | ||
# 对每幅图像执行相关操作 | ||
|
||
if __name__ == '__main__':
    # Folder that is searched (recursively) for the images to annotate.
    org_img_folder = r'.\标注文件\wave\ss'
    # Folder that receives one XML annotation file per image.
    path = r'E:\语音处理\频谱\anno\wave/'

    # Collect the image files.
    imglist = getFileList(org_img_folder, [], 'jpg')
    print('本次执行检索到 ' + str(len(imglist)) + ' 张图像\n')

    for imagepath in imglist:
        # Take the file name from the path itself rather than from a fixed
        # position in a backslash split, which only worked for files sitting
        # exactly four levels deep in a Windows-style path.
        image_name = os.path.basename(imagepath)
        anno = GEN_Annotations(image_name)
        anno.set_size(800, 550, 3)
        anno.set_segmented()
        # Every image gets the same fixed bounding box.
        anno.add_pic_attr("Snoring", 105, 72, 718, 486)
        # splitext removes only the final extension, so names containing
        # extra dots are no longer truncated at the first dot.
        stem = os.path.splitext(image_name)[0]
        anno.savefile(os.path.join(path, stem + ".xml"))
Oops, something went wrong.