樂團總譜分譜太麻煩,用過西貝柳斯,效果不滿意。最近嘗試了字幕提取識別
https://blog.csdn.net/XnCSD/article/details/89376477
想到類似的方法可以用來做樂譜的提取。
進行邊緣檢測
step one 膨脹腐蝕
設定二值閾的方法對樂譜并不管用,因此采用了腐蝕膨脹算法:讓線條能覆蓋一定的范圍,再二值化提取輪廓
https://www.cnblogs.com/denny402/p/5166258.html
import cv2
import numpy as np
import matplotlib.pyplot as plt
%matplotlib qt5
## Test image; the Windows path uses escaped backslashes.
pic = 'F:\\OCR_puzi\\sample\\0002.jpg'
## a. Load the image exactly as stored (no thresholding is applied here).
src = cv2.imread(pic, cv2.IMREAD_UNCHANGED)
if src is None:
    # cv2.imread reports a missing or undecodable file by returning None.
    raise FileNotFoundError(pic)
## b. Structuring element: a 30x30 block of ones.
kernel = np.ones((30, 30), np.uint8)
## c. Erode with the default iteration count. No third positional argument is
## given: in cv2.erode / cv2.dilate that slot is `dst`, and an iteration
## count has to be passed as `iterations=...`.
erosion = cv2.erode(src, kernel)
## Dilate the eroded image, then run the erode/dilate pair a second time.
dst = cv2.dilate(erosion, kernel)
erosion1 = cv2.erode(dst, kernel)
dst1 = cv2.dilate(erosion1, kernel)
## Show the result of the first erosion in a window titled 'origin'.
cv2.imshow('origin', erosion)
cv2.waitKey()
cv2.destroyAllWindows()
step two 灰度圖統計行均值
#轉灰度
import numpy as np
def rgb2gray(rgb):
    """Convert an image array to grayscale with fixed channel weights.

    Args:
        rgb: Array indexed as ``[row, column, channel]``. Channels 0, 1 and
            2 are weighted by 0.2989, 0.5870 and 0.1140 respectively. A 2-D
            array is treated as already being grayscale.

    Returns:
        A float array holding the weighted sum of the three channels, or the
        2-D input converted to float.
    """
    rgb = np.asarray(rgb)
    if rgb.ndim == 2:
        # A single-channel image has no channel axis to combine.
        return rgb.astype(float)
    r, g, b = rgb[:, :, 0], rgb[:, :, 1], rgb[:, :, 2]
    return 0.2989 * r + 0.5870 * g + 0.1140 * b
# NOTE: `erosion` is derived from cv2.imread, whose colour images are stored
# in BGR order, so rgb2gray applies its channel-0 weight to blue here.
erosiongray = rgb2gray(erosion)
# Mean brightness of every pixel row (the mean is taken along axis 1).
y = np.mean(erosiongray, 1)
# Print "<1-based row number> <row mean>" for visual inspection.
for count, i in enumerate(y, start=1):
    print(count, i)
取邊緣劃分閾值為100
# Indices of the rows whose mean brightness is below the threshold of 100.
index_b = list(np.where(y < 100)[0])
if not index_b:
    raise ValueError("no row has a mean brightness below 100")
## Work out the cut positions (row indices).
# First cut: 30 rows above the first dark row, clamped so it cannot go negative.
listcut = [max(index_b[0] - 30, 0)]
# A jump of 2 or more between consecutive dark rows marks a gap; cut at the
# midpoint of that gap. Integer division keeps the result a usable row index.
for upper, lower in zip(index_b, index_b[1:]):
    if lower - upper >= 2:
        listcut.append((lower + upper) // 2)
# Last cut: 30 rows below the last dark row.
listcut.append(index_b[-1] + 30)
listcut
# Cut one strip out of the original image and display it.
from PIL import Image

# Work on channel 0 of the source image only.
im = src[:, :, 0]
# Row range of the strip to extract. These indices are specific to this
# sample page and have to be adjusted for other files.
im = im[3048:3213, :]
img = Image.fromarray(im)
img.show()
遍歷頁面組合圖像
整合:
import cv2
import numpy as np
import matplotlib.pyplot as plt
%matplotlib qt5
from PIL import Image
import scipy.misc
import os
# List every entry of a directory as a path.
def eachFile(filepath):
    """Return the paths of all entries in ``filepath``, sorted by name.

    Args:
        filepath: Directory to list, with or without a trailing separator.

    Returns:
        A list with ``filepath`` joined to each entry name. Names are sorted
        so that, for example, zero-padded page numbers come back in order.
    """
    # os.listdir gives no ordering guarantee, so sort for a stable sequence.
    return [os.path.join(filepath, name) for name in sorted(os.listdir(filepath))]
# Paths of every entry in the sample directory (the argument ends with a path separator).
dirlist=eachFile('F:\\OCR_puzi\\sample\\')
def rgb2gray(rgb):
    """Convert an image array to grayscale with fixed channel weights.

    Args:
        rgb: Array indexed as ``[row, column, channel]``. Channels 0, 1 and
            2 are weighted by 0.2989, 0.5870 and 0.1140 respectively. A 2-D
            array is treated as already being grayscale.

    Returns:
        A float array holding the weighted sum of the three channels, or the
        2-D input converted to float.
    """
    rgb = np.asarray(rgb)
    if rgb.ndim == 2:
        # A single-channel image has no channel axis to combine.
        return rgb.astype(float)
    r, g, b = rgb[:, :, 0], rgb[:, :, 1], rgb[:, :, 2]
    return 0.2989 * r + 0.5870 * g + 0.1140 * b
############### Walk every page and collect its strips into a list
# all_content[p][s] is strip s of the p-th page that was processed.
all_content = []
## Structuring element: a 30x30 block of ones (loop-invariant, so built once).
kernel = np.ones((30, 30), np.uint8)
for imm in dirlist:
    ##################################################
    src = cv2.imread(imm, cv2.IMREAD_UNCHANGED)
    if src is None:
        # cv2.imread returns None for entries it cannot decode; skip them.
        continue
    ## Erode with the default iteration count, then average each pixel row.
    erosion = cv2.erode(src, kernel)
    # A single-channel image has no channel axis and is used as it is.
    erosiongray = erosion if erosion.ndim == 2 else rgb2gray(erosion)
    y = np.mean(erosiongray, 1)
    # Indices of the rows whose mean brightness is below the threshold of 100.
    index_b = list(np.where(y < 100)[0])
    if not index_b:
        # No dark rows on this page, so there is nothing to cut out.
        continue
    # Cut positions: 30 rows above the first dark row, the midpoint of every
    # gap (a jump of 2 or more between consecutive dark rows), and 30 rows
    # below the last dark row.
    listcut = [index_b[0] - 30]
    for upper, lower in zip(index_b, index_b[1:]):
        if lower - upper >= 2:
            listcut.append((lower + upper) // 2)
    listcut.append(index_b[-1] + 30)
    #########################################################
    # Crop the strips from channel 0 of the original (un-eroded) image.
    page_content = []
    channel = src if src.ndim == 2 else src[:, :, 0]
    for top, bottom in zip(listcut, listcut[1:]):
        # Each strip gets a 10-row margin on both sides. The start is clamped
        # at 0 because a negative start would make the slice count from the
        # bottom of the image.
        start = max(int(top) - 10, 0)
        page_content.append(channel[start:int(bottom) + 10, :])
    all_content.append(page_content)
####################### Take strips back out of the list and stitch them, using one instrument as the example
# Strip 0 of every page from page index 1 onwards, stacked top to bottom in
# page order with a single vstack call.
# NOTE(review): page index 0 is left out - presumably a cover page; confirm.
a = np.vstack([page[0] for page in all_content[1:]])
img = Image.fromarray(a)
img.show()
歡迎大家交流想法~