目前繪制染色體的函數很少,亦或者說繪制得很難看,因此有必要自己設計一個繪圖腳本。繪圖函數設計可以從頭使用svg包來繪制,也可以使用一些現有包進行繪制,這里我選擇的是使用html和css通過瀏覽器進行渲染。
結果如下:
輸出html截圖
python腳本如下:
#!/usr/bin/env python3
# -*- encoding: utf-8 -*-
#########################################################################
# Author: bqxiao
# File: draw_block.py
# Created time: 2024/12/23 17:03:00
# E-mail: xiao.benqi@qq.com
#########################################################################
# Usage: python draw_block.py -i input.txt -o output.html
import argparse
import html
import re
import sys

import seaborn as sns
import webcolors
# Help text shown by `-h`; kept as a raw string so the literal "\t" separators
# of the input format are displayed verbatim.
description=r"""
本腳本用于繪制染色體核型
輸入文件格式{chro}\t{group}\t{start}\t{end}\t{color}\t{annotation}:
1 group1 1 1963 #ffa500 A
1 group1 1964 3592 #ffa500 B
1 group1 3595 4681 #ffa500 C
1 group1 4684 5392 #ffff00 D
1 group1 5417 7075 #ffff00 E
2 group1 5 303 #0000ff K
2 group1 304 503 #008000 G
2 group1 594 1340 #008000 H
2 group1 1351 2287 #00ffff I
"""
parser = argparse.ArgumentParser(
    description=description,
    formatter_class=argparse.RawDescriptionHelpFormatter,
)

# Input and output file arguments.
parser.add_argument("-i", "--input", type=str, required=True, help="輸入文件路徑")
parser.add_argument("-o", "--output", type=str, required=True, help="輸出文件路徑")

# Toggles for the chromosome-name and group-name labels.
# `--chromosome_name/-c` is kept for backward compatibility, but because it is a
# store_true flag whose default is already True it can never switch the label
# off; `--no_chromosome_name` writes False to the same destination to allow that.
parser.add_argument("--chromosome_name", "-c", action="store_true", default=True, help="是否展示染色體名稱 (默認是展示)")
parser.add_argument("--no_chromosome_name", dest="chromosome_name", action="store_false", help="隱藏染色體名稱 (默認是展示)")
parser.add_argument("--group_name", "-g", action="store_true", default=False, help="是否展示分組名稱 (默認是隱藏)")

# CSS style settings.
parser.add_argument("--block_width", type=int, default=30, help="區(qū)塊寬度 (默認30)")
parser.add_argument("--block_font_size", type=int, default=16, help="區(qū)塊字體大小 (默認16)")
parser.add_argument("--block_font_color", type=str, default="#000000", help="區(qū)塊字體顏色 (默認黑色)")
parser.add_argument("--max_height", type=int, default=500, help="染色體最大高度 (默認500)")
parser.add_argument("--layout_width", type=int, default=50, help="布局寬度 (默認50)")
parser.add_argument("--layout_height", type=int, default=80, help="布局高度 (默認80)")
parser.add_argument("--layout_gap", type=int, default=30, help="每一組之間的間距 (默認30)")
parser.add_argument("--chromosome_gap", type=int, default=5, help="染色體之間的間距 (默認5)")
parser.add_argument("--title", type=str, default="Chromosome Karyotype", help="HTML頁面標題 (默認'Chromosome Karyotype')")
parser.add_argument("--chromosome_name_font_size", type=int, default=30, help="染色體名稱字體大小 (默認30)")
parser.add_argument("--chromosome_name_font_color", type=str, default="#000000", help="染色體名稱字體顏色 (默認黑色)")
parser.add_argument("--group_name_font_size", type=int, default=20, help="group名稱字體大小 (默認20)")
parser.add_argument("--group_name_font_color", type=str, default="#000000", help="group名稱字體顏色 (默認黑色)")
parser.add_argument("--block_border_size", type=int, default=0, help="區(qū)塊邊界大小 (默認0)")
parser.add_argument("--block_border_color", type=str, default="#333", help="區(qū)塊邊界顏色 (默認'#333')")

# Parse the command line.
args = parser.parse_args()

# Map the parsed values onto the module-level style constants used below.
BLOCK_WIDTH = args.block_width
BLOCK_FONT_SIZE = args.block_font_size
BLOCK_FONT_COLOR = args.block_font_color
MAX_HEIGHT = args.max_height
LAYOUT_WIDTH = args.layout_width
LAYOUT_HEIGHT = args.layout_height
LAYOUT_GAP = args.layout_gap
CHROMOSOME_GAP = args.chromosome_gap
TITLE = args.title
CHROMOSOME_NAME_FONT_SIZE = args.chromosome_name_font_size
CHROMOSOME_NAME_FONT_COLOR = args.chromosome_name_font_color
GROUP_NAME_FONT_SIZE = args.group_name_font_size
GROUP_NAME_FONT_COLOR = args.group_name_font_color
BLOCK_BORDER_SIZE = args.block_border_size
BLOCK_BORDER_COLOR = args.block_border_color

# Whether to render the chromosome-name and group-name labels.
SHOW_CHROMOSOME_NAME = args.chromosome_name
SHOW_GROUP_NAME = args.group_name
class Info:
    """One drawable block of a chromosome, as described by one input row.

    Holds the chromosome name, the group name, the start/end coordinates,
    the color string and the annotation text. Attributes are plain and
    mutable (the script later overwrites ``color``).
    """

    def __init__(self, chro: str, group: str, start: int, end: int, color: str, annotation: str):
        self.chro, self.group = chro, group
        self.start, self.end = start, end
        self.color, self.annotation = color, annotation

    def __repr__(self):
        # Brace-wrapped, comma-separated dump of all six fields; this text is
        # also used by the script as a lookup key, so its format matters.
        return f"{{{self.chro},{self.group},{self.start},{self.end},{self.color},{self.annotation}}}"

    # str() and repr() produce the same text.
    __str__ = __repr__
def is_valid_color(color_str):
    """Return True when *color_str* is accepted as a color specification.

    Three forms are tried in order: a 3- or 6-digit ``#`` hex code, an
    ``rgb(...)``/``rgba(...)`` expression, and finally a color name that
    ``webcolors.name_to_rgb`` can resolve. A ``ValueError`` from that lookup
    means "not a color" and yields False.
    """
    syntactic_patterns = (
        # 1. Hexadecimal color code.
        r"^#(?:[0-9a-fA-F]{3}){1,2}$",
        # 2. RGB or RGBA functional notation.
        r"^rgba?\(\s*(\d{1,3}|[0-2]?\d{1,2}|25[0-5])\s*,\s*(\d{1,3}|[0-2]?\d{1,2}|25[0-5])\s*,\s*(\d{1,3}|[0-2]?\d{1,2}|25[0-5])\s*(,\s*(0|1|0?\.\d+))?\)$",
    )
    if any(re.match(pattern, color_str) for pattern in syntactic_patterns):
        return True

    # 3. Color name, delegated to webcolors.
    try:
        webcolors.name_to_rgb(color_str)
    except ValueError:
        return False
    return True
def read_info(infile:str):
    """Parse the tab-separated block table and validate it.

    Each non-empty line must provide six tab-separated columns:
    chromosome, group, start, end, color, annotation. ``start`` and ``end``
    are parsed as integers and swapped when given in descending order.

    Lines that have fewer than six columns or non-integer coordinates are
    skipped; a warning naming the line is written to stderr so that dropped
    rows do not go unnoticed.

    Args:
        infile: Path of the input table.

    Returns:
        A dict mapping chromosome name -> dict mapping group name -> list of
        ``Info`` objects sorted by ``start``.

    Raises:
        SystemExit: With status 1 after reporting every pair of neighbouring
            blocks (within one chromosome/group) where the later block starts
            before the earlier one ends.
    """
    infoDict = {}
    # Maps str(Info) -> 1-based line number, used only for overlap reports.
    line_of = {}
    with open(infile) as handle:
        for lineno, raw in enumerate(handle, start=1):
            row = raw.strip("\n")
            if not row:
                continue
            fields = row.split("\t")
            try:
                chro, group = fields[0], fields[1]
                start, end = int(fields[2]), int(fields[3])
                color, annotation = fields[4], fields[5]
            except (IndexError, ValueError):
                print(f'Warning: skipping malformed line: File "{infile}", line {lineno}', file=sys.stderr)
                continue
            if start > end:
                start, end = end, start
            block = Info(chro, group, start, end, color, annotation)
            line_of[str(block)] = lineno
            infoDict.setdefault(chro, {}).setdefault(group, []).append(block)

    has_overlap = False
    for groups in infoDict.values():
        for blocks in groups.values():
            blocks.sort(key=lambda b: b.start)
            # Compare each block with its immediate predecessor. The test is
            # strict, so a block starting exactly at the previous end is accepted.
            for prev, cur in zip(blocks, blocks[1:]):
                if cur.start < prev.end:
                    has_overlap = True
                    print(prev, cur)
                    print(f'區(qū)塊有重疊: File "{infile}", line {line_of[str(prev)]}, in input file.\n File "{infile}", line {line_of[str(cur)]}, in input file.')
                    print(f"重疊大小{cur.start}-{prev.end} : {cur.start-prev.end}")
                    print()
    if has_overlap:
        # SystemExit is raised directly rather than via the site-provided exit().
        raise SystemExit(1)
    return infoDict
FileContent = read_info(args.input)

# Color scheme.
# Color-column values that are not recognised by is_valid_color() are treated
# as placeholder labels: each distinct label is assigned one generated palette
# color. When every value is already a color nothing is changed.
all_blocks = [
    block
    for group_info in FileContent.values()
    for info in group_info.values()
    for block in info
]
# Sorted so that the label -> color assignment is the same on every run
# (iterating a set of strings directly is subject to hash randomization).
colorList = sorted({block.color for block in all_blocks if not is_valid_color(block.color)})
color_num = len(colorList)
flag = not colorList  # True when every color value was already valid
if colorList:
    # NOTE(review): "Set2" has a limited number of distinct colors; with more
    # labels than that seaborn presumably repeats colors - confirm.
    color_scheme = sns.color_palette("Set2", color_num)
    # Convert the seaborn palette from [0, 1] floats to [0, 255] ints, then to
    # hex. round() avoids an off-by-one when c * 255 lands just below an integer.
    color_scheme_hex = [webcolors.rgb_to_hex(tuple(round(c * 255) for c in color)) for color in color_scheme]
    # Assign one generated color per placeholder label.
    oldColorToNewColor = dict(zip(colorList, color_scheme_hex))
    # Replace the placeholder labels in place.
    for block in all_blocks:
        if block.color in oldColorToNewColor:
            block.color = oldColorToNewColor[block.color]
# Find the largest end coordinate over all blocks; it defines the tallest
# chromosome and therefore the coordinate -> pixel scale.
max_height = max(
    (
        block.end
        for group_info in FileContent.values()
        for info in group_info.values()
        for block in info
    ),
    default=0,
)
if max_height <= 0:
    # Without a positive extent there is nothing to scale; stop with a clear
    # message instead of failing with ZeroDivisionError below.
    raise SystemExit(f'No drawable blocks found in "{args.input}": the largest end coordinate must be positive.')
# Pixels per coordinate unit, chosen so the largest end maps to MAX_HEIGHT px.
rate = MAX_HEIGHT / max_height
# One CSS rule per entry; the entries are joined with newlines into the page's
# <style> element later on.
all_CSS_List = [
    # Layout of the chromosomes relative to each other.
    f".CHROMOSOME-LAYOUT {{display: flex;justify-content: space-between;align-items: flex-end;width: {LAYOUT_WIDTH}%;height: {LAYOUT_HEIGHT}vh;margin: 0 auto;gap: {LAYOUT_GAP}px;}}",
    # Font style for chromosome names.
    f".CHROMOSOME-NAME-FONT {{font-family: 'Times New Roman', Times, serif;font-size: {CHROMOSOME_NAME_FONT_SIZE}px;font-weight: bold;color: {CHROMOSOME_NAME_FONT_COLOR};}}",
    # Layout of the blocks inside one chromosome (stacked in a column).
    f".CHROMOSOME {{display: flex;flex-direction: column;align-items: center;}}",
    # Appearance of a single block.
    f".BLOCK {{border: {BLOCK_BORDER_SIZE}px solid {BLOCK_BORDER_COLOR};width: {BLOCK_WIDTH}px;border-radius: {BLOCK_WIDTH / 2}px;text-align: center;font-size: {BLOCK_FONT_SIZE}px;font-weight: bold;color: {BLOCK_FONT_COLOR};font-family: 'Times New Roman', Times, serif;}}",
    # Container holding the groups of one chromosome side by side.
    f".GROUPS {{display: flex;justify-content: space-between;align-items: flex-end;gap:{CHROMOSOME_GAP}px;}}",
    # Group-name label.
    f".GROUP-NAME{{font-family: 'Times New Roman', Times, serif;font-size: {GROUP_NAME_FONT_SIZE}px;font-weight: bold;color: {GROUP_NAME_FONT_COLOR};}}",
    # Chromosome-name label.
    f".CHROMOSOME-NAME{{font-family: 'Times New Roman', Times, serif;font-size: {CHROMOSOME_NAME_FONT_SIZE}px;font-weight: bold;color: {CHROMOSOME_NAME_FONT_COLOR};}}",
    # Wrapper stacking the groups above the chromosome-name label.
    f".GROUPS-CHROMOSOME-NAME{{display: flex;flex-direction: column;align-items: center;}}",
]
# Build the page body: one GROUPS container per chromosome, holding one
# CHROMOSOME column per group, holding one BLOCK div per input row.
all_div_List = ["<div class='CHROMOSOME-LAYOUT'>"]
block_index = 0  # running counter used for the unique id="block<N>" attributes
for chro, group_info in FileContent.items():
    groupList = ["<div class='GROUPS'>"]
    for group, info in group_info.items():
        block_divs = []
        lastblock = None
        for thisblock in info:
            block_index += 1
            height = (thisblock.end - thisblock.start + 1) * rate
            if lastblock is None:
                # The first (lowest) block has no neighbour below it.
                margin = ""
            else:
                # Gap between two blocks: 99 and 100 are adjacent, so
                # 100 - 99 - 1 -> 0 means no gap.
                gap = thisblock.start - lastblock.end - 1
                margin = f"margin-bottom: {gap*rate}px;"
            # Annotation text comes from the input file, so escape it before
            # embedding it in HTML.
            block_divs.append(f'<div class="BLOCK" id="block{block_index}" style="height: {height}px;background-color:{thisblock.color};{margin}line-height: {height}px;">{html.escape(thisblock.annotation)}</div>')
            lastblock = thisblock
        # Blocks were built in ascending coordinate order; reverse them so the
        # lowest coordinate ends up at the bottom of the column.
        block_divs.reverse()
        divList = ["<div class='CHROMOSOME'>", *block_divs]  # one group's column
        if SHOW_GROUP_NAME:
            divList.append(f'<div class="GROUP-NAME">{html.escape(group)}</div>')
        divList.append("</div>")  # closes <div class='CHROMOSOME'>
        groupList.extend(divList)
    groupList.append("</div>")  # closes <div class='GROUPS'>
    if SHOW_CHROMOSOME_NAME:
        groupList = ["<div class='GROUPS-CHROMOSOME-NAME'>", *groupList, f'<div class="CHROMOSOME-NAME">{html.escape(chro)}</div>', "</div>"]
    all_div_List.extend(groupList)
all_div_List.append("</div>")  # closes <div class='CHROMOSOME-LAYOUT'>
body_content = "\n".join(all_div_List)
style_content = "\n".join(all_CSS_List)
# Assemble the final page. The title comes from the command line, so it is
# escaped before being embedded in the markup.
htmlContent = f"""
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>{html.escape(TITLE)}</title>
<style>
{style_content}
</style>
</head>
<body>
{body_content}
</body>
</html>"""
# The page declares charset UTF-8, so write it as UTF-8 explicitly instead of
# relying on the locale's default encoding.
with open(args.output, "w", encoding="utf-8") as f:
    f.write(htmlContent)
用法如下:
# python draw_block.py -h 可以查看幫助手冊
python draw_block.py -i input.txt -o output.html -g
輸入文件如下:
Chr1 24GB 1 1963 #ffa500 A
Chr1 24GB 1964 3592 #ffa500 B
Chr1 24GB 3595 4681 #ffa500 C
Chr1 24GB 4684 5392 #ffff00 D
Chr1 24GB 5417 7075 #ffff00 E
Chr2 24GB 5 303 #0000FF K
Chr2 24GB 304 503 #008000 G
Chr2 24GB 594 1340 #008000 H
Chr2 24GB 1351 2287 #00ffff I
Chr2 24GB 2288 4244 #00ffff J
Chr3 24GB 1 2674 #008000 F
Chr3 24GB 2676 3203 #0000FF L
Chr3 24GB 3274 4018 #0000FF M
Chr3 24GB 4048 5436 #0000FF N
Chr4 24GB 3 566 #0000FF O
Chr4 24GB 586 1094 #0000FF P
Chr4 24GB 1096 1524 #4b0082 T
Chr4 24GB 1527 4127 #4b0082 U
Chr5 24GB 4 2173 #0000FF R
Chr5 24GB 2174 2778 #0000FF Q
Chr5 24GB 2783 3533 #4b0082 S
Chr5 24GB 3534 4175 #FF7F7F V
Chr5 24GB 4176 5580 #FF7F7F W
Chr5 24GB 5581 6318 #FF7F7F X
Chr1 LF 1 222 LF B3
Chr1 LF 224 754 LF B6
Chr1 LF 756 1165 LF B4
Chr1 LF 1168 1192 LF B6
Chr1 LF 1208 1330 LF B4
Chr1 LF 1334 1722 LF B6
Chr1 LF 1723 3393 LF B4
Chr1 LF 3617 4674 LF B4
Chr1 LF 4686 5101 LF B7
Chr1 LF 5248 5392 LF B4
Chr1 LF 5420 6872 LF B3
Chr1 LF 6873 7074 LF B6
Chr1 MF1 7 1492 MF1 B2
Chr1 MF1 1492 3513 MF1 B3
Chr1 MF1 3956 4484 MF1 B6
Chr1 MF1 4504 4555 MF1 B4
Chr1 MF1 4654 5392 MF1 B4
Chr1 MF1 5414 7072 MF1 B5
Chr1 MF2 136 2190 MF2 B3
Chr1 MF2 2195 3014 MF2 B3
Chr1 MF2 3022 3132 MF2 B7
Chr1 MF2 3160 3201 MF2 B7
Chr1 MF2 3661 4484 MF2 B7
Chr1 MF2 4499 4555 MF2 B7
Chr1 MF2 4558 4674 MF2 B7
Chr1 MF2 4692 5268 MF2 B2
Chr1 MF2 5660 6377 MF2 B3
Chr1 MF2 6385 7074 MF2 B6
Chr2 LF 5 303 LF B2
Chr2 LF 315 483 LF B2
Chr2 LF 654 1836 LF B2
Chr2 LF 1944 2749 LF B6
Chr2 LF 2751 4028 LF B6
Chr2 MF1 5 303 MF1 B4
Chr2 MF1 846 1337 MF1 B3
Chr2 MF1 1368 4243 MF1 B1
Chr2 MF2 5 303 MF2 B4
Chr2 MF2 746 1347 MF2 B4
Chr2 MF2 1348 2285 MF2 B8
Chr2 MF2 2289 4224 MF2 B8
Chr3 LF 8 2651 LF B1
Chr3 LF 2676 3203 LF B2
Chr3 LF 3331 5429 LF B8
Chr3 MF1 6 2574 MF1 B7
Chr3 MF1 2677 3202 MF1 B4
Chr3 MF1 3841 4096 MF1 B5
Chr3 MF1 4109 4205 MF1 B5
Chr3 MF1 4206 5436 MF1 B6
Chr3 MF2 1 587 MF2 B7
Chr3 MF2 591 799 MF2 B3
Chr3 MF2 808 889 MF2 B7
Chr3 MF2 889 978 MF2 B3
Chr3 MF2 990 2369 MF2 B7
Chr3 MF2 2369 2614 MF2 B3
Chr3 MF2 2616 2674 MF2 B2
Chr3 MF2 2677 2755 MF2 B4
Chr3 MF2 2777 3133 MF2 B7
Chr3 MF2 3305 3873 MF2 B6
Chr3 MF2 3873 3965 MF2 B6
Chr3 MF2 3968 4276 MF2 B2
Chr3 MF2 4281 5429 MF2 B6
Chr4 LF 3 421 LF B8
Chr4 LF 784 1091 LF B8
Chr4 LF 1264 1296 LF B3
Chr4 LF 1316 4123 LF B5
Chr4 MF1 105 153 MF1 B5
Chr4 MF1 249 502 MF1 B5
Chr4 MF1 815 1091 MF1 B5
Chr4 MF1 1213 1313 MF1 B6
Chr4 MF1 1355 1406 MF1 B6
Chr4 MF1 1406 2151 MF1 B2
Chr4 MF1 2151 4116 MF1 B2
Chr4 MF2 3 421 MF2 B8
Chr4 MF2 445 561 MF2 B8
Chr4 MF2 649 1077 MF2 B8
Chr4 MF2 1203 1234 MF2 B7
Chr4 MF2 1323 1865 MF2 B7
Chr4 MF2 1870 2340 MF2 B3
Chr4 MF2 2405 3442 MF2 B3
Chr4 MF2 3449 3617 MF2 B3
Chr4 MF2 3622 4127 MF2 B3
Chr5 LF 4 2173 LF B2
Chr5 LF 2174 2738 LF B2
Chr5 LF 2852 3533 LF B1
Chr5 LF 3553 4175 LF B2
Chr5 LF 4176 4376 LF B2
Chr5 LF 4384 4502 LF B8
Chr5 LF 4532 5580 LF B2
Chr5 LF 5583 5682 LF B2
Chr5 LF 5683 6318 LF B6
Chr5 MF1 5 1639 MF1 B5
Chr5 MF1 1640 2173 MF1 B5
Chr5 MF1 2182 2690 MF1 B7
Chr5 MF1 2852 3532 MF1 B6
Chr5 MF1 3578 4172 MF1 B4
Chr5 MF1 4176 4376 MF1 B4
Chr5 MF1 4394 4607 MF1 B5
Chr5 MF1 4646 5580 MF1 B5
Chr5 MF1 5584 6317 MF1 B7
Chr5 MF2 4 2169 MF2 B8
Chr5 MF2 2174 2769 MF2 B4
Chr5 MF2 2829 2909 MF2 B7
Chr5 MF2 3072 3413 MF2 B1
Chr5 MF2 3545 4174 MF2 B4
Chr5 MF2 4176 4368 MF2 B7
Chr5 MF2 4482 5580 MF2 B8
Chr5 MF2 5582 6314 MF2 B4