簡書一直用著挺好的,唯一讓我難受的點就是管理文章的頁面沒有搜索,文章多了就很難找之前寫過的文章。另外,有好長時間了,簡書的圖片一直加載不出來,我起初以為是我的網絡開代理造成的,搜索一番才知道是簡書截停了Firefox的訪問,不清楚出於什麼原因,但是堅定了我撤離簡書的想法。
簡書還是很大度的,在 設置-賬號設置 裡可以打包下載所有文章,結果是純markdown文本,很容易做遷移。
困難來到Ghost這邊,Ghost支持幾個國外平台的導入,國內的就不用想了。Ghost提供導入和導出選項,我這裡仿造Ghost的導出格式,把簡書的文章塞進去,再導回去。
Ghost內容生成
文章末尾提供了一個python腳本,用於生成Ghost的導入文件。
聲明:腳本和本文所述內容可能造成未知問題,使用前確保你清楚其中的功能並做好備份,本人不對造成的任何損失負責,轉載請註明出處。
首先介紹一下這個腳本的輸入輸出:
- 輸入
- 簡書導出的rar文件
- Ghost導出的json文件,用於獲取Ghost中的配置信息
- 輸出
- json格式的Ghost導入文件,包含文章信息
- zip格式的Ghost導入文件,包含圖片信息,兩個文件需要分開單獨導入
腳本依賴
- 使用系統中的 `7z` 命令進行壓縮和解壓,所以運行前保證你在系統命令行中可以使用7z。
- 需要使用 `requests` 下載簡書的圖片,使用 `pip install requests` 進行安裝
腳本運行
找到main函數,這裡有四個參數,改成你的,執行就好了,生成文件放在了簡書導出的rar文件同級的目錄,文章名在下載時簡書出於通用性考慮把特殊字符換成了“-”,和本文無關。
設置參數
去我的Ghost,看看效果吧:http://ray.twig.ink
import os
import json
from pathlib import Path
import datetime
import subprocess
def handle_img(post_info, save_path, featured_first_img):
    """Download a post's images and rewrite their Markdown links for Ghost.

    Args:
        post_info: Post dict. ``post_info['markdown']`` is read, and
            ``post_info['feature_image']`` may be set as a side effect.
        save_path: Directory under which ``content/images/<year>/<month>/``
            is created to hold the downloaded files.
        featured_first_img: When true and the post has no feature image yet,
            the first successfully downloaded image becomes the feature image.

    Returns:
        The Markdown text in which every successfully downloaded image link
        points at ``__GHOST_URL__/content/images/<year>/<month>/<file>``.
        Links whose download failed keep their original URL. Text that does
        not contain ``https://upload-images`` is returned unchanged.
    """
    md_str = post_info['markdown']
    if 'https://upload-images' not in md_str:
        return md_str

    # Imported only when there is something to download, so posts without
    # images do not require the third-party ``requests`` package.
    import re
    import requests

    # Matches Markdown image links of the form ![alt text](image_url).
    pattern = r'!\[(.*?)\]\((.*?)\)'

    now = datetime.datetime.now()
    _rel_path = f'/content/images/{now.year}/{now.month}/'
    # Both values end with a slash, so file names are appended directly.
    ghost_image_path = f'__GHOST_URL__{_rel_path}'
    image_save_path = f'{save_path}{_rel_path}'
    os.makedirs(image_save_path, exist_ok=True)

    def file_name_of(url):
        """Return the last path segment of *url*, ignoring any query string."""
        return url.split('?')[0].split('/')[-1]

    # Download every referenced image, remembering which ones succeeded.
    downloaded = set()
    for _alt, url in re.findall(pattern, md_str):
        img_file_name = file_name_of(url)
        if not img_file_name:
            continue
        print(f'downloading.. {url}')
        try:
            response = requests.get(url, timeout=30)
        except requests.RequestException as exc:
            # One unreachable image should not abort the whole migration.
            print(f'download failed: {url} ({exc})')
            continue
        if response.status_code != 200:
            print(f'download failed: {url} (HTTP {response.status_code})')
            continue
        with open(os.path.join(image_save_path, img_file_name), 'wb') as file:
            file.write(response.content)
        downloaded.add(img_file_name)
        if featured_first_img and post_info['feature_image'] is None:
            # Same URL shape as the rewritten body links (no doubled slash).
            post_info['feature_image'] = f'{ghost_image_path}{img_file_name}'

    def replace_image_url(match):
        """Point a downloaded image's link at its Ghost location."""
        alt_text = match.group(1)
        image_name = file_name_of(match.group(2))
        if image_name not in downloaded:
            # Keep the original link rather than reference a missing file.
            return match.group(0)
        return f'![{alt_text}]({ghost_image_path}{image_name})'

    return re.sub(pattern, replace_image_url, md_str)
def md_to_mobiledoc(markdown, mobiledoc_version):
    """Wrap Markdown text in a one-card mobiledoc document and return it as JSON.

    Args:
        markdown: The Markdown source to embed in the ``markdown`` card.
        mobiledoc_version: Value stored under the document's ``version`` key.

    Returns:
        The mobiledoc document serialized as a JSON string. Non-ASCII
        characters are written as-is rather than as ``\\uXXXX`` escapes.
    """
    document = {
        'version': mobiledoc_version,
        'markups': [],
        'atoms': [],
        'cards': [['markdown', {'cardName': 'markdown', 'markdown': markdown}]],
        # NOTE(review): presumably "[10, 0]" is a card section referencing
        # cards[0] - confirm against the mobiledoc format specification.
        'sections': [[10, 0]],
    }
    return json.dumps(document, ensure_ascii=False)
def generate_uuid():
    """Return a freshly generated random (version 4) UUID as a string."""
    import uuid  # local import: ``uuid`` is not among the file-level imports

    return str(uuid.uuid4())
def generate_id():
    """Return a random 24-character lowercase hex id in Ghost's id format.

    Per the original author's note, Ghost does not keep this id on import:
    it generates its own. The ids are still needed to link posts, tags and
    authors to each other inside the import file.
    """
    import uuid  # local import: ``uuid`` is not among the file-level imports

    # The last 24 of the UUID's 32 hex digits.
    return uuid.uuid4().hex[-24:]
def read_jianshu(zip_path: str):
    """Extract the Jianshu export archive and load every Markdown file in it.

    The archive is unpacked into a sibling directory named after the archive
    (its file name without the extension). Each ``.md`` file found there
    becomes one post; the name of the directory that directly contains the
    file is used as the post's tag.

    Args:
        zip_path: Path of the archive exported from Jianshu.

    Returns:
        A ``(posts, tags)`` tuple. ``posts`` is a list of dicts with the keys
        ``id``, ``tag``, ``tag_id``, ``title`` (file name without extension),
        ``markdown`` (file content) and ``feature_image`` (initially ``None``).
        ``tags`` maps each tag name to its generated id.
    """
    archive = Path(zip_path)
    extract_to = os.path.join(archive.parent, archive.stem)
    unzip_file(zip_path, extract_to)

    posts = []
    tags = {}
    for md_file in find_md_files(extract_to):
        md_path = Path(md_file)
        tag = md_path.parent.name
        # All posts in the same directory share one tag id.
        if tag not in tags:
            tags[tag] = generate_id()
        with open(md_file, 'r', encoding='utf-8') as file:
            markdown = file.read()
        posts.append({
            'id': generate_id(),
            'tag': tag,
            'tag_id': tags[tag],
            'title': md_path.stem,
            'markdown': markdown,
            'feature_image': None,
        })
    return posts, tags
def unzip_file(zip_path, extract_to):
    """Extract an archive into ``extract_to`` with the external ``7z`` command.

    The target directory is created if needed and existing files are
    overwritten without prompting (``-aoa``). 7z's standard output is printed.

    Args:
        zip_path: Path of the archive to extract.
        extract_to: Directory that receives the extracted files.

    Raises:
        FileNotFoundError: If the ``7z`` executable cannot be found.
        RuntimeError: If 7z exits with anything other than success (0) or a
            non-fatal warning (1).
    """
    os.makedirs(extract_to, exist_ok=True)
    res = subprocess.run(
        ['7z', 'x', zip_path, f'-o{extract_to}', '-aoa'],
        capture_output=True,
        text=True,
        errors='replace',  # undecodable bytes in 7z's output must not crash us
    )
    print(res.stdout)
    if res.returncode != 0:
        print(res.stderr)
    if res.returncode not in (0, 1):
        # Continuing would silently produce an import file with no posts.
        raise RuntimeError(
            f'7z failed to extract {zip_path} (exit code {res.returncode})'
        )
def zip_file(folder_to_compress, compress_to):
    """Add a folder to an archive with the external ``7z`` command.

    7z's standard output is printed.

    Args:
        folder_to_compress: Directory to add to the archive.
        compress_to: Path of the archive to write.

    Raises:
        FileNotFoundError: If the ``7z`` executable cannot be found.
        RuntimeError: If 7z exits with anything other than success (0) or a
            non-fatal warning (1).
    """
    res = subprocess.run(
        ['7z', 'a', compress_to, folder_to_compress],
        capture_output=True,
        text=True,
        errors='replace',  # undecodable bytes in 7z's output must not crash us
    )
    print(res.stdout)
    if res.returncode != 0:
        print(res.stderr)
    if res.returncode not in (0, 1):
        raise RuntimeError(
            f'7z failed to create {compress_to} (exit code {res.returncode})'
        )
def find_md_files(directory):
    """Yield the path of every ``.md`` file under *directory*, recursively.

    Args:
        directory: Root directory to walk.

    Yields:
        Paths (the walked directory joined with the file name) of files whose
        name ends with ``.md``. Nothing is yielded for a missing directory.
    """
    for root, _dirs, files in os.walk(directory):
        for file_name in files:
            if file_name.endswith('.md'):
                yield os.path.join(root, file_name)
def build_ghost(post_infos: list[dict], ghost_config: dict, tags) -> dict:
    """Assemble a Ghost import document from the converted posts.

    The exported Ghost document is used as a template: a copy of it is
    returned with its ``posts``, ``tags``, ``posts_tags`` and
    ``posts_authors`` tables replaced by the entries built here. The caller's
    ``ghost_config`` is left unmodified.

    Args:
        post_infos: Post dicts providing ``id``, ``title``, ``feature_image``,
            ``mobiledoc``, ``post_status`` and ``tag_id``.
        ghost_config: Parsed Ghost export; its first user becomes the author
            of every post.
        tags: Mapping of tag name to tag id.

    Returns:
        A new dict with the same layout as ``ghost_config``, ready to be
        written out as JSON.

    Raises:
        KeyError, IndexError: If ``ghost_config`` lacks ``db[0].data.users[0]``.
    """
    import copy  # local import: ``copy`` is not among the file-level imports

    # Current UTC time as YYYY-MM-DDTHH:MM:SS.000Z, shared by every record.
    current_time = datetime.datetime.now(datetime.timezone.utc)
    formatted_time = current_time.strftime('%Y-%m-%dT%H:%M:%S.000Z')

    author_id = ghost_config['db'][0]['data']['users'][0]['id']

    posts_authors = [
        {
            'id': generate_id(),
            'post_id': post['id'],
            'author_id': author_id,
            'sort_order': 0,
        }
        for post in post_infos
    ]
    posts = [
        {
            'id': post['id'],
            'uuid': generate_uuid(),
            'title': post['title'],
            'feature_image': post['feature_image'],
            'mobiledoc': post['mobiledoc'],
            'type': 'post',
            'status': post['post_status'],
            'visibility': 'public',
            'email_recipient_filter': 'all',
            'created_at': formatted_time,
            'updated_at': formatted_time,
            'published_at': formatted_time,
            'show_title_and_feature_image': 1,
        }
        for post in post_infos
    ]
    posts_tags = [
        {
            'id': generate_id(),
            'post_id': post['id'],
            'tag_id': post['tag_id'],
            'sort_order': 0,
        }
        for post in post_infos
    ]
    ghost_tags = [
        {
            'id': tag_id,
            'name': tag,
            'visibility': 'public',
            'created_at': formatted_time,
            'updated_at': formatted_time,
        }
        for tag, tag_id in tags.items()
    ]

    res = copy.deepcopy(ghost_config)
    res_post = res['db'][0]['data']
    # Per the original author: a Ghost import is incremental, so the posts
    # already present in the export do not need to be kept.
    res_post['posts'] = posts
    res_post['tags'] = ghost_tags
    res_post['posts_tags'] = posts_tags
    # Without this the author links built above were never emitted, and the
    # export's stale rows (pointing at the removed posts) stayed in the file.
    res_post['posts_authors'] = posts_authors
    return res
def get_mobiledoc_version(ghost_config, default='0.3.1'):
    """Return the mobiledoc version used by the posts of a Ghost export.

    The posts under ``ghost_config['db'][0]['data']['posts']`` are scanned in
    order, and the ``version`` of the first post with a parseable mobiledoc
    document is returned. Posts whose ``mobiledoc`` field is empty or is not
    a valid JSON object are skipped.

    Args:
        ghost_config: Parsed Ghost export.
        default: Version returned when no post provides one.
            NOTE(review): '0.3.1' is believed to be the version shown in
            Ghost's documented import example - confirm for your Ghost release.

    Returns:
        The mobiledoc version string, or ``default`` if none was found.
    """
    posts = ghost_config['db'][0]['data'].get('posts') or []
    for post in posts:
        raw = post.get('mobiledoc')
        if not raw:
            continue
        try:
            version = json.loads(raw).get('version')
        except (TypeError, ValueError, AttributeError):
            # Not a JSON string, malformed JSON, or not a JSON object.
            continue
        if version:
            return version
    return default
def main(
    zip_path='/Users/era/Downloads/user-7914065-1730503948.rar',
    ghost_json_path='/Users/era/Downloads/tui-ge.ghost.2024-11-02-00-00-48.json',
    post_status='published',
    first_img_as_feature=True,
):
    """Convert a Jianshu export into Ghost import files.

    Two files are written next to the Jianshu archive: ``<archive>.json``
    with the posts, and ``<archive>-pictures.zip`` with the downloaded
    images. The zip is only created when a ``content`` directory exists
    beside the archive.

    Args:
        zip_path: Path of the archive exported from Jianshu.
        ghost_json_path: Path of a Ghost export. Its data is used as the
            template for the import file, so it should contain posts.
        post_status: Status given to imported posts, ``'draft'`` or
            ``'published'``.
        first_img_as_feature: Use each post's first image as its cover.
    """
    archive = Path(zip_path)
    post_infos, tags = read_jianshu(zip_path)

    # Explicit encoding: the platform default may not be UTF-8.
    with open(ghost_json_path, encoding='utf-8') as file:
        ghost_config = json.load(file)

    mobiledoc_version = get_mobiledoc_version(ghost_config)
    for info in post_infos:
        # Rewrite the image links first, then convert the result to mobiledoc.
        md_str = handle_img(info, archive.parent, first_img_as_feature)
        info['mobiledoc'] = md_to_mobiledoc(md_str, mobiledoc_version)
        info['post_status'] = post_status
    print('download completed.')

    ghost_res = build_ghost(post_infos, ghost_config, tags)

    # Derive the output names from the archive's stem. Replacing the text
    # '.rar' would leave the path unchanged for any other extension and
    # overwrite the input archive.
    output_json_path = str(archive.with_suffix('.json'))
    output_zip_path = str(archive.with_name(f'{archive.stem}-pictures.zip'))

    with open(output_json_path, 'w', encoding='utf-8') as json_file:
        json.dump(ghost_res, json_file, indent=4, ensure_ascii=False)

    content_dir = f'{archive.parent}/content'
    if os.path.isdir(content_dir):
        zip_file(content_dir, output_zip_path)
        print(f"All done! Data saved to {output_json_path},{output_zip_path}")
    else:
        # No image was downloaded, so there is nothing to archive.
        print(f"All done! Data saved to {output_json_path} (no images to pack)")
if __name__ == "__main__":
    # Prerequisites:
    #   pip install requests
    #   make sure the 7z command is available
    main()
參考
- json結構 https://ghost.org/docs/migration/custom/
- 導入圖片 https://ghost.org/help/imports/#image-imports
- 導入內容 https://ghost.org/docs/migration/content/