常用縮寫
- FSAL - File System Abstraction Layer
- DRC - Duplicate Request/Reply Cache
- CMAL - Cluster Management Abstraction Layer
- XDR - eXternal Data Representation
- SAL - State Abstraction Layer
- cih - Cache inode hashed dictionary
- TI-RPC (transport-independent RPC)
- TS-RPC (transport-specific RPC)
1. FSAL module
每個FSAL module對應一個so文件,例如`libfsalvfs.so`。每個FSAL module描述了對某種文件系統的實現。有時也簡稱為fsal或module,對應`struct fsal_module`,變量名常用fsal或者fsal_hdl。
常見的FSAL module
- PSEUDO, 用于根目錄,或其他pseudo fs
- MDCACHE(比較特殊)
- VFS
- CEPH
struct fsal_module {
struct glist_head fsals; //所有fsal通過它連成鏈表江咳,存在`fsal_list`全局變量中
struct glist_head exports;//這個(gè)fsal下所有export形成的鏈表頭
struct glist_head handles;//這個(gè)fsal下所有handle形成的鏈表頭
struct glist_head servers;//Data Servers鏈表頭,pnfs使用用
char *path; //so模塊的path
char *name; //module的名字
void *dl_handle;//當(dāng)用dlopen動(dòng)態(tài)加載時(shí)候哥放,返回的句柄
struct fsal_ops m_ops; 這個(gè)模塊提供的通用函數(shù)歼指,如unload,create_export等等
pthread_rwlock_t lock;//在使用上面的鏈表時(shí)候甥雕,用到的lock
int32_t refcount;//Reference count
struct fsal_stats *stats; /*< for storing the FSAL specific stats */
struct fsal_staticfsinfo_t fs_info; /*< for storing FSAL static info */
};
相關函數:
- load_fsal: 指定文件系統的名字,如VFS,加載相應模塊,例如`libfsalvfs.so`,返回module。
- unload_fsal
- lookup_fsal: 指定文件系統的名字,從內存里找到module。
- register_fsal:將module注冊進系統,加入到`fsal_list`全局變量中。
2. FSAL Export
在配置文件中每描述一個export,ganesha就創建一個export,對應一個文件系統,每個export都有一個export id,都對應一個FSAL module。ganesha會自動創建一個根目錄的export,其export id是0。可以理解export是FSAL module的一個實例。
EXPORT
{
Export_ID=1;
Path = /tmp;
Pseudo = /vfs;
Access_Type = RW;
Protocols = 4;
Transports = TCP;
FSAL
{
Name = VFS;
}
}
2.1 gsh_export
`struct gsh_export`是在解析配置文件中用到的數據,不是太重要。
/* Export data used while parsing the configuration file (abridged). */
struct gsh_export {
struct glist_head exp_list; // list linkage; the list head is the global variable exportlist
struct avltree_node node_k; // node used to store this export in an AVL tree; the key is export_id
char *fullpath;
char *pseudopath; //PseudoFS path
uint16_t export_id;
struct fsal_export *fsal_export; // points to the fsal export
...
};
相關函數:
- alloc_export
- free_export
- insert_gsh_export
- get_gsh_export
- get_gsh_export_by_path
- mount_gsh_export
- remove_gsh_export
- foreach_gsh_export
2.2 fsal_export
`struct fsal_export`代表著export。
/* FSAL-level representation of an export; exports can be stacked via sub_export/super_export. */
struct fsal_export {
struct glist_head exports;// list linkage; fsal->exports is the head of the list of all exports of the same FSAL
struct fsal_module *fsal; // points to the fsal
const struct fsal_up_vector *up_ops; //Upcall operations
struct export_ops exp_ops;// operations that do not need an inode, e.g. lookup_path, set_quota
struct fsal_export *sub_export; // the export below this one
struct fsal_export *super_export;// the export above this one
uint16_t export_id; //export id
};
有意思的是export可以分層,形成一個stack的結構。其實一般分為兩層,最上層是MDCACHE對應的export,下層是真正文件系統對應(如VFS)的export。這樣的好處是,IO先進入到MDCACHE的export,如果能處理則直接返回,如果不能則調用下層export去處理。
相關函數:
- fsal_attach_export 將export插入到這個FSAL對應鏈表中
- fsal_export_stack 將兩個export黏在一起,形成上下層關系
2.3 擴展的FSAL export
對于不同FSAL來說,都會擴展export的含義。如FSAL VFS的export定義為:
/* VFS-specific export: a struct fsal_export extended with VFS fields. */
struct vfs_fsal_export {
struct fsal_export export; // base object; this struct extends fsal_export
struct fsal_filesystem *root_fs;// fs of the root directory
struct glist_head filesystems;// head of the list of all FS under this export
int fsid_type;
bool async_hsm_restore;
};
再如FSAL MDCACHE的export定義為:
/* MDCACHE-specific export: a struct fsal_export extended with MDCACHE fields. */
struct mdcache_fsal_export {
struct fsal_export mfe_exp; // base object; this struct extends fsal_export
char *name;
struct fsal_up_vector up_ops;
struct fsal_up_vector super_up_ops;
struct glist_head entry_list;
pthread_rwlock_t mdc_exp_lock;
uint8_t flags;
};
不同FSAL的export的創建是由不同函數實現的
fsal->m_ops.create_export(...)
例如對應VFS的create_export函數指針對應vfs_create_export()
3. Object handle
對應`struct fsal_obj_handle`,變量名常用obj。每個object handle對應一個文件或者目錄。
/* Generic object handle; each one corresponds to a file or a directory. */
struct fsal_obj_handle {
struct glist_head handles;// all handles of the same fsal are linked through this; the list head is fsal->handles
struct fsal_filesystem *fs;// points to the FS this object belongs to
struct fsal_module *fsal; // points to the fsal module
struct fsal_obj_ops *obj_ops; // e.g. lookup, readdir, getattrs, read2, write2, etc.
pthread_rwlock_t obj_lock;
object_file_type_t type; /*< Object file type */
fsal_fsid_t fsid;
uint64_t fileid; // ID that is unique within the same fsid, e.g. the inode number
struct state_hdl *state_hdl; // state associated with the object; see vfs_state_locate()
};
3.1 擴充的Object handle
對于不同FSAL來說,需要擴充這個對象。例如VFS對應的Object handle:
/* VFS-specific object handle: a struct fsal_obj_handle extended with VFS fields. */
struct vfs_fsal_obj_handle {
struct fsal_obj_handle obj_handle; // base object; this struct extends fsal_obj_handle
fsal_dev_t dev;
vfs_file_handle_t *handle;
struct vfs_subfsal_obj_ops *sub_ops; /*< Optional subfsal ops */
const struct fsal_up_vector *up_ops; /*< Upcall operations */
/* NOTE(review): the active union member is presumably chosen by obj_handle.type — confirm */
union {
struct {
struct fsal_share share;
struct vfs_fd fd;
} file;
struct {
unsigned char *link_content;
int link_size;
} symlink;
struct {
vfs_file_handle_t *dir;
char *name;
} unopenable;
} u;
};
例如MDCACHE對應的Object handle:
/* MDCACHE-specific object handle (abridged): wraps the handle of the FSAL below it. */
struct mdcache_fsal_obj_handle {
	struct fsal_obj_handle obj_handle;	/* base object; this struct extends fsal_obj_handle */
	struct fsal_obj_handle *sub_handle;	/* handle of the more concrete fsal below mdcache, e.g. the VFS handle */
	...
};
4. 不同level的ops函數指針
4.1 FSAL module的ops
`struct fsal_ops`,FSAL module級別的ops
/*
 * Module-level (struct fsal_ops) operation table; every entry names the
 * function installed for that operation.
 */
struct fsal_ops def_fsal_ops = {
	/* configuration and module teardown */
	.init_config = init_config,
	.dump_config = dump_config,
	.unload = unload_fsal,
	.emergency_cleanup = emergency_cleanup,

	/* export creation */
	.create_export = create_export,

	/* device info and pnfs_ds entries */
	.getdeviceinfo = getdeviceinfo,
	.fs_da_addr_size = fs_da_addr_size,
	.fsal_pnfs_ds = fsal_pnfs_ds,
	.fsal_pnfs_ds_ops = fsal_pnfs_ds_ops,

	/* statistics */
	.fsal_extract_stats = fsal_extract_stats,
	.fsal_reset_stats = fsal_reset_stats,
};
4.2 export的ops
`struct export_ops`,文件系統級別的ops
/*
 * Fill in the export-level operation table with the VFS implementations.
 * Only the entries assigned below are written; the rest of *ops is left
 * as the caller provided it.
 */
void vfs_export_ops_init(struct export_ops *ops)
{
	/* path and handle resolution */
	ops->lookup_path = vfs_lookup_path;
	ops->create_handle = vfs_create_handle;
	ops->wire_to_host = wire_to_host;

	/* filesystem info and quota */
	ops->get_fs_dynamic_info = get_dynamic_info;
	ops->get_quota = get_quota;
	ops->set_quota = set_quota;

	/* state allocation */
	ops->alloc_state = vfs_alloc_state;
	ops->free_state = vfs_free_state;

	/* export release */
	ops->release = release;
}
4.3 obj的ops
`struct fsal_obj_ops`,文件對象層的ops。
void vfs_handle_ops_init(struct fsal_obj_ops *ops)
{
fsal_default_obj_ops_init(ops);
ops->release = release;
ops->merge = vfs_merge;
ops->lookup = lookup;
ops->readdir = read_dirents;
ops->mkdir = makedir;
ops->mknode = makenode;
ops->symlink = makesymlink;
ops->readlink = readsymlink;
ops->getattrs = vfs_getattr2;
ops->link = linkfile;
ops->rename = renamefile;
ops->unlink = file_unlink;
ops->close = vfs_close;
ops->handle_to_wire = handle_to_wire;
ops->handle_to_key = handle_to_key;
ops->open2 = vfs_open2;
ops->reopen2 = vfs_reopen2;
ops->read2 = vfs_read2;
ops->write2 = vfs_write2;
ops->commit2 = vfs_commit2;
ops->list_ext_attrs = vfs_list_ext_attrs;
ops->getextattr_id_by_name = vfs_getextattr_id_by_name;
ops->getextattr_value_by_name = vfs_getextattr_value_by_name;
ops->getextattr_value_by_id = vfs_getextattr_value_by_id;
ops->setextattr_value = vfs_setextattr_value;
ops->setextattr_value_by_id = vfs_setextattr_value_by_id;
ops->remove_extattr_by_id = vfs_remove_extattr_by_id;
ops->remove_extattr_by_name = vfs_remove_extattr_by_name;
}
5. MDCACHE和其他FSAL的交互
MDCACHE的export位于其他FSAL的上層。
/* Abridged sketch: the MDCACHE read forwards the request to the handle of the FSAL below it. */
mdcache_read2()
{
...
/* call read2 on the sub-FSAL handle, passing mdc_read_cb and read_arg, wrapped in subcall() */
subcall(
entry->sub_handle->obj_ops->read2(entry->sub_handle, bypass,
mdc_read_cb, read_arg, arg)
);
}
6. IO處理線程的op_ctx
每個IO處理線程有個線程變量op_ctx,指向nfs_rpc_process_request()下的req_ctx局部變量。
/* Thread-local pointer to the operation context of the request this thread is handling. */
__thread struct req_op_context *op_ctx;
會在函數nfs_rpc_process_request的開頭設置op_ctx,并在結尾清理op_ctx
在init_root_op_context初始化op_ctx
在release_root_op_context恢復op_ctx
nfs4_mds_putfh()設置op_ctx->ctx_export和op_ctx->fsal_export
在所有的IO處理線程中,可以方便的從op_ctx得到export信息。
那么op_ctx都存了些什么東西呢,
/* Per-request operation context; reached through the thread-local op_ctx pointer. */
struct req_op_context {
struct user_cred *creds; /*< resolved user creds from request */
struct user_cred original_creds; /*< Saved creds */
struct group_data *caller_gdata;
gid_t *caller_garray_copy; /*< Copied garray from AUTH_SYS */
gid_t *managed_garray_copy; /*< Copied garray from managed gids */
int cred_flags; /* Various cred flags */
sockaddr_t *caller_addr; //IP connection info
const uint64_t *clientid; //Client ID
uint32_t nfs_vers;
uint32_t nfs_minorvers;
uint32_t req_type; /*< request_type NFS | 9P */
struct gsh_client *client; // client host info; hides the TCP, UDP and 9P connections inside it
struct gsh_export *ctx_export;// points to the gsh_export
struct fsal_export *fsal_export;// current export, usually the MDCACHE export
struct export_perms *export_perms; /*< Effective export perms */
nsecs_elapsed_t start_time; //start time of this op/request
nsecs_elapsed_t queue_wait; //time in wait queue
void *fsal_private; /*< private for FSAL use */
struct fsal_module *fsal_module; // fsal module, usually MDCACHE
struct fsal_pnfs_ds *fsal_pnfs_ds; // pNFS related
};
7. Compound中Operation的處理函數
/*
 * Dispatch table for the operations of an NFSv4 COMPOUND, indexed by
 * operation number (abridged). Each entry gives the operation's name, its
 * handler, the function that frees its result, the size of its result
 * structure, and the export permission flags associated with it.
 * Indices 0, 1 and 2 all dispatch to the illegal-operation handler.
 */
static const struct nfs4_op_desc optabv4[] = {
	[0] = { /* all out of bounds illegals go here to die */
		.name = "OP_ILLEGAL",
		.funct = nfs4_op_illegal,
		.free_res = nfs4_op_illegal_Free,
		.resp_size = sizeof(ILLEGAL4res),
		.exp_perm_flags = 0},
	[1] = {
		.name = "OP_ILLEGAL",
		.funct = nfs4_op_illegal,
		.free_res = nfs4_op_illegal_Free,
		.resp_size = sizeof(ILLEGAL4res),
		.exp_perm_flags = 0},
	[2] = {
		.name = "OP_ILLEGAL",
		.funct = nfs4_op_illegal,
		.free_res = nfs4_op_illegal_Free,
		.resp_size = sizeof(ILLEGAL4res),
		.exp_perm_flags = 0},
	[NFS4_OP_ACCESS] = {
		.name = "OP_ACCESS",
		.funct = nfs4_op_access,
		.free_res = nfs4_op_access_Free,
		.resp_size = sizeof(ACCESS4res),
		.exp_perm_flags = EXPORT_OPTION_MD_READ_ACCESS},
	[NFS4_OP_CLOSE] = {
		.name = "OP_CLOSE",
		.funct = nfs4_op_close,
		.free_res = nfs4_op_close_Free,
		.resp_size = sizeof(CLOSE4res),
		.exp_perm_flags = EXPORT_OPTION_MD_READ_ACCESS},
	[NFS4_OP_COMMIT] = {
		.name = "OP_COMMIT",
		.funct = nfs4_op_commit,
		.free_res = nfs4_op_commit_Free,
		.resp_size = sizeof(COMMIT4res),
		.exp_perm_flags = EXPORT_OPTION_MD_WRITE_ACCESS}
	//...
};