0x01 線程環(huán)境初始化
線程模型回顧
Python
啟動後,真正有意義的初始化動作是從Py_Initialize
開始(當然Py_Initialize
之前也做了很多複雜的動作),Py_Initialize
中調用了Py_InitializeEx
。
// python.c -- Minimal main program -- everything is loaded from the library
/* Executable entry point: adjusts the FP environment where needed and then
 * hands control straight to Py_Main. */
int
main(int argc, char **argv)
{
#ifdef __FreeBSD__
    /* IEEE 754 wants floating-point exceptions to run in "no stop" mode by
     * default, and Python depends on that.  Some platforms enable FP
     * exceptions out of the box, so the overflow trap is masked off here
     * before anything else runs.
     */
    fp_except_t fp_mask = fpgetmask();

    fpsetmask(fp_mask & ~FP_X_OFL);
#endif
    return Py_Main(argc, argv);
}
// main.c -- Python interpreter main program
/* Main program */
/* Abridged sketch of the interpreter's main program: only the step relevant
 * to initialization is shown, the elided parts are marked with "......". */
int
Py_Main(int argc, char **argv)
{
    ......
    /* Bring up the runtime; Py_Initialize forwards to Py_InitializeEx(1). */
    Py_Initialize();
    ......
}
//Pythonrun.c
/* Default initialization entry point: identical to Py_InitializeEx with
 * signal-handler installation switched on. */
void
Py_Initialize(void)
{
    const int install_sigs = 1;  /* non-zero makes Py_InitializeEx call initsigs() */

    Py_InitializeEx(install_sigs);
}
/* Bootstrap the runtime: create the first interpreter state and thread
 * state, ready the built-in types, build the __builtin__ and sys modules,
 * set up the import machinery and exceptions, then create __main__ and
 * (unless Py_NoSiteFlag is set) import site.
 *
 * install_sigs: when non-zero, initsigs() is called to set up signal handling.
 *
 * The function is a no-op if it has already run (guarded by `initialized`).
 * The order of the steps below matters; later steps use state created by
 * earlier ones.
 */
void
Py_InitializeEx(int install_sigs)
{
PyInterpreterState *interp;
PyThreadState *tstate;
PyObject *bimod, *sysmod;
char *p;
extern void _Py_ReadyTypes(void);
// Only the first call does any work
if (initialized)
return;
initialized = 1;
// Fold the PYTHONDEBUG / PYTHONVERBOSE / PYTHONOPTIMIZE environment
// variables (when set and non-empty) into the corresponding global flags
if ((p = Py_GETENV("PYTHONDEBUG")) && *p != '\0')
Py_DebugFlag = add_flag(Py_DebugFlag, p);
if ((p = Py_GETENV("PYTHONVERBOSE")) && *p != '\0')
Py_VerboseFlag = add_flag(Py_VerboseFlag, p);
if ((p = Py_GETENV("PYTHONOPTIMIZE")) && *p != '\0')
Py_OptimizeFlag = add_flag(Py_OptimizeFlag, p);
// Create the interpreter state (the "process" state)
interp = PyInterpreterState_New();
if (interp == NULL)
Py_FatalError("Py_Initialize: can't make first interpreter");
// Create the thread state and make it the current one
tstate = PyThreadState_New(interp);
if (tstate == NULL)
Py_FatalError("Py_Initialize: can't make first thread");
(void) PyThreadState_Swap(tstate);
// Initialize the type system
_Py_ReadyTypes();
if (!_PyFrame_Init())
Py_FatalError("Py_Initialize: can't init frames");
if (!_PyInt_Init())
Py_FatalError("Py_Initialize: can't init ints");
_PyFloat_Init();
// Create the module registry (interp->modules)
interp->modules = PyDict_New();
if (interp->modules == NULL)
Py_FatalError("Py_Initialize: can't make modules dictionary");
interp->modules_reloading = PyDict_New();
if (interp->modules_reloading == NULL)
Py_FatalError("Py_Initialize: can't make modules_reloading dictionary");
// Initialize the __builtin__ module and cache its dict in interp->builtins
bimod = _PyBuiltin_Init();
if (bimod == NULL)
Py_FatalError("Py_Initialize: can't initialize __builtin__");
interp->builtins = PyModule_GetDict(bimod);
if (interp->builtins == NULL)
Py_FatalError("Py_Initialize: can't initialize builtins dict");
Py_INCREF(interp->builtins);
// Initialize the sys module and cache its dict in interp->sysdict
sysmod = _PySys_Init();
if (sysmod == NULL)
Py_FatalError("Py_Initialize: can't initialize sys");
interp->sysdict = PyModule_GetDict(sysmod);
if (interp->sysdict == NULL)
Py_FatalError("Py_Initialize: can't initialize sys dict");
Py_INCREF(interp->sysdict);
// Back up the sys module's dict in the extensions table
_PyImport_FixupExtension("sys", "sys");
// Set the module search path (sys.path)
PySys_SetPath(Py_GetPath());
// Publish sys.modules; it is the very same dict as interp->modules
PyDict_SetItemString(interp->sysdict, "modules",
interp->modules);
// Initialize the environment of the import mechanism
_PyImport_Init();
/* initialize builtin exceptions */
// Initialize Python's built-in exceptions
_PyExc_Init();
// Back up the exceptions module and the __builtin__ module
_PyImport_FixupExtension("exceptions", "exceptions");
/* phase 2 of builtins */
_PyImport_FixupExtension("__builtin__", "__builtin__");
// Add some objects to the sys module that the import mechanism uses
_PyImportHooks_Init();
if (install_sigs)
initsigs(); /* Signal handling stuff, including initintr() */
initmain(); /* Module __main__ */
if (!Py_NoSiteFlag)
initsite(); /* Module site */
// Importing warnings is best effort: a failure is cleared, not reported
warnings_module = PyImport_ImportModule("warnings");
if (!warnings_module)
PyErr_Clear();
}
Py_InitializeEx
中完成的一個重要工作就是加載多個基礎的module
(__builtin__
,sys
等),還會完成Python
類型系統的初始化和異常系統初始化。
// pystate.h
/* Interpreter ("process") state.  Instances are chained into a global list
 * through `next`, and each one owns a list of thread states. */
typedef struct _is {
struct _is *next; // next interpreter in the list headed by interp_head
struct _ts *tstate_head; // head of this interpreter's thread-state list (models the threads of a process)
PyObject *modules; // dict of loaded modules; exposed to Python as sys.modules
PyObject *sysdict; // dict of the sys module
PyObject *builtins; // dict of the __builtin__ module
PyObject *modules_reloading; // dict created during initialization; presumably tracks modules being reloaded -- TODO confirm
PyObject *codec_search_path; // codec registry fields, NULL on creation; exact use not visible here
PyObject *codec_search_cache;
PyObject *codec_error_registry;
} PyInterpreterState;
/* Per-thread state.  Every thread state points back at its interpreter and
 * is linked into that interpreter's tstate_head list through `next`. */
typedef struct _ts {
struct _ts *next; // next thread state of the same interpreter
PyInterpreterState *interp; // owning interpreter state
struct _frame *frame; // models the thread's function call stack
int recursion_depth;
int tracing;
int use_tracing;
// Profile/trace hooks and their argument objects (NULL on creation)
Py_tracefunc c_profilefunc;
Py_tracefunc c_tracefunc;
PyObject *c_profileobj;
PyObject *c_traceobj;
// presumably the exception currently being raised -- TODO confirm
PyObject *curexc_type;
PyObject *curexc_value;
PyObject *curexc_traceback;
// presumably the exception currently being handled -- TODO confirm
PyObject *exc_type;
PyObject *exc_value;
PyObject *exc_traceback;
PyObject *dict; /* Stores per-thread state */
int tick_counter;
int gilstate_counter;
PyObject *async_exc; /* Asynchronous exception to raise */
long thread_id; /* Thread id where this tstate was created */
int trash_delete_nesting;
PyObject *trash_delete_later;
} PyThreadState;
初始化線程環(huán)境
在win32
環(huán)境,當(dāng)執(zhí)行一個(gè)可執(zhí)行文件時(shí)废麻,操作系統(tǒng)首先會(huì)創(chuàng)建一個(gè)進(jìn)程內(nèi)核對(duì)象矢否。同樣,在Python
中也是如此脑溢,在Py_InitializeEx
的開始處僵朗,Python
會(huì)首先調(diào)用PyInterpreterState_New
創(chuàng)建一個(gè)嶄新的PyInterpreterState
對(duì)象。
// pystate.c
/* Head of the global singly linked list of interpreter states. */
static PyInterpreterState *interp_head = NULL;

/* Allocate an interpreter state with every object slot cleared and push it
 * onto the front of the interp_head list.
 *
 * Returns the new state, or NULL when malloc fails.
 */
PyInterpreterState *
PyInterpreterState_New(void)
{
    PyInterpreterState *state;

    state = (PyInterpreterState *)malloc(sizeof(PyInterpreterState));
    if (state == NULL) {
        return NULL;
    }

    HEAD_INIT();
#ifdef WITH_THREAD
    if (head_mutex == NULL) {
        Py_FatalError("Can't initialize threads for interpreter");
    }
#endif

    /* A fresh interpreter owns nothing yet. */
    state->tstate_head = NULL;
    state->modules = NULL;
    state->modules_reloading = NULL;
    state->sysdict = NULL;
    state->builtins = NULL;
    state->codec_search_path = NULL;
    state->codec_search_cache = NULL;
    state->codec_error_registry = NULL;

    /* Link in at the head of the global list while holding the head lock. */
    HEAD_LOCK();
    state->next = interp_head;
    interp_head = state;
    HEAD_UNLOCK();

    return state;
}
在Python
的運(yùn)行時(shí)環(huán)境中屑彻,有一個(gè)全局變量interp_head
(一個(gè)PyInterpreterState
鏈表)來管理PyInterpreterState
對(duì)象验庙,在Python
運(yùn)行時(shí)可能會(huì)有多個(gè)PyInterpreterState
對(duì)象鏈接在一起(這個(gè)就是對(duì)操作系統(tǒng)多進(jìn)程的模擬)。新創(chuàng)建的PyInterpreterState
對(duì)象如下所示:
在創(chuàng)建了PyInterpreterState
(進(jìn)程狀態(tài))對(duì)象之后社牲,Python
會(huì)調(diào)用PyThreadState_New
函數(shù)來創(chuàng)建PyThreadState
(線程狀態(tài))對(duì)象粪薛。
// pystate.c
/* Allocate a thread state bound to `interp` and push it onto the front of
 * interp->tstate_head.
 *
 * Also installs threadstate_getframe as the _PyThreadState_GetFrame hook the
 * first time it is called (this happens even if the allocation then fails).
 *
 * Every field of the malloc'ed structure is given a defined value, including
 * thread_id, trash_delete_nesting and trash_delete_later, which would
 * otherwise hold indeterminate garbage.
 *
 * Returns the new thread state, or NULL when malloc fails.
 */
PyThreadState *
PyThreadState_New(PyInterpreterState *interp)
{
    PyThreadState *tstate = (PyThreadState *)malloc(sizeof(PyThreadState));

    /* Install the accessor used to fetch a thread's call stack. */
    if (_PyThreadState_GetFrame == NULL)
        _PyThreadState_GetFrame = threadstate_getframe;

    if (tstate != NULL) {
        /* Tie the thread state to its interpreter state. */
        tstate->interp = interp;

        tstate->frame = NULL;
        tstate->recursion_depth = 0;
        tstate->tracing = 0;
        tstate->use_tracing = 0;
        tstate->tick_counter = 0;
        tstate->gilstate_counter = 0;
        tstate->async_exc = NULL;
#ifdef WITH_THREAD
        tstate->thread_id = PyThread_get_thread_ident();
#else
        tstate->thread_id = 0;
#endif

        tstate->dict = NULL;

        tstate->curexc_type = NULL;
        tstate->curexc_value = NULL;
        tstate->curexc_traceback = NULL;

        tstate->exc_type = NULL;
        tstate->exc_value = NULL;
        tstate->exc_traceback = NULL;

        tstate->c_profilefunc = NULL;
        tstate->c_tracefunc = NULL;
        tstate->c_profileobj = NULL;
        tstate->c_traceobj = NULL;

        tstate->trash_delete_nesting = 0;
        tstate->trash_delete_later = NULL;

        /* Tie the interpreter state to the thread state: link in at the
         * head of the interpreter's thread list under the head lock. */
        HEAD_LOCK();
        tstate->next = interp->tstate_head;
        interp->tstate_head = tstate;
        HEAD_UNLOCK();
    }
    return tstate;
}
與PyInterpreterState類似,PyThreadState
結構體中也存在一個next
指針,PyThreadState
在Python
運行時也會形成一個鏈表(模擬多線程)。
還會(huì)設(shè)置從線程中獲得函數(shù)調(diào)用棧(PyFrameObject
對(duì)象鏈表)的方法(threadstate_getframe
)违寿。
然后建立PyThreadState
對(duì)象和PyInterpreterState
對(duì)象的關(guān)系,結(jié)果如下所示:
在Python
的運(yùn)行時(shí)環(huán)境中熟空,有一個(gè)全局變量_PyThreadState_Current
(_PyThreadState_Current
維護(hù)了當(dāng)前活動(dòng)的線程藤巢,更準(zhǔn)確的說是當(dāng)前活動(dòng)線程對(duì)應(yīng)的PyThreadState
對(duì)象),初始化時(shí)該變量為NULL
息罗。創(chuàng)建完PyThreadState
對(duì)象后掂咒,會(huì)通過PyThreadState_Swap
函數(shù)設(shè)置_PyThreadState_Current
的值。
// pystate.c
/* Make `newts` the current thread state and hand back the one it replaced
 * (which may be NULL). */
PyThreadState *
PyThreadState_Swap(PyThreadState *newts)
{
    PyThreadState *previous;

    previous = _PyThreadState_Current;
    _PyThreadState_Current = newts;

    return previous;
}
接下來會(huì)通過_Py_ReadyTypes
函數(shù)初始化Python
類型系統(tǒng)(參考類機(jī)制剖析)迈喉。
然后通過_PyFrame_Init
函數(shù)設(shè)置全局變量builtin_object
绍刮。
// frameobject.c
/* Interned "__builtins__" string, created once by _PyFrame_Init. */
static PyObject *builtin_object;

/* Create the interned "__builtins__" string.
 * Returns 1 on success and 0 if the string could not be created. */
int _PyFrame_Init()
{
    builtin_object = PyString_InternFromString("__builtins__");
    if (builtin_object == NULL) {
        return 0;
    }
    return 1;
}
這個(gè)內(nèi)容為__builtins__
的PyStringObject
對(duì)象builtin_object
在PyFrame_New
創(chuàng)建一個(gè)新的PyFrameObject
對(duì)象時(shí)會(huì)發(fā)揮作用。
接下來會(huì)初始化一些邊邊角角的東西挨摸。
至此孩革,Py_InitializeEx
有了一個(gè)階段性結(jié)果,創(chuàng)建了代表進(jìn)程和線程的PyInterpreterState
和PyThreadState
對(duì)象得运,并且在他們之間建立了聯(lián)系膝蜈。接下來會(huì)進(jìn)入相對(duì)獨(dú)立的環(huán)節(jié):設(shè)置系統(tǒng)module
。
0x02 系統(tǒng)module初始化
創(chuàng)建__builtin__ module
在Py_InitializeEx
中,當Python
創建了PyThreadState
和PyInterpreterState
對象之後,就會開始通過_PyBuiltin_Init
來設置系統的__builtin__ module
了。在調用_PyBuiltin_Init
之前,Python
會將interp->modules
創建為一個PyDictObject
對象,這個對象中維護著所有的PyThreadState
對象共享的資源。
// Bltinmodule.c
/* Create the __builtin__ module and populate its dict with the built-in
 * constants, the built-in type objects and __debug__.
 *
 * Returns the module object, or NULL if creating the module or inserting
 * any entry fails.
 */
PyObject *
_PyBuiltin_Init(void)
{
PyObject *mod, *dict, *debug;
// Create the __builtin__ module and install the functions from builtin_methods
mod = Py_InitModule4("__builtin__", builtin_methods,
builtin_doc, (PyObject *)NULL,
PYTHON_API_VERSION);
if (mod == NULL)
return NULL;
// Add all built-in types to the __builtin__ module's dict
dict = PyModule_GetDict(mod);
#ifdef Py_TRACE_REFS
/* __builtin__ exposes a number of statically allocated objects
* that, before this code was added in 2.3, never showed up in
* the list of "all objects" maintained by Py_TRACE_REFS. As a
* result, programs leaking references to None and False (etc)
* couldn't be diagnosed by examining sys.getobjects(0).
*/
#define ADD_TO_ALL(OBJECT) _Py_AddToAllObjects((PyObject *)(OBJECT), 0)
#else
#define ADD_TO_ALL(OBJECT) (void)0
#endif
// SETBUILTIN stores OBJECT under NAME in dict and returns NULL from the
// enclosing function on failure; it expands to two statements, so it must
// only be used as a full statement at block level.
#define SETBUILTIN(NAME, OBJECT) \
if (PyDict_SetItemString(dict, NAME, (PyObject *)OBJECT) < 0) \
return NULL; \
ADD_TO_ALL(OBJECT)
SETBUILTIN("None", Py_None);
SETBUILTIN("Ellipsis", Py_Ellipsis);
SETBUILTIN("NotImplemented", Py_NotImplemented);
SETBUILTIN("False", Py_False);
SETBUILTIN("True", Py_True);
SETBUILTIN("basestring", &PyBaseString_Type);
SETBUILTIN("bool", &PyBool_Type);
SETBUILTIN("buffer", &PyBuffer_Type);
SETBUILTIN("classmethod", &PyClassMethod_Type);
#ifndef WITHOUT_COMPLEX
SETBUILTIN("complex", &PyComplex_Type);
#endif
SETBUILTIN("dict", &PyDict_Type);
SETBUILTIN("enumerate", &PyEnum_Type);
SETBUILTIN("file", &PyFile_Type);
SETBUILTIN("float", &PyFloat_Type);
SETBUILTIN("frozenset", &PyFrozenSet_Type);
SETBUILTIN("property", &PyProperty_Type);
SETBUILTIN("int", &PyInt_Type);
SETBUILTIN("list", &PyList_Type);
SETBUILTIN("long", &PyLong_Type);
SETBUILTIN("object", &PyBaseObject_Type);
SETBUILTIN("reversed", &PyReversed_Type);
SETBUILTIN("set", &PySet_Type);
SETBUILTIN("slice", &PySlice_Type);
SETBUILTIN("staticmethod", &PyStaticMethod_Type);
SETBUILTIN("str", &PyString_Type);
SETBUILTIN("super", &PySuper_Type);
SETBUILTIN("tuple", &PyTuple_Type);
SETBUILTIN("type", &PyType_Type);
SETBUILTIN("xrange", &PyRange_Type);
#ifdef Py_USING_UNICODE
SETBUILTIN("unicode", &PyUnicode_Type);
#endif
// __debug__ is true exactly when Py_OptimizeFlag is 0
debug = PyBool_FromLong(Py_OptimizeFlag == 0);
if (PyDict_SetItemString(dict, "__debug__", debug) < 0) {
Py_XDECREF(debug);
return NULL;
}
Py_XDECREF(debug);
return mod;
#undef ADD_TO_ALL
#undef SETBUILTIN
}
整個函數的功能就是設置好__builtin__ module
。總共分為兩個步驟:
- 創(chuàng)建
PyModuleObject
對(duì)象,在Python
中报慕,module
正是通過和這個(gè)對(duì)象來設(shè)置的 - 設(shè)置
module
深浮,將Python
中的所有的類型對(duì)象全塞進(jìn)新創(chuàng)建的__builtin__ module
中。
在第一步中已經(jīng)設(shè)置好了大部分的內(nèi)容眠冈,由函數(shù)Py_InitModule4
實(shí)現(xiàn):
// modsupport.c
/* Create (or fetch) the module called `name` and fill its dict.
 *
 * name:               module name, also used as the m_module of every function
 * methods:            NULL, or an array terminated by an entry whose ml_name is NULL
 * doc:                NULL, or the text stored under "__doc__"
 * passthrough:        handed to PyCFunction_NewEx as the functions' self
 * module_api_version: not used in the part of the function shown here
 *
 * Returns the module exactly as obtained from PyImport_AddModule, or NULL on
 * failure.  Part of the original function is elided ("......").
 */
PyObject *
Py_InitModule4(const char *name, PyMethodDef *methods, const char *doc,
PyObject *passthrough, int module_api_version)
{
PyObject *m, *d, *v, *n;
PyMethodDef *ml;
......
// Create the module object (or reuse the one already registered)
if ((m = PyImport_AddModule(name)) == NULL)
return NULL;
// Install the (symbol, value) pairs into the module's dict
d = PyModule_GetDict(m);
if (methods != NULL) {
// n is the module name as a string object; it is shared by all functions
n = PyString_FromString(name);
if (n == NULL)
return NULL;
// Walk the set of operations that `methods` says the module should contain
for (ml = methods; ml->ml_name != NULL; ml++) {
if ((ml->ml_flags & METH_CLASS) ||
(ml->ml_flags & METH_STATIC)) {
PyErr_SetString(PyExc_ValueError,
"module functions cannot set"
" METH_CLASS or METH_STATIC");
Py_DECREF(n);
return NULL;
}
// Wrap the C function in a PyCFunctionObject
v = PyCFunction_NewEx(ml, passthrough, n);
if (v == NULL) {
Py_DECREF(n);
return NULL;
}
if (PyDict_SetItemString(d, ml->ml_name, v) != 0) {
Py_DECREF(v);
Py_DECREF(n);
return NULL;
}
// Drop the local reference to v after the successful insert
Py_DECREF(v);
}
Py_DECREF(n);
}
if (doc != NULL) {
v = PyString_FromString(doc);
if (v == NULL || PyDict_SetItemString(d, "__doc__", v) != 0) {
Py_XDECREF(v);
return NULL;
}
Py_DECREF(v);
}
return m;
}
Py_InitModule4
參數(shù)的含義:
-
name
:module
的名稱,在這里是__builtin__
-
methods
:該module
中所包含的函數(shù)的集合蜗顽,在這里是builtin_methods
-
doc
:module
的文檔厕宗,在這里是builtin_doc
-
passthrough
:這個(gè)參數(shù)在Python 2.5
中沒有使用似扔,為NULL
-
module_api_version
:Python
內(nèi)部使用的version
值,用于比較
Py_InitModule4
函數(shù)可以分為2
個(gè)獨(dú)立的部分:創(chuàng)建module
對(duì)象;將(符號(hào)旭咽,值)
對(duì)應(yīng)關(guān)系放置到module
中。
創(chuàng)建module對(duì)象
// import.c
/* Look up the module called `name` in the interpreter's module dict and
 * create and register an empty one when it is missing (or when the entry is
 * not a module object).
 *
 * The returned pointer is NOT a new reference: the module is kept alive by
 * the module dict only.  Returns NULL on failure.
 */
PyObject *
PyImport_AddModule(const char *name)
{
    PyObject *registry = PyImport_GetModuleDict();
    PyObject *module = PyDict_GetItemString(registry, name);

    /* Already registered: hand the existing module back. */
    if (module != NULL && PyModule_Check(module)) {
        return module;
    }

    module = PyModule_New(name);
    if (module == NULL) {
        return NULL;
    }

    /* Register the new module in the global module collection. */
    if (PyDict_SetItemString(registry, name, module) != 0) {
        Py_DECREF(module);
        return NULL;
    }

    /* Give up our own reference; the registry still holds one. */
    Py_DECREF(module);
    return module;
}
Python
中維護(hù)了一個(gè)存放所有加載到內(nèi)存中的module
集合,在這個(gè)集合中狸相,存放著所有的(module名薛匪,module對(duì)象)
這樣的對(duì)應(yīng)關(guān)系,對(duì)應(yīng)到Python
中就是sys.modules
脓鹃。在創(chuàng)建一個(gè)新的module
對(duì)象之前蛋辈,會(huì)先到這個(gè)全局module
中查看是否已經(jīng)存在。通過PyImport_GetModuleDict
函數(shù)獲得PyInterpreterState
(進(jìn)程狀態(tài))對(duì)象中的module
域的值将谊。
如果全局modules
字典中沒有存在該name
的module
對(duì)象冷溶,則通過PyModule_New
函數(shù)創(chuàng)建一個(gè)新的module
對(duì)象,然后將(name尊浓,module)
對(duì)應(yīng)關(guān)系插入到modules
集合中逞频。
// moduleobject.c
/* Module object: an object header plus one dict (created in PyModule_New)
 * that stores the module's names. */
typedef struct {
PyObject_HEAD
PyObject *md_dict; // the module's namespace dict
} PyModuleObject;
/* Build a new module object whose dict holds "__name__" (set to `name`) and
 * "__doc__" (set to None).
 *
 * Returns a new reference, or NULL if any allocation or insertion fails.
 */
PyObject *
PyModule_New(const char *name)
{
    PyObject *name_str;
    PyModuleObject *module = PyObject_GC_New(PyModuleObject, &PyModule_Type);

    if (module == NULL) {
        return NULL;
    }

    name_str = PyString_FromString(name);
    module->md_dict = PyDict_New();

    if (module->md_dict != NULL && name_str != NULL
        && PyDict_SetItemString(module->md_dict, "__name__", name_str) == 0
        && PyDict_SetItemString(module->md_dict, "__doc__", Py_None) == 0) {
        /* The dict now owns the name; start GC tracking of the module. */
        Py_DECREF(name_str);
        PyObject_GC_Track(module);
        return (PyObject *)module;
    }

    /* Something failed: release whatever was created. */
    Py_XDECREF(name_str);
    Py_DECREF(module);
    return NULL;
}
實際上,PyModuleObject
對象就是對PyDictObject
對象的簡單包裝,創建PyModuleObject
對象的動作很簡單,注意在這裡設置了module
的__name__
屬性,而其__doc__
屬性只是被設置為None,還沒有填入真正的文檔。
至此基协,創(chuàng)建的PyModuleObject
對(duì)象還算是空的,接下來就開始設(shè)置module
對(duì)象菇用。
設(shè)置module對(duì)象
設(shè)置module
對(duì)象的流程回到Py_InitModule4
函數(shù)中澜驮,設(shè)置屬性主要依賴Py_InitModule4
函數(shù)的第二個(gè)參數(shù)(methods
),在這里為builtin_methods
惋鸥,遍歷builtin_methods
杂穷,處理其中的每一個(gè)元素悍缠。
// methodobject.h
/* Signature of a C function wrapped by a PyCFunctionObject: takes two
 * object pointers and returns an object pointer. */
typedef PyObject *(*PyCFunction)(PyObject *, PyObject *);
/* Static description of one built-in function or method.  Arrays of these
 * are terminated by an entry whose ml_name is NULL. */
struct PyMethodDef {
const char *ml_name; /* The name of the built-in function/method */
PyCFunction ml_meth; /* The C function that implements it */
int ml_flags; /* Combination of METH_xxx flags, which mostly
describe the args expected by the C func */
const char *ml_doc; /* The __doc__ attribute, or NULL */
};
typedef struct PyMethodDef PyMethodDef;
// bltinmodule.c
/* Table of the functions installed into the __builtin__ module by
 * _PyBuiltin_Init (through Py_InitModule4).  Each row gives the Python-level
 * name, the implementing C function, its METH_xxx calling-convention flags
 * and its doc string.  Implementations whose C signature differs from
 * PyCFunction are cast to it.  The {NULL, NULL} row terminates the table.
 */
static PyMethodDef builtin_methods[] = {
{"__import__", (PyCFunction)builtin___import__, METH_VARARGS | METH_KEYWORDS, import_doc},
{"abs", builtin_abs, METH_O, abs_doc},
{"all", builtin_all, METH_O, all_doc},
{"any", builtin_any, METH_O, any_doc},
{"apply", builtin_apply, METH_VARARGS, apply_doc},
{"callable", builtin_callable, METH_O, callable_doc},
{"chr", builtin_chr, METH_VARARGS, chr_doc},
{"cmp", builtin_cmp, METH_VARARGS, cmp_doc},
{"coerce", builtin_coerce, METH_VARARGS, coerce_doc},
{"compile", builtin_compile, METH_VARARGS, compile_doc},
{"delattr", builtin_delattr, METH_VARARGS, delattr_doc},
{"dir", builtin_dir, METH_VARARGS, dir_doc},
{"divmod", builtin_divmod, METH_VARARGS, divmod_doc},
{"eval", builtin_eval, METH_VARARGS, eval_doc},
{"execfile", builtin_execfile, METH_VARARGS, execfile_doc},
{"filter", builtin_filter, METH_VARARGS, filter_doc},
{"getattr", builtin_getattr, METH_VARARGS, getattr_doc},
{"globals", (PyCFunction)builtin_globals, METH_NOARGS, globals_doc},
{"hasattr", builtin_hasattr, METH_VARARGS, hasattr_doc},
{"hash", builtin_hash, METH_O, hash_doc},
{"hex", builtin_hex, METH_O, hex_doc},
{"id", builtin_id, METH_O, id_doc},
{"input", builtin_input, METH_VARARGS, input_doc},
{"intern", builtin_intern, METH_VARARGS, intern_doc},
{"isinstance", builtin_isinstance, METH_VARARGS, isinstance_doc},
{"issubclass", builtin_issubclass, METH_VARARGS, issubclass_doc},
{"iter", builtin_iter, METH_VARARGS, iter_doc},
{"len", builtin_len, METH_O, len_doc},
{"locals", (PyCFunction)builtin_locals, METH_NOARGS, locals_doc},
{"map", builtin_map, METH_VARARGS, map_doc},
{"max", (PyCFunction)builtin_max, METH_VARARGS | METH_KEYWORDS, max_doc},
{"min", (PyCFunction)builtin_min, METH_VARARGS | METH_KEYWORDS, min_doc},
{"oct", builtin_oct, METH_O, oct_doc},
{"open", (PyCFunction)builtin_open, METH_VARARGS | METH_KEYWORDS, open_doc},
{"ord", builtin_ord, METH_O, ord_doc},
{"pow", builtin_pow, METH_VARARGS, pow_doc},
{"range", builtin_range, METH_VARARGS, range_doc},
{"raw_input", builtin_raw_input, METH_VARARGS, raw_input_doc},
{"reduce", builtin_reduce, METH_VARARGS, reduce_doc},
{"reload", builtin_reload, METH_O, reload_doc},
{"repr", builtin_repr, METH_O, repr_doc},
{"round", (PyCFunction)builtin_round, METH_VARARGS | METH_KEYWORDS, round_doc},
{"setattr", builtin_setattr, METH_VARARGS, setattr_doc},
{"sorted", (PyCFunction)builtin_sorted, METH_VARARGS | METH_KEYWORDS, sorted_doc},
{"sum", builtin_sum, METH_VARARGS, sum_doc},
#ifdef Py_USING_UNICODE
{"unichr", builtin_unichr, METH_VARARGS, unichr_doc},
#endif
{"vars", builtin_vars, METH_VARARGS, vars_doc},
{"zip", builtin_zip, METH_VARARGS, zip_doc},
{NULL, NULL},
};
對於builtin_methods
中的每一個PyMethodDef
結構,Py_InitModule4
都會基於它創建一個PyCFunctionObject
對象(Python
中對函數指針的包裝)。
// methodobject.h
/* Python object wrapping one C function: an object header, the function's
 * static PyMethodDef description, and the self/module objects bound to it
 * by PyCFunction_NewEx. */
typedef struct {
PyObject_HEAD
PyMethodDef *m_ml; /* Description of the C function to call */
PyObject *m_self; /* Passed as 'self' arg to the C func, can be NULL */
PyObject *m_module; /* The __module__ attribute, can be anything */
} PyCFunctionObject;
// methodobject.c
/* Wrap the C function described by `ml` in a PyCFunctionObject bound to
 * `self` and `module` (either may be NULL; a reference is taken on each
 * non-NULL one).
 *
 * A recycled object from free_list is used when one is available; the list
 * is threaded through the m_self field of the recycled objects.
 *
 * Returns a new reference, or NULL if a fresh object cannot be allocated.
 */
PyObject *
PyCFunction_NewEx(PyMethodDef *ml, PyObject *self, PyObject *module)
{
    PyCFunctionObject *func = free_list;

    if (func == NULL) {
        /* Nothing to recycle: allocate a brand-new object. */
        func = PyObject_GC_New(PyCFunctionObject, &PyCFunction_Type);
        if (func == NULL) {
            return NULL;
        }
    }
    else {
        /* Pop the first recycled object and re-initialize its header. */
        free_list = (PyCFunctionObject *)(func->m_self);
        PyObject_INIT(func, &PyCFunction_Type);
    }

    func->m_ml = ml;

    Py_XINCREF(self);
    func->m_self = self;

    Py_XINCREF(module);
    func->m_module = module;

    _PyObject_GC_TRACK(func);
    return (PyObject *)func;
}
PyCFunctionObject
對(duì)象中的那個(gè)self
飞蚓,也就是在Py_InitModule4
中傳入的passthrough
(之前說這個(gè)參數(shù)在Python 2.5
中沒用,所以這里的self
也就為NULL
)廊蜒。
注意趴拧,PyCFunctionObject
對(duì)象中的m_module
域并不是指向一個(gè)真正的PyModuleObject
對(duì)象,而是一個(gè)PyStringObject
對(duì)象山叮,但是這個(gè)PyStringObject
對(duì)象正是PyModuleObject
對(duì)象的名字(根據(jù)PyModuleObject
對(duì)象的名字可以在全局modules
中找到對(duì)應(yīng)的PyModuleObject
對(duì)象)著榴。
__builtin__ module
創建完以後,也就是bimod = _PyBuiltin_Init()
執行完以後,將PyModuleObject
對象中維護的那個PyDictObject
對象賦值給interp->builtins
。以後Python
在需要訪問__builtin__ module
時,直接訪問interp->builtins
就行,不需要再到interp->modules
中去找到__builtin__ module
對象然後再去訪問。因為Python
中使用__builtin__ module
非常頻繁,這樣的機制速度會更快。
創(chuàng)建sys module
sys module的備份
Python
在創(chuàng)建并設(shè)置了__builtin__ module
之后口叙,會(huì)類似的以同樣流程設(shè)置sys module
,并像設(shè)置interp->builtins
一樣設(shè)置interp->sysdict
嗅战。
設(shè)置完成__builtin__
和sys
兩個(gè)module
之后妄田,PyInterpreterState
和PyThreadState
對(duì)象在內(nèi)存中的情形如下所示:
由于Python
的module
集合interp->modules
是一個(gè)PyDictObject
對(duì)象,而PyDictObject
對(duì)象在Python
中是一個(gè)可變對(duì)象(其中維護(hù)的(module name驮捍,PyModuleObject)
元素對(duì)在運(yùn)行時(shí)有可能被刪除)疟呐。
對(duì)于Python
的擴(kuò)展module
(sys
等),為了避免被刪除后的重新初始化东且,Python
會(huì)將所有的擴(kuò)展module
通過一個(gè)全局PyDictObject
對(duì)象來進(jìn)行備份維護(hù)启具,這里通過_PyImport_FixupExtension("sys", "sys")
函數(shù)完成:
// import.c
/* Backup table mapping a file name to a copy of a module's dict; created on
 * first use by _PyImport_FixupExtension. */
static PyObject *extensions = NULL;

/* Store a copy of the dict of the already-loaded module `name` in the
 * `extensions` table under the key `filename`.
 *
 * Returns the stored copy as a BORROWED reference (the table holds the only
 * reference), or NULL with an exception set when the table cannot be
 * created, the module is not loaded, or the copy cannot be made or stored.
 *
 * A failed insert is now reported as NULL.  Previously the result of
 * PyDict_SetItemString was ignored, so on failure the copy was freed by the
 * Py_DECREF below and a dangling pointer was returned.
 */
PyObject *
_PyImport_FixupExtension(char *name, char *filename)
{
    PyObject *modules, *mod, *dict, *copy;

    /* Lazily create the backup table. */
    if (extensions == NULL) {
        extensions = PyDict_New();
        if (extensions == NULL)
            return NULL;
    }

    /* Find the module in the interpreter's module collection. */
    modules = PyImport_GetModuleDict();
    mod = PyDict_GetItemString(modules, name);
    if (mod == NULL || !PyModule_Check(mod)) {
        PyErr_Format(PyExc_SystemError,
            "_PyImport_FixupExtension: module %.200s not loaded", name);
        return NULL;
    }

    /* Take the module's dict and copy it. */
    dict = PyModule_GetDict(mod);
    if (dict == NULL)
        return NULL;
    copy = PyDict_Copy(dict);
    if (copy == NULL)
        return NULL;

    /* Store the new dict in the backup table. */
    if (PyDict_SetItemString(extensions, filename, copy) < 0) {
        Py_DECREF(copy);
        return NULL;
    }

    /* The table keeps the copy alive; drop our own reference. */
    Py_DECREF(copy);
    return copy;
}
上面代碼介紹了備份的過程,第一次會創建一個新的PyDictObject
對象,這個對象將維護所有已經被Python
加載的module
中的PyDictObject
的一個備份。當系統中的modules
集合中的某個標準擴展module
被刪除後又被重新加載時,Python
就不需要再次初始化這些module
,只需用extensions
中備份的PyDictObject
對象來創建一個新的module
即可。
設(shè)置module搜索路徑
在創(chuàng)建完sys module
以后跨扮,會(huì)在此module
中設(shè)置Python
搜索一個(gè)module
時(shí)的默認(rèn)搜索路徑集合(PySys_SetPath(Py_GetPath())
)。這個(gè)路徑集合就是在Python
執(zhí)行import xyz
時(shí)將查看的路徑的集合。
// sysmodule.c
/* Build the path object for `path` with makepathobject (splitting on DELIM)
 * and store it as sys.path.  Any failure is fatal. */
void
PySys_SetPath(char *path)
{
    PyObject *path_obj = makepathobject(path, DELIM);

    if (path_obj == NULL) {
        Py_FatalError("can't create sys.path");
    }
    if (PySys_SetObject("path", path_obj) != 0) {
        Py_FatalError("can't assign sys.path");
    }
    /* Release our own reference now that the object has been stored. */
    Py_DECREF(path_obj);
}
/* Set or delete an entry of the current interpreter's sys dict.
 *
 * v != NULL: store v under `name`.
 * v == NULL: delete `name`; a missing entry counts as success.
 *
 * Returns 0 on success, otherwise the result of the underlying dict call.
 */
int
PySys_SetObject(char *name, PyObject *v)
{
    PyObject *sys_dict = PyThreadState_GET()->interp->sysdict;

    if (v != NULL) {
        return PyDict_SetItemString(sys_dict, name, v);
    }
    if (PyDict_GetItemString(sys_dict, name) == NULL) {
        return 0;
    }
    return PyDict_DelItemString(sys_dict, name);
}
在makepathobject
中會(huì)創(chuàng)建一個(gè)PyListObject
對(duì)象好港,這個(gè)PyListObject
對(duì)象中包含一組PyStringObject
對(duì)象愉镰,每一個(gè)PyStringObject
對(duì)象的內(nèi)容就是一個(gè)module
的搜索路徑(也就是Python
中的sys.path
)米罚。
最終钧汹,這個(gè)代表搜索路徑集合的list
對(duì)象會(huì)在PySys_SetObject
中被插入到interp->sysdict
這個(gè)PyDictObject
對(duì)象(sys module
中維護(hù)的那個(gè)PyDictObject
對(duì)象)中。
接下來录择,Python
會(huì)進(jìn)行一些瑣碎的動(dòng)作拔莱,其中包含初始化Python
的import
環(huán)境,初始化Python
的內(nèi)建異常(其實(shí)就是調(diào)用PyType_Ready
初始化各個(gè)異常類)隘竭。
創(chuàng)建__main__ module
在_PyImportHooks_Init
之后塘秦,Python
會(huì)創(chuàng)建一個(gè)非常特殊的module
:__main__
的module
。
// pythonrun.c
/* Create __main__ module */
/* Create the __main__ module and make sure its dict has a "__builtins__"
 * entry referring to the __builtin__ module.  Any failure is fatal. */
static void
initmain(void)
{
    PyObject *main_module, *main_dict, *builtin_module;

    /* Create __main__ and register it in the module collection. */
    main_module = PyImport_AddModule("__main__");
    if (main_module == NULL) {
        Py_FatalError("can't create __main__ module");
    }

    main_dict = PyModule_GetDict(main_module);
    if (PyDict_GetItemString(main_dict, "__builtins__") != NULL) {
        return;  /* already wired up */
    }

    /* Fetch __builtin__ and store it as ("__builtins__", module). */
    builtin_module = PyImport_ImportModule("__builtin__");
    if (builtin_module == NULL
        || PyDict_SetItemString(main_dict, "__builtins__", builtin_module) != 0) {
        Py_FatalError("can't add __builtins__ to __main__");
    }
    Py_DECREF(builtin_module);
}
__main__ module
是什麼?在PyImport_AddModule
時,創建了一個名為name
的module
後,會在module
對應的PyDictObject
對象(md_dict
)設置一個名為__name__
的項。__main__
module
的這一項就是"__main__"
,作為主程序運行的Python
源文件就可以被視為名為__main__
的module
。
當(dāng)Python
以python abc.py
運(yùn)行時(shí)京痢,Python
在沿著名字空間尋找__name__
時(shí),就會(huì)最終在__main__
module
中發(fā)現(xiàn)__name__
為"__main__"
篷店;而如果一個(gè)py
文件是以import
的方式加載的祭椰,則__name__
不會(huì)為"__main__"
。
設(shè)置site-specific的module的搜索路徑
Python
的第三方庫(kù)通常都是由module
提供疲陕,一般來說方淤,一些規(guī)模較大的第三方庫(kù)將放在%PythonHome%/lib/site-packages
目錄下,但是之前在初始化搜索路徑集合的時(shí)候并沒有將site-packages
包含在內(nèi)蹄殃。
接下來Python
通過initsite()
函數(shù)將site-packages
加入到搜索路徑中携茂。
// pythonrun.c
/* Import the site module.  A failed import is not fatal: a notice is written
 * to sys.stderr, followed by the traceback when Py_VerboseFlag is set;
 * otherwise the error is simply cleared. */
static void
initsite(void)
{
    PyObject *err_stream;
    PyObject *site_module = PyImport_ImportModule("site");

    if (site_module != NULL) {
        /* Only the import itself was wanted. */
        Py_DECREF(site_module);
        return;
    }

    err_stream = PySys_GetObject("stderr");
    if (!Py_VerboseFlag) {
        PyFile_WriteString(
            "'import site' failed; use -v for traceback\n", err_stream);
        PyErr_Clear();
        return;
    }
    PyFile_WriteString(
        "'import site' failed; traceback:\n", err_stream);
    PyErr_Print();
}
PyImport_ImportModule
函數是Python
中import
機制的核心,調用這個函數相當於Python
中的import xxx
,在這裡進入site.py
,其中會進行兩個動作:
- 將
site-packages
路徑加入到sys.path
中,對(duì)于不同平臺(tái)有不同操作-
win32
平臺(tái):%PythonHome%/lib/site-packages
+Unix
/Linux
平臺(tái):-
%sys.prefix%/lib/python<version>/site-packages
(其中%sys.prefix%
為Python
的sys.prefix
) %sys.prefix%/lib/site-python
%sys.exec_prefix%/lib/python<version>/site-packages
%sys.exec_prefix%/lib/site-python
-
-
- 處理
site-packages
目錄下的所有.pth
文件中的所有路徑加入到sys.path
中
至此按厘,Python
中絕大部分重要的初始化動(dòng)作都已經(jīng)完成了医吊,下圖是完成初始化后所有可以利用的資源:
0x03 激活Python虛擬機(jī)
上面部分算是完成了
Python
執(zhí)行程序所必須的基礎(chǔ)設(shè)施建設(shè),但是初始化動(dòng)作還沒有真正完成逮京,當(dāng)Python
真正進(jìn)入到之前講的字節(jié)碼虛擬機(jī)后卿堂,初始化階段才算真正完成。
Python
在Py_Initialize
成功完成後,最終將調用PyRun_AnyFileExFlags(fp, filename == NULL ? "<stdin>" : filename, filename != NULL, &cf)
。如果以腳本方式運行Python
,則filename
是文件名;如果以交互方式運行Python
,則filename
會傳入"<stdin>
"。第一個參數fp
指向打開的腳本或者是系統的標準輸入流stdin
。
// pythonrun.c
/* Parse input from a file and execute it */
/* Run Python source coming from `fp`: through the interactive loop when
 * Py_FdIsInteractive says the stream is interactive, as a plain file
 * otherwise.
 *
 * A NULL filename is replaced by "???".  When `closeit` is non-zero the
 * stream is closed after the interactive loop ends (the file path passes
 * `closeit` on to PyRun_SimpleFileExFlags).
 */
int
PyRun_AnyFileExFlags(FILE *fp, const char *filename, int closeit,
                     PyCompilerFlags *flags)
{
    int status;

    if (filename == NULL) {
        filename = "???";
    }

    /* Not an interactive stream: treat it as a script. */
    if (!Py_FdIsInteractive(fp, filename)) {
        return PyRun_SimpleFileExFlags(fp, filename, closeit, flags);
    }

    status = PyRun_InteractiveLoopFlags(fp, filename, flags);
    if (closeit) {
        fclose(fp);
    }
    return status;
}
通過Py_FdIsInteractive
函數(shù)判斷fp
是否指向標(biāo)準(zhǔn)輸入流怀各。如果是,則進(jìn)入PyRun_InteractiveLoopFlags
术浪,否則進(jìn)入PyRun_SimpleFileExFlags
瓢对。
交互式運(yùn)行方式
// pythonrun.c
/* Interactive read-eval loop: make sure sys.ps1 and sys.ps2 exist, then run
 * one interactive statement after another until end of input.
 *
 * Returns 0 once PyRun_InteractiveOneFlags reports E_EOF; any other result
 * of a single round is ignored and the loop goes on.
 */
int
PyRun_InteractiveLoopFlags(FILE *fp, const char *filename, PyCompilerFlags *flags)
{
    PyCompilerFlags local_flags;
    PyObject *prompt;
    int status;

    /* Callers may pass no flags; fall back to an empty local set. */
    if (flags == NULL) {
        local_flags.cf_flags = 0;
        flags = &local_flags;
    }

    /* Primary prompt ">>> " */
    prompt = PySys_GetObject("ps1");
    if (prompt == NULL) {
        prompt = PyString_FromString(">>> ");
        PySys_SetObject("ps1", prompt);
        Py_XDECREF(prompt);
    }

    /* Continuation prompt "... " */
    prompt = PySys_GetObject("ps2");
    if (prompt == NULL) {
        prompt = PyString_FromString("... ");
        PySys_SetObject("ps2", prompt);
        Py_XDECREF(prompt);
    }

    /* Keep going until the input is exhausted.  (An early exit on E_NOMEM
     * is deliberately not taken.) */
    do {
        status = PyRun_InteractiveOneFlags(fp, filename, flags);
        PRINT_TOTAL_REFS();
    } while (status != E_EOF);

    return 0;
}
/* Read, compile and execute ONE interactive statement from `fp`.
 *
 * The prompts are the string forms of sys.ps1 and sys.ps2 (empty strings
 * when unavailable).  The statement runs with the dict of __main__ as both
 * its globals and its locals.
 *
 * Returns 0 on success, E_EOF at end of input, and -1 on any other failure
 * (parse and runtime errors are printed with PyErr_Print before returning).
 */
int
PyRun_InteractiveOneFlags(FILE *fp, const char *filename, PyCompilerFlags *flags)
{
PyObject *m, *d, *v, *w;
mod_ty mod;
PyArena *arena;
char *ps1 = "", *ps2 = "";
int errcode = 0;
// v and w keep the str() results alive while ps1/ps2 point into them;
// they are released only after parsing is done
v = PySys_GetObject("ps1");
if (v != NULL) {
v = PyObject_Str(v);
if (v == NULL)
PyErr_Clear();
else if (PyString_Check(v))
ps1 = PyString_AsString(v);
}
w = PySys_GetObject("ps2");
if (w != NULL) {
w = PyObject_Str(w);
if (w == NULL)
PyErr_Clear();
else if (PyString_Check(w))
ps2 = PyString_AsString(w);
}
// Compile the Python statement the user typed in the interactive environment
arena = PyArena_New();
if (arena == NULL) {
Py_XDECREF(v);
Py_XDECREF(w);
return -1;
}
mod = PyParser_ASTFromFile(fp, filename,
Py_single_input, ps1, ps2,
flags, &errcode, arena);
Py_XDECREF(v);
Py_XDECREF(w);
if (mod == NULL) {
PyArena_Free(arena);
// End of input is reported to the caller, not printed as an error
if (errcode == E_EOF) {
PyErr_Clear();
return E_EOF;
}
PyErr_Print();
return -1;
}
// Get the dict maintained by <module __main__>
m = PyImport_AddModule("__main__");
if (m == NULL) {
PyArena_Free(arena);
return -1;
}
d = PyModule_GetDict(m);
// Execute the statement, with d as both globals and locals
v = run_mod(mod, filename, d, d, flags, arena);
PyArena_Free(arena);
if (v == NULL) {
PyErr_Print();
return -1;
}
Py_DECREF(v);
if (Py_FlushLine())
PyErr_Clear();
return 0;
}
調(diào)用PyParser_ASTFromFile
函數(shù)胰苏,對(duì)用戶在交互式環(huán)境下輸入的Python
語句進(jìn)行編譯硕蛹,其結(jié)果是構(gòu)造與Python
語句對(duì)應(yīng)的抽象語法樹(AST
)。
然後調用run_mod
,將最終完成對用戶輸入的語句的執行動作。需要注意的是:在進入run_mod
之前,會將__main__
module
中維護的PyDictObject
對象取出,傳遞給run_mod
,這個參數很重要,它將作為Python
虛擬機開始執行時當前活動的frame
對象的local
名字空間和global
名字空間。
腳本文件運(yùn)行方式
// python.h
/* Start-symbol constant that PyRun_SimpleFileExFlags passes as the `start`
 * argument of PyRun_FileExFlags when running a script file. */
#define Py_file_input 257
// pythonrun.c
/* Run the script `filename` (already open as `fp`) inside __main__.
 *
 * Sets __main__.__file__ when it is not present yet, then either runs the
 * file as compiled byte code (when maybe_pyc_file accepts it) or compiles
 * and runs it as source.  `closeit` non-zero means the stream is closed once
 * it is no longer needed.  The tail of the original function is elided
 * ("......").
 *
 * `ext` points at the last four characters of `filename`, or at its
 * terminating NUL when the name has four characters or fewer.  Previously
 * `filename + strlen(filename) - 4` was computed unconditionally, which
 * yields a pointer before the start of the buffer for short names.
 *
 * Returns -1 on the failures visible here, 0 otherwise.
 */
int
PyRun_SimpleFileExFlags(FILE *fp, const char *filename, int closeit,
                        PyCompilerFlags *flags)
{
    PyObject *m, *d, *v;
    const char *ext;
    size_t len;

    /* Set the "__file__" attribute in the __main__ module. */
    m = PyImport_AddModule("__main__");
    if (m == NULL)
        return -1;
    d = PyModule_GetDict(m);
    if (PyDict_GetItemString(d, "__file__") == NULL) {
        PyObject *f = PyString_FromString(filename);
        if (f == NULL)
            return -1;
        if (PyDict_SetItemString(d, "__file__", f) < 0) {
            Py_DECREF(f);
            return -1;
        }
        Py_DECREF(f);
    }

    /* Never step back past the beginning of a short file name. */
    len = strlen(filename);
    ext = filename + len - (len > 4 ? 4 : 0);

    /* First try to execute the file as a pyc file. */
    if (maybe_pyc_file(fp, filename, ext, closeit)) {
        /* Try to run a pyc file. First, re-open in binary */
        if (closeit)
            fclose(fp);
        if ((fp = fopen(filename, "rb")) == NULL) {
            fprintf(stderr, "python: Can't reopen .pyc file\n");
            return -1;
        }
        /* Turn on optimization if a .pyo file is given */
        if (strcmp(ext, ".pyo") == 0)
            Py_OptimizeFlag = 1;
        v = run_pyc_file(fp, filename, d, d, flags);
    } else {
        /* Execute the py script file; d is both globals and locals. */
        v = PyRun_FileExFlags(fp, filename, Py_file_input, d, d,
                              closeit, flags);
    }
    ......
    return 0;
}
/* Parse the source in `fp` into an AST (start symbol `start`) and execute it
 * with the given globals and locals.
 *
 * The stream is closed right after parsing when `closeit` is non-zero.
 * Returns whatever run_mod returns, or NULL when the arena cannot be created
 * or parsing fails.
 */
PyObject *
PyRun_FileExFlags(FILE *fp, const char *filename, int start, PyObject *globals,
                  PyObject *locals, int closeit, PyCompilerFlags *flags)
{
    PyObject *result = NULL;
    mod_ty tree;
    PyArena *arena = PyArena_New();

    if (arena == NULL) {
        return NULL;
    }

    /* Compile step: source text -> AST */
    tree = PyParser_ASTFromFile(fp, filename, start, 0, 0,
                                flags, NULL, arena);
    if (closeit) {
        fclose(fp);
    }

    /* Execute step, only if parsing produced a tree */
    if (tree != NULL) {
        result = run_mod(tree, filename, globals, locals, flags, arena);
    }

    PyArena_Free(arena);
    return result;
}
同交互式執行方式一樣,腳本文件的執行流程最後也進入了run_mod
,也同樣將__main__
module
中維護的PyDictObject
對象作為local
和global
名字空間傳入run_mod
中。
啟動(dòng)虛擬機(jī)
從run_mod
開始,Python
現在只剩下最後一件需要完成的工作了,那就是啟動字節碼虛擬機。
// pythonrun.c
/* Compile an AST to a code object and evaluate it with the given globals and
 * locals.  Returns the evaluation result, or NULL if compilation or
 * evaluation fails. */
static PyObject *
run_mod(mod_ty mod, const char *filename, PyObject *globals, PyObject *locals,
        PyCompilerFlags *flags, PyArena *arena)
{
    PyObject *result;
    /* AST -> byte-code instruction sequence, packaged as a PyCodeObject */
    PyCodeObject *code = PyAST_Compile(mod, filename, flags, arena);

    if (code == NULL) {
        return NULL;
    }

    /* Evaluate the code object; it is no longer needed afterwards. */
    result = PyEval_EvalCode(code, globals, locals);
    Py_DECREF(code);

    return result;
}
首先是根據(jù)AST
得到PyCodeObject
對(duì)象允睹,然后通過PyEval_EvalCode
函數(shù)開始喚醒字節(jié)碼虛擬機(jī)运准。
// ceval.c
/* Evaluate a code object with no positional arguments, keyword arguments,
 * defaults or closure: a thin wrapper around PyEval_EvalCodeEx. */
PyObject *
PyEval_EvalCode(PyCodeObject *co, PyObject *globals, PyObject *locals)
{
    /* XXX raise SystemError if globals is NULL */
    PyObject **const none_given = (PyObject **)NULL;

    return PyEval_EvalCodeEx(co, globals, locals,
                             none_given, 0,   /* args, argcount */
                             none_given, 0,   /* kws, kwcount */
                             none_given, 0,   /* defs, defcount */
                             NULL);           /* closure */
}
/* Abridged view of the general evaluation entry point: create a frame for
 * `co` with the given globals and locals and run it with
 * PyEval_EvalFrameEx.  Argument handling and cleanup are elided ("......").
 */
PyObject *
PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals,
PyObject **args, int argcount, PyObject **kws, int kwcount,
PyObject **defs, int defcount, PyObject *closure)
{
register PyFrameObject *f;
register PyObject *retval = NULL;
register PyObject **fastlocals, **freevars;
PyThreadState *tstate = PyThreadState_GET();
PyObject *x, *u;
......
// New frame for this code object on the current thread state
f = PyFrame_New(tstate, co, globals, locals);
......
// freevars starts co_nlocals slots after fastlocals in f_localsplus
fastlocals = f->f_localsplus;
freevars = f->f_localsplus + co->co_nlocals;
......
// The actual byte-code virtual machine (the pseudo CPU)
retval = PyEval_EvalFrameEx(f,0);
return retval;
}
從操作系統(tǒng)為Python
創(chuàng)建進(jìn)程開始,到Python
虛擬機(jī)被喚醒缭受,再到執(zhí)行引擎循環(huán)執(zhí)行字節(jié)碼胁澳,這個(gè)過程已經(jīng)很清晰了。
名字空間
在創(chuàng)建PyFrameObject
對(duì)象時(shí)米者,設(shè)置的3
個(gè)名字空間:local
韭畸、global
宇智、builtin
。
-
builtin
名字空間就是初始化時(shí)創(chuàng)建的__builtin__
module
胰丁,Python
所有線程都共享同樣的builtin
名字空間(節(jié)省空間随橘,加快速度) -
global
名字空間被設(shè)置為__main__
module
的dict
-
local
名字空間和global
名字空間一樣
歡迎關(guān)注微信公眾號(hào)(coder0x00)或掃描下方二維碼關(guān)注,我們將持續(xù)搜尋程序員必備基礎(chǔ)技能包提供給大家锦庸。