字符串對象 PyListObject
PyListObject對象支持元素的插入、添加、刪除等操作,你內(nèi)部存放的是PyObject*
指針猿涨。
定義
[listobject.h]
typedef struct {
PyObject_VAR_HEAD
/* Vector of pointers to list elements. list[0] is ob_item[0], etc. */
/*ob_item為指向元素列表的指針,實際上姆怪,Python中的list[0]就是ob_item[0]*/
PyObject **ob_item;
/* ob_item contains space for 'allocated' elements. The number
* currently in use is ob_size.
* Invariants:
* 0 <= ob_size <= allocated
* len(list) == ob_size
* ob_item == NULL implies ob_size == allocated == 0
* list.sort() temporarily sets allocated to -1 to detect mutations.
*
* Items must normally not be NULL, except during construction when
* the list is not yet visible outside the function that builds it.
*/
/*
* PyListObject 會提前申請一塊大內(nèi)存叛赚,來為分配做準(zhǔn)備,allocated 就代表著
* PyListObject 能夠使用的內(nèi)存數(shù)
*/
Py_ssize_t allocated;
} PyListObject;
創(chuàng)建與維護
創(chuàng)建對象
python 提供了唯一的方法PyList_New
來創(chuàng)建列表稽揭,同時其參數(shù)size
用于指定列表初始元素個數(shù)
[listobject.c]
PyObject *
PyList_New(Py_ssize_t size)
{
PyListObject *op;
size_t nbytes;
#ifdef SHOW_ALLOC_COUNT
static int initialized = 0;
if (!initialized) {
Py_AtExit(show_alloc);
initialized = 1;
}
#endif
if (size < 0) {
PyErr_BadInternalCall();
return NULL;
}
/* Check for overflow without an actual overflow,
* which can cause compiler to optimise out */
//檢查size的大小是否合理
if ((size_t)size > PY_SIZE_MAX / sizeof(PyObject *))
return PyErr_NoMemory();
nbytes = size * sizeof(PyObject *);
if (numfree) {
//緩沖池可用
numfree--;
op = free_list[numfree];
_Py_NewReference((PyObject *)op);
#ifdef SHOW_ALLOC_COUNT
count_reuse++;
#endif
} else {
//緩沖池不可用俺附,創(chuàng)建新的對象
op = PyObject_GC_New(PyListObject, &PyList_Type);
if (op == NULL)
return NULL;
#ifdef SHOW_ALLOC_COUNT
count_alloc++;
#endif
}
if (size <= 0)
op->ob_item = NULL;
else {
//為對象指針列表分配指定的內(nèi)存空間
op->ob_item = (PyObject **) PyMem_MALLOC(nbytes);
if (op->ob_item == NULL) {
Py_DECREF(op);
return PyErr_NoMemory();
}
memset(op->ob_item, 0, nbytes);
}
// 設(shè)置對象的參數(shù)
Py_SIZE(op) = size;
op->allocated = size;
_PyObject_GC_TRACK(op);
return (PyObject *) op;
}
PyListObject
的緩沖池free_list
維護的對象個數(shù)是一定的。具體有下面代碼指定
[listobject.c]
#ifndef PyList_MAXFREELIST
#define PyList_MAXFREELIST 80
#endif
static PyListObject *free_list[PyList_MAXFREELIST];
static int numfree = 0;
假設(shè)溪掀,我們第一次調(diào)用PyList_New
函數(shù)來創(chuàng)建列表對象事镣,
即使用PyList_New(6)
。那么我們將得到下圖這樣的列表結(jié)構(gòu)
設(shè)置元素
[listobject.c]
int
PyList_SetItem(register PyObject *op, register Py_ssize_t i,
register PyObject *newitem)
{
register PyObject *olditem;
register PyObject **p;
//檢查對象
if (!PyList_Check(op)) {
Py_XDECREF(newitem);
PyErr_BadInternalCall();
return -1;
}
//檢查索引的合法性
if (i < 0 || i >= Py_SIZE(op)) {
Py_XDECREF(newitem);
PyErr_SetString(PyExc_IndexError,
"list assignment index out of range");
return -1;
}
//設(shè)置元素(即修改指針列表中揪胃,對應(yīng)索引位置處存儲的指針)
p = ((PyListObject *)op) -> ob_item + i;
olditem = *p;
*p = newitem;
//調(diào)整olditem的引用計數(shù)
Py_XDECREF(olditem);
return 0;
}
那么璃哟,在利用PyList_SetItem
實現(xiàn)向列表設(shè)置元素,在python運行list[3]=100
,那么之前的對象將變?yōu)橄聢D
插入元素
插入元素與設(shè)置元素是完全不同的操作只嚣,插入元素會在原有的結(jié)構(gòu)中加入新的元素沮稚,如下所示
>>> lst = [1, 2, 3, 4, 5]
>>> lst[3] = 100
>>> lst
[1, 2, 3, 100, 5]
>>> lst.insert(3, 99)
>>> lst
[1, 2, 3, 99, 100, 5]
可以看出,插入操作實際上改變了列表對象的內(nèi)存結(jié)構(gòu)
[listobject.c]
int
PyList_Insert(PyObject *op, Py_ssize_t where, PyObject *newitem)
{
//類型檢查
if (!PyList_Check(op)) {
PyErr_BadInternalCall();
return -1;
}
return ins1((PyListObject *)op, where, newitem);
}
[listobject.c]
static int
ins1(PyListObject *self, Py_ssize_t where, PyObject *v)
{
Py_ssize_t i, n = Py_SIZE(self);
PyObject **items;
if (v == NULL) {
PyErr_BadInternalCall();
return -1;
}
// 溢出檢查
if (n == PY_SSIZE_T_MAX) {
PyErr_SetString(PyExc_OverflowError,
"cannot add more objects to list");
return -1;
}
// 調(diào)整列表容量
if (list_resize(self, n+1) == -1)
return -1;
// 確定插入點
// 參數(shù)where為負表示反向插入
if (where < 0) {
where += n;
if (where < 0)
where = 0;
}
if (where > n)
where = n;
// 插入元素
items = self->ob_item;
for (i = n; --i >= where; )
items[i+1] = items[i];
Py_INCREF(v);
items[where] = v;
return 0;
}
調(diào)整類別的容量
[listobject.c]
static int
list_resize(PyListObject *self, Py_ssize_t newsize)
{
PyObject **items;
size_t new_allocated;
Py_ssize_t allocated = self->allocated;
/* Bypass realloc() when a previous overallocation is large enough
to accommodate the newsize. If the newsize falls lower than half
the allocated size, then proceed with the realloc() to shrink the list.
*/
// 不需要重新申請內(nèi)存册舞,這時只需調(diào)整ob_size即可
if (allocated >= newsize && newsize >= (allocated >> 1)) {
assert(self->ob_item != NULL || newsize == 0);
Py_SIZE(self) = newsize;
return 0;
}
/* This over-allocates proportional to the list size, making room
* for additional growth. The over-allocation is mild, but is
* enough to give linear-time amortized behavior over a long
* sequence of appends() in the presence of a poorly-performing
* system realloc().
* The growth pattern is: 0, 4, 8, 16, 25, 35, 46, 58, 72, 88, ...
*/
// 計算重新申請內(nèi)存的大小
new_allocated = (newsize >> 3) + (newsize < 9 ? 3 : 6);
/* check for integer overflow */
// 溢出判斷
if (new_allocated > PY_SIZE_MAX - newsize) {
PyErr_NoMemory();
return -1;
} else {
new_allocated += newsize;
}
//擴展列表
if (newsize == 0)
new_allocated = 0;
items = self->ob_item;
if (new_allocated <= (PY_SIZE_MAX / sizeof(PyObject *)))
PyMem_RESIZE(items, PyObject *, new_allocated);
else
items = NULL;
if (items == NULL) {
PyErr_NoMemory();
return -1;
}
self->ob_item = items;
Py_SIZE(self) = newsize;
self->allocated = new_allocated;
return 0;
}
通過insert
插入的示意圖
同時,python的list
還支持append
操作
[listobject.c]
int
PyList_Append(PyObject *op, PyObject *newitem)
{
if (PyList_Check(op) && (newitem != NULL))
return app1((PyListObject *)op, newitem);
PyErr_BadInternalCall();
return -1;
}
[listobject.c]
static int
app1(PyListObject *self, PyObject *v)
{
Py_ssize_t n = PyList_GET_SIZE(self);
assert (v != NULL);
if (n == PY_SSIZE_T_MAX) {
PyErr_SetString(PyExc_OverflowError,
"cannot add more objects to list");
return -1;
}
if (list_resize(self, n+1) == -1)
return -1;
Py_INCREF(v);
PyList_SET_ITEM(self, n, v);
return 0;
}
刪除元素
即python中的remove
操作
>>> lst = [0,1, 2, 3, 4, 5]
>>> lst
[0, 1, 2, 3, 4, 5]
>>> lst.remove(3)
>>> lst
[0, 1, 2, 4, 5]
其實現(xiàn)代碼
[listobject.c]
static PyObject *
listremove(PyListObject *self, PyObject *v)
{
Py_ssize_t i;
// 對list進行遍歷并比較
for (i = 0; i < Py_SIZE(self); i++) {
//PyObject_RichCompareBool
//return
//-1 if error
// 1 if op
// 0 if not op
int cmp = PyObject_RichCompareBool(self->ob_item[i], v, Py_EQ);
if (cmp > 0) {
if (list_ass_slice(self, i, i+1,
(PyObject *)NULL) == 0)
Py_RETURN_NONE;
return NULL;
}
else if (cmp < 0)
return NULL;
}
PyErr_SetString(PyExc_ValueError, "list.remove(x): x not in list");
return NULL;
}
[listobject.c]
/* a[ilow:ihigh] = v if v != NULL.
* del a[ilow:ihigh] if v == NULL.
*
* Special speed gimmick: when v is NULL and ihigh - ilow <= 8, it's
* guaranteed the call cannot fail.
*/
static int
list_ass_slice(PyListObject *a, Py_ssize_t ilow, Py_ssize_t ihigh, PyObject *v)
{
/* Because [X]DECREF can recursively invoke list operations on
this list, we must postpone all [X]DECREF activity until
after the list is back in its canonical shape. Therefore
we must allocate an additional array, 'recycle', into which
we temporarily copy the items that are deleted from the
list. :-( */
PyObject *recycle_on_stack[8];
PyObject **recycle = recycle_on_stack; /* will allocate more if needed */
PyObject **item;
PyObject **vitem = NULL;
PyObject *v_as_SF = NULL; /* PySequence_Fast(v) */
Py_ssize_t n; /* # of elements in replacement list */
Py_ssize_t norig; /* # of elements in list getting replaced */
Py_ssize_t d; /* Change in size */
Py_ssize_t k;
size_t s;
int result = -1; /* guilty until proved innocent */
#define b ((PyListObject *)v)
if (v == NULL)
n = 0;
else {
if (a == b) {
/* Special case "a[i:j] = a" -- copy b first */
v = list_slice(b, 0, Py_SIZE(b));
if (v == NULL)
return result;
result = list_ass_slice(a, ilow, ihigh, v);
Py_DECREF(v);
return result;
}
v_as_SF = PySequence_Fast(v, "can only assign an iterable");
if(v_as_SF == NULL)
goto Error;
n = PySequence_Fast_GET_SIZE(v_as_SF);
vitem = PySequence_Fast_ITEMS(v_as_SF);
}
if (ilow < 0)
ilow = 0;
else if (ilow > Py_SIZE(a))
ilow = Py_SIZE(a);
if (ihigh < ilow)
ihigh = ilow;
else if (ihigh > Py_SIZE(a))
ihigh = Py_SIZE(a);
norig = ihigh - ilow;
assert(norig >= 0);
d = n - norig;
if (Py_SIZE(a) + d == 0) {
Py_XDECREF(v_as_SF);
return list_clear(a);
}
item = a->ob_item;
/* recycle the items that we are about to remove */
s = norig * sizeof(PyObject *);
/* If norig == 0, item might be NULL, in which case we may not memcpy from it. */
if (s) {
if (s > sizeof(recycle_on_stack)) {
recycle = (PyObject **)PyMem_MALLOC(s);
if (recycle == NULL) {
PyErr_NoMemory();
goto Error;
}
}
memcpy(recycle, &item[ilow], s);
}
if (d < 0) { /* Delete -d items */
memmove(&item[ihigh+d], &item[ihigh],
(Py_SIZE(a) - ihigh)*sizeof(PyObject *));
list_resize(a, Py_SIZE(a) + d);
item = a->ob_item;
}
else if (d > 0) { /* Insert d items */
k = Py_SIZE(a);
if (list_resize(a, k+d) < 0)
goto Error;
item = a->ob_item;
memmove(&item[ihigh+d], &item[ihigh],
(k - ihigh)*sizeof(PyObject *));
}
for (k = 0; k < n; k++, ilow++) {
PyObject *w = vitem[k];
Py_XINCREF(w);
item[ilow] = w;
}
for (k = norig - 1; k >= 0; --k)
Py_XDECREF(recycle[k]);
result = 0;
Error:
if (recycle != recycle_on_stack)
PyMem_FREE(recycle);
Py_XDECREF(v_as_SF);
return result;
#undef b
}
即障般,這里的刪除操作是通過內(nèi)存移動來實現(xiàn)的(即memmove
).其如下圖所示
緩存池
在創(chuàng)建PyListObject
時调鲸,用的了緩存池free_lists
。那么挽荡,free_lists
中緩存的對象是何時創(chuàng)建的呢藐石?
答案是在PyListObject
的銷毀時,即函數(shù)list_dealloc
中
[listobject.c]
static void
list_dealloc(PyListObject *op)
{
Py_ssize_t i;
PyObject_GC_UnTrack(op);
Py_TRASHCAN_SAFE_BEGIN(op)
// 銷毀列表中存儲的數(shù)據(jù)
if (op->ob_item != NULL) {
/* Do it backwards, for Christian Tismer.
There's a simple test case where somehow this reduces
thrashing when a *very* large list is created and
immediately deleted. */
i = Py_SIZE(op);
while (--i >= 0) {
// 處理對象的引用計數(shù)
Py_XDECREF(op->ob_item[i]);
}
// 釋放內(nèi)存
PyMem_FREE(op->ob_item);
}
if (numfree < PyList_MAXFREELIST && PyList_CheckExact(op))
// 內(nèi)存池還未存滿定拟,則將其存入內(nèi)存池
free_list[numfree++] = op;
else
// 內(nèi)存池已滿于微,直接釋放掉
Py_TYPE(op)->tp_free((PyObject *)op);
Py_TRASHCAN_SAFE_END(op)
}
需要注意的是逗嫡,即使對象將保存進對象緩存池,python也會將列表對象中的數(shù)據(jù)指針數(shù)組釋放掉株依,這是為了避免過度消耗系統(tǒng)內(nèi)存驱证。如下圖,顯示了存在于對象緩存池中的對象結(jié)構(gòu)的例子
參考
《Python 源碼剖析》