Index in Python List Slicing
How does Python list slicing work? What are the default start and stop indices? The post will cover the answers and how I found out the answers in source code.
A convenient way to get to the source code is Ctrl
+ Right Click in VSCode. By searching for the __getitem__
, which is used for []
, I got two methods below:
The second method looks more like the one we want. The bad news is that, we don't have more python code to follow. We need to search for the C source code now.
Go to the github repo of Python and search for files whose name include "list". I got the results below:
It seems that listobject.c
is the file that is most likely to contain the source code of list. Enter the file, and search for "getitem" using Ctrl
+F
. We are lucky here because there are only 6 matches and one of them looks like the registration of __getitem__
method.
By searching the function list_subscript
, I found its definition in the same file:
/* listobject.c Line 3564 */
static PyObject *
list_subscript(PyObject* _self, PyObject* item)
{
PyListObject* self = (PyListObject*)_self;
if (_PyIndex_Check(item)) {
Py_ssize_t i;
i = PyNumber_AsSsize_t(item, PyExc_IndexError);
if (i == -1 && PyErr_Occurred())
return NULL;
if (i < 0)
i += PyList_GET_SIZE(self);
return list_item((PyObject *)self, i);
}
else if (PySlice_Check(item)) {
Py_ssize_t start, stop, step;
if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
return NULL;
}
return list_slice_wrap(self, start, stop, step);
}
else {
PyErr_Format(PyExc_TypeError,
"list indices must be integers or slices, not %.200s",
Py_TYPE(item)->tp_name);
return NULL;
}
}
It seems that the else if
block is what we want, because it takes a slice. By searching PySlice_Unpack
in the repo, I found its definition in sliceobject.c
. Besides, the definition of list_slice_wrap
is just above list_subscript
.
/* sliceobject.c Line 215 */
int
PySlice_Unpack(PyObject *_r,
Py_ssize_t *start, Py_ssize_t *stop, Py_ssize_t *step)
{
PySliceObject *r = (PySliceObject*)_r;
/* this is harder to get right than you might think */
static_assert(PY_SSIZE_T_MIN + 1 <= -PY_SSIZE_T_MAX,
"-PY_SSIZE_T_MAX < PY_SSIZE_T_MIN + 1");
if (r->step == Py_None) {
*step = 1;
}
else {
if (!_PyEval_SliceIndex(r->step, step)) return -1;
if (*step == 0) {
PyErr_SetString(PyExc_ValueError,
"slice step cannot be zero");
return -1;
}
/* Here *step might be -PY_SSIZE_T_MAX-1; in this case we replace it
* with -PY_SSIZE_T_MAX. This doesn't affect the semantics, and it
* guards against later undefined behaviour resulting from code that
* does "step = -step" as part of a slice reversal.
*/
if (*step < -PY_SSIZE_T_MAX)
*step = -PY_SSIZE_T_MAX;
}
if (r->start == Py_None) {
*start = *step < 0 ? PY_SSIZE_T_MAX : 0;
}
else {
if (!_PyEval_SliceIndex(r->start, start)) return -1;
}
if (r->stop == Py_None) {
*stop = *step < 0 ? PY_SSIZE_T_MIN : PY_SSIZE_T_MAX;
}
else {
if (!_PyEval_SliceIndex(r->stop, stop)) return -1;
}
return 0;
}
/* listobject.c Line 3545 */
static PyObject *
list_slice_wrap(PyListObject *aa, Py_ssize_t start, Py_ssize_t stop, Py_ssize_t step)
{
PyObject *res = NULL;
Py_BEGIN_CRITICAL_SECTION(aa);
Py_ssize_t len = PySlice_AdjustIndices(Py_SIZE(aa), &start, &stop, step);
if (len <= 0) {
res = PyList_New(0);
}
else if (step == 1) {
res = list_slice_lock_held(aa, start, stop);
}
else {
res = list_slice_step_lock_held(aa, start, step, len);
}
Py_END_CRITICAL_SECTION();
return res;
}
To understand the default value of start, stop and step, we still need to understand one more function PySlice_AdjustIndices
used in list_slice_wrap
. I also found its definition in sliceobject.c
:
/* sliceobject.c Line 260 */
Py_ssize_t
PySlice_AdjustIndices(Py_ssize_t length,
Py_ssize_t *start, Py_ssize_t *stop, Py_ssize_t step)
{
/* this is harder to get right than you might think */
assert(step != 0);
assert(step >= -PY_SSIZE_T_MAX);
if (*start < 0) {
*start += length;
if (*start < 0) {
*start = (step < 0) ? -1 : 0;
}
}
else if (*start >= length) {
*start = (step < 0) ? length - 1 : length;
}
if (*stop < 0) {
*stop += length;
if (*stop < 0) {
*stop = (step < 0) ? -1 : 0;
}
}
else if (*stop >= length) {
*stop = (step < 0) ? length - 1 : length;
}
if (step < 0) {
if (*stop < *start) {
return (*start - *stop - 1) / (-step) + 1;
}
}
else {
if (*start < *stop) {
return (*stop - *start - 1) / step + 1;
}
}
return 0;
}
By summarizing the three functions above, we can get the idea of how the slicing works and what is the logic behind start, stop and step. To make things easier, I get rid of the steps that we do not care about, and here is all we need:
static PyObject *
list_subscript(PyObject* _self, PyObject* item)
{
// ...
else if (PySlice_Check(item)) { // if the item is a slice
Py_ssize_t start, stop, step;
if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
return NULL;
}
return list_slice_wrap(self, start, stop, step);
}
// ...
}
int
PySlice_Unpack(PyObject *_r,
Py_ssize_t *start, Py_ssize_t *stop, Py_ssize_t *step)
{
// ...
if (r->step == Py_None) {
*step = 1;
}
else {
if (*step == 0) {
PyErr_SetString(PyExc_ValueError,
"slice step cannot be zero");
return -1;
}
}
if (r->start == Py_None) {
*start = *step < 0 ? PY_SSIZE_T_MAX : 0;
}
if (r->stop == Py_None) {
*stop = *step < 0 ? PY_SSIZE_T_MIN : PY_SSIZE_T_MAX;
}
return 0;
}
static PyObject *
list_slice_wrap(PyListObject *aa, Py_ssize_t start, Py_ssize_t stop, Py_ssize_t step)
{
PyObject *res = NULL;
Py_ssize_t len = PySlice_AdjustIndices(Py_SIZE(aa), &start, &stop, step);
if (len <= 0) {
res = PyList_New(0);
}
else if (step == 1) {
res = make_result_1(aa, start, stop);
/*
function make_result_1(aa, start, stop):
result = []
for (cur = start; cur < stop; cur += 1):
result.append(aa[cur])
*/
}
else {
res = make_result_2(aa, start, step, len);
/*
function make_result_2(aa, start, step, len):
result = []
for (cur = start, i = 0; i < len; cur += step, i++):
result.append(aa[cur])
*/
}
return res;
}
Py_ssize_t
PySlice_AdjustIndices(Py_ssize_t length,
Py_ssize_t *start, Py_ssize_t *stop, Py_ssize_t step)
{
if (*start < 0) {
*start += length;
if (*start < 0) {
*start = (step < 0) ? -1 : 0;
}
}
else if (*start >= length) {
*start = (step < 0) ? length - 1 : length;
}
if (*stop < 0) {
*stop += length;
if (*stop < 0) {
*stop = (step < 0) ? -1 : 0;
}
}
else if (*stop >= length) {
*stop = (step < 0) ? length - 1 : length;
}
if (step < 0) {
if (*stop < *start) {
return (*start - *stop - 1) / (-step) + 1;
}
}
else {
if (*start < *stop) {
return (*stop - *start - 1) / step + 1;
}
}
return 0;
}
If written in Python, it may look like this (just a rough idea):
def get_slice(a, start=None, stop=None, step=1) -> list:
# a[start:stop:step]
INF = float("inf")
if step == 0: raise Exception("step cannot be 0! U GG")
if start is None:
start = 0 if step > 0 else INF
if stop is None:
stop = INF if step > 0 else -INF
length = len(a)
if start < 0 and start >= -length:
start = start + length
elif start < -length:
start = 0 if step > 0 else -1
elif start >= length:
start = length if step > 0 else length - 1
if stop < 0 and stop >= -length:
stop = stop + length
elif stop < -length:
stop = 0 if step > 0 else -1
elif stop >= length:
stop = length if step > 0 else length - 1
res = []
if step < 0 and start > stop:
cur = start
while cur < stop:
res.append(a[cur])
cur += step
return res
elif step > 0 and start < stop:
cur = start
while cur > stop:
res.append(a[cur])
cur += step
return res
else:
return []
So that's all the rules we are interested in.