Index in Python List Slicing

How does Python list slicing work? What are the default start and stop indices? The post will cover the answers and how I found out the answers in source code.

A convenient way to get to the source code is Ctrl + Right Click in VSCode. By searching for the __getitem__, which is used for [], I got two methods below:

python def of list getitem

The second method looks more like the one we want. The bad news is that, we don't have more python code to follow. We need to search for the C source code now.

Go to the github repo of Python and search for files whose name include "list". I got the results below:

github file search

It seems that listobject.c is the file that is most likely to contain the source code of list. Enter the file, and search for "getitem" using Ctrl+F. We are lucky here because there are only 6 matches and one of them looks like the registration of __getitem__ method.

github file search

By searching the function list_subscript, I found its definition in the same file:

/* listobject.c Line 3564 */
static PyObject *
list_subscript(PyObject* _self, PyObject* item)
{
    PyListObject* self = (PyListObject*)_self;
    if (_PyIndex_Check(item)) {
        Py_ssize_t i;
        i = PyNumber_AsSsize_t(item, PyExc_IndexError);
        if (i == -1 && PyErr_Occurred())
            return NULL;
        if (i < 0)
            i += PyList_GET_SIZE(self);
        return list_item((PyObject *)self, i);
    }
    else if (PySlice_Check(item)) {
        Py_ssize_t start, stop, step;
        if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
            return NULL;
        }
        return list_slice_wrap(self, start, stop, step);
    }
    else {
        PyErr_Format(PyExc_TypeError,
                     "list indices must be integers or slices, not %.200s",
                     Py_TYPE(item)->tp_name);
        return NULL;
    }
}

It seems that the else if block is what we want, because it takes a slice. By searching PySlice_Unpack in the repo, I found its definition in sliceobject.c. Besides, the definition of list_slice_wrap is just above list_subscript.

/* sliceobject.c Line 215 */
int
PySlice_Unpack(PyObject *_r,
               Py_ssize_t *start, Py_ssize_t *stop, Py_ssize_t *step)
{
    PySliceObject *r = (PySliceObject*)_r;
    /* this is harder to get right than you might think */

    static_assert(PY_SSIZE_T_MIN + 1 <= -PY_SSIZE_T_MAX,
                  "-PY_SSIZE_T_MAX < PY_SSIZE_T_MIN + 1");

    if (r->step == Py_None) {
        *step = 1;
    }
    else {
        if (!_PyEval_SliceIndex(r->step, step)) return -1;
        if (*step == 0) {
            PyErr_SetString(PyExc_ValueError,
                            "slice step cannot be zero");
            return -1;
        }
        /* Here *step might be -PY_SSIZE_T_MAX-1; in this case we replace it
         * with -PY_SSIZE_T_MAX.  This doesn't affect the semantics, and it
         * guards against later undefined behaviour resulting from code that
         * does "step = -step" as part of a slice reversal.
         */
        if (*step < -PY_SSIZE_T_MAX)
            *step = -PY_SSIZE_T_MAX;
    }

    if (r->start == Py_None) {
        *start = *step < 0 ? PY_SSIZE_T_MAX : 0;
    }
    else {
        if (!_PyEval_SliceIndex(r->start, start)) return -1;
    }

    if (r->stop == Py_None) {
        *stop = *step < 0 ? PY_SSIZE_T_MIN : PY_SSIZE_T_MAX;
    }
    else {
        if (!_PyEval_SliceIndex(r->stop, stop)) return -1;
    }

    return 0;
}
/* listobject.c Line 3545 */
static PyObject *
list_slice_wrap(PyListObject *aa, Py_ssize_t start, Py_ssize_t stop, Py_ssize_t step)
{
    PyObject *res = NULL;
    Py_BEGIN_CRITICAL_SECTION(aa);
    Py_ssize_t len = PySlice_AdjustIndices(Py_SIZE(aa), &start, &stop, step);
    if (len <= 0) {
        res = PyList_New(0);
    }
    else if (step == 1) {
        res = list_slice_lock_held(aa, start, stop);
    }
    else {
        res = list_slice_step_lock_held(aa, start, step, len);
    }
    Py_END_CRITICAL_SECTION();
    return res;
}

To understand the default value of start, stop and step, we still need to understand one more function PySlice_AdjustIndices used in list_slice_wrap. I also found its definition in sliceobject.c:

/* sliceobject.c Line 260 */
Py_ssize_t
PySlice_AdjustIndices(Py_ssize_t length,
                      Py_ssize_t *start, Py_ssize_t *stop, Py_ssize_t step)
{
    /* this is harder to get right than you might think */

    assert(step != 0);
    assert(step >= -PY_SSIZE_T_MAX);

    if (*start < 0) {
        *start += length;
        if (*start < 0) {
            *start = (step < 0) ? -1 : 0;
        }
    }
    else if (*start >= length) {
        *start = (step < 0) ? length - 1 : length;
    }

    if (*stop < 0) {
        *stop += length;
        if (*stop < 0) {
            *stop = (step < 0) ? -1 : 0;
        }
    }
    else if (*stop >= length) {
        *stop = (step < 0) ? length - 1 : length;
    }

    if (step < 0) {
        if (*stop < *start) {
            return (*start - *stop - 1) / (-step) + 1;
        }
    }
    else {
        if (*start < *stop) {
            return (*stop - *start - 1) / step + 1;
        }
    }
    return 0;
}

By summarizing the three functions above, we can get the idea of how the slicing works and what is the logic behind start, stop and step. To make things easier, I get rid of the steps that we do not care about, and here is all we need:


static PyObject *
list_subscript(PyObject* _self, PyObject* item)
{
    // ...
    else if (PySlice_Check(item)) { // if the item is a slice
        Py_ssize_t start, stop, step;
        if (PySlice_Unpack(item, &start, &stop, &step) < 0) {
            return NULL;
        }
        return list_slice_wrap(self, start, stop, step);
    }
    // ...
}

int
PySlice_Unpack(PyObject *_r,
               Py_ssize_t *start, Py_ssize_t *stop, Py_ssize_t *step)
{
    // ...
    if (r->step == Py_None) {
        *step = 1;
    }
    else {
        if (*step == 0) {
            PyErr_SetString(PyExc_ValueError,
                            "slice step cannot be zero");
            return -1;
        }
    }
    if (r->start == Py_None) {
        *start = *step < 0 ? PY_SSIZE_T_MAX : 0;
    }
    if (r->stop == Py_None) {
        *stop = *step < 0 ? PY_SSIZE_T_MIN : PY_SSIZE_T_MAX;
    }
    return 0;
}

static PyObject *
list_slice_wrap(PyListObject *aa, Py_ssize_t start, Py_ssize_t stop, Py_ssize_t step)
{
    PyObject *res = NULL;
    Py_ssize_t len = PySlice_AdjustIndices(Py_SIZE(aa), &start, &stop, step);
    if (len <= 0) {
        res = PyList_New(0);
    }
    else if (step == 1) {
        res = make_result_1(aa, start, stop);
        /*
        function make_result_1(aa, start, stop):
            result = []
            for (cur = start; cur < stop; cur += 1):
                result.append(aa[cur])
        */
    }
    else {
        res = make_result_2(aa, start, step, len);
        /*
        function make_result_2(aa, start, step, len):
            result = []
            for (cur = start, i = 0; i < len; cur += step, i++):
                result.append(aa[cur])
        */
    }
    return res;
}

Py_ssize_t
PySlice_AdjustIndices(Py_ssize_t length,
                      Py_ssize_t *start, Py_ssize_t *stop, Py_ssize_t step)
{
    if (*start < 0) {
        *start += length;
        if (*start < 0) {
            *start = (step < 0) ? -1 : 0;
        }
    }
    else if (*start >= length) {
        *start = (step < 0) ? length - 1 : length;
    }

    if (*stop < 0) {
        *stop += length;
        if (*stop < 0) {
            *stop = (step < 0) ? -1 : 0;
        }
    }
    else if (*stop >= length) {
        *stop = (step < 0) ? length - 1 : length;
    }

    if (step < 0) {
        if (*stop < *start) {
            return (*start - *stop - 1) / (-step) + 1;
        }
    }
    else {
        if (*start < *stop) {
            return (*stop - *start - 1) / step + 1;
        }
    }
    return 0;
}

If written in Python, it may look like this (just a rough idea):

def get_slice(a, start=None, stop=None, step=1) -> list:
    # a[start:stop:step]

    INF = float("inf")

    if step == 0: raise Exception("step cannot be 0! U GG")

    if start is None:
        start = 0 if step > 0 else INF
    if stop is None:
        stop = INF if step > 0 else -INF
    
    length = len(a)

    if start < 0 and start >= -length:
        start = start + length
    elif start < -length:
        start = 0 if step > 0 else -1
    elif start >= length:
        start = length if step > 0 else length - 1

    if stop < 0 and stop >= -length:
        stop = stop + length
    elif stop < -length:
        stop = 0 if step > 0 else -1
    elif stop >= length:
        stop = length if step > 0 else length - 1

    res = []
    if step < 0 and start > stop:
        cur = start
        while cur < stop:
            res.append(a[cur])
            cur += step
        return res
    elif step > 0 and start < stop:
        cur = start
        while cur > stop:
            res.append(a[cur])
            cur += step
        return res
    else:
        return []

So that's all the rules we are interested in.