@@ -14309,83 +14309,65 @@ unicode_getnewargs(PyObject *v, PyObject *Py_UNUSED(ignored))
1430914309}
1431014310
1431114311/*
14312- This function searchs the longest common leading whitespace
14313- of all lines in the [src, end).
14314- It returns the length of the common leading whitespace and sets `output` to
14315- point to the beginning of the common leading whitespace if length > 0.
14312+ Find the longest common leading whitespace among a list of lines.
14313+ Whitespace-only lines are ignored.
14314+ Returns the margin length (>= 0).
1431614315*/
1431714316static Py_ssize_t
14318- search_longest_common_leading_whitespace (
14319- const char * const src ,
14320- const char * const end ,
14321- const char * * output )
14322- {
14323- // [_start, _start + _len)
14324- // describes the current longest common leading whitespace
14325- const char * _start = NULL ;
14326- Py_ssize_t _len = 0 ;
14327-
14328- for (const char * iter = src ; iter < end ; ++ iter ) {
14329- const char * line_start = iter ;
14330- const char * leading_whitespace_end = NULL ;
14331-
14332- // scan the whole line
14333- while (iter < end && * iter != '\n' ) {
14334- if (!leading_whitespace_end && * iter != ' ' && * iter != '\t' ) {
14335- /* `iter` points to the first non-whitespace character
14336- in this line */
14337- if (iter == line_start ) {
14338- // some line has no indent, fast exit!
14339- return 0 ;
14340- }
14341- leading_whitespace_end = iter ;
14342- }
14343- ++ iter ;
14344- }
14317+ search_longest_common_leading_whitespace (PyObject * lines , Py_ssize_t nlines )
14318+ {
14319+ PyObject * smallest = NULL , * largest = NULL ;
14320+ for (Py_ssize_t i = 0 ; i < nlines ; i ++ ) {
14321+ PyObject * line = PyList_GET_ITEM (lines , i );
14322+ Py_ssize_t linelen = PyUnicode_GET_LENGTH (line );
1434514323
14346- // if this line has all white space, skip it
14347- if (!leading_whitespace_end ) {
14324+ if (linelen == 0 ) {
1434814325 continue ;
1434914326 }
1435014327
14351- if (!_start ) {
14352- // update the first leading whitespace
14353- _start = line_start ;
14354- _len = leading_whitespace_end - line_start ;
14355- assert (_len > 0 );
14328+ int kind = PyUnicode_KIND (line );
14329+ void * data = PyUnicode_DATA (line );
14330+ int all_ws = 1 ;
14331+ for (Py_ssize_t j = 0 ; j < linelen ; j ++ ) {
14332+ if (!Py_UNICODE_ISSPACE (PyUnicode_READ (kind , data , j ))) {
14333+ all_ws = 0 ;
14334+ break ;
14335+ }
14336+ }
14337+ if (all_ws ) {
14338+ continue ;
1435614339 }
14357- else {
14358- /* We then compare with the current longest leading whitespace.
1435914340
14360- [line_start, leading_whitespace_end) is the leading
14361- whitespace of this line,
14341+ if (smallest == NULL || PyObject_RichCompareBool (line , smallest , Py_LT )) {
14342+ smallest = line ;
14343+ }
14344+ if (largest == NULL || PyObject_RichCompareBool (line , largest , Py_GT )) {
14345+ largest = line ;
14346+ }
14347+ }
1436214348
14363- [_start, _start + _len) is the leading whitespace of the
14364- current longest leading whitespace. */
14365- Py_ssize_t new_len = 0 ;
14366- const char * _iter = _start , * line_iter = line_start ;
14349+ if (smallest == NULL || largest == NULL ) {
14350+ return 0 ;
14351+ }
1436714352
14368- while ( _iter < _start + _len && line_iter < leading_whitespace_end
14369- && * _iter == * line_iter )
14370- {
14371- ++ _iter ;
14372- ++ line_iter ;
14373- ++ new_len ;
14374- }
14353+ Py_ssize_t margin = 0 ;
14354+ Py_ssize_t minlen = Py_MIN ( PyUnicode_GET_LENGTH ( smallest ),
14355+ PyUnicode_GET_LENGTH ( largest ));
14356+ int skind = PyUnicode_KIND ( smallest ) ;
14357+ int lkind = PyUnicode_KIND ( largest ) ;
14358+ const void * sdata = PyUnicode_DATA ( smallest ) ;
14359+ const void * ldata = PyUnicode_DATA ( largest );
1437514360
14376- _len = new_len ;
14377- if ( _len == 0 ) {
14378- // No common things now, fast exit!
14379- return 0 ;
14380- }
14361+ while ( margin < minlen ) {
14362+ Py_UCS4 c1 = PyUnicode_READ ( skind , sdata , margin );
14363+ Py_UCS4 c2 = PyUnicode_READ ( lkind , ldata , margin );
14364+ if ( c1 != c2 || !( c1 == ' ' || c1 == '\t' )) {
14365+ break ;
1438114366 }
14367+ margin ++ ;
1438214368 }
1438314369
14384- assert (_len >= 0 );
14385- if (_len > 0 ) {
14386- * output = _start ;
14387- }
14388- return _len ;
14370+ return margin ;
1438914371}
1439014372
1439114373/* Dedent a string.
@@ -14395,74 +14377,58 @@ search_longest_common_leading_whitespace(
1439514377PyObject *
1439614378_PyUnicode_Dedent (PyObject * unicode )
1439714379{
14398- Py_ssize_t src_len = 0 ;
14399- const char * src = PyUnicode_AsUTF8AndSize (unicode , & src_len );
14400- if (!src ) {
14380+ PyObject * sep = PyUnicode_FromString ("\n" );
14381+ if (sep == NULL ) {
1440114382 return NULL ;
1440214383 }
14403- assert (src_len >= 0 );
14404- if (src_len == 0 ) {
14405- return Py_NewRef (unicode );
14406- }
14407-
14408- const char * const end = src + src_len ;
14409-
14410- // [whitespace_start, whitespace_start + whitespace_len)
14411- // describes the current longest common leading whitespace
14412- const char * whitespace_start = NULL ;
14413- Py_ssize_t whitespace_len = search_longest_common_leading_whitespace (
14414- src , end , & whitespace_start );
14415-
14416- if (whitespace_len == 0 ) {
14417- return Py_NewRef (unicode );
14384+ PyObject * lines = PyUnicode_Split (unicode , sep , -1 );
14385+ Py_DECREF (sep );
14386+ if (lines == NULL ) {
14387+ return NULL ;
1441814388 }
14389+ Py_ssize_t nlines = PyList_GET_SIZE (lines );
14390+ Py_ssize_t margin = search_longest_common_leading_whitespace (lines , nlines );
1441914391
14420- // now we should trigger a dedent
14421- char * dest = PyMem_Malloc (src_len );
14422- if (!dest ) {
14423- PyErr_NoMemory ();
14392+ PyUnicodeWriter * writer = PyUnicodeWriter_Create (0 );
14393+ if (writer == NULL ) {
14394+ Py_DECREF (lines );
1442414395 return NULL ;
1442514396 }
14426- char * dest_iter = dest ;
1442714397
14428- for (const char * iter = src ; iter < end ; ++ iter ) {
14429- const char * line_start = iter ;
14430- bool in_leading_space = true ;
14398+ for (Py_ssize_t i = 0 ; i < nlines ; i ++ ) {
14399+ PyObject * line = PyList_GET_ITEM ( lines , i ) ;
14400+ Py_ssize_t linelen = PyUnicode_GET_LENGTH ( line ) ;
1443114401
14432- // iterate over a line to find the end of a line
14433- while (iter < end && * iter != '\n' ) {
14434- if (in_leading_space && * iter != ' ' && * iter != '\t' ) {
14435- in_leading_space = false;
14402+ int all_ws = 1 ;
14403+ int kind = PyUnicode_KIND (line );
14404+ void * data = PyUnicode_DATA (line );
14405+ for (Py_ssize_t j = 0 ; j < linelen ; j ++ ) {
14406+ if (!Py_UNICODE_ISSPACE (PyUnicode_READ (kind , data , j ))) {
14407+ all_ws = 0 ;
14408+ break ;
1443614409 }
14437- ++ iter ;
1443814410 }
1443914411
14440- // invariant: *iter == '\n' or iter == end
14441- bool append_newline = iter < end ;
14442-
14443- // if this line has all white space, write '\n' and continue
14444- if ( in_leading_space && append_newline ) {
14445- * dest_iter ++ = '\n' ;
14446- continue ;
14412+ if (! all_ws ) {
14413+ Py_ssize_t start = Py_MIN ( margin , linelen ) ;
14414+ if ( PyUnicodeWriter_WriteSubstring ( writer , line , start , linelen ) < 0 ) {
14415+ PyUnicodeWriter_Discard ( writer );
14416+ Py_DECREF ( lines );
14417+ return NULL ;
14418+ }
1444714419 }
1444814420
14449- /* copy [new_line_start + whitespace_len, iter) to buffer, then
14450- conditionally append '\n' */
14451-
14452- Py_ssize_t new_line_len = iter - line_start - whitespace_len ;
14453- assert (new_line_len >= 0 );
14454- memcpy (dest_iter , line_start + whitespace_len , new_line_len );
14455-
14456- dest_iter += new_line_len ;
14457-
14458- if (append_newline ) {
14459- * dest_iter ++ = '\n' ;
14421+ if (i < nlines - 1 ) {
14422+ if (PyUnicodeWriter_WriteChar (writer , '\n' ) < 0 ) {
14423+ PyUnicodeWriter_Discard (writer );
14424+ Py_DECREF (lines );
14425+ return NULL ;
14426+ }
1446014427 }
1446114428 }
1446214429
14463- PyObject * res = PyUnicode_FromStringAndSize (dest , dest_iter - dest );
14464- PyMem_Free (dest );
14465- return res ;
14430+ Py_DECREF (lines );
14431+ return PyUnicodeWriter_Finish (writer );
1446614432}
1446714433
1446814434static PyMethodDef unicode_methods [] = {
0 commit comments