diff -r 3528cc01c4d9 Lib/test/test_ast.py
--- a/Lib/test/test_ast.py Sun Dec 30 22:15:37 2012 +0100
+++ b/Lib/test/test_ast.py Sun Dec 30 22:23:34 2012 +0100
@@ -289,6 +289,25 @@
self.assertRaises(TypeError, ast.Num, 1, 2)
self.assertRaises(TypeError, ast.Num, 1, 2, lineno=0)
+    def test_multi_line_docstring_col_offset_and_lineno_issue16806(self):
+        node = ast.parse(
+            '"""line one\nline two"""\n\n'
+            'def foo():\n  """line one\n  line two"""\n\n'
+            '  def bar():\n    """line one\n    line two"""\n'
+            '  """line one\n  line two"""\n'
+            '"""line one\nline two"""\n\n'
+        )
+        self.assertEqual(node.body[0].col_offset, 0)  # leading module string
+        self.assertEqual(node.body[0].lineno, 1)
+        self.assertEqual(node.body[1].body[0].col_offset, 2)  # foo's docstring
+        self.assertEqual(node.body[1].body[0].lineno, 5)
+        self.assertEqual(node.body[1].body[1].body[0].col_offset, 4)  # bar's docstring
+        self.assertEqual(node.body[1].body[1].body[0].lineno, 9)
+        self.assertEqual(node.body[1].body[2].col_offset, 2)  # string after bar
+        self.assertEqual(node.body[1].body[2].lineno, 11)
+        self.assertEqual(node.body[2].col_offset, 0)  # trailing module string
+        self.assertEqual(node.body[2].lineno, 13)
+
def test_module(self):
body = [ast.Num(42)]
x = ast.Module(body)
diff -r 3528cc01c4d9 Parser/parsetok.c
--- a/Parser/parsetok.c Sun Dec 30 22:15:37 2012 +0100
+++ b/Parser/parsetok.c Sun Dec 30 22:23:34 2012 +0100
@@ -155,6 +155,8 @@
size_t len;
char *str;
int col_offset;
+ int lineno; /* line number reported to PyParser_AddToken */
+ const char *line_start; /* start of the line col_offset is measured from */
type = PyTokenizer_Get(tok, &a, &b);
if (type == ERRORTOKEN) {
@@ -190,13 +192,19 @@
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
#endif
- if (a >= tok->line_start)
- col_offset = a - tok->line_start;
+ /* STRING tokens, in particular multi-line strings, take their
+ line number and line start from tok->first_lineno and
+ tok->multi_line_start, so that both the starting line number
+ and the column offset come out right (cf. issue 16806). */
+ lineno = type == STRING ? tok->first_lineno : tok->lineno;
+ line_start = type == STRING ? tok->multi_line_start : tok->line_start;
+ if (a >= line_start)
+ col_offset = a - line_start;
else
- col_offset = -1;
+ col_offset = -1;
if ((err_ret->error =
- PyParser_AddToken(ps, (int)type, str, tok->lineno, col_offset,
+ PyParser_AddToken(ps, (int)type, str, lineno, col_offset,
&(err_ret->expected))) != E_OK) {
if (err_ret->error != E_DONE) {
PyObject_FREE(str);
diff -r 3528cc01c4d9 Parser/tokenizer.c
--- a/Parser/tokenizer.c Sun Dec 30 22:15:37 2012 +0100
+++ b/Parser/tokenizer.c Sun Dec 30 22:23:34 2012 +0100
@@ -1529,6 +1529,14 @@
int quote = c;
int triple = 0;
int tripcount = 0;
+
+ /* Record the line number and line start at the point where
+ this string begins; they are kept in first_lineno and
+ multi_line_start so the starting line and column offset of
+ a multi-line string stay available (cf. issue 16806). */
+ tok->first_lineno = tok->lineno;
+ tok->multi_line_start = tok->line_start;
+
for (;;) {
c = tok_nextc(tok);
if (c == '\n') {
diff -r 3528cc01c4d9 Parser/tokenizer.h
--- a/Parser/tokenizer.h Sun Dec 30 22:15:37 2012 +0100
+++ b/Parser/tokenizer.h Sun Dec 30 22:23:34 2012 +0100
@@ -31,6 +31,8 @@
int pendin; /* Pending indents (if > 0) or dedents (if < 0) */
char *prompt, *nextprompt; /* For interactive prompting */
int lineno; /* Current line number */
+ int first_lineno; /* Line number on which the most recently scanned
+ string (single- or multi-line) began (cf. issue 16806) */
int level; /* () [] {} Parentheses nesting level */
/* Used to allow free continuations inside them */
/* Stuff for checking on different tab sizes */
@@ -46,6 +48,9 @@
char *encoding;
int cont_line; /* whether we are in a continuation line. */
const char* line_start; /* pointer to start of current line */
+ const char* multi_line_start; /* pointer to start of the line on
+ which the most recently scanned string
+ (single- or multi-line) began (cf. issue 16806) */
#ifndef PGEN
PyObject *decoding_readline; /* codecs.open(...).readline */
PyObject *decoding_buffer;