Method: csv.Sniffer._guess_quote_and_delimiter
Calls: 28, Exceptions: 1, Paths: 5
Path 1: 13 calls (0.46)
'05/05/03?05/05/03?05/05/03?05/05/03?05/05/03?05/05/03\n05/05/03?05/05/03?05/05/03?05/05/03?05/05/03?05/05/03\n05/05/03?05/05/03?05/05/03?05/05/03?05/...
None (11) '?,' (1) '/,' (1)
('', False, None, 0) (13)
def _guess_quote_and_delimiter(self, data, delimiters):
    """
    Guess the quote character and delimiter from quoted fields in data.

    Looks for text enclosed between two identical quotes
    (the probable quotechar) which are preceded and followed
    by the same character (the probable delimiter).
    For example:
                     ,'some text',
    The quote with the most wins, same with the delimiter.
    If there is no quotechar the delimiter can't be determined
    this way.

    data is the sample text to inspect.  delimiters is either None
    (every candidate delimiter is accepted) or a string/container
    holding the only delimiters that may be reported.

    Returns the tuple
        (quotechar, doublequote, delimiter, skipinitialspace)
    and ('', False, None, 0) when no quoted text is found at all.
    """
    # Tried from most to least specific; the first pattern that matches
    # anything is the only one used for the statistics below.
    patterns = (
        # ,".*?",
        r'(?P<delim>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?P=delim)',
        #  ".*?",
        r'(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?P<delim>[^\w\n"\'])(?P<space> ?)',
        # ,".*?"
        r'(?P<delim>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?:$|\n)',
        #  ".*?" (no delim, no space)
        r'(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?:$|\n)',
    )

    matches = []
    for restr in patterns:
        regexp = re.compile(restr, re.DOTALL | re.MULTILINE)
        matches = list(regexp.finditer(data))
        if matches:
            break

    if not matches:
        # (quotechar, doublequote, delimiter, skipinitialspace)
        return ('', False, None, 0)

    # 'delim' and 'space' are always defined together, and only the last
    # pattern lacks them, so a single check outside the loop is enough.
    has_delim = 'delim' in regexp.groupindex

    quotes = {}
    delims = {}
    spaces = 0
    for m in matches:
        quote = m.group('quote')
        if quote:
            quotes[quote] = quotes.get(quote, 0) + 1
        if not has_delim:
            continue
        candidate = m.group('delim')
        if candidate and (delimiters is None or candidate in delimiters):
            delims[candidate] = delims.get(candidate, 0) + 1
        # NOTE(review): spaces are tallied for every match, including
        # matches whose delimiter was rejected above; kept as-is so the
        # results stay unchanged.
        if m.group('space'):
            spaces += 1

    # On a tie, max() keeps the candidate that was seen first.
    quotechar = max(quotes, key=quotes.get)

    if delims:
        delim = max(delims, key=delims.get)
        skipinitialspace = delims[delim] == spaces
        if delim == '\n':  # most likely a file with a single column
            delim = ''
    else:
        # there is *no* delimiter, it's a single column of quoted data
        delim = ''
        skipinitialspace = 0

    # if we see an extra quote between delimiters, we've got a
    # double quoted format
    dq_regexp = re.compile(
        r"((%(delim)s)|^)\W*%(quote)s[^%(delim)s\n]*%(quote)s[^%(delim)s\n]*%(quote)s\W*((%(delim)s)|$)"
        % {'delim': re.escape(delim), 'quote': quotechar},
        re.MULTILINE)
    doublequote = dq_regexp.search(data) is not None

    return (quotechar, doublequote, delim, skipinitialspace)
Path 2: 10 calls (0.36)
"'a'|'b'|'c'\r\n'd'|e|f\r\n" (1) '"venue","city","state","date","performers"\n' (1) '"venue"+"city"+"state"+"date"+"performers"\n' (1) ";'123;4';" (1)...
None (7) ',;' (3)
('"', False, ',', False) (4) ("'", False, ';', False) (3) ('"', False, '+', False) (2) ("'", False, '|', False) (1)
def _guess_quote_and_delimiter(self, data, delimiters):
    """
    Guess the quote character and delimiter from quoted fields in data.

    Looks for text enclosed between two identical quotes
    (the probable quotechar) which are preceded and followed
    by the same character (the probable delimiter).
    For example:
                     ,'some text',
    The quote with the most wins, same with the delimiter.
    If there is no quotechar the delimiter can't be determined
    this way.

    data is the sample text to inspect.  delimiters is either None
    (every candidate delimiter is accepted) or a string/container
    holding the only delimiters that may be reported.

    Returns the tuple
        (quotechar, doublequote, delimiter, skipinitialspace)
    and ('', False, None, 0) when no quoted text is found at all.
    """
    # Tried from most to least specific; the first pattern that matches
    # anything is the only one used for the statistics below.
    patterns = (
        # ,".*?",
        r'(?P<delim>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?P=delim)',
        #  ".*?",
        r'(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?P<delim>[^\w\n"\'])(?P<space> ?)',
        # ,".*?"
        r'(?P<delim>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?:$|\n)',
        #  ".*?" (no delim, no space)
        r'(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?:$|\n)',
    )

    matches = []
    for restr in patterns:
        regexp = re.compile(restr, re.DOTALL | re.MULTILINE)
        matches = list(regexp.finditer(data))
        if matches:
            break

    if not matches:
        # (quotechar, doublequote, delimiter, skipinitialspace)
        return ('', False, None, 0)

    # 'delim' and 'space' are always defined together, and only the last
    # pattern lacks them, so a single check outside the loop is enough.
    has_delim = 'delim' in regexp.groupindex

    quotes = {}
    delims = {}
    spaces = 0
    for m in matches:
        quote = m.group('quote')
        if quote:
            quotes[quote] = quotes.get(quote, 0) + 1
        if not has_delim:
            continue
        candidate = m.group('delim')
        if candidate and (delimiters is None or candidate in delimiters):
            delims[candidate] = delims.get(candidate, 0) + 1
        # NOTE(review): spaces are tallied for every match, including
        # matches whose delimiter was rejected above; kept as-is so the
        # results stay unchanged.
        if m.group('space'):
            spaces += 1

    # On a tie, max() keeps the candidate that was seen first.
    quotechar = max(quotes, key=quotes.get)

    if delims:
        delim = max(delims, key=delims.get)
        skipinitialspace = delims[delim] == spaces
        if delim == '\n':  # most likely a file with a single column
            delim = ''
    else:
        # there is *no* delimiter, it's a single column of quoted data
        delim = ''
        skipinitialspace = 0

    # if we see an extra quote between delimiters, we've got a
    # double quoted format
    dq_regexp = re.compile(
        r"((%(delim)s)|^)\W*%(quote)s[^%(delim)s\n]*%(quote)s[^%(delim)s\n]*%(quote)s\W*((%(delim)s)|$)"
        % {'delim': re.escape(delim), 'quote': quotechar},
        re.MULTILINE)
    doublequote = dq_regexp.search(data) is not None

    return (quotechar, doublequote, delim, skipinitialspace)
Path 3: 2 calls (0.07)
"'Harry''s'+ Arlington Heights'+ 'IL'+ '2/1/03'+ 'Kimi Hayes'\n'Shark City'+ Glendale Heights'+' IL'+ '12/28/02'+ 'Prezence'\n'Tommy''s Place'+ Blue I...
None (2)
("'", True, '+', True) (2)
def _guess_quote_and_delimiter(self, data, delimiters):
    """
    Guess the quote character and delimiter from quoted fields in data.

    Looks for text enclosed between two identical quotes
    (the probable quotechar) which are preceded and followed
    by the same character (the probable delimiter).
    For example:
                     ,'some text',
    The quote with the most wins, same with the delimiter.
    If there is no quotechar the delimiter can't be determined
    this way.

    data is the sample text to inspect.  delimiters is either None
    (every candidate delimiter is accepted) or a string/container
    holding the only delimiters that may be reported.

    Returns the tuple
        (quotechar, doublequote, delimiter, skipinitialspace)
    and ('', False, None, 0) when no quoted text is found at all.
    """
    # Tried from most to least specific; the first pattern that matches
    # anything is the only one used for the statistics below.
    patterns = (
        # ,".*?",
        r'(?P<delim>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?P=delim)',
        #  ".*?",
        r'(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?P<delim>[^\w\n"\'])(?P<space> ?)',
        # ,".*?"
        r'(?P<delim>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?:$|\n)',
        #  ".*?" (no delim, no space)
        r'(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?:$|\n)',
    )

    matches = []
    for restr in patterns:
        regexp = re.compile(restr, re.DOTALL | re.MULTILINE)
        matches = list(regexp.finditer(data))
        if matches:
            break

    if not matches:
        # (quotechar, doublequote, delimiter, skipinitialspace)
        return ('', False, None, 0)

    # 'delim' and 'space' are always defined together, and only the last
    # pattern lacks them, so a single check outside the loop is enough.
    has_delim = 'delim' in regexp.groupindex

    quotes = {}
    delims = {}
    spaces = 0
    for m in matches:
        quote = m.group('quote')
        if quote:
            quotes[quote] = quotes.get(quote, 0) + 1
        if not has_delim:
            continue
        candidate = m.group('delim')
        if candidate and (delimiters is None or candidate in delimiters):
            delims[candidate] = delims.get(candidate, 0) + 1
        # NOTE(review): spaces are tallied for every match, including
        # matches whose delimiter was rejected above; kept as-is so the
        # results stay unchanged.
        if m.group('space'):
            spaces += 1

    # On a tie, max() keeps the candidate that was seen first.
    quotechar = max(quotes, key=quotes.get)

    if delims:
        delim = max(delims, key=delims.get)
        skipinitialspace = delims[delim] == spaces
        if delim == '\n':  # most likely a file with a single column
            delim = ''
    else:
        # there is *no* delimiter, it's a single column of quoted data
        delim = ''
        skipinitialspace = 0

    # if we see an extra quote between delimiters, we've got a
    # double quoted format
    dq_regexp = re.compile(
        r"((%(delim)s)|^)\W*%(quote)s[^%(delim)s\n]*%(quote)s[^%(delim)s\n]*%(quote)s\W*((%(delim)s)|$)"
        % {'delim': re.escape(delim), 'quote': quotechar},
        re.MULTILINE)
    doublequote = dq_regexp.search(data) is not None

    return (quotechar, doublequote, delim, skipinitialspace)
Path 4: 2 calls (0.07)
"'Harry''s':'Arlington Heights':'IL':'2/1/03':'Kimi Hayes'\n'Shark City':'Glendale Heights':'IL':'12/28/02':'Prezence'\n'Tommy''s Place':'Blue Island'...
None (2)
("'", True, ':', False) (2)
def _guess_quote_and_delimiter(self, data, delimiters):
    """
    Guess the quote character and delimiter from quoted fields in data.

    Looks for text enclosed between two identical quotes
    (the probable quotechar) which are preceded and followed
    by the same character (the probable delimiter).
    For example:
                     ,'some text',
    The quote with the most wins, same with the delimiter.
    If there is no quotechar the delimiter can't be determined
    this way.

    data is the sample text to inspect.  delimiters is either None
    (every candidate delimiter is accepted) or a string/container
    holding the only delimiters that may be reported.

    Returns the tuple
        (quotechar, doublequote, delimiter, skipinitialspace)
    and ('', False, None, 0) when no quoted text is found at all.
    """
    # Tried from most to least specific; the first pattern that matches
    # anything is the only one used for the statistics below.
    patterns = (
        # ,".*?",
        r'(?P<delim>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?P=delim)',
        #  ".*?",
        r'(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?P<delim>[^\w\n"\'])(?P<space> ?)',
        # ,".*?"
        r'(?P<delim>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?:$|\n)',
        #  ".*?" (no delim, no space)
        r'(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?:$|\n)',
    )

    matches = []
    for restr in patterns:
        regexp = re.compile(restr, re.DOTALL | re.MULTILINE)
        matches = list(regexp.finditer(data))
        if matches:
            break

    if not matches:
        # (quotechar, doublequote, delimiter, skipinitialspace)
        return ('', False, None, 0)

    # 'delim' and 'space' are always defined together, and only the last
    # pattern lacks them, so a single check outside the loop is enough.
    has_delim = 'delim' in regexp.groupindex

    quotes = {}
    delims = {}
    spaces = 0
    for m in matches:
        quote = m.group('quote')
        if quote:
            quotes[quote] = quotes.get(quote, 0) + 1
        if not has_delim:
            continue
        candidate = m.group('delim')
        if candidate and (delimiters is None or candidate in delimiters):
            delims[candidate] = delims.get(candidate, 0) + 1
        # NOTE(review): spaces are tallied for every match, including
        # matches whose delimiter was rejected above; kept as-is so the
        # results stay unchanged.
        if m.group('space'):
            spaces += 1

    # On a tie, max() keeps the candidate that was seen first.
    quotechar = max(quotes, key=quotes.get)

    if delims:
        delim = max(delims, key=delims.get)
        skipinitialspace = delims[delim] == spaces
        if delim == '\n':  # most likely a file with a single column
            delim = ''
    else:
        # there is *no* delimiter, it's a single column of quoted data
        delim = ''
        skipinitialspace = 0

    # if we see an extra quote between delimiters, we've got a
    # double quoted format
    dq_regexp = re.compile(
        r"((%(delim)s)|^)\W*%(quote)s[^%(delim)s\n]*%(quote)s[^%(delim)s\n]*%(quote)s\W*((%(delim)s)|$)"
        % {'delim': re.escape(delim), 'quote': quotechar},
        re.MULTILINE)
    doublequote = dq_regexp.search(data) is not None

    return (quotechar, doublequote, delim, skipinitialspace)
Path 5: 1 calls (0.04)
"'123;4'" (1)
',;' (1)
("'", False, '', 0) (1)
KeyError (1)
def _guess_quote_and_delimiter(self, data, delimiters):
    """
    Guess the quote character and delimiter from quoted fields in data.

    Looks for text enclosed between two identical quotes
    (the probable quotechar) which are preceded and followed
    by the same character (the probable delimiter).
    For example:
                     ,'some text',
    The quote with the most wins, same with the delimiter.
    If there is no quotechar the delimiter can't be determined
    this way.

    data is the sample text to inspect.  delimiters is either None
    (every candidate delimiter is accepted) or a string/container
    holding the only delimiters that may be reported.

    Returns the tuple
        (quotechar, doublequote, delimiter, skipinitialspace)
    and ('', False, None, 0) when no quoted text is found at all.
    """
    # Tried from most to least specific; the first pattern that matches
    # anything is the only one used for the statistics below.
    patterns = (
        # ,".*?",
        r'(?P<delim>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?P=delim)',
        #  ".*?",
        r'(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?P<delim>[^\w\n"\'])(?P<space> ?)',
        # ,".*?"
        r'(?P<delim>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?:$|\n)',
        #  ".*?" (no delim, no space)
        r'(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?:$|\n)',
    )

    matches = []
    for restr in patterns:
        regexp = re.compile(restr, re.DOTALL | re.MULTILINE)
        matches = list(regexp.finditer(data))
        if matches:
            break

    if not matches:
        # (quotechar, doublequote, delimiter, skipinitialspace)
        return ('', False, None, 0)

    # 'delim' and 'space' are always defined together, and only the last
    # pattern lacks them, so a single check outside the loop is enough.
    has_delim = 'delim' in regexp.groupindex

    quotes = {}
    delims = {}
    spaces = 0
    for m in matches:
        quote = m.group('quote')
        if quote:
            quotes[quote] = quotes.get(quote, 0) + 1
        if not has_delim:
            continue
        candidate = m.group('delim')
        if candidate and (delimiters is None or candidate in delimiters):
            delims[candidate] = delims.get(candidate, 0) + 1
        # NOTE(review): spaces are tallied for every match, including
        # matches whose delimiter was rejected above; kept as-is so the
        # results stay unchanged.
        if m.group('space'):
            spaces += 1

    # On a tie, max() keeps the candidate that was seen first.
    quotechar = max(quotes, key=quotes.get)

    if delims:
        delim = max(delims, key=delims.get)
        skipinitialspace = delims[delim] == spaces
        if delim == '\n':  # most likely a file with a single column
            delim = ''
    else:
        # there is *no* delimiter, it's a single column of quoted data
        delim = ''
        skipinitialspace = 0

    # if we see an extra quote between delimiters, we've got a
    # double quoted format
    dq_regexp = re.compile(
        r"((%(delim)s)|^)\W*%(quote)s[^%(delim)s\n]*%(quote)s[^%(delim)s\n]*%(quote)s\W*((%(delim)s)|$)"
        % {'delim': re.escape(delim), 'quote': quotechar},
        re.MULTILINE)
    doublequote = dq_regexp.search(data) is not None

    return (quotechar, doublequote, delim, skipinitialspace)