csv.Sniffer._guess_quote_and_delimiter

Path 1: 13 calls (0.46)

'05/05/03?05/05/03?05/05/03?05/05/03?05/05/03?05/05/03\n05/05/03?05/05/03?05/05/03?05/05/03?05/05/03?05/05/03\n05/05/03?05/05/03?05/05/03?05/05/03?05/...

None (11) '?,' (1) '/,' (1)

('', False, None, 0) (13)

1def _guess_quote_and_delimiter(self, data, delimiters):
2        """
3        Looks for text enclosed between two identical quotes
4        (the probable quotechar) which are preceded and followed
5        by the same character (the probable delimiter).
6        For example:
7                         ,'some text',
8        The quote with the most wins, same with the delimiter.
9        If there is no quotechar the delimiter can't be determined
10        this way.
11        """
12
13        matches = []
14        for restr in (r'(?P<delim>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?P=delim)', # ,".*?",
15                      r'(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?P<delim>[^\w\n"\'])(?P<space> ?)',   #  ".*?",
16                      r'(?P<delim>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?:$|\n)',   # ,".*?"
17                      r'(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?:$|\n)'):                            #  ".*?" (no delim, no space)
18            regexp = re.compile(restr, re.DOTALL | re.MULTILINE)
19            matches = regexp.findall(data)
20            if matches:
21                break
22
23        if not matches:
24            # (quotechar, doublequote, delimiter, skipinitialspace)
25            return ('', False, None, 0)
26        quotes = {}
27        delims = {}
28        spaces = 0
29        groupindex = regexp.groupindex
30        for m in matches:
31            n = groupindex['quote'] - 1
32            key = m[n]
33            if key:
34                quotes[key] = quotes.get(key, 0) + 1
35            try:
36                n = groupindex['delim'] - 1
37                key = m[n]
38            except KeyError:
39                continue
40            if key and (delimiters is None or key in delimiters):
41                delims[key] = delims.get(key, 0) + 1
42            try:
43                n = groupindex['space'] - 1
44            except KeyError:
45                continue
46            if m[n]:
47                spaces += 1
48
49        quotechar = max(quotes, key=quotes.get)
50
51        if delims:
52            delim = max(delims, key=delims.get)
53            skipinitialspace = delims[delim] == spaces
54            if delim == '\n': # most likely a file with a single column
55                delim = ''
56        else:
57            # there is *no* delimiter, it's a single column of quoted data
58            delim = ''
59            skipinitialspace = 0
60
61        # if we see an extra quote between delimiters, we've got a
62        # double quoted format
63        dq_regexp = re.compile(
64                               r"((%(delim)s)|^)\W*%(quote)s[^%(delim)s\n]*%(quote)s[^%(delim)s\n]*%(quote)s\W*((%(delim)s)|$)" % \
65                               {'delim':re.escape(delim), 'quote':quotechar}, re.MULTILINE)
66
67
68
69        if dq_regexp.search(data):
70            doublequote = True
71        else:
72            doublequote = False
73
74        return (quotechar, doublequote, delim, skipinitialspace)

Path 2: 10 calls (0.36)

"'a'|'b'|'c'\r\n'd'|e|f\r\n" (1) '"venue","city","state","date","performers"\n' (1) '"venue"+"city"+"state"+"date"+"performers"\n' (1) ";'123;4';" (1)...

None (7) ',;' (3)

('"', False, ',', False) (4) ("'", False, ';', False) (3) ('"', False, '+', False) (2) ("'", False, '|', False) (1)

1def _guess_quote_and_delimiter(self, data, delimiters):
2        """
3        Looks for text enclosed between two identical quotes
4        (the probable quotechar) which are preceded and followed
5        by the same character (the probable delimiter).
6        For example:
7                         ,'some text',
8        The quote with the most wins, same with the delimiter.
9        If there is no quotechar the delimiter can't be determined
10        this way.
11        """
12
13        matches = []
14        for restr in (r'(?P<delim>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?P=delim)', # ,".*?",
15                      r'(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?P<delim>[^\w\n"\'])(?P<space> ?)',   #  ".*?",
16                      r'(?P<delim>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?:$|\n)',   # ,".*?"
17                      r'(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?:$|\n)'):                            #  ".*?" (no delim, no space)
18            regexp = re.compile(restr, re.DOTALL | re.MULTILINE)
19            matches = regexp.findall(data)
20            if matches:
21                break
22
23        if not matches:
24            # (quotechar, doublequote, delimiter, skipinitialspace)
25            return ('', False, None, 0)
26        quotes = {}
27        delims = {}
28        spaces = 0
29        groupindex = regexp.groupindex
30        for m in matches:
31            n = groupindex['quote'] - 1
32            key = m[n]
33            if key:
34                quotes[key] = quotes.get(key, 0) + 1
35            try:
36                n = groupindex['delim'] - 1
37                key = m[n]
38            except KeyError:
39                continue
40            if key and (delimiters is None or key in delimiters):
41                delims[key] = delims.get(key, 0) + 1
42            try:
43                n = groupindex['space'] - 1
44            except KeyError:
45                continue
46            if m[n]:
47                spaces += 1
48
49        quotechar = max(quotes, key=quotes.get)
50
51        if delims:
52            delim = max(delims, key=delims.get)
53            skipinitialspace = delims[delim] == spaces
54            if delim == '\n': # most likely a file with a single column
55                delim = ''
56        else:
57            # there is *no* delimiter, it's a single column of quoted data
58            delim = ''
59            skipinitialspace = 0
60
61        # if we see an extra quote between delimiters, we've got a
62        # double quoted format
63        dq_regexp = re.compile(
64                               r"((%(delim)s)|^)\W*%(quote)s[^%(delim)s\n]*%(quote)s[^%(delim)s\n]*%(quote)s\W*((%(delim)s)|$)" % \
65                               {'delim':re.escape(delim), 'quote':quotechar}, re.MULTILINE)
66
67
68
69        if dq_regexp.search(data):
70            doublequote = True
71        else:
72            doublequote = False
73
74        return (quotechar, doublequote, delim, skipinitialspace)

Path 3: 2 calls (0.07)

"'Harry''s'+ Arlington Heights'+ 'IL'+ '2/1/03'+ 'Kimi Hayes'\n'Shark City'+ Glendale Heights'+' IL'+ '12/28/02'+ 'Prezence'\n'Tommy''s Place'+ Blue I...

None (2)

("'", True, '+', True) (2)

1def _guess_quote_and_delimiter(self, data, delimiters):
2        """
3        Looks for text enclosed between two identical quotes
4        (the probable quotechar) which are preceded and followed
5        by the same character (the probable delimiter).
6        For example:
7                         ,'some text',
8        The quote with the most wins, same with the delimiter.
9        If there is no quotechar the delimiter can't be determined
10        this way.
11        """
12
13        matches = []
14        for restr in (r'(?P<delim>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?P=delim)', # ,".*?",
15                      r'(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?P<delim>[^\w\n"\'])(?P<space> ?)',   #  ".*?",
16                      r'(?P<delim>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?:$|\n)',   # ,".*?"
17                      r'(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?:$|\n)'):                            #  ".*?" (no delim, no space)
18            regexp = re.compile(restr, re.DOTALL | re.MULTILINE)
19            matches = regexp.findall(data)
20            if matches:
21                break
22
23        if not matches:
24            # (quotechar, doublequote, delimiter, skipinitialspace)
25            return ('', False, None, 0)
26        quotes = {}
27        delims = {}
28        spaces = 0
29        groupindex = regexp.groupindex
30        for m in matches:
31            n = groupindex['quote'] - 1
32            key = m[n]
33            if key:
34                quotes[key] = quotes.get(key, 0) + 1
35            try:
36                n = groupindex['delim'] - 1
37                key = m[n]
38            except KeyError:
39                continue
40            if key and (delimiters is None or key in delimiters):
41                delims[key] = delims.get(key, 0) + 1
42            try:
43                n = groupindex['space'] - 1
44            except KeyError:
45                continue
46            if m[n]:
47                spaces += 1
48
49        quotechar = max(quotes, key=quotes.get)
50
51        if delims:
52            delim = max(delims, key=delims.get)
53            skipinitialspace = delims[delim] == spaces
54            if delim == '\n': # most likely a file with a single column
55                delim = ''
56        else:
57            # there is *no* delimiter, it's a single column of quoted data
58            delim = ''
59            skipinitialspace = 0
60
61        # if we see an extra quote between delimiters, we've got a
62        # double quoted format
63        dq_regexp = re.compile(
64                               r"((%(delim)s)|^)\W*%(quote)s[^%(delim)s\n]*%(quote)s[^%(delim)s\n]*%(quote)s\W*((%(delim)s)|$)" % \
65                               {'delim':re.escape(delim), 'quote':quotechar}, re.MULTILINE)
66
67
68
69        if dq_regexp.search(data):
70            doublequote = True
71        else:
72            doublequote = False
73
74        return (quotechar, doublequote, delim, skipinitialspace)

Path 4: 2 calls (0.07)

"'Harry''s':'Arlington Heights':'IL':'2/1/03':'Kimi Hayes'\n'Shark City':'Glendale Heights':'IL':'12/28/02':'Prezence'\n'Tommy''s Place':'Blue Island'...

None (2)

("'", True, ':', False) (2)

1def _guess_quote_and_delimiter(self, data, delimiters):
2        """
3        Looks for text enclosed between two identical quotes
4        (the probable quotechar) which are preceded and followed
5        by the same character (the probable delimiter).
6        For example:
7                         ,'some text',
8        The quote with the most wins, same with the delimiter.
9        If there is no quotechar the delimiter can't be determined
10        this way.
11        """
12
13        matches = []
14        for restr in (r'(?P<delim>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?P=delim)', # ,".*?",
15                      r'(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?P<delim>[^\w\n"\'])(?P<space> ?)',   #  ".*?",
16                      r'(?P<delim>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?:$|\n)',   # ,".*?"
17                      r'(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?:$|\n)'):                            #  ".*?" (no delim, no space)
18            regexp = re.compile(restr, re.DOTALL | re.MULTILINE)
19            matches = regexp.findall(data)
20            if matches:
21                break
22
23        if not matches:
24            # (quotechar, doublequote, delimiter, skipinitialspace)
25            return ('', False, None, 0)
26        quotes = {}
27        delims = {}
28        spaces = 0
29        groupindex = regexp.groupindex
30        for m in matches:
31            n = groupindex['quote'] - 1
32            key = m[n]
33            if key:
34                quotes[key] = quotes.get(key, 0) + 1
35            try:
36                n = groupindex['delim'] - 1
37                key = m[n]
38            except KeyError:
39                continue
40            if key and (delimiters is None or key in delimiters):
41                delims[key] = delims.get(key, 0) + 1
42            try:
43                n = groupindex['space'] - 1
44            except KeyError:
45                continue
46            if m[n]:
47                spaces += 1
48
49        quotechar = max(quotes, key=quotes.get)
50
51        if delims:
52            delim = max(delims, key=delims.get)
53            skipinitialspace = delims[delim] == spaces
54            if delim == '\n': # most likely a file with a single column
55                delim = ''
56        else:
57            # there is *no* delimiter, it's a single column of quoted data
58            delim = ''
59            skipinitialspace = 0
60
61        # if we see an extra quote between delimiters, we've got a
62        # double quoted format
63        dq_regexp = re.compile(
64                               r"((%(delim)s)|^)\W*%(quote)s[^%(delim)s\n]*%(quote)s[^%(delim)s\n]*%(quote)s\W*((%(delim)s)|$)" % \
65                               {'delim':re.escape(delim), 'quote':quotechar}, re.MULTILINE)
66
67
68
69        if dq_regexp.search(data):
70            doublequote = True
71        else:
72            doublequote = False
73
74        return (quotechar, doublequote, delim, skipinitialspace)

Path 5: 1 calls (0.04)

"'123;4'" (1)

',;' (1)

("'", False, '', 0) (1)

KeyError (1)

1def _guess_quote_and_delimiter(self, data, delimiters):
2        """
3        Looks for text enclosed between two identical quotes
4        (the probable quotechar) which are preceded and followed
5        by the same character (the probable delimiter).
6        For example:
7                         ,'some text',
8        The quote with the most wins, same with the delimiter.
9        If there is no quotechar the delimiter can't be determined
10        this way.
11        """
12
13        matches = []
14        for restr in (r'(?P<delim>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?P=delim)', # ,".*?",
15                      r'(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?P<delim>[^\w\n"\'])(?P<space> ?)',   #  ".*?",
16                      r'(?P<delim>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?:$|\n)',   # ,".*?"
17                      r'(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?:$|\n)'):                            #  ".*?" (no delim, no space)
18            regexp = re.compile(restr, re.DOTALL | re.MULTILINE)
19            matches = regexp.findall(data)
20            if matches:
21                break
22
23        if not matches:
24            # (quotechar, doublequote, delimiter, skipinitialspace)
25            return ('', False, None, 0)
26        quotes = {}
27        delims = {}
28        spaces = 0
29        groupindex = regexp.groupindex
30        for m in matches:
31            n = groupindex['quote'] - 1
32            key = m[n]
33            if key:
34                quotes[key] = quotes.get(key, 0) + 1
35            try:
36                n = groupindex['delim'] - 1
37                key = m[n]
38            except KeyError:
39                continue
40            if key and (delimiters is None or key in delimiters):
41                delims[key] = delims.get(key, 0) + 1
42            try:
43                n = groupindex['space'] - 1
44            except KeyError:
45                continue
46            if m[n]:
47                spaces += 1
48
49        quotechar = max(quotes, key=quotes.get)
50
51        if delims:
52            delim = max(delims, key=delims.get)
53            skipinitialspace = delims[delim] == spaces
54            if delim == '\n': # most likely a file with a single column
55                delim = ''
56        else:
57            # there is *no* delimiter, it's a single column of quoted data
58            delim = ''
59            skipinitialspace = 0
60
61        # if we see an extra quote between delimiters, we've got a
62        # double quoted format
63        dq_regexp = re.compile(
64                               r"((%(delim)s)|^)\W*%(quote)s[^%(delim)s\n]*%(quote)s[^%(delim)s\n]*%(quote)s\W*((%(delim)s)|$)" % \
65                               {'delim':re.escape(delim), 'quote':quotechar}, re.MULTILINE)
66
67
68
69        if dq_regexp.search(data):
70            doublequote = True
71        else:
72            doublequote = False
73
74        return (quotechar, doublequote, delim, skipinitialspace)

Method: csv.Sniffer._guess_quote_and_delimiter