csv.Sniffer.has_header

Path 1: 2 calls (0.25)

"Harry's, Arlington Heights, IL, 2/1/03, Kimi Hayes\nShark City, Glendale Heights, IL, 12/28/02, Prezence\nTommy's Place, Blue Island, IL, 12/28/02, B...

False (2)

ValueError (2)

def has_header(self, sample):
    """Guess whether the first row of *sample* is a header row.

    The sample is parsed with the dialect returned by ``self.sniff(sample)``.
    The first row is taken as the candidate header and the rows after it are
    examined.  For every column, each cell is classified as ``complex`` when
    ``complex(cell)`` succeeds, and otherwise by the length of its text.  A
    column stays in consideration only while every examined row agrees on
    that classification.

    Each surviving column then casts a vote: +1 when the header cell does
    *not* fit the column (it cannot be converted, or its length differs),
    -1 when it does fit.

    Args:
        sample: CSV text to inspect.

    Returns:
        True when the votes for a header outnumber the votes against it,
        False otherwise (including a tie or no surviving columns).

    Raises:
        Nothing is caught around ``self.sniff(sample)``, so any exception
        it raises propagates to the caller.
    """
    rdr = reader(StringIO(sample), self.sniff(sample))

    header = next(rdr)  # assume first row is header

    columns = len(header)
    # Every column starts with an undetermined (None) classification.
    columnTypes = dict.fromkeys(range(columns))

    checked = 0
    for row in rdr:
        # Arbitrary cap on the rows inspected, to keep it sane.  The guard
        # is "> 20" evaluated before the increment, so 21 rows are examined.
        if checked > 20:
            break
        checked += 1

        if len(row) != columns:
            continue  # skip rows that have irregular number of columns

        # Iterate over a snapshot of the keys: columns may be deleted below.
        for col in list(columnTypes):
            try:
                complex(row[col])
            except (ValueError, OverflowError):
                # Not numeric: fall back to the length of the string.
                thisType = len(row[col])
            else:
                thisType = complex

            if thisType != columnTypes[col]:
                if columnTypes[col] is None:
                    # First classification seen for this column.
                    columnTypes[col] = thisType
                else:
                    # Classification is inconsistent; drop the column
                    # from consideration.
                    del columnTypes[col]

    # Finally, compare results against the first row and "vote" on
    # whether it is a header.
    hasHeader = 0
    for col, colType in columnTypes.items():
        if isinstance(colType, int):  # it's a length
            if len(header[col]) != colType:
                hasHeader += 1
            else:
                hasHeader -= 1
        else:  # attempt typecast
            try:
                colType(header[col])
            except (ValueError, TypeError):
                # TypeError covers a column that is still None because no
                # regular data row was seen (calling None raises TypeError).
                hasHeader += 1
            else:
                hasHeader -= 1

    return hasHeader > 0

Path 2: 2 calls (0.25)

'"venue","city","state","date","performers"\nHarry\'s, Arlington Heights, IL, 2/1/03, Kimi Hayes\nShark City, Glendale Heights, IL, 12/28/02, Prezence...

True (2)

ValueError (2)

def has_header(self, sample):
    """Guess whether the first row of *sample* is a header row.

    The sample is parsed with the dialect returned by ``self.sniff(sample)``.
    The first row is taken as the candidate header and the rows after it are
    examined.  For every column, each cell is classified as ``complex`` when
    ``complex(cell)`` succeeds, and otherwise by the length of its text.  A
    column stays in consideration only while every examined row agrees on
    that classification.

    Each surviving column then casts a vote: +1 when the header cell does
    *not* fit the column (it cannot be converted, or its length differs),
    -1 when it does fit.

    Args:
        sample: CSV text to inspect.

    Returns:
        True when the votes for a header outnumber the votes against it,
        False otherwise (including a tie or no surviving columns).

    Raises:
        Nothing is caught around ``self.sniff(sample)``, so any exception
        it raises propagates to the caller.
    """
    rdr = reader(StringIO(sample), self.sniff(sample))

    header = next(rdr)  # assume first row is header

    columns = len(header)
    # Every column starts with an undetermined (None) classification.
    columnTypes = dict.fromkeys(range(columns))

    checked = 0
    for row in rdr:
        # Arbitrary cap on the rows inspected, to keep it sane.  The guard
        # is "> 20" evaluated before the increment, so 21 rows are examined.
        if checked > 20:
            break
        checked += 1

        if len(row) != columns:
            continue  # skip rows that have irregular number of columns

        # Iterate over a snapshot of the keys: columns may be deleted below.
        for col in list(columnTypes):
            try:
                complex(row[col])
            except (ValueError, OverflowError):
                # Not numeric: fall back to the length of the string.
                thisType = len(row[col])
            else:
                thisType = complex

            if thisType != columnTypes[col]:
                if columnTypes[col] is None:
                    # First classification seen for this column.
                    columnTypes[col] = thisType
                else:
                    # Classification is inconsistent; drop the column
                    # from consideration.
                    del columnTypes[col]

    # Finally, compare results against the first row and "vote" on
    # whether it is a header.
    hasHeader = 0
    for col, colType in columnTypes.items():
        if isinstance(colType, int):  # it's a length
            if len(header[col]) != colType:
                hasHeader += 1
            else:
                hasHeader -= 1
        else:  # attempt typecast
            try:
                colType(header[col])
            except (ValueError, TypeError):
                # TypeError covers a column that is still None because no
                # regular data row was seen (calling None raises TypeError).
                hasHeader += 1
            else:
                hasHeader -= 1

    return hasHeader > 0

Path 3: 2 calls (0.25)

'\nabc,def\nghijkl,mno\nghi,jkl\n' (1) '\nabc,def\nghijkl,mnop\nghi,jkl\n' (1)

False (2)

def has_header(self, sample):
    """Guess whether the first row of *sample* is a header row.

    The sample is parsed with the dialect returned by ``self.sniff(sample)``.
    The first row is taken as the candidate header and the rows after it are
    examined.  For every column, each cell is classified as ``complex`` when
    ``complex(cell)`` succeeds, and otherwise by the length of its text.  A
    column stays in consideration only while every examined row agrees on
    that classification.

    Each surviving column then casts a vote: +1 when the header cell does
    *not* fit the column (it cannot be converted, or its length differs),
    -1 when it does fit.

    Args:
        sample: CSV text to inspect.

    Returns:
        True when the votes for a header outnumber the votes against it,
        False otherwise (including a tie or no surviving columns).

    Raises:
        Nothing is caught around ``self.sniff(sample)``, so any exception
        it raises propagates to the caller.
    """
    rdr = reader(StringIO(sample), self.sniff(sample))

    header = next(rdr)  # assume first row is header

    columns = len(header)
    # Every column starts with an undetermined (None) classification.
    columnTypes = dict.fromkeys(range(columns))

    checked = 0
    for row in rdr:
        # Arbitrary cap on the rows inspected, to keep it sane.  The guard
        # is "> 20" evaluated before the increment, so 21 rows are examined.
        if checked > 20:
            break
        checked += 1

        if len(row) != columns:
            continue  # skip rows that have irregular number of columns

        # Iterate over a snapshot of the keys: columns may be deleted below.
        for col in list(columnTypes):
            try:
                complex(row[col])
            except (ValueError, OverflowError):
                # Not numeric: fall back to the length of the string.
                thisType = len(row[col])
            else:
                thisType = complex

            if thisType != columnTypes[col]:
                if columnTypes[col] is None:
                    # First classification seen for this column.
                    columnTypes[col] = thisType
                else:
                    # Classification is inconsistent; drop the column
                    # from consideration.
                    del columnTypes[col]

    # Finally, compare results against the first row and "vote" on
    # whether it is a header.
    hasHeader = 0
    for col, colType in columnTypes.items():
        if isinstance(colType, int):  # it's a length
            if len(header[col]) != colType:
                hasHeader += 1
            else:
                hasHeader -= 1
        else:  # attempt typecast
            try:
                colType(header[col])
            except (ValueError, TypeError):
                # TypeError covers a column that is still None because no
                # regular data row was seen (calling None raises TypeError).
                hasHeader += 1
            else:
                hasHeader -= 1

    return hasHeader > 0

Path 4: 1 call (0.12)

'"time","forces"\n 1,1.5\n 0.5,5+0j\n 0,0\n 1+1j,6\n' (1) ...

True (1)

ValueError (1)

def has_header(self, sample):
    """Guess whether the first row of *sample* is a header row.

    The sample is parsed with the dialect returned by ``self.sniff(sample)``.
    The first row is taken as the candidate header and the rows after it are
    examined.  For every column, each cell is classified as ``complex`` when
    ``complex(cell)`` succeeds, and otherwise by the length of its text.  A
    column stays in consideration only while every examined row agrees on
    that classification.

    Each surviving column then casts a vote: +1 when the header cell does
    *not* fit the column (it cannot be converted, or its length differs),
    -1 when it does fit.

    Args:
        sample: CSV text to inspect.

    Returns:
        True when the votes for a header outnumber the votes against it,
        False otherwise (including a tie or no surviving columns).

    Raises:
        Nothing is caught around ``self.sniff(sample)``, so any exception
        it raises propagates to the caller.
    """
    rdr = reader(StringIO(sample), self.sniff(sample))

    header = next(rdr)  # assume first row is header

    columns = len(header)
    # Every column starts with an undetermined (None) classification.
    columnTypes = dict.fromkeys(range(columns))

    checked = 0
    for row in rdr:
        # Arbitrary cap on the rows inspected, to keep it sane.  The guard
        # is "> 20" evaluated before the increment, so 21 rows are examined.
        if checked > 20:
            break
        checked += 1

        if len(row) != columns:
            continue  # skip rows that have irregular number of columns

        # Iterate over a snapshot of the keys: columns may be deleted below.
        for col in list(columnTypes):
            try:
                complex(row[col])
            except (ValueError, OverflowError):
                # Not numeric: fall back to the length of the string.
                thisType = len(row[col])
            else:
                thisType = complex

            if thisType != columnTypes[col]:
                if columnTypes[col] is None:
                    # First classification seen for this column.
                    columnTypes[col] = thisType
                else:
                    # Classification is inconsistent; drop the column
                    # from consideration.
                    del columnTypes[col]

    # Finally, compare results against the first row and "vote" on
    # whether it is a header.
    hasHeader = 0
    for col, colType in columnTypes.items():
        if isinstance(colType, int):  # it's a length
            if len(header[col]) != colType:
                hasHeader += 1
            else:
                hasHeader -= 1
        else:  # attempt typecast
            try:
                colType(header[col])
            except (ValueError, TypeError):
                # TypeError covers a column that is still None because no
                # regular data row was seen (calling None raises TypeError).
                hasHeader += 1
            else:
                hasHeader -= 1

    return hasHeader > 0

Path 5: 1 call (0.12)

'"time","forces"\n 0,0\n 1,2\n a,b\n' (1)

False (1)

ValueError (1)

def has_header(self, sample):
    """Guess whether the first row of *sample* is a header row.

    The sample is parsed with the dialect returned by ``self.sniff(sample)``.
    The first row is taken as the candidate header and the rows after it are
    examined.  For every column, each cell is classified as ``complex`` when
    ``complex(cell)`` succeeds, and otherwise by the length of its text.  A
    column stays in consideration only while every examined row agrees on
    that classification.

    Each surviving column then casts a vote: +1 when the header cell does
    *not* fit the column (it cannot be converted, or its length differs),
    -1 when it does fit.

    Args:
        sample: CSV text to inspect.

    Returns:
        True when the votes for a header outnumber the votes against it,
        False otherwise (including a tie or no surviving columns).

    Raises:
        Nothing is caught around ``self.sniff(sample)``, so any exception
        it raises propagates to the caller.
    """
    rdr = reader(StringIO(sample), self.sniff(sample))

    header = next(rdr)  # assume first row is header

    columns = len(header)
    # Every column starts with an undetermined (None) classification.
    columnTypes = dict.fromkeys(range(columns))

    checked = 0
    for row in rdr:
        # Arbitrary cap on the rows inspected, to keep it sane.  The guard
        # is "> 20" evaluated before the increment, so 21 rows are examined.
        if checked > 20:
            break
        checked += 1

        if len(row) != columns:
            continue  # skip rows that have irregular number of columns

        # Iterate over a snapshot of the keys: columns may be deleted below.
        for col in list(columnTypes):
            try:
                complex(row[col])
            except (ValueError, OverflowError):
                # Not numeric: fall back to the length of the string.
                thisType = len(row[col])
            else:
                thisType = complex

            if thisType != columnTypes[col]:
                if columnTypes[col] is None:
                    # First classification seen for this column.
                    columnTypes[col] = thisType
                else:
                    # Classification is inconsistent; drop the column
                    # from consideration.
                    del columnTypes[col]

    # Finally, compare results against the first row and "vote" on
    # whether it is a header.
    hasHeader = 0
    for col, colType in columnTypes.items():
        if isinstance(colType, int):  # it's a length
            if len(header[col]) != colType:
                hasHeader += 1
            else:
                hasHeader -= 1
        else:  # attempt typecast
            try:
                colType(header[col])
            except (ValueError, TypeError):
                # TypeError covers a column that is still None because no
                # regular data row was seen (calling None raises TypeError).
                hasHeader += 1
            else:
                hasHeader -= 1

    return hasHeader > 0

Method: csv.Sniffer.has_header