A fairly typical use case for this would be when reading data from a CSV file where you know the first few lines consist of information about the data rather than just the data itself.
# Print every raw line of the CSV file, exactly as read (trailing newline included).
with open('cars.csv') as file:
    for line in file:
        print(line)

# Output (excerpt):
# Car;MPG;Cylinders;Displacement;Horsepower;Weight;Acceleration;Model;Origin
# STRING;DOUBLE;INT;DOUBLE;DOUBLE;DOUBLE;DOUBLE;INT;CAT
# Chevrolet Chevelle Malibu;18.0;8;307.0;130.0;3504.;12.0;70;US
# Buick Skylark 320;15.0;8;350.0;165.0;3693.;11.5;70;US
As we can see, the values are delimited by ; and the first two lines contain the column names and the column types, respectively.
The reason for the spacing between each line is that each line ends with a newline, and our print statement also emits a newline by default. So we'll have to strip those out.
Here's what we want to do:
read the first line to get the column headers and create a named tuple class
read data types from second line and store this so we can cast the strings we are reading to the correct data type
read the data rows and parse them into named tuples
# Split each line on ';' and handle it according to its position in the file:
# line 0 holds the column names, line 1 the column types, the rest are data.
with open('cars.csv') as file:
    row_index = 0
    for line in file:
        if row_index == 0:
            # header row
            headers = line.strip('\n').split(';')
            print(headers)
        elif row_index == 1:
            # data type row
            data_types = line.strip('\n').split(';')
            print(data_types)
        else:
            # data rows
            data = line.strip('\n').split(';')
            print(data)
        row_index += 1

# Output:
# ['Car', 'MPG', 'Cylinders', 'Displacement', 'Horsepower', 'Weight', 'Acceleration', 'Model', 'Origin']
# ['STRING', 'DOUBLE', 'INT', 'DOUBLE', 'DOUBLE', 'DOUBLE', 'DOUBLE', 'INT', 'CAT']
# ['Chevrolet Chevelle Malibu', '18.0', '8', '307.0', '130.0', '3504.', '12.0', '70', 'US']
# ['Buick Skylark 320', '15.0', '8', '350.0', '165.0', '3693.', '11.5', '70', 'US']
# ['Plymouth Satellite', '18.0', '8', '318.0', '150.0', '3436.', '11.0', '70', 'US']
# ...
# ...

# Using namedtuple
from collections import namedtuple

cars = []

with open('cars.csv') as file:
    row_index = 0
    for line in file:
        if row_index == 0:
            # header row: the column names become the fields of the Car class
            headers = line.strip('\n').split(';')
            Car = namedtuple('Car', headers)
        elif row_index == 1:
            # data type row
            data_types = line.strip('\n').split(';')
            print(data_types)
        else:
            # data rows: every field is still a string at this point
            data = line.strip('\n').split(';')
            car = Car(*data)
            cars.append(car)
        row_index += 1
We still need to parse the data into strings, integers, floats... First we need to figure out how to cast a value to a data type based on the data type string:
from collections import namedtuple

cars = []

# NOTE(review): cast_row is not defined in this snippet; it is assumed to be
# defined elsewhere and to convert a row of strings using the data type names
# read from the second line of the file -- confirm.
with open('cars.csv') as file:
    row_index = 0
    for line in file:
        if row_index == 0:
            # header row
            headers = line.strip('\n').split(';')
            Car = namedtuple('Car', headers)
        elif row_index == 1:
            # data type row
            data_types = line.strip('\n').split(';')
        else:
            # data rows: cast the string fields before building the named tuple
            data = line.strip('\n').split(';')
            data = cast_row(data_types, data)
            car = Car(*data)
            cars.append(car)
        row_index += 1
We can clean up this code by using iterators directly:
from collections import namedtuple

cars = []

with open('cars.csv') as file:
    file_iter = iter(file)
    # Consume the two metadata lines explicitly, so the loop below only
    # ever sees data rows and no row counter is needed.
    headers = next(file_iter).strip('\n').split(';')
    Car = namedtuple('Car', headers)
    data_types = next(file_iter).strip('\n').split(';')
    for line in file_iter:
        data = line.strip('\n').split(';')
        data = cast_row(data_types, data)
        car = Car(*data)
        cars.append(car)

# A cleaner way
from collections import namedtuple

with open('cars.csv') as file:
    file_iter = iter(file)
    headers = next(file_iter).strip('\n').split(';')
    # Create the record class here so this snippet is self-contained
    # instead of relying on a Car class left over from earlier code.
    Car = namedtuple('Car', headers)
    data_types = next(file_iter).strip('\n').split(';')
    cars_data = [cast_row(data_types, line.strip('\n').split(';'))
                 for line in file_iter]
    cars = [Car(*item) for item in cars_data]

cars_data[0]
# Output:
# ['Chevrolet Chevelle Malibu', 18.0, 8, 307.0, 130.0, 3504.0, 12.0, 70, 'US']