# Python 3 script to merge power plant data from EIA into a single JavaScript data file for mapping # By Dan Schroeder, January 2021 # The inputs are four csv files. The first three are from the latest zip file downloaded from # https://www.eia.gov/electricity/data/eia860/: # 1. plants.csv, from file 2___Plant_Y2019.xlsx # 2. generators.csv, from "Operable" sheet of 3_1_Generator_Y2019.xlsx, with last line of just commas removed # 3. generatorsRetired.csv, from "Retired or Canceled" sheet of 3_1_Generator_Y2019.xlsx, with last line (a comment) removed # The fourth file is from latest annual zip file downloaded from https://www.eia.gov/electricity/data/eia923/: # 4. generation.csv, from first sheet of file EIA923_Schedules_2_3_4_5_M_12_2019_Final.xlsx # (I used Excel to convert EIA's .xlsx files/sheets to csv format.) # We assume that all of these csv files are sorted by the plant ID number. # This script is needed to merge the data and because files 2, 3, and 4 typically contain multiple # entries for a single power plant (but sometimes contain no entries for a particular power plant). # The code (below) to read the CSV files is from https://www.geeksforgeeks.org/working-csv-files-python/, # and I don't understand it because I'm a Python idiot. # Note: When reading a CSV line, all fields are returned as strings. # The output is written to the file ppData2019.js (overwriting any previous version). # It's a single JavaScript list with one element for each US power plant that generated postive net # electrical energy during the year. This list element is itself a list containing 11 fields: # Name, Utility/operator, County, State, Latitude, Longitude, Main fuel code, Main fuel percent, # Nameplate capacity in MW, Annual generation in MWh, Capacity factor. # The fuel code meanings are given in the fuelName list below. # The output list is sorted by energy generated in descending order. # All of the following are hard-coded and should be modified as needed: # - file names # - the year to which the data corresponds (currently 2019) and the number of days in the year # - all data field indices # Import the csv processing module (which my Anaconda distribution fortunately includes): import csv # Initialize lists: plantFieldNames = [] plantData = [] generatorFieldNames = [] generatorData = [] generatorRetFieldNames = [] generatorRetData = [] generationFieldNames = [] generationData = [] # (The FieldNames lists just absorb that line of each csv file and then I never use them.) # Read in the list of power plants: with open("plants.csv", "r") as csvfile: csvreader = csv.reader(csvfile) heading = next(csvreader) # first row is the title of the table plantFieldNames = next(csvreader) # second row is column headings for row in csvreader: plantData.append(row) print("Total number of rows in plants.csv: " + str(len(plantData))) # Read in the list of generators: with open("generators.csv") as csvfile: csvreader = csv.reader(csvfile) heading = next(csvreader) generatorFieldNames = next(csvreader) for row in csvreader: generatorData.append(row) print("Total number of rows in generators.csv: " + str(len(generatorData))) # Read in the list of retired and canceled generators: with open("generatorsRetired.csv") as csvfile: csvreader = csv.reader(csvfile) heading = next(csvreader) generatorRetFieldNames = next(csvreader) for row in csvreader: generatorRetData.append(row) print("Total number of rows in generatorsRetired.csv: " + str(len(generatorRetData))) # Read in the list of generation data: with open("generation.csv") as csvfile: csvreader = csv.reader(csvfile) heading = next(csvreader) # five header lines before main column headings heading = next(csvreader) heading = next(csvreader) heading = next(csvreader) heading = next(csvreader) generationFieldNames = next(csvreader) # many of these contain line breaks due to wrapping in Excel for row in csvreader: generationData.append(row) print("Total number of rows in generation.csv: " + str(len(generationData))) # OK, we have the data. Now to merge the parts we want into a single list... outputList = [] nextGeneratorIndex = 0 nextRetGeneratorIndex = 0 nextGenerationIndex = 0 # Dictionary for translating 18 fuel type codes into 11 more general categories: fuelTypeCodes = { "COL": 0, # coal "DFO": 1, # distillate petroleum "GEO": 7, # geothermal "HPS": 5, # pumped hydro (shouldn't occur in plants with positive net generation) "HYC": 5, # hydro "MLG": 6, # municipal waste and landfill gas (biomass) "NG": 3, # natural gas "NUC": 4, # nuclear "OOG": 2, # other gases (other fossil) "ORW": 6, # other renewables (biomass) "OTH": 10, # other (waste heat, nonbiogenic waste) "PC": 1, # petroleum coke "RFO": 1, # residual petroleum "SUN": 9, # solar "WND": 8, # wind "WOC": 0, # waste coal "WOO": 1, # waste oil "WWW": 6 # wood and wood waste (biomass) } # Names of fuel categories (needed only for testing and documentation): fuelName = ["Coal", "Oil", "Other fossil", "Gas", "Nuclear", "Hydro", "Biomass", "Geothermal", "Wind", "Solar", "Other"] # Loop over all the power plants: for plant in plantData: # First grab the data we need from the list of plants: id = int(plant[2]) name = plant[3] operator = plant[1] county = plant[8] state = plant[6] latitude = plant[9] longitude = plant[10] # these are all strings, even the numerical fields! # Next get the total nameplate capacity by adding up capacities of all generators: generatorCount = 0 capacity = 0.0 year = 9999 # not currently used but I may include this in the output someday # Operable generators first: while nextGeneratorIndex < len(generatorData) and int(generatorData[nextGeneratorIndex][2]) < id: # This shouldn't happen since every generator should have a plant print("Generator with no plant: index " + str(nextGeneratorIndex) + ", plant code " + generatorData[nextGeneratorIndex][2]) nextGeneratorIndex += 1 while nextGeneratorIndex < len(generatorData) and int(generatorData[nextGeneratorIndex][2]) == id: # loop through all of this plant's generators generatorCount += 1 capacity += float(generatorData[nextGeneratorIndex][15].replace(",", "")) generatorYear = int(generatorData[nextGeneratorIndex][26]) if generatorYear < year: year = generatorYear # save year for plant's earliest generator (not currently used) nextGeneratorIndex += 1 # Now look for any generators that were retired during the current year: while nextRetGeneratorIndex < len(generatorRetData) and int(generatorRetData[nextRetGeneratorIndex][2]) < id: # I suppose there could be retired generators with no plant nextRetGeneratorIndex += 1 while nextRetGeneratorIndex < len(generatorRetData) and int(generatorRetData[nextRetGeneratorIndex][2]) == id: if len(generatorRetData[nextRetGeneratorIndex][28]) >= 4: # some listings have a blank for the retired year retYear = int(generatorRetData[nextRetGeneratorIndex][28]) if retYear == 2019: # hard-coded year must be updated in the future generatorCount += 1 capacity += float(generatorRetData[nextRetGeneratorIndex][15].replace(",", "")) generatorYear = int(generatorRetData[nextRetGeneratorIndex][26]) if generatorYear < year: year = generatorYear nextRetGeneratorIndex += 1 # Next get the energy generated and fuel used, for each fuel type, for this plant: genByFuelType = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] # Value is energy generated; index is fuel type while nextGenerationIndex < len(generationData) and int(generationData[nextGenerationIndex][0]) < id: # This shouldn't happen since all generation should come from a plant! print("Generation data line with no plant: index " + str(nextGenerationIndex) + ", plant code " + generationData[nextGenerationIndex][0]) nextGenerationIndex += 1 while nextGenerationIndex < len(generationData) and int(generationData[nextGenerationIndex][0]) == id: # Loop through all generation types for this plant fuel = generationData[nextGenerationIndex][15] fuelCode = fuelTypeCodes[fuel] gen = int(generationData[nextGenerationIndex][95].replace(",", "")) genByFuelType[fuelCode] += gen nextGenerationIndex += 1 mainFuelCode = -1 mainFuelGen = 0 totalGen = 0 for code in range(11): totalGen += genByFuelType[code] if genByFuelType[code] > mainFuelGen: mainFuelGen = genByFuelType[code] mainFuelCode = code if generatorCount > 0 and totalGen > 0: mainFuelPercent = round(100 * mainFuelGen / totalGen) capFactor = round(totalGen / (capacity * 24 * 365), 4) # change 365 to 366 in a leap year outputList.append([name, operator, county, state, latitude, longitude, mainFuelCode, mainFuelPercent, round(capacity,1), totalGen, capFactor]) print("Number of plants in output list: " + str(len(outputList))) # Sort the output list in descending order by energy generated: def sortByGen(plant): return(plant[9]) outputList.sort(key=sortByGen, reverse=True) # Print to screen (for testing with small data sets): # for plant in outputList: # print(plant) # Write to JavaScript file: # (It's messy because of string/numeric conversions and the required comma at the end of every line but the last.) outFile = open("ppData2019.js", "w") outFile.write("ppData2019 = [\n") for p in range(len(outputList)-1): outFile.write('["' + outputList[p][0] + '", "' + outputList[p][1] + '", "' + outputList[p][2] + '", "' + outputList[p][3] + '", ' + outputList[p][4] + ', ' + outputList[p][5] + ', ' + str(outputList[p][6]) + ', ' + str(outputList[p][7]) + ', ' + str(outputList[p][8]) + ', ' + str(outputList[p][9]) + ', ' + str(outputList[p][10]) + '],\n') p = len(outputList)-1 outFile.write('["' + outputList[p][0] + '", "' + outputList[p][1] + '", "' + outputList[p][2] + '", "' + outputList[p][3] + '", ' + outputList[p][4] + ', ' + outputList[p][5] + ', ' + str(outputList[p][6]) + ', ' + str(outputList[p][7]) + ', ' + str(outputList[p][8]) + ', ' + str(outputList[p][9]) + ', ' + str(outputList[p][10]) + ']]') outFile.close()