# Python 3 script to merge power plant data from EIA into a single JavaScript data file for mapping
# By Dan Schroeder, January 2021

# The inputs are four csv files.  The first three are from the latest zip file downloaded from
# https://www.eia.gov/electricity/data/eia860/:
# 1.  plants.csv, from file 2___Plant_Y2019.xlsx
# 2.  generators.csv, from "Operable" sheet of 3_1_Generator_Y2019.xlsx, with last line of just commas removed
# 3.  generatorsRetired.csv, from "Retired or Canceled" sheet of 3_1_Generator_Y2019.xlsx, with last line (a comment) removed
# The fourth file is from latest annual zip file downloaded from https://www.eia.gov/electricity/data/eia923/:
# 4.  generation.csv, from first sheet of file EIA923_Schedules_2_3_4_5_M_12_2019_Final.xlsx
# (I used Excel to convert EIA's .xlsx files/sheets to csv format.)
# We assume that all of these csv files are sorted by the plant ID number.
# This script is needed to merge the data and because files 2, 3, and 4 typically contain multiple 
# entries for a single power plant (but sometimes contain no entries for a particular power plant).

# The code (below) to read the CSV files is from https://www.geeksforgeeks.org/working-csv-files-python/,
# and I don't understand it because I'm a Python idiot.
# Note:  When reading a CSV line, all fields are returned as strings.

# The output is written to the file ppData2019.js (overwriting any previous version).
# It's a single JavaScript list with one element for each US power plant that generated postive net
# electrical energy during the year.  This list element is itself a list containing 11 fields:
# Name, Utility/operator, County, State, Latitude, Longitude, Main fuel code, Main fuel percent,
# Nameplate capacity in MW, Annual generation in MWh, Capacity factor.
# The fuel code meanings are given in the fuelName list below.
# The output list is sorted by energy generated in descending order.

# All of the following are hard-coded and should be modified as needed:
# - file names
# - the year to which the data corresponds (currently 2019) and the number of days in the year
# - all data field indices

# Import the csv processing module (which my Anaconda distribution fortunately includes):
import csv

# Initialize lists:
plantFieldNames = []
plantData = []
generatorFieldNames = []
generatorData = []
generatorRetFieldNames = []
generatorRetData = []
generationFieldNames = []
generationData = []
# (The FieldNames lists just absorb that line of each csv file and then I never use them.)

# Read in the list of power plants:
with open("plants.csv", "r") as csvfile:
	csvreader = csv.reader(csvfile)
	heading = next(csvreader)			# first row is the title of the table
	plantFieldNames = next(csvreader)	# second row is column headings
	for row in csvreader:
		plantData.append(row)

print("Total number of rows in plants.csv: " + str(len(plantData)))

# Read in the list of generators:
with open("generators.csv") as csvfile:
	csvreader = csv.reader(csvfile)
	heading = next(csvreader)
	generatorFieldNames = next(csvreader)
	for row in csvreader:
		generatorData.append(row)

print("Total number of rows in generators.csv: " + str(len(generatorData)))

# Read in the list of retired and canceled generators:
with open("generatorsRetired.csv") as csvfile:
	csvreader = csv.reader(csvfile)
	heading = next(csvreader)
	generatorRetFieldNames = next(csvreader)
	for row in csvreader:
		generatorRetData.append(row)

print("Total number of rows in generatorsRetired.csv: " + str(len(generatorRetData)))

# Read in the list of generation data:
with open("generation.csv") as csvfile:
	csvreader = csv.reader(csvfile)
	heading = next(csvreader)	# five header lines before main column headings
	heading = next(csvreader)
	heading = next(csvreader)
	heading = next(csvreader)
	heading = next(csvreader)
	generationFieldNames = next(csvreader)	# many of these contain line breaks due to wrapping in Excel
	for row in csvreader:
		generationData.append(row)

print("Total number of rows in generation.csv: " + str(len(generationData)))

# OK, we have the data.  Now to merge the parts we want into a single list...

outputList = []
nextGeneratorIndex = 0
nextRetGeneratorIndex = 0
nextGenerationIndex = 0

# Dictionary for translating 18 fuel type codes into 11 more general categories:
fuelTypeCodes = {
	"COL": 0,	# coal
	"DFO": 1,	# distillate petroleum
	"GEO": 7,	# geothermal
	"HPS": 5,	# pumped hydro (shouldn't occur in plants with positive net generation)
	"HYC": 5,	# hydro
	"MLG": 6,	# municipal waste and landfill gas (biomass)
	 "NG": 3,	# natural gas
	"NUC": 4,	# nuclear
	"OOG": 2,	# other gases (other fossil)
	"ORW": 6,	# other renewables (biomass)
	"OTH": 10,	# other (waste heat, nonbiogenic waste)
	 "PC": 1,	# petroleum coke
	"RFO": 1,	# residual petroleum
	"SUN": 9,	# solar
	"WND": 8,	# wind
	"WOC": 0,	# waste coal
	"WOO": 1,	# waste oil
	"WWW": 6	# wood and wood waste (biomass)
}

# Names of fuel categories (needed only for testing and documentation):
fuelName = ["Coal", "Oil", "Other fossil", "Gas", "Nuclear", "Hydro", "Biomass", "Geothermal", "Wind", "Solar", "Other"]

# Loop over all the power plants:
for plant in plantData:

	# First grab the data we need from the list of plants:
	id = int(plant[2])
	name = plant[3]
	operator = plant[1]
	county = plant[8]
	state = plant[6]
	latitude = plant[9]
	longitude = plant[10]	# these are all strings, even the numerical fields!
	
	# Next get the total nameplate capacity by adding up capacities of all generators:
	generatorCount = 0
	capacity = 0.0
	year = 9999		# not currently used but I may include this in the output someday
	# Operable generators first:
	while nextGeneratorIndex < len(generatorData) and int(generatorData[nextGeneratorIndex][2]) < id:	# This shouldn't happen since every generator should have a plant
		print("Generator with no plant: index " + str(nextGeneratorIndex) + ", plant code " + 
			generatorData[nextGeneratorIndex][2])
		nextGeneratorIndex += 1
	while nextGeneratorIndex < len(generatorData) and int(generatorData[nextGeneratorIndex][2]) == id:	# loop through all of this plant's generators
		generatorCount += 1
		capacity += float(generatorData[nextGeneratorIndex][15].replace(",", ""))
		generatorYear = int(generatorData[nextGeneratorIndex][26])
		if generatorYear < year:
			year = generatorYear	# save year for plant's earliest generator (not currently used)
		nextGeneratorIndex += 1
	# Now look for any generators that were retired during the current year:
	while nextRetGeneratorIndex < len(generatorRetData) and int(generatorRetData[nextRetGeneratorIndex][2]) < id:	# I suppose there could be retired generators with no plant
		nextRetGeneratorIndex += 1
	while nextRetGeneratorIndex < len(generatorRetData) and int(generatorRetData[nextRetGeneratorIndex][2]) == id:
		if len(generatorRetData[nextRetGeneratorIndex][28]) >= 4:	# some listings have a blank for the retired year
			retYear = int(generatorRetData[nextRetGeneratorIndex][28])
			if retYear == 2019:		# hard-coded year must be updated in the future
				generatorCount += 1
				capacity += float(generatorRetData[nextRetGeneratorIndex][15].replace(",", ""))
				generatorYear = int(generatorRetData[nextRetGeneratorIndex][26])
				if generatorYear < year:
					year = generatorYear
		nextRetGeneratorIndex += 1

	# Next get the energy generated and fuel used, for each fuel type, for this plant:
	genByFuelType = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]	# Value is energy generated; index is fuel type
	while nextGenerationIndex < len(generationData) and int(generationData[nextGenerationIndex][0]) < id:	# This shouldn't happen since all generation should come from a plant!
		print("Generation data line with no plant: index " + str(nextGenerationIndex) + ", plant code " +
			generationData[nextGenerationIndex][0])
		nextGenerationIndex += 1
	while nextGenerationIndex < len(generationData) and int(generationData[nextGenerationIndex][0]) == id:	# Loop through all generation types for this plant
		fuel = generationData[nextGenerationIndex][15]
		fuelCode = fuelTypeCodes[fuel]
		gen = int(generationData[nextGenerationIndex][95].replace(",", ""))
		genByFuelType[fuelCode] += gen
		nextGenerationIndex += 1
	mainFuelCode = -1
	mainFuelGen = 0
	totalGen = 0
	for code in range(11):
		totalGen += genByFuelType[code]
		if genByFuelType[code] > mainFuelGen:
			mainFuelGen = genByFuelType[code]
			mainFuelCode = code
	if generatorCount > 0 and totalGen > 0:
		mainFuelPercent = round(100 * mainFuelGen / totalGen)
		capFactor = round(totalGen / (capacity * 24 * 365), 4)	# change 365 to 366 in a leap year
		outputList.append([name, operator, county, state, latitude, longitude, 
			mainFuelCode, mainFuelPercent, round(capacity,1), totalGen, capFactor])

print("Number of plants in output list: " + str(len(outputList)))

# Sort the output list in descending order by energy generated:
def sortByGen(plant):
	return(plant[9])
outputList.sort(key=sortByGen, reverse=True)

# Print to screen (for testing with small data sets):
# for plant in outputList:
# 	print(plant)

# Write to JavaScript file:
# (It's messy because of string/numeric conversions and the required comma at the end of every line but the last.)
outFile = open("ppData2019.js", "w")
outFile.write("ppData2019 = [\n")
for p in range(len(outputList)-1):
	outFile.write('["' + outputList[p][0] + '", "' + outputList[p][1] + '", "' +
		outputList[p][2] + '", "' + outputList[p][3] + '", ' +
		outputList[p][4] + ', ' + outputList[p][5] + ', ' + str(outputList[p][6]) + ', ' +
		str(outputList[p][7]) + ', ' + str(outputList[p][8]) + ', ' + 
		str(outputList[p][9]) + ', ' + str(outputList[p][10]) + '],\n')
p = len(outputList)-1
outFile.write('["' + outputList[p][0] + '", "' + outputList[p][1] + '", "' +
	outputList[p][2] + '", "' + outputList[p][3] + '", ' +
	outputList[p][4] + ', ' + outputList[p][5] + ', ' + str(outputList[p][6]) + ', ' +
	str(outputList[p][7]) + ', ' + str(outputList[p][8]) + ', ' + 
	str(outputList[p][9]) + ', ' + str(outputList[p][10]) + ']]')
outFile.close()