# Script to join selected American Community Survey
# census tract data for the city and county analysis areas.

# Michael Minn - 31 May 2014

library("rgdal")
library("maptools") # for spRbind()

# Tract boundary layers to merge, one row per state: the state name and the
# name of the layer that is passed to readOGR() when the tracts are loaded.
tract_files <- matrix(
  c(
    "Arizona",      "tl_2013_04_tract",
    "California",   "tl_2013_06_tract",
    "Illinois",     "tl_2013_17_tract",
    "New York",     "tl_2013_36_tract",
    "Pennsylvania", "tl_2013_42_tract",
    "Texas",        "tl_2013_48_tract"
  ),
  ncol = 2,
  byrow = TRUE,
  dimnames = list(NULL, c("state", "layer"))
)

# county_places = matrix(ncol = 3, byrow = T, data = c(
# 	"Bexar County",		"San Antonio",		"Texas",
# 	"Bronx County",		"New York City",	"New York",
# 	"Cook County",		"Chicago",		"Illinois",
# 	"Harris County",	"Houston",		"Texas",
# 	"Kings County",		"New York City",	"New York",
# 	"Los Angeles County",	"Los Angeles",		"California",
# 	"Maricopa County",	"Phoenix",		"Arizona",
# 	"New York County",	"New York City",	"New York",
# 	"Philadelphia County",	"Philadelphia",		"Pennsylvania",
# 	"Queens County",	"New York City",	"New York",
# 	"Richmond County",	"New York City",	"New York",
# 	"San Diego County",	"San Diego",		"California"))
# 
# colnames(county_places) = c("county", "city", "state")
# 
# city_places = matrix(ncol = 2, byrow = T, data = c(
# 	"Phoenix",	"Arizona",
# 	"Los Angeles",	"California",
# 	"San Diego",	"California",
# 	"Chicago",	"Illinois",
# 	"New York",	"New York",
# 	"Philadelphia",	"Pennsylvania",
# 	"Houston",	"Texas",
# 	"San Antonio",	"Texas"))
# 
# colnames(city_places) = c("city", "state")

# Common coordinate reference system (geographic WGS84) that every layer is
# reprojected to, so that tracts and analysis areas can be overlaid.
wgs84 <- CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0")


# Merge all tracts into a single SpatialPolygonsDataFrame

tracts <- NULL

for (layer in tract_files[, "layer"]) {
  state <- readOGR(dsn = "downloads", layer = layer)
  # The formal argument is named CRSobj; spell it out rather than relying on
  # partial argument matching.
  state <- spTransform(state, CRSobj = wgs84)

  if (is.null(tracts)) {
    # First layer: it becomes the start of the merged set as-is.
    tracts <- state
  } else {
    # spRbind() requires unique feature IDs, so renumber the new features to
    # continue sequentially from the features already merged.
    # seq_len() keeps this correct even if a layer has no features.
    state <- spChFIDs(state, as.character(nrow(tracts) + seq_len(nrow(state))))
    tracts <- spRbind(tracts, state)
  }
}


# Represent each tract by a single point: coordinates() returns one centroid
# per polygon when given a SpatialPolygonsDataFrame. These points are used
# to decide which analysis area each tract falls in.

tract_centroids <- SpatialPoints(coords = coordinates(tracts), proj4string = wgs84)


# Find all tracts for counties and set county names

counties <- readOGR(dsn = "../boundaries/analysis-areas", layer = "boundaries-county")
# The formal argument is named CRSobj; spell it out rather than relying on
# partial argument matching.
counties <- spTransform(counties, CRSobj = wgs84)
# Geometry only (no attribute table), so over() returns polygon indices.
county_polygons <- SpatialPolygons(counties@polygons, proj4string = wgs84)

# For each tract centroid: index of the county polygon containing it, or NA
# when the centroid lies outside every county analysis area.
indices <- over(tract_centroids, county_polygons)

# An NA index selects an NA row, so tracts outside all counties get COUNTY = NA.
tracts@data$COUNTY <- counties@data[indices, "NAMELSAD10"]


# Find all tracts for cities and set city names

cities <- readOGR(dsn = "../boundaries/analysis-areas", layer = "boundaries-city")
# The formal argument is named CRSobj; spell it out rather than relying on
# partial argument matching.
cities <- spTransform(cities, CRSobj = wgs84)
# Geometry only (no attribute table), so over() returns polygon indices.
city_polygons <- SpatialPolygons(cities@polygons, proj4string = wgs84)

# For each tract centroid: index of the city polygon containing it, or NA
# when the centroid lies outside every city analysis area.
indices <- over(tract_centroids, city_polygons)

# An NA index selects an NA row, so tracts outside all cities get CITY = NA.
tracts@data$CITY <- cities@data[indices, "NAME"]


# Keep only tracts that are in a county or city analysis area,
# i.e. drop the tracts whose CITY and COUNTY are both missing.

tracts <- tracts[!(is.na(tracts$CITY) & is.na(tracts$COUNTY)), ]



# ACS data-profile columns to keep. Each entry reads
#   <column name in the ACS CSV> = <attribute name used in the output>
# Output names are at most 10 characters long, which fits the field-name
# limit of the shapefile the result is written to.

# DP03 - SELECTED ECONOMIC CHARACTERISTICS: 2008-2012 American Community Survey 5-Year Estimates
dp03_fields <- c(
  HC03_VC13 = "PCUNEMPLOY",
  HC03_VC29 = "PCDRIVE",
  HC01_VC85 = "HHINCOME"
)

# DP04 - SELECTED HOUSING CHARACTERISTICS: 2008-2012 American Community Survey 5-Year Estimates
# NOTE(review): HHRENTER comes from an HC03_* column like the PC* fields,
# so it looks like a percentage rather than a household count - confirm.
dp04_fields <- c(
  HC03_VC64 = "HHRENTER",
  HC03_VC82 = "PCNOVEHICL"
)

# DP05 - ACS DEMOGRAPHIC AND HOUSING ESTIMATES: 2008-2012 American Community Survey 5-Year Estimates
dp05_fields <- c(
  HC01_VC03 = "POPTOTAL",
  HC01_VC21 = "MEDIANAGE",
  HC03_VC72 = "PCWHITE",
  HC03_VC73 = "PCBLACK",
  HC03_VC81 = "PCHISPANIC"
)


# Read one ACS data-profile CSV and reduce it to the tract identifier plus
# the requested estimate columns.
#
# path:   path of the ACS CSV file to read.
# fields: named character vector; the names are column names in the CSV and
#         the values are the names those columns get in the result.
#
# Returns a data frame whose first column is "GEO.id2", followed by one
# numeric column per element of `fields`. Values that cannot be parsed as
# numbers become NA (as.numeric() emits a coercion warning when that happens).
read_acs_profile <- function(path, fields) {
  profile <- read.csv(path, fill = TRUE)

  # Drop the first row after the header; presumably it holds the descriptive
  # column labels of the "with_ann" export rather than tract data - TODO confirm.
  profile <- profile[-1, ]

  profile <- profile[, c("GEO.id2", names(fields))]
  colnames(profile) <- c("GEO.id2", unname(fields))

  # Columns are imported as factors (or character strings, depending on the
  # stringsAsFactors default) but need to be numeric for graduated mapping.
  # Going through as.character() converts a factor's labels, not its codes.
  value_columns <- unname(fields)
  profile[value_columns] <- lapply(
    profile[value_columns],
    function(column) as.numeric(as.character(column))
  )

  profile
}

dp03 <- read_acs_profile("downloads/ACS_12_5YR_DP03_with_ann.csv", dp03_fields)
dp04 <- read_acs_profile("downloads/ACS_12_5YR_DP04_with_ann.csv", dp04_fields)
dp05 <- read_acs_profile("downloads/ACS_12_5YR_DP05_with_ann.csv", dp05_fields)


# Join attributes from census data files: each profile table is matched to
# the tracts by comparing the tract GEOID with the table's GEO.id2 column.
for (profile in list(dp03, dp04, dp05)) {
  tracts <- merge(tracts, profile, by.x = "GEOID", by.y = "GEO.id2")
}

# Write the result as a shapefile layer named "tracts" in the current
# directory, replacing any layer of that name that already exists.
writeOGR(
  tracts,
  dsn = ".",
  layer = "tracts",
  driver = "ESRI Shapefile",
  overwrite_layer = TRUE
)
