Add population to region; disable zip codes

feature/geodb
Alinson S. Xavier 4 years ago
parent 399db41f86
commit 658d5ddbdc

@ -83,8 +83,9 @@ function _geodb_load_gov_census(;
shp_url, shp_url,
population_url, population_url,
population_crc32, population_crc32,
population_col = "POPESTIMATE2019", population_col,
population_join_key = "STATE", population_preprocess,
population_join,
)::Dict{String,GeoRegion} )::Dict{String,GeoRegion}
basedir = joinpath(dirname(@__FILE__), "..", "..", "data", db_name) basedir = joinpath(dirname(@__FILE__), "..", "..", "data", db_name)
csv_filename = "$basedir/locations.csv" csv_filename = "$basedir/locations.csv"
@ -117,9 +118,10 @@ function _geodb_load_gov_census(;
# Join with population data # Join with population data
population = DataFrame(CSV.File("$basedir/population.csv")) population = DataFrame(CSV.File("$basedir/population.csv"))
population = population[:, [population_join_key, population_col]] population_preprocess(population)
population = population[:, [population_join, population_col]]
rename!(population, population_col => "population") rename!(population, population_col => "population")
df = leftjoin(df, population, on = population_join_key) df = leftjoin(df, population, on = population_join)
# Write output # Write output
CSV.write(csv_filename, df) CSV.write(csv_filename, df)
@ -129,62 +131,88 @@ function _geodb_load_gov_census(;
DB_CACHE[db_name] = Dict( DB_CACHE[db_name] = Dict(
string(row.id) => GeoRegion( string(row.id) => GeoRegion(
centroid = GeoPoint(row.latitude, row.longitude), centroid = GeoPoint(row.latitude, row.longitude),
population = 0, population = (row.population === missing ? 0 : row.population),
) for row in csv ) for row in csv
) )
end end
return DB_CACHE[db_name] return DB_CACHE[db_name]
end end
function _cols_2018_us_county(table::Shapefile.Table, i::Int)::OrderedDict{String,Any} # 2018 US counties
return OrderedDict("id" => table.STATEFP[i] * table.COUNTYFP[i]) # -----------------------------------------------------------------------------
"""
    _extract_cols_2018_us_county(table, i)

Return the columns kept for record `i` of `table`.

The result holds four entries, in this order: `"id"` (the record's `STATEFP`
value combined with its `COUNTYFP` value using `*`), `"statefp"`, `"countyfp"`
and `"name"` (the record's `NAME` value).
"""
function _extract_cols_2018_us_county(
    table::Shapefile.Table,
    i::Int,
)::OrderedDict{String,Any}
    state_code = table.STATEFP[i]
    county_code = table.COUNTYFP[i]
    return OrderedDict(
        "id" => state_code * county_code,
        "statefp" => state_code,
        "countyfp" => county_code,
        "name" => table.NAME[i],
    )
end
# Prepare the county population table before it is joined: add an "id" column
# to `df` in place. Each row's id is its STATE value zero-padded to 2 digits
# followed by its COUNTY value zero-padded to 3 digits.
# NOTE(review): the "%d" format presumably needs STATE and COUNTY to be read as
# integers — confirm against the population CSV.
function _population_preprocess_2018_us_county(df)
df[!, "id"] = [@sprintf("%02d%03d", row.STATE, row.COUNTY) for row in eachrow(df)]
end end
function _geodb_load_2018_us_county()::Dict{String,GeoRegion} function _geodb_load_2018_us_county()::Dict{String,GeoRegion}
return _geodb_load_gov_census( return _geodb_load_gov_census(
db_name = "2018-us-county", db_name = "2018-us-county",
extract_cols = _cols_2018_us_county, extract_cols = _extract_cols_2018_us_county,
shp_crc32 = 0x83eaec6d, shp_crc32 = 0x83eaec6d,
shp_filename = "cb_2018_us_county_500k.shp", shp_filename = "cb_2018_us_county_500k.shp",
shp_url = "https://www2.census.gov/geo/tiger/GENZ2018/shp/cb_2018_us_county_500k.zip", shp_url = "https://www2.census.gov/geo/tiger/GENZ2018/shp/cb_2018_us_county_500k.zip",
population_url = "http://www2.census.gov/programs-surveys/popest/datasets/2010-2019/national/totals/nst-est2019-alldata.csv", population_url = "https://www2.census.gov/programs-surveys/popest/datasets/2010-2019/counties/totals/co-est2019-alldata.csv",
population_crc32 = 0x191cc64c, population_crc32 = 0xf85b0405,
) population_col = "POPESTIMATE2019",
end population_join = "id",
population_preprocess = _population_preprocess_2018_us_county,
function _cols_2018_us_zcta(table::Shapefile.Table, i::Int)::OrderedDict{String,Any}
return OrderedDict("id" => table.ZCTA5CE10[i])
end
function _geodb_load_2018_us_zcta()::Dict{String,GeoRegion}
return _geodb_load_gov_census(
db_name = "2018-us-zcta",
extract_cols = _cols_2018_us_zcta,
shp_crc32 = 0x6391f5fc,
shp_filename = "cb_2018_us_zcta510_500k.shp",
shp_url = "https://www2.census.gov/geo/tiger/GENZ2018/shp/cb_2018_us_zcta510_500k.zip",
population_url = "http://www2.census.gov/programs-surveys/popest/datasets/2010-2019/national/totals/nst-est2019-alldata.csv",
population_crc32 = 0x191cc64c,
) )
end end
function _cols_us_state(table::Shapefile.Table, i::Int)::OrderedDict{String,Any} # # 2018 US ZIP codes
# # -----------------------------------------------------------------------------
# function _extract_cols_2018_us_zcta(table::Shapefile.Table, i::Int)::OrderedDict{String,Any}
# return OrderedDict("id" => table.ZCTA5CE10[i])
# end
# function _geodb_load_2018_us_zcta()::Dict{String,GeoRegion}
# return _geodb_load_gov_census(
# db_name = "2018-us-zcta",
# extract_cols = _extract_cols_2018_us_zcta,
# shp_crc32 = 0x6391f5fc,
# shp_filename = "cb_2018_us_zcta510_500k.shp",
# shp_url = "https://www2.census.gov/geo/tiger/GENZ2018/shp/cb_2018_us_zcta510_500k.zip",
# population_url = "http://www2.census.gov/programs-surveys/popest/datasets/2010-2019/national/totals/nst-est2019-alldata.csv",
# population_crc32 = 0x191cc64c,
# population_col = "POPESTIMATE2019",
# )
# end
# US States
# -----------------------------------------------------------------------------
function _extract_cols_us_state(table::Shapefile.Table, i::Int)::OrderedDict{String,Any}
return OrderedDict( return OrderedDict(
"id" => table.STUSPS[i], "id" => table.STUSPS[i],
"STATE" => parse(Int, table.STATEFP[i]), "statefp" => parse(Int, table.STATEFP[i]),
"name" => table.NAME[i], "name" => table.NAME[i],
) )
end end
"""
    _population_preprocess_us_state(df)

Rename the `STATE` column of `df` to `statefp`, modifying `df` in place, and
return whatever `rename!` returns.
"""
_population_preprocess_us_state(df) = rename!(df, "STATE" => "statefp")
function _geodb_load_us_state()::Dict{String,GeoRegion} function _geodb_load_us_state()::Dict{String,GeoRegion}
return _geodb_load_gov_census( return _geodb_load_gov_census(
db_name = "us-state", db_name = "us-state",
extract_cols = _cols_us_state, extract_cols = _extract_cols_us_state,
shp_crc32 = 0x9469e5ca, shp_crc32 = 0x9469e5ca,
shp_filename = "cb_2018_us_state_500k.shp", shp_filename = "cb_2018_us_state_500k.shp",
shp_url = "https://www2.census.gov/geo/tiger/GENZ2018/shp/cb_2018_us_state_500k.zip", shp_url = "https://www2.census.gov/geo/tiger/GENZ2018/shp/cb_2018_us_state_500k.zip",
population_url = "http://www2.census.gov/programs-surveys/popest/datasets/2010-2019/national/totals/nst-est2019-alldata.csv", population_url = "http://www2.census.gov/programs-surveys/popest/datasets/2010-2019/national/totals/nst-est2019-alldata.csv",
population_crc32 = 0x191cc64c, population_crc32 = 0x191cc64c,
population_col = "POPESTIMATE2019",
population_join = "statefp",
population_preprocess = _population_preprocess_us_state,
) )
end end

@ -8,16 +8,18 @@ using RELOG
region = RELOG.geodb_query("2018-us-county:17043") region = RELOG.geodb_query("2018-us-county:17043")
@test region.centroid.lat == 41.83956 @test region.centroid.lat == 41.83956
@test region.centroid.lon == -88.08857 @test region.centroid.lon == -88.08857
@test region.population == 922_921
end end
@testset "geodb_query (2018-us-zcta)" begin # @testset "geodb_query (2018-us-zcta)" begin
region = RELOG.geodb_query("2018-us-zcta:60439") # region = RELOG.geodb_query("2018-us-zcta:60439")
@test region.centroid.lat == 41.68241 # @test region.centroid.lat == 41.68241
@test region.centroid.lon == -87.98954 # @test region.centroid.lon == -87.98954
end # end
@testset "geodb_query (us-state)" begin @testset "geodb_query (us-state)" begin
region = RELOG.geodb_query("us-state:IL") region = RELOG.geodb_query("us-state:IL")
@test region.centroid.lat == 39.73939 @test region.centroid.lat == 39.73939
@test region.centroid.lon == -89.50414 @test region.centroid.lon == -89.50414
@test region.population == 12_671_821
end end

Loading…
Cancel
Save