GeoDB: Prepare for population

feature/geodb
Alinson S. Xavier 4 years ago
parent c9391dd299
commit 33ab4c5f76

@ -17,7 +17,13 @@ struct GeoPoint
lon::Float64 lon::Float64
end end
DB_CACHE = Dict{String,Dict{String,GeoPoint}}() struct GeoRegion
centroid::GeoPoint
population::Int
GeoRegion(; centroid, population) = new(centroid, population)
end
DB_CACHE = Dict{String,Dict{String,GeoRegion}}()
function centroid(geom::Shapefile.Polygon)::GeoPoint function centroid(geom::Shapefile.Polygon)::GeoPoint
x_max, x_min, y_max, y_min = -Inf, Inf, -Inf, Inf x_max, x_min, y_max, y_min = -Inf, Inf, -Inf, Inf
@ -32,7 +38,7 @@ function centroid(geom::Shapefile.Polygon)::GeoPoint
return GeoPoint(round(y_center, digits = 5), round(x_center, digits = 5)) return GeoPoint(round(y_center, digits = 5), round(x_center, digits = 5))
end end
function download_census_gov(url, outputdir, expected_crc32)::Nothing function download_zip(url, outputdir, shp_crc32)::Nothing
if isdir(outputdir) if isdir(outputdir)
return return
end end
@ -40,7 +46,7 @@ function download_census_gov(url, outputdir, expected_crc32)::Nothing
@info "Downloading: $url" @info "Downloading: $url"
zip_filename = download(url) zip_filename = download(url)
actual_crc32 = open(crc32, zip_filename) actual_crc32 = open(crc32, zip_filename)
expected_crc32 == actual_crc32 || error("CRC32 mismatch") shp_crc32 == actual_crc32 || error("CRC32 mismatch")
open(zip_filename) do zip_file open(zip_filename) do zip_file
zr = ZipFile.Reader(zip_file) zr = ZipFile.Reader(zip_file)
for file in zr.files for file in zr.files
@ -52,17 +58,17 @@ function download_census_gov(url, outputdir, expected_crc32)::Nothing
return return
end end
function load_census_gov(; function geodb_load_gov_census(;
db_name, db_name,
url,
expected_crc32,
shp_filename,
extract_id, extract_id,
)::Dict{String,GeoPoint} shp_crc32,
shp_filename,
shp_url,
)::Dict{String,GeoRegion}
basedir = joinpath(dirname(@__FILE__), "..", "..", "data", db_name) basedir = joinpath(dirname(@__FILE__), "..", "..", "data", db_name)
csv_filename = "$basedir/locations.csv" csv_filename = "$basedir/locations.csv"
if !isfile(csv_filename) if !isfile(csv_filename)
download_census_gov(url, basedir, expected_crc32) download_zip(shp_url, basedir, shp_crc32)
@info "Processing: $shp_filename" @info "Processing: $shp_filename"
table = Shapefile.Table(joinpath(basedir, shp_filename)) table = Shapefile.Table(joinpath(basedir, shp_filename))
geoms = Shapefile.shapes(table) geoms = Shapefile.shapes(table)
@ -77,8 +83,12 @@ function load_census_gov(;
end end
if db_name ∉ keys(DB_CACHE) if db_name ∉ keys(DB_CACHE)
csv = CSV.File(csv_filename; types = [String, Float64, Float64]) csv = CSV.File(csv_filename; types = [String, Float64, Float64])
DB_CACHE[db_name] = DB_CACHE[db_name] = Dict(
Dict(row.id => GeoPoint(row.latitude, row.longitude) for row in csv) row.id => GeoRegion(
centroid = GeoPoint(row.latitude, row.longitude),
population = 0,
) for row in csv
)
end end
return DB_CACHE[db_name] return DB_CACHE[db_name]
end end
@ -87,13 +97,13 @@ function _id_2018_us_county(table::Shapefile.Table, i::Int)::String
return table.STATEFP[i] * table.COUNTYFP[i] return table.STATEFP[i] * table.COUNTYFP[i]
end end
function load_2018_us_county()::Dict{String,GeoPoint} function geodb_load_2018_us_county()::Dict{String,GeoRegion}
return load_census_gov( return geodb_load_gov_census(
db_name = "2018-us-county", db_name = "2018-us-county",
url = "https://www2.census.gov/geo/tiger/GENZ2018/shp/cb_2018_us_county_500k.zip",
expected_crc32 = 0x83eaec6d,
shp_filename = "cb_2018_us_county_500k.shp",
extract_id = _id_2018_us_county, extract_id = _id_2018_us_county,
shp_crc32 = 0x83eaec6d,
shp_filename = "cb_2018_us_county_500k.shp",
shp_url = "https://www2.census.gov/geo/tiger/GENZ2018/shp/cb_2018_us_county_500k.zip",
) )
end end
@ -101,13 +111,13 @@ function _id_2018_us_zcta(table::Shapefile.Table, i::Int)::String
return table.ZCTA5CE10[i] return table.ZCTA5CE10[i]
end end
function load_2018_us_zcta()::Dict{String,GeoPoint} function geodb_load_2018_us_zcta()::Dict{String,GeoRegion}
return load_census_gov( return geodb_load_gov_census(
db_name = "2018-us-zcta", db_name = "2018-us-zcta",
url = "https://www2.census.gov/geo/tiger/GENZ2018/shp/cb_2018_us_zcta510_500k.zip",
expected_crc32 = 0x6391f5fc,
shp_filename = "cb_2018_us_zcta510_500k.shp",
extract_id = _id_2018_us_zcta, extract_id = _id_2018_us_zcta,
shp_crc32 = 0x6391f5fc,
shp_filename = "cb_2018_us_zcta510_500k.shp",
shp_url = "https://www2.census.gov/geo/tiger/GENZ2018/shp/cb_2018_us_zcta510_500k.zip",
) )
end end
@ -115,24 +125,24 @@ function _id_us_state(table::Shapefile.Table, i::Int)::String
return table.STUSPS[i] return table.STUSPS[i]
end end
function load_us_state()::Dict{String,GeoPoint} function geodb_load_us_state()::Dict{String,GeoRegion}
return load_census_gov( return geodb_load_gov_census(
db_name = "us-state", db_name = "us-state",
url = "https://www2.census.gov/geo/tiger/GENZ2018/shp/cb_2018_us_state_500k.zip",
expected_crc32 = 0x9469e5ca,
shp_filename = "cb_2018_us_state_500k.shp",
extract_id = _id_us_state, extract_id = _id_us_state,
shp_crc32 = 0x9469e5ca,
shp_filename = "cb_2018_us_state_500k.shp",
shp_url = "https://www2.census.gov/geo/tiger/GENZ2018/shp/cb_2018_us_state_500k.zip",
) )
end end
function load_latlon_database(db_name) function geodb_load(db_name::AbstractString)::Dict{String,GeoRegion}
db_name == "2018-us-county" && return load_2018_us_county() db_name == "2018-us-county" && return geodb_load_2018_us_county()
db_name == "2018-us-zcta" && return load_2018_us_zcta() db_name == "2018-us-zcta" && return geodb_load_2018_us_zcta()
db_name == "us-state" && return load_us_state() db_name == "us-state" && return geodb_load_us_state()
error("Unknown database: $db_name") error("Unknown database: $db_name")
end end
function geodb_query(name) function geodb_query(name)::GeoRegion
db_name, id = split(name, ":") db_name, id = split(name, ":")
return load_latlon_database(db_name)[id] return geodb_load(db_name)[id]
end end

@ -54,9 +54,9 @@ function parse(json)::Instance
if "initial amounts" in keys(product_dict) if "initial amounts" in keys(product_dict)
for (center_name, center_dict) in product_dict["initial amounts"] for (center_name, center_dict) in product_dict["initial amounts"]
if "location" in keys(center_dict) if "location" in keys(center_dict)
point = geodb_query(center_dict["location"]) region = geodb_query(center_dict["location"])
center_dict["latitude (deg)"] = point.lat center_dict["latitude (deg)"] = region.centroid.lat
center_dict["longitude (deg)"] = point.lon center_dict["longitude (deg)"] = region.centroid.lon
end end
center = CollectionCenter( center = CollectionCenter(
length(collection_centers) + 1, length(collection_centers) + 1,

@ -5,19 +5,19 @@
using RELOG using RELOG
@testset "geodb_query (2018-us-county)" begin @testset "geodb_query (2018-us-county)" begin
point = RELOG.geodb_query("2018-us-county:17043") region = RELOG.geodb_query("2018-us-county:17043")
@test point.lat == 41.83956 @test region.centroid.lat == 41.83956
@test point.lon == -88.08857 @test region.centroid.lon == -88.08857
end end
@testset "geodb_query (2018-us-zcta)" begin @testset "geodb_query (2018-us-zcta)" begin
point = RELOG.geodb_query("2018-us-zcta:60439") region = RELOG.geodb_query("2018-us-zcta:60439")
@test point.lat == 41.68241 @test region.centroid.lat == 41.68241
@test point.lon == -87.98954 @test region.centroid.lon == -87.98954
end end
@testset "geodb_query (us-state)" begin @testset "geodb_query (us-state)" begin
point = RELOG.geodb_query("us-state:IL") region = RELOG.geodb_query("us-state:IL")
@test point.lat == 39.73939 @test region.centroid.lat == 39.73939
@test point.lon == -89.50414 @test region.centroid.lon == -89.50414
end end

Loading…
Cancel
Save