# Patch summary (free-form text placed before the first "diff --git" header).
#
#   * src/instance/geodb.jl: adds the GeoRegion struct (centroid + population,
#     keyword-only inner constructor); DB_CACHE and every loader now hold/return
#     GeoRegion instead of GeoPoint. Renames: load_census_gov ->
#     geodb_load_gov_census, download_census_gov -> download_zip,
#     load_latlon_database -> geodb_load, load_<db> -> geodb_load_<db>, and the
#     keywords url / expected_crc32 -> shp_url / shp_crc32. Entries built from
#     locations.csv are created with population = 0.
#   * src/instance/parse.jl: latitude/longitude are read via region.centroid.
#   * test/instance/geodb_test.jl: assertions go through region.centroid.
#
# NOTE(review): line breaks and blank context lines below were reconstructed
# from the hunk headers' line counts; 4-space indentation is assumed -- confirm
# against the target files before applying with strict whitespace matching.
diff --git a/src/instance/geodb.jl b/src/instance/geodb.jl
index 0ffe84e..118c7d0 100644
--- a/src/instance/geodb.jl
+++ b/src/instance/geodb.jl
@@ -17,7 +17,13 @@ struct GeoPoint
     lon::Float64
 end
 
-DB_CACHE = Dict{String,Dict{String,GeoPoint}}()
+struct GeoRegion
+    centroid::GeoPoint
+    population::Int
+    GeoRegion(; centroid, population) = new(centroid, population)
+end
+
+DB_CACHE = Dict{String,Dict{String,GeoRegion}}()
 
 function centroid(geom::Shapefile.Polygon)::GeoPoint
     x_max, x_min, y_max, y_min = -Inf, Inf, -Inf, Inf
@@ -32,7 +38,7 @@ function centroid(geom::Shapefile.Polygon)::GeoPoint
     return GeoPoint(round(y_center, digits = 5), round(x_center, digits = 5))
 end
 
-function download_census_gov(url, outputdir, expected_crc32)::Nothing
+function download_zip(url, outputdir, shp_crc32)::Nothing
     if isdir(outputdir)
         return
     end
@@ -40,7 +46,7 @@ function download_census_gov(url, outputdir, expected_crc32)::Nothing
     @info "Downloading: $url"
     zip_filename = download(url)
     actual_crc32 = open(crc32, zip_filename)
-    expected_crc32 == actual_crc32 || error("CRC32 mismatch")
+    shp_crc32 == actual_crc32 || error("CRC32 mismatch")
     open(zip_filename) do zip_file
         zr = ZipFile.Reader(zip_file)
         for file in zr.files
@@ -52,17 +58,17 @@ function download_census_gov(url, outputdir, expected_crc32)::Nothing
     return
 end
 
-function load_census_gov(;
+function geodb_load_gov_census(;
     db_name,
-    url,
-    expected_crc32,
-    shp_filename,
     extract_id,
-)::Dict{String,GeoPoint}
+    shp_crc32,
+    shp_filename,
+    shp_url,
+)::Dict{String,GeoRegion}
     basedir = joinpath(dirname(@__FILE__), "..", "..", "data", db_name)
     csv_filename = "$basedir/locations.csv"
     if !isfile(csv_filename)
-        download_census_gov(url, basedir, expected_crc32)
+        download_zip(shp_url, basedir, shp_crc32)
         @info "Processing: $shp_filename"
         table = Shapefile.Table(joinpath(basedir, shp_filename))
         geoms = Shapefile.shapes(table)
@@ -77,8 +83,12 @@ function load_census_gov(;
     end
     if db_name ∉ keys(DB_CACHE)
         csv = CSV.File(csv_filename; types = [String, Float64, Float64])
-        DB_CACHE[db_name] =
-            Dict(row.id => GeoPoint(row.latitude, row.longitude) for row in csv)
+        DB_CACHE[db_name] = Dict(
+            row.id => GeoRegion(
+                centroid = GeoPoint(row.latitude, row.longitude),
+                population = 0,
+            ) for row in csv
+        )
     end
     return DB_CACHE[db_name]
 end
@@ -87,13 +97,13 @@ function _id_2018_us_county(table::Shapefile.Table, i::Int)::String
     return table.STATEFP[i] * table.COUNTYFP[i]
 end
 
-function load_2018_us_county()::Dict{String,GeoPoint}
-    return load_census_gov(
+function geodb_load_2018_us_county()::Dict{String,GeoRegion}
+    return geodb_load_gov_census(
         db_name = "2018-us-county",
-        url = "https://www2.census.gov/geo/tiger/GENZ2018/shp/cb_2018_us_county_500k.zip",
-        expected_crc32 = 0x83eaec6d,
-        shp_filename = "cb_2018_us_county_500k.shp",
         extract_id = _id_2018_us_county,
+        shp_crc32 = 0x83eaec6d,
+        shp_filename = "cb_2018_us_county_500k.shp",
+        shp_url = "https://www2.census.gov/geo/tiger/GENZ2018/shp/cb_2018_us_county_500k.zip",
     )
 end
 
@@ -101,13 +111,13 @@ function _id_2018_us_zcta(table::Shapefile.Table, i::Int)::String
     return table.ZCTA5CE10[i]
 end
 
-function load_2018_us_zcta()::Dict{String,GeoPoint}
-    return load_census_gov(
+function geodb_load_2018_us_zcta()::Dict{String,GeoRegion}
+    return geodb_load_gov_census(
         db_name = "2018-us-zcta",
-        url = "https://www2.census.gov/geo/tiger/GENZ2018/shp/cb_2018_us_zcta510_500k.zip",
-        expected_crc32 = 0x6391f5fc,
-        shp_filename = "cb_2018_us_zcta510_500k.shp",
         extract_id = _id_2018_us_zcta,
+        shp_crc32 = 0x6391f5fc,
+        shp_filename = "cb_2018_us_zcta510_500k.shp",
+        shp_url = "https://www2.census.gov/geo/tiger/GENZ2018/shp/cb_2018_us_zcta510_500k.zip",
     )
 end
 
@@ -115,24 +125,24 @@ function _id_us_state(table::Shapefile.Table, i::Int)::String
     return table.STUSPS[i]
 end
 
-function load_us_state()::Dict{String,GeoPoint}
-    return load_census_gov(
+function geodb_load_us_state()::Dict{String,GeoRegion}
+    return geodb_load_gov_census(
         db_name = "us-state",
-        url = "https://www2.census.gov/geo/tiger/GENZ2018/shp/cb_2018_us_state_500k.zip",
-        expected_crc32 = 0x9469e5ca,
-        shp_filename = "cb_2018_us_state_500k.shp",
         extract_id = _id_us_state,
+        shp_crc32 = 0x9469e5ca,
+        shp_filename = "cb_2018_us_state_500k.shp",
+        shp_url = "https://www2.census.gov/geo/tiger/GENZ2018/shp/cb_2018_us_state_500k.zip",
     )
 end
 
-function load_latlon_database(db_name)
-    db_name == "2018-us-county" && return load_2018_us_county()
-    db_name == "2018-us-zcta" && return load_2018_us_zcta()
-    db_name == "us-state" && return load_us_state()
+function geodb_load(db_name::AbstractString)::Dict{String,GeoRegion}
+    db_name == "2018-us-county" && return geodb_load_2018_us_county()
+    db_name == "2018-us-zcta" && return geodb_load_2018_us_zcta()
+    db_name == "us-state" && return geodb_load_us_state()
     error("Unknown database: $db_name")
 end
 
-function geodb_query(name)
+function geodb_query(name)::GeoRegion
     db_name, id = split(name, ":")
-    return load_latlon_database(db_name)[id]
+    return geodb_load(db_name)[id]
 end
diff --git a/src/instance/parse.jl b/src/instance/parse.jl
index cd587c1..4948baa 100644
--- a/src/instance/parse.jl
+++ b/src/instance/parse.jl
@@ -54,9 +54,9 @@ function parse(json)::Instance
         if "initial amounts" in keys(product_dict)
             for (center_name, center_dict) in product_dict["initial amounts"]
                 if "location" in keys(center_dict)
-                    point = geodb_query(center_dict["location"])
-                    center_dict["latitude (deg)"] = point.lat
-                    center_dict["longitude (deg)"] = point.lon
+                    region = geodb_query(center_dict["location"])
+                    center_dict["latitude (deg)"] = region.centroid.lat
+                    center_dict["longitude (deg)"] = region.centroid.lon
                 end
                 center = CollectionCenter(
                     length(collection_centers) + 1,
diff --git a/test/instance/geodb_test.jl b/test/instance/geodb_test.jl
index b9a3c9d..8a962fb 100644
--- a/test/instance/geodb_test.jl
+++ b/test/instance/geodb_test.jl
@@ -5,19 +5,19 @@ using RELOG
 
 
 @testset "geodb_query (2018-us-county)" begin
-    point = RELOG.geodb_query("2018-us-county:17043")
-    @test point.lat == 41.83956
-    @test point.lon == -88.08857
+    region = RELOG.geodb_query("2018-us-county:17043")
+    @test region.centroid.lat == 41.83956
+    @test region.centroid.lon == -88.08857
 end
 
 @testset "geodb_query (2018-us-zcta)" begin
-    point = RELOG.geodb_query("2018-us-zcta:60439")
-    @test point.lat == 41.68241
-    @test point.lon == -87.98954
+    region = RELOG.geodb_query("2018-us-zcta:60439")
+    @test region.centroid.lat == 41.68241
+    @test region.centroid.lon == -87.98954
 end
 
 @testset "geodb_query (us-state)" begin
-    point = RELOG.geodb_query("us-state:IL")
-    @test point.lat == 39.73939
-    @test point.lon == -89.50414
+    region = RELOG.geodb_query("us-state:IL")
+    @test region.centroid.lat == 39.73939
+    @test region.centroid.lon == -89.50414
 end