From ca187fe78e799b95149eff1f441c82ca66fcb901 Mon Sep 17 00:00:00 2001
From: Alinson S Xavier
Date: Tue, 22 Jun 2021 20:07:56 -0500
Subject: [PATCH] Implement geodb.jl

---
 .gitignore                  |   3 +
 Project.toml                |   6 ++
 src/RELOG.jl                |   1 +
 src/instance/geodb.jl       | 156 ++++++++++++++++++++++++++++++++++++
 src/reports/write.jl        |   1 +
 test/instance/geodb_test.jl |  11 +++
 test/runtests.jl            |   1 +
 7 files changed, 179 insertions(+)
 create mode 100644 src/instance/geodb.jl
 create mode 100644 test/instance/geodb_test.jl

diff --git a/.gitignore b/.gitignore
index e081eb6..56f7226 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,3 +9,6 @@ notebooks
 .idea
 *.lp
 Manifest.toml
+data
+build
+benchmark
diff --git a/Project.toml b/Project.toml
index 8c58931..2f427ee 100644
--- a/Project.toml
+++ b/Project.toml
@@ -4,6 +4,7 @@ authors = ["Alinson S Xavier "]
 version = "0.5.0"
 
 [deps]
+CRC = "44b605c4-b955-5f2b-9b6d-d2bd01d3d205"
 CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
 Cbc = "9961bab8-2fa3-5c5a-9d89-47fab24efd76"
 Clp = "e2554f3b-3117-50c0-817c-e040a3ddf72d"
@@ -19,10 +20,13 @@ MathOptInterface = "b8f27783-ece8-5eb3-8dc8-9495eed66fee"
 PackageCompiler = "9b87118b-4619-50d2-8e1e-99f35a4d4d9d"
 Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 ProgressBars = "49802e3a-d2f1-5c88-81d8-b72133a6f568"
+Shapefile = "8e980c4a-a4fe-5da2-b3a7-4b4b0353a2f4"
 Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
+ZipFile = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea"
 
 [compat]
+CRC = "4"
 CSV = "0.7"
 Cbc = "0.6"
 Clp = "0.8"
@@ -36,4 +40,6 @@ JuMP = "0.21"
 MathOptInterface = "0.9"
 PackageCompiler = "1"
 ProgressBars = "0.6"
+Shapefile = "0.7"
+ZipFile = "0.9"
 julia = "1"
diff --git a/src/RELOG.jl b/src/RELOG.jl
index e115591..46df1c3 100644
--- a/src/RELOG.jl
+++ b/src/RELOG.jl
@@ -11,6 +11,7 @@ include("graph/structs.jl")
 include("graph/build.jl")
 include("graph/csv.jl")
 include("instance/compress.jl")
+include("instance/geodb.jl")
 include("instance/parse.jl")
 include("instance/validate.jl")
 include("model/build.jl")
diff --git a/src/instance/geodb.jl b/src/instance/geodb.jl
new file mode 100644
index 0000000..9d8f39a
--- /dev/null
+++ b/src/instance/geodb.jl
@@ -0,0 +1,156 @@
+# RELOG: Reverse Logistics Optimization
+# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
+# Released under the modified BSD license. See COPYING.md for more details.
+
+using CRC
+using CSV
+using DataFrames
+using Shapefile
+using Statistics
+using ZipFile
+
+# Checksum function used to validate downloaded archives. Declared `const` so
+# that calls through this global can be inferred by the compiler.
+const crc32 = crc(CRC_32)
+
+"""
+    GeoPoint(lat, lon)
+
+Geographic location stored as a latitude/longitude pair.
+"""
+struct GeoPoint
+    lat::Float64
+    lon::Float64
+end
+
+# Cache of loaded databases: database name => (location id => point).
+const DB_CACHE = Dict{String,Dict{String,GeoPoint}}()
+
+"""
+    centroid(geom::Shapefile.Polygon)::GeoPoint
+
+Return the center of the bounding box of `geom`, rounded to five decimal places.
+
+The `y` coordinates of the points become the latitude and the `x` coordinates
+the longitude. This is the midpoint of the coordinate extremes, not the
+area-weighted centroid of the polygon.
+"""
+function centroid(geom::Shapefile.Polygon)::GeoPoint
+    x_max, x_min, y_max, y_min = -Inf, Inf, -Inf, Inf
+    for p in geom.points
+        x_max = max(x_max, p.x)
+        x_min = min(x_min, p.x)
+        y_max = max(y_max, p.y)
+        y_min = min(y_min, p.y)
+    end
+    x_center = (x_max + x_min) / 2.0
+    y_center = (y_max + y_min) / 2.0
+    return GeoPoint(round(y_center, digits = 5), round(x_center, digits = 5))
+end
+
+"""
+    download_census_gov(url, outputdir, expected_crc32)
+
+Download the zip archive at `url` and extract its files into `outputdir`.
+
+Nothing is done if `outputdir` already exists. The archive is checked against
+`expected_crc32` before `outputdir` is created, and a partially extracted
+`outputdir` is removed on failure, so that a failed attempt is retried on the
+next call instead of leaving an empty or incomplete directory behind.
+"""
+function download_census_gov(url, outputdir, expected_crc32)::Nothing
+    isdir(outputdir) && return
+    @info "Downloading: $url"
+    zip_filename = download(url)
+    try
+        actual_crc32 = open(crc32, zip_filename)
+        expected_crc32 == actual_crc32 || error("CRC32 mismatch")
+        mkpath(outputdir)
+        try
+            open(zip_filename) do zip_file
+                zr = ZipFile.Reader(zip_file)
+                for file in zr.files
+                    open(joinpath(outputdir, file.name), "w") do output_file
+                        write(output_file, read(file))
+                    end
+                end
+            end
+        catch
+            # Do not leave a half-populated directory behind; its mere
+            # existence would make later calls skip the download.
+            rm(outputdir, force = true, recursive = true)
+            rethrow()
+        end
+    finally
+        # The archive is a temporary file and is no longer needed.
+        rm(zip_filename, force = true)
+    end
+    return
+end
+
+"""
+    load_2018_us_county()::Dict{String,GeoPoint}
+
+Return the locations stored in the "2018-us-county" database, keyed by the
+concatenation of the `STATEFP` and `COUNTYFP` shapefile fields (e.g. "17043").
+
+On first use, the shapefile is downloaded from census.gov and converted into a
+`locations.csv` file under the `data` directory; later calls read that file,
+and the parsed table is kept in `DB_CACHE` for the rest of the session.
+"""
+function load_2018_us_county()::Dict{String,GeoPoint}
+    db_name = "2018-us-county"
+    basedir = joinpath(dirname(@__FILE__), "..", "..", "data", db_name)
+    csv_filename = joinpath(basedir, "locations.csv")
+    if !isfile(csv_filename)
+        download_census_gov(
+            "https://www2.census.gov/geo/tiger/GENZ2018/shp/cb_2018_us_county_500k.zip",
+            basedir,
+            0x83eaec6d,
+        )
+        table = Shapefile.Table(joinpath(basedir, "cb_2018_us_county_500k.shp"))
+        geoms = Shapefile.shapes(table)
+        df = DataFrame(id = String[], latitude = Float64[], longitude = Float64[])
+        for (i, geom) in enumerate(geoms)
+            c = centroid(geom)
+            id = table.STATEFP[i] * table.COUNTYFP[i]
+            push!(df, [id, c.lat, c.lon])
+        end
+        sort!(df)
+        @info "Writing: $csv_filename"
+        CSV.write(csv_filename, df)
+    end
+    if !haskey(DB_CACHE, db_name)
+        csv = CSV.File(csv_filename; types = [String, Float64, Float64])
+        DB_CACHE[db_name] =
+            Dict(row.id => GeoPoint(row.latitude, row.longitude) for row in csv)
+    end
+    return DB_CACHE[db_name]
+end
+
+"""
+    load_latlon_database(db_name)
+
+Return the location table for the database called `db_name`. The only database
+currently available is "2018-us-county"; any other name raises an error.
+"""
+function load_latlon_database(db_name)
+    db_name == "2018-us-county" && return load_2018_us_county()
+    error("Unknown database: $db_name")
+end
+
+"""
+    geodb_query(name)
+
+Return the `GeoPoint` for `name`, given as "database:id" ("2018-us-county:17043").
+
+Throws an `ArgumentError` if `name` does not contain a `:` separator, an error
+if the database is unknown, and a `KeyError` if the id is not in the database.
+"""
+function geodb_query(name)
+    parts = split(name, ":")
+    length(parts) >= 2 ||
+        throw(ArgumentError("Invalid location (expected \"database:id\"): $name"))
+    db_name, id = parts
+    return load_latlon_database(db_name)[id]
+end
diff --git a/src/reports/write.jl b/src/reports/write.jl
index bacf6d4..3242c27 100644
--- a/src/reports/write.jl
+++ b/src/reports/write.jl
@@ -4,6 +4,7 @@
 
 using DataFrames
 using CSV
+import Base: write
 
 function write(solution::AbstractDict, filename::AbstractString)
     @info "Writing solution: $filename"
diff --git a/test/instance/geodb_test.jl b/test/instance/geodb_test.jl
new file mode 100644
index 0000000..ace3341
--- /dev/null
+++ b/test/instance/geodb_test.jl
@@ -0,0 +1,11 @@
+# RELOG: Reverse Logistics Optimization
+# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
+# Released under the modified BSD license. See COPYING.md for more details.
+
+using RELOG
+
+@testset "geodb_query (2018-us-county)" begin
+    point = RELOG.geodb_query("2018-us-county:17043")
+    @test point.lat == 41.83956
+    @test point.lon == -88.08857
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index e15324a..9012e6b 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -6,6 +6,7 @@ using Test
 @testset "RELOG" begin
     @testset "Instance" begin
         include("instance/compress_test.jl")
+        include("instance/geodb_test.jl")
         include("instance/parse_test.jl")
     end
     @testset "Graph" begin