From 3d36caa5078aad9b3e4041a6955a96cb6fe73035 Mon Sep 17 00:00:00 2001 From: "Alinson S. Xavier" Date: Fri, 28 Mar 2025 13:46:45 -0500 Subject: [PATCH] Circular: Implement driving distances --- .gitignore | 1 + Project.toml | 5 ++ src/instance/parse.jl | 11 +++- src/instance/structs.jl | 11 +++- src/model/build.jl | 8 ++- src/model/dist.jl | 103 +++++++++++++++++++++++++++++++- test/fixtures/simple.json | 2 +- test/src/instance/parse_test.jl | 2 +- test/src/model/dist_test.jl | 5 +- test/src/reports_test.jl | 1 + 10 files changed, 141 insertions(+), 8 deletions(-) diff --git a/.gitignore b/.gitignore index ef222c5..c4b1c48 100644 --- a/.gitignore +++ b/.gitignore @@ -16,3 +16,4 @@ run.jl relog-web-legacy .vscode jobs +tmp diff --git a/Project.toml b/Project.toml index 64f8bbd..d6be9fd 100644 --- a/Project.toml +++ b/Project.toml @@ -4,10 +4,15 @@ authors = ["Alinson S. Xavier "] version = "0.8.0" [deps] +CRC = "44b605c4-b955-5f2b-9b6d-d2bd01d3d205" CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" Geodesy = "0ef565a4-170c-5f04-8de2-149903a85f3d" JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" JuMP = "4076af6c-e467-56ae-b986-b466b2749572" +NearestNeighbors = "b8a86587-4115-5ab1-83bc-aa920d37bbce" OrderedCollections = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" Printf = "de0858da-6303-5e67-8744-51eddeeeb8d7" +Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" +TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" +ZipFile = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea" diff --git a/src/instance/parse.jl b/src/instance/parse.jl index 44a71c5..1036229 100644 --- a/src/instance/parse.jl +++ b/src/instance/parse.jl @@ -9,7 +9,16 @@ function parse(json)::Instance # Read parameters time_horizon = json["parameters"]["time horizon (years)"] building_period = json["parameters"]["building period (years)"] - distance_metric = json["parameters"]["distance metric"] + + # Read distance metric + distance_metric_str = lowercase(json["parameters"]["distance metric"]) + if distance_metric_str == "driving" + distance_metric = KnnDrivingDistance() + elseif distance_metric_str == "euclidean" + distance_metric = EuclideanDistance() + else + error("Invalid distance metric: $distance_metric_str") + end timeseries(x::Union{Nothing,Number}) = repeat([x], time_horizon) timeseries(x::Array) = x diff --git a/src/instance/structs.jl b/src/instance/structs.jl index b1f0ece..ed276cf 100644 --- a/src/instance/structs.jl +++ b/src/instance/structs.jl @@ -1,5 +1,14 @@ using OrderedCollections +abstract type DistanceMetric end + +Base.@kwdef mutable struct KnnDrivingDistance <: DistanceMetric + tree = nothing + ratios = nothing +end + +mutable struct EuclideanDistance <: DistanceMetric end + Base.@kwdef struct Product name::String tr_cost::Vector{Float64} @@ -48,7 +57,7 @@ Base.@kwdef struct Instance building_period::Vector{Int} centers_by_name::OrderedDict{String,Center} centers::Vector{Center} - distance_metric::String + distance_metric::DistanceMetric products_by_name::OrderedDict{String,Product} products::Vector{Product} time_horizon::Int diff --git a/src/model/build.jl b/src/model/build.jl index 47d4992..156d288 100644 --- a/src/model/build.jl +++ b/src/model/build.jl @@ -60,7 +60,13 @@ function build_model(instance::Instance; optimizer, variable_names::Bool = false # Distances model.ext[:distances] = distances = Dict() for (p1, p2, m) in E - d = _calculate_distance(p1.latitude, p1.longitude, p2.latitude, p2.longitude) + d = _calculate_distance( + p1.latitude, + p1.longitude, + p2.latitude, + p2.longitude, + instance.distance_metric, + ) distances[p1, p2, m] = d end diff --git a/src/model/dist.jl b/src/model/dist.jl index a1e7afd..fad54c2 100644 --- a/src/model/dist.jl +++ b/src/model/dist.jl @@ -1,11 +1,110 @@ # RELOG: Reverse Logistics Optimization -# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved. +# Copyright (C) 2020-2025, UChicago Argonne, LLC. All rights reserved. # Released under the modified BSD license. See COPYING.md for more details. using Geodesy +using NearestNeighbors +using DataFrames +using CRC +using ZipFile +using Statistics +using TimerOutputs -function _calculate_distance(source_lat, source_lon, dest_lat, dest_lon)::Float64 +crc32 = crc(CRC_32) + +function _calculate_distance( + source_lat, + source_lon, + dest_lat, + dest_lon, + ::EuclideanDistance, +)::Float64 x = LLA(source_lat, source_lon, 0.0) y = LLA(dest_lat, dest_lon, 0.0) return round(euclidean_distance(x, y) / 1000.0, digits = 3) end + +function _download_file(url, output, expected_crc32)::Nothing + if isfile(output) + return + end + mkpath(dirname(output)) + @info "Downloading: $url" + fname = download(url) + actual_crc32 = open(crc32, fname) + expected_crc32 == actual_crc32 || error("CRC32 mismatch") + cp(fname, output) + return +end + +function _download_zip(url, outputdir, expected_output_file, expected_crc32)::Nothing + if isfile(expected_output_file) + return + end + mkpath(outputdir) + @info "Downloading: $url" + zip_filename = download(url) + actual_crc32 = open(crc32, zip_filename) + expected_crc32 == actual_crc32 || error("CRC32 mismatch") + open(zip_filename) do zip_file + zr = ZipFile.Reader(zip_file) + for file in zr.files + open(joinpath(outputdir, file.name), "w") do output_file + write(output_file, read(file)) + end + end + end + return +end + +function _calculate_distance( + source_lat, + source_lon, + dest_lat, + dest_lon, + metric::KnnDrivingDistance, +)::Float64 + if metric.tree === nothing + basedir = joinpath(dirname(@__FILE__), "data") + csv_filename = joinpath(basedir, "dist_driving.csv") + + # Download pre-computed driving data + @timeit "Download data" begin + if !isfile(csv_filename) + _download_zip( + "https://axavier.org/RELOG/0.6/data/dist_driving_0b9a6ad6.zip", + basedir, + csv_filename, + 0x0b9a6ad6, + ) + end + end + + @timeit "Fit KNN model" begin + df = DataFrame(CSV.File(csv_filename, missingstring = "NaN")) + dropmissing!(df) + coords = Matrix(df[!, [:source_lat, :source_lon, :dest_lat, :dest_lon]])' + metric.ratios = Matrix(df[!, [:ratio]]) + metric.tree = KDTree(coords) + end + end + + @timeit "Compute Euclidean distance" begin + dist_euclidean = _calculate_distance( + source_lat, + source_lon, + dest_lat, + dest_lon, + EuclideanDistance(), + ) + end + + @timeit "Predict driving distance" begin + idxs, _ = knn(metric.tree, [source_lat, source_lon, dest_lat, dest_lon], 5) + ratio_pred = mean(metric.ratios[idxs]) + dist_pred = round(dist_euclidean * ratio_pred, digits = 3) + isfinite(dist_pred) || error("non-finite distance detected: $dist_pred") + end + + return dist_pred +end diff --git a/test/fixtures/simple.json b/test/fixtures/simple.json index c5b7f1e..7956cf1 100644 --- a/test/fixtures/simple.json +++ b/test/fixtures/simple.json @@ -2,7 +2,7 @@ "parameters": { "time horizon (years)": 4, "building period (years)": [1], - "distance metric": "driving" + "distance metric": "euclidean" }, "products": { "P1": { diff --git a/test/src/instance/parse_test.jl b/test/src/instance/parse_test.jl index 07697c4..d16b497 100644 --- a/test/src/instance/parse_test.jl +++ b/test/src/instance/parse_test.jl @@ -8,7 +8,7 @@ function instance_parse_test_1() # Parameters @test instance.time_horizon == 4 @test instance.building_period == [1] - @test instance.distance_metric == "driving" + @test instance.distance_metric isa RELOG.EuclideanDistance # Products @test length(instance.products) == 4 diff --git a/test/src/model/dist_test.jl b/test/src/model/dist_test.jl index c1ace4d..fb545bd 100644 --- a/test/src/model/dist_test.jl +++ b/test/src/model/dist_test.jl @@ -6,5 +6,8 @@ using RELOG function model_dist_test() # Euclidean distance between Chicago and Indianapolis - @test RELOG._calculate_distance(41.866, -87.656, 39.764, -86.148) == 265.818 + @test RELOG._calculate_distance(41.866, -87.656, 39.764, -86.148, RELOG.EuclideanDistance()) == 265.818 + + # Driving distance between Chicago and Indianapolis + @test RELOG._calculate_distance(41.866, -87.656, 39.764, -86.148, RELOG.KnnDrivingDistance()) == 316.43 end diff --git a/test/src/reports_test.jl b/test/src/reports_test.jl index 504e479..d82f112 100644 --- a/test/src/reports_test.jl +++ b/test/src/reports_test.jl @@ -3,6 +3,7 @@ function report_tests() instance = RELOG.parsefile(fixture("boat_example.json")) model = RELOG.build_model(instance, optimizer = HiGHS.Optimizer, variable_names = true) optimize!(model) + mkpath("tmp") write_to_file(model, "tmp/model.lp") RELOG.write_plants_report(model, "tmp/plants.csv") RELOG.write_plant_outputs_report(model, "tmp/plant_outputs.csv")