
Traveling Salesman Problem

Problem definition

Given a list of cities and the distance between each pair of cities, the problem asks for the shortest route starting at the first city, visiting each other city exactly once, then returning to the first city. This problem is a generalization of the Hamiltonian path problem, one of Karp's 21 NP-complete problems.
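The included TravelingSalesmanInstance (see miplearn/problems/tsp.py in the diff below) models this as the classical degree-constrained formulation, with subtour elimination constraints generated lazily:

$$\begin{align*} \text{minimize} \quad & \sum_{i<j} d_{i,j}\, x_{i,j} \\ \text{subject to} \quad & \sum_{j \neq i} x_{i,j} = 2 & \forall i \\ & \sum_{(i,j) \in \delta(S)} x_{i,j} \geq 2 & \forall\, \emptyset \neq S \subsetneq \{1,\ldots,n\} \\ & x_{i,j} \in \{0,1\} \end{align*}$$

where $\delta(S)$ denotes the set of edges with exactly one endpoint in $S$. Only the degree constraints are added upfront; a subtour elimination constraint is generated whenever find_violations detects a connected component smaller than $n$.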


Random problem generator

The class TravelingSalesmanGenerator can be used to generate random instances of this problem. Initially, the generator creates $n$ cities $(x_1,y_1),\ldots,(x_n,y_n) \in \mathbb{R}^2$, where $n$, $x_i$ and $y_i$ are sampled independently from the provided probability distributions n, x and y. For each pair of cities $(i,j)$, the distance $d_{i,j}$ between them is set to:

$$d_{i,j} = \gamma_{i,j} \sqrt{(x_i - x_j)^2 + (y_i - y_j)^2},$$

where $\gamma_{i,j}$ is sampled from the distribution gamma.

If fix_cities=True is provided, the list of cities is kept the same for all generated instances. The $\gamma$ values, and therefore also the distances, are still different.

By default, all distances $d_{i,j}$ are rounded to the nearest integer. If round=False is provided, this rounding is disabled.
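As a usage sketch (constructor arguments as in the Challenge A block below; generate(n_samples) returns a list of instances, per miplearn/problems/tsp.py in the diff):

    from scipy.stats import randint, uniform
    from miplearn.problems.tsp import TravelingSalesmanGenerator

    # Fixed set of 100 cities; distances re-scaled by fresh gamma values per instance
    gen = TravelingSalesmanGenerator(n=randint(low=100, high=101),
                                     gamma=uniform(loc=0.95, scale=0.1),
                                     fix_cities=True,
                                     round=True)
    instances = gen.generate(500)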


Challenge A

TravelingSalesmanGenerator(x=uniform(loc=0.0, scale=1000.0),
                           y=uniform(loc=0.0, scale=1000.0),
                           n=randint(low=350, high=351),
                           gamma=uniform(loc=0.95, scale=0.1),
                           fix_cities=True,
                           round=True,
                          )
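The same configuration is packaged as the class ChallengeA in miplearn/problems/tsp.py (added in the diff below), which additionally fixes the random seed and pre-generates 500 training and 50 test instances:

    from miplearn.problems.tsp import ChallengeA

    challenge = ChallengeA()
    training_instances = challenge.training_instances  # 500 instances
    test_instances = challenge.test_instances          # 50 instances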


Multidimensional 0-1 Knapsack Problem


Problem definition

Given a set of $n$ items and $m$ types of resources (also called knapsacks), the problem is to find a subset of items that maximizes profit without consuming more resources than are available. More precisely, letting $p_j$ be the profit of item $j$, $w_{ij}$ the amount of resource $i$ consumed by item $j$, and $b_i$ the amount of resource $i$ available, the problem is:

$$\begin{align*} \text{maximize} \quad & \sum_{j=1}^n p_j x_j \\ \text{subject to} \quad & \sum_{j=1}^n w_{ij} x_j \leq b_i & i=1,\ldots,m \\ & x_j \in \{0,1\} & j=1,\ldots,n \end{align*}$$

The first method is used by LearningSolver to construct a concrete Pyomo model, which will be provided to the internal MIP solver. The user should keep a reference to this Pyomo model, in order to retrieve, for example, the optimal variable values.

The second and third methods provide an encoding of the instance, which can be used by the ML models to make predictions. In the knapsack problem, for example, an implementation may decide to provide as instance features the average weights, average prices, number of items and the size of the knapsack. The weight and the price of each individual item could be provided as variable features. See miplearn/problems/knapsack.py for a concrete example.
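As a minimal sketch of such an implementation (hypothetical class name and constructor fields; the actual code in miplearn/problems/knapsack.py may differ):

    import numpy as np
    import pyomo.environ as pe
    from miplearn import Instance

    class MyKnapsackInstance(Instance):
        def __init__(self, weights, prices, capacity):
            self.weights = weights
            self.prices = prices
            self.capacity = capacity

        def to_model(self):
            # Concrete Pyomo model, handed to the internal MIP solver
            model = pe.ConcreteModel()
            items = range(len(self.weights))
            model.x = pe.Var(items, domain=pe.Binary)
            model.obj = pe.Objective(expr=sum(self.prices[i] * model.x[i]
                                              for i in items),
                                     sense=pe.maximize)
            model.eq_capacity = pe.Constraint(expr=sum(self.weights[i] * model.x[i]
                                                       for i in items) <= self.capacity)
            return model

        def get_instance_features(self):
            # Fixed-length vector describing the instance as a whole
            return np.array([np.mean(self.weights),
                             np.mean(self.prices),
                             len(self.weights),
                             self.capacity])

        def get_variable_features(self, var_name, index):
            # Fixed-length vector describing a single decision variable
            return np.array([self.weights[index],
                             self.prices[index]])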


An optional method which can be implemented is instance.get_variable_category(var_name, index), which returns a category (a string, an integer or any hashable type) for each decision variable. If two variables have the same category, LearningSolver will use the same internal ML model to predict the values of both variables. By default, all variables belong to the "default" category, and therefore only one ML model is used for all variables. If the returned category is None, ML predictors will ignore the variable.
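The TravelingSalesmanInstance added in the diff below, for instance, gives each variable index its own category, so one ML model is trained per edge:

    def get_variable_category(self, var_name, index):
        # One ML model per variable index, instead of a single shared model
        return index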


It is not necessary to have a one-to-one correspondence between features and problem instances. One important (and deliberate) limitation of MIPLearn, however, is that get_instance_features() must always return arrays of same length for all relevant instances of the problem. Similarly, get_variable_features(var_name, index) must also always return arrays of same length for all variables in each category. It is up to the user to decide how to encode variable-length characteristics of the problem into fixed-length vectors. In graph problems, for example, graph embeddings can be used to reduce the (variable-length) lists of nodes and edges into a fixed-length structure that still preserves some properties of the graph. Different instance encodings may have significant impact on performance.
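A hypothetical helper illustrating the idea (a much simpler alternative to graph embeddings): summary statistics turn a list of any length into a vector of fixed length:

    import numpy as np

    def summarize(values):
        # Compress a variable-length list into four summary statistics, so that
        # instances of different sizes still yield feature vectors of equal length.
        v = np.asarray(values, dtype=float)
        return np.array([v.min(), v.max(), v.mean(), v.std()])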

Obtaining heuristic solutions

By default, LearningSolver uses Machine Learning to accelerate the MIP solution process, while maintaining all optimality guarantees provided by the MIP solver. In the default mode of operation, for example, predicted optimal solutions are used only as MIP starts.

For more significant performance benefits, LearningSolver can also be configured to place additional trust in the Machine Learning predictors, by using the mode="heuristic" constructor argument. When operating in this mode, if a ML model is statistically shown (through stratified k-fold cross validation) to have exceptionally high accuracy, the solver may decide to restrict the search space based on its predictions. The parts of the solution which the ML models cannot predict accurately will still be explored using traditional (branch-and-bound) methods. For particular applications, this mode has been shown to quickly produce optimal or near-optimal solutions (see references and benchmark results).
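A sketch of this workflow (hedged: assuming fit accepts the list of previously solved training instances, as the tests in the diff below do for BenchmarkRunner; the exact signature may differ):

    solver = LearningSolver(mode="heuristic")
    solver.fit(training_instances)   # training_instances were solved earlier
    solver.solve(test_instance)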


diff --git a/miplearn/__init__.py b/miplearn/__init__.py
index 8ee19d4..1c438aa 100644
--- a/miplearn/__init__.py
+++ b/miplearn/__init__.py
@@ -3,13 +3,13 @@
 # Released under the modified BSD license. See COPYING.md for more details.
 
 from .extractors import (SolutionExtractor,
-                         CombinedExtractor,
                          InstanceFeaturesExtractor,
                          ObjectiveValueExtractor,
                          VariableFeaturesExtractor,
                         )
 from .components.component import Component
 from .components.objective import ObjectiveValueComponent
+from .components.lazy import LazyConstraintsComponent
 from .components.primal import (PrimalSolutionComponent,
                                 AdaptivePredictor,
                                )
diff --git a/miplearn/components/component.py b/miplearn/components/component.py
index fe4ca6a..fba3bf1 100644
--- a/miplearn/components/component.py
+++ b/miplearn/components/component.py
@@ -18,10 +18,6 @@ class Component(ABC):
     def after_solve(self, solver, instance, model):
         pass
 
-    @abstractmethod
-    def merge(self, other):
-        pass
-
     @abstractmethod
     def fit(self, training_instances):
         pass
diff --git a/miplearn/components/lazy.py b/miplearn/components/lazy.py
new file mode 100644
index 0000000..8200085
--- /dev/null
+++ b/miplearn/components/lazy.py
@@ -0,0 +1,57 @@
+# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
+# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
+# Released under the modified BSD license. See COPYING.md for more details.
+
+from .component import Component
+from ..extractors import *
+
+from abc import ABC, abstractmethod
+from copy import deepcopy
+import numpy as np
+from sklearn.pipeline import make_pipeline
+from sklearn.linear_model import LogisticRegression
+from sklearn.preprocessing import StandardScaler
+from sklearn.model_selection import cross_val_score
+from sklearn.metrics import roc_curve
+from sklearn.neighbors import KNeighborsClassifier
+from tqdm.auto import tqdm
+import pyomo.environ as pe
+import logging
+logger = logging.getLogger(__name__)
+
+
+class LazyConstraintsComponent(Component):
+    """
+    A component that predicts which lazy constraints to enforce.
+    """
+
+    def __init__(self):
+        self.violations = set()
+        self.count = {}
+        self.n_samples = 0
+
+    def before_solve(self, solver, instance, model):
+        logger.info("Enforcing %d lazy constraints" % len(self.violations))
+        for v in self.violations:
+            if self.count[v] < self.n_samples * 0.05:
+                continue
+            cut = instance.build_lazy_constraint(model, v)
+            solver.internal_solver.add_constraint(cut)
+
+    def after_solve(self, solver, instance, model):
+        pass
+
+    def fit(self, training_instances):
+        logger.debug("Fitting...")
+        self.n_samples = len(training_instances)
+        for instance in training_instances:
+            if not hasattr(instance, "found_violations"):
+                continue
+            for v in instance.found_violations:
+                self.violations.add(v)
+                if v not in self.count.keys():
+                    self.count[v] = 0
+                self.count[v] += 1
+
+    def predict(self, instance, model=None):
+        return self.violations
diff --git a/miplearn/components/objective.py b/miplearn/components/objective.py
index ecf2f72..cf6d20e 100644
--- a/miplearn/components/objective.py
+++ b/miplearn/components/objective.py
@@ -30,16 +30,16 @@ class ObjectiveValueComponent(Component):
     def after_solve(self, solver, instance, model):
         pass
 
-    def merge(self, other):
-        pass
-
     def fit(self, training_instances):
+        logger.debug("Extracting features...")
         features = InstanceFeaturesExtractor().extract(training_instances)
         ub = ObjectiveValueExtractor(kind="upper bound").extract(training_instances)
         lb = ObjectiveValueExtractor(kind="lower bound").extract(training_instances)
         self.ub_regressor = deepcopy(self.regressor_prototype)
         self.lb_regressor = deepcopy(self.regressor_prototype)
+        logger.debug("Fitting ub_regressor...")
         self.ub_regressor.fit(features, ub)
+        logger.debug("Fitting lb_regressor...")
         self.lb_regressor.fit(features, lb)
 
     def predict(self, instances):
diff --git a/miplearn/components/primal.py b/miplearn/components/primal.py
index 92d7542..3b21075 100644
--- a/miplearn/components/primal.py
+++ b/miplearn/components/primal.py
@@ -129,7 +129,7 @@ class PrimalSolutionComponent(Component):
         self.dynamic_thresholds = dynamic_thresholds
 
     def before_solve(self, solver, instance, model):
-        solution = self.predict(instance, model)
+        solution = self.predict(instance)
         if self.mode == "heuristic":
             solver.internal_solver.fix(solution)
         else:
@@ -139,6 +139,7 @@ class PrimalSolutionComponent(Component):
         pass
 
     def fit(self, training_instances):
+        logger.debug("Extracting features...")
         features = VariableFeaturesExtractor().extract(training_instances)
         solutions = SolutionExtractor().extract(training_instances)
 
@@ -180,12 +181,10 @@ class PrimalSolutionComponent(Component):
                     self.thresholds[category, label] = thresholds[k]
 
-    def predict(self, instance, model=None):
-        if model is None:
-            model = instance.to_model()
-        x_test = VariableFeaturesExtractor().extract([instance], [model])
+    def predict(self, instance):
+        x_test = VariableFeaturesExtractor().extract([instance])
         solution = {}
-        var_split = Extractor.split_variables(instance, model)
+        var_split = Extractor.split_variables(instance)
         for category in var_split.keys():
             for (i, (var, index)) in enumerate(var_split[category]):
                 if var not in solution.keys():
@@ -200,6 +199,3 @@
                 if ws[i, 1] >= self.thresholds[category, label]:
                     solution[var][index] = label
         return solution
-
-    def merge(self, other_components):
-        pass
diff --git a/miplearn/components/tests/test_primal.py b/miplearn/components/tests/test_primal.py
index b35d5e7..3f9ec03 100644
--- a/miplearn/components/tests/test_primal.py
+++ b/miplearn/components/tests/test_primal.py
@@ -27,29 +27,7 @@ def test_predict():
     instances, models = _get_instances()
     comp = PrimalSolutionComponent()
     comp.fit(instances)
-    solution = comp.predict(instances[0], models[0])
-    assert models[0].x in solution.keys()
+    solution = comp.predict(instances[0])
+    assert "x" in solution
     for idx in range(4):
-        assert idx in solution[models[0].x].keys()
-
-# def test_warm_start_save_load():
-#     state_file = tempfile.NamedTemporaryFile(mode="r")
-#     solver = LearningSolver(components={"warm-start": WarmStartComponent()})
-#     solver.parallel_solve(_get_instances(), n_jobs=2)
-#     solver.fit()
-#     comp = solver.components["warm-start"]
-#     assert comp.x_train["default"].shape == (8, 6)
-#     assert comp.y_train["default"].shape == (8, 2)
-#     assert ("default", 0) in comp.predictors.keys()
-#     assert ("default", 1) in comp.predictors.keys()
-#     solver.save_state(state_file.name)
-
-#     solver.solve(_get_instances()[0])
-
-#     solver = LearningSolver(components={"warm-start": WarmStartComponent()})
-#     solver.load_state(state_file.name)
-#     comp = solver.components["warm-start"]
-#     assert comp.x_train["default"].shape == (8, 6)
-#     assert comp.y_train["default"].shape == (8, 2)
-#     assert ("default", 0) in comp.predictors.keys()
-#     assert ("default", 1) in comp.predictors.keys()
+        assert idx in solution["x"]
diff --git a/miplearn/extractors.py b/miplearn/extractors.py
index f966250..d4d1d1d 100644
--- a/miplearn/extractors.py
+++ b/miplearn/extractors.py
@@ -5,6 +5,10 @@ import numpy as np
 
 from abc import ABC, abstractmethod
 from pyomo.core import Var
+from tqdm.auto import tqdm, trange
+from p_tqdm import p_map
+import logging
+logger = logging.getLogger(__name__)
 
 
 class Extractor(ABC):
@@ -13,59 +17,39 @@ class Extractor(ABC):
         pass
 
     @staticmethod
-    def split_variables(instance, model):
+    def split_variables(instance):
+        assert hasattr(instance, "lp_solution")
         result = {}
-        for var in model.component_objects(Var):
-            for index in var:
-                category = instance.get_variable_category(var, index)
+        for var_name in instance.lp_solution:
+            for index in instance.lp_solution[var_name]:
+                category = instance.get_variable_category(var_name, index)
                 if category is None:
                     continue
-                if category not in result.keys():
+                if category not in result:
                     result[category] = []
-                result[category] += [(var, index)]
+                result[category] += [(var_name, index)]
         return result
-
-    @staticmethod
-    def merge(partial_results, vertical=False):
-        results = {}
-        all_categories = set()
-        for pr in partial_results:
-            all_categories |= pr.keys()
-        for category in all_categories:
-            results[category] = []
-            for pr in partial_results:
-                if category in pr.keys():
-                    results[category] += [pr[category]]
-            if vertical:
-                results[category] = np.vstack(results[category])
-            else:
-                results[category] = np.hstack(results[category])
-        return results
 
 
 class VariableFeaturesExtractor(Extractor):
-    def extract(self,
-                instances,
-                models=None,
-                ):
+    def extract(self, instances):
         result = {}
-        if models is None:
-            models = [instance.to_model() for instance in instances]
-        for (index, instance) in enumerate(instances):
-            model = models[index]
+        for instance in tqdm(instances,
+                             desc="Extract var features",
+                             disable=len(instances) < 5):
             instance_features = instance.get_instance_features()
-            var_split = self.split_variables(instance, model)
+            var_split = self.split_variables(instance)
             for (category, var_index_pairs) in var_split.items():
-                if category not in result.keys():
+                if category not in result:
                     result[category] = []
-                for (var, index) in var_index_pairs:
-                    result[category] += [np.hstack([
-                        instance_features,
-                        instance.get_variable_features(var, index),
-                        instance.lp_solution[str(var)][index],
-                    ])]
-        for category in result.keys():
-            result[category] = np.vstack(result[category])
+                for (var_name, index) in var_index_pairs:
+                    result[category] += [
+                        instance_features.tolist() + \
+                        instance.get_variable_features(var_name, index).tolist() + \
+                        [instance.lp_solution[var_name][index]]
+                    ]
+        for category in result:
+            result[category] = np.array(result[category])
         return result
 
@@ -73,39 +57,29 @@ class SolutionExtractor(Extractor):
     def __init__(self, relaxation=False):
         self.relaxation = relaxation
 
-    def extract(self, instances, models=None):
+    def extract(self, instances):
         result = {}
-        if models is None:
-            models = [instance.to_model() for instance in instances]
-        for (index, instance) in enumerate(instances):
-            model = models[index]
-            var_split = self.split_variables(instance, model)
+        for instance in tqdm(instances,
+                             desc="Extract solution",
+                             disable=len(instances) < 5):
+            var_split = self.split_variables(instance)
             for (category, var_index_pairs) in var_split.items():
-                if category not in result.keys():
+                if category not in result:
                     result[category] = []
-                for (var, index) in var_index_pairs:
+                for (var_name, index) in var_index_pairs:
                     if self.relaxation:
-                        v = instance.lp_solution[str(var)][index]
+                        v = instance.lp_solution[var_name][index]
                     else:
-                        v = instance.solution[str(var)][index]
+                        v = instance.solution[var_name][index]
                     if v is None:
                         result[category] += [[0, 0]]
                     else:
                         result[category] += [[1 - v, v]]
-        for category in result.keys():
-            result[category] = np.vstack(result[category])
+        for category in result:
+            result[category] = np.array(result[category])
         return result
 
 
-class CombinedExtractor(Extractor):
-    def __init__(self, extractors):
-        self.extractors = extractors
-
-    def extract(self, instances, models):
-        return self.merge([ex.extract(instances, models)
-                           for ex in self.extractors])
-
-
 class InstanceFeaturesExtractor(Extractor):
     def extract(self, instances, models=None):
         return np.vstack([
diff --git a/miplearn/instance.py b/miplearn/instance.py
index a3fda45..40884fd 100644
--- a/miplearn/instance.py
+++ b/miplearn/instance.py
@@ -65,12 +65,50 @@ class Instance(ABC):
     def get_variable_category(self, var, index):
         """
-        Returns a category (a string, an integer or any hashable type) for each decision variable.
+        Returns the category (a string, an integer or any hashable type) for each decision
+        variable.
 
-        If two variables have the same category, LearningSolver will use the same internal ML model
-        to predict the values of both variables. By default, all variables belong to the "default"
-        category, and therefore only one ML model is used for all variables.
+        If two variables have the same category, LearningSolver will use the same internal ML
+        model to predict the values of both variables. By default, all variables belong to the
+        "default" category, and therefore only one ML model is used for all variables.
 
         If the returned category is None, ML models will ignore the variable.
         """
         return "default"
+
+    def find_violations(self, model):
+        """
+        Returns lazy constraint violations found for the current solution.
+
+        After solving a model, LearningSolver will ask the instance to identify which lazy
+        constraints are violated by the current solution. For each identified violation,
+        LearningSolver will then call build_lazy_constraint, add the generated Pyomo
+        constraint to the model, then re-solve the problem. The process repeats until no further
+        lazy constraint violations are found.
+
+        Each "violation" is simply a string, a tuple or any other hashable type which allows the
+        instance to identify unambiguously which lazy constraint should be generated. In the
+        Traveling Salesman Problem, for example, a subtour violation could be a frozen set
+        containing the cities in the subtour.
+
+        For a concrete example, see TravelingSalesmanInstance.
+        """
+        return []
+
+    def build_lazy_constraint(self, model, violation):
+        """
+        Returns a Pyomo constraint which fixes a given violation.
+
+        This method is typically called immediately after find_violations. The violation object
+        provided to this method is exactly the same object returned earlier by find_violations.
+        After some training, LearningSolver may decide to proactively build some lazy constraints
+        at the beginning of the optimization process, before a solution is even available. In this
+        case, build_lazy_constraint will be called without a corresponding call to
+        find_violations.
+
+        The implementation should not directly add the constraint to the model. The constraint
+        will be added by LearningSolver after the method returns.
+
+        For a concrete example, see TravelingSalesmanInstance.
+        """
+        pass
\ No newline at end of file
diff --git a/miplearn/problems/tests/test_tsp.py b/miplearn/problems/tests/test_tsp.py
new file mode 100644
index 0000000..d6182b5
--- /dev/null
+++ b/miplearn/problems/tests/test_tsp.py
@@ -0,0 +1,68 @@
+# MIPLearn: Extensible Framework for Learning-Enhanced Mixed-Integer Optimization
+# Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
+# Released under the modified BSD license. See COPYING.md for more details.
+
+from miplearn import LearningSolver
+from miplearn.problems.tsp import TravelingSalesmanGenerator, TravelingSalesmanInstance
+import numpy as np
+from numpy.linalg import norm
+from scipy.spatial.distance import pdist, squareform
+from scipy.stats import uniform, randint
+
+
+def test_generator():
+    instances = TravelingSalesmanGenerator(x=uniform(loc=0.0, scale=1000.0),
+                                           y=uniform(loc=0.0, scale=1000.0),
+                                           n=randint(low=100, high=101),
+                                           gamma=uniform(loc=0.95, scale=0.1),
+                                           fix_cities=True).generate(100)
+    assert len(instances) == 100
+    assert instances[0].n_cities == 100
+    assert norm(instances[0].distances - instances[0].distances.T) < 1e-6
+    d = [instance.distances[0,1] for instance in instances]
+    assert np.std(d) > 0
+
+
+def test_instance():
+    n_cities = 4
+    distances = np.array([
+        [0., 1., 2., 1.],
+        [1., 0., 1., 2.],
+        [2., 1., 0., 1.],
+        [1., 2., 1., 0.],
+    ])
+    instance = TravelingSalesmanInstance(n_cities, distances)
+    solver = LearningSolver()
+    solver.solve(instance)
+    x = instance.solution["x"]
+    assert x[0,1] == 1.0
+    assert x[0,2] == 0.0
+    assert x[0,3] == 1.0
+    assert x[1,2] == 1.0
+    assert x[1,3] == 0.0
+    assert x[2,3] == 1.0
+    assert instance.lower_bound == 4.0
+    assert instance.upper_bound == 4.0
+
+
+def test_subtour():
+    n_cities = 6
+    cities = np.array([
+        [0., 0.],
+        [1., 0.],
+        [2., 0.],
+        [3., 0.],
+        [0., 1.],
+        [3., 1.],
+    ])
+    distances = squareform(pdist(cities))
+    instance = TravelingSalesmanInstance(n_cities, distances)
+    solver = LearningSolver()
+    solver.solve(instance)
+    x = instance.solution["x"]
+    assert x[0,1] == 1.0
+    assert x[0,4] == 1.0
+    assert x[1,2] == 1.0
+    assert x[2,3] == 1.0
+    assert x[3,5] == 1.0
+    assert x[4,5] == 1.0
\ No newline at end of file
diff --git a/miplearn/problems/tsp.py b/miplearn/problems/tsp.py
new file mode 100644
index 0000000..e7fae64
--- /dev/null
+++ b/miplearn/problems/tsp.py
@@ -0,0 +1,169 @@
+# MIPLearn, an extensible framework for Learning-Enhanced Mixed-Integer Optimization
+# Copyright (C) 2019-2020 Argonne National Laboratory. All rights reserved.
+# Written by Alinson S. Xavier
+
+import numpy as np
+import pyomo.environ as pe
+from miplearn import Instance
+from scipy.stats import uniform, randint
+from scipy.spatial.distance import pdist, squareform
+from scipy.stats.distributions import rv_frozen
+import networkx as nx
+import random
+
+
+class ChallengeA:
+    def __init__(self,
+                 seed=42,
+                 n_training_instances=500,
+                 n_test_instances=50,
+                ):
+
+        np.random.seed(seed)
+        self.generator = TravelingSalesmanGenerator(x=uniform(loc=0.0, scale=1000.0),
+                                                    y=uniform(loc=0.0, scale=1000.0),
+                                                    n=randint(low=350, high=351),
+                                                    gamma=uniform(loc=0.95, scale=0.1),
+                                                    fix_cities=True,
+                                                    round=True,
+                                                   )
+
+        np.random.seed(seed + 1)
+        self.training_instances = self.generator.generate(n_training_instances)
+
+        np.random.seed(seed + 2)
+        self.test_instances = self.generator.generate(n_test_instances)
+
+
+class TravelingSalesmanGenerator:
+    """Random generator for the Traveling Salesman Problem."""
+
+    def __init__(self,
+                 x=uniform(loc=0.0, scale=1000.0),
+                 y=uniform(loc=0.0, scale=1000.0),
+                 n=randint(low=100, high=101),
+                 gamma=uniform(loc=1.0, scale=0.0),
+                 fix_cities=True,
+                 round=True,
+                ):
+        """Initializes the problem generator.
+
+        Initially, the generator creates n cities (x_1,y_1),...,(x_n,y_n) where n, x_i and y_i are
+        sampled independently from the provided probability distributions `n`, `x` and `y`. For each
+        (unordered) pair of cities (i,j), the distance d[i,j] between them is set to:
+
+            d[i,j] = gamma[i,j] \sqrt{(x_i - x_j)^2 + (y_i - y_j)^2}
+
+        where gamma is sampled from the provided probability distribution `gamma`.
+
+        If fix_cities=True, the list of cities is kept the same for all generated instances. The
+        gamma values, and therefore also the distances, are still different.
+
+        By default, all distances d[i,j] are rounded to the nearest integer. If `round=False`
+        is provided, this rounding will be disabled.
+
+        Arguments
+        ---------
+        x: rv_continuous
+            Probability distribution for the x-coordinate of each city.
+        y: rv_continuous
+            Probability distribution for the y-coordinate of each city.
+        n: rv_discrete
+            Probability distribution for the number of cities.
+        fix_cities: bool
+            If False, cities will be resampled for every generated instance. Otherwise, list of
+            cities will be computed once, during the constructor.
+        round: bool
+            If True, distances are rounded to the nearest integer.
+        """
+        assert isinstance(x, rv_frozen), "x should be a SciPy probability distribution"
+        assert isinstance(y, rv_frozen), "y should be a SciPy probability distribution"
+        assert isinstance(n, rv_frozen), "n should be a SciPy probability distribution"
+        assert isinstance(gamma, rv_frozen), "gamma should be a SciPy probability distribution"
+        self.x = x
+        self.y = y
+        self.n = n
+        self.gamma = gamma
+        self.round = round
+
+        if fix_cities:
+            self.fixed_n, self.fixed_cities = self._generate_cities()
+        else:
+            self.fixed_n = None
+            self.fixed_cities = None
+
+    def generate(self, n_samples):
+        def _sample():
+            if self.fixed_cities is not None:
+                n, cities = self.fixed_n, self.fixed_cities
+            else:
+                n, cities = self._generate_cities()
+            distances = squareform(pdist(cities)) * self.gamma.rvs(size=(n, n))
+            distances = np.tril(distances) + np.triu(distances.T, 1)
+            if self.round:
+                distances = distances.round()
+            return TravelingSalesmanInstance(n, distances)
+        return [_sample() for _ in range(n_samples)]
+
+    def _generate_cities(self):
+        n = self.n.rvs()
+        cities = np.array([(self.x.rvs(), self.y.rvs()) for _ in range(n)])
+        return n, cities
+
+
+class TravelingSalesmanInstance(Instance):
+    """An instance of the Traveling Salesman Problem.
+
+    Given a list of cities and the distance between each pair of cities, the problem asks for the
+    shortest route starting at the first city, visiting each other city exactly once, then
+    returning to the first city. This problem is a generalization of the Hamiltonian path problem,
+    one of Karp's 21 NP-complete problems.
+    """
+
+    def __init__(self, n_cities, distances):
+        assert isinstance(distances, np.ndarray)
+        assert distances.shape == (n_cities, n_cities)
+        self.n_cities = n_cities
+        self.distances = distances
+
+    def to_model(self):
+        model = pe.ConcreteModel()
+        model.edges = edges = [(i,j)
+                               for i in range(self.n_cities)
+                               for j in range(i+1, self.n_cities)]
+        model.x = pe.Var(edges, domain=pe.Binary)
+        model.obj = pe.Objective(expr=sum(model.x[i,j] * self.distances[i,j]
+                                          for (i,j) in edges),
+                                 sense=pe.minimize)
+        model.eq_degree = pe.ConstraintList()
+        model.eq_subtour = pe.ConstraintList()
+        for i in range(self.n_cities):
+            model.eq_degree.add(sum(model.x[min(i,j), max(i,j)]
+                                    for j in range(self.n_cities) if i != j) == 2)
+        return model
+
+    def get_instance_features(self):
+        return np.array([1])
+
+    def get_variable_features(self, var_name, index):
+        return np.array([1])
+
+    def get_variable_category(self, var_name, index):
+        return index
+
+    def find_violations(self, model):
+        selected_edges = [e for e in model.edges if model.x[e].value > 0.5]
+        graph = nx.Graph()
+        graph.add_edges_from(selected_edges)
+        components = [frozenset(c) for c in list(nx.connected_components(graph))]
+        violations = []
+        for c in components:
+            if len(c) < self.n_cities:
+                violations += [c]
+        return violations
+
+    def build_lazy_constraint(self, model, component):
+        cut_edges = [e for e in model.edges
+                     if (e[0] in component and e[1] not in component) or
+                        (e[0] not in component and e[1] in component)]
+        return model.eq_subtour.add(sum(model.x[e] for e in cut_edges) >= 2)
diff --git a/miplearn/solvers.py b/miplearn/solvers.py
index 7ff976c..9077639 100644
--- a/miplearn/solvers.py
+++ b/miplearn/solvers.py
@@ -2,45 +2,66 @@
 # Copyright (C) 2020, UChicago Argonne, LLC. All rights reserved.
 # Released under the modified BSD license. See COPYING.md for more details.
 
-from . import ObjectiveValueComponent, PrimalSolutionComponent
+from . import ObjectiveValueComponent, PrimalSolutionComponent, LazyConstraintsComponent
 import pyomo.environ as pe
 from pyomo.core import Var
 from copy import deepcopy
 import pickle
 from scipy.stats import randint
 from p_tqdm import p_map
+import numpy as np
 import logging
 logger = logging.getLogger(__name__)
 
 
+# Global memory for multiprocessing
+SOLVER = [None]
+INSTANCES = [None]
+
+
+def _parallel_solve(instance_idx):
+    solver = deepcopy(SOLVER[0])
+    instance = INSTANCES[0][instance_idx]
+    results = solver.solve(instance)
+    return {
+        "Results": results,
+        "Solution": instance.solution,
+        "LP solution": instance.lp_solution,
+        "LP value": instance.lp_value,
+        "Upper bound": instance.upper_bound,
+        "Lower bound": instance.lower_bound,
+        "Violations": instance.found_violations,
+    }
+
+
 class InternalSolver:
     def __init__(self):
         self.is_warm_start_available = False
         self.model = None
-        pass
+        self.var_name_to_var = {}
 
     def solve_lp(self, tee=False):
+        self.solver.set_instance(self.model)
+
         # Relax domain
         from pyomo.core.base.set_types import Reals
-        original_domain = {}
-        for var in self.model.component_data_objects(Var):
-            original_domain[str(var)] = var.domain
+        original_domains = []
+        for (idx, var) in enumerate(self.model.component_data_objects(Var)):
+            original_domains += [var.domain]
             lb, ub = var.bounds
             var.setlb(lb)
             var.setub(ub)
             var.domain = Reals
+            self.solver.update_var(var)
 
         # Solve LP relaxation
-        self.solver.set_instance(self.model)
         results = self.solver.solve(tee=tee)
 
         # Restore domains
-        for var in self.model.component_data_objects(Var):
-            var.domain = original_domain[str(var)]
+        for (idx, var) in enumerate(self.model.component_data_objects(Var)):
+            var.domain = original_domains[idx]
+            self.solver.update_var(var)
 
-        # Reload original model
-        self.solver.set_instance(self.model)
-
         return {
             "Optimal value": results["Problem"][0]["Lower bound"],
         }
@@ -58,36 +79,43 @@ class InternalSolver:
                 solution[str(var)][index] = var[index].value
         return solution
 
-    def set_warm_start(self, ws):
+    def set_warm_start(self, solution):
         self.is_warm_start_available = True
         self.clear_values()
         count_total, count_fixed = 0, 0
-        for var in ws.keys():
-            for index in var:
+        for var_name in solution:
+            var = self.var_name_to_var[var_name]
+            for index in solution[var_name]:
                 count_total += 1
-                var[index].value = ws[var][index]
-                if ws[var][index] is not None:
+                var[index].value = solution[var_name][index]
+                if solution[var_name][index] is not None:
                     count_fixed += 1
         logger.info("Setting start values for %d variables (out of %d)" %
                     (count_fixed, count_total))
-
     def set_model(self, model):
         self.model = model
         self.solver.set_instance(model)
+        self.var_name_to_var = {}
+        for var in model.component_objects(Var):
+            self.var_name_to_var[var.name] = var
 
-    def fix(self, ws):
+    def fix(self, solution):
         count_total, count_fixed = 0, 0
-        for var in ws.keys():
-            for index in var:
+        for var_name in solution:
+            for index in solution[var_name]:
+                var = self.var_name_to_var[var_name]
                 count_total += 1
-                if ws[var][index] is None:
+                if solution[var_name][index] is None:
                     continue
                 count_fixed += 1
-                var[index].fix(ws[var][index])
+                var[index].fix(solution[var_name][index])
                 self.solver.update_var(var[index])
         logger.info("Fixing values for %d variables (out of %d)" %
                     (count_fixed, count_total))
+
+    def add_constraint(self, cut):
+        self.solver.add_constraint(cut)
 
 
 class GurobiSolver(InternalSolver):
@@ -198,6 +226,7 @@
             self.components = {
                 "ObjectiveValue": ObjectiveValueComponent(),
                 "PrimalSolution": PrimalSolutionComponent(),
+                "LazyConstraints": LazyConstraintsComponent(),
             }
 
         assert self.mode in ["exact", "heuristic"]
@@ -231,27 +260,44 @@
         self.internal_solver = self._create_internal_solver()
         self.internal_solver.set_model(model)
 
-        # Solve LP relaxation
+        logger.debug("Solving LP relaxation...")
         results = self.internal_solver.solve_lp(tee=tee)
         instance.lp_solution = self.internal_solver.get_solution()
         instance.lp_value = results["Optimal value"]
 
-        # Invoke before_solve callbacks
+        logger.debug("Running before_solve callbacks...")
        for component in self.components.values():
             component.before_solve(self, instance, model)
 
         if relaxation_only:
             return results
 
-        # Solver original MIP
-        results = self.internal_solver.solve(tee=tee)
+        total_wallclock_time = 0
+        instance.found_violations = []
+        while True:
+            logger.debug("Solving MIP...")
+            results = self.internal_solver.solve(tee=tee)
+            logger.debug("    %.2f s" % results["Wallclock time"])
+            total_wallclock_time += results["Wallclock time"]
+            if not hasattr(instance, "find_violations"):
+                break
+            logger.debug("Finding violated constraints...")
+            violations = instance.find_violations(model)
+            if len(violations) == 0:
+                break
+            instance.found_violations += violations
+            logger.debug("    %d violations found" % len(violations))
+            for v in violations:
+                cut = instance.build_lazy_constraint(model, v)
+                self.internal_solver.add_constraint(cut)
+        results["Wallclock time"] = total_wallclock_time
 
         # Read MIP solution and bounds
         instance.lower_bound = results["Lower bound"]
         instance.upper_bound = results["Upper bound"]
         instance.solution = self.internal_solver.get_solution()
 
-        # Invoke after_solve callbacks
+        logger.debug("Calling after_solve callbacks...")
         for component in self.components.values():
             component.after_solve(self, instance, model)
@@ -266,40 +312,23 @@
                        label="Solve",
                        collect_training_data=True,
                       ):
-        self.internal_solver = None
-
-        def _process(instance):
-            solver = deepcopy(self)
-            results = solver.solve(instance)
-            solver.internal_solver = None
-            if not collect_training_data:
-                solver.components = {}
-            return {
-                "Solver": solver,
-                "Results": results,
-                "Solution": instance.solution,
-                "LP solution": instance.lp_solution,
-                "LP value": instance.lp_value,
-                "Upper bound": instance.upper_bound,
-                "Lower bound": instance.lower_bound,
-            }
+        self.internal_solver = None
+        SOLVER[0] = self
+        INSTANCES[0] = instances
+        p_map_results = p_map(_parallel_solve,
+                              list(range(len(instances))),
+                              num_cpus=n_jobs,
+                              desc=label)
 
-        p_map_results = p_map(_process, instances, num_cpus=n_jobs, desc=label)
-        subsolvers = [p["Solver"] for p in p_map_results]
         results = [p["Results"] for p in p_map_results]
-
         for (idx, r) in enumerate(p_map_results):
             instances[idx].solution = r["Solution"]
             instances[idx].lp_solution = r["LP solution"]
             instances[idx].lp_value = r["LP value"]
             instances[idx].lower_bound = r["Lower bound"]
             instances[idx].upper_bound = r["Upper bound"]
-
-        for (name, component) in self.components.items():
-            subcomponents = [subsolver.components[name]
-                             for subsolver in subsolvers
-                             if name in subsolver.components.keys()]
-            self.components[name].merge(subcomponents)
+            instances[idx].found_violations = r["Violations"]
 
         return results
 
@@ -310,21 +339,3 @@
             return
         for component in self.components.values():
             component.fit(training_instances)
-
-    def save_state(self, filename):
-        with open(filename, "wb") as file:
-            pickle.dump({
-                "version": 2,
-                "components": self.components,
-            }, file)
-
-    def load_state(self, filename):
-        with open(filename, "rb") as file:
-            data = pickle.load(file)
-            assert data["version"] == 2
-            for (component_name, component) in data["components"].items():
-                if component_name not in self.components.keys():
-                    continue
-                else:
-                    self.components[component_name].merge([component])
-
diff --git a/miplearn/tests/test_benchmark.py b/miplearn/tests/test_benchmark.py
index 094eee4..523ba61 100644
--- a/miplearn/tests/test_benchmark.py
+++ b/miplearn/tests/test_benchmark.py
@@ -18,8 +18,6 @@ def test_benchmark():
     # Training phase...
     training_solver = LearningSolver()
     training_solver.parallel_solve(train_instances, n_jobs=10)
-    training_solver.fit()
-    training_solver.save_state("data.bin")
 
     # Test phase...
     test_solvers = {
@@ -27,7 +25,7 @@ def test_benchmark():
         "Strategy B": LearningSolver(),
     }
     benchmark = BenchmarkRunner(test_solvers)
-    benchmark.load_state("data.bin")
+    benchmark.fit(train_instances)
     benchmark.parallel_solve(test_instances, n_jobs=2, n_trials=2)
     assert benchmark.raw_results().values.shape == (12,13)
 
diff --git a/miplearn/tests/test_extractors.py b/miplearn/tests/test_extractors.py
index eb57df7..a51b386 100644
--- a/miplearn/tests/test_extractors.py
+++ b/miplearn/tests/test_extractors.py
@@ -5,7 +5,6 @@ from miplearn.problems.knapsack import KnapsackInstance
 from miplearn import (LearningSolver,
                       SolutionExtractor,
-                      CombinedExtractor,
                       InstanceFeaturesExtractor,
                       VariableFeaturesExtractor,
                      )
@@ -33,7 +32,7 @@ def _get_instances():
 
 def test_solution_extractor():
     instances, models = _get_instances()
-    features = SolutionExtractor().extract(instances, models)
+    features = SolutionExtractor().extract(instances)
     assert isinstance(features, dict)
     assert "default" in features.keys()
     assert isinstance(features["default"], np.ndarray)
@@ -48,17 +47,6 @@ def test_solution_extractor():
     ]
 
 
-def test_combined_extractor():
-    instances, models = _get_instances()
-    extractor = CombinedExtractor(extractors=[VariableFeaturesExtractor(),
-                                              SolutionExtractor()])
-    features = extractor.extract(instances, models)
-    assert isinstance(features, dict)
-    assert "default" in features.keys()
-    assert isinstance(features["default"], np.ndarray)
-    assert features["default"].shape == (6, 7)
-
-
 def test_instance_features_extractor():
     instances, models = _get_instances()
     features = InstanceFeaturesExtractor().extract(instances)
diff --git a/miplearn/tests/test_solver.py b/miplearn/tests/test_solver.py
index c8b662e..24d879b 100644
--- a/miplearn/tests/test_solver.py
+++ b/miplearn/tests/test_solver.py
@@ -41,29 +41,6 @@ def test_solver():
     solver.fit()
     solver.solve(instance)
 
-
-# def test_solve_save_load_state():
-#     instance = _get_instance()
-#     components_before = {
-#         "warm-start": WarmStartComponent(),
-#     }
-#     solver = LearningSolver(components=components_before)
-#     solver.solve(instance)
-#     solver.fit()
-#     solver.save_state("/tmp/knapsack_train.bin")
-#     prev_x_train_len = len(solver.components["warm-start"].x_train)
-#     prev_y_train_len = len(solver.components["warm-start"].y_train)
-
-#     components_after = {
-#         "warm-start": WarmStartComponent(),
-#     }
-#     solver = LearningSolver(components=components_after)
-#     solver.load_state("/tmp/knapsack_train.bin")
-#     assert len(solver.components.keys()) == 1
-#     assert len(solver.components["warm-start"].x_train) == prev_x_train_len
-#     assert len(solver.components["warm-start"].y_train) == prev_y_train_len
-
-
 def test_parallel_solve():
     instances = [_get_instance() for _ in range(10)]
     solver = LearningSolver()