@@ -486,11 +486,14 @@ def _load_best_individual_model(self) -> SingleBest:
486
486
487
487
return ensemble
488
488
489
- def _do_dummy_prediction (self , num_run : int ) -> None :
489
+ def _do_dummy_prediction (self ) -> None :
490
490
491
491
assert self ._metric is not None
492
492
assert self ._logger is not None
493
493
494
+ # For the dummy estimator, we always expect num_run to be 1
495
+ num_run = 1
496
+
494
497
self ._logger .info ("Starting to create dummy predictions." )
495
498
496
499
memory_limit = self ._memory_limit
@@ -551,29 +554,20 @@ def _do_dummy_prediction(self, num_run: int) -> None:
551
554
% (str (status ), str (additional_info ))
552
555
)
553
556
554
- def _do_traditional_prediction (self , num_run : int , time_left : int , func_eval_time_limit_secs : int
555
- ) -> int :
557
+ def _do_traditional_prediction (self , time_left : int , func_eval_time_limit_secs : int ) -> None :
556
558
"""
557
559
Fits traditional machine learning algorithms to the provided dataset, while
558
560
complying with time resource allocation.
559
561
560
562
This method currently only supports classification.
561
563
562
564
Args:
563
- num_run: (int)
564
- An identifier to indicate the current machine learning algorithm
565
- being processed
566
565
time_left: (int)
567
566
Hard limit on how many machine learning algorithms can be fit. Depending on how
568
567
fast a traditional machine learning algorithm trains, it will allow multiple
569
568
models to be fitted.
570
569
func_eval_time_limit_secs: (int)
571
570
Maximum training time each algorithm is allowed to take, during training
572
-
573
- Returns:
574
- num_run: (int)
575
- The incremented identifier index. This depends on how many machine learning
576
- models were fitted.
577
571
"""
578
572
579
573
# Mypy Checkings -- Traditional prediction is only called for search
@@ -592,8 +586,8 @@ def _do_traditional_prediction(self, num_run: int, time_left: int, func_eval_tim
592
586
available_classifiers = get_available_classifiers ()
593
587
dask_futures = []
594
588
595
- total_number_classifiers = len (available_classifiers ) + num_run
596
- for n_r , classifier in enumerate (available_classifiers , start = num_run ):
589
+ total_number_classifiers = len (available_classifiers )
590
+ for n_r , classifier in enumerate (available_classifiers ):
597
591
598
592
# Only launch a task if there is time
599
593
start_time = time .time ()
@@ -612,7 +606,7 @@ def _do_traditional_prediction(self, num_run: int, time_left: int, func_eval_tim
612
606
logger_port = self ._logger_port ,
613
607
cost_for_crash = get_cost_of_crash (self ._metric ),
614
608
abort_on_first_run_crash = False ,
615
- initial_num_run = n_r ,
609
+ initial_num_run = self . _backend . get_next_num_run () ,
616
610
stats = stats ,
617
611
memory_limit = memory_limit ,
618
612
disable_file_output = True if len (self ._disable_file_output ) > 0 else False ,
@@ -626,9 +620,6 @@ def _do_traditional_prediction(self, num_run: int, time_left: int, func_eval_tim
626
620
)
627
621
])
628
622
629
- # Increment the launched job index
630
- num_run = n_r
631
-
632
623
# When managing time, we need to take into account the allocated time resources,
633
624
# which are dependent on the number of cores. 'dask_futures' is a proxy to the number
634
625
# of workers /n_jobs that we have, in that if there are 4 cores allocated, we can run at most
@@ -691,7 +682,7 @@ def _do_traditional_prediction(self, num_run: int, time_left: int, func_eval_tim
691
682
self .run_history .update (run_history , DataOrigin .EXTERNAL_SAME_INSTANCES )
692
683
run_history .save_json (os .path .join (self ._backend .internals_directory , 'traditional_run_history.json' ),
693
684
save_external = True )
694
- return num_run
685
+ return
695
686
696
687
def _search (
697
688
self ,
@@ -861,10 +852,9 @@ def _search(
861
852
)
862
853
863
854
# ============> Run dummy predictions
864
- num_run = 1
865
855
dummy_task_name = 'runDummy'
866
856
self ._stopwatch .start_task (dummy_task_name )
867
- self ._do_dummy_prediction (num_run )
857
+ self ._do_dummy_prediction ()
868
858
self ._stopwatch .stop_task (dummy_task_name )
869
859
870
860
# ============> Run traditional ml
@@ -880,8 +870,8 @@ def _search(
880
870
time_for_traditional = int (
881
871
self ._time_for_task - elapsed_time - func_eval_time_limit_secs
882
872
)
883
- num_run = self ._do_traditional_prediction (
884
- num_run = num_run + 1 , func_eval_time_limit_secs = func_eval_time_limit_secs ,
873
+ self ._do_traditional_prediction (
874
+ func_eval_time_limit_secs = func_eval_time_limit_secs ,
885
875
time_left = time_for_traditional ,
886
876
)
887
877
self ._stopwatch .stop_task (traditional_task_name )
@@ -957,7 +947,9 @@ def _search(
957
947
pipeline_config = {** self .pipeline_options , ** budget_config },
958
948
ensemble_callback = proc_ensemble ,
959
949
logger_port = self ._logger_port ,
960
- start_num_run = num_run ,
950
+ # We do not increment num_run here; this is something
951
+ # smac does internally
952
+ start_num_run = self ._backend .get_next_num_run (peek = True ),
961
953
search_space_updates = self .search_space_updates
962
954
)
963
955
try :
@@ -1063,7 +1055,7 @@ def refit(
1063
1055
'train_indices' : dataset .splits [split_id ][0 ],
1064
1056
'val_indices' : dataset .splits [split_id ][1 ],
1065
1057
'split_id' : split_id ,
1066
- 'num_run' : 0
1058
+ 'num_run' : self . _backend . get_next_num_run (),
1067
1059
})
1068
1060
X .update ({** self .pipeline_options , ** budget_config })
1069
1061
if self .models_ is None or len (self .models_ ) == 0 or self .ensemble_ is None :
@@ -1140,7 +1132,7 @@ def fit(self,
1140
1132
'train_indices' : dataset .splits [split_id ][0 ],
1141
1133
'val_indices' : dataset .splits [split_id ][1 ],
1142
1134
'split_id' : split_id ,
1143
- 'num_run' : 0
1135
+ 'num_run' : self . _backend . get_next_num_run (),
1144
1136
})
1145
1137
X .update ({** self .pipeline_options , ** budget_config })
1146
1138
0 commit comments