-
Notifications
You must be signed in to change notification settings - Fork 68
Checkpoint in freeze only #2107
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 29 commits
1eb292c
ec6a437
b7a65bb
9b9c830
7cb2e5f
f71ce44
0218314
3604312
ae586c1
155e3c5
92fbacb
5b4e82c
2638f88
ac82444
08ed1e2
3520b1e
61e9294
7d7fd63
9fc718f
50dc60e
7821695
0926010
cebc83f
deafb9f
3e568f8
fd0b1e2
080b6ec
83d790b
b236b25
8a2734c
ef7df26
0641f9f
abe6229
856e3ce
8de78c1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,5 @@ | ||
| sys.exec.out.time {s},testSimObject.my_foo.a {1},testSimObject.my_foo.b {1} | ||
| 5,6,12 | ||
| 5.1,6,12 | ||
| 5.2,6,12 | ||
| 5.3,6,12 | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,5 @@ | ||
| sys.exec.out.time {s},testSimObject.my_foo.a {1},testSimObject.my_foo.b {1} | ||
| 5.5,6,12 | ||
| 5.6,6,12 | ||
| 5.7,6,12 | ||
| 5.8,6,12 | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,5 @@ | ||
| sys.exec.out.time {s},testSimObject.my_foo.a {1},testSimObject.my_foo.b {1} | ||
| 5,6,12 | ||
| 5.1,6,12 | ||
| 5.2,6,12 | ||
| 5.3,6,12 | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,5 @@ | ||
| sys.exec.out.time {s},testSimObject.my_foo.a {1},testSimObject.my_foo.b {1} | ||
| 2,3,6 | ||
| 2.1,3,6 | ||
| 2.2,3,6 | ||
| 2.3,3,6 | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,5 @@ | ||
| sys.exec.out.time {s},testSimObject.my_foo.a {1},testSimObject.my_foo.b {1} | ||
| 7,8,16 | ||
| 7.1,8,16 | ||
| 7.2,8,16 | ||
| 7.3,8,16 | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,5 @@ | ||
| sys.exec.out.time {s},testSimObject.my_foo.b {1} | ||
| 7,16 | ||
| 8,18 | ||
| 9,20 | ||
| 10,22 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,5 +1,5 @@ | ||
| sys.exec.out.time {s},testSimObject.my_foo.q {1} | ||
| 5.1,2 | ||
| 5,2 | ||
| 8,3 | ||
| 11,4 | ||
| 14,5 | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -19,6 +19,7 @@ | |
| #include "trick/message_proto.h" | ||
| #include "trick/message_type.h" | ||
| #include "trick/TrickConstant.hh" | ||
| #include "trick/sim_mode.h" | ||
|
|
||
| Trick::CheckPointRestart * the_cpr ; | ||
|
|
||
|
|
@@ -112,7 +113,12 @@ int Trick::CheckPointRestart::find_write_checkpoint_jobs(std::string sim_object_ | |
| return(0) ; | ||
| } | ||
|
|
||
| int Trick::CheckPointRestart::checkpoint(double in_time) { | ||
| /** | ||
| * @brief Schedule a checkpoint to be written at a given time. | ||
| * @param in_time The time the checkpoint should be dumped | ||
| * @see write_checkpoint() | ||
| */ | ||
| int Trick::CheckPointRestart::checkpoint(double in_time, std::string file_name) { | ||
|
|
||
| long long curr_time = exec_get_time_tics() ; | ||
| long long new_time ; | ||
|
|
@@ -124,6 +130,10 @@ int Trick::CheckPointRestart::checkpoint(double in_time) { | |
| if ( new_time < write_checkpoint_job->next_tics ) { | ||
| write_checkpoint_job->next_tics = new_time ; | ||
| } | ||
|
|
||
| if (!file_name.empty()) chkpnt_names[new_time] = file_name; | ||
|
|
||
| the_exec->freeze(in_time); | ||
| //std::cout << "\033[33mSET CHECKPOINT TIME " << in_time << " " << new_time << "\033[0m" << std::endl ; | ||
| } else { | ||
| message_publish(MSG_ERROR, "Checkpoint time specified in the past. specified %f, current_time %f\n", | ||
|
|
@@ -174,6 +184,19 @@ int Trick::CheckPointRestart::do_checkpoint(std::string file_name, bool print_st | |
|
|
||
| JobData * curr_job ; | ||
| pid_t pid; | ||
| SIM_MODE mode; | ||
|
|
||
| mode = the_exec->get_mode(); | ||
|
|
||
| if (mode == Run) { | ||
| std::string msg_format = "WARNING: Saving a checkpoint in 'Run Mode' may cause non time-homogeneous data. "; | ||
| msg_format += "Current Mode: %s (%d)\n"; | ||
| message_publish(MSG_WARNING, msg_format.c_str(), | ||
| simModeCharString(mode), mode); | ||
|
|
||
| return 0; | ||
| } | ||
|
|
||
|
|
||
| if ( ! file_name.compare("") ) { | ||
| std::stringstream file_name_stream ; | ||
|
|
@@ -238,6 +261,10 @@ int Trick::CheckPointRestart::do_checkpoint(std::string file_name, bool print_st | |
| return 0 ; | ||
| } | ||
|
|
||
| /** | ||
| * @brief Writes a scheduled checkpoint if it is the correct time. | ||
| * @see checkpoint(double in_time) | ||
| */ | ||
| int Trick::CheckPointRestart::write_checkpoint() { | ||
|
|
||
| long long curr_time = exec_get_time_tics() ; | ||
|
|
@@ -257,12 +284,20 @@ int Trick::CheckPointRestart::write_checkpoint() { | |
| } | ||
|
|
||
| double sim_time = exec_get_sim_time() ; | ||
| std::stringstream chk_name_stream ; | ||
| std::string file_name = ""; | ||
|
|
||
| chk_name_stream << "chkpnt_" << std::fixed << std::setprecision(6) << sim_time ; | ||
| if (chkpnt_names.find(curr_time) == chkpnt_names.end()) { | ||
| std::stringstream chk_name_stream ; | ||
| chk_name_stream << "chkpnt_" << std::fixed << std::setprecision(6) << sim_time ; | ||
| file_name = chk_name_stream.str(); | ||
| } else { | ||
| file_name = chkpnt_names[curr_time]; | ||
| chkpnt_names.erase(curr_time); | ||
| } | ||
|
|
||
| checkpoint( chk_name_stream.str() ); | ||
| checkpoint( file_name ); | ||
|
|
||
| the_exec->run(); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think this unfreezes after the checkpoint even when it wasn't an auto-freeze. Also: indentation.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. In this branch, only a scheduled checkpoint (e.g. calling checkpoint(time, ...)) sets a freeze at the specified time; freeze_loop then calls write_checkpoint at the scheduled time, which sets the mode back to Run at the end. So it's safe. When calling checkpoint without a time (not scheduled):
The auto_freeze flag is only used for checkpoint loading, and we'll discuss whether we want to allow loading only when in Freeze mode. This branch uses the auto_freeze flag to unfreeze when a load is requested in Run mode.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. However, if the input file has both a scheduled freeze and a checkpoint for the same time, the sim would unfreeze automatically after the checkpoint, which might not be what is expected. So we need to check whether there is a scheduled freeze before unfreezing. |
||
| } | ||
|
|
||
| return(0) ; | ||
|
|
@@ -304,6 +339,20 @@ int Trick::CheckPointRestart::safestore_checkpoint() { | |
| } | ||
|
|
||
| void Trick::CheckPointRestart::load_checkpoint(std::string file_name) { | ||
| SIM_MODE mode = the_exec->get_mode(); | ||
|
|
||
| if (mode == Run) { | ||
| std::string msg_format = "WARNING: Loading a checkpoint in 'Run Mode' may cause non time-homogeneous data. "; | ||
| msg_format += "Current Mode: %s (%d)\n"; | ||
|
|
||
| message_publish(MSG_WARNING, msg_format.c_str(), | ||
| file_name.c_str(), simModeCharString(mode), mode); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. too many parameters for the format string |
||
| // If in RUN mode, this will freeze the simulation and notify the code to unfreeze later. | ||
| // To forbid loading a checkpoint in RUN mode, remove the following two lines and the second to last line in load_checkpoint_job() | ||
|
brendan-nasa marked this conversation as resolved.
Outdated
|
||
| the_exec->freeze(); | ||
| auto_freeze = true; | ||
| } | ||
|
|
||
| load_checkpoint_file_name = file_name ; | ||
| } | ||
|
|
||
|
|
@@ -335,7 +384,7 @@ int Trick::CheckPointRestart::load_checkpoint_job() { | |
| JobData * curr_job ; | ||
| struct stat temp_buf ; | ||
|
|
||
| if ( ! load_checkpoint_file_name.empty() ) { | ||
| if ( ! load_checkpoint_file_name.empty() && the_exec->get_mode() != Run) { | ||
|
|
||
| if ( stat( load_checkpoint_file_name.c_str() , &temp_buf) == 0 ) { | ||
| preload_checkpoint_queue.reset_curr_index() ; | ||
|
|
@@ -373,6 +422,7 @@ int Trick::CheckPointRestart::load_checkpoint_job() { | |
| message_publish(MSG_INFO, "Could not find checkpoint file %s.\n", load_checkpoint_file_name.c_str()) ; | ||
| } | ||
| load_checkpoint_file_name.clear() ; | ||
| if(auto_freeze) the_exec->run(); | ||
| } | ||
|
|
||
| return(0) ; | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,14 @@ | ||
|
|
||
| #include "trick/sim_mode.h" | ||
|
|
||
| const char * simModeCharString(SIM_MODE mode) { | ||
| switch (mode) | ||
| { | ||
| case Initialization: return "Initialization"; | ||
| case Run: return "Run"; | ||
| case Step: return "Step"; | ||
| case Freeze: return "Freeze"; | ||
| case ExitMode: return "ExitMode"; | ||
| default: return "InvalidMode"; | ||
| } | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should it be `mode != Freeze`?
Otherwise, a checkpoint can still be taken while in Initialization mode.