diff --git a/R/figures/fig1.R b/R/figures/fig1.R
index 52529b0..e3a79c7 100644
--- a/R/figures/fig1.R
+++ b/R/figures/fig1.R
@@ -45,7 +45,6 @@ create_figure_1 <- function(
   raw_ct_2019,
   health_weighted,
   refining_mortality,
-  labor_2019,
   ca_regions,
   raw_pop_income_2021,
   cpi2020,
@@ -1554,11 +1553,6 @@ create_figure_1 <- function(
   ## merge counties to census tracts
   ## -----------------------------------------------------------------
 
-  # ## join with spatial data 2019
-  # census_tract_labor_2019_sp <- raw_ct_2019 |>
-  #   rename(census_tract = GEOID) |>
-  #   left_join(census_tract_labor_2020)
-
   # Create the expanded data
   census_tracts_l_expanded <- expand_grid(
     census_tracts,
diff --git a/R/figures/health_labor_revised_figs.R b/R/figures/health_labor_revised_figs.R
index 16412af..90963f4 100644
--- a/R/figures/health_labor_revised_figs.R
+++ b/R/figures/health_labor_revised_figs.R
@@ -1300,8 +1300,8 @@ plot_npv_health_labor <- function(
       x = "GHG emissions reduction (%, 2045 vs 2019)"
     ) +
     scale_y_continuous(
-      limits = c(0, 50),
-      breaks = seq(0, 50, by = 10)
+      limits = c(0, 40),
+      breaks = seq(0, 40, by = 10)
     ) +
     xlim(0, 80) +
     scale_color_manual(
@@ -1380,8 +1380,8 @@ plot_npv_health_labor <- function(
       x = "GHG emissions reduction (%, 2045 vs 2019)"
     ) +
     scale_y_continuous(
-      limits = c(-50, 0),
-      breaks = seq(-50, 0, by = 10)
+      limits = c(-40, 0),
+      breaks = seq(-40, 0, by = 10)
     ) +
     xlim(0, 80) +
     scale_color_manual(
@@ -1451,8 +1451,8 @@ plot_npv_health_labor <- function(
       x = "GHG emissions reduction (%, 2045 vs 2019)"
     ) +
     scale_y_continuous(
-      limits = c(-50, 0),
-      breaks = seq(-50, 0, by = 10)
+      limits = c(-40, 0),
+      breaks = seq(-40, 0, by = 10)
     ) +
     xlim(0, 80) +
     scale_color_manual(
@@ -1523,8 +1523,8 @@ plot_npv_health_labor <- function(
       x = "GHG emissions reduction (%, 2045 vs 2019)"
     ) +
     scale_y_continuous(
-      limits = c(-50, 0),
-      breaks = seq(-50, 0, by = 10)
+      limits = c(-40, 0),
+      breaks = seq(-40, 0, by = 10)
     ) +
     xlim(0, 80) +
     scale_color_manual(
@@ -1580,7 +1580,7 @@ plot_npv_health_labor <- function(
       y = NULL,
       x = "GHG emissions reduction (%, 2045 vs 2019)"
     ) +
-    ylim(-50, 0) +
+    ylim(-40, 0) +
     xlim(0, 80) +
     scale_color_manual(
       values = refin_colors,
@@ -1655,7 +1655,7 @@ plot_npv_health_labor <- function(
       y = NULL,
       x = "GHG emissions reduction (%, 2045 vs 2019)"
     ) +
-    ylim(-50, 0) +
+    ylim(-40, 0) +
     xlim(0, 80) +
     scale_color_manual(
       values = refin_colors,
@@ -4727,8 +4727,8 @@ plot_npv_health_labor_non_age_vsl <- function(
       x = "GHG emissions reduction (%, 2045 vs 2019)"
     ) +
     scale_y_continuous(
-      limits = c(-50, 0),
-      breaks = seq(-50, 0, by = 10)
+      limits = c(-40, 0),
+      breaks = seq(-40, 0, by = 10)
     ) +
     xlim(0, 80) +
     scale_color_manual(
diff --git a/README.md b/README.md
index c300b83..1af2f44 100644
--- a/README.md
+++ b/README.md
@@ -1,48 +1,90 @@
 # Distributional and climate impacts of low-carbon transition pathways for California's oil refining (repo)
 
 ## Setting up
-This repo relies on the R package ``targets`` to maintain the pipeline of the scripts and the reproducibility of the project.
-Install the package if you have not already done so:
 
-```
+This repo relies on the R package `targets` to maintain the pipeline of the scripts and the reproducibility of the project. Install the package if you have not already done so:
+
+```         
 install.packages("targets")
 ```
 
 Load the package:
 
-```
+```         
 library(targets)
 ```
 
-All of the functions for the pipeline are in the ``R/`` folder.
-To open the ``_targets.R`` script (which is where the workflow is built and specified), run:
+All of the functions for the pipeline are in the `R/` folder. To open the `_targets.R` script (which is where the workflow is built and specified), run:
 
-```
+```         
 tar_edit()
 ```
 
-## Changing the file directory
-
-**IMPORTANT**: Before running the pipeline, one thing needs to be changed -- the path to the ``calepa-cn`` folder. 
-Look for the ``user`` target:
-
-```
-tar_target(name = user, "meas"),
-```
-
-And replace the name in the quotations above with a specified user.
+## Changing important user-specific `targets`
+
+**IMPORTANT**: Before running the pipeline, several things should be modified 
+to reflect user-specific specifications. The `setup_data_paths.R` file, which is 
+sources in the main `_targets.R` folder, should auto-configure paths. Make sure
+the data folder has been moved into your main repo (note that this folder is not
+tracked by git and therefore is not pulled into the repo). 
+
+Next, set the `confidential_data_access` target option based on whether or not 
+you have access to the confidential datasets. The code is as follows:
+
+```         
+tar_target(name = confidential_data_access,
+             command = FALSE),
+```
+
+where `FALSE` indicates that the user does not have access and `TRUE` indicates 
+access. Users with access will have a subfolder within the data folder called
+`confidential-data`.
+
+Next, set the following target to reflect which values for `beta` and `cuf`
+you are running:
+
+```
+## set module settings for specific run (cuf and beta)
+  tar_target(name = beta_scenario, command = "main"), ## UPDATE WITH ("main", "high", or "low")
+  tar_target(
+    name = beta,
+    command = ifelse(
+      beta_scenario == "low",
+      0.00422068,
+      ifelse(beta_scenario == "high", 0.00737932, 0.00582)
+    )
+  ), 
+  
+  # Coefficient from Krewski et al (2009) for mortality impact
+  tar_target(name = ref_threshold, command = 0.6),
+  ```
+  
+  where the target `beta_scenario` can be "main", "low", or "high" and 
+  will be used for file saving name conventions and sets the `beta` target below. 
+  
+  Finally, specify the `version` target to determine folder names for saving outputs:
+  
+  ```
+  # list save paths (UPDATE VERSION AS NEEDED)
+  tar_target(name = version, command = "test-no-conf-data"),
+  
+```
+where in this example will create a folder with `test-no-conf-data` in the name.
+  
 
 ## Using the repo to recreate the analysis
 
 ### Debugging the pipeline
+
 In order to check the pipeline is engineered properly, run the following command:
 
-```
+```         
 tar_manifest(fields=command)
 ```
 
 The output should look something like:
-```
+
+```         
 # A tibble: 90 × 2
    name          command                                                                                                             
    <chr>         <chr>                                                                                                               
@@ -59,19 +101,20 @@ The output should look something like:
 # … with 80 more rows
 # ℹ Use `print(n = ...)` to see more rows
 ```
+
 If there are any issues (missing targets, bugs, etc), you should receive an error message.
 
 ### Running the pipeline
 
 To build and run the pipeline (this will execute everything), run:
 
-```
+```         
 tar_make()
 ```
 
 If you are running this for the first time, it should take a few minutes, but the outputs should look something like:
 
-```
+```         
 • start target ei_crude
 • built target ei_crude [0.019 seconds]
 • start target ei_diesel
@@ -87,23 +130,23 @@ If you are running this for the first time, it should take a few minutes, but th
 ...
 ```
 
-Assuming none of the targets change, the next time(s) you run ``tar_make()``, ``targets`` will skip building targets that are already up-to-date. 
+Assuming none of the targets change, the next time(s) you run `tar_make()`, `targets` will skip building targets that are already up-to-date.
 
 ### Viewing and loading targets
 
-If you are new to ``targets`` you might be confused that there are no objects in your environment. That's because the objects are stored locally in a folder called ``_targets`` (in your local repo).
+If you are new to `targets` you might be confused that there are no objects in your environment. That's because the objects are stored locally in a folder called `_targets` (in your local repo).
 
-But let's say you want to inspect a specific object, like ``dt_its``. If you want to just view it in your console, you can enter:
+But let's say you want to inspect a specific object, like `dt_its`. If you want to just view it in your console, you can enter:
 
-```
+```         
 tar_read(dt_its)
 ```
 
-And that should print the ``data.table``.
+And that should print the `data.table`.
 
-If you want to load the ``data.table`` into your environment, you can run the following instead:
+If you want to load the `data.table` into your environment, you can run the following instead:
 
-```
+```         
 tar_load(dt_its)
 ```
 
@@ -111,7 +154,7 @@ You'll notice the object is in your environment.
 
 You can also view plots. Running the following line should either load the plot in your Plots window or open a new window with the plot:
 
-```
+```         
 tar_read(fig_demand)
 ```
 
@@ -119,58 +162,57 @@ tar_read(fig_demand)
 
 If you want to visualize the pipeline, run:
 
-```
+```         
 tar_visnetwork()
 ```
 
-You'll notice the diagram is very small -- you can use your mouse to zoom in on the objects if you'd like. If you make changes to the targets/pipeline and run ``tar_visnetwork()`` before running ``tar_make``, you can see the colors of the objects change.
+You'll notice the diagram is very small -- you can use your mouse to zoom in on the objects if you'd like. If you make changes to the targets/pipeline and run `tar_visnetwork()` before running `tar_make`, you can see the colors of the objects change.
 
 ## Example of target changes and impacts on the pipeline
 
 Want an example of what happens when a target is changed? Here's an easy one:
 
-1. Find the target ``ei_crude`` in ``_targets.R``:
+1.  Find the target `ei_crude` in `_targets.R`:
 
-```
+```         
 tar_target(name = ei_crude, command = 5.698)
 ```
 
-2. Change the command value to something else, say 10 for example:
+2.  Change the command value to something else, say 10 for example:
 
-```
+```         
 tar_target(name = ei_crude, command = 10)
 ```
 
-3. Save the script. Then run:
+3.  Save the script. Then run:
 
-```
+```         
 tar_visnetwork()
 ```
 
-4. You'll see the diagram now looks different, with a few lines and points assigned a different color, representing "Outdated". These are the targets affected by the updated ``ei_crude``. Run ``tar_make()`` to rerun the pipeline with the new ``ei_crude`` value:
+4.  You'll see the diagram now looks different, with a few lines and points assigned a different color, representing "Outdated". These are the targets affected by the updated `ei_crude`. Run `tar_make()` to rerun the pipeline with the new `ei_crude` value:
 
-```
+```         
 tar_make()
 ```
 
 In the outputs you'll see that the targets that are affected are being updated, while the ones that are unaffected are not being rebuilt.
 
-If you run ``tar_visnetwork()`` everything should be up-to-date now in the diagram.
+If you run `tar_visnetwork()` everything should be up-to-date now in the diagram.
 
 **Remember to change the value of the target back to normal (by ctrl + z for example).**
 
 ## Output Structure and Git Tracking
 
-This repository uses a standardized output structure defined in `structure.md` and `output_structure.csv`.
-The `output_structure.csv` file specifies:
+This repository uses a standardized output structure defined in `structure.md` and `output_structure.csv`. The `output_structure.csv` file specifies:
 
-- `file_name`: The name of each output file
-- `relative_path`: The path where the file should be saved (relative to `save_path`)
-- `tracked`: Whether the file should be tracked in git (`YES` or `NO`)
+-   `file_name`: The name of each output file
+-   `relative_path`: The path where the file should be saved (relative to `save_path`)
+-   `tracked`: Whether the file should be tracked in git (`YES` or `NO`)
 
 ### Directory Structure
 
-```text
+``` text
 outputs/
   version/
     iteration/
@@ -195,14 +237,14 @@ outputs/
 
 Two utility scripts help manage output files and git tracking:
 
-1. `update_gitignore.R`: Updates all `.gitignore` files based on `output_structure.csv`
-2. `verify_file_paths.R`: Verifies that all files in `_targets.R` are saved in the correct locations
+1.  `update_gitignore.R`: Updates all `.gitignore` files based on `output_structure.csv`
+2.  `verify_file_paths.R`: Verifies that all files in `_targets.R` are saved in the correct locations
 
 ### File Saving Conventions
 
 All file-producing targets in `_targets.R` should use the `simple_fwrite_repo` function:
 
-```r
+``` r
 simple_fwrite_repo(
   data = your_data,
   folder_path = NULL,  # Not needed when using save_path and file_type
diff --git a/_targets.R b/_targets.R
index beeba7a..1f519a8 100644
--- a/_targets.R
+++ b/_targets.R
@@ -1,5 +1,6 @@
 # Load packages required to define the pipeline:
 library(targets)
+library(tarchetypes)
 library(data.table)
 library(tidyr)
 library(tidyverse)
@@ -50,6 +51,12 @@ source("setup_data_paths.R")
 
 # Replace the target list below with your own:
 list(
+  
+  ## indicate if you have access to confidential data (data/confidential-data folder)
+  ## TRUE indicates access, FALSE indicates that user does not have access
+  tar_target(name = confidential_data_access,
+             command = FALSE),
+  
   # data path (automatically configured)
   tar_target(
     name = main_path,
@@ -65,11 +72,13 @@ list(
       0.00422068,
       ifelse(beta_scenario == "high", 0.00737932, 0.00582)
     )
-  ), # Coefficient from Krewski et al (2009) for mortality impact
+  ), 
+  
+  # Coefficient from Krewski et al (2009) for mortality impact
   tar_target(name = ref_threshold, command = 0.6),
 
   # list save paths (UPDATE VERSION AS NEEDED)
-  tar_target(name = version, command = "rev-submission"),
+  tar_target(name = version, command = "test-no-conf-data"),
   tar_target(
     name = iteration,
     command = paste0("cuf=", ref_threshold, "_beta-scenario=", beta_scenario)
@@ -221,7 +230,7 @@ list(
     name = file_raw_its,
     command = file.path(
       main_path,
-      "data-staged-for-deletion/stocks-flows/raw/Study 1 - Preliminary Fuel Volumes BAU & LC1.xlsx"
+      "data-files/stocks-flows/raw/Study 1 - Preliminary Fuel Volumes BAU & LC1.xlsx"
     ),
     format = "file"
   ),
@@ -229,7 +238,7 @@ list(
     name = file_raw_avgas,
     command = file.path(
       main_path,
-      "data-staged-for-deletion/stocks-flows/raw/Distillates 10-10.xlsx"
+      "data-files/stocks-flows/raw/Distillates 10-10.xlsx"
     ),
     format = "file"
   ),
@@ -237,7 +246,7 @@ list(
     name = file_raw_cec_jet,
     command = file.path(
       main_path,
-      "data-staged-for-deletion/stocks-flows/raw/5-20 Jet Fuel Demand.xlsx"
+      "data-files/stocks-flows/raw/5-20 Jet Fuel Demand.xlsx"
     ),
     format = "file"
   ),
@@ -245,7 +254,7 @@ list(
     name = file_raw_mil_jet,
     command = file.path(
       main_path,
-      "data-staged-for-deletion/stocks-flows/raw/California Transportion Fuel Consumption - Summary 2020-06-01 GDS_rename.xlsx"
+      "data-files/stocks-flows/raw/California Transportion Fuel Consumption - Summary 2020-06-01 GDS_rename.xlsx"
     ),
     format = "file"
   ),
@@ -253,7 +262,7 @@ list(
     name = file_raw_fpm,
     command = file.path(
       main_path,
-      "data-staged-for-deletion/stocks-flows/raw/Finished_Products_Movements.xlsx"
+      "data-files/stocks-flows/raw/Finished_Products_Movements.xlsx"
     ),
     format = "file"
   ),
@@ -261,7 +270,7 @@ list(
     name = file_refcap,
     command = file.path(
       main_path,
-      "data-staged-for-deletion/stocks-flows/processed/refinery_loc_cap_manual.csv"
+      "data-files/stocks-flows/processed/refinery_loc_cap_manual.csv"
     ),
     format = "file"
   ), # this is a manually created file
@@ -269,7 +278,7 @@ list(
     name = file_rediesel,
     command = file.path(
       main_path,
-      "data-staged-for-deletion/stocks-flows/processed/CARB_RE_fuels_CA_imports_figure10_053120.xlsx"
+      "data-files/stocks-flows/processed/CARB_RE_fuels_CA_imports_figure10_053120.xlsx"
     ),
     format = "file"
   ),
@@ -277,7 +286,7 @@ list(
     name = file_renref,
     command = file.path(
       main_path,
-      "data-staged-for-deletion/stocks-flows/processed/renewable_refinery_capacity.xlsx"
+      "data-files/stocks-flows/processed/renewable_refinery_capacity.xlsx"
     ),
     format = "file"
   ), # this is a manually created file
@@ -285,7 +294,7 @@ list(
     name = file_altair,
     command = file.path(
       main_path,
-      "data-staged-for-deletion/stocks-flows/raw/altair_refinery_capacity.xlsx"
+      "data-files/stocks-flows/raw/altair_refinery_capacity.xlsx"
     ),
     format = "file"
   ), # this is a manually created file
@@ -294,7 +303,7 @@ list(
     name = file_raw_ces,
     command = file.path(
       main_path,
-      "data-staged-for-deletion/health/raw/ces3results.xlsx"
+      "data-files/health/raw/ces3results.xlsx"
     ),
     format = "file"
   ),
@@ -302,7 +311,7 @@ list(
     name = file_raw_dac,
     command = file.path(
       main_path,
-      "data-staged-for-deletion/health/raw/SB535DACresultsdatadictionary_F_2022/SB535DACresultsdatadictionary_F_2022.xlsx"
+      "data-files/health/raw/SB535DACresultsdatadictionary_F_2022/SB535DACresultsdatadictionary_F_2022.xlsx"
     ),
     format = "file"
   ),
@@ -310,7 +319,7 @@ list(
     name = file_raw_income_house,
     command = file.path(
       main_path,
-      "data-staged-for-deletion/Census/ca-median-house-income.csv"
+      "data-files/Census/ca-median-house-income.csv"
     ),
     format = "file"
   ), # remove from workflow
@@ -318,7 +327,7 @@ list(
     name = file_raw_income_county,
     command = file.path(
       main_path,
-      "data-staged-for-deletion/Census/ca-median-house-income-county.csv"
+      "data-files/Census/ca-median-house-income-county.csv"
     ),
     format = "file"
   ), # remove from workflow
@@ -326,14 +335,14 @@ list(
     name = file_inmap_re,
     command = file.path(
       main_path,
-      "data-staged-for-deletion/health/source_receptor_matrix/inmap_processed_srm/refining"
+      "data-files/health/source_receptor_matrix/inmap_processed_srm/refining"
     )
   ), # these were created upstream
   tar_target(
     name = file_dt_ef,
     command = file.path(
       main_path,
-      "data-staged-for-deletion/health/processed/ref_emission_factor.csv"
+      "data-files/health/processed/ref_emission_factor.csv"
     ),
     format = "file"
   ), #cluster-level emission factors
@@ -341,7 +350,7 @@ list(
     name = file_dt_ef_ref,
     command = file.path(
       main_path,
-      "data-staged-for-deletion/health/processed/refinery_emission_factor.csv"
+      "data-files/health/processed/refinery_emission_factor.csv"
     ),
     format = "file"
   ), #refinery-level emission factors
@@ -349,7 +358,7 @@ list(
     name = file_dt_age_vsl,
     command = file.path(
       main_path,
-      "data-staged-for-deletion/health/processed/age_based_VSL_2019.csv"
+      "data-files/health/processed/age_based_VSL_2019.csv"
     ),
     format = "file"
   ),
@@ -357,7 +366,7 @@ list(
     name = file_dt_ct_inc_pop,
     command = file.path(
       main_path,
-      "data-staged-for-deletion/health/processed/ct_inc_45_2020.csv"
+      "data-files/health/processed/ct_inc_45_2020.csv"
     ),
     format = "file"
   ),
@@ -365,7 +374,7 @@ list(
     name = file_dt_growth_cap_rate,
     command = file.path(
       main_path,
-      "data-staged-for-deletion/benmap/processed/growth_per_cap.csv"
+      "data-files/benmap/processed/growth_per_cap.csv"
     ),
     format = "file"
   ),
@@ -373,7 +382,7 @@ list(
     name = file_dt_health_income,
     command = file.path(
       main_path,
-      "outputs-staged-for-deletion/refining-2023/health/refining_health_income_2023.csv"
+      "output-files/refining-2023/health/refining_health_income_2023.csv"
     ),
     format = "file"
   ),
@@ -381,7 +390,7 @@ list(
     name = file_raw_ct_2019,
     command = file.path(
       main_path,
-      "data-staged-for-deletion/GIS/raw/ct-cartographic-boundaries/cb_2019_06_tract_500k/cb_2019_06_tract_500k.shp"
+      "data-files/GIS/raw/ct-cartographic-boundaries/cb_2019_06_tract_500k/cb_2019_06_tract_500k.shp"
     ),
     format = "file"
   ),
@@ -389,7 +398,7 @@ list(
     name = file_raw_ct_2020,
     command = file.path(
       main_path,
-      "data-staged-for-deletion/GIS/raw/ct-cartographic-boundaries/nhgis0030_shapefile_tl2020_us_tract_2020/US_tract_2020.shp"
+      "data-files/GIS/raw/ct-cartographic-boundaries/nhgis0030_shapefile_tl2020_us_tract_2020/US_tract_2020.shp"
     ),
     format = "file"
   ),
@@ -397,7 +406,7 @@ list(
     name = file_raw_census_2020,
     command = file.path(
       main_path,
-      "data-staged-for-deletion/Census/nhgis_2020/nhgis0024_csv/nhgis0024_ds249_20205_tract.csv"
+      "data-files/Census/nhgis_2020/nhgis0024_csv/nhgis0024_ds249_20205_tract.csv"
     ),
     format = "file"
   ),
@@ -405,7 +414,7 @@ list(
     name = file_raw_census_2021,
     command = file.path(
       main_path,
-      "data-staged-for-deletion/Census/nhgis_2020/nhgis0024_csv/nhgis0024_ds254_20215_tract.csv"
+      "data-files/Census/nhgis_2020/nhgis0024_csv/nhgis0024_ds254_20215_tract.csv"
     ),
     format = "file"
   ),
@@ -413,7 +422,7 @@ list(
     name = file_raw_ct_race,
     command = file.path(
       main_path,
-      "data-staged-for-deletion/Census/nhgis0039_csv/nhgis0039_ds258_2020_tract.csv"
+      "data-files/Census/nhgis0039_csv/nhgis0039_ds258_2020_tract.csv"
     ),
     format = "file"
   ),
@@ -421,7 +430,7 @@ list(
     name = file_raw_census_poverty,
     command = file.path(
       main_path,
-      "data-staged-for-deletion/Census/nhgis_2020/nhgis0029_csv/nhgis0029_csv/nhgis0029_ds254_20215_tract.csv"
+      "data-files/Census/nhgis_2020/nhgis0029_csv/nhgis0029_csv/nhgis0029_ds254_20215_tract.csv"
     ),
     format = "file"
   ),
@@ -429,33 +438,52 @@ list(
     name = file_df_ca_regions,
     command = file.path(
       main_path,
-      "data-staged-for-deletion/labor/raw/ca_regions.csv"
+      "data-files/labor/raw/ca_regions.csv"
     ),
     format = "file"
   ),
-  # tar_target(name = file_df_labor, command = file.path(main_path, "data-staged-for-deletion/labor/processed/implan-results/academic-paper-multipliers/processed/ica_multipliers_v2.xlsx"), format = "file"),
-  # tar_target(name = file_df_labor_dest, command = file.path(main_path, "data-staged-for-deletion/labor/processed/implan-results/academic-paper-multipliers/processed/20240524-1million_la-Detail Economic Indicators.csv"), format = "file"),
   tar_target(
     name = file_direct_multipliers,
-    command = file.path(
-      main_path,
-      "data-staged-for-deletion/labor/ncomms-revisions/direct_multipliers_tract.csv"
+    command =  { 
+      if (!isTRUE(confidential_data_access)) {
+        file.path(main_path,
+                  "intermediate-outputs/direct_multipliers_tract_blank.csv")
+      } else {
+      file.path(
+        main_path,
+        "confidential-data/direct_multipliers_tract.csv")
+       }
+      },
+      format = "file"
     ),
-    format = "file"
-  ),
   tar_target(
     name = file_indirect_state_multipliers,
-    command = file.path(
-      main_path,
-      "data-staged-for-deletion/labor/ncomms-revisions/indirect_induced_multipliers_state.csv"
-    ),
-    format = "file"
-  ),
+    command = {
+      if (!isTRUE(confidential_data_access)) {
+        file.path(
+          main_path,
+          "intermediate-outputs/indirect_induced_multipliers_state_blank.csv")
+      } else {
+      file.path(
+        main_path,
+        "confidential-data/indirect_induced_multipliers_state.csv")
+       }
+      },
+      format = "file"
+   ),
+  # tar_target(
+  #   name = file_indirect_state_multipliers,
+  #   command = file.path(
+  #     main_path,
+  #     "confidential-data/indirect_induced_multipliers_state.csv"
+  #   ),
+  #   format = "file"
+  # ),
   tar_target(
     name = file_df_labor_dest,
     command = file.path(
       main_path,
-      "data-staged-for-deletion/labor/processed/implan-results/academic-paper-multipliers/processed/20240623-census_regions-Detail Economic Indicators.csv"
+      "data-files/labor/processed/implan-results/academic-paper-multipliers/processed/20240623-census_regions-Detail Economic Indicators.csv"
     ),
     format = "file"
   ),
@@ -463,7 +491,7 @@ list(
     name = file_df_labor_fte,
     command = file.path(
       main_path,
-      "data-staged-for-deletion/labor/processed/implan-results/academic-paper-multipliers/processed/Emp_FTE and W&S_EC_546 Industry Scheme.xlsx"
+      "data-files/labor/processed/implan-results/academic-paper-multipliers/processed/Emp_FTE and W&S_EC_546 Industry Scheme.xlsx"
     ),
     format = "file"
   ),
@@ -471,7 +499,7 @@ list(
     name = file_oil_px,
     command = file.path(
       main_path,
-      "data-staged-for-deletion/stocks-flows/processed/oil_price_projections_revised.xlsx"
+      "data-files/stocks-flows/processed/oil_price_projections_revised.xlsx"
     ),
     format = "file"
   ),
@@ -479,7 +507,7 @@ list(
     name = file_ca_counties_sp,
     command = file.path(
       main_path,
-      "data-staged-for-deletion/GIS/raw/CA_counties_noislands/CA_Counties_TIGER2016_noislands.shp"
+      "data-files/GIS/raw/CA_counties_noislands/CA_Counties_TIGER2016_noislands.shp"
     ),
     format = "file"
   ),
@@ -487,7 +515,7 @@ list(
     name = file_refin_locs_orig,
     command = file.path(
       main_path,
-      "data-staged-for-deletion/GIS/raw/Petroleum_Refineries_US_EIA/Petroleum_Refineries_US_2019_v2.shp"
+      "data-files/GIS/raw/Petroleum_Refineries_US_EIA/Petroleum_Refineries_US_2019_v2.shp"
     ),
     format = "file"
   ),
@@ -495,7 +523,7 @@ list(
     name = file_refin_locs,
     command = file.path(
       main_path,
-      "/data-staged-for-deletion/stocks-flows/processed/refinery_lat_long_revised.csv"
+      "/data-files/stocks-flows/processed/refinery_lat_long_revised.csv"
     ),
     format = "file"
   ),
@@ -503,15 +531,7 @@ list(
     name = file_refin_locs_ct,
     command = file.path(
       main_path,
-      "data-staged-for-deletion/labor/ncomms-revisions/refinery_cluster_tract.csv"
-    ),
-    format = "file"
-  ),
-  tar_target(
-    name = file_labor_2019,
-    command = file.path(
-      main_path,
-      "/data-staged-for-deletion/labor/implan/20241010-census_regions_2019-Detail Economic Indicators.csv"
+      "data-files/labor/ncomms-revisions/refinery_cluster_tract.csv"
     ),
     format = "file"
   ),
@@ -692,12 +712,34 @@ list(
   ),
   tar_target(
     name = dt_direct_multipliers,
-    command = read_labor_direct_mult_inputs(file_direct_multipliers)
+    command =  { 
+      if (!isTRUE(confidential_data_access)) {
+        fread(file_direct_multipliers)
+      } else {
+      
+      read_labor_direct_mult_inputs(file_direct_multipliers)
+      }
+    }
   ),
   tar_target(
     name = dt_indirect_state_multipliers,
-    command = read_labor_indirect_mult_inputs(file_indirect_state_multipliers)
+    command =  { 
+      if (!isTRUE(confidential_data_access)) {
+        fread(file_indirect_state_multipliers)
+      } else {
+      
+      read_labor_indirect_mult_inputs(file_indirect_state_multipliers)
+      }
+    }
   ),
+  # tar_target(
+  #   name = dt_direct_multipliers,
+  #   command = read_labor_direct_mult_inputs(file_direct_multipliers)
+  # ),
+  # tar_target(
+  #   name = dt_indirect_state_multipliers,
+  #   command = read_labor_indirect_mult_inputs(file_indirect_state_multipliers)
+  # ),
   tar_target(
     name = proc_oil_px_df,
     command = read_oil_px(
@@ -722,7 +764,7 @@ list(
     name = file_ghg_emissions,
     command = file.path(
       main_path,
-      "outputs-staged-for-deletion/stocks-flows/refinery_ghg_emissions.csv"
+      "output-files/stocks-flows/refinery_ghg_emissions.csv"
     ),
     format = "file"
   ),
@@ -730,11 +772,10 @@ list(
     file_hydrogen_facilities,
     command = file.path(
       main_path,
-      "data-staged-for-deletion/stocks-flows/raw/hydrogen_facilities_list.xlsx"
+      "data-files/stocks-flows/raw/hydrogen_facilities_list.xlsx"
     ),
     format = "file"
   ),
-  tar_target(name = labor_2019, command = fread(file_labor_2019)),
 
   # GHG factor calculation targets
   tar_target(
@@ -845,7 +886,7 @@ list(
     command = read_and_bind_csv_files(
       file.path(
         main_path,
-        'data-staged-for-deletion/stocks-flows/processed/ghg_mrr'
+        'data-files/stocks-flows/processed/ghg_mrr'
       ),
       ".csv"
     )
@@ -862,7 +903,7 @@ list(
     name = file_fpm,
     command = file.path(
       main_path,
-      "data-staged-for-deletion/stocks-flows/processed/finished_product_movements_weekly_cec.csv"
+      "data-files/stocks-flows/processed/finished_product_movements_weekly_cec.csv"
     ),
     format = "file"
   ),
@@ -870,7 +911,7 @@ list(
     name = file_fw,
     command = file.path(
       main_path,
-      "data-staged-for-deletion/stocks-flows/processed/fuel_watch_data.csv"
+      "data-files/stocks-flows/processed/fuel_watch_data.csv"
     ),
     format = "file"
   ),
@@ -878,7 +919,7 @@ list(
     name = file_processed_ces3,
     command = file.path(
       main_path,
-      "data-staged-for-deletion/health/processed/ces3_data.csv"
+      "data-files/health/processed/ces3_data.csv"
     ),
     format = "file"
   ),
@@ -1526,45 +1567,85 @@ list(
       save_path
     )
   ),
+  
   tar_target(
     name = annual_direct_labor,
-    command = calc_labor_outputs(
-      main_path,
-      save_path,
-      indiv_prod_output,
-      dt_refcap,
-      product_px,
-      cpi2019,
-      cpi2020,
-      discount_rate,
-      alpha_comp,
-      alpha_emp,
-      refin_locs_ct,
-      dt_direct_multipliers
-    )
-  ),
+    command = {
+      
+      if (!isTRUE(confidential_data_access)) {
+        
+        fread('data/intermediate-outputs/annual_labor_outputs.csv',
+              colClasses = list(character = "census_tract"))
+      } else {
+        
+        calc_labor_outputs(
+          main_path,
+          save_path,
+          indiv_prod_output,
+          dt_refcap,
+          product_px,
+          cpi2019,
+          cpi2020,
+          discount_rate,
+          alpha_comp,
+          alpha_emp,
+          refin_locs_ct,
+          dt_direct_multipliers) 
+      }
+    } 
+  ),
+  
   tar_target(
     name = state_annual_direct_impacts,
     command = calc_state_direct_impacts(annual_direct_labor)
   ),
+  
   tar_target(
     name = annual_all_impacts_labor,
-    command = calc_labor_all_impacts_outputs(
-      main_path,
-      save_path,
-      state_annual_direct_impacts,
-      indiv_prod_output,
-      dt_refcap,
-      product_px,
-      cpi2019,
-      cpi2020,
-      discount_rate,
-      alpha_comp,
-      alpha_emp,
-      dt_indirect_state_multipliers,
-      indirect_induced_mult
-    )
-  ),
+    command = {
+      
+      if (!isTRUE(confidential_data_access)) {
+        
+        fread('data/intermediate-outputs/state_annual_labor_outputs.csv')
+      } else {
+        
+        calc_labor_all_impacts_outputs(
+          main_path,
+          save_path,
+          state_annual_direct_impacts,
+          indiv_prod_output,
+          dt_refcap,
+          product_px,
+          cpi2019,
+          cpi2020,
+          discount_rate,
+          alpha_comp,
+          alpha_emp,
+          dt_indirect_state_multipliers,
+          indirect_induced_mult
+        )
+      }
+    } 
+  ),
+  # tar_target(
+  #   name = annual_all_impacts_labor,
+  #   command = calc_labor_all_impacts_outputs(
+  #     main_path,
+  #     save_path,
+  #     state_annual_direct_impacts,
+  #     indiv_prod_output,
+  #     dt_refcap,
+  #     product_px,
+  #     cpi2019,
+  #     cpi2020,
+  #     discount_rate,
+  #     alpha_comp,
+  #     alpha_emp,
+  #     dt_indirect_state_multipliers,
+  #     indirect_induced_mult
+  #   )
+  # ),
+  
   tar_target(
     ref_labor_demog_yr,
     command = calculate_labor_x_demg_annual(
@@ -1939,7 +2020,6 @@ list(
       raw_ct_2019,
       health_weighted,
       refining_mortality,
-      labor_2019,
       ca_regions,
       raw_pop_income_2021,
       cpi2020,
@@ -2472,13 +2552,18 @@ list(
   ),
   tar_target(
     name = save_state_labor_annual,
-    command = simple_fwrite_repo(
+    command = {
+      
+      if (!isTRUE(confidential_data_access)) targets::tar_cancel()
+      
+      simple_fwrite_repo(
       annual_all_impacts_labor,
       NULL,
       "state_annual_labor_outputs.csv",
       save_path = save_path,
       file_type = "labor"
-    ),
+     )
+    },
     format = "file"
   ),
   tar_target(
@@ -3040,6 +3125,8 @@ list(
   tar_target(
     save_annual_direct_labor,
     command = {
+      if (!isTRUE(confidential_data_access)) targets::tar_cancel()
+      {
       simple_fwrite_repo(
         data = annual_direct_labor,
         folder_path = NULL,
@@ -3047,6 +3134,7 @@ list(
         save_path = save_path,
         file_type = "labor"
       )
+     }
     },
     format = "file"
   ),
diff --git a/before-targets/analysis/age_vsl_test.R b/before-targets/analysis/age_vsl_test.R
deleted file mode 100644
index 71b9543..0000000
--- a/before-targets/analysis/age_vsl_test.R
+++ /dev/null
@@ -1,173 +0,0 @@
-tar_load(ct_inc_45)
-tar_load(growth_cap_rates)
-tar_load(vsl_2019)
-tar_load(health_weighted)
-tar_load(beta)
-tar_load(dt_age_vsl)
-tar_load(income_elasticity_mort)
-tar_load(discount_rate)
-
-###############################################################
-
-# for monetary mortality impact - growth in income for use in WTP function
-growth_rates <- growth_cap_rates %>%
-  filter(year > 2019) %>%
-  mutate(cum_growth = cumprod(1 + growth_2035)) %>%
-  select(-growth_2035)%>%
-  drop_na(year)
-
-# Function to grow WTP
-future_WTP <- function(elasticity, growth_rate, WTP) {
-  return(elasticity * growth_rate * WTP + WTP)
-}
-
-#### VSL age cross-walk
-
-vsl_cross_walk <- dt_age_vsl %>%
-  filter(age_min > 29)%>%
-  select(-year)%>%
-  fuzzyjoin::fuzzy_left_join(
-    ct_inc_45 %>%
-      select(start_age, end_age) %>%
-      distinct()%>%
-      filter(start_age > 29),
-    by = c("age_min" = "start_age",
-           "age_max" = "end_age"),
-    match_fun = list(`>=`, `<=`))%>%
-  group_by(start_age)%>%
-  summarise(end_age = first(end_age),
-            age_VSL_2019 = mean(age_VSL_2019))%>%
-  drop_na(start_age)%>%
-  ungroup()
-  
-
-########### Add VSL 2019 by age group
-
-ct_inc_45_temp <- ct_inc_45 %>%
-  select(GEO_ID:end_age, year, pop, incidence_2015) %>%
-  filter(start_age > 29)%>%
-  left_join(vsl_cross_walk, by = c("start_age", "end_age"))%>%
-  mutate(age_VSL_2019 = ifelse(start_age>64, 
-                               dt_age_vsl %>% filter(age_min>61) %>% select(age_VSL_2019) %>% pull(),
-                               age_VSL_2019))
-
-########### Grow VSL 
-
-# for monetary mortality impact - growth in income for use in WTP function
-growth_rates <- growth_cap_rates %>%
-  filter(year > 2019) %>%
-  mutate(cum_growth = cumprod(1 + growth_2035)) %>%
-  select(-growth_2035)%>%
-  drop_na(year)
-
-# Function to grow WTP
-future_WTP <- function(elasticity, growth_rate, WTP) {
-  return(elasticity * growth_rate * WTP + WTP)
-}
-
-# Merge growth factors 
-ct_inc_45_temp <- ct_inc_45_temp%>%
-  mutate(VSL_2019 = vsl_2019) %>%
-  left_join(growth_rates, by = c("year" = "year")) %>%
-  mutate(
-    VSL = future_WTP(
-      income_elasticity_mort,
-      (cum_growth - 1),
-      VSL_2019
-    ),
-    age_VSL = future_WTP(
-      income_elasticity_mort,
-      (cum_growth - 1),
-      age_VSL_2019
-    ))
-
-########### Aggregate at census tract and year level 
-
-ct_inc_pop_45_weighted <- ct_inc_45_temp %>%
-  group_by(GEO_ID, year) %>%
-  mutate(
-    ct_pop = sum(pop, na.rm = T),
-    share = pop / ct_pop,
-    weighted_incidence = sum(share * incidence_2015, na.rm = T),
-    weighted_monetized_incidence = sum(share * incidence_2015 * VSL, na.rm = T),
-    weighted_monetized_age_incidence = sum(share * incidence_2015 * age_VSL, na.rm = T),
-    weighted_monetized_age_incidence_2019 = sum(share * incidence_2015 * age_VSL_2019, na.rm = T)) %>%
-  summarize(
-    weighted_incidence = unique(weighted_incidence),
-    weighted_monetized_incidence = unique(weighted_monetized_incidence),
-    weighted_monetized_age_incidence = unique(weighted_monetized_age_incidence),
-    weighted_monetized_age_incidence_2019 = unique(weighted_monetized_age_incidence),
-    pop = unique(ct_pop)
-  ) %>%
-  ungroup() %>%
-  mutate(GEO_ID = str_remove(GEO_ID, "US"))
-
-#  Delta of pollution change ######################################
-
-# refining pm25 BAU
-refining_BAU <- subset(health_weighted, (scen_id == "BAU historic production")) %>%
-  # refining_BAU<-subset(health_income,(scen_id=="BAU historic production"))%>%
-  rename(bau_total_pm25 = total_pm25) # %>%
-# mutate(census_tract = paste0("0",census_tract))
-
-# refining pm25 difference
-deltas_refining <- health_weighted %>%
-  # deltas_refining<- health_income%>%
-  # mutate(census_tract = paste0("0",census_tract))%>%
-  # left_join(refining_BAU %>% select(-scen_id,-demand_scenario,-refining_scenario,-population:-median_hh_income),by=c("census_tract", "year"))%>%
-  left_join(refining_BAU %>% select(-scen_id, -demand_scenario, -refining_scenario, -ces4_score, -disadvantaged), by = c("census_tract", "year")) %>%
-  mutate(delta_total_pm25 = total_pm25 - bau_total_pm25) %>%
-  select(census_tract, scen_id:year, total_pm25:delta_total_pm25)
-
-## Merge demographic data to pollution scenarios
-
-ct_incidence_ca_poll <- deltas_refining %>%
-  right_join(ct_inc_pop_45_weighted, by = c("census_tract" = "GEO_ID", "year" = "year")) %>%
-  drop_na(scen_id) # CURRENTLY DROPPING ALL THE MISMATCHED 2010/2022 GEOIDs
-
-# Monetized mortality for adults (>=29 years old) by age-based VSL
-
-ct_mort_cost <- ct_incidence_ca_poll %>%
-  mutate(
-    mortality_delta = ((exp(beta * delta_total_pm25) - 1)) * weighted_incidence * pop,
-    mortality_level = ((exp(beta * total_pm25) - 1)) * weighted_incidence * pop,
-    benefit_delta = ((exp(beta * delta_total_pm25) - 1)) * weighted_monetized_incidence * pop,
-    benefit_level = ((exp(beta * total_pm25) - 1)) * weighted_monetized_incidence * pop,
-    benefit_age_delta = ((exp(beta * delta_total_pm25) - 1)) * weighted_monetized_age_incidence * pop,
-    benefit_age_delta_2019 = ((exp(beta * delta_total_pm25) - 1)) * weighted_monetized_age_incidence_2019 * pop,
-    benefit_age_level = ((exp(beta * total_pm25) - 1)) * weighted_monetized_age_incidence * pop
-  )%>%
-  group_by(year) %>%
-  mutate(
-    cost_2019_PV = benefit_age_delta_2019 / ((1 + discount_rate)^(year - 2019)),
-    cost_PV = benefit_age_delta / ((1 + discount_rate)^(year - 2019))
-  )%>%
-  ungroup()
-
-# # #### Calculate the cost per premature mortality (old way)################
-# 
-# ct_health <- ct_incidence_ca_poll %>%
-#   mutate(
-#     mortality_delta = ((exp(beta * delta_total_pm25) - 1)) * weighted_incidence * pop,
-#     mortality_level = ((exp(beta * total_pm25) - 1)) * weighted_incidence * pop
-#   )
-# 
-# ct_mort_cost <- ct_health %>%
-#   mutate(VSL_2019 = vsl_2019) %>%
-#   left_join(growth_rates, by = c("year" = "year")) %>%
-#   mutate(
-#     VSL = future_WTP(
-#       income_elasticity_mort,
-#       (cum_growth - 1),
-#       VSL_2019
-#     ),
-#     cost_2019 = mortality_delta * VSL_2019,
-#     cost = mortality_delta * VSL
-#   ) %>%
-#   group_by(year) %>%
-#   mutate(
-#     cost_2019_PV = cost_2019 / ((1 + discount_rate)^(year - 2019)),
-#     cost_PV = cost / ((1 + discount_rate)^(year - 2019))
-#   )
-
-
diff --git a/before-targets/analysis/compare-labor-out.R b/before-targets/analysis/compare-labor-out.R
deleted file mode 100644
index 2ad02c6..0000000
--- a/before-targets/analysis/compare-labor-out.R
+++ /dev/null
@@ -1,63 +0,0 @@
-## compare outputs
-
-orig <- fread(file.path(main_path, "outputs/academic-out/refining/figures/2025-update/fig-csv-files/labor_result_for_review.csv"),
-              colClasses = c(census_tract = "character"))
-
-compare <- merge(ct_out_refining_direct %>% filter(product_scenario == "changing prices"),
-                 orig, 
-                 by = c("demand_scenario",
-                        "refining_scenario",
-                        "oil_price_scenario",
-                        "census_tract",
-                        "year"), suffixes = c("_orig", "_new")) |>
-                   ## compute absolute difference
-                   mutate(diff_total_comp_usd19_h = total_comp_usd19_h_orig - total_comp_usd19_h_new,
-                          diff_prev_comp_usd19h = prev_comp_usd19h_orig - prev_comp_usd19h_new,
-                          diff_total_comp_usd19_l = total_comp_usd19_l_orig - total_comp_usd19_l_new,
-                          diff_total_emp = total_emp_orig - total_emp_new,
-                          diff_prev_emp = prev_emp_orig - prev_emp_new,
-                          diff_r = total_emp_revised_orig - total_emp_revised_new)
-
-
-orig2 <- fread(file.path(main_path, "outputs/academic-out/refining/figures/2025-update/fig-csv-files/state_out_labor_all_impacts.csv"))
-
-orig2 <- orig2 |> pivot_longer(state_emp_h:comp_all_impacts_PV_l, names_to = "metric", values_to = "orig_value")
-
-compare2 <- state_out_labor_all_impacts |> pivot_longer(state_emp_h:comp_all_impacts_PV_l, names_to = "metric", values_to = "value") |>
-  filter(product_scenario == "changing prices") |>
-  full_join(orig2) |>
-  mutate(diff = orig_value - value)
-
-orig3 <- fread(file.path(main_path, "outputs/academic-out/refining/figures/2025-update/fig-csv-files/state_labor_direct_impacts_demo_annual.csv")) |>
-  pivot_longer(sum_demo_emp:sum_demo_comp_pv_l, names_to = "metric", values_to = "orig_value")
-
-compare3 <- state_demo_labor_out |> pivot_longer(sum_demo_emp:sum_demo_comp_pv_l, names_to = "metric", values_to = "value") |>
-  filter(product_scenario == "changing prices") |>
-  full_join(orig3) |>
-  mutate(diff = orig_value - value)
-
-orig4 <- fread(file.path(main_path, "outputs/academic-out/refining/figures/2025-update/fig-csv-files/", "labor_high_low_annual_outputs.csv")) |>
-  rename(orig_value = value)
-
-compare4 <- labor_pct_long |>
-  filter(product_scenario == "changing prices") |>
-  full_join(orig4) |>
-  mutate(diff = orig_value - value)
-
-orig5 <- fread(file.path(main_path, "outputs/academic-out/refining/figures/2025-update/fig-csv-files/state_labor_direct_impacts_demo_annual.csv")) |>
-  pivot_longer(sum_demo_emp:sum_demo_comp_pv_l, names_to = "metric", values_to = "orig_value")
-
-compared5 <- state_demo_labor_out |>
-  filter(product_scenario == "changing prices") |>
-  pivot_longer(sum_demo_emp:sum_demo_comp_pv_l, names_to = "metric", values_to = "value") |>
-  full_join(orig5) |>
-  mutate(diff = orig_value - value)
-
-npv_comp <- fread(file.path(main_path, "outputs/academic-out/refining/figures/2025-update/fig-csv-files/", "state_npv_fig_inputs_labor.csv"))
-
-comp6 <- plot_df_labor |>
-  filter(produc)
-
-  
-  
-  
\ No newline at end of file
diff --git a/before-targets/analysis/create_ct_xwalk.R b/before-targets/analysis/create_ct_xwalk.R
deleted file mode 100644
index df686af..0000000
--- a/before-targets/analysis/create_ct_xwalk.R
+++ /dev/null
@@ -1,59 +0,0 @@
-## Tracey Mangin
-## May 27, 2023
-## Create crosswalk
-
-## libraries
-library(tidyverse)
-library(sf)
-library(data.table)
-library(plotly)
-
-## paths
-main_path <- "/Volumes/GoogleDrive/Shared drives/emlab/projects/current-projects/calepa-cn/"
-main_path <- "/Users/traceymangin/Library/CloudStorage/GoogleDrive-tmangin@ucsb.edu/Shared\ drives/emlab/projects/current-projects/calepa-cn/"
-sp_data_path <- paste0(main_path, "data/GIS/raw/")
-save_path <- paste0(main_path, "project-materials/refining-paper/model-prep/census-xwalk/")
-
-## file names - use cartographic boundaries, not tigerlines
-prev_ct <- "ct-cartographic-boundaries/cb_2019_06_tract_500k/cb_2019_06_tract_500k.shp"
-ct_2020 <- "ct-cartographic-boundaries/nhgis0030_shapefile_tl2020_us_tract_2020/US_tract_2020.shp"
-
-## crs NAD83 / California Albers
-ca_crs <- 3310
-
-## previous version of cts
-census_tract19 <- read_sf(paste0(sp_data_path, prev_ct)) %>%
-  select(GEOID) %>%
-  st_transform(crs = ca_crs)
-
-## california id
-ca_code <- "06"
-
-census_tract20 <- read_sf(paste0(sp_data_path, ct_2020)) %>%
-  filter(STATEFP == ca_code) %>%
-  select(GEOID) %>%
-  st_transform(crs = ca_crs)
-
-census_tract20 <- st_make_valid(census_tract20)
-
-## make a crosswalk for census tracts, tract percentage overlap
-## -------------------------------
-
-## 2020 cts merged with 2019, all matches
-ct_merged <- census_tract20 %>%
-  rename(GEOID_2020 = GEOID) %>%
-  mutate(GEOID_2020_area = st_area(.)) %>%
-  st_intersection(census_tract19) %>%
-  mutate(intersect_area = st_area(.)) %>%
-  arrange(GEOID_2020, intersect_area) %>%
-  group_by(GEOID_2020) %>%
-  mutate(sum_intersect_area = sum(intersect_area)) %>%
-  ungroup() %>%
-  mutate(rel_intersect = intersect_area / sum_intersect_area) %>%
-  rename(GEOID_2019 = GEOID) %>%
-  mutate(rel_intersect = units::drop_units(rel_intersect)) %>%
-  select(GEOID_2020, GEOID_2020_area, GEOID_2019, intersect_area, sum_intersect_area, rel_intersect) %>%
-  st_drop_geometry()
-
-## save
-fwrite(ct_merged, paste0(save_path, "ct_xwalk.csv"))
diff --git a/before-targets/archive/health_labor.R b/before-targets/archive/health_labor.R
deleted file mode 100644
index 0d2eb43..0000000
--- a/before-targets/archive/health_labor.R
+++ /dev/null
@@ -1,10906 +0,0 @@
-## health and labor figures
-
-## labor SI figure
-## -----------------------------------------------------------------------------
-
-## NPV figure
-plot_npv_labor_oilpx <- function(
-  main_path,
-  save_path,
-  state_ghg_output,
-  dt_ghg_2019,
-  annual_all_impacts_labor
-) {
-  ## add ghg emission reduction
-  ## 2019 ghg
-  ghg_2019_val <- dt_ghg_2019$mtco2e[1]
-
-  ## 2045 vs 2019 ghg
-  ghg_2045 <- state_ghg_output[year == 2045 & source == "total"]
-  setnames(ghg_2045, "value", "ghg_kg")
-  ghg_2045[, ghg_2045 := (ghg_kg / 1000) / 1e6]
-  ghg_2045[, ghg_2019 := ghg_2019_val]
-  ghg_2045[, perc_diff := (ghg_2045 - ghg_2019) / ghg_2019]
-
-  perc_diff_df <- ghg_2045[, .(
-    demand_scenario,
-    refining_scenario,
-    ghg_2045,
-    ghg_2019,
-    perc_diff
-  )]
-
-  ## summarize by scenario, filter for total
-  state_ghg_df <- state_ghg_output[
-    source == "total",
-    .(total_ghg = sum(value)),
-    by = .(demand_scenario, refining_scenario)
-  ]
-
-  state_ghg_df[, total_ghg_mmt := (total_ghg / 1000) / 1e6]
-
-  ## reference
-  ref_df <- state_ghg_df[
-    demand_scenario == "BAU" & refining_scenario == "historic production",
-    .(total_ghg_mmt)
-  ]
-  setnames(ref_df, "total_ghg_mmt", "ref_ghg_mmt")
-  ref_value <- ref_df$ref_ghg_mmt[1]
-
-  ## merge with summarized df
-  state_ghg_df[, ref_ghg := ref_value]
-  state_ghg_df[, avoided_ghg := (total_ghg_mmt - ref_value) * -1]
-
-  ## summarize labor for state
-  state_labor <- annual_all_impacts_labor[,
-    .(
-      # sum_total_emp = sum(total_emp),
-      sum_total_comp_pv_h = sum(comp_all_impacts_PV_h),
-      sum_total_comp_pv_l = sum(comp_all_impacts_PV_l, na.rm = T)
-    ),
-    by = .(demand_scenario, refining_scenario, oil_price_scenario)
-  ]
-
-  ## ref labor
-  ref_labor <- state_labor[
-    demand_scenario == "BAU" & refining_scenario == "historic production"
-  ]
-  setnames(
-    ref_labor,
-    c("sum_total_comp_pv_h", "sum_total_comp_pv_l"),
-    c("ref_total_comp_pv_h", "ref_total_comp_pv_l")
-  )
-  ref_labor <- ref_labor[, .(
-    oil_price_scenario,
-    ref_total_comp_pv_l,
-    ref_total_comp_pv_h
-  )]
-
-  ## add values to labor
-  state_labor_oil_px <- merge(
-    state_labor,
-    ref_labor,
-    by = c("oil_price_scenario")
-  )
-
-  ## compute forgone wages high and low
-  state_labor_oil_px[,
-    forgone_wages_bil_h := (sum_total_comp_pv_h - ref_total_comp_pv_h) / 1e9
-  ]
-  state_labor_oil_px[,
-    forgone_wages_bil_l := (sum_total_comp_pv_l - ref_total_comp_pv_l) / 1e9
-  ]
-
-  ## merge with health and ghg
-  labor_ghg_df <- merge(
-    state_labor_oil_px[, .(
-      demand_scenario,
-      refining_scenario,
-      oil_price_scenario,
-      sum_total_comp_pv_h,
-      ref_total_comp_pv_h,
-      forgone_wages_bil_h,
-      sum_total_comp_pv_l,
-      ref_total_comp_pv_l,
-      forgone_wages_bil_l
-    )],
-    state_ghg_df,
-    by = c("demand_scenario", "refining_scenario"),
-    all.x = T
-  )
-
-  ## add ghg perc reduction
-  labor_ghg_df <- merge(
-    labor_ghg_df,
-    perc_diff_df,
-    by = c("demand_scenario", "refining_scenario"),
-    all.x = T
-  )
-
-  ## add scen id
-  labor_ghg_df[, scen_id := paste(demand_scenario, refining_scenario)]
-
-  ## prepare to plot
-  plot_df <- labor_ghg_df[, .(
-    scen_id,
-    demand_scenario,
-    refining_scenario,
-    oil_price_scenario,
-    forgone_wages_bil_h,
-    forgone_wages_bil_l,
-    avoided_ghg,
-    perc_diff
-  )]
-
-  setnames(plot_df, "perc_diff", "ghg_perc_diff")
-
-  plot_df[, `:=`(
-    forgone_wages_bil_h_ghg = forgone_wages_bil_h / avoided_ghg,
-    forgone_wages_bil_l_ghg = forgone_wages_bil_l / avoided_ghg
-  )]
-
-  plot_df_labor <- plot_df %>%
-    select(
-      scen_id,
-      demand_scenario,
-      refining_scenario,
-      oil_price_scenario,
-      ghg_perc_diff,
-      forgone_wages_bil_h,
-      forgone_wages_bil_l,
-      forgone_wages_bil_h_ghg,
-      forgone_wages_bil_l_ghg
-    ) %>%
-    pivot_longer(
-      forgone_wages_bil_h:forgone_wages_bil_l_ghg,
-      names_to = "metric",
-      values_to = "value"
-    ) %>%
-    mutate(
-      segment = "labor",
-      unit_desc = ifelse(
-        metric %in% c("forgone_wages_bil_h", "forgone_wages_bil_l"),
-        "USD billion",
-        "USD billion per GHG"
-      ),
-      estimate = ifelse(
-        metric %in% c("forgone_wages_bil_h", "forgone_wages_bil_h_ghg"),
-        "high",
-        "low"
-      ),
-      metric = ifelse(
-        metric %in% c("forgone_wages_bil_h", "forgone_wages_bil_l"),
-        "forgone_wages_bil",
-        "forgone_wages_bil_ghg"
-      )
-    ) %>%
-    select(
-      scen_id,
-      demand_scenario,
-      refining_scenario,
-      oil_price_scenario,
-      ghg_perc_diff,
-      segment,
-      metric,
-      unit_desc,
-      estimate,
-      value
-    ) %>%
-    pivot_wider(names_from = estimate, values_from = value)
-
-  ## prepare labor ----------------------
-  plot_df_labor <- plot_df_labor %>%
-    mutate(
-      title = ifelse(
-        metric == "forgone_wages_bil",
-        "Labor: forgone wages",
-        "Labor: forgone wages per avoided GHG"
-      )
-    )
-
-  plot_df_labor$title <- factor(
-    plot_df_labor$title,
-    levels = c("Labor: forgone wages", "Labor: forgone wages per avoided GHG")
-  )
-
-  ## rename
-  setDT(plot_df_labor)
-  plot_df_labor[,
-    scenario := paste0(demand_scenario, " demand - ", refining_scenario)
-  ]
-  plot_df_labor[, scenario := gsub("LC1.", "Low ", scenario)]
-
-  ## refactor
-  plot_df_labor$scenario <- factor(
-    plot_df_labor$scenario,
-    levels = c(
-      "BAU demand - historic production",
-      "BAU demand - historic exports",
-      "BAU demand - low exports",
-      "Low demand - historic exports",
-      "Low demand - low exports",
-      "Low demand - historic production"
-    )
-  )
-
-  ## convert value of scaled outputs (by ghg) to millions, add unit column
-  plot_df_labor[,
-    high := fifelse(
-      metric %in% c("avoided_health_cost_ghg", "forgone_wages_bil_ghg"),
-      high * 1000,
-      high
-    )
-  ]
-  plot_df_labor[,
-    low := fifelse(
-      metric %in% c("avoided_health_cost_ghg", "forgone_wages_bil_ghg"),
-      low * 1000,
-      low
-    )
-  ]
-  plot_df_labor[,
-    metric := fifelse(
-      metric == "forgone_wages_bil_ghg",
-      "forgone_wages_ghg",
-      metric
-    )
-  ]
-  plot_df_labor[,
-    unit := fifelse(
-      metric %in% c("avoided_health_cost_ghg", "forgone_wages_ghg"),
-      "NPV per avoided GHG MtCO2e\n(2019 USD million / MtCO2e)",
-      "NPV (2019 USD billion)"
-    )
-  ]
-
-  ## change historic to historical
-  plot_df_labor[, scen_id := str_replace(scen_id, "historic", "historical")]
-  plot_df_labor[,
-    refining_scenario := str_replace(
-      refining_scenario,
-      "historic",
-      "historical"
-    )
-  ]
-  plot_df_labor[, scenario := str_replace(scenario, "historic", "historical")]
-
-  ## has oil price label
-  plot_df_labor[,
-    oil_px_label := ifelse(
-      oil_price_scenario == "reference case",
-      "Reference",
-      ifelse(oil_price_scenario == "high oil price", "High", "Low")
-    )
-  ]
-
-  plot_df_labor$oil_px_label <- factor(
-    plot_df_labor$oil_px_label,
-    levels = c("Low", "Reference", "High")
-  )
-
-  ## save figure inputs
-  fwrite(
-    plot_df_labor,
-    file.path(
-      main_path,
-      save_path,
-      "fig-csv-files",
-      "state_npv_fig_inputs_labor_all_oilpx.csv"
-    )
-  )
-  # fwrite(plot_df_labor, file.path(main_path, "outputs/academic-out/refining/figures/2024-08-beta-adj/fig-csv-files/", "state_npv_fig_inputs_labor.csv"))
-
-  ## scenarios for filtering
-  remove_scen <- c("LC1 historical production", "BAU historical production")
-  bau_scen <- "BAU historical production"
-
-  ## make the plot
-  ## ---------------------------------------------------
-
-  ## color for refining scenario
-  refin_colors <- c(
-    "LC1 low exports" = "#729b79",
-    "LC1 historical exports" = "#2F4858",
-    "BAU low exports" = "#F6AE2D",
-    "BAU historical exports" = "#F26419"
-  )
-
-  refin_labs <- c(
-    "LC1 low exports" = "Low demand, low exports",
-    "LC1 historical exports" = "Low demand, historical exports",
-    "BAU low exports" = "BAU demand, low exports",
-    "BAU historical exports" = "BAU demand, historical exports"
-  )
-
-  ## figs - make each separately
-  ## -------------------------------------------------------------------
-
-  hist_prod <- as.data.table(
-    plot_df_labor %>%
-      filter(
-        scen_id == bau_scen,
-        oil_price_scenario == "reference case",
-        unit == "NPV (2019 USD billion)"
-      )
-  )
-
-  ##
-  forgone_wages_all_oil_px_fig <- ggplot() +
-    geom_hline(yintercept = 0, color = "darkgray", size = 0.5) +
-    geom_vline(
-      xintercept = hist_prod$ghg_perc_diff * -100,
-      color = "darkgray",
-      lty = 2
-    ) +
-    # geom_vline(xintercept = hist_prod[title == "Labor: forgone wages", ghg_perc_diff * -100], color = "darkgray", lty = 2) +
-    # geom_linerange(data = plot_df_labor %>% filter(!scen_id %in% remove_scen,
-    #                                                refining_scenario != "historical production",
-    #                                                metric == "forgone_wages_bil"), aes(x = ghg_perc_diff * -100, ymin = high, ymax = low, color = scen_id), linewidth = 0.5, alpha = 0.8) +
-    geom_point(
-      data = plot_df_labor %>%
-        filter(
-          !scen_id %in% remove_scen,
-          refining_scenario != "historical production",
-          metric == "forgone_wages_bil"
-        ),
-      aes(x = ghg_perc_diff * -100, y = low, color = scen_id),
-      shape = 16,
-      size = 3,
-      alpha = 0.9
-    ) +
-    geom_point(
-      data = plot_df_labor %>%
-        filter(
-          !scen_id %in% remove_scen,
-          refining_scenario != "historical production",
-          metric == "forgone_wages_bil"
-        ),
-      aes(x = ghg_perc_diff * -100, y = high, color = scen_id),
-      shape = 1,
-      size = 3,
-      alpha = 0.9
-    ) +
-    facet_wrap(~oil_px_label) +
-    labs(
-      color = NULL,
-      title = "Labor: forgone wages",
-      y = "NPV (2019 USD billion)",
-      x = "GHG emissions reduction (%, 2045 vs 2019)"
-    ) +
-    ylim(-90, 10) +
-    xlim(0, 80) +
-    scale_color_manual(
-      values = refin_colors,
-      labels = refin_labs
-    ) +
-    theme_line +
-    theme(
-      legend.position = "bottom",
-      legend.text = element_text(size = 10),
-      plot.title = element_text(hjust = 0.5, size = 12),
-      axis.title.y = element_text(size = 12),
-      axis.title.x = element_text(size = 11),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm"),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5, size = 11),
-      axis.text.y = element_text(vjust = 0.5, hjust = 0.5, size = 11)
-    ) +
-    guides(color = guide_legend(nrow = 2))
-
-  return(forgone_wages_all_oil_px_fig)
-}
-
-
-## -----------------------------------------------------------------------------
-## NPV figure: main text
-## -----------------------------------------------------------------------------
-
-## NPV figure
-plot_npv_health_labor <- function(
-  main_path,
-  save_path,
-  refining_mortality,
-  state_ghg_output,
-  dt_ghg_2019,
-  annual_all_impacts_labor
-) {
-  npv_df <- refining_mortality %>% as.data.table()
-
-  ## state level
-  state_npv_df <- npv_df[,
-    .(
-      sum_cost_2019_pv = sum(cost_2019_PV), ## constant VSL
-      sum_cost_pv = sum(cost_PV)
-    ), ## changing VSL
-    by = .(scen_id, demand_scenario, refining_scenario)
-  ]
-
-  ## add column
-  state_npv_df[, sum_cost_2019_pv_b := sum_cost_2019_pv / 1e9]
-  state_npv_df[, sum_cost_pv_b := sum_cost_pv / 1e9]
-
-  ## add ghg emission reduction
-  ## 2019 ghg
-  ghg_2019_val <- dt_ghg_2019$mtco2e[1]
-
-  ## 2045 vs 2019 ghg
-  ghg_2045 <- state_ghg_output[year == 2045 & source == "total"]
-  setnames(ghg_2045, "value", "ghg_kg")
-  ghg_2045[, ghg_2045 := (ghg_kg / 1000) / 1e6]
-  ghg_2045[, ghg_2019 := ghg_2019_val]
-  ghg_2045[, perc_diff := (ghg_2045 - ghg_2019) / ghg_2019]
-
-  perc_diff_df <- ghg_2045[, .(
-    demand_scenario,
-    refining_scenario,
-    ghg_2045,
-    ghg_2019,
-    perc_diff
-  )]
-
-  ## summarize by scenario, filter for total
-  state_ghg_df <- state_ghg_output[
-    source == "total",
-    .(total_ghg = sum(value)),
-    by = .(demand_scenario, refining_scenario)
-  ]
-
-  state_ghg_df[, total_ghg_mmt := (total_ghg / 1000) / 1e6]
-
-  ## reference
-  ref_df <- state_ghg_df[
-    demand_scenario == "BAU" & refining_scenario == "historic production",
-    .(total_ghg_mmt)
-  ]
-  setnames(ref_df, "total_ghg_mmt", "ref_ghg_mmt")
-  ref_value <- ref_df$ref_ghg_mmt[1]
-
-  ## merge with summarized df
-  state_ghg_df[, ref_ghg := ref_value]
-  state_ghg_df[, avoided_ghg := (total_ghg_mmt - ref_value) * -1]
-
-  ## merge with health
-  health_ghg_df <- merge(
-    state_npv_df,
-    state_ghg_df[, .(
-      demand_scenario,
-      refining_scenario,
-      total_ghg_mmt,
-      ref_ghg,
-      avoided_ghg
-    )],
-    by = c("demand_scenario", "refining_scenario"),
-    all.x = T
-  )
-
-  ## summarize labor for state
-  state_labor <- annual_all_impacts_labor[,
-    .(
-      # sum_total_emp = sum(total_emp),
-      sum_total_comp_pv_h = sum(comp_all_impacts_PV_h),
-      sum_total_comp_pv_l = sum(comp_all_impacts_PV_l, na.rm = T)
-    ),
-    by = .(demand_scenario, refining_scenario, oil_price_scenario)
-  ]
-
-  state_labor <- state_labor[oil_price_scenario == "reference case", ]
-
-  ## ref labor
-  ref_labor <- state_labor[
-    demand_scenario == "BAU" & refining_scenario == "historic production"
-  ]
-  setnames(
-    ref_labor,
-    c("sum_total_comp_pv_h", "sum_total_comp_pv_l"),
-    c("ref_total_comp_pv_h", "ref_total_comp_pv_l")
-  )
-  # setnames(ref_labor, c("sum_total_emp", "sum_total_comp_pv_h", "sum_total_comp_pv_l"), c("ref_total_emp", "ref_total_comp_pv_h", "ref_total_comp_pv_l"))
-
-  ## add values to labor
-  state_labor[, `:=`(
-    # ref_total_emp = ref_labor$ref_total_emp[1],
-    ref_total_comp_pv_h = ref_labor$ref_total_comp_pv_h[1],
-    ref_total_comp_pv_l = ref_labor$ref_total_comp_pv_l[1]
-  )]
-
-  state_labor[,
-    forgone_wages_bil_h := (sum_total_comp_pv_h - ref_total_comp_pv_h) / 1e9
-  ]
-  state_labor[,
-    forgone_wages_bil_l := (sum_total_comp_pv_l - ref_total_comp_pv_l) / 1e9
-  ]
-
-  ## merge with health and ghg
-  health_labor_ghg_df <- merge(
-    health_ghg_df,
-    state_labor[, .(
-      demand_scenario,
-      refining_scenario,
-      oil_price_scenario,
-      sum_total_comp_pv_h,
-      ref_total_comp_pv_h,
-      forgone_wages_bil_h,
-      sum_total_comp_pv_l,
-      ref_total_comp_pv_l,
-      forgone_wages_bil_l
-    )],
-    by = c("demand_scenario", "refining_scenario"),
-    all.x = T
-  )
-
-  ## add ghg perc reduction
-  health_labor_ghg_df <- merge(
-    health_labor_ghg_df,
-    perc_diff_df,
-    by = c("demand_scenario", "refining_scenario"),
-    all.x = T
-  )
-
-  ## prepare to plot
-  plot_df <- health_labor_ghg_df[, .(
-    scen_id,
-    demand_scenario,
-    refining_scenario,
-    oil_price_scenario,
-    sum_cost_pv_b,
-    sum_cost_2019_pv_b,
-    forgone_wages_bil_h,
-    forgone_wages_bil_l,
-    avoided_ghg,
-    perc_diff
-  )]
-
-  setnames(plot_df, "perc_diff", "ghg_perc_diff")
-
-  ## add values / avoided ghgs
-  plot_df[, avoided_health_cost := sum_cost_2019_pv_b * -1]
-  plot_df[, avoided_health_cost_annual_vsl := sum_cost_pv_b * -1]
-  plot_df[, sum_cost_2019_pv_b := NULL]
-  plot_df[, sum_cost_pv_b := NULL]
-
-  plot_df[, `:=`(
-    avoided_health_cost_ghg = avoided_health_cost / avoided_ghg,
-    avoided_health_cost_ghg_vsl2 = avoided_health_cost_annual_vsl / avoided_ghg,
-    forgone_wages_bil_h_ghg = forgone_wages_bil_h / avoided_ghg,
-    forgone_wages_bil_l_ghg = forgone_wages_bil_l / avoided_ghg
-  )]
-
-  plot_df_health <- plot_df %>%
-    select(
-      scen_id,
-      demand_scenario,
-      refining_scenario,
-      ghg_perc_diff,
-      avoided_health_cost,
-      avoided_health_cost_annual_vsl,
-      avoided_health_cost_ghg,
-      avoided_health_cost_ghg_vsl2
-    ) %>%
-    pivot_longer(
-      avoided_health_cost:avoided_health_cost_ghg_vsl2,
-      names_to = "metric",
-      values_to = "value"
-    )
-
-  ## add column for vsl
-  plot_df_health <- plot_df_health %>%
-    mutate(
-      segment = "health",
-      unit_desc = ifelse(
-        metric == "avoided_health_cost",
-        "USD billion (2019 VSL)",
-        ifelse(
-          metric == "avoided_health_cost_annual_vsl",
-          "USD billion (annual VSL)",
-          ifelse(
-            metric == "avoided_health_cost_ghg",
-            "USD billion per GHG (2019 VSL)",
-            "USD billion per GHG (annual VSL)"
-          )
-        )
-      ),
-      metric = ifelse(
-        metric %in% c("avoided_health_cost", "avoided_health_cost_annual_vsl"),
-        "avoided_health_cost",
-        "avoided_health_cost_ghg"
-      )
-    )
-
-  plot_df_labor <- plot_df %>%
-    select(
-      scen_id,
-      demand_scenario,
-      refining_scenario,
-      oil_price_scenario,
-      ghg_perc_diff,
-      forgone_wages_bil_h,
-      forgone_wages_bil_l,
-      forgone_wages_bil_h_ghg,
-      forgone_wages_bil_l_ghg
-    ) %>%
-    pivot_longer(
-      forgone_wages_bil_h:forgone_wages_bil_l_ghg,
-      names_to = "metric",
-      values_to = "value"
-    ) %>%
-    mutate(
-      segment = "labor",
-      unit_desc = ifelse(
-        metric %in% c("forgone_wages_bil_h", "forgone_wages_bil_l"),
-        "USD billion",
-        "USD billion per GHG"
-      ),
-      estimate = ifelse(
-        metric %in% c("forgone_wages_bil_h", "forgone_wages_bil_h_ghg"),
-        "high",
-        "low"
-      ),
-      metric = ifelse(
-        metric %in% c("forgone_wages_bil_h", "forgone_wages_bil_l"),
-        "forgone_wages_bil",
-        "forgone_wages_bil_ghg"
-      )
-    ) %>%
-    select(
-      scen_id,
-      demand_scenario,
-      refining_scenario,
-      ghg_perc_diff,
-      segment,
-      metric,
-      unit_desc,
-      estimate,
-      value
-    ) %>%
-    pivot_wider(names_from = estimate, values_from = value)
-
-  # plot_df_long <- rbind(plot_df_health, plot_df_labor)
-
-  ## prepare health for plotting ------------------------------
-  plot_df_health <- plot_df_health %>%
-    mutate(
-      title = ifelse(
-        metric == "avoided_health_cost",
-        "Health: avoided mortality",
-        "Health: avoided mortality per avoided GHG"
-      )
-    )
-
-  plot_df_health$title <- factor(
-    plot_df_health$title,
-    levels = c(
-      "Health: avoided mortality",
-      "Health: avoided mortality per avoided GHG"
-    )
-  )
-
-  ## rename
-  setDT(plot_df_health)
-  plot_df_health[,
-    scenario := paste0(demand_scenario, " demand - ", refining_scenario)
-  ]
-  plot_df_health[, scenario := gsub("LC1.", "Low ", scenario)]
-  # plot_df_long[, scenario := gsub('BAU', 'Reference', scenario)]
-  # plot_df_long[, short_scen := gsub('BAU', 'Reference', short_scen)]
-  # plot_df_long[, short_scen := gsub('Low C.', 'Low carbon', short_scen)]
-
-  ## refactor
-  plot_df_health$scenario <- factor(
-    plot_df_health$scenario,
-    levels = c(
-      "BAU demand - historic production",
-      "BAU demand - historic exports",
-      "BAU demand - low exports",
-      "Low demand - historic exports",
-      "Low demand - low exports",
-      "Low demand - historic production"
-    )
-  )
-
-  ## convert value of scaled outputs (by ghg) to millions, add unit column
-  plot_df_health[,
-    value := fifelse(
-      metric %in% c("avoided_health_cost_ghg", "forgone_wages_bil_ghg"),
-      value * 1000,
-      value
-    )
-  ]
-  plot_df_health[,
-    metric := fifelse(
-      metric == "forgone_wages_bil_ghg",
-      "forgone_wages_ghg",
-      metric
-    )
-  ]
-  plot_df_health[,
-    unit := fifelse(
-      metric %in% c("avoided_health_cost_ghg", "forgone_wages_ghg"),
-      "NPV per avoided GHG MtCO2e\n(2019 USD million / MtCO2e)",
-      "NPV (2019 USD billion)"
-    )
-  ]
-
-  ## change historic to historical
-  plot_df_health[, scen_id := str_replace(scen_id, "historic", "historical")]
-  plot_df_health[,
-    refining_scenario := str_replace(
-      refining_scenario,
-      "historic",
-      "historical"
-    )
-  ]
-  plot_df_health[, scenario := str_replace(scenario, "historic", "historical")]
-
-  ## save figure inputs
-  fwrite(
-    plot_df_health,
-    file.path(
-      main_path,
-      save_path,
-      "fig-csv-files",
-      "state_npv_fig_inputs_health.csv"
-    )
-  )
-  # fwrite(plot_df_health, file.path(main_path, "outputs/academic-out/refining/figures/2024-08-beta-adj/fig-csv-files/", "state_npv_fig_inputs_health.csv"))
-
-  ## prepare labor ----------------------
-  plot_df_labor <- plot_df_labor %>%
-    mutate(
-      title = ifelse(
-        metric == "forgone_wages_bil",
-        "Labor: forgone wages",
-        "Labor: forgone wages per avoided GHG"
-      )
-    )
-
-  plot_df_labor$title <- factor(
-    plot_df_labor$title,
-    levels = c("Labor: forgone wages", "Labor: forgone wages per avoided GHG")
-  )
-
-  ## rename
-  setDT(plot_df_labor)
-  plot_df_labor[,
-    scenario := paste0(demand_scenario, " demand - ", refining_scenario)
-  ]
-  plot_df_labor[, scenario := gsub("LC1.", "Low ", scenario)]
-  # plot_df_long[, scenario := gsub('BAU', 'Reference', scenario)]
-  # plot_df_long[, short_scen := gsub('BAU', 'Reference', short_scen)]
-  # plot_df_long[, short_scen := gsub('Low C.', 'Low carbon', short_scen)]
-
-  ## refactor
-  plot_df_labor$scenario <- factor(
-    plot_df_labor$scenario,
-    levels = c(
-      "BAU demand - historic production",
-      "BAU demand - historic exports",
-      "BAU demand - low exports",
-      "Low demand - historic exports",
-      "Low demand - low exports",
-      "Low demand - historic production"
-    )
-  )
-
-  ## convert value of scaled outputs (by ghg) to millions, add unit column
-  plot_df_labor[,
-    high := fifelse(
-      metric %in% c("avoided_health_cost_ghg", "forgone_wages_bil_ghg"),
-      high * 1000,
-      high
-    )
-  ]
-  plot_df_labor[,
-    low := fifelse(
-      metric %in% c("avoided_health_cost_ghg", "forgone_wages_bil_ghg"),
-      low * 1000,
-      low
-    )
-  ]
-  plot_df_labor[,
-    metric := fifelse(
-      metric == "forgone_wages_bil_ghg",
-      "forgone_wages_ghg",
-      metric
-    )
-  ]
-  plot_df_labor[,
-    unit := fifelse(
-      metric %in% c("avoided_health_cost_ghg", "forgone_wages_ghg"),
-      "NPV per avoided GHG MtCO2e\n(2019 USD million / MtCO2e)",
-      "NPV (2019 USD billion)"
-    )
-  ]
-
-  ## change historic to historical
-  plot_df_labor[, scen_id := str_replace(scen_id, "historic", "historical")]
-  plot_df_labor[,
-    refining_scenario := str_replace(
-      refining_scenario,
-      "historic",
-      "historical"
-    )
-  ]
-  plot_df_labor[, scenario := str_replace(scenario, "historic", "historical")]
-
-  ## save figure inputs
-  fwrite(
-    plot_df_labor,
-    file.path(
-      main_path,
-      save_path,
-      "fig-csv-files",
-      "state_npv_fig_inputs_labor.csv"
-    )
-  )
-  # fwrite(plot_df_labor, file.path(main_path, "outputs/academic-out/refining/figures/2024-08-beta-adj/fig-csv-files/", "state_npv_fig_inputs_labor.csv"))
-
-  ## scenarios for filtering
-  remove_scen <- c("LC1 historical production", "BAU historical production")
-  bau_scen <- "BAU historical production"
-
-  ## make the plot
-  ## ---------------------------------------------------
-
-  ## color for refining scenario
-  refin_colors <- c(
-    "LC1 low exports" = "#729b79",
-    "LC1 historical exports" = "#2F4858",
-    "BAU low exports" = "#F6AE2D",
-    "BAU historical exports" = "#F26419"
-  )
-
-  refin_labs <- c(
-    "LC1 low exports" = "Low demand, low exports",
-    "LC1 historical exports" = "Low demand, historical exports",
-    "BAU low exports" = "BAU demand, low exports",
-    "BAU historical exports" = "BAU demand, historical exports"
-  )
-
-  ## refactor
-  # plot_df_health$scen_id <- factor(plot_df_health$scen_id, levels = c('LC1 low exports',
-  #                                                                     'LC1 historical production',
-  #                                                                     'BAU demand\nlow exports',
-  #                                                                     'Low demand\nhistorical exports',
-  #                                                                     'Low demand\nlow exports',
-  #                                                                     'Low demand\nhistorical production'))
-  #
-
-  ## figs - make each separately
-  ## -------------------------------------------------------------------
-
-  hist_prod <- as.data.table(
-    plot_df_health %>%
-      filter(
-        scen_id == bau_scen,
-        unit == "NPV (2019 USD billion)",
-        unit_desc == "USD billion (2019 VSL)"
-      )
-  )
-
-  fig_bxm_a <- ggplot() +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    geom_vline(
-      xintercept = hist_prod[
-        title == "Health: avoided mortality",
-        ghg_perc_diff * -100
-      ],
-      color = "darkgray",
-      lty = 2
-    ) +
-    geom_point(
-      data = plot_df_health %>%
-        filter(
-          !scen_id %in% remove_scen,
-          title == "Health: avoided mortality",
-          unit == "NPV (2019 USD billion)",
-          unit_desc == "USD billion (2019 VSL)",
-          !refining_scenario == "historical production"
-        ),
-      aes(x = ghg_perc_diff * -100, y = value, color = scen_id),
-      shape = 16,
-      size = 3,
-      alpha = 0.9
-    ) +
-    labs(
-      color = NULL,
-      title = "Health: avoided mortality",
-      y = "NPV (2019 USD billion)",
-      x = "GHG emissions reduction (%, 2045 vs 2019)"
-    ) +
-    ylim(0, 60) +
-    xlim(0, 80) +
-    scale_color_manual(
-      values = refin_colors,
-      labels = refin_labs
-    ) +
-    theme_line +
-    theme(
-      legend.position = "bottom",
-      legend.text = element_text(size = 10),
-      plot.title = element_text(hjust = 0.5, size = 12),
-      axis.title.y = element_text(size = 12),
-      axis.title.x = element_text(size = 11),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm"),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5, size = 11),
-      axis.text.y = element_text(vjust = 0.5, hjust = 0.5, size = 11)
-    ) +
-    guides(color = guide_legend(nrow = 2))
-
-  # ## make separete df for labor high and low for plotting
-  # plot_df_labor_pts <- plot_df_labor %>%
-  #   filter(!scen_id %in% remove_scen,
-  #          title == "Labor: forgone wages",
-  #          unit == "NPV (2019 USD billion)",
-  #          refining_scenario != "historical production") %>%
-  #   select(scen_id, demand_scenario, refining_scenario, scenario, ghg_perc_diff, high, low) %>%
-  #   pivot_longer(high:low, names_to = "estimate", values_to =  "npv_2019_usd_billion")
-  #
-  fig_bxm_b <- ggplot() +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    geom_vline(
-      xintercept = hist_prod[
-        title == "Health: avoided mortality",
-        ghg_perc_diff * -100
-      ],
-      color = "darkgray",
-      lty = 2
-    ) +
-    # geom_vline(xintercept = hist_prod[title == "Labor: forgone wages", ghg_perc_diff * -100], color = "darkgray", lty = 2) +
-    # geom_linerange(data = plot_df_labor %>% filter(!scen_id %in% remove_scen,
-    #                                                refining_scenario != "historical production",
-    #                                                metric == "forgone_wages_bil"), aes(x = ghg_perc_diff * -100, ymin = high, ymax = low, color = scen_id), linewidth = 0.5, alpha = 0.8) +
-    geom_point(
-      data = plot_df_labor %>%
-        filter(
-          !scen_id %in% remove_scen,
-          refining_scenario != "historical production",
-          metric == "forgone_wages_bil"
-        ),
-      aes(x = ghg_perc_diff * -100, y = low, color = scen_id),
-      shape = 16,
-      size = 3,
-      alpha = 0.9
-    ) +
-    geom_point(
-      data = plot_df_labor %>%
-        filter(
-          !scen_id %in% remove_scen,
-          refining_scenario != "historical production",
-          metric == "forgone_wages_bil"
-        ),
-      aes(x = ghg_perc_diff * -100, y = high, color = scen_id),
-      shape = 1,
-      size = 3,
-      alpha = 0.9
-    ) +
-    labs(
-      color = NULL,
-      title = "Labor: forgone wages",
-      y = NULL,
-      x = "GHG emissions reduction (%, 2045 vs 2019)"
-    ) +
-    ylim(-60, 0) +
-    xlim(0, 80) +
-    scale_color_manual(
-      values = refin_colors,
-      labels = refin_labs
-    ) +
-    theme_line +
-    theme(
-      legend.position = "bottom",
-      legend.text = element_text(size = 10),
-      plot.title = element_text(hjust = 0.5, size = 12),
-      axis.title.y = element_text(size = 12),
-      axis.title.x = element_text(size = 11),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm"),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5, size = 11),
-      axis.text.y = element_text(vjust = 0.5, hjust = 0.5, size = 11)
-    ) +
-    guides(color = guide_legend(nrow = 2))
-
-  ## legends
-  low_legend_fig <- ggplot() +
-    geom_hline(yintercept = 0, color = "darkgray", size = 0.5) +
-    geom_vline(
-      xintercept = hist_prod[
-        title == "Health: avoided mortality",
-        ghg_perc_diff * -100
-      ],
-      color = "darkgray",
-      lty = 2
-    ) +
-    # geom_vline(xintercept = hist_prod[title == "Labor: forgone wages", ghg_perc_diff * -100], color = "darkgray", lty = 2) +
-    # geom_linerange(data = plot_df_labor %>% filter(!scen_id %in% remove_scen,
-    #                                                refining_scenario != "historic production"), aes(x = ghg_perc_diff * -100, ymin = high, ymax = low, color = scen_id), size = 0.5, alpha = 0.8) +
-    geom_point(
-      data = plot_df_labor %>%
-        filter(
-          !scen_id %in% remove_scen,
-          refining_scenario != "historic production",
-          metric == "forgone_wages_bil"
-        ),
-      aes(x = ghg_perc_diff * -100, y = low, color = scen_id),
-      shape = 16,
-      size = 3,
-      alpha = 1
-    ) +
-    # geom_point(data = plot_df_labor %>% filter(!scen_id %in% remove_scen,
-    #                                            refining_scenario != "historic production",
-    #                                            metric == "forgone_wages_bil"), aes(x = ghg_perc_diff * -100, y = high, color = scen_id), shape = 1, size = 3, alpha = 0.8) +
-    labs(
-      color = "with re-emp:",
-      title = "Labor: forgone wages",
-      y = NULL,
-      x = "GHG emissions reduction (%, 2045 vs 2019)"
-    ) +
-    ylim(-60, 0) +
-    xlim(0, 80) +
-    scale_color_manual(
-      values = refin_colors,
-      labels = refin_labs
-    ) +
-    theme_line +
-    theme(
-      legend.position = "bottom",
-      legend.text = element_text(size = 10),
-      legend.title = element_text(size = 10),
-      plot.title = element_text(hjust = 0.5, size = 12),
-      axis.title.y = element_text(size = 12),
-      axis.title.x = element_text(size = 11),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm"),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5, size = 11),
-      axis.text.y = element_text(vjust = 0.5, hjust = 0.5, size = 11)
-    ) +
-    guides(color = guide_legend(nrow = 1))
-
-  low_legend <- get_legend(
-    low_legend_fig
-  )
-
-  ## save legends
-  ggsave(
-    plot = low_legend,
-    device = "pdf",
-    filename = "fig3_low_legend.pdf",
-    path = file.path(main_path, save_path, "legends"),
-    dpi = 600
-  )
-
-  ## legends
-  high_legend_fig <- ggplot() +
-    geom_hline(yintercept = 0, color = "darkgray", size = 0.5) +
-    geom_vline(
-      xintercept = hist_prod[
-        title == "Health: avoided mortality",
-        ghg_perc_diff * -100
-      ],
-      color = "darkgray",
-      lty = 2
-    ) +
-    # geom_vline(xintercept = hist_prod[title == "Labor: forgone wages", ghg_perc_diff * -100], color = "darkgray", lty = 2) +
-    # geom_linerange(data = plot_df_labor %>% filter(!scen_id %in% remove_scen,
-    #                                                refining_scenario != "historic production"), aes(x = ghg_perc_diff * -100, ymin = high, ymax = low, color = scen_id), size = 0.5, alpha = 0.8) +
-    # geom_point(data = plot_df_labor %>% filter(!scen_id %in% remove_scen,
-    #                                            refining_scenario != "historic production",
-    #                                            metric == "forgone_wages_bil"), aes(x = ghg_perc_diff * -100, y = low, color = scen_id), shape = 16, size = 3, alpha = 1) +
-    geom_point(
-      data = plot_df_labor %>%
-        filter(
-          !scen_id %in% remove_scen,
-          refining_scenario != "historic production",
-          metric == "forgone_wages_bil"
-        ),
-      aes(x = ghg_perc_diff * -100, y = high, color = scen_id),
-      shape = 1,
-      size = 3,
-      alpha = 0.8
-    ) +
-    labs(
-      color = "no re-emp:",
-      title = "Labor: forgone wages",
-      y = NULL,
-      x = "GHG emissions reduction (%, 2045 vs 2019)"
-    ) +
-    ylim(-60, 0) +
-    xlim(0, 80) +
-    scale_color_manual(
-      values = refin_colors,
-      labels = refin_labs
-    ) +
-    theme_line +
-    theme(
-      legend.position = "bottom",
-      legend.text = element_text(size = 10),
-      legend.title = element_text(size = 10),
-      plot.title = element_text(hjust = 0.5, size = 12),
-      axis.title.y = element_text(size = 12),
-      axis.title.x = element_text(size = 11),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm"),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5, size = 11),
-      axis.text.y = element_text(vjust = 0.5, hjust = 0.5, size = 11)
-    ) +
-    guides(color = guide_legend(nrow = 1))
-
-  high_legend <- get_legend(
-    high_legend_fig
-  )
-
-  ## save legends
-  ggsave(
-    plot = high_legend,
-    device = "pdf",
-    filename = "fig3_high_legend.pdf",
-    path = file.path(main_path, save_path, "legends"),
-    dpi = 600
-  )
-
-  # fig_bxm_c <- ggplot() +
-  #   geom_hline(yintercept = 0, color = "darkgray", size = 0.5) +
-  #   geom_vline(xintercept = hist_prod[title == "Climate: avoided damage", ghg_perc_diff * -100], color = "darkgray", lty = 2) +
-  #   geom_point(data = plot_df_long %>% filter(!scen_id %in% remove_scen,
-  #                                       title == "Climate: avoided damage",
-  #                                       unit == "NPV (2019 USD billion)",
-  #                                       !refining_scenario == "historic production"), aes(x = ghg_perc_diff * -100, y = value, color = refining_scenario, shape = demand_scenario), size = 3, alpha = 0.8) +
-  #   geom_hline(yintercept = 0, color = "darkgray", size = 0.5) +
-  #   labs(color = "Policy",
-  #        title = "C. Climate: avoided damage",
-  #        y = NULL,
-  #        x = NULL) +
-  #   ylim(-1, 20) +
-  #   xlim(0, 80) +
-  #   scale_color_manual(values = refin_colors) +
-  #   theme_line +
-  #   theme(legend.position = "none",
-  #         plot.title = element_text(hjust = 0),
-  #         axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-  #         axis.ticks.length.y = unit(0.1, 'cm'),
-  #         axis.ticks.length.x = unit(0.1, 'cm'))
-
-  # fig_bxm_c <- ggplot() +
-  #   geom_hline(yintercept = 0, color = "darkgray", size = 0.5) +
-  #   geom_vline(xintercept = hist_prod[title == "Health: avoided mortality", ghg_perc_diff * -100], color = "darkgray", lty = 2) +
-  #   geom_point(data = plot_df_long %>% filter(!scen_id %in% remove_scen,
-  #                                       title == "Health: avoided mortality per avoided GHG",
-  #                                       unit == "NPV per avoided GHG MtCO2e\n(2019 USD million / MtCO2e)",
-  #                                       unit_desc == "USD billion per GHG (2019 VSL)",
-  #                                       !refining_scenario == "historic production"), aes(x = ghg_perc_diff * -100, y = value, color = refining_scenario, shape = demand_scenario), size = 3, alpha = 0.8) +
-  #   labs(color = "Policy",
-  #        title = "C.",
-  #        y = bquote('NPV (2019 USD million)\nper avoided GHG MtCO'[2]~e),
-  #        x = "GHG emissions reduction (%, 2045 vs 2019)") +
-  #   scale_color_manual(values = refin_colors) +
-  #   ylim(0, 125) +
-  #   xlim(0, 80) +
-  #   theme_line +
-  #   theme(legend.position = "none",
-  #         axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-  #         axis.ticks.length.y = unit(0.1, 'cm'),
-  #         axis.ticks.length.x = unit(0.1, 'cm'))
-  #
-  # fig_bxm_d <- ggplot() +
-  #   geom_hline(yintercept = 0, color = "darkgray", size = 0.5) +
-  #   geom_vline(xintercept = hist_prod[title == "Labor: forgone wages per avoided GHG", ghg_perc_diff * -100], color = "darkgray", lty = 2) +
-  #   geom_point(data = plot_df_long %>% filter(!scen_id %in% remove_scen,
-  #                                       title == "Labor: forgone wages per avoided GHG",
-  #                                       unit == "NPV per avoided GHG MtCO2e\n(2019 USD million / MtCO2e)",
-  #                                       !refining_scenario == "historic production"), aes(x = ghg_perc_diff * -100, y = value, color = refining_scenario, shape = demand_scenario), size = 3, alpha = 0.8) +
-  #   labs(color = "Policy",
-  #        title = "D.",
-  #        y = NULL,
-  #        # y = paste("NPV per avoied GHG ", bquotelab, "(2020 USD million / ", bquotelab),
-  #        x = "GHG emissions reduction (%, 2045 vs 2019)") +
-  #   scale_color_manual(values = refin_colors) +
-  #   theme_line +
-  #   xlim(0, 80) +
-  #   ylim(-125, 0) +
-  #   theme(legend.position = "none",
-  #         axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-  #         axis.ticks.length.y = unit(0.1, 'cm'),
-  #         axis.ticks.length.x = unit(0.1, 'cm'))
-
-  # fig_bxm_f <- ggplot() +
-  #   geom_hline(yintercept = 0, color = "darkgray", size = 0.5) +
-  #   geom_vline(xintercept = hist_prod[title == "Health: avoided mortality", ghg_2045_perc_reduction], color = "darkgray", lty = 2) +
-  #   geom_point(data = npv_dt %>% filter(!scen_id %in% bau_scens,
-  #                                       title == "Climate: avoided damage",
-  #                                       measure == "NPV per avoided GHG MtCO2e\n(2019 USD million / MtCO2e)",
-  #                                       !refining_scenario == "historic production"), aes(x = ghg_2045_perc_reduction, y = value, color = refining_scenario, shape = demand_scenario), size = 3, alpha = 0.8) +
-  #   labs(color = "Policy",
-  #        title = "F.",
-  #        y = NULL,
-  #        # y = paste("NPV per avoied GHG ", bquotelab, "(2020 USD million / ", bquotelab),
-  #        x = "GHG emissions reduction target (%, 2045 vs 2019)") +
-  #   scale_color_manual(values = refin_colors) +
-  #   theme_line +
-  #   ylim(0, 80) +
-  #   xlim(0, 80) +
-  #   theme(legend.position = "none",
-  #         axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-  #         axis.ticks.length.y = unit(0.1, 'cm'),
-  #         axis.ticks.length.x = unit(0.1, 'cm'))
-  #
-  ## extract legend
-  # legend_fig <- ggplot() +
-  #   geom_hline(yintercept = 0, color = "darkgray", size = 0.5) +
-  #   geom_point(data = npv_dt %>% filter(!scen_id %in% bau_scens,
-  #                                        title == "Labor: forgone wages",
-  #                                        measure == "NPV per avoided GHG MtCO2e\n(2019 USD million / MtCO2e)"), aes(x = ghg_2045_perc_reduction, y = value, color = scenario, shape = scenario), size = 3, alpha = 0.8) +
-  #   labs(title = "",
-  #        y = NULL,
-  #        # y = paste("NPV per avoied GHG ", bquotelab, "(2020 USD million / ", bquotelab),
-  #        x = "GHG emissions reduction target (%, 2045 vs 2019)",
-  #        color = NULL,
-  #        shape = NULL) +
-  #   # scale_shape_manual(values = c(16, 16, 16, 17, 17)) +
-  #   scale_color_manual(name = "",
-  #                      labels = c("BAU demand - historic exports",
-  #                                 "BAU demand - historic production",
-  #                                 "BAU demand - low exports",
-  #                                 "Low C. demand - historic exports",
-  #                                 "Low C. demand - low exports"),
-  #                      values = c("BAU demand - historic exports" = "#2F4858",
-  #                                 "BAU demand - historic production" = "#F6AE2D",
-  #                                 "BAU demand - low exports" = "#F26419",
-  #                                 "Low C. demand - historic exports" = "#2F4858",
-  #                                 "Low C. demand - low exports" = "#F26419")) +
-  #   scale_shape_manual(name = "",
-  #                         labels = c("BAU demand - historic exports",
-  #                                    "BAU demand - historic production",
-  #                                    "BAU demand - low exports",
-  #                                    "Low C. demand - historic exports",
-  #                                    "Low C. demand - low exports"),
-  #                         values = c(16, 16, 16, 17, 17)) +
-  #   theme_line +
-  #   theme(legend.position = "bottom",
-  #         axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-  #         axis.ticks.length.y = unit(0.1, 'cm'),
-  #         axis.ticks.length.x = unit(0.1, 'cm')) +
-  #   guides(color = guide_legend(nrow = 2, byrow = TRUE))
-
-  ## combine figure
-  ## ---------------------------------
-
-  ## shared x axis
-  xaxis_lab <- ggdraw() +
-    draw_label("GHG emissions reduction (%, 2045 vs 2019)", size = 12)
-
-  fig3_plot_grid_ab <- plot_grid(
-    fig_bxm_a,
-    fig_bxm_b,
-    align = "vh",
-    labels = c("A", "B"),
-    # # labels = 'AUTO',
-    # label_size = 10,
-    hjust = -1,
-    nrow = 1,
-    rel_widths = c(1, 1)
-  )
-
-  # fig3_plot_grid2 <- plot_grid(
-  #   fig3_plot_grid_ab,
-  #   align = "v",
-  #   # labels = c("(A)", "(B)", "(C)", ""),
-  #   # # labels = 'AUTO',
-  #   # label_size = 10,
-  #   # hjust = -1,
-  #   ncol = 1,
-  #   rel_heights = c(0.95, 0.05)
-  #   # rel_widths = c(1, 1),
-  # )
-
-  return(fig3_plot_grid_ab)
-}
-
-
-## -----------------------------------------------------------------------------
-## NPV figure: refinery level emission factors
-## -----------------------------------------------------------------------------
-
-## NPV figure
-plot_npv_health_labor_ref <- function(
-  main_path,
-  save_path,
-  refining_mortality,
-  state_ghg_output,
-  dt_ghg_2019,
-  annual_labor
-) {
-  npv_df <- refining_mortality %>% as.data.table()
-
-  ## state level
-  state_npv_df <- npv_df[,
-    .(
-      sum_cost_2019_pv = sum(cost_2019_PV), ## constant VSL
-      sum_cost_pv = sum(cost_PV)
-    ), ## changing VSL
-    by = .(scen_id, demand_scenario, refining_scenario)
-  ]
-
-  ## add column
-  state_npv_df[, sum_cost_2019_pv_b := sum_cost_2019_pv / 1e9]
-  state_npv_df[, sum_cost_pv_b := sum_cost_pv / 1e9]
-
-  ## add ghg emission reduction
-  ## 2019 ghg
-  ghg_2019_val <- dt_ghg_2019$mtco2e[1]
-
-  ## 2045 vs 2019 ghg
-  ghg_2045 <- state_ghg_output[year == 2045 & source == "total"]
-  setnames(ghg_2045, "value", "ghg_kg")
-  ghg_2045[, ghg_2045 := (ghg_kg / 1000) / 1e6]
-  ghg_2045[, ghg_2019 := ghg_2019_val]
-  ghg_2045[, perc_diff := (ghg_2045 - ghg_2019) / ghg_2019]
-
-  perc_diff_df <- ghg_2045[, .(
-    demand_scenario,
-    refining_scenario,
-    ghg_2045,
-    ghg_2019,
-    perc_diff
-  )]
-
-  ## summarize by scenario, filter for total
-  state_ghg_df <- state_ghg_output[
-    source == "total",
-    .(total_ghg = sum(value)),
-    by = .(demand_scenario, refining_scenario)
-  ]
-
-  state_ghg_df[, total_ghg_mmt := (total_ghg / 1000) / 1e6]
-
-  ## reference
-  ref_df <- state_ghg_df[
-    demand_scenario == "BAU" & refining_scenario == "historic production",
-    .(total_ghg_mmt)
-  ]
-  setnames(ref_df, "total_ghg_mmt", "ref_ghg_mmt")
-  ref_value <- ref_df$ref_ghg_mmt[1]
-
-  ## merge with summarized df
-  state_ghg_df[, ref_ghg := ref_value]
-  state_ghg_df[, avoided_ghg := (total_ghg_mmt - ref_value) * -1]
-
-  ## merge with health
-  health_ghg_df <- merge(
-    state_npv_df,
-    state_ghg_df[, .(
-      demand_scenario,
-      refining_scenario,
-      total_ghg_mmt,
-      ref_ghg,
-      avoided_ghg
-    )],
-    by = c("demand_scenario", "refining_scenario"),
-    all.x = T
-  )
-
-  ## summarize labor for state
-  state_labor <- annual_labor[,
-    .(
-      sum_total_emp = sum(total_emp),
-      sum_total_comp_pv_h = sum(total_comp_PV_h),
-      sum_total_comp_pv_l = sum(total_comp_PV_l)
-    ),
-    by = .(demand_scenario, refining_scenario, oil_price_scenario)
-  ]
-
-  state_labor <- state_labor[oil_price_scenario == "reference case", ]
-
-  ## ref labor
-  ref_labor <- state_labor[
-    demand_scenario == "BAU" & refining_scenario == "historic production"
-  ]
-  setnames(
-    ref_labor,
-    c("sum_total_emp", "sum_total_comp_pv_h", "sum_total_comp_pv_l"),
-    c("ref_total_emp", "ref_total_comp_pv_h", "ref_total_comp_pv_l")
-  )
-
-  ## add values to labor
-  state_labor[, `:=`(
-    ref_total_emp = ref_labor$ref_total_emp[1],
-    ref_total_comp_pv_h = ref_labor$ref_total_comp_pv_h[1],
-    ref_total_comp_pv_l = ref_labor$ref_total_comp_pv_l[1]
-  )]
-
-  state_labor[,
-    forgone_wages_bil_h := (sum_total_comp_pv_h - ref_total_comp_pv_h) / 1e9
-  ]
-  state_labor[,
-    forgone_wages_bil_l := (sum_total_comp_pv_l - ref_total_comp_pv_l) / 1e9
-  ]
-
-  ## merge with health and ghg
-  health_labor_ghg_df <- merge(
-    health_ghg_df,
-    state_labor[, .(
-      demand_scenario,
-      refining_scenario,
-      oil_price_scenario,
-      sum_total_comp_pv_h,
-      ref_total_comp_pv_h,
-      forgone_wages_bil_h,
-      sum_total_comp_pv_l,
-      ref_total_comp_pv_l,
-      forgone_wages_bil_l
-    )],
-    by = c("demand_scenario", "refining_scenario"),
-    all.x = T
-  )
-
-  ## add ghg perc reduction
-  health_labor_ghg_df <- merge(
-    health_labor_ghg_df,
-    perc_diff_df,
-    by = c("demand_scenario", "refining_scenario"),
-    all.x = T
-  )
-
-  ## prepare to plot
-  plot_df <- health_labor_ghg_df[, .(
-    scen_id,
-    demand_scenario,
-    refining_scenario,
-    oil_price_scenario,
-    sum_cost_pv_b,
-    sum_cost_2019_pv_b,
-    forgone_wages_bil_h,
-    forgone_wages_bil_l,
-    avoided_ghg,
-    perc_diff
-  )]
-
-  setnames(plot_df, "perc_diff", "ghg_perc_diff")
-
-  ## add values / avoided ghgs
-  plot_df[, avoided_health_cost := sum_cost_2019_pv_b * -1]
-  plot_df[, avoided_health_cost_annual_vsl := sum_cost_pv_b * -1]
-  plot_df[, sum_cost_2019_pv_b := NULL]
-  plot_df[, sum_cost_pv_b := NULL]
-
-  plot_df[, `:=`(
-    avoided_health_cost_ghg = avoided_health_cost / avoided_ghg,
-    avoided_health_cost_ghg_vsl2 = avoided_health_cost_annual_vsl / avoided_ghg,
-    forgone_wages_bil_h_ghg = forgone_wages_bil_h / avoided_ghg,
-    forgone_wages_bil_l_ghg = forgone_wages_bil_l / avoided_ghg
-  )]
-
-  plot_df_health <- plot_df %>%
-    select(
-      scen_id,
-      demand_scenario,
-      refining_scenario,
-      ghg_perc_diff,
-      avoided_health_cost,
-      avoided_health_cost_annual_vsl,
-      avoided_health_cost_ghg,
-      avoided_health_cost_ghg_vsl2
-    ) %>%
-    pivot_longer(
-      avoided_health_cost:avoided_health_cost_ghg_vsl2,
-      names_to = "metric",
-      values_to = "value"
-    )
-
-  ## add column for vsl
-  plot_df_health <- plot_df_health %>%
-    mutate(
-      segment = "health",
-      unit_desc = ifelse(
-        metric == "avoided_health_cost",
-        "USD billion (2019 VSL)",
-        ifelse(
-          metric == "avoided_health_cost_annual_vsl",
-          "USD billion (annual VSL)",
-          ifelse(
-            metric == "avoided_health_cost_ghg",
-            "USD billion per GHG (2019 VSL)",
-            "USD billion per GHG (annual VSL)"
-          )
-        )
-      ),
-      metric = ifelse(
-        metric %in% c("avoided_health_cost", "avoided_health_cost_annual_vsl"),
-        "avoided_health_cost",
-        "avoided_health_cost_ghg"
-      )
-    )
-
-  plot_df_labor <- plot_df %>%
-    select(
-      scen_id,
-      demand_scenario,
-      refining_scenario,
-      oil_price_scenario,
-      ghg_perc_diff,
-      forgone_wages_bil_h,
-      forgone_wages_bil_l,
-      forgone_wages_bil_h_ghg,
-      forgone_wages_bil_l_ghg
-    ) %>%
-    pivot_longer(
-      forgone_wages_bil_h:forgone_wages_bil_l_ghg,
-      names_to = "metric",
-      values_to = "value"
-    ) %>%
-    mutate(
-      segment = "labor",
-      unit_desc = ifelse(
-        metric %in% c("forgone_wages_bil_h", "forgone_wages_bil_l"),
-        "USD billion",
-        "USD billion per GHG"
-      ),
-      estimate = ifelse(
-        metric %in% c("forgone_wages_bil_h", "forgone_wages_bil_h_ghg"),
-        "high",
-        "low"
-      ),
-      metric = ifelse(
-        metric %in% c("forgone_wages_bil_h", "forgone_wages_bil_l"),
-        "forgone_wages_bil",
-        "forgone_wages_bil_ghg"
-      )
-    ) %>%
-    select(
-      scen_id,
-      demand_scenario,
-      refining_scenario,
-      ghg_perc_diff,
-      segment,
-      metric,
-      unit_desc,
-      estimate,
-      value
-    ) %>%
-    pivot_wider(names_from = estimate, values_from = value)
-
-  # plot_df_long <- rbind(plot_df_health, plot_df_labor)
-
-  ## prepare health for plotting ------------------------------
-  plot_df_health <- plot_df_health %>%
-    mutate(
-      title = ifelse(
-        metric == "avoided_health_cost",
-        "Health: avoided mortality",
-        "Health: avoided mortality per avoided GHG"
-      )
-    )
-
-  plot_df_health$title <- factor(
-    plot_df_health$title,
-    levels = c(
-      "Health: avoided mortality",
-      "Health: avoided mortality per avoided GHG"
-    )
-  )
-
-  ## rename
-  setDT(plot_df_health)
-  plot_df_health[,
-    scenario := paste0(demand_scenario, " demand - ", refining_scenario)
-  ]
-  plot_df_health[, scenario := gsub("LC1.", "Low ", scenario)]
-  # plot_df_long[, scenario := gsub('BAU', 'Reference', scenario)]
-  # plot_df_long[, short_scen := gsub('BAU', 'Reference', short_scen)]
-  # plot_df_long[, short_scen := gsub('Low C.', 'Low carbon', short_scen)]
-
-  ## refactor
-  plot_df_health$scenario <- factor(
-    plot_df_health$scenario,
-    levels = c(
-      "BAU demand - historic production",
-      "BAU demand - historic exports",
-      "BAU demand - low exports",
-      "Low demand - historic exports",
-      "Low demand - low exports",
-      "Low demand - historic production"
-    )
-  )
-
-  ## convert value of scaled outputs (by ghg) to millions, add unit column
-  plot_df_health[,
-    value := fifelse(
-      metric %in% c("avoided_health_cost_ghg", "forgone_wages_bil_ghg"),
-      value * 1000,
-      value
-    )
-  ]
-  plot_df_health[,
-    metric := fifelse(
-      metric == "forgone_wages_bil_ghg",
-      "forgone_wages_ghg",
-      metric
-    )
-  ]
-  plot_df_health[,
-    unit := fifelse(
-      metric %in% c("avoided_health_cost_ghg", "forgone_wages_ghg"),
-      "NPV per avoided GHG MtCO2e\n(2019 USD million / MtCO2e)",
-      "NPV (2019 USD billion)"
-    )
-  ]
-
-  ## change historic to historical
-  plot_df_health[, scen_id := str_replace(scen_id, "historic", "historical")]
-  plot_df_health[,
-    refining_scenario := str_replace(
-      refining_scenario,
-      "historic",
-      "historical"
-    )
-  ]
-  plot_df_health[, scenario := str_replace(scenario, "historic", "historical")]
-
-  ## save figure inputs
-  fwrite(
-    plot_df_health,
-    file.path(
-      main_path,
-      save_path,
-      "fig-csv-files",
-      "state_npv_fig_inputs_health_ref.csv"
-    )
-  )
-  # fwrite(plot_df_health, file.path(main_path, "outputs/academic-out/refining/figures/2024-08-beta-adj/fig-csv-files/", "state_npv_fig_inputs_health.csv"))
-
-  ## prepare labor ----------------------
-  plot_df_labor <- plot_df_labor %>%
-    mutate(
-      title = ifelse(
-        metric == "forgone_wages_bil",
-        "Labor: forgone wages",
-        "Labor: forgone wages per avoided GHG"
-      )
-    )
-
-  plot_df_labor$title <- factor(
-    plot_df_labor$title,
-    levels = c("Labor: forgone wages", "Labor: forgone wages per avoided GHG")
-  )
-
-  ## rename
-  setDT(plot_df_labor)
-  plot_df_labor[,
-    scenario := paste0(demand_scenario, " demand - ", refining_scenario)
-  ]
-  plot_df_labor[, scenario := gsub("LC1.", "Low ", scenario)]
-  # plot_df_long[, scenario := gsub('BAU', 'Reference', scenario)]
-  # plot_df_long[, short_scen := gsub('BAU', 'Reference', short_scen)]
-  # plot_df_long[, short_scen := gsub('Low C.', 'Low carbon', short_scen)]
-
-  ## refactor
-  plot_df_labor$scenario <- factor(
-    plot_df_labor$scenario,
-    levels = c(
-      "BAU demand - historic production",
-      "BAU demand - historic exports",
-      "BAU demand - low exports",
-      "Low demand - historic exports",
-      "Low demand - low exports",
-      "Low demand - historic production"
-    )
-  )
-
-  ## convert value of scaled outputs (by ghg) to millions, add unit column
-  plot_df_labor[,
-    high := fifelse(
-      metric %in% c("avoided_health_cost_ghg", "forgone_wages_bil_ghg"),
-      high * 1000,
-      high
-    )
-  ]
-  plot_df_labor[,
-    low := fifelse(
-      metric %in% c("avoided_health_cost_ghg", "forgone_wages_bil_ghg"),
-      low * 1000,
-      low
-    )
-  ]
-  plot_df_labor[,
-    metric := fifelse(
-      metric == "forgone_wages_bil_ghg",
-      "forgone_wages_ghg",
-      metric
-    )
-  ]
-  plot_df_labor[,
-    unit := fifelse(
-      metric %in% c("avoided_health_cost_ghg", "forgone_wages_ghg"),
-      "NPV per avoided GHG MtCO2e\n(2019 USD million / MtCO2e)",
-      "NPV (2019 USD billion)"
-    )
-  ]
-
-  ## change historic to historical
-  plot_df_labor[, scen_id := str_replace(scen_id, "historic", "historical")]
-  plot_df_labor[,
-    refining_scenario := str_replace(
-      refining_scenario,
-      "historic",
-      "historical"
-    )
-  ]
-  plot_df_labor[, scenario := str_replace(scenario, "historic", "historical")]
-
-  ## scenarios for filtering
-  remove_scen <- c("LC1 historical production", "BAU historical production")
-  bau_scen <- "BAU historical production"
-
-  ## make the plot
-  ## ---------------------------------------------------
-
-  ## color for refining scenario
-  refin_colors <- c(
-    "LC1 low exports" = "#729b79",
-    "LC1 historical exports" = "#2F4858",
-    "BAU low exports" = "#F6AE2D",
-    "BAU historical exports" = "#F26419"
-  )
-
-  refin_labs <- c(
-    "LC1 low exports" = "Low demand, low exports",
-    "LC1 historical exports" = "Low demand, historical exports",
-    "BAU low exports" = "BAU demand, low exports",
-    "BAU historical exports" = "BAU demand, historical exports"
-  )
-
-  ## refactor
-  # plot_df_health$scen_id <- factor(plot_df_health$scen_id, levels = c('LC1 low exports',
-  #                                                                     'LC1 historical production',
-  #                                                                     'BAU demand\nlow exports',
-  #                                                                     'Low demand\nhistorical exports',
-  #                                                                     'Low demand\nlow exports',
-  #                                                                     'Low demand\nhistorical production'))
-  #
-
-  ## figs - make each separately
-  ## -------------------------------------------------------------------
-
-  hist_prod <- as.data.table(
-    plot_df_health %>%
-      filter(
-        scen_id == bau_scen,
-        unit == "NPV (2019 USD billion)",
-        unit_desc == "USD billion (2019 VSL)"
-      )
-  )
-
-  fig_bxm_a <- ggplot() +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    geom_vline(
-      xintercept = hist_prod[
-        title == "Health: avoided mortality",
-        ghg_perc_diff * -100
-      ],
-      color = "darkgray",
-      lty = 2
-    ) +
-    geom_point(
-      data = plot_df_health %>%
-        filter(
-          !scen_id %in% remove_scen,
-          title == "Health: avoided mortality",
-          unit == "NPV (2019 USD billion)",
-          unit_desc == "USD billion (2019 VSL)",
-          !refining_scenario == "historical production"
-        ),
-      aes(x = ghg_perc_diff * -100, y = value, color = scen_id),
-      shape = 16,
-      size = 3,
-      alpha = 0.9
-    ) +
-    labs(
-      color = NULL,
-      title = "Health: avoided mortality",
-      y = "NPV (2019 USD billion)",
-      x = "GHG emissions reduction (%, 2045 vs 2019)"
-    ) +
-    ylim(0, 50) +
-    xlim(0, 80) +
-    scale_color_manual(
-      values = refin_colors,
-      labels = refin_labs
-    ) +
-    theme_line +
-    theme(
-      legend.position = "bottom",
-      legend.text = element_text(size = 10),
-      plot.title = element_text(hjust = 0.5, size = 12),
-      axis.title.y = element_text(size = 12),
-      axis.title.x = element_text(size = 11),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm"),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5, size = 11),
-      axis.text.y = element_text(vjust = 0.5, hjust = 0.5, size = 11)
-    ) +
-    guides(color = guide_legend(nrow = 2))
-
-  # ## make separete df for labor high and low for plotting
-  # plot_df_labor_pts <- plot_df_labor %>%
-  #   filter(!scen_id %in% remove_scen,
-  #          title == "Labor: forgone wages",
-  #          unit == "NPV (2019 USD billion)",
-  #          refining_scenario != "historical production") %>%
-  #   select(scen_id, demand_scenario, refining_scenario, scenario, ghg_perc_diff, high, low) %>%
-  #   pivot_longer(high:low, names_to = "estimate", values_to =  "npv_2019_usd_billion")
-  #
-  fig_bxm_b <- ggplot() +
-    geom_hline(yintercept = 0, color = "darkgray", size = 0.5) +
-    geom_vline(
-      xintercept = hist_prod[
-        title == "Health: avoided mortality",
-        ghg_perc_diff * -100
-      ],
-      color = "darkgray",
-      lty = 2
-    ) +
-    # geom_vline(xintercept = hist_prod[title == "Labor: forgone wages", ghg_perc_diff * -100], color = "darkgray", lty = 2) +
-    # geom_linerange(data = plot_df_labor %>% filter(!scen_id %in% remove_scen,
-    #                                                refining_scenario != "historical production",
-    #                                                metric == "forgone_wages_bil"), aes(x = ghg_perc_diff * -100, ymin = high, ymax = low, color = scen_id), linewidth = 0.5, alpha = 0.8) +
-    geom_point(
-      data = plot_df_labor %>%
-        filter(
-          !scen_id %in% remove_scen,
-          refining_scenario != "historical production",
-          metric == "forgone_wages_bil"
-        ),
-      aes(x = ghg_perc_diff * -100, y = low, color = scen_id),
-      shape = 16,
-      size = 3,
-      alpha = 0.9
-    ) +
-    geom_point(
-      data = plot_df_labor %>%
-        filter(
-          !scen_id %in% remove_scen,
-          refining_scenario != "historical production",
-          metric == "forgone_wages_bil"
-        ),
-      aes(x = ghg_perc_diff * -100, y = high, color = scen_id),
-      shape = 1,
-      size = 3,
-      alpha = 0.9
-    ) +
-    labs(
-      color = NULL,
-      title = "Labor: forgone wages",
-      y = NULL,
-      x = "GHG emissions reduction (%, 2045 vs 2019)"
-    ) +
-    ylim(-50, 0) +
-    xlim(0, 80) +
-    scale_color_manual(
-      values = refin_colors,
-      labels = refin_labs
-    ) +
-    theme_line +
-    theme(
-      legend.position = "bottom",
-      legend.text = element_text(size = 10),
-      plot.title = element_text(hjust = 0.5, size = 12),
-      axis.title.y = element_text(size = 12),
-      axis.title.x = element_text(size = 11),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm"),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5, size = 11),
-      axis.text.y = element_text(vjust = 0.5, hjust = 0.5, size = 11)
-    ) +
-    guides(color = guide_legend(nrow = 2))
-
-  ## legends
-  low_legend_fig <- ggplot() +
-    geom_hline(yintercept = 0, color = "darkgray", size = 0.5) +
-    geom_vline(
-      xintercept = hist_prod[
-        title == "Health: avoided mortality",
-        ghg_perc_diff * -100
-      ],
-      color = "darkgray",
-      lty = 2
-    ) +
-    # geom_vline(xintercept = hist_prod[title == "Labor: forgone wages", ghg_perc_diff * -100], color = "darkgray", lty = 2) +
-    # geom_linerange(data = plot_df_labor %>% filter(!scen_id %in% remove_scen,
-    #                                                refining_scenario != "historic production"), aes(x = ghg_perc_diff * -100, ymin = high, ymax = low, color = scen_id), size = 0.5, alpha = 0.8) +
-    geom_point(
-      data = plot_df_labor %>%
-        filter(
-          !scen_id %in% remove_scen,
-          refining_scenario != "historic production",
-          metric == "forgone_wages_bil"
-        ),
-      aes(x = ghg_perc_diff * -100, y = low, color = scen_id),
-      shape = 16,
-      size = 3,
-      alpha = 1
-    ) +
-    # geom_point(data = plot_df_labor %>% filter(!scen_id %in% remove_scen,
-    #                                            refining_scenario != "historic production",
-    #                                            metric == "forgone_wages_bil"), aes(x = ghg_perc_diff * -100, y = high, color = scen_id), shape = 1, size = 3, alpha = 0.8) +
-    labs(
-      color = "with re-emp:",
-      title = "Labor: forgone wages",
-      y = NULL,
-      x = "GHG emissions reduction (%, 2045 vs 2019)"
-    ) +
-    ylim(-50, 0) +
-    xlim(0, 80) +
-    scale_color_manual(
-      values = refin_colors,
-      labels = refin_labs
-    ) +
-    theme_line +
-    theme(
-      legend.position = "bottom",
-      legend.text = element_text(size = 10),
-      legend.title = element_text(size = 10),
-      plot.title = element_text(hjust = 0.5, size = 12),
-      axis.title.y = element_text(size = 12),
-      axis.title.x = element_text(size = 11),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm"),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5, size = 11),
-      axis.text.y = element_text(vjust = 0.5, hjust = 0.5, size = 11)
-    ) +
-    guides(color = guide_legend(nrow = 1))
-
-  low_legend <- get_legend(
-    low_legend_fig
-  )
-
-  ## legends
-  high_legend_fig <- ggplot() +
-    geom_hline(yintercept = 0, color = "darkgray", size = 0.5) +
-    geom_vline(
-      xintercept = hist_prod[
-        title == "Health: avoided mortality",
-        ghg_perc_diff * -100
-      ],
-      color = "darkgray",
-      lty = 2
-    ) +
-    # geom_vline(xintercept = hist_prod[title == "Labor: forgone wages", ghg_perc_diff * -100], color = "darkgray", lty = 2) +
-    # geom_linerange(data = plot_df_labor %>% filter(!scen_id %in% remove_scen,
-    #                                                refining_scenario != "historic production"), aes(x = ghg_perc_diff * -100, ymin = high, ymax = low, color = scen_id), size = 0.5, alpha = 0.8) +
-    # geom_point(data = plot_df_labor %>% filter(!scen_id %in% remove_scen,
-    #                                            refining_scenario != "historic production",
-    #                                            metric == "forgone_wages_bil"), aes(x = ghg_perc_diff * -100, y = low, color = scen_id), shape = 16, size = 3, alpha = 1) +
-    geom_point(
-      data = plot_df_labor %>%
-        filter(
-          !scen_id %in% remove_scen,
-          refining_scenario != "historic production",
-          metric == "forgone_wages_bil"
-        ),
-      aes(x = ghg_perc_diff * -100, y = high, color = scen_id),
-      shape = 1,
-      size = 3,
-      alpha = 0.8
-    ) +
-    labs(
-      color = "no re-emp:",
-      title = "Labor: forgone wages",
-      y = NULL,
-      x = "GHG emissions reduction (%, 2045 vs 2019)"
-    ) +
-    ylim(-50, 0) +
-    xlim(0, 80) +
-    scale_color_manual(
-      values = refin_colors,
-      labels = refin_labs
-    ) +
-    theme_line +
-    theme(
-      legend.position = "bottom",
-      legend.text = element_text(size = 10),
-      legend.title = element_text(size = 10),
-      plot.title = element_text(hjust = 0.5, size = 12),
-      axis.title.y = element_text(size = 12),
-      axis.title.x = element_text(size = 11),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm"),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5, size = 11),
-      axis.text.y = element_text(vjust = 0.5, hjust = 0.5, size = 11)
-    ) +
-    guides(color = guide_legend(nrow = 1))
-
-  high_legend <- get_legend(
-    high_legend_fig
-  )
-
-  ## combine figure
-  ## ---------------------------------
-
-  ## shared x axis
-  xaxis_lab <- ggdraw() +
-    draw_label("GHG emissions reduction (%, 2045 vs 2019)", size = 12)
-
-  fig3_plot_grid_ab <- plot_grid(
-    fig_bxm_a,
-    fig_bxm_b,
-    align = "vh",
-    labels = c("A", "B"),
-    # # labels = 'AUTO',
-    # label_size = 10,
-    hjust = -1,
-    nrow = 1,
-    rel_widths = c(1, 1)
-  )
-
-  fig3_plot_grid2 <- plot_grid(
-    fig3_plot_grid_ab,
-    xaxis_lab,
-    align = "v",
-    # labels = c("(A)", "(B)", "(C)", ""),
-    # # labels = 'AUTO',
-    # label_size = 10,
-    # hjust = -1,
-    ncol = 1,
-    rel_heights = c(0.95, 0.05)
-    # rel_widths = c(1, 1),
-  )
-
-  return(fig3_plot_grid_ab)
-}
-
-
-## -----------------------------------------------------------------------------
-## NPV figure: constant VSL
-## -----------------------------------------------------------------------------
-
-## NPV figure
-plot_npv_health_labor_constant_vsl <- function(
-  main_path,
-  save_path,
-  refining_mortality,
-  state_ghg_output,
-  dt_ghg_2019,
-  annual_labor
-) {
-  npv_df <- refining_mortality %>% as.data.table()
-
-  ## state level
-  state_npv_df <- npv_df[,
-    .(
-      sum_cost_2019_pv = sum(cost_2019_PV), ## constant VSL
-      sum_cost_pv = sum(cost_PV)
-    ), ## changing VSL
-    by = .(scen_id, demand_scenario, refining_scenario)
-  ]
-
-  ## add column
-  state_npv_df[, sum_cost_2019_pv_b := sum_cost_2019_pv / 1e9]
-  state_npv_df[, sum_cost_pv_b := sum_cost_pv / 1e9]
-
-  ## add ghg emission reduction
-  ## 2019 ghg
-  ghg_2019_val <- dt_ghg_2019$mtco2e[1]
-
-  ## 2045 vs 2019 ghg
-  ghg_2045 <- state_ghg_output[year == 2045 & source == "total"]
-  setnames(ghg_2045, "value", "ghg_kg")
-  ghg_2045[, ghg_2045 := (ghg_kg / 1000) / 1e6]
-  ghg_2045[, ghg_2019 := ghg_2019_val]
-  ghg_2045[, perc_diff := (ghg_2045 - ghg_2019) / ghg_2019]
-
-  perc_diff_df <- ghg_2045[, .(
-    demand_scenario,
-    refining_scenario,
-    ghg_2045,
-    ghg_2019,
-    perc_diff
-  )]
-
-  ## summarize by scenario, filter for total
-  state_ghg_df <- state_ghg_output[
-    source == "total",
-    .(total_ghg = sum(value)),
-    by = .(demand_scenario, refining_scenario)
-  ]
-
-  state_ghg_df[, total_ghg_mmt := (total_ghg / 1000) / 1e6]
-
-  ## reference
-  ref_df <- state_ghg_df[
-    demand_scenario == "BAU" & refining_scenario == "historic production",
-    .(total_ghg_mmt)
-  ]
-  setnames(ref_df, "total_ghg_mmt", "ref_ghg_mmt")
-  ref_value <- ref_df$ref_ghg_mmt[1]
-
-  ## merge with summarized df
-  state_ghg_df[, ref_ghg := ref_value]
-  state_ghg_df[, avoided_ghg := (total_ghg_mmt - ref_value) * -1]
-
-  ## merge with health
-  health_ghg_df <- merge(
-    state_npv_df,
-    state_ghg_df[, .(
-      demand_scenario,
-      refining_scenario,
-      total_ghg_mmt,
-      ref_ghg,
-      avoided_ghg
-    )],
-    by = c("demand_scenario", "refining_scenario"),
-    all.x = T
-  )
-
-  ## summarize labor for state
-  state_labor <- annual_labor[,
-    .(
-      sum_total_emp = sum(total_emp),
-      sum_total_comp_pv_h = sum(total_comp_PV_h),
-      sum_total_comp_pv_l = sum(total_comp_PV_l)
-    ),
-    by = .(demand_scenario, refining_scenario, oil_price_scenario)
-  ]
-
-  state_labor <- state_labor[oil_price_scenario == "reference case", ]
-
-  ## ref labor
-  ref_labor <- state_labor[
-    demand_scenario == "BAU" & refining_scenario == "historic production"
-  ]
-  setnames(
-    ref_labor,
-    c("sum_total_emp", "sum_total_comp_pv_h", "sum_total_comp_pv_l"),
-    c("ref_total_emp", "ref_total_comp_pv_h", "ref_total_comp_pv_l")
-  )
-
-  ## add values to labor
-  state_labor[, `:=`(
-    ref_total_emp = ref_labor$ref_total_emp[1],
-    ref_total_comp_pv_h = ref_labor$ref_total_comp_pv_h[1],
-    ref_total_comp_pv_l = ref_labor$ref_total_comp_pv_l[1]
-  )]
-
-  state_labor[,
-    forgone_wages_bil_h := (sum_total_comp_pv_h - ref_total_comp_pv_h) / 1e9
-  ]
-  state_labor[,
-    forgone_wages_bil_l := (sum_total_comp_pv_l - ref_total_comp_pv_l) / 1e9
-  ]
-
-  ## merge with health and ghg
-  health_labor_ghg_df <- merge(
-    health_ghg_df,
-    state_labor[, .(
-      demand_scenario,
-      refining_scenario,
-      oil_price_scenario,
-      sum_total_comp_pv_h,
-      ref_total_comp_pv_h,
-      forgone_wages_bil_h,
-      sum_total_comp_pv_l,
-      ref_total_comp_pv_l,
-      forgone_wages_bil_l
-    )],
-    by = c("demand_scenario", "refining_scenario"),
-    all.x = T
-  )
-
-  ## add ghg perc reduction
-  health_labor_ghg_df <- merge(
-    health_labor_ghg_df,
-    perc_diff_df,
-    by = c("demand_scenario", "refining_scenario"),
-    all.x = T
-  )
-
-  ## prepare to plot
-  plot_df <- health_labor_ghg_df[, .(
-    scen_id,
-    demand_scenario,
-    refining_scenario,
-    oil_price_scenario,
-    sum_cost_pv_b,
-    sum_cost_2019_pv_b,
-    forgone_wages_bil_h,
-    forgone_wages_bil_l,
-    avoided_ghg,
-    perc_diff
-  )]
-
-  setnames(plot_df, "perc_diff", "ghg_perc_diff")
-
-  ## add values / avoided ghgs
-  plot_df[, avoided_health_cost := sum_cost_2019_pv_b * -1]
-  plot_df[, avoided_health_cost_annual_vsl := sum_cost_pv_b * -1]
-  plot_df[, sum_cost_2019_pv_b := NULL]
-  plot_df[, sum_cost_pv_b := NULL]
-
-  plot_df[, `:=`(
-    avoided_health_cost_ghg = avoided_health_cost / avoided_ghg,
-    avoided_health_cost_ghg_vsl2 = avoided_health_cost_annual_vsl / avoided_ghg,
-    forgone_wages_bil_h_ghg = forgone_wages_bil_h / avoided_ghg,
-    forgone_wages_bil_l_ghg = forgone_wages_bil_l / avoided_ghg
-  )]
-
-  plot_df_health <- plot_df %>%
-    select(
-      scen_id,
-      demand_scenario,
-      refining_scenario,
-      ghg_perc_diff,
-      avoided_health_cost,
-      avoided_health_cost_annual_vsl,
-      avoided_health_cost_ghg,
-      avoided_health_cost_ghg_vsl2
-    ) %>%
-    pivot_longer(
-      avoided_health_cost:avoided_health_cost_ghg_vsl2,
-      names_to = "metric",
-      values_to = "value"
-    )
-
-  ## add column for vsl
-  plot_df_health <- plot_df_health %>%
-    mutate(
-      segment = "health",
-      unit_desc = ifelse(
-        metric == "avoided_health_cost",
-        "USD billion (2019 VSL)",
-        ifelse(
-          metric == "avoided_health_cost_annual_vsl",
-          "USD billion (annual VSL)",
-          ifelse(
-            metric == "avoided_health_cost_ghg",
-            "USD billion per GHG (2019 VSL)",
-            "USD billion per GHG (annual VSL)"
-          )
-        )
-      ),
-      metric = ifelse(
-        metric %in% c("avoided_health_cost", "avoided_health_cost_annual_vsl"),
-        "avoided_health_cost",
-        "avoided_health_cost_ghg"
-      )
-    )
-
-  plot_df_labor <- plot_df %>%
-    select(
-      scen_id,
-      demand_scenario,
-      refining_scenario,
-      oil_price_scenario,
-      ghg_perc_diff,
-      forgone_wages_bil_h,
-      forgone_wages_bil_l,
-      forgone_wages_bil_h_ghg,
-      forgone_wages_bil_l_ghg
-    ) %>%
-    pivot_longer(
-      forgone_wages_bil_h:forgone_wages_bil_l_ghg,
-      names_to = "metric",
-      values_to = "value"
-    ) %>%
-    mutate(
-      segment = "labor",
-      unit_desc = ifelse(
-        metric %in% c("forgone_wages_bil_h", "forgone_wages_bil_l"),
-        "USD billion",
-        "USD billion per GHG"
-      ),
-      estimate = ifelse(
-        metric %in% c("forgone_wages_bil_h", "forgone_wages_bil_h_ghg"),
-        "high",
-        "low"
-      ),
-      metric = ifelse(
-        metric %in% c("forgone_wages_bil_h", "forgone_wages_bil_l"),
-        "forgone_wages_bil",
-        "forgone_wages_bil_ghg"
-      )
-    ) %>%
-    select(
-      scen_id,
-      demand_scenario,
-      refining_scenario,
-      ghg_perc_diff,
-      segment,
-      metric,
-      unit_desc,
-      estimate,
-      value
-    ) %>%
-    pivot_wider(names_from = estimate, values_from = value)
-
-  # plot_df_long <- rbind(plot_df_health, plot_df_labor)
-
-  ## prepare health for plotting ------------------------------
-  plot_df_health <- plot_df_health %>%
-    mutate(
-      title = ifelse(
-        metric == "avoided_health_cost",
-        "Health: avoided mortality",
-        "Health: avoided mortality per avoided GHG"
-      )
-    )
-
-  plot_df_health$title <- factor(
-    plot_df_health$title,
-    levels = c(
-      "Health: avoided mortality",
-      "Health: avoided mortality per avoided GHG"
-    )
-  )
-
-  ## rename
-  setDT(plot_df_health)
-  plot_df_health[,
-    scenario := paste0(demand_scenario, " demand - ", refining_scenario)
-  ]
-  plot_df_health[, scenario := gsub("LC1.", "Low ", scenario)]
-  # plot_df_long[, scenario := gsub('BAU', 'Reference', scenario)]
-  # plot_df_long[, short_scen := gsub('BAU', 'Reference', short_scen)]
-  # plot_df_long[, short_scen := gsub('Low C.', 'Low carbon', short_scen)]
-
-  ## refactor
-  plot_df_health$scenario <- factor(
-    plot_df_health$scenario,
-    levels = c(
-      "BAU demand - historic production",
-      "BAU demand - historic exports",
-      "BAU demand - low exports",
-      "Low demand - historic exports",
-      "Low demand - low exports",
-      "Low demand - historic production"
-    )
-  )
-
-  ## convert value of scaled outputs (by ghg) to millions, add unit column
-  plot_df_health[,
-    value := fifelse(
-      metric %in% c("avoided_health_cost_ghg", "forgone_wages_bil_ghg"),
-      value * 1000,
-      value
-    )
-  ]
-  plot_df_health[,
-    metric := fifelse(
-      metric == "forgone_wages_bil_ghg",
-      "forgone_wages_ghg",
-      metric
-    )
-  ]
-  plot_df_health[,
-    unit := fifelse(
-      metric %in% c("avoided_health_cost_ghg", "forgone_wages_ghg"),
-      "NPV per avoided GHG MtCO2e\n(2019 USD million / MtCO2e)",
-      "NPV (2019 USD billion)"
-    )
-  ]
-
-  ## change historic to historical
-  plot_df_health[, scen_id := str_replace(scen_id, "historic", "historical")]
-  plot_df_health[,
-    refining_scenario := str_replace(
-      refining_scenario,
-      "historic",
-      "historical"
-    )
-  ]
-  plot_df_health[, scenario := str_replace(scenario, "historic", "historical")]
-
-  ## save figure inputs
-  fwrite(
-    plot_df_health,
-    file.path(
-      main_path,
-      save_path,
-      "fig-csv-files",
-      "state_npv_fig_inputs_health_constant_vsl.csv"
-    )
-  )
-  # fwrite(plot_df_health, file.path(main_path, "outputs/academic-out/refining/figures/2024-08-beta-adj/fig-csv-files/", "state_npv_fig_inputs_health.csv"))
-
-  ## prepare labor ----------------------
-  plot_df_labor <- plot_df_labor %>%
-    mutate(
-      title = ifelse(
-        metric == "forgone_wages_bil",
-        "Labor: forgone wages",
-        "Labor: forgone wages per avoided GHG"
-      )
-    )
-
-  plot_df_labor$title <- factor(
-    plot_df_labor$title,
-    levels = c("Labor: forgone wages", "Labor: forgone wages per avoided GHG")
-  )
-
-  ## rename
-  setDT(plot_df_labor)
-  plot_df_labor[,
-    scenario := paste0(demand_scenario, " demand - ", refining_scenario)
-  ]
-  plot_df_labor[, scenario := gsub("LC1.", "Low ", scenario)]
-  # plot_df_long[, scenario := gsub('BAU', 'Reference', scenario)]
-  # plot_df_long[, short_scen := gsub('BAU', 'Reference', short_scen)]
-  # plot_df_long[, short_scen := gsub('Low C.', 'Low carbon', short_scen)]
-
-  ## refactor
-  plot_df_labor$scenario <- factor(
-    plot_df_labor$scenario,
-    levels = c(
-      "BAU demand - historic production",
-      "BAU demand - historic exports",
-      "BAU demand - low exports",
-      "Low demand - historic exports",
-      "Low demand - low exports",
-      "Low demand - historic production"
-    )
-  )
-
-  ## convert value of scaled outputs (by ghg) to millions, add unit column
-  plot_df_labor[,
-    high := fifelse(
-      metric %in% c("avoided_health_cost_ghg", "forgone_wages_bil_ghg"),
-      high * 1000,
-      high
-    )
-  ]
-  plot_df_labor[,
-    low := fifelse(
-      metric %in% c("avoided_health_cost_ghg", "forgone_wages_bil_ghg"),
-      low * 1000,
-      low
-    )
-  ]
-  plot_df_labor[,
-    metric := fifelse(
-      metric == "forgone_wages_bil_ghg",
-      "forgone_wages_ghg",
-      metric
-    )
-  ]
-  plot_df_labor[,
-    unit := fifelse(
-      metric %in% c("avoided_health_cost_ghg", "forgone_wages_ghg"),
-      "NPV per avoided GHG MtCO2e\n(2019 USD million / MtCO2e)",
-      "NPV (2019 USD billion)"
-    )
-  ]
-
-  ## change historic to historical
-  plot_df_labor[, scen_id := str_replace(scen_id, "historic", "historical")]
-  plot_df_labor[,
-    refining_scenario := str_replace(
-      refining_scenario,
-      "historic",
-      "historical"
-    )
-  ]
-  plot_df_labor[, scenario := str_replace(scenario, "historic", "historical")]
-
-  ## scenarios for filtering
-  remove_scen <- c("LC1 historical production", "BAU historical production")
-  bau_scen <- "BAU historical production"
-
-  ## make the plot
-  ## ---------------------------------------------------
-
-  ## color for refining scenario
-  refin_colors <- c(
-    "LC1 low exports" = "#729b79",
-    "LC1 historical exports" = "#2F4858",
-    "BAU low exports" = "#F6AE2D",
-    "BAU historical exports" = "#F26419"
-  )
-
-  refin_labs <- c(
-    "LC1 low exports" = "Low demand, low exports",
-    "LC1 historical exports" = "Low demand, historical exports",
-    "BAU low exports" = "BAU demand, low exports",
-    "BAU historical exports" = "BAU demand, historical exports"
-  )
-
-  ## refactor
-  # plot_df_health$scen_id <- factor(plot_df_health$scen_id, levels = c('LC1 low exports',
-  #                                                                     'LC1 historical production',
-  #                                                                     'BAU demand\nlow exports',
-  #                                                                     'Low demand\nhistorical exports',
-  #                                                                     'Low demand\nlow exports',
-  #                                                                     'Low demand\nhistorical production'))
-  #
-
-  ## figs - make each separately
-  ## -------------------------------------------------------------------
-
-  hist_prod <- as.data.table(
-    plot_df_health %>%
-      filter(
-        scen_id == bau_scen,
-        unit == "NPV (2019 USD billion)",
-        unit_desc == "USD billion (2019 VSL)"
-      )
-  )
-
-  fig_bxm_a <- ggplot() +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    geom_vline(
-      xintercept = hist_prod[
-        title == "Health: avoided mortality",
-        ghg_perc_diff * -100
-      ],
-      color = "darkgray",
-      lty = 2
-    ) +
-    geom_point(
-      data = plot_df_health %>%
-        filter(
-          !scen_id %in% remove_scen,
-          title == "Health: avoided mortality",
-          unit == "NPV (2019 USD billion)",
-          unit_desc == "USD billion (2019 VSL)",
-          !refining_scenario == "historical production"
-        ),
-      aes(x = ghg_perc_diff * -100, y = value, color = scen_id),
-      shape = 16,
-      size = 3,
-      alpha = 0.9
-    ) +
-    labs(
-      color = NULL,
-      title = "Health: avoided mortality",
-      y = "NPV (2019 USD billion)",
-      x = "GHG emissions reduction (%, 2045 vs 2019)"
-    ) +
-    ylim(0, 50) +
-    xlim(0, 80) +
-    scale_color_manual(
-      values = refin_colors,
-      labels = refin_labs
-    ) +
-    theme_line +
-    theme(
-      legend.position = "bottom",
-      legend.text = element_text(size = 10),
-      plot.title = element_text(hjust = 0.5, size = 12),
-      axis.title.y = element_text(size = 12),
-      axis.title.x = element_text(size = 11),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm"),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5, size = 11),
-      axis.text.y = element_text(vjust = 0.5, hjust = 0.5, size = 11)
-    ) +
-    guides(color = guide_legend(nrow = 2))
-
-  # ## make separete df for labor high and low for plotting
-  # plot_df_labor_pts <- plot_df_labor %>%
-  #   filter(!scen_id %in% remove_scen,
-  #          title == "Labor: forgone wages",
-  #          unit == "NPV (2019 USD billion)",
-  #          refining_scenario != "historical production") %>%
-  #   select(scen_id, demand_scenario, refining_scenario, scenario, ghg_perc_diff, high, low) %>%
-  #   pivot_longer(high:low, names_to = "estimate", values_to =  "npv_2019_usd_billion")
-  #
-  fig_bxm_b <- ggplot() +
-    geom_hline(yintercept = 0, color = "darkgray", size = 0.5) +
-    geom_vline(
-      xintercept = hist_prod[
-        title == "Health: avoided mortality",
-        ghg_perc_diff * -100
-      ],
-      color = "darkgray",
-      lty = 2
-    ) +
-    # geom_vline(xintercept = hist_prod[title == "Labor: forgone wages", ghg_perc_diff * -100], color = "darkgray", lty = 2) +
-    # geom_linerange(data = plot_df_labor %>% filter(!scen_id %in% remove_scen,
-    #                                                refining_scenario != "historical production",
-    #                                                metric == "forgone_wages_bil"), aes(x = ghg_perc_diff * -100, ymin = high, ymax = low, color = scen_id), linewidth = 0.5, alpha = 0.8) +
-    geom_point(
-      data = plot_df_labor %>%
-        filter(
-          !scen_id %in% remove_scen,
-          refining_scenario != "historical production",
-          metric == "forgone_wages_bil"
-        ),
-      aes(x = ghg_perc_diff * -100, y = low, color = scen_id),
-      shape = 16,
-      size = 3,
-      alpha = 0.9
-    ) +
-    geom_point(
-      data = plot_df_labor %>%
-        filter(
-          !scen_id %in% remove_scen,
-          refining_scenario != "historical production",
-          metric == "forgone_wages_bil"
-        ),
-      aes(x = ghg_perc_diff * -100, y = high, color = scen_id),
-      shape = 1,
-      size = 3,
-      alpha = 0.9
-    ) +
-    labs(
-      color = NULL,
-      title = "Labor: forgone wages",
-      y = NULL,
-      x = "GHG emissions reduction (%, 2045 vs 2019)"
-    ) +
-    ylim(-50, 0) +
-    xlim(0, 80) +
-    scale_color_manual(
-      values = refin_colors,
-      labels = refin_labs
-    ) +
-    theme_line +
-    theme(
-      legend.position = "bottom",
-      legend.text = element_text(size = 10),
-      plot.title = element_text(hjust = 0.5, size = 12),
-      axis.title.y = element_text(size = 12),
-      axis.title.x = element_text(size = 11),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm"),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5, size = 11),
-      axis.text.y = element_text(vjust = 0.5, hjust = 0.5, size = 11)
-    ) +
-    guides(color = guide_legend(nrow = 2))
-
-  ## legends
-  low_legend_fig <- ggplot() +
-    geom_hline(yintercept = 0, color = "darkgray", size = 0.5) +
-    geom_vline(
-      xintercept = hist_prod[
-        title == "Health: avoided mortality",
-        ghg_perc_diff * -100
-      ],
-      color = "darkgray",
-      lty = 2
-    ) +
-    # geom_vline(xintercept = hist_prod[title == "Labor: forgone wages", ghg_perc_diff * -100], color = "darkgray", lty = 2) +
-    # geom_linerange(data = plot_df_labor %>% filter(!scen_id %in% remove_scen,
-    #                                                refining_scenario != "historic production"), aes(x = ghg_perc_diff * -100, ymin = high, ymax = low, color = scen_id), size = 0.5, alpha = 0.8) +
-    geom_point(
-      data = plot_df_labor %>%
-        filter(
-          !scen_id %in% remove_scen,
-          refining_scenario != "historic production",
-          metric == "forgone_wages_bil"
-        ),
-      aes(x = ghg_perc_diff * -100, y = low, color = scen_id),
-      shape = 16,
-      size = 3,
-      alpha = 1
-    ) +
-    # geom_point(data = plot_df_labor %>% filter(!scen_id %in% remove_scen,
-    #                                            refining_scenario != "historic production",
-    #                                            metric == "forgone_wages_bil"), aes(x = ghg_perc_diff * -100, y = high, color = scen_id), shape = 1, size = 3, alpha = 0.8) +
-    labs(
-      color = "with re-emp:",
-      title = "Labor: forgone wages",
-      y = NULL,
-      x = "GHG emissions reduction (%, 2045 vs 2019)"
-    ) +
-    ylim(-50, 0) +
-    xlim(0, 80) +
-    scale_color_manual(
-      values = refin_colors,
-      labels = refin_labs
-    ) +
-    theme_line +
-    theme(
-      legend.position = "bottom",
-      legend.text = element_text(size = 10),
-      legend.title = element_text(size = 10),
-      plot.title = element_text(hjust = 0.5, size = 12),
-      axis.title.y = element_text(size = 12),
-      axis.title.x = element_text(size = 11),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm"),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5, size = 11),
-      axis.text.y = element_text(vjust = 0.5, hjust = 0.5, size = 11)
-    ) +
-    guides(color = guide_legend(nrow = 1))
-
-  low_legend <- get_legend(
-    low_legend_fig
-  )
-
-  ## save legends
-  ggsave(
-    plot = low_legend,
-    device = "pdf",
-    filename = "fig3_low_legend.pdf",
-    path = file.path(main_path, save_path, "legends"),
-    dpi = 600
-  )
-
-  ## legends
-  high_legend_fig <- ggplot() +
-    geom_hline(yintercept = 0, color = "darkgray", size = 0.5) +
-    geom_vline(
-      xintercept = hist_prod[
-        title == "Health: avoided mortality",
-        ghg_perc_diff * -100
-      ],
-      color = "darkgray",
-      lty = 2
-    ) +
-    # geom_vline(xintercept = hist_prod[title == "Labor: forgone wages", ghg_perc_diff * -100], color = "darkgray", lty = 2) +
-    # geom_linerange(data = plot_df_labor %>% filter(!scen_id %in% remove_scen,
-    #                                                refining_scenario != "historic production"), aes(x = ghg_perc_diff * -100, ymin = high, ymax = low, color = scen_id), size = 0.5, alpha = 0.8) +
-    # geom_point(data = plot_df_labor %>% filter(!scen_id %in% remove_scen,
-    #                                            refining_scenario != "historic production",
-    #                                            metric == "forgone_wages_bil"), aes(x = ghg_perc_diff * -100, y = low, color = scen_id), shape = 16, size = 3, alpha = 1) +
-    geom_point(
-      data = plot_df_labor %>%
-        filter(
-          !scen_id %in% remove_scen,
-          refining_scenario != "historic production",
-          metric == "forgone_wages_bil"
-        ),
-      aes(x = ghg_perc_diff * -100, y = high, color = scen_id),
-      shape = 1,
-      size = 3,
-      alpha = 0.8
-    ) +
-    labs(
-      color = "no re-emp:",
-      title = "Labor: forgone wages",
-      y = NULL,
-      x = "GHG emissions reduction (%, 2045 vs 2019)"
-    ) +
-    ylim(-50, 0) +
-    xlim(0, 80) +
-    scale_color_manual(
-      values = refin_colors,
-      labels = refin_labs
-    ) +
-    theme_line +
-    theme(
-      legend.position = "bottom",
-      legend.text = element_text(size = 10),
-      legend.title = element_text(size = 10),
-      plot.title = element_text(hjust = 0.5, size = 12),
-      axis.title.y = element_text(size = 12),
-      axis.title.x = element_text(size = 11),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm"),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5, size = 11),
-      axis.text.y = element_text(vjust = 0.5, hjust = 0.5, size = 11)
-    ) +
-    guides(color = guide_legend(nrow = 1))
-
-  high_legend <- get_legend(
-    high_legend_fig
-  )
-
-  ## save legends
-  ggsave(
-    plot = high_legend,
-    device = "pdf",
-    filename = "fig3_high_legend.pdf",
-    path = file.path(main_path, save_path, "legends"),
-    dpi = 600
-  )
-
-  ## combine figure
-  ## ---------------------------------
-
-  ## shared x axis
-  xaxis_lab <- ggdraw() +
-    draw_label("GHG emissions reduction (%, 2045 vs 2019)", size = 12)
-
-  fig3_plot_grid_ab <- plot_grid(
-    fig_bxm_a,
-    fig_bxm_b,
-    align = "vh",
-    labels = c("A", "B"),
-    # # labels = 'AUTO',
-    # label_size = 10,
-    hjust = -1,
-    nrow = 1,
-    rel_widths = c(1, 1)
-  )
-
-  fig3_plot_grid2 <- plot_grid(
-    fig3_plot_grid_ab,
-    xaxis_lab,
-    align = "v",
-    # labels = c("(A)", "(B)", "(C)", ""),
-    # # labels = 'AUTO',
-    # label_size = 10,
-    # hjust = -1,
-    ncol = 1,
-    rel_heights = c(0.95, 0.05)
-    # rel_widths = c(1, 1),
-  )
-}
-
-
-## -----------------------------------------------------------------------------
-## NPV figure: growing VSL, age-based vsl, cluser EFs
-## -----------------------------------------------------------------------------
-
-## NPV figure
-plot_npv_health_labor_growing_vsl <- function(
-  main_path,
-  save_path,
-  refining_mortality,
-  state_ghg_output,
-  dt_ghg_2019,
-  annual_labor
-) {
-  npv_df <- refining_mortality %>% as.data.table()
-
-  ## state level
-  state_npv_df <- npv_df[,
-    .(
-      sum_cost_2019_pv = sum(cost_2019_PV), ## constant VSL
-      sum_cost_pv = sum(cost_PV)
-    ), ## changing VSL
-    by = .(scen_id, demand_scenario, refining_scenario)
-  ]
-
-  ## add column
-  state_npv_df[, sum_cost_2019_pv_b := sum_cost_2019_pv / 1e9]
-  state_npv_df[, sum_cost_pv_b := sum_cost_pv / 1e9]
-
-  ## add ghg emission reduction
-  ## 2019 ghg
-  ghg_2019_val <- dt_ghg_2019$mtco2e[1]
-
-  ## 2045 vs 2019 ghg
-  ghg_2045 <- state_ghg_output[year == 2045 & source == "total"]
-  setnames(ghg_2045, "value", "ghg_kg")
-  ghg_2045[, ghg_2045 := (ghg_kg / 1000) / 1e6]
-  ghg_2045[, ghg_2019 := ghg_2019_val]
-  ghg_2045[, perc_diff := (ghg_2045 - ghg_2019) / ghg_2019]
-
-  perc_diff_df <- ghg_2045[, .(
-    demand_scenario,
-    refining_scenario,
-    ghg_2045,
-    ghg_2019,
-    perc_diff
-  )]
-
-  ## summarize by scenario, filter for total
-  state_ghg_df <- state_ghg_output[
-    source == "total",
-    .(total_ghg = sum(value)),
-    by = .(demand_scenario, refining_scenario)
-  ]
-
-  state_ghg_df[, total_ghg_mmt := (total_ghg / 1000) / 1e6]
-
-  ## reference
-  ref_df <- state_ghg_df[
-    demand_scenario == "BAU" & refining_scenario == "historic production",
-    .(total_ghg_mmt)
-  ]
-  setnames(ref_df, "total_ghg_mmt", "ref_ghg_mmt")
-  ref_value <- ref_df$ref_ghg_mmt[1]
-
-  ## merge with summarized df
-  state_ghg_df[, ref_ghg := ref_value]
-  state_ghg_df[, avoided_ghg := (total_ghg_mmt - ref_value) * -1]
-
-  ## merge with health
-  health_ghg_df <- merge(
-    state_npv_df,
-    state_ghg_df[, .(
-      demand_scenario,
-      refining_scenario,
-      total_ghg_mmt,
-      ref_ghg,
-      avoided_ghg
-    )],
-    by = c("demand_scenario", "refining_scenario"),
-    all.x = T
-  )
-
-  ## summarize labor for state
-  state_labor <- annual_labor[,
-    .(
-      sum_total_emp = sum(total_emp),
-      sum_total_comp_pv_h = sum(total_comp_PV_h),
-      sum_total_comp_pv_l = sum(total_comp_PV_l)
-    ),
-    by = .(demand_scenario, refining_scenario, oil_price_scenario)
-  ]
-
-  state_labor <- state_labor[oil_price_scenario == "reference case", ]
-
-  ## ref labor
-  ref_labor <- state_labor[
-    demand_scenario == "BAU" & refining_scenario == "historic production"
-  ]
-  setnames(
-    ref_labor,
-    c("sum_total_emp", "sum_total_comp_pv_h", "sum_total_comp_pv_l"),
-    c("ref_total_emp", "ref_total_comp_pv_h", "ref_total_comp_pv_l")
-  )
-
-  ## add values to labor
-  state_labor[, `:=`(
-    ref_total_emp = ref_labor$ref_total_emp[1],
-    ref_total_comp_pv_h = ref_labor$ref_total_comp_pv_h[1],
-    ref_total_comp_pv_l = ref_labor$ref_total_comp_pv_l[1]
-  )]
-
-  state_labor[,
-    forgone_wages_bil_h := (sum_total_comp_pv_h - ref_total_comp_pv_h) / 1e9
-  ]
-  state_labor[,
-    forgone_wages_bil_l := (sum_total_comp_pv_l - ref_total_comp_pv_l) / 1e9
-  ]
-
-  ## merge with health and ghg
-  health_labor_ghg_df <- merge(
-    health_ghg_df,
-    state_labor[, .(
-      demand_scenario,
-      refining_scenario,
-      oil_price_scenario,
-      sum_total_comp_pv_h,
-      ref_total_comp_pv_h,
-      forgone_wages_bil_h,
-      sum_total_comp_pv_l,
-      ref_total_comp_pv_l,
-      forgone_wages_bil_l
-    )],
-    by = c("demand_scenario", "refining_scenario"),
-    all.x = T
-  )
-
-  ## add ghg perc reduction
-  health_labor_ghg_df <- merge(
-    health_labor_ghg_df,
-    perc_diff_df,
-    by = c("demand_scenario", "refining_scenario"),
-    all.x = T
-  )
-
-  ## prepare to plot
-  plot_df <- health_labor_ghg_df[, .(
-    scen_id,
-    demand_scenario,
-    refining_scenario,
-    oil_price_scenario,
-    sum_cost_pv_b,
-    sum_cost_2019_pv_b,
-    forgone_wages_bil_h,
-    forgone_wages_bil_l,
-    avoided_ghg,
-    perc_diff
-  )]
-
-  setnames(plot_df, "perc_diff", "ghg_perc_diff")
-
-  ## add values / avoided ghgs
-  plot_df[, avoided_health_cost := sum_cost_2019_pv_b * -1]
-  plot_df[, avoided_health_cost_annual_vsl := sum_cost_pv_b * -1]
-  plot_df[, sum_cost_2019_pv_b := NULL]
-  plot_df[, sum_cost_pv_b := NULL]
-
-  plot_df[, `:=`(
-    avoided_health_cost_ghg = avoided_health_cost / avoided_ghg,
-    avoided_health_cost_ghg_vsl2 = avoided_health_cost_annual_vsl / avoided_ghg,
-    forgone_wages_bil_h_ghg = forgone_wages_bil_h / avoided_ghg,
-    forgone_wages_bil_l_ghg = forgone_wages_bil_l / avoided_ghg
-  )]
-
-  plot_df_health <- plot_df %>%
-    select(
-      scen_id,
-      demand_scenario,
-      refining_scenario,
-      ghg_perc_diff,
-      avoided_health_cost,
-      avoided_health_cost_annual_vsl,
-      avoided_health_cost_ghg,
-      avoided_health_cost_ghg_vsl2
-    ) %>%
-    pivot_longer(
-      avoided_health_cost:avoided_health_cost_ghg_vsl2,
-      names_to = "metric",
-      values_to = "value"
-    )
-
-  ## add column for vsl
-  plot_df_health <- plot_df_health %>%
-    mutate(
-      segment = "health",
-      unit_desc = ifelse(
-        metric == "avoided_health_cost",
-        "USD billion (2019 VSL)",
-        ifelse(
-          metric == "avoided_health_cost_annual_vsl",
-          "USD billion (annual VSL)",
-          ifelse(
-            metric == "avoided_health_cost_ghg",
-            "USD billion per GHG (2019 VSL)",
-            "USD billion per GHG (annual VSL)"
-          )
-        )
-      ),
-      metric = ifelse(
-        metric %in% c("avoided_health_cost", "avoided_health_cost_annual_vsl"),
-        "avoided_health_cost",
-        "avoided_health_cost_ghg"
-      )
-    )
-
-  plot_df_labor <- plot_df %>%
-    select(
-      scen_id,
-      demand_scenario,
-      refining_scenario,
-      oil_price_scenario,
-      ghg_perc_diff,
-      forgone_wages_bil_h,
-      forgone_wages_bil_l,
-      forgone_wages_bil_h_ghg,
-      forgone_wages_bil_l_ghg
-    ) %>%
-    pivot_longer(
-      forgone_wages_bil_h:forgone_wages_bil_l_ghg,
-      names_to = "metric",
-      values_to = "value"
-    ) %>%
-    mutate(
-      segment = "labor",
-      unit_desc = ifelse(
-        metric %in% c("forgone_wages_bil_h", "forgone_wages_bil_l"),
-        "USD billion",
-        "USD billion per GHG"
-      ),
-      estimate = ifelse(
-        metric %in% c("forgone_wages_bil_h", "forgone_wages_bil_h_ghg"),
-        "high",
-        "low"
-      ),
-      metric = ifelse(
-        metric %in% c("forgone_wages_bil_h", "forgone_wages_bil_l"),
-        "forgone_wages_bil",
-        "forgone_wages_bil_ghg"
-      )
-    ) %>%
-    select(
-      scen_id,
-      demand_scenario,
-      refining_scenario,
-      ghg_perc_diff,
-      segment,
-      metric,
-      unit_desc,
-      estimate,
-      value
-    ) %>%
-    pivot_wider(names_from = estimate, values_from = value)
-
-  # plot_df_long <- rbind(plot_df_health, plot_df_labor)
-
-  ## prepare health for plotting ------------------------------
-  plot_df_health <- plot_df_health %>%
-    mutate(
-      title = ifelse(
-        metric == "avoided_health_cost",
-        "Health: avoided mortality",
-        "Health: avoided mortality per avoided GHG"
-      )
-    )
-
-  plot_df_health$title <- factor(
-    plot_df_health$title,
-    levels = c(
-      "Health: avoided mortality",
-      "Health: avoided mortality per avoided GHG"
-    )
-  )
-
-  ## rename
-  setDT(plot_df_health)
-  plot_df_health[,
-    scenario := paste0(demand_scenario, " demand - ", refining_scenario)
-  ]
-  plot_df_health[, scenario := gsub("LC1.", "Low ", scenario)]
-  # plot_df_long[, scenario := gsub('BAU', 'Reference', scenario)]
-  # plot_df_long[, short_scen := gsub('BAU', 'Reference', short_scen)]
-  # plot_df_long[, short_scen := gsub('Low C.', 'Low carbon', short_scen)]
-
-  ## refactor
-  plot_df_health$scenario <- factor(
-    plot_df_health$scenario,
-    levels = c(
-      "BAU demand - historic production",
-      "BAU demand - historic exports",
-      "BAU demand - low exports",
-      "Low demand - historic exports",
-      "Low demand - low exports",
-      "Low demand - historic production"
-    )
-  )
-
-  ## convert value of scaled outputs (by ghg) to millions, add unit column
-  plot_df_health[,
-    value := fifelse(
-      metric %in% c("avoided_health_cost_ghg", "forgone_wages_bil_ghg"),
-      value * 1000,
-      value
-    )
-  ]
-  plot_df_health[,
-    metric := fifelse(
-      metric == "forgone_wages_bil_ghg",
-      "forgone_wages_ghg",
-      metric
-    )
-  ]
-  plot_df_health[,
-    unit := fifelse(
-      metric %in% c("avoided_health_cost_ghg", "forgone_wages_ghg"),
-      "NPV per avoided GHG MtCO2e\n(2019 USD million / MtCO2e)",
-      "NPV (2019 USD billion)"
-    )
-  ]
-
-  ## change historic to historical
-  plot_df_health[, scen_id := str_replace(scen_id, "historic", "historical")]
-  plot_df_health[,
-    refining_scenario := str_replace(
-      refining_scenario,
-      "historic",
-      "historical"
-    )
-  ]
-  plot_df_health[, scenario := str_replace(scenario, "historic", "historical")]
-
-  ## save figure inputs
-  fwrite(
-    plot_df_health,
-    file.path(
-      main_path,
-      save_path,
-      "fig-csv-files",
-      "state_npv_fig_inputs_health_growing_vsl.csv"
-    )
-  )
-  # fwrite(plot_df_health, file.path(main_path, "outputs/academic-out/refining/figures/2024-08-beta-adj/fig-csv-files/", "state_npv_fig_inputs_health.csv"))
-
-  ## prepare labor ----------------------
-  plot_df_labor <- plot_df_labor %>%
-    mutate(
-      title = ifelse(
-        metric == "forgone_wages_bil",
-        "Labor: forgone wages",
-        "Labor: forgone wages per avoided GHG"
-      )
-    )
-
-  plot_df_labor$title <- factor(
-    plot_df_labor$title,
-    levels = c("Labor: forgone wages", "Labor: forgone wages per avoided GHG")
-  )
-
-  ## rename
-  setDT(plot_df_labor)
-  plot_df_labor[,
-    scenario := paste0(demand_scenario, " demand - ", refining_scenario)
-  ]
-  plot_df_labor[, scenario := gsub("LC1.", "Low ", scenario)]
-  # plot_df_long[, scenario := gsub('BAU', 'Reference', scenario)]
-  # plot_df_long[, short_scen := gsub('BAU', 'Reference', short_scen)]
-  # plot_df_long[, short_scen := gsub('Low C.', 'Low carbon', short_scen)]
-
-  ## refactor
-  plot_df_labor$scenario <- factor(
-    plot_df_labor$scenario,
-    levels = c(
-      "BAU demand - historic production",
-      "BAU demand - historic exports",
-      "BAU demand - low exports",
-      "Low demand - historic exports",
-      "Low demand - low exports",
-      "Low demand - historic production"
-    )
-  )
-
-  ## convert value of scaled outputs (by ghg) to millions, add unit column
-  plot_df_labor[,
-    high := fifelse(
-      metric %in% c("avoided_health_cost_ghg", "forgone_wages_bil_ghg"),
-      high * 1000,
-      high
-    )
-  ]
-  plot_df_labor[,
-    low := fifelse(
-      metric %in% c("avoided_health_cost_ghg", "forgone_wages_bil_ghg"),
-      low * 1000,
-      low
-    )
-  ]
-  plot_df_labor[,
-    metric := fifelse(
-      metric == "forgone_wages_bil_ghg",
-      "forgone_wages_ghg",
-      metric
-    )
-  ]
-  plot_df_labor[,
-    unit := fifelse(
-      metric %in% c("avoided_health_cost_ghg", "forgone_wages_ghg"),
-      "NPV per avoided GHG MtCO2e\n(2019 USD million / MtCO2e)",
-      "NPV (2019 USD billion)"
-    )
-  ]
-
-  ## change historic to historical
-  plot_df_labor[, scen_id := str_replace(scen_id, "historic", "historical")]
-  plot_df_labor[,
-    refining_scenario := str_replace(
-      refining_scenario,
-      "historic",
-      "historical"
-    )
-  ]
-  plot_df_labor[, scenario := str_replace(scenario, "historic", "historical")]
-
-  ## scenarios for filtering
-  remove_scen <- c("LC1 historical production", "BAU historical production")
-  bau_scen <- "BAU historical production"
-
-  ## make the plot
-  ## ---------------------------------------------------
-
-  ## color for refining scenario
-  refin_colors <- c(
-    "LC1 low exports" = "#729b79",
-    "LC1 historical exports" = "#2F4858",
-    "BAU low exports" = "#F6AE2D",
-    "BAU historical exports" = "#F26419"
-  )
-
-  refin_labs <- c(
-    "LC1 low exports" = "Low demand, low exports",
-    "LC1 historical exports" = "Low demand, historical exports",
-    "BAU low exports" = "BAU demand, low exports",
-    "BAU historical exports" = "BAU demand, historical exports"
-  )
-
-  ## refactor
-  # plot_df_health$scen_id <- factor(plot_df_health$scen_id, levels = c('LC1 low exports',
-  #                                                                     'LC1 historical production',
-  #                                                                     'BAU demand\nlow exports',
-  #                                                                     'Low demand\nhistorical exports',
-  #                                                                     'Low demand\nlow exports',
-  #                                                                     'Low demand\nhistorical production'))
-  #
-
-  ## figs - make each separately
-  ## -------------------------------------------------------------------
-
-  hist_prod <- as.data.table(
-    plot_df_health %>%
-      filter(
-        scen_id == bau_scen,
-        unit == "NPV (2019 USD billion)",
-        unit_desc == "USD billion (annual VSL)"
-      )
-  )
-
-  fig_bxm_a <- ggplot() +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    geom_vline(
-      xintercept = hist_prod[
-        title == "Health: avoided mortality",
-        ghg_perc_diff * -100
-      ],
-      color = "darkgray",
-      lty = 2
-    ) +
-    geom_point(
-      data = plot_df_health %>%
-        filter(
-          !scen_id %in% remove_scen,
-          title == "Health: avoided mortality",
-          unit == "NPV (2019 USD billion)",
-          unit_desc == "USD billion (annual VSL)",
-          !refining_scenario == "historical production"
-        ),
-      aes(x = ghg_perc_diff * -100, y = value, color = scen_id),
-      shape = 16,
-      size = 3,
-      alpha = 0.9
-    ) +
-    labs(
-      color = NULL,
-      title = "Health: avoided mortality",
-      y = "NPV (2019 USD billion)",
-      x = "GHG emissions reduction (%, 2045 vs 2019)"
-    ) +
-    ylim(0, 50) +
-    xlim(0, 80) +
-    scale_color_manual(
-      values = refin_colors,
-      labels = refin_labs
-    ) +
-    theme_line +
-    theme(
-      legend.position = "bottom",
-      legend.text = element_text(size = 10),
-      plot.title = element_text(hjust = 0.5, size = 12),
-      axis.title.y = element_text(size = 12),
-      axis.title.x = element_text(size = 11),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm"),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5, size = 11),
-      axis.text.y = element_text(vjust = 0.5, hjust = 0.5, size = 11)
-    ) +
-    guides(color = guide_legend(nrow = 2))
-
-  # ## make separete df for labor high and low for plotting
-  # plot_df_labor_pts <- plot_df_labor %>%
-  #   filter(!scen_id %in% remove_scen,
-  #          title == "Labor: forgone wages",
-  #          unit == "NPV (2019 USD billion)",
-  #          refining_scenario != "historical production") %>%
-  #   select(scen_id, demand_scenario, refining_scenario, scenario, ghg_perc_diff, high, low) %>%
-  #   pivot_longer(high:low, names_to = "estimate", values_to =  "npv_2019_usd_billion")
-  #
-  fig_bxm_b <- ggplot() +
-    geom_hline(yintercept = 0, color = "darkgray", size = 0.5) +
-    geom_vline(
-      xintercept = hist_prod[
-        title == "Health: avoided mortality",
-        ghg_perc_diff * -100
-      ],
-      color = "darkgray",
-      lty = 2
-    ) +
-    # geom_vline(xintercept = hist_prod[title == "Labor: forgone wages", ghg_perc_diff * -100], color = "darkgray", lty = 2) +
-    # geom_linerange(data = plot_df_labor %>% filter(!scen_id %in% remove_scen,
-    #                                                refining_scenario != "historical production",
-    #                                                metric == "forgone_wages_bil"), aes(x = ghg_perc_diff * -100, ymin = high, ymax = low, color = scen_id), linewidth = 0.5, alpha = 0.8) +
-    geom_point(
-      data = plot_df_labor %>%
-        filter(
-          !scen_id %in% remove_scen,
-          refining_scenario != "historical production",
-          metric == "forgone_wages_bil"
-        ),
-      aes(x = ghg_perc_diff * -100, y = low, color = scen_id),
-      shape = 16,
-      size = 3,
-      alpha = 0.9
-    ) +
-    geom_point(
-      data = plot_df_labor %>%
-        filter(
-          !scen_id %in% remove_scen,
-          refining_scenario != "historical production",
-          metric == "forgone_wages_bil"
-        ),
-      aes(x = ghg_perc_diff * -100, y = high, color = scen_id),
-      shape = 1,
-      size = 3,
-      alpha = 0.9
-    ) +
-    labs(
-      color = NULL,
-      title = "Labor: forgone wages",
-      y = NULL,
-      x = "GHG emissions reduction (%, 2045 vs 2019)"
-    ) +
-    ylim(-50, 0) +
-    xlim(0, 80) +
-    scale_color_manual(
-      values = refin_colors,
-      labels = refin_labs
-    ) +
-    theme_line +
-    theme(
-      legend.position = "bottom",
-      legend.text = element_text(size = 10),
-      plot.title = element_text(hjust = 0.5, size = 12),
-      axis.title.y = element_text(size = 12),
-      axis.title.x = element_text(size = 11),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm"),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5, size = 11),
-      axis.text.y = element_text(vjust = 0.5, hjust = 0.5, size = 11)
-    ) +
-    guides(color = guide_legend(nrow = 2))
-
-  ## legends
-  low_legend_fig <- ggplot() +
-    geom_hline(yintercept = 0, color = "darkgray", size = 0.5) +
-    geom_vline(
-      xintercept = hist_prod[
-        title == "Health: avoided mortality",
-        ghg_perc_diff * -100
-      ],
-      color = "darkgray",
-      lty = 2
-    ) +
-    # geom_vline(xintercept = hist_prod[title == "Labor: forgone wages", ghg_perc_diff * -100], color = "darkgray", lty = 2) +
-    # geom_linerange(data = plot_df_labor %>% filter(!scen_id %in% remove_scen,
-    #                                                refining_scenario != "historic production"), aes(x = ghg_perc_diff * -100, ymin = high, ymax = low, color = scen_id), size = 0.5, alpha = 0.8) +
-    geom_point(
-      data = plot_df_labor %>%
-        filter(
-          !scen_id %in% remove_scen,
-          refining_scenario != "historic production",
-          metric == "forgone_wages_bil"
-        ),
-      aes(x = ghg_perc_diff * -100, y = low, color = scen_id),
-      shape = 16,
-      size = 3,
-      alpha = 1
-    ) +
-    # geom_point(data = plot_df_labor %>% filter(!scen_id %in% remove_scen,
-    #                                            refining_scenario != "historic production",
-    #                                            metric == "forgone_wages_bil"), aes(x = ghg_perc_diff * -100, y = high, color = scen_id), shape = 1, size = 3, alpha = 0.8) +
-    labs(
-      color = "with re-emp:",
-      title = "Labor: forgone wages",
-      y = NULL,
-      x = "GHG emissions reduction (%, 2045 vs 2019)"
-    ) +
-    ylim(-50, 0) +
-    xlim(0, 80) +
-    scale_color_manual(
-      values = refin_colors,
-      labels = refin_labs
-    ) +
-    theme_line +
-    theme(
-      legend.position = "bottom",
-      legend.text = element_text(size = 10),
-      legend.title = element_text(size = 10),
-      plot.title = element_text(hjust = 0.5, size = 12),
-      axis.title.y = element_text(size = 12),
-      axis.title.x = element_text(size = 11),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm"),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5, size = 11),
-      axis.text.y = element_text(vjust = 0.5, hjust = 0.5, size = 11)
-    ) +
-    guides(color = guide_legend(nrow = 1))
-
-  low_legend <- get_legend(
-    low_legend_fig
-  )
-
-  ## save legends
-  ggsave(
-    plot = low_legend,
-    device = "pdf",
-    filename = "fig3_low_legend.pdf",
-    path = file.path(main_path, save_path, "legends"),
-    dpi = 600
-  )
-
-  ## legends
-  high_legend_fig <- ggplot() +
-    geom_hline(yintercept = 0, color = "darkgray", size = 0.5) +
-    geom_vline(
-      xintercept = hist_prod[
-        title == "Health: avoided mortality",
-        ghg_perc_diff * -100
-      ],
-      color = "darkgray",
-      lty = 2
-    ) +
-    # geom_vline(xintercept = hist_prod[title == "Labor: forgone wages", ghg_perc_diff * -100], color = "darkgray", lty = 2) +
-    # geom_linerange(data = plot_df_labor %>% filter(!scen_id %in% remove_scen,
-    #                                                refining_scenario != "historic production"), aes(x = ghg_perc_diff * -100, ymin = high, ymax = low, color = scen_id), size = 0.5, alpha = 0.8) +
-    # geom_point(data = plot_df_labor %>% filter(!scen_id %in% remove_scen,
-    #                                            refining_scenario != "historic production",
-    #                                            metric == "forgone_wages_bil"), aes(x = ghg_perc_diff * -100, y = low, color = scen_id), shape = 16, size = 3, alpha = 1) +
-    geom_point(
-      data = plot_df_labor %>%
-        filter(
-          !scen_id %in% remove_scen,
-          refining_scenario != "historic production",
-          metric == "forgone_wages_bil"
-        ),
-      aes(x = ghg_perc_diff * -100, y = high, color = scen_id),
-      shape = 1,
-      size = 3,
-      alpha = 0.8
-    ) +
-    labs(
-      color = "no re-emp:",
-      title = "Labor: forgone wages",
-      y = NULL,
-      x = "GHG emissions reduction (%, 2045 vs 2019)"
-    ) +
-    ylim(-50, 0) +
-    xlim(0, 80) +
-    scale_color_manual(
-      values = refin_colors,
-      labels = refin_labs
-    ) +
-    theme_line +
-    theme(
-      legend.position = "bottom",
-      legend.text = element_text(size = 10),
-      legend.title = element_text(size = 10),
-      plot.title = element_text(hjust = 0.5, size = 12),
-      axis.title.y = element_text(size = 12),
-      axis.title.x = element_text(size = 11),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm"),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5, size = 11),
-      axis.text.y = element_text(vjust = 0.5, hjust = 0.5, size = 11)
-    ) +
-    guides(color = guide_legend(nrow = 1))
-
-  high_legend <- get_legend(
-    high_legend_fig
-  )
-
-  ## save legends
-  ggsave(
-    plot = high_legend,
-    device = "pdf",
-    filename = "fig3_high_legend.pdf",
-    path = file.path(main_path, save_path, "legends"),
-    dpi = 600
-  )
-
-  ## combine figure
-  ## ---------------------------------
-
-  ## shared x axis
-  xaxis_lab <- ggdraw() +
-    draw_label("GHG emissions reduction (%, 2045 vs 2019)", size = 12)
-
-  fig3_plot_grid_ab <- plot_grid(
-    fig_bxm_a,
-    fig_bxm_b,
-    align = "vh",
-    labels = c("A", "B"),
-    # # labels = 'AUTO',
-    # label_size = 10,
-    hjust = -1,
-    nrow = 1,
-    rel_widths = c(1, 1)
-  )
-
-  fig3_plot_grid2 <- plot_grid(
-    fig3_plot_grid_ab,
-    xaxis_lab,
-    align = "v",
-    # labels = c("(A)", "(B)", "(C)", ""),
-    # # labels = 'AUTO',
-    # label_size = 10,
-    # hjust = -1,
-    ncol = 1,
-    rel_heights = c(0.95, 0.05)
-    # rel_widths = c(1, 1),
-  )
-}
-
-
-# ## compute county-level outputs for low demand low export scenario
-# ## ---------------------------------------------------------------------
-#
-# create_county_health_labor_df <- function(main_path,
-#                                    refining_mortality,
-#                                    state_ghg_output,
-#                                    annual_labor,
-#                                    raw_ct_2020_all,
-#                                    raw_counties) {
-#
-#
-#   ## 2020 population
-#   ##---------------------------------------------------------------------
-#
-#   ## geoid to census tract
-#   county_df <- raw_ct_2020_all %>%
-#     select(census_tract = GEOID, COUNTYFP) %>%
-#     st_drop_geometry()
-#
-#   county_names <- raw_counties %>%
-#     select(COUNTYFP, NAME) %>%
-#     st_drop_geometry() %>%
-#     unique()
-#
-#   ## calc 2020 pop by demographic
-#   pop_2020 <- refining_mortality %>%
-#     filter(year == 2020) %>%
-#     select(census_tract, year, pop) %>%
-#     unique() %>%
-#     left_join(county_df) %>%
-#     as.data.table()
-#
-#   ## summarize by county
-#   pop_2020_county <- pop_2020[, .(pop_2020 = sum(pop)),
-#                               by = .(COUNTYFP)]
-#
-#
-#
-#   ## make maps for c and d
-#   ## ---------------------------------------------------------------------------
-#
-#   ## health
-#   health_map_df <- refining_mortality %>%
-#     filter(demand_scenario == "LC1" & refining_scenario == "low exports") %>%
-#     as.data.table()
-#
-#   ## rename
-#   health_map_df[, scenario := paste0(demand_scenario, " demand - ", refining_scenario)]
-#   health_map_df[, scenario := gsub('LC1.', 'Low ', scenario)]
-#   health_map_df[, scenario := str_replace(scenario, "historic", "historical")]
-#   health_map_df[, scen_id := str_replace(scen_id, "historic", "historical")]
-#
-#   ## group by scenario and census tract, sum cost_2019_pv
-#   health_map_npv_df <- health_map_df[, .(sum_cost_2019_pv = sum(cost_2019_PV)),
-#                                  by = .(census_tract, scenario, scen_id)]
-#
-#   # ## save figure inputs
-#   # fwrite(health_map_npv_df, file.path(main_path, save_path, "fig-csv-files", "ct_health_npv_fig_inputs.csv"))
-#
-#
-#   ## health
-#   health_map_npv_df <- merge(health_map_npv_df %>% rename(GEOID = census_tract), raw_ct_2020_all %>% select(GEOID, COUNTYFP, ALAND),
-#                              by = "GEOID")
-#
-#   health_map_npv_df <- health_map_npv_df %>%
-#     mutate(sum_cost_2019_pv = sum_cost_2019_pv * -1)
-#
-#   ## merge with county codes
-#   health_map_npv_df <- merge(health_map_npv_df, raw_counties %>% select(COUNTYFP, NAME) %>% st_drop_geometry(),
-#                             by = "COUNTYFP",
-#                             allow.cartesian = T,
-#                             all.x = T)
-#
-#   ## summarise by county, rank by sum of sum_cost_2019_pv
-#   health_county_df <- health_map_npv_df[ALAND > 0, .(sum_cost_2019_pv = sum(sum_cost_2019_pv)),
-#                                         by = .(NAME, COUNTYFP, scenario, scen_id)]
-#
-#
-#
-#   health_county_df <- health_county_df %>%
-#     select(scenario, scen_id, NAME, COUNTYFP, sum_cost_2019_pv) %>%
-#     left_join(pop_2020_county) %>%
-#     rename(npv_health_av_mort = sum_cost_2019_pv) %>%
-#     mutate(npv_health_av_mort_pc = npv_health_av_mort / pop_2020) %>%
-#     arrange(-npv_health_av_mort_pc)
-#
-#   # ## save figure inputs
-#   # fwrite(health_county_df, file.path(main_path, save_path, "fig-csv-files", "county_health_npv_fig_inputs.csv"))
-#
-#
-#   ## for plotting health
-#   health_map_npv_df <- st_as_sf(health_map_npv_df)
-#
-#   health_map_npv_df <- st_transform(health_map_npv_df, crs = "EPSG:4269")
-#
-#   health_map_npv_df <- st_make_valid(health_map_npv_df)
-#
-#
-#   ## consrtuct labor df
-#   labor_map_df <- copy(annual_labor)
-#
-#   labor_map_df[, scen_id := paste(demand_scenario, refining_scenario)]
-#   labor_map_df[, scen_id := str_replace(scen_id, "historic", "historical")]
-#   labor_map_df[, scenario := paste0(demand_scenario, " demand - ", refining_scenario)]
-#   labor_map_df[, scenario := gsub('LC1.', 'Low ', scenario)]
-#   labor_map_df[, scenario := str_replace(scenario, "historic", "historical")]
-#
-#   labor_map_df <- labor_map_df[scenario == "Low demand - low exports" | scenario == "BAU demand - historical production"]
-#
-#   ## group by scenario and census tract, sum cost_2019_pv
-#   labor_map_df <- labor_map_df[, .(sum_total_comp_usd19_h = sum(total_comp_usd19_h),
-#                                    sum_total_comp_usd19_l = sum(total_comp_usd19_l)),
-#                                by = .(destination, scenario, scen_id)]
-#
-#
-#
-#   labor_map_df[, ref := ifelse(scenario == "BAU demand - historical production", "bau", "alt")]
-#
-#   ## low
-#   labor_map_df_l <- labor_map_df %>%
-#     mutate(county = str_remove(destination, " County, CA")) %>%
-#     select(county, ref, sum_total_comp_usd19_l) %>%
-#     pivot_wider(names_from = ref, values_from = sum_total_comp_usd19_l) %>%
-#     mutate(diff = alt - bau) %>%
-#     rename(delta_total_comp_usd19_l = diff) %>%
-#     mutate(scenario = "Low demand - low exports") %>%
-#     select(scenario, county, delta_total_comp_usd19_l) %>%
-#     rename(NAME = county) %>%
-#     left_join(county_names) %>%
-#     left_join(pop_2020_county) %>%
-#     mutate(delta_total_comp_usd19_pc_l = delta_total_comp_usd19_l / pop_2020) %>%
-#     select(scenario, NAME, COUNTYFP, pop_2020, delta_total_comp_usd19_l, delta_total_comp_usd19_pc_l)
-#
-#   labor_map_df_h <- labor_map_df %>%
-#     mutate(county = str_remove(destination, " County, CA")) %>%
-#     select(county, ref, sum_total_comp_usd19_h) %>%
-#     pivot_wider(names_from = ref, values_from = sum_total_comp_usd19_h) %>%
-#     mutate(diff = alt - bau) %>%
-#     rename(delta_total_comp_usd19_h = diff) %>%
-#     mutate(scenario = "Low demand - low exports") %>%
-#     select(scenario, county, delta_total_comp_usd19_h) %>%
-#     rename(NAME = county) %>%
-#     left_join(county_names) %>%
-#     left_join(pop_2020_county) %>%
-#     mutate(delta_total_comp_usd19_pc_h = delta_total_comp_usd19_h / pop_2020) %>%
-#     select(scenario, NAME, COUNTYFP, pop_2020, delta_total_comp_usd19_h, delta_total_comp_usd19_pc_h)
-#
-#   ## merge
-#   health_map_df_hl <- merge(labor_map_df_l, labor_map_df_h,
-#                          by = c("scenario", "NAME", "COUNTYFP", "pop_2020"),
-#                          all.x = T)
-#
-#   ## join with county-level health
-#   county_map_df <- merge(health_county_df, health_map_df_hl %>% select(NAME, delta_total_comp_usd19_l, delta_total_comp_usd19_pc_l,
-#                                                                        delta_total_comp_usd19_h, delta_total_comp_usd19_pc_h),
-#                              by = "NAME",
-#                              all.x = T)
-#
-#   county_map_df <- county_map_df %>%
-#     select(scenario, scen_id, county = NAME, COUNTYFP, pop_2020, npv_health_av_mort, npv_health_av_mort_pc,
-#            delta_total_comp_usd19_l, delta_total_comp_usd19_pc_l,
-#            delta_total_comp_usd19_h, delta_total_comp_usd19_pc_h)
-#
-#
-#   ## save county-level inputs
-#   fwrite(county_map_df, file.path(main_path, save_path, "fig-csv-files", "county_health_labor_npv_npv_pc.csv"))
-#
-#   # ## make the maps
-#   # ##----------------------------------------------------------------------------
-#   #
-#   # california <- st_as_sf(maps::map("state", plot = FALSE, fill = TRUE)) %>%
-#   #   filter(ID == "california") %>%
-#   #   st_transform(crs = "EPSG:4269")
-#   #
-#   #
-#   # # # filter for census tracts that do not experience health benefits
-#   # # neg_npv_health <- health_map_npv_df %>%
-#   # #   filter(sum_cost_2019_pv < 0) %>%
-#   # #   filter(ALAND > 0)
-#   # #
-#   # # ggplot() +
-#   # #   geom_sf(data = neg_npv_health, mapping = aes(geometry = geometry, fill = sum_cost_2019_pv / 1000), lwd = 0.05, alpha = 1, color = "grey", show.legend = TRUE) +
-#   # #   geom_sf(data = california, fill = "transparent", color = "black")
-#   # #
-#   # ## filter refining output df for ct's with negative outputs
-#   # # neg_health_out <- health_map_df %>%
-#   # #   filter(census_tract %in% unique(neg_npv_health$GEOID)) %>%
-#   # #   select(census_tract, year, delta_total_pm25, mortality_delta, cost_2019_PV) %>%
-#   # #   pivot_longer(delta_total_pm25:cost_2019_PV, names_to = "metric", values_to = "value") %>%
-#   # #   mutate(lab = ifelse(metric == "delta_total_pm25", "pm25 - difference from reference",
-#   # #                       ifelse(metric == "mortality_delta", "avoided mortality - difference from reference", "avoided mortality - difference from reference (USD)")))
-#   # #
-#   # # ggplot(neg_health_out, aes(x = year, y = value, group = census_tract)) +
-#   # #   geom_line(alpha = 0.4) +
-#   # #   facet_wrap(~lab, scales = "free_y", nrow = 3) +
-#   # #   theme_bw()
-#   #
-#   #
-#   #
-#   #
-#   #
-#   # ## health maps
-#   # ## ---------------------------------------------------------------------------
-#   #
-#   # ## bounding box 1
-#   # bay_bb <- st_bbox(c(xmin = -123, ymin = 37.5, xmax = -121, ymax = 38.5), crs = st_crs(health_map_df))
-#   # bay_bb <- st_as_sfc(bay_bb)
-#   #
-#   # la_bb <- st_bbox(c(xmin = -119, ymin = 33, xmax = -117, ymax = 34.5), crs = st_crs(health_map_df))
-#   # la_bb <- st_as_sfc(la_bb)
-#   #
-#   #
-#   # # ## filter for bay area
-#   # # health_map_df2 <- health_map_df %>%
-#   # #   mutate(bay = st_intersects(geometry, bay_bb),
-#   # #          la = st_intersects(geometry, la_bb))
-#   #
-#   #
-#   #
-#   #
-#   # health_fig_bay <- ggplot() +
-#   #   geom_sf(data = health_map_npv_df %>% filter(ALAND > 0), mapping = aes(geometry = geometry, fill = sum_cost_2019_pv / 1000), lwd = 0.05, alpha = 1, color = "grey", show.legend = TRUE) +
-#   #   # geom_sf(data = california, fill = "transparent", color = "black") +
-#   #   scale_fill_gradient2(high = "navy",  mid = "#FAFAFA", low = "#A84268",
-#   #                        # high = "navy",  mid = "#FAFAFA", low = "#A84268",
-#   #                        midpoint = 0, space = "Lab",
-#   #                        # limits = c(min(health_map_df$sum_cost_2019_pv / 1000), max(health_map_df$sum_cost_2019_pv / 1000)),
-#   #                        limits = c(-2500, 99000),
-#   #                        breaks = c(-2500, 0, 49500, 99000),
-#   #                        labels = c(-2500, 0, 49500, 99000),
-#   #                        na.value = "grey50"
-#   #                        # ,
-#   #                        # labels=function(x) format(x, big.mark = ",", scientific = FALSE)
-#   #   ) +
-#   #   coord_sf(xlim = c(-123, -121), ylim = c(37.5, 38.5)) +
-#   #   labs(fill = "NPV (thousand USD 2019)",
-#   #        color = NULL,
-#   #        x = NULL,
-#   #        y = NULL) +
-#   #   theme_bw() +
-#   #   theme(
-#   #     # legend.justification defines the edge of the legend that the legend.position coordinates refer to
-#   #     # legend.justification = c(0, 1),
-#   #     # Set the legend flush with the left side of the plot, and just slightly below the top of the plot
-#   #     legend.position = "none",
-#   #     legend.key.width = unit(2, "line"),
-#   #     legend.key.height = unit(1, "line"),
-#   #     legend.title = element_text(size = 8),
-#   #     legend.text = element_text(size = 8),
-#   #     plot.margin = margin(0, 2, 0, 8),
-#   #     plot.title = element_text(face = 'bold', size = 10),
-#   #     plot.subtitle = element_text(face = 'bold', size = 8),
-#   #     panel.grid.major = element_blank(),
-#   #     panel.grid.minor = element_blank(),
-#   #     panel.background = element_blank(),
-#   #     axis.text = element_text(size = 8)) +
-#   #   guides(fill = guide_colourbar(title.position="top",
-#   #                                 title.hjust = 0,
-#   #                                 direction = "horizontal",
-#   #                                 ticks.colour = "black", frame.colour = "black"),
-#   #          color = "none")
-#   #
-#   #
-#   # health_fig_la <- ggplot() +
-#   #   geom_sf(data = health_map_npv_df %>% filter(ALAND > 0), mapping = aes(geometry = geometry, fill = sum_cost_2019_pv / 1000), lwd = 0.05, alpha = 1, color = "grey", show.legend = TRUE) +
-#   #   # geom_sf(data = california, fill = "transparent", color = "black") +
-#   #   scale_fill_gradient2(high = "navy",  mid = "#FAFAFA", low = "#A84268",
-#   #                        # high = "navy",  mid = "#FAFAFA", low = "#A84268",
-#   #                        midpoint = 0, space = "Lab",
-#   #                        # limits = c(min(health_map_df$sum_cost_2019_pv / 1000), max(health_map_df$sum_cost_2019_pv / 1000)),
-#   #                        limits = c(-2500, 99000),
-#   #                        breaks = c(-2500, 0, 49500, 99000),
-#   #                        labels = c(-2500, 0, 49500, 99000),
-#   #                        na.value = "grey50"
-#   #                        # ,
-#   #                        # labels=function(x) format(x, big.mark = ",", scientific = FALSE)
-#   #   ) +
-#   #   coord_sf(xlim = c(-119, -117), ylim = c(33.2, 34.4)) +
-#   #   labs(fill = "NPV (thousand USD 2019)",
-#   #        color = NULL,
-#   #        x = NULL,
-#   #        y = NULL) +
-#   #   theme_bw() +
-#   #   theme(
-#   #     # legend.justification defines the edge of the legend that the legend.position coordinates refer to
-#   #     # legend.justification = c(0, 1),
-#   #     # Set the legend flush with the left side of the plot, and just slightly below the top of the plot
-#   #     legend.position = "none",
-#   #     legend.key.width = unit(2, "line"),
-#   #     legend.key.height = unit(1, "line"),
-#   #     legend.title = element_text(size = 8),
-#   #     legend.text = element_text(size = 8),
-#   #     plot.margin = margin(0, 2, 0, 8),
-#   #     plot.title = element_text(face = 'bold', size = 10),
-#   #     plot.subtitle = element_text(face = 'bold', size = 8),
-#   #     panel.grid.major = element_blank(),
-#   #     panel.grid.minor = element_blank(),
-#   #     panel.background = element_blank(),
-#   #     axis.text = element_text(size = 8)) +
-#   #   guides(fill = guide_colourbar(title.position="top",
-#   #                                 title.hjust = 0,
-#   #                                 direction = "horizontal",
-#   #                                 ticks.colour = "black", frame.colour = "black"),
-#   #          color = "none")
-#   #
-#   # fig3c_leg <- ggplot() +
-#   #   geom_sf(data = health_map_df %>% filter(ALAND > 0), mapping = aes(geometry = geometry, fill = sum_cost_2019_pv / 1000), lwd = 0.05, alpha = 1, color = "grey", show.legend = TRUE) +
-#   #   # geom_sf(data = california, fill = "transparent", color = "black") +
-#   #   scale_fill_gradient2(low = "black",  mid = "#FAFAFA", high = "#A84268",
-#   #                        # high = "navy",  mid = "#FAFAFA", low = "#A84268",
-#   #                        midpoint = 0, space = "Lab",
-#   #                        # limits = c(min(health_map_df$sum_cost_2019_pv / 1000), max(health_map_df$sum_cost_2019_pv / 1000)),
-#   #                        limits = c(-2500, 99000),
-#   #                        breaks = c(-2500, 0, 49500, 99000),
-#   #                        labels = c(-2500, 0, 49500, 99000),
-#   #                        na.value = "grey50"
-#   #                        # ,
-#   #                        # labels=function(x) format(x, big.mark = ",", scientific = FALSE)
-#   #   ) +
-#   #   coord_sf(xlim = c(-123, -121), ylim = c(37.5, 38.5)) +
-#   #   labs(fill = "NPV (thousand USD 2019)",
-#   #        color = NULL,
-#   #        x = NULL,
-#   #        y = NULL) +
-#   #   theme_bw() +
-#   #   theme(
-#   #     # legend.justification defines the edge of the legend that the legend.position coordinates refer to
-#   #     # legend.justification = c(0, 1),
-#   #     # Set the legend flush with the left side of the plot, and just slightly below the top of the plot
-#   #     legend.position = "bottom",
-#   #     legend.key.width = unit(2, "line"),
-#   #     legend.key.height = unit(1, "line"),
-#   #     legend.title = element_text(size = 8),
-#   #     legend.text = element_text(size = 8),
-#   #     plot.margin = margin(0, 2, 0, 8),
-#   #     plot.title = element_text(face = 'bold', size = 10),
-#   #     plot.subtitle = element_text(face = 'bold', size = 8),
-#   #     panel.grid.major = element_blank(),
-#   #     panel.grid.minor = element_blank(),
-#   #     panel.background = element_blank(),
-#   #     axis.text = element_text(size = 8)) +
-#   #   guides(fill = guide_colourbar(title.position="top",
-#   #                                 title.hjust = 0,
-#   #                                 direction = "horizontal",
-#   #                                 ticks.colour = "black", frame.colour = "black"),
-#   #          color = "none")
-#   #
-#   #
-#   #
-#   # # # ## quantile for plotting
-#   # # # numclas <- 12
-#   # # # qbrks_h <- seq(0, 1, length.out = numclas + 1)
-#   # # # qbrks_h
-#   # # #
-#   # # # health_map_df <- health_map_df %>%
-#   # # #   mutate(valq = cut(sum_cost_2019_pv, breaks = quantile(sum_cost_2019_pv, breaks = qbrks_h),
-#   # # #                     include.lowest = T))
-#   # # #
-#   # # #
-#   # # fig3c_v2 <- ggplot() +
-#   # #   geom_sf(data = health_map_df %>% filter(ALAND > 0), mapping = aes(geometry = geometry, fill = valq), lwd = 0, alpha = 1, color = "darkgrey", show.legend = TRUE) +
-#   # #   # geom_sf(data = california, fill = "transparent", color = "black") +
-#   # #   scale_fill_discrete(labels=function(x) format(x, big.mark = ",", scientific = FALSE)) +
-#   # #   labs(fill = "NPV (USD 2019)",
-#   # #        color = NULL,
-#   # #        x = NULL,
-#   # #        y = NULL) +
-#   # #   theme_bw() +
-#   # #   theme(
-#   # #     # legend.justification defines the edge of the legend that the legend.position coordinates refer to
-#   # #     # legend.justification = c(0, 1),
-#   # #     # Set the legend flush with the left side of the plot, and just slightly below the top of the plot
-#   # #     legend.position = "bottom",
-#   # #     legend.key.width = unit(2, "line"),
-#   # #     legend.key.height = unit(1, "line"),
-#   # #     legend.title = element_text(size = 8),
-#   # #     legend.text = element_text(size = 8),
-#   # #     plot.margin = margin(0, 2, 0, 8),
-#   # #     plot.title = element_text(face = 'bold', size = 10),
-#   # #     plot.subtitle = element_text(face = 'bold', size = 8),
-#   # #     panel.grid.major = element_blank(),
-#   # #     panel.grid.minor = element_blank(),
-#   # #     panel.background = element_blank(),
-#   # #     axis.text = element_text(size = 8)) +
-#   # #   guides(fill = guide_colourbar(title.position="top",
-#   # #                                 title.hjust = 0,
-#   # #                                 direction = "horizontal",
-#   # #                                 ticks.colour = "black", frame.colour = "black"),
-#   # #          color = "none")
-#   #
-#   #
-#   # ## labor
-#   # labor_map_df <- merge(raw_ca_counties_sp %>% select(NAME), labor_map_df %>% rename(NAME = county),
-#   #                       by = "NAME",
-#   #                       all.x = T)
-#   #
-#   # labor_map_df <- st_as_sf(labor_map_df)
-#   #
-#   # labor_map_df <- st_transform(labor_map_df, crs = "EPSG:4269")
-#   #
-#   # labor_map_df <- st_make_valid(labor_map_df)
-#   #
-#   # ## labor
-#   # blues_pal <- c("#FAFAFA", "#778DA9", "#415A77", "#1B263B", "#0D1B2A")
-#   #
-#   # fig3d <- ggplot() +
-#   #   geom_sf(data = labor_map_df, mapping = aes(geometry = geometry, fill = delta_total_comp_usd19 / 1e9), lwd = 0.05, alpha = 1, color = "grey", show.legend = TRUE) +
-#   #   # geom_sf(data = california, fill = "transparent", color = "black") +
-#   #   scale_fill_gradientn(colors = rev(blues_pal),
-#   #                        na.value = "#FAFAFA") +
-#   #   # coord_sf(xlim = c(-123, -116), ylim = c(33, 39)) +
-#   #   labs(fill = "NPV (billion USD 2019)",
-#   #        color = NULL,
-#   #        x = NULL,
-#   #        y = NULL) +
-#   #   theme_bw() +
-#   #   theme(
-#   #     # legend.justification defines the edge of the legend that the legend.position coordinates refer to
-#   #     # legend.justification = c(0, 1),
-#   #     # Set the legend flush with the left side of the plot, and just slightly below the top of the plot
-#   #     legend.position = "none",
-#   #     legend.key.width = unit(2, "line"),
-#   #     legend.key.height = unit(1, "line"),
-#   #     legend.title = element_text(size = 8),
-#   #     legend.text = element_text(size = 8),
-#   #     plot.margin = margin(0, 2, 0, 8),
-#   #     plot.title = element_text(face = 'bold', size = 10),
-#   #     plot.subtitle = element_text(face = 'bold', size = 8),
-#   #     panel.grid.major = element_blank(),
-#   #     panel.grid.minor = element_blank(),
-#   #     panel.background = element_blank(),
-#   #     axis.text = element_text(size = 8)) +
-#   #   guides(fill = guide_colourbar(title.position="top",
-#   #                                 title.hjust = 0,
-#   #                                 direction = "horizontal",
-#   #                                 ticks.colour = "black", frame.colour = "black"),
-#   #          color = "none")
-#   #
-#   # fig3d_leg <- ggplot() +
-#   #   geom_sf(data = labor_map_df, mapping = aes(geometry = geometry, fill = delta_total_comp_usd19 / 1e9), lwd = 0.05, alpha = 1, color = "grey", show.legend = TRUE) +
-#   #   # geom_sf(data = california, fill = "transparent", color = "black") +
-#   #   scale_fill_gradientn(colors = rev(blues_pal),
-#   #                        na.value = "#FAFAFA") +
-#   #   # coord_sf(xlim = c(-123, -116), ylim = c(33, 39)) +
-#   #   labs(fill = "NPV (billion USD 2019)",
-#   #        color = NULL,
-#   #        x = NULL,
-#   #        y = NULL) +
-#   #   theme_bw() +
-#   #   theme(
-#   #     # legend.justification defines the edge of the legend that the legend.position coordinates refer to
-#   #     # legend.justification = c(0, 1),
-#   #     # Set the legend flush with the left side of the plot, and just slightly below the top of the plot
-#   #     legend.position = "bottom",
-#   #     legend.key.width = unit(2, "line"),
-#   #     legend.key.height = unit(1, "line"),
-#   #     legend.title = element_text(size = 8),
-#   #     legend.text = element_text(size = 8),
-#   #     plot.margin = margin(0, 2, 0, 8),
-#   #     plot.title = element_text(face = 'bold', size = 10),
-#   #     plot.subtitle = element_text(face = 'bold', size = 8),
-#   #     panel.grid.major = element_blank(),
-#   #     panel.grid.minor = element_blank(),
-#   #     panel.background = element_blank(),
-#   #     axis.text = element_text(size = 8)) +
-#   #   guides(fill = guide_colourbar(title.position="top",
-#   #                                 title.hjust = 0,
-#   #                                 direction = "horizontal",
-#   #                                 ticks.colour = "black", frame.colour = "black"),
-#   #          color = "none")
-#   #
-#   #
-#   #
-#   #
-#   #
-#   #
-#   # legend_fig_3c <- get_legend(
-#   #   fig3c_leg +
-#   #     theme(legend.title = element_text(size = 8),
-#   #           legend.text = element_text(size = 8))
-#   #
-#   # )
-#   #
-#   # legend_fig_3d <- get_legend(
-#   #   fig3d_leg +
-#   #     theme(legend.title = element_text(size = 8),
-#   #           legend.text = element_text(size = 8))
-#   #
-#   # )
-#   #
-#   #
-#
-#
-# }
-
-calc_county_pm25 <- function(
-  main_path,
-  save_path,
-  health_weighted,
-  raw_counties,
-  raw_ct_2020_all,
-  refining_mortality
-) {
-  ## calc 2020 pop by demographic
-  pop_2020 <- refining_mortality %>%
-    filter(year == 2020) %>%
-    select(census_tract, year, pop) %>%
-    unique() %>%
-    as.data.table()
-
-  health_df <- copy(health_weighted)
-
-  health_df <- health_df[
-    year == 2019 &
-      scen_id == "BAU historic exports"
-  ]
-
-  county_names <- raw_counties %>%
-    select(COUNTYFP, NAME) %>%
-    st_drop_geometry() %>%
-    unique()
-
-  ## geoid to census tract
-  county_df <- raw_ct_2020_all %>%
-    filter(STATEFP == "06") %>%
-    select(census_tract = GEOID, COUNTYFP, ALAND) %>%
-    st_drop_geometry() %>%
-    left_join(county_names) %>%
-    left_join(pop_2020) %>%
-    select(census_tract, COUNTYFP, NAME, pop, ALAND)
-
-  health_df <- merge(health_df, county_df, by = "census_tract", all.x = T)
-
-  health_county_df <- health_df %>%
-    group_by(NAME, COUNTYFP, year) %>%
-    summarise(
-      avg_pm25_areaw = weighted.mean(total_pm25, ALAND),
-      avg_pm25_popw = weighted.mean(total_pm25, pop)
-    ) %>%
-    ungroup() %>%
-    arrange(-avg_pm25_popw)
-
-  fwrite(
-    health_county_df,
-    file.path(main_path, save_path, "fig-csv-files", "avg_pm25_county_2019.csv")
-  )
-  # fwrite(health_county_df, file.path(main_path, "outputs/academic-out/refining/figures/2024-08-beta-adj/fig-csv-files/", "avg_pm25_county_2019.csv"))
-
-  return(health_county_df)
-}
-
-
-plot_health_levels <- function(main_path, save_path, health_grp) {
-  fig2_df <- copy(health_grp)
-
-  ## change scenario names, factor
-  fig2_df[,
-    scenario := paste0(demand_scenario, " demand - ", refining_scenario)
-  ]
-  # fig2_df[, scenario := gsub('BAU', 'Reference', scenario)]
-  fig2_df[, scenario := gsub("LC1.", "Low ", scenario)]
-
-  ## add scenario title
-  fig2_df[, scenario_title := str_replace(scenario, " - ", "\n")]
-
-  ## change historic to historical
-  fig2_df[, scen_id := str_replace(scen_id, "historic", "historical")]
-  fig2_df[,
-    refining_scenario := str_replace(
-      refining_scenario,
-      "historic",
-      "historical"
-    )
-  ]
-  fig2_df[, scenario := str_replace(scenario, "historic", "historical")]
-  fig2_df[,
-    scenario_title := str_replace(scenario_title, "historic", "historical")
-  ]
-
-  ## refactor
-  fig2_df$scenario_title <- factor(
-    fig2_df$scenario_title,
-    levels = c(
-      "BAU demand\nhistorical production",
-      "BAU demand\nhistorical exports",
-      "BAU demand\nlow exports",
-      "Low demand\nhistorical exports",
-      "Low demand\nlow exports",
-      "Low demand\nhistorical production"
-    )
-  )
-
-  ## refactor
-  fig2_df$scenario <- factor(
-    fig2_df$scenario,
-    levels = c(
-      "BAU demand - historical production",
-      "BAU demand - historical exports",
-      "BAU demand - low exports",
-      "Low demand - historical exports",
-      "Low demand - low exports",
-      "Low demand - historical production"
-    )
-  )
-
-  ## scenarios for filtering
-  # remove_scen <- c('LC1 historic production', 'BAU low exports', 'LC1 historic exports')
-  remove_scen <- c("LC1 historical production")
-
-  ## save figure inputs
-  fwrite(
-    fig2_df,
-    file.path(
-      main_path,
-      save_path,
-      "fig-csv-files",
-      "state_levels_fig_inputs.csv"
-    )
-  )
-  # fwrite(fig2_df, file.path(main_path, "outputs/academic-out/refining/figures/2024-08-beta-adj/fig-csv-files/", "state_levels_fig_inputs.csv"))
-
-  # health_level_fig <- ggplot(fig2_df %>% filter(!scen_id %in% remove_scen), aes(x = year, y = num_over_den, color = group)) +
-  #   geom_line(linewidth = 1, alpha = 0.8) +
-  #   facet_grid(type ~ scenario) +
-  #   labs(x = NULL,
-  #        y = "num_over_den") +
-  #   theme_line +
-  #   theme(legend.position = "bottom",
-  #         axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-  #         axis.ticks.length.y = unit(0.1, 'cm'),
-  #         axis.ticks.length.x = unit(0.1, 'cm'))
-
-  fig_title_vec <- c("Asian", "Black", "Hispanic", "white")
-
-  health_level_fig_a <- ggplot(
-    fig2_df %>%
-      filter(
-        !scen_id %in% remove_scen,
-        title %in% fig_title_vec,
-        demo_cat == "Race"
-      ) %>%
-      mutate(
-        title = factor(title, levels = c("Black", "Hispanic", "Asian", "white"))
-      ),
-    aes(x = year, y = mortality_level_dem, color = title)
-  ) +
-    geom_line(linewidth = 1, alpha = 0.8) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    facet_grid(demo_cat ~ scenario_title) +
-    scale_color_manual(
-      name = "",
-      values = race_col_pal
-    ) +
-    labs(
-      x = NULL,
-      y = NULL
-    ) +
-    # ylim(c(0, 0.4)) +
-    scale_x_continuous(
-      breaks = c(2020, 2045), # Specify tick mark positions
-      labels = c(2020, 2045)
-    ) + # Specify tick mark labels
-    theme_line +
-    theme(
-      legend.position = "bottom",
-      legend.title = element_blank(),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      strip.text.x = element_blank(),
-      # axis.text.x = element_blank(),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  legend_figa <- health_level_fig_a + theme(legend.position = "right")
-
-  legend_a <- get_legend(
-    legend_figa +
-      theme(legend.text = element_text(size = 8))
-  )
-
-  ##
-  health_level_fig_b <- ggplot(
-    fig2_df %>%
-      filter(
-        !scen_id %in% remove_scen,
-        demo_cat == "DAC"
-      ),
-    aes(x = year, y = mortality_level_dem, lty = title)
-  ) +
-    geom_line(linewidth = 1, alpha = 0.8) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    facet_grid(demo_cat ~ scenario_title) +
-    labs(
-      x = NULL,
-      y = NULL
-    ) +
-    # ylim(c(0, 0.4)) +
-    scale_linetype_manual(values = dac_lty) +
-    scale_x_continuous(
-      breaks = c(2020, 2045), # Specify tick mark positions
-      labels = c(2020, 2045)
-    ) + # Specify tick mark labels
-    theme_line +
-    theme(
-      legend.position = "bottom",
-      legend.title = element_blank(),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      # strip.text.x = element_blank(),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      # axis.text.x = element_blank(),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  legend_figb <- health_level_fig_b + theme(legend.position = "right")
-
-  legend_b <- get_legend(
-    legend_figb +
-      theme(legend.text = element_text(size = 8))
-  )
-
-  ##
-  health_level_fig_c <- ggplot(
-    fig2_df %>%
-      filter(
-        !scen_id %in% remove_scen,
-        demo_cat == "Poverty"
-      ) %>%
-      mutate(
-        title = factor(
-          title,
-          levels = c("Below poverty line", "Above poverty line")
-        )
-      ),
-    aes(x = year, y = mortality_level_dem, lty = title)
-  ) +
-    geom_line(linewidth = 1, alpha = 0.8, color = "black") +
-    scale_linetype_manual(values = poverty_lty) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    facet_grid(demo_cat ~ scenario) +
-    labs(
-      x = NULL,
-      y = NULL
-    ) +
-    # ylim(c(0, 0.4)) +
-    scale_x_continuous(
-      breaks = c(2020, 2045), # Specify tick mark positions
-      labels = c(2020, 2045)
-    ) + # Specify tick mark labels
-    theme_line +
-    theme(
-      legend.position = "bottom",
-      legend.title = element_blank(),
-      # axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      legend.key.width = unit(10, "mm"),
-      strip.text.x = element_blank(),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  legend_figc <- health_level_fig_c + theme(legend.position = "right")
-
-  legend_c <- get_legend(
-    legend_figc +
-      theme(legend.text = element_text(size = 8))
-  )
-
-  ## shared y lab
-  # yaxis_lab <- ggdraw() + draw_label(expression(paste("PM"[2.5], " (",mu,"g ", m^{-3},")", " per person")),
-  #                                    size = 8, angle = 90)
-
-  yaxis_lab <- ggdraw() + draw_label("Mortalities", size = 8, angle = 90)
-
-  # ## plot together
-  # fig2l_a <- plot_grid(
-  #   health_level_fig_a,
-  #   legend_a,
-  #   align = 'h',
-  #   # labels = c("A", "B", "C", "D", "E", "F"),
-  #   # # labels = 'AUTO',
-  #   # label_size = 10,
-  #   hjust = -1,
-  #   nrow = 1,
-  #   ncol = 2,
-  #   rel_widths = c(0.95, 0.5),
-  #   rel_heighs = c(1, 1)
-  # )
-  #
-  #
-
-  fig2_plot_grid <- plot_grid(
-    health_level_fig_b,
-    health_level_fig_c,
-    health_level_fig_a,
-    align = "v",
-    # labels = c("A", "B", "C", "D", "E", "F"),
-    # # labels = 'AUTO',
-    # label_size = 10,
-    hjust = -1,
-    nrow = 3,
-    ncol = 1,
-    rel_widths = c(1, 1, 1, 1),
-    rel_heights = c(1, 1, 1, 1.05)
-  )
-
-  fig2_plot_grid2 <- plot_grid(
-    yaxis_lab,
-    fig2_plot_grid,
-    align = "v",
-    # labels = c("A", "B", "C", "D", "E", "F"),
-    # # labels = 'AUTO',
-    # label_size = 10,
-    hjust = -1,
-    nrow = 1,
-    ncol = 2,
-    rel_widths = c(0.05, 1),
-    rel_heighs = c(1, 1)
-  )
-
-  fig2_plot_grid2
-}
-
-
-plot_health_levels_pc <- function(
-  main_path,
-  save_path,
-  health_grp,
-  refining_mortality,
-  pop_ratios
-) {
-  mort_pc_df <- copy(health_grp)
-
-  ## calc 2020 pop by demographic
-  pop_2020 <- refining_mortality %>%
-    filter(year == 2020) %>%
-    select(census_tract, year, pop) %>%
-    unique() %>%
-    left_join(pop_ratios) %>%
-    as.data.table()
-
-  pop_2020[, demo_pop := pop * pct]
-
-  ## summarize by demographic group
-  pop_2020 <- pop_2020[,
-    .(pop_2020 = sum(demo_pop)),
-    by = .(demo_group, demo_cat)
-  ]
-
-  ## merge population back with results
-  mort_pc_df <- merge(
-    mort_pc_df,
-    pop_2020,
-    by = c("demo_group", "demo_cat"),
-    all.x = T
-  )
-
-  ## calculate per capita
-  mort_pc_df[, value := mortality_level_dem / pop_2020]
-  mort_pc_df[, value_pmil := value * 1e6]
-
-  ## change scenario names, factor
-  mort_pc_df[,
-    scenario := paste0(demand_scenario, " demand - ", refining_scenario)
-  ]
-  # fig2_df[, scenario := gsub('BAU', 'Reference', scenario)]
-  mort_pc_df[, scenario := gsub("LC1.", "Low ", scenario)]
-
-  ## add scenario title
-  mort_pc_df[, scenario_title := str_replace(scenario, " - ", "\n")]
-
-  ## change historic to historical
-  mort_pc_df[, scen_id := str_replace(scen_id, "historic", "historical")]
-  mort_pc_df[,
-    refining_scenario := str_replace(
-      refining_scenario,
-      "historic",
-      "historical"
-    )
-  ]
-  mort_pc_df[, scenario := str_replace(scenario, "historic", "historical")]
-  mort_pc_df[,
-    scenario_title := str_replace(scenario_title, "historic", "historical")
-  ]
-
-  ## refactor
-  mort_pc_df$scenario_title <- factor(
-    mort_pc_df$scenario_title,
-    levels = c(
-      "BAU demand\nhistorical production",
-      "BAU demand\nhistorical exports",
-      "BAU demand\nlow exports",
-      "Low demand\nhistorical exports",
-      "Low demand\nlow exports",
-      "Low demand\nhistorical production"
-    )
-  )
-
-  ## refactor
-  mort_pc_df$scenario <- factor(
-    mort_pc_df$scenario,
-    levels = c(
-      "BAU demand - historical production",
-      "BAU demand - historical exports",
-      "BAU demand - low exports",
-      "Low demand - historical exports",
-      "Low demand - low exports",
-      "Low demand - historical production"
-    )
-  )
-
-  ## scenarios for filtering
-  # remove_scen <- c('LC1 historic production', 'BAU low exports', 'LC1 historic exports')
-  remove_scen <- c("LC1 historical production")
-
-  ## save figure inputs
-  fwrite(
-    mort_pc_df,
-    file.path(
-      main_path,
-      save_path,
-      "fig-csv-files",
-      "state_levels_pmil_fig_inputs.csv"
-    )
-  )
-  # fwrite(mort_pc_df, file.path(main_path, "outputs/academic-out/refining/figures/2024-08-beta-adj/fig-csv-files/", "state_levels_pmil_fig_inputs.csv"))
-
-  # health_level_fig <- ggplot(fig2_df %>% filter(!scen_id %in% remove_scen), aes(x = year, y = num_over_den, color = group)) +
-  #   geom_line(linewidth = 1, alpha = 0.8) +
-  #   facet_grid(type ~ scenario) +
-  #   labs(x = NULL,
-  #        y = "num_over_den") +
-  #   theme_line +
-  #   theme(legend.position = "bottom",
-  #         axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-  #         axis.ticks.length.y = unit(0.1, 'cm'),
-  #         axis.ticks.length.x = unit(0.1, 'cm'))
-
-  fig_title_vec <- c("Asian", "Black", "Hispanic", "white")
-
-  health_level_fig_a <- ggplot(
-    mort_pc_df %>%
-      filter(
-        !scen_id %in% remove_scen,
-        title %in% fig_title_vec,
-        demo_cat == "Race"
-      ) %>%
-      mutate(
-        title = factor(title, levels = c("Black", "Hispanic", "Asian", "white"))
-      ),
-    aes(x = year, y = value_pmil, color = title)
-  ) +
-    geom_line(linewidth = 1, alpha = 0.8) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    facet_grid(demo_cat ~ scenario_title) +
-    scale_color_manual(
-      name = "",
-      values = race_col_pal
-    ) +
-    labs(
-      x = NULL,
-      y = NULL
-    ) +
-    # ylim(c(0, 40)) +
-    scale_x_continuous(
-      breaks = c(2020, 2045), # Specify tick mark positions
-      labels = c(2020, 2045)
-    ) + # Specify tick mark labels
-    theme_line +
-    theme(
-      legend.position = "bottom",
-      legend.title = element_blank(),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      strip.text.x = element_blank(),
-      # axis.text.x = element_blank(),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  legend_figa <- health_level_fig_a + theme(legend.position = "right")
-
-  legend_a <- get_legend(
-    legend_figa +
-      theme(legend.text = element_text(size = 8))
-  )
-
-  ##
-  health_level_fig_b <- ggplot(
-    mort_pc_df %>%
-      filter(
-        !scen_id %in% remove_scen,
-        demo_cat == "DAC"
-      ),
-    aes(x = year, y = value_pmil, lty = title)
-  ) +
-    geom_line(linewidth = 1, alpha = 0.8) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    facet_grid(demo_cat ~ scenario_title) +
-    labs(
-      x = NULL,
-      y = NULL
-    ) +
-    # ylim(c(0, 40)) +
-    scale_linetype_manual(values = dac_lty) +
-    scale_x_continuous(
-      breaks = c(2020, 2045), # Specify tick mark positions
-      labels = c(2020, 2045)
-    ) + # Specify tick mark labels
-    theme_line +
-    theme(
-      legend.position = "bottom",
-      legend.title = element_blank(),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      # strip.text.x = element_blank(),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      # axis.text.x = element_blank(),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  legend_figb <- health_level_fig_b + theme(legend.position = "right")
-
-  legend_b <- get_legend(
-    legend_figb +
-      theme(legend.text = element_text(size = 8))
-  )
-
-  ##
-  health_level_fig_c <- ggplot(
-    mort_pc_df %>%
-      filter(
-        !scen_id %in% remove_scen,
-        demo_cat == "Poverty"
-      ) %>%
-      mutate(
-        title = factor(
-          title,
-          levels = c("Below poverty line", "Above poverty line")
-        )
-      ),
-    aes(x = year, y = value_pmil, lty = title)
-  ) +
-    geom_line(linewidth = 1, alpha = 0.8, color = "black") +
-    scale_linetype_manual(values = poverty_lty) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    facet_grid(demo_cat ~ scenario) +
-    labs(
-      x = NULL,
-      y = NULL
-    ) +
-    # ylim(c(0, 40)) +
-    scale_x_continuous(
-      breaks = c(2020, 2045), # Specify tick mark positions
-      labels = c(2020, 2045)
-    ) + # Specify tick mark labels
-    theme_line +
-    theme(
-      legend.position = "bottom",
-      legend.title = element_blank(),
-      # axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      legend.key.width = unit(10, "mm"),
-      strip.text.x = element_blank(),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  legend_figc <- health_level_fig_c + theme(legend.position = "right")
-
-  legend_c <- get_legend(
-    legend_figc +
-      theme(legend.text = element_text(size = 8))
-  )
-
-  ## shared y lab
-  # yaxis_lab <- ggdraw() + draw_label(expression(paste("PM"[2.5], " (",mu,"g ", m^{-3},")", " per person")),
-  #                                    size = 8, angle = 90)
-
-  yaxis_lab <- ggdraw() +
-    draw_label("Mortalities per million people", size = 8, angle = 90)
-
-  # ## plot together
-  # fig2l_a <- plot_grid(
-  #   health_level_fig_a,
-  #   legend_a,
-  #   align = 'h',
-  #   # labels = c("A", "B", "C", "D", "E", "F"),
-  #   # # labels = 'AUTO',
-  #   # label_size = 10,
-  #   hjust = -1,
-  #   nrow = 1,
-  #   ncol = 2,
-  #   rel_widths = c(0.95, 0.5),
-  #   rel_heighs = c(1, 1)
-  # )
-  #
-  #
-
-  fig2_plot_grid <- plot_grid(
-    health_level_fig_b,
-    health_level_fig_c,
-    health_level_fig_a,
-    align = "v",
-    # labels = c("A", "B", "C", "D", "E", "F"),
-    # # labels = 'AUTO',
-    # label_size = 10,
-    hjust = -1,
-    nrow = 3,
-    ncol = 1,
-    rel_widths = c(1, 1, 1, 1),
-    rel_heighs = c(1, 1, 1, 1.05)
-  )
-
-  fig2_plot_grid2 <- plot_grid(
-    yaxis_lab,
-    fig2_plot_grid,
-    align = "v",
-    # labels = c("A", "B", "C", "D", "E", "F"),
-    # # labels = 'AUTO',
-    # label_size = 10,
-    hjust = -1,
-    nrow = 1,
-    ncol = 2,
-    rel_widths = c(0.05, 1),
-    rel_heighs = c(1, 1)
-  )
-
-  fig2_plot_grid2
-}
-
-
-plot_health_levels_pm25 <- function(main_path, save_path, health_grp) {
-  fig2_df <- copy(health_grp)
-
-  ## change scenario names, factor
-  fig2_df[,
-    scenario := paste0(demand_scenario, " demand - ", refining_scenario)
-  ]
-  # fig2_df[, scenario := gsub('BAU', 'Reference', scenario)]
-  fig2_df[, scenario := gsub("LC1.", "Low ", scenario)]
-
-  ## add scenario title
-  fig2_df[, scenario_title := str_replace(scenario, " - ", "\n")]
-
-  ## change historic to historical
-  fig2_df[, scen_id := str_replace(scen_id, "historic", "historical")]
-  fig2_df[,
-    refining_scenario := str_replace(
-      refining_scenario,
-      "historic",
-      "historical"
-    )
-  ]
-  fig2_df[, scenario := str_replace(scenario, "historic", "historical")]
-  fig2_df[,
-    scenario_title := str_replace(scenario_title, "historic", "historical")
-  ]
-
-  ## refactor
-  fig2_df$scenario_title <- factor(
-    fig2_df$scenario_title,
-    levels = c(
-      "BAU demand\nhistorical production",
-      "BAU demand\nhistorical exports",
-      "BAU demand\nlow exports",
-      "Low demand\nhistorical exports",
-      "Low demand\nlow exports",
-      "Low demand\nhistorical production"
-    )
-  )
-
-  ## refactor
-  fig2_df$scenario <- factor(
-    fig2_df$scenario,
-    levels = c(
-      "BAU demand - historical production",
-      "BAU demand - historical exports",
-      "BAU demand - low exports",
-      "Low demand - historical exports",
-      "Low demand - low exports",
-      "Low demand - historical production"
-    )
-  )
-
-  ## save figure inputs
-  fwrite(
-    fig2_df,
-    file.path(
-      main_path,
-      save_path,
-      "fig-csv-files",
-      "state_levels_pm25_inputs.csv"
-    )
-  )
-  # fwrite(fig2_df, file.path(main_path, "outputs/academic-out/refining/figures/2024-08-beta-adj/fig-csv-files/", "state_levels_pm25_inputs.csv"))
-
-  ## scenarios for filtering
-  # remove_scen <- c('LC1 historic production', 'BAU low exports', 'LC1 historic exports')
-  remove_scen <- c("LC1 historical production")
-
-  # health_level_fig <- ggplot(fig2_df %>% filter(!scen_id %in% remove_scen), aes(x = year, y = num_over_den, color = group)) +
-  #   geom_line(linewidth = 1, alpha = 0.8) +
-  #   facet_grid(type ~ scenario) +
-  #   labs(x = NULL,
-  #        y = "num_over_den") +
-  #   theme_line +
-  #   theme(legend.position = "bottom",
-  #         axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-  #         axis.ticks.length.y = unit(0.1, 'cm'),
-  #         axis.ticks.length.x = unit(0.1, 'cm'))
-
-  fig_title_vec <- c("Asian", "Black", "Hispanic", "white")
-
-  health_level_fig_a <- ggplot(
-    fig2_df %>%
-      filter(
-        !scen_id %in% remove_scen,
-        title %in% fig_title_vec,
-        demo_cat == "Race"
-      ) %>%
-      mutate(
-        title = factor(title, levels = c("Black", "Hispanic", "Asian", "white"))
-      ),
-    aes(x = year, y = num_over_den, color = title)
-  ) +
-    geom_line(linewidth = 1, alpha = 0.8) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    facet_grid(demo_cat ~ scenario_title) +
-    scale_color_manual(
-      name = "",
-      values = race_col_pal
-    ) +
-    labs(
-      x = NULL,
-      y = NULL
-    ) +
-    # ylim(c(0, 0.4)) +
-    scale_x_continuous(
-      breaks = c(2020, 2045), # Specify tick mark positions
-      labels = c(2020, 2045)
-    ) + # Specify tick mark labels
-    theme_line +
-    theme(
-      legend.position = "bottom",
-      legend.title = element_blank(),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      strip.text.x = element_blank(),
-      # axis.text.x = element_blank(),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  legend_figa <- health_level_fig_a + theme(legend.position = "right")
-
-  legend_a <- get_legend(
-    legend_figa +
-      theme(legend.text = element_text(size = 8))
-  )
-
-  ##
-  health_level_fig_b <- ggplot(
-    fig2_df %>%
-      filter(
-        !scen_id %in% remove_scen,
-        demo_cat == "DAC"
-      ),
-    aes(x = year, y = num_over_den, lty = title)
-  ) +
-    geom_line(linewidth = 1, alpha = 0.8) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    facet_grid(demo_cat ~ scenario_title) +
-    labs(
-      x = NULL,
-      y = NULL
-    ) +
-    # ylim(c(0, 0.4)) +
-    scale_linetype_manual(values = dac_lty) +
-    scale_x_continuous(
-      breaks = c(2020, 2045), # Specify tick mark positions
-      labels = c(2020, 2045)
-    ) + # Specify tick mark labels
-    theme_line +
-    theme(
-      legend.position = "bottom",
-      legend.title = element_blank(),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      # strip.text.x = element_blank(),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      # axis.text.x = element_blank(),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  legend_figb <- health_level_fig_b + theme(legend.position = "right")
-
-  legend_b <- get_legend(
-    legend_figb +
-      theme(legend.text = element_text(size = 8))
-  )
-
-  ##
-  health_level_fig_c <- ggplot(
-    fig2_df %>%
-      filter(
-        !scen_id %in% remove_scen,
-        demo_cat == "Poverty"
-      ) %>%
-      mutate(
-        title = factor(
-          title,
-          levels = c("Below poverty line", "Above poverty line")
-        )
-      ),
-    aes(x = year, y = num_over_den, lty = title)
-  ) +
-    geom_line(linewidth = 1, alpha = 0.8, color = "black") +
-    scale_linetype_manual(values = poverty_lty) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    facet_grid(demo_cat ~ scenario) +
-    labs(
-      x = NULL,
-      y = NULL
-    ) +
-    # ylim(c(0, 0.4)) +
-    scale_x_continuous(
-      breaks = c(2020, 2045), # Specify tick mark positions
-      labels = c(2020, 2045)
-    ) + # Specify tick mark labels
-    theme_line +
-    theme(
-      legend.position = "bottom",
-      legend.title = element_blank(),
-      # axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      legend.key.width = unit(10, "mm"),
-      strip.text.x = element_blank(),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  legend_figc <- health_level_fig_c + theme(legend.position = "right")
-
-  legend_c <- get_legend(
-    legend_figc +
-      theme(legend.text = element_text(size = 8))
-  )
-
-  ## shared y lab
-  yaxis_lab <- ggdraw() +
-    draw_label(
-      expression(paste(
-        "PM"[2.5],
-        " (",
-        mu,
-        "g ",
-        m^{
-          -3
-        },
-        ")",
-        " per person"
-      )),
-      size = 8,
-      angle = 90
-    )
-
-  # ## plot together
-  # fig2l_a <- plot_grid(
-  #   health_level_fig_a,
-  #   legend_a,
-  #   align = 'h',
-  #   # labels = c("A", "B", "C", "D", "E", "F"),
-  #   # # labels = 'AUTO',
-  #   # label_size = 10,
-  #   hjust = -1,
-  #   nrow = 1,
-  #   ncol = 2,
-  #   rel_widths = c(0.95, 0.5),
-  #   rel_heighs = c(1, 1)
-  # )
-  #
-  #
-
-  fig2_plot_grid <- plot_grid(
-    health_level_fig_b,
-    health_level_fig_c,
-    health_level_fig_a,
-    align = "v",
-    # labels = c("A", "B", "C", "D", "E", "F"),
-    # # labels = 'AUTO',
-    # label_size = 10,
-    hjust = -1,
-    nrow = 3,
-    ncol = 1,
-    rel_widths = c(1, 1, 1, 1),
-    rel_heighs = c(1, 1, 1, 1.05)
-  )
-
-  fig2_plot_grid2 <- plot_grid(
-    yaxis_lab,
-    fig2_plot_grid,
-    align = "v",
-    # labels = c("A", "B", "C", "D", "E", "F"),
-    # # labels = 'AUTO',
-    # label_size = 10,
-    hjust = -1,
-    nrow = 1,
-    ncol = 2,
-    rel_widths = c(0.05, 1),
-    rel_heighs = c(1, 1)
-  )
-
-  fig2_plot_grid2
-}
-
-
-plot_health_levels_gaps <- function(main_path, save_path, health_grp) {
-  gaps_df <- copy(health_grp)
-
-  ## change scenario names, factor
-  gaps_df[,
-    scenario := paste0(demand_scenario, " demand - ", refining_scenario)
-  ]
-  # gaps_df[, scenario := gsub('BAU', 'Reference', scenario)]
-  gaps_df[, scenario := gsub("LC1.", "Low ", scenario)]
-
-  ## refactor
-  gaps_df[, scenario_title := scenario]
-  gaps_df[, scenario_title := str_replace(scenario_title, " - ", "\n")]
-
-  ## calculate gaps (BAU - scenario)
-  bau_gaps_df <- gaps_df[scen_id == "BAU historic production"]
-  bau_gaps_df <- bau_gaps_df[, c(
-    "year",
-    "demo_cat",
-    "demo_group",
-    "title",
-    "mortality_level_dem"
-  )]
-  setnames(bau_gaps_df, "mortality_level_dem", "bau_mortality_level_dem")
-
-  gaps_df <- merge(
-    gaps_df,
-    bau_gaps_df,
-    by = c("year", "demo_cat", "demo_group", "title"),
-    all.x = T
-  )
-
-  gaps_df[, gap := mortality_level_dem - bau_mortality_level_dem]
-
-  ## change historic to historical
-  gaps_df[, scen_id := str_replace(scen_id, "historic", "historical")]
-  gaps_df[,
-    refining_scenario := str_replace(
-      refining_scenario,
-      "historic",
-      "historical"
-    )
-  ]
-  gaps_df[, scenario := str_replace(scenario, "historic", "historical")]
-  gaps_df[,
-    scenario_title := str_replace(scenario_title, "historic", "historical")
-  ]
-
-  gaps_df$scenario <- factor(
-    gaps_df$scenario,
-    levels = c(
-      "BAU demand - historical production",
-      "BAU demand - historical exports",
-      "BAU demand - low exports",
-      "Low demand - historical exports",
-      "Low demand - low exports",
-      "Low demand - historical production"
-    )
-  )
-
-  gaps_df$scenario_title <- factor(
-    gaps_df$scenario_title,
-    levels = c(
-      "BAU demand\nhistorical production",
-      "BAU demand\nhistorical exports",
-      "BAU demand\nlow exports",
-      "Low demand\nhistorical exports",
-      "Low demand\nlow exports",
-      "Low demand\nhistorical production"
-    )
-  )
-
-  ## save figure inputs
-  fwrite(
-    gaps_df,
-    file.path(
-      main_path,
-      save_path,
-      "fig-csv-files",
-      "state_levels_fig_gaps_inputs.csv"
-    )
-  )
-  # fwrite(gaps_df, file.path(main_path, "outputs/academic-out/refining/figures/2024-08-beta-adj/fig-csv-files/", "state_levels_fig_gaps_inputs.csv"))
-
-  ## make figures
-  ## ---------------------------------------------------------
-
-  ## scenarios for filtering
-  remove_scen <- c("LC1 historical production", "BAU historical production")
-
-  ## figure a
-  fig_title_vec <- c("Asian", "Black", "Hispanic", "white")
-
-  health_gap_fig_a <- ggplot(
-    gaps_df %>%
-      filter(
-        !scen_id %in% remove_scen,
-        title %in% fig_title_vec,
-        demo_cat == "Race"
-      ) %>%
-      mutate(
-        title = factor(title, levels = c("Black", "Hispanic", "Asian", "white"))
-      ),
-    aes(x = year, y = gap, color = title)
-  ) +
-    geom_line(linewidth = 1, alpha = 0.8) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    facet_grid(demo_cat ~ scenario_title) +
-    scale_color_manual(
-      name = "",
-      values = race_col_pal
-    ) +
-    labs(
-      x = NULL,
-      y = NULL
-    ) +
-    scale_x_continuous(
-      breaks = c(2020, 2045), # Specify tick mark positions
-      labels = c(2020, 2045)
-    ) + # Specify tick mark labels
-    theme_line +
-    # ylim(c(-0.31, 0)) +
-    theme(
-      legend.position = "bottom",
-      legend.title = element_blank(),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      strip.text.x = element_blank(),
-      # axis.text.x = element_blank(),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  legend_figa <- health_gap_fig_a + theme(legend.position = "right")
-
-  legend_a <- get_legend(
-    legend_figa +
-      theme(legend.text = element_text(size = 8))
-  )
-
-  ##
-  health_gap_fig_b <- ggplot(
-    gaps_df %>%
-      filter(
-        !scen_id %in% remove_scen,
-        demo_cat == "DAC"
-      ),
-    aes(x = year, y = gap, lty = title)
-  ) +
-    geom_line(linewidth = 1, alpha = 0.8) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    facet_grid(demo_cat ~ scenario_title) +
-    labs(
-      x = NULL,
-      y = NULL
-    ) +
-    # ylim(c(-0.31, 0)) +
-    scale_x_continuous(
-      breaks = c(2020, 2045), # Specify tick mark positions
-      labels = c(2020, 2045)
-    ) + # Specify tick mark labels
-    scale_linetype_manual(values = dac_lty) +
-    theme_line +
-    theme(
-      legend.position = "bottom",
-      legend.title = element_blank(),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      # strip.text.x = element_blank(),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      # axis.text.x = element_blank(),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  legend_figb <- health_gap_fig_b + theme(legend.position = "right")
-
-  legend_b <- get_legend(
-    legend_figb +
-      theme(legend.text = element_text(size = 8))
-  )
-
-  ##
-  health_gap_fig_c <- ggplot(
-    gaps_df %>%
-      filter(
-        !scen_id %in% remove_scen,
-        demo_cat == "Poverty"
-      ) %>%
-      mutate(
-        title = factor(
-          title,
-          levels = c("Below poverty line", "Above poverty line")
-        )
-      ),
-    aes(x = year, y = gap, lty = title)
-  ) +
-    geom_line(linewidth = 1, alpha = 0.8, color = "black") +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    facet_grid(demo_cat ~ scenario_title) +
-    labs(
-      x = NULL,
-      y = NULL
-    ) +
-    scale_linetype_manual(values = poverty_lty) +
-    scale_x_continuous(
-      breaks = c(2020, 2045), # Specify tick mark positions
-      labels = c(2020, 2045)
-    ) + # Specify tick mark labels
-    # ylim(c(-0.31, 0)) +
-    theme_line +
-    theme(
-      legend.position = "bottom",
-      legend.title = element_blank(),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      legend.key.width = unit(10, "mm"),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      strip.text.x = element_blank(),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  legend_figc <- health_gap_fig_c + theme(legend.position = "right")
-
-  legend_c <- get_legend(
-    legend_figc +
-      theme(legend.text = element_text(size = 8))
-  )
-
-  ## shared y lab
-  # yaxis_lab <- ggdraw() + draw_label(expression(paste("PM"[2.5], " (",mu,"g ", m^{-3},")", " per person, difference from reference")),
-  #                                    size = 8, angle = 90)
-
-  yaxis_lab <- ggdraw() +
-    draw_label(
-      "Avoided mortalities, difference from reference",
-      size = 8,
-      angle = 90
-    )
-
-  # ## plot together
-  # fig2l_a <- plot_grid(
-  #   health_level_fig_a,
-  #   legend_a,
-  #   align = 'h',
-  #   # labels = c("A", "B", "C", "D", "E", "F"),
-  #   # # labels = 'AUTO',
-  #   # label_size = 10,
-  #   hjust = -1,
-  #   nrow = 1,
-  #   ncol = 2,
-  #   rel_widths = c(0.95, 0.5),
-  #   rel_heighs = c(1, 1)
-  # )
-  #
-  #
-
-  gaps_plot_grid <- plot_grid(
-    health_gap_fig_b,
-    health_gap_fig_c,
-    health_gap_fig_a,
-    align = "v",
-    # labels = c("A", "B", "C", "D", "E", "F"),
-    # # labels = 'AUTO',
-    # label_size = 10,
-    hjust = -1,
-    nrow = 3,
-    ncol = 1,
-    rel_widths = c(1, 1, 1, 1),
-    rel_heighs = c(1, 1, 1, 1.05)
-  )
-
-  gaps_plot_grid2 <- plot_grid(
-    yaxis_lab,
-    gaps_plot_grid,
-    align = "v",
-    # labels = c("A", "B", "C", "D", "E", "F"),
-    # # labels = 'AUTO',
-    # label_size = 10,
-    hjust = -1,
-    nrow = 1,
-    ncol = 2,
-    rel_widths = c(0.05, 1),
-    rel_heighs = c(1, 1)
-  )
-
-  gaps_plot_grid2
-}
-
-
-plot_health_levels_gaps_pmil <- function(
-  main_path,
-  save_path,
-  health_grp,
-  refining_mortality,
-  pop_ratios
-) {
-  ## calc 2020 pop by demographic
-  pop_2020 <- refining_mortality %>%
-    filter(year == 2020) %>%
-    select(census_tract, year, pop) %>%
-    unique() %>%
-    left_join(pop_ratios) %>%
-    as.data.table()
-
-  pop_2020[, demo_pop := pop * pct]
-
-  ## summarize by demographic group
-  pop_2020 <- pop_2020[,
-    .(pop_2020 = sum(demo_pop)),
-    by = .(demo_group, demo_cat)
-  ]
-
-  ## compute gaps
-  ## ----------------------------------------------------
-  gaps_df <- copy(health_grp)
-
-  ## change scenario names, factor
-  gaps_df[,
-    scenario := paste0(demand_scenario, " demand - ", refining_scenario)
-  ]
-  # gaps_df[, scenario := gsub('BAU', 'Reference', scenario)]
-  gaps_df[, scenario := gsub("LC1.", "Low ", scenario)]
-
-  ## refactor
-  gaps_df[, scenario_title := scenario]
-  gaps_df[, scenario_title := str_replace(scenario_title, " - ", "\n")]
-
-  ## calculate gaps (BAU - scenario)
-  bau_gaps_df <- gaps_df[scen_id == "BAU historic production"]
-  bau_gaps_df <- bau_gaps_df[, c(
-    "year",
-    "demo_cat",
-    "demo_group",
-    "title",
-    "mortality_level_dem"
-  )]
-  setnames(bau_gaps_df, "mortality_level_dem", "bau_mortality_level_dem")
-
-  gaps_df <- merge(
-    gaps_df,
-    bau_gaps_df,
-    by = c("year", "demo_cat", "demo_group", "title"),
-    all.x = T
-  )
-
-  gaps_df[, gap := mortality_level_dem - bau_mortality_level_dem]
-
-  ## convert to per million
-  gaps_df <- merge(
-    gaps_df,
-    pop_2020,
-    by = c("demo_group", "demo_cat"),
-    all.x = T
-  )
-
-  ## calculate per capita
-  gaps_df[, value := gap / pop_2020]
-  gaps_df[, value_pmil := value * 1e6]
-
-  ## change historic to historical
-  gaps_df[, scen_id := str_replace(scen_id, "historic", "historical")]
-  gaps_df[,
-    refining_scenario := str_replace(
-      refining_scenario,
-      "historic",
-      "historical"
-    )
-  ]
-  gaps_df[, scenario := str_replace(scenario, "historic", "historical")]
-  gaps_df[,
-    scenario_title := str_replace(scenario_title, "historic", "historical")
-  ]
-
-  gaps_df$scenario <- factor(
-    gaps_df$scenario,
-    levels = c(
-      "BAU demand - historical production",
-      "BAU demand - historical exports",
-      "BAU demand - low exports",
-      "Low demand - historical exports",
-      "Low demand - low exports",
-      "Low demand - historical production"
-    )
-  )
-
-  gaps_df$scenario_title <- factor(
-    gaps_df$scenario_title,
-    levels = c(
-      "BAU demand\nhistorical production",
-      "BAU demand\nhistorical exports",
-      "BAU demand\nlow exports",
-      "Low demand\nhistorical exports",
-      "Low demand\nlow exports",
-      "Low demand\nhistorical production"
-    )
-  )
-
-  ## save figure inputs
-  fwrite(
-    gaps_df,
-    file.path(
-      main_path,
-      save_path,
-      "fig-csv-files",
-      "state_levels_fig_gaps_pmil_inputs.csv"
-    )
-  )
-  # fwrite(gaps_df, file.path(main_path, "outputs/academic-out/refining/figures/2024-08-beta-adj/fig-csv-files/", "state_levels_fig_gaps_pmil_inputs.csv"))
-
-  ## make figures
-  ## ---------------------------------------------------------
-
-  ## scenarios for filtering
-  remove_scen <- c("LC1 historical production", "BAU historical production")
-
-  ## figure a
-  fig_title_vec <- c("Asian", "Black", "Hispanic", "white")
-
-  health_gap_fig_a <- ggplot(
-    gaps_df %>%
-      filter(
-        !scen_id %in% remove_scen,
-        title %in% fig_title_vec,
-        demo_cat == "Race"
-      ) %>%
-      mutate(
-        title = factor(title, levels = c("Black", "Hispanic", "Asian", "white"))
-      ),
-    aes(x = year, y = value_pmil, color = title)
-  ) +
-    geom_line(linewidth = 1, alpha = 0.8) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    facet_grid(demo_cat ~ scenario_title) +
-    scale_color_manual(
-      name = "",
-      values = race_col_pal
-    ) +
-    labs(
-      x = NULL,
-      y = NULL
-    ) +
-    scale_x_continuous(
-      breaks = c(2020, 2045), # Specify tick mark positions
-      labels = c(2020, 2045)
-    ) + # Specify tick mark labels
-    theme_line +
-    # ylim(c(-30, 0)) +
-    theme(
-      legend.position = "bottom",
-      legend.title = element_blank(),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      strip.text.x = element_blank(),
-      # axis.text.x = element_blank(),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  legend_figa <- health_gap_fig_a + theme(legend.position = "right")
-
-  legend_a <- get_legend(
-    legend_figa +
-      theme(legend.text = element_text(size = 8))
-  )
-
-  ##
-  health_gap_fig_b <- ggplot(
-    gaps_df %>%
-      filter(
-        !scen_id %in% remove_scen,
-        demo_cat == "DAC"
-      ),
-    aes(x = year, y = value_pmil, lty = title)
-  ) +
-    geom_line(linewidth = 1, alpha = 0.8) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    facet_grid(demo_cat ~ scenario_title) +
-    labs(
-      x = NULL,
-      y = NULL
-    ) +
-    # ylim(c(-30, 0)) +
-    scale_x_continuous(
-      breaks = c(2020, 2045), # Specify tick mark positions
-      labels = c(2020, 2045)
-    ) + # Specify tick mark labels
-    scale_linetype_manual(values = dac_lty) +
-    theme_line +
-    theme(
-      legend.position = "bottom",
-      legend.title = element_blank(),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      # strip.text.x = element_blank(),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      # axis.text.x = element_blank(),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  legend_figb <- health_gap_fig_b + theme(legend.position = "right")
-
-  legend_b <- get_legend(
-    legend_figb +
-      theme(legend.text = element_text(size = 8))
-  )
-
-  ##
-  health_gap_fig_c <- ggplot(
-    gaps_df %>%
-      filter(
-        !scen_id %in% remove_scen,
-        demo_cat == "Poverty"
-      ) %>%
-      mutate(
-        title = factor(
-          title,
-          levels = c("Below poverty line", "Above poverty line")
-        )
-      ),
-    aes(x = year, y = value_pmil, lty = title)
-  ) +
-    geom_line(linewidth = 1, alpha = 0.8, color = "black") +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    facet_grid(demo_cat ~ scenario_title) +
-    labs(
-      x = NULL,
-      y = NULL
-    ) +
-    scale_linetype_manual(values = poverty_lty) +
-    scale_x_continuous(
-      breaks = c(2020, 2045), # Specify tick mark positions
-      labels = c(2020, 2045)
-    ) + # Specify tick mark labels
-    # ylim(c(-30, 0)) +
-    theme_line +
-    theme(
-      legend.position = "bottom",
-      legend.title = element_blank(),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      legend.key.width = unit(10, "mm"),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      strip.text.x = element_blank(),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  legend_figc <- health_gap_fig_c + theme(legend.position = "right")
-
-  legend_c <- get_legend(
-    legend_figc +
-      theme(legend.text = element_text(size = 8))
-  )
-
-  ## shared y lab
-  # yaxis_lab <- ggdraw() + draw_label(expression(paste("PM"[2.5], " (",mu,"g ", m^{-3},")", " per person, difference from reference")),
-  #                                    size = 8, angle = 90)
-
-  yaxis_lab <- ggdraw() +
-    draw_label(
-      "Avoided mortalities per million people, difference from reference",
-      size = 8,
-      angle = 90
-    )
-
-  # ## plot together
-  # fig2l_a <- plot_grid(
-  #   health_level_fig_a,
-  #   legend_a,
-  #   align = 'h',
-  #   # labels = c("A", "B", "C", "D", "E", "F"),
-  #   # # labels = 'AUTO',
-  #   # label_size = 10,
-  #   hjust = -1,
-  #   nrow = 1,
-  #   ncol = 2,
-  #   rel_widths = c(0.95, 0.5),
-  #   rel_heighs = c(1, 1)
-  # )
-  #
-  #
-
-  gaps_plot_grid <- plot_grid(
-    health_gap_fig_b,
-    health_gap_fig_c,
-    health_gap_fig_a,
-    align = "v",
-    # labels = c("A", "B", "C", "D", "E", "F"),
-    # # labels = 'AUTO',
-    # label_size = 10,
-    hjust = -1,
-    nrow = 3,
-    ncol = 1,
-    rel_widths = c(1, 1, 1, 1),
-    rel_heighs = c(1, 1, 1, 1.05)
-  )
-
-  gaps_plot_grid2 <- plot_grid(
-    yaxis_lab,
-    gaps_plot_grid,
-    align = "v",
-    # labels = c("A", "B", "C", "D", "E", "F"),
-    # # labels = 'AUTO',
-    # label_size = 10,
-    hjust = -1,
-    nrow = 1,
-    ncol = 2,
-    rel_widths = c(0.05, 1),
-    rel_heighs = c(1, 1)
-  )
-
-  gaps_plot_grid2
-}
-
-
-## plot health pm2.5 gaps
-
-plot_health_levels_gaps_pm25 <- function(main_path, save_path, health_grp) {
-  gaps_df <- copy(health_grp)
-
-  ## change scenario names, factor
-  gaps_df[,
-    scenario := paste0(demand_scenario, " demand - ", refining_scenario)
-  ]
-  # gaps_df[, scenario := gsub('BAU', 'Reference', scenario)]
-  gaps_df[, scenario := gsub("LC1.", "Low ", scenario)]
-
-  ## refactor
-  gaps_df[, scenario_title := scenario]
-  gaps_df[, scenario_title := str_replace(scenario_title, " - ", "\n")]
-
-  ## calculate gaps (BAU - scenario)
-  bau_gaps_df <- gaps_df[scen_id == "BAU historic production"]
-  bau_gaps_df <- bau_gaps_df[, c(
-    "year",
-    "demo_cat",
-    "demo_group",
-    "title",
-    "num_over_den"
-  )]
-  setnames(bau_gaps_df, "num_over_den", "bau_num_over_den")
-
-  gaps_df <- merge(
-    gaps_df,
-    bau_gaps_df,
-    by = c("year", "demo_cat", "demo_group", "title"),
-    all.x = T
-  )
-
-  gaps_df[, gap := num_over_den - bau_num_over_den]
-
-  ## change historic to historical
-  gaps_df[, scen_id := str_replace(scen_id, "historic", "historical")]
-  gaps_df[,
-    refining_scenario := str_replace(
-      refining_scenario,
-      "historic",
-      "historical"
-    )
-  ]
-  gaps_df[, scenario := str_replace(scenario, "historic", "historical")]
-  gaps_df[,
-    scenario_title := str_replace(scenario_title, "historic", "historical")
-  ]
-
-  gaps_df$scenario <- factor(
-    gaps_df$scenario,
-    levels = c(
-      "BAU demand - historical production",
-      "BAU demand - historical exports",
-      "BAU demand - low exports",
-      "Low demand - historical exports",
-      "Low demand - low exports",
-      "Low demand - historical production"
-    )
-  )
-
-  gaps_df$scenario_title <- factor(
-    gaps_df$scenario_title,
-    levels = c(
-      "BAU demand\nhistorical production",
-      "BAU demand\nhistorical exports",
-      "BAU demand\nlow exports",
-      "Low demand\nhistorical exports",
-      "Low demand\nlow exports",
-      "Low demand\nhistorical production"
-    )
-  )
-
-  ## save figure inputs
-  fwrite(
-    gaps_df,
-    file.path(
-      main_path,
-      save_path,
-      "fig-csv-files",
-      "state_levels_fig_gaps_pm25_inputs.csv"
-    )
-  )
-  # fwrite(gaps_df, file.path(main_path, "outputs/academic-out/refining/figures/2024-08-beta-adj/fig-csv-files/", "state_levels_fig_gaps_pm25_inputs.csv"))
-
-  ## make figures
-  ## ---------------------------------------------------------
-
-  ## scenarios for filtering
-  remove_scen <- c("LC1 historical production", "BAU historical production")
-
-  ## figure a
-  fig_title_vec <- c("Asian", "Black", "Hispanic", "white")
-
-  health_gap_fig_a <- ggplot(
-    gaps_df %>%
-      filter(
-        !scen_id %in% remove_scen,
-        title %in% fig_title_vec,
-        demo_cat == "Race"
-      ) %>%
-      mutate(
-        title = factor(title, levels = c("Black", "Hispanic", "Asian", "white"))
-      ),
-    aes(x = year, y = gap, color = title)
-  ) +
-    geom_line(linewidth = 1, alpha = 0.8) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    facet_grid(demo_cat ~ scenario_title) +
-    scale_color_manual(
-      name = "",
-      values = race_col_pal
-    ) +
-    labs(
-      x = NULL,
-      y = NULL
-    ) +
-    scale_x_continuous(
-      breaks = c(2020, 2045), # Specify tick mark positions
-      labels = c(2020, 2045)
-    ) + # Specify tick mark labels
-    theme_line +
-    # ylim(c(-0.31, 0)) +
-    theme(
-      legend.position = "bottom",
-      legend.title = element_blank(),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      strip.text.x = element_blank(),
-      # axis.text.x = element_blank(),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  legend_figa <- health_gap_fig_a + theme(legend.position = "right")
-
-  legend_a <- get_legend(
-    legend_figa +
-      theme(legend.text = element_text(size = 8))
-  )
-
-  ##
-  health_gap_fig_b <- ggplot(
-    gaps_df %>%
-      filter(
-        !scen_id %in% remove_scen,
-        demo_cat == "DAC"
-      ),
-    aes(x = year, y = gap, lty = title)
-  ) +
-    geom_line(linewidth = 1, alpha = 0.8) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    facet_grid(demo_cat ~ scenario_title) +
-    labs(
-      x = NULL,
-      y = NULL
-    ) +
-    # ylim(c(-0.31, 0)) +
-    scale_x_continuous(
-      breaks = c(2020, 2045), # Specify tick mark positions
-      labels = c(2020, 2045)
-    ) + # Specify tick mark labels
-    scale_linetype_manual(values = dac_lty) +
-    theme_line +
-    theme(
-      legend.position = "bottom",
-      legend.title = element_blank(),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      # strip.text.x = element_blank(),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      # axis.text.x = element_blank(),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  legend_figb <- health_gap_fig_b + theme(legend.position = "right")
-
-  legend_b <- get_legend(
-    legend_figb +
-      theme(legend.text = element_text(size = 8))
-  )
-
-  ##
-  health_gap_fig_c <- ggplot(
-    gaps_df %>%
-      filter(
-        !scen_id %in% remove_scen,
-        demo_cat == "Poverty"
-      ) %>%
-      mutate(
-        title = factor(
-          title,
-          levels = c("Below poverty line", "Above poverty line")
-        )
-      ),
-    aes(x = year, y = gap, lty = title)
-  ) +
-    geom_line(linewidth = 1, alpha = 0.8, color = "black") +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    facet_grid(demo_cat ~ scenario_title) +
-    labs(
-      x = NULL,
-      y = NULL
-    ) +
-    scale_linetype_manual(values = poverty_lty) +
-    scale_x_continuous(
-      breaks = c(2020, 2045), # Specify tick mark positions
-      labels = c(2020, 2045)
-    ) + # Specify tick mark labels
-    # ylim(c(-0.31, 0)) +
-    theme_line +
-    theme(
-      legend.position = "bottom",
-      legend.title = element_blank(),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      legend.key.width = unit(10, "mm"),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      strip.text.x = element_blank(),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  legend_figc <- health_gap_fig_c + theme(legend.position = "right")
-
-  legend_c <- get_legend(
-    legend_figc +
-      theme(legend.text = element_text(size = 8))
-  )
-
-  ## shared y lab
-  yaxis_lab <- ggdraw() +
-    draw_label(
-      expression(paste(
-        "PM"[2.5],
-        " (",
-        mu,
-        "g ",
-        m^{
-          -3
-        },
-        ")",
-        " per person, difference from reference"
-      )),
-      size = 8,
-      angle = 90
-    )
-
-  # ## plot together
-  # fig2l_a <- plot_grid(
-  #   health_level_fig_a,
-  #   legend_a,
-  #   align = 'h',
-  #   # labels = c("A", "B", "C", "D", "E", "F"),
-  #   # # labels = 'AUTO',
-  #   # label_size = 10,
-  #   hjust = -1,
-  #   nrow = 1,
-  #   ncol = 2,
-  #   rel_widths = c(0.95, 0.5),
-  #   rel_heighs = c(1, 1)
-  # )
-  #
-  #
-
-  gaps_plot_grid <- plot_grid(
-    health_gap_fig_b,
-    health_gap_fig_c,
-    health_gap_fig_a,
-    align = "v",
-    # labels = c("A", "B", "C", "D", "E", "F"),
-    # # labels = 'AUTO',
-    # label_size = 10,
-    hjust = -1,
-    nrow = 3,
-    ncol = 1,
-    rel_widths = c(1, 1, 1, 1),
-    rel_heighs = c(1, 1, 1, 1.05)
-  )
-
-  gaps_plot_grid2 <- plot_grid(
-    yaxis_lab,
-    gaps_plot_grid,
-    align = "v",
-    # labels = c("A", "B", "C", "D", "E", "F"),
-    # # labels = 'AUTO',
-    # label_size = 10,
-    hjust = -1,
-    nrow = 1,
-    ncol = 2,
-    rel_widths = c(0.05, 1),
-    rel_heighs = c(1, 1)
-  )
-
-  gaps_plot_grid2
-}
-
-###########################################################################
-## plot labor
-###########################################################################
-
-plot_labor_levels <- function(
-  main_path,
-  save_path,
-  ref_labor_demog_yr,
-  refining_mortality,
-  pop_ratios
-) {
-  # ## calc 2020 pop by demographic
-  # pop_2020 <- refining_mortality %>%
-  #   filter(year == 2020) %>%
-  #   select(census_tract, year, pop) %>%
-  #   unique() %>%
-  #   left_join(pop_ratios) %>%
-  #   as.data.table()
-  #
-  # pop_2020[, demo_pop := pop * pct]
-  #
-  # ## summarize by demographic group
-  # pop_2020 <- pop_2020[, .(pop_2020 = sum(demo_pop)),
-  #                      by = .(demo_group, demo_cat)]
-
-  fig2_l_df <- copy(ref_labor_demog_yr)
-
-  ## change scenario names, factor
-  fig2_l_df[,
-    scenario := paste0(demand_scenario, " demand - ", refining_scenario)
-  ]
-  # fig2_l_df[, scenario := gsub('BAU', 'Reference', scenario)]
-  fig2_l_df[, scenario := gsub("LC1.", "Low ", scenario)]
-
-  ## scenarios for filtering
-  # remove_scen <- c('LC1 historic production', 'BAU low exports', 'LC1 historic exports')
-  remove_scen <- c("Low demand - historical production")
-
-  ## add scenario title
-  fig2_l_df[, scenario_title := str_replace(scenario, " - ", "\n")]
-
-  # ## sum for state
-  # fig2_l_df <- fig2_l_df[, .(
-  #   sum_demo_emp = sum(demo_emp),
-  #   sum_demo_comp_pv_h = sum(demo_comp_pv_h),
-  #   sum_demo_comp_pv_l = sum(demo_comp_pv_l)
-  # ),
-  # by = .(
-  #   year, demand_scenario, refining_scenario, oil_price_scenario,
-  #   scenario, scenario_title, demo_cat, demo_group, title
-  # )
-  # ]
-
-  # ## merge with 2020 pop
-  # fig2_l_df <- merge(fig2_l_df, pop_2020,
-  #                    by = c("demo_cat", "demo_group"),
-  #                    all.x = T)
-
-  # ## calculate per capita
-  # fig2_l_df[, demo_emp_pc := sum_demo_emp / pop_2020]
-  # fig2_l_df[, demo_comp_pc_h := sum_demo_comp_pv_h / pop_2020]
-  # fig2_l_df[, demo_comp_pc_l := sum_demo_comp_pv_l / pop_2020]
-  #
-  # ## select columns
-  # fig2_l_df <- fig2_l_df[, .(year, demand_scenario, refining_scenario,
-  #                            scenario, scenario_title, demo_cat, demo_group, title, sum_demo_emp,
-  #                            demo_emp_pc, sum_demo_comp_pv_h, sum_demo_comp_pv_l, demo_comp_pc_h, demo_comp_pc_l)]
-
-  ## change historic to historical
-  fig2_l_df[,
-    refining_scenario := str_replace(
-      refining_scenario,
-      "historic",
-      "historical"
-    )
-  ]
-  fig2_l_df[, scenario := str_replace(scenario, "historic", "historical")]
-  fig2_l_df[,
-    scenario_title := str_replace(scenario_title, "historic", "historical")
-  ]
-
-  ## refactor
-  fig2_l_df$scenario_title <- factor(
-    fig2_l_df$scenario_title,
-    levels = c(
-      "BAU demand\nhistorical production",
-      "BAU demand\nhistorical exports",
-      "BAU demand\nlow exports",
-      "Low demand\nhistorical exports",
-      "Low demand\nlow exports",
-      "Low demand\nhistorical production"
-    )
-  )
-
-  ## refactor
-  fig2_l_df$scenario <- factor(
-    fig2_l_df$scenario,
-    levels = c(
-      "BAU demand - historical production",
-      "BAU demand - historical exports",
-      "BAU demand - low exports",
-      "Low demand - historical exports",
-      "Low demand - low exports",
-      "Low demand - historical production"
-    )
-  )
-
-  # ## test to see if this matches fig 5 outputs
-  # test_state <- fig2_l_df %>%
-  #   group_by(demand_scenario, refining_scenario, scenario, scenario_title, demo_cat, demo_group) %>%
-  #   summarise(sum_demo_comp_pv = sum(sum_demo_comp_pv),
-  #             demo_comp_pc = sum(demo_comp_pc)) %>%
-  #   ungroup()
-  #
-  # bau_state <- test_state %>%
-  #   filter(demand_scenario == "BAU" & refining_scenario == "historical production") %>%
-  #   select(demo_cat, demo_group, sum_demo_comp_pv, demo_comp_pc) %>%
-  #   rename(bau_comp = sum_demo_comp_pv,
-  #          bau_comp_pc = demo_comp_pc)
-  #
-  # test_state <- test_state %>%
-  #   left_join(bau_state) %>%
-  #   mutate(diff = sum_demo_comp_pv - bau_comp, nj,
-  #          diff_pc = demo_comp_pc - bau_comp_pc)
-  #
-  # ggplot(test_state %>% filter(!scenario_title %in% c("BAU demand\nhistorical production",
-  #                                                  "Low demand\nhistorical production")), aes(y = diff_pc, x = scenario_title, color = demo_group)) +
-  #   geom_point() +
-  #   facet_wrap(~demo_cat, nrow = 3)
-  #
-
-  ## save figure inputs
-  fwrite(
-    fig2_l_df,
-    file.path(
-      main_path,
-      save_path,
-      "fig-csv-files",
-      "state_levels_labor_fig_inputs.csv"
-    )
-  )
-  # fwrite(fig2_l_df, file.path(main_path, "outputs/academic-out/refining/figures/2024-08-beta-adj/fig-csv-files/", "state_levels_labor_fig_inputs.csv"))
-
-  ## labor figure
-  fig_title_vec <- c("Asian", "Black", "Hispanic", "white")
-
-  # ## make the df longer - split in two, rbind
-  # fig2_l_df_h <- fig2_l_df %>%
-  #   select(-sum_demo_comp_pv_l, -demo_comp_pc_l) %>%
-  #   pivot_longer(sum_demo_comp_pv_h:demo_comp_pc_h, names_to = "comp_metric", values_to = "high_est") %>%
-  #   mutate(comp_metric = substr(comp_metric, 1, nchar(comp_metric) - 2))
-  #
-  # fig2_l_df <- fig2_l_df %>%
-  #   select(-sum_demo_comp_pv_h, -demo_comp_pc_h) %>%
-  #   pivot_longer(sum_demo_comp_pv_l:demo_comp_pc_l, names_to = "comp_metric", values_to = "low_est") %>%
-  #   mutate(comp_metric = substr(comp_metric, 1, nchar(comp_metric) - 2)) %>%
-  #   left_join(fig2_l_df_h)
-  #
-
-  ## labor fig a
-  labor_level_fig_a <- ggplot(
-    fig2_l_df %>%
-      filter(
-        !scenario %in% remove_scen,
-        title %in% fig_title_vec,
-        demo_cat == "Race",
-        oil_price_scenario == "reference case"
-      ) %>%
-      mutate(
-        title = factor(title, levels = c("Hispanic", "white", "Asian", "Black"))
-      ),
-    aes(x = year, y = sum_demo_emp / 1000, color = title, group = title)
-  ) +
-    geom_line(linewidth = 1, alpha = 0.8) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    facet_grid(demo_cat ~ scenario_title) +
-    scale_color_manual(
-      name = "",
-      values = race_col_pal
-    ) +
-    labs(
-      x = NULL,
-      y = NULL
-    ) +
-    # ylim(c(0, 45)) +
-    scale_x_continuous(
-      breaks = c(2020, 2045), # Specify tick mark positions
-      labels = c(2020, 2045)
-    ) + # Specify tick mark labels
-    theme_line +
-    theme(
-      legend.position = "bottom",
-      legend.title = element_blank(),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      strip.text.x = element_blank(),
-      # axis.text.x = element_blank(),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  # legend_figa <- labor_level_fig_a + theme(legend.position = "right")
-  #
-  # legend_a <- get_legend(
-  #   legend_figa +
-  #     theme(legend.text = element_text(size = 8)))
-
-  ##
-  labor_level_fig_b <- ggplot(
-    fig2_l_df %>%
-      filter(
-        !scenario %in% remove_scen,
-        demo_cat == "DAC",
-        oil_price_scenario == "reference case"
-      ),
-    aes(x = year, y = sum_demo_emp / 1000, lty = title)
-  ) +
-    geom_line(linewidth = 1, alpha = 0.8) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    facet_grid(demo_cat ~ scenario_title) +
-    scale_color_manual(
-      name = "",
-      values = dac_lty
-    ) +
-    labs(
-      x = NULL,
-      y = NULL
-    ) +
-    # ylim(c(0, 45)) +
-    scale_x_continuous(
-      breaks = c(2020, 2045), # Specify tick mark positions
-      labels = c(2020, 2045)
-    ) + # Specify tick mark labels
-    theme_line +
-    theme(
-      legend.position = "bottom",
-      legend.title = element_blank(),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      # strip.text.x = element_blank(),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      # axis.text.x = element_blank(),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  # legend_figb <- labor_level_fig_b + theme(legend.position = "right")
-  #
-  # legend_b <- get_legend(
-  #   legend_figb +
-  #     theme(legend.text = element_text(size = 8)))
-
-  ##
-  labor_level_fig_c <- ggplot(
-    fig2_l_df %>%
-      filter(
-        !scenario %in% remove_scen,
-        demo_cat == "Poverty",
-        oil_price_scenario == "reference case"
-      ) %>%
-      mutate(
-        title = factor(
-          title,
-          levels = c("Below poverty line", "Above poverty line")
-        )
-      ),
-    aes(x = year, y = sum_demo_emp / 1000, lty = title)
-  ) +
-    geom_line(linewidth = 1, alpha = 0.8, color = "black") +
-    scale_color_manual(
-      name = "",
-      values = poverty_lty
-    ) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    facet_grid(demo_cat ~ scenario) +
-    labs(
-      x = NULL,
-      y = NULL
-    ) +
-    # ylim(c(0, 45)) +
-    scale_x_continuous(
-      breaks = c(2020, 2045), # Specify tick mark positions
-      labels = c(2020, 2045)
-    ) + # Specify tick mark labels
-    theme_line +
-    theme(
-      legend.position = "bottom",
-      legend.title = element_blank(),
-      # axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      legend.key.width = unit(10, "mm"),
-      strip.text.x = element_blank(),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  # legend_figc <- health_level_fig_c + theme(legend.position = "right")
-  #
-  # legend_c <- get_legend(
-  #   legend_figc +
-  #     theme(legend.text = element_text(size = 8)))
-
-  ## shared y lab
-  yaxis_lab <- ggdraw() +
-    draw_label("Labor: FTE job-years (thousand)", size = 8, angle = 90)
-
-  # ## plot together
-  # fig2l_a <- plot_grid(
-  #   health_level_fig_a,
-  #   legend_a,
-  #   align = 'h',
-  #   # labels = c("A", "B", "C", "D", "E", "F"),
-  #   # # labels = 'AUTO',
-  #   # label_size = 10,
-  #   hjust = -1,
-  #   nrow = 1,
-  #   ncol = 2,
-  #   rel_widths = c(0.95, 0.5),
-  #   rel_heighs = c(1, 1)
-  # )
-  #
-  #
-
-  fig2_l_plot_grid <- plot_grid(
-    labor_level_fig_b,
-    labor_level_fig_c,
-    labor_level_fig_a,
-    align = "v",
-    # labels = c("A", "B", "C", "D", "E", "F"),
-    # # labels = 'AUTO',
-    # label_size = 10,
-    hjust = -1,
-    nrow = 3,
-    ncol = 1,
-    rel_widths = c(1, 1, 1, 1),
-    rel_heighs = c(1, 1, 1, 1.05)
-  )
-
-  fig2_l_plot_grid2 <- plot_grid(
-    yaxis_lab,
-    fig2_l_plot_grid,
-    align = "v",
-    # labels = c("A", "B", "C", "D", "E", "F"),
-    # # labels = 'AUTO',
-    # label_size = 10,
-    hjust = -1,
-    nrow = 1,
-    ncol = 2,
-    rel_widths = c(0.05, 1),
-    rel_heighs = c(1, 1)
-  )
-
-  fig2_l_plot_grid2
-}
-
-
-plot_labor_levels_pmil <- function(
-  main_path,
-  save_path,
-  ref_labor_demog_yr,
-  refining_mortality,
-  pop_ratios
-) {
-  ## calc 2020 pop by demographic
-  pop_2020 <- refining_mortality %>%
-    filter(year == 2020) %>%
-    select(census_tract, year, pop) %>%
-    unique() %>%
-    left_join(pop_ratios) %>%
-    as.data.table()
-
-  pop_2020[, demo_pop := pop * pct]
-
-  ## summarize by demographic group
-  pop_2020 <- pop_2020[,
-    .(pop_2020 = sum(demo_pop)),
-    by = .(demo_group, demo_cat)
-  ]
-
-  fig2_l_df <- copy(ref_labor_demog_yr)
-
-  ## change scenario names, factor
-  fig2_l_df[,
-    scenario := paste0(demand_scenario, " demand - ", refining_scenario)
-  ]
-  # fig2_l_df[, scenario := gsub('BAU', 'Reference', scenario)]
-  fig2_l_df[, scenario := gsub("LC1.", "Low ", scenario)]
-
-  ## scenarios for filtering
-  # remove_scen <- c('LC1 historic production', 'BAU low exports', 'LC1 historic exports')
-  remove_scen <- c("Low demand - historical production")
-
-  ## add scenario title
-  fig2_l_df[, scenario_title := str_replace(scenario, " - ", "\n")]
-
-  #
-  #   ## sum for state
-  #   fig2_l_df <- fig2_l_df[, .(
-  #     sum_demo_emp = sum(demo_emp),
-  #     sum_demo_comp_pv_h = sum(demo_comp_pv_h),
-  #     sum_demo_comp_pv_l = sum(demo_comp_pv_l)
-  #   ),
-  #   by = .(
-  #     year, demand_scenario, refining_scenario, oil_price_scenario,
-  #     scenario, scenario_title, demo_cat, demo_group, title
-  #   )
-  #   ]
-
-  ## merge with 2020 pop
-  fig2_l_df <- merge(
-    fig2_l_df,
-    pop_2020,
-    by = c("demo_cat", "demo_group"),
-    all.x = T
-  )
-
-  ## calculate per capita
-  fig2_l_df[, demo_emp_pc := sum_demo_emp / pop_2020]
-  fig2_l_df[, demo_emp_pmil := demo_emp_pc * 1e6]
-  fig2_l_df[, demo_comp_pc_h := sum_demo_comp_pv_h / pop_2020]
-  fig2_l_df[, demo_comp_pc_pmil_h := demo_comp_pc_h * 1e6]
-  fig2_l_df[, demo_comp_pc_l := sum_demo_comp_pv_l / pop_2020]
-  fig2_l_df[, demo_comp_pc_pmil_l := demo_comp_pc_l * 1e6]
-
-  ## select columns
-  fig2_l_df <- fig2_l_df[, .(
-    year,
-    demand_scenario,
-    refining_scenario,
-    oil_price_scenario,
-    scenario,
-    scenario_title,
-    demo_cat,
-    demo_group,
-    title,
-    sum_demo_emp,
-    demo_emp_pc,
-    demo_emp_pmil,
-    sum_demo_comp_pv_h,
-    sum_demo_comp_pv_l,
-    demo_comp_pc_pmil_h,
-    demo_comp_pc_pmil_l,
-    demo_comp_pc_h,
-    demo_comp_pc_l
-  )]
-
-  ## change historic to historical
-  fig2_l_df[,
-    refining_scenario := str_replace(
-      refining_scenario,
-      "historic",
-      "historical"
-    )
-  ]
-  fig2_l_df[, scenario := str_replace(scenario, "historic", "historical")]
-  fig2_l_df[,
-    scenario_title := str_replace(scenario_title, "historic", "historical")
-  ]
-
-  ## refactor
-  fig2_l_df$scenario_title <- factor(
-    fig2_l_df$scenario_title,
-    levels = c(
-      "BAU demand\nhistorical production",
-      "BAU demand\nhistorical exports",
-      "BAU demand\nlow exports",
-      "Low demand\nhistorical exports",
-      "Low demand\nlow exports",
-      "Low demand\nhistorical production"
-    )
-  )
-
-  ## refactor
-  fig2_l_df$scenario <- factor(
-    fig2_l_df$scenario,
-    levels = c(
-      "BAU demand - historical production",
-      "BAU demand - historical exports",
-      "BAU demand - low exports",
-      "Low demand - historical exports",
-      "Low demand - low exports",
-      "Low demand - historical production"
-    )
-  )
-
-  # ## test to see if this matches fig 5 outputs
-  # test_state <- fig2_l_df %>%
-  #   group_by(demand_scenario, refining_scenario, scenario, scenario_title, demo_cat, demo_group) %>%
-  #   summarise(sum_demo_comp_pv = sum(sum_demo_comp_pv),
-  #             demo_comp_pc = sum(demo_comp_pc)) %>%
-  #   ungroup()
-  #
-  # bau_state <- test_state %>%
-  #   filter(demand_scenario == "BAU" & refining_scenario == "historical production") %>%
-  #   select(demo_cat, demo_group, sum_demo_comp_pv, demo_comp_pc) %>%
-  #   rename(bau_comp = sum_demo_comp_pv,
-  #          bau_comp_pc = demo_comp_pc)
-  #
-  # test_state <- test_state %>%
-  #   left_join(bau_state) %>%
-  #   mutate(diff = sum_demo_comp_pv - bau_comp, nj,
-  #          diff_pc = demo_comp_pc - bau_comp_pc)
-  #
-  # ggplot(test_state %>% filter(!scenario_title %in% c("BAU demand\nhistorical production",
-  #                                                  "Low demand\nhistorical production")), aes(y = diff_pc, x = scenario_title, color = demo_group)) +
-  #   geom_point() +
-  #   facet_wrap(~demo_cat, nrow = 3)
-  #
-
-  ## save figure inputs
-  fwrite(
-    fig2_l_df,
-    file.path(
-      main_path,
-      save_path,
-      "fig-csv-files",
-      "state_levels_labor_pmil_fig_inputs.csv"
-    )
-  )
-  # fwrite(fig2_l_df, file.path(main_path, "outputs/academic-out/refining/figures/2024-08-beta-adj/fig-csv-files/", "state_levels_labor_pmil_fig_inputs.csv"))
-
-  ## labor figure
-  fig_title_vec <- c("Asian", "Black", "Hispanic", "white")
-
-  ## make the df longer - split in two, rbind
-  fig2_l_df_h <- fig2_l_df %>%
-    select(-sum_demo_comp_pv_l, -demo_comp_pc_pmil_l, -demo_comp_pc_l) %>%
-    pivot_longer(
-      sum_demo_comp_pv_h:demo_comp_pc_h,
-      names_to = "comp_metric",
-      values_to = "no_re_emp"
-    ) %>%
-    mutate(comp_metric = substr(comp_metric, 1, nchar(comp_metric) - 2)) %>%
-    as.data.table()
-
-  fig2_l_df <- fig2_l_df %>%
-    select(-sum_demo_comp_pv_h, -demo_comp_pc_pmil_h, -demo_comp_pc_h) %>%
-    pivot_longer(
-      sum_demo_comp_pv_l:demo_comp_pc_l,
-      names_to = "comp_metric",
-      values_to = "with_re_emp"
-    ) %>%
-    mutate(comp_metric = substr(comp_metric, 1, nchar(comp_metric) - 2)) %>%
-    left_join(fig2_l_df_h) %>%
-    as.data.table()
-
-  ## just employment
-  fig2_l_df <- fig2_l_df %>%
-    select(year:demo_emp_pmil) %>%
-    unique()
-
-  ## labor fig a
-  labor_level_fig_a <- ggplot(
-    fig2_l_df %>%
-      filter(
-        !scenario %in% remove_scen,
-        title %in% fig_title_vec,
-        demo_cat == "Race",
-        oil_price_scenario == "reference case"
-      ) %>%
-      mutate(
-        title = factor(title, levels = c("Hispanic", "white", "Asian", "Black"))
-      ),
-    aes(x = year, y = demo_emp_pmil, color = title, group = title)
-  ) +
-    geom_line(linewidth = 1, alpha = 0.8) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    facet_grid(demo_cat ~ scenario_title) +
-    scale_color_manual(
-      name = "",
-      values = race_col_pal
-    ) +
-    labs(
-      x = NULL,
-      y = NULL
-    ) +
-    # ylim(c(0, 3500)) +
-    scale_x_continuous(
-      breaks = c(2020, 2045), # Specify tick mark positions
-      labels = c(2020, 2045)
-    ) + # Specify tick mark labels
-    theme_line +
-    theme(
-      legend.position = "bottom",
-      legend.title = element_blank(),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      strip.text.x = element_blank(),
-      # axis.text.x = element_blank(),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  # legend_figa <- labor_level_fig_a + theme(legend.position = "right")
-  #
-  # legend_a <- get_legend(
-  #   legend_figa +
-  #     theme(legend.text = element_text(size = 8)))
-
-  ##
-  labor_level_fig_b <- ggplot(
-    fig2_l_df %>%
-      filter(
-        !scenario %in% remove_scen,
-        demo_cat == "DAC",
-        oil_price_scenario == "reference case"
-      ),
-    aes(x = year, y = demo_emp_pmil, lty = title)
-  ) +
-    geom_line(linewidth = 1, alpha = 0.8) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    facet_grid(demo_cat ~ scenario_title) +
-    scale_color_manual(
-      name = "",
-      values = dac_lty
-    ) +
-    labs(
-      x = NULL,
-      y = NULL
-    ) +
-    # ylim(c(0, 3500)) +
-    scale_x_continuous(
-      breaks = c(2020, 2045), # Specify tick mark positions
-      labels = c(2020, 2045)
-    ) + # Specify tick mark labels
-    theme_line +
-    theme(
-      legend.position = "bottom",
-      legend.title = element_blank(),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      # strip.text.x = element_blank(),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      # axis.text.x = element_blank(),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  # legend_figb <- labor_level_fig_b + theme(legend.position = "right")
-  #
-  # legend_b <- get_legend(
-  #   legend_figb +
-  #     theme(legend.text = element_text(size = 8)))
-
-  ##
-  labor_level_fig_c <- ggplot(
-    fig2_l_df %>%
-      filter(
-        !scenario %in% remove_scen,
-        demo_cat == "Poverty",
-        oil_price_scenario == "reference case"
-      ) %>%
-      mutate(
-        title = factor(
-          title,
-          levels = c("Below poverty line", "Above poverty line")
-        )
-      ),
-    aes(x = year, y = demo_emp_pmil, lty = title)
-  ) +
-    geom_line(linewidth = 1, alpha = 0.8, color = "black") +
-    scale_color_manual(
-      name = "",
-      values = poverty_lty
-    ) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    facet_grid(demo_cat ~ scenario) +
-    labs(
-      x = NULL,
-      y = NULL
-    ) +
-    # ylim(c(0, 3500)) +
-    scale_x_continuous(
-      breaks = c(2020, 2045), # Specify tick mark positions
-      labels = c(2020, 2045)
-    ) + # Specify tick mark labels
-    theme_line +
-    theme(
-      legend.position = "bottom",
-      legend.title = element_blank(),
-      # axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      legend.key.width = unit(10, "mm"),
-      strip.text.x = element_blank(),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  # legend_figc <- health_level_fig_c + theme(legend.position = "right")
-  #
-  # legend_c <- get_legend(
-  #   legend_figc +
-  #     theme(legend.text = element_text(size = 8)))
-
-  ## shared y lab
-  yaxis_lab <- ggdraw() +
-    draw_label("Labor: FTE job-years per million people", size = 8, angle = 90)
-
-  # ## plot together
-  # fig2l_a <- plot_grid(
-  #   health_level_fig_a,
-  #   legend_a,
-  #   align = 'h',
-  #   # labels = c("A", "B", "C", "D", "E", "F"),
-  #   # # labels = 'AUTO',
-  #   # label_size = 10,
-  #   hjust = -1,
-  #   nrow = 1,
-  #   ncol = 2,
-  #   rel_widths = c(0.95, 0.5),
-  #   rel_heighs = c(1, 1)
-  # )
-  #
-  #
-
-  fig2_l_plot_grid <- plot_grid(
-    labor_level_fig_b,
-    labor_level_fig_c,
-    labor_level_fig_a,
-    align = "v",
-    # labels = c("A", "B", "C", "D", "E", "F"),
-    # # labels = 'AUTO',
-    # label_size = 10,
-    hjust = -1,
-    nrow = 3,
-    ncol = 1,
-    rel_widths = c(1, 1, 1, 1),
-    rel_heighs = c(1, 1, 1, 1.05)
-  )
-
-  fig2_l_plot_grid2 <- plot_grid(
-    yaxis_lab,
-    fig2_l_plot_grid,
-    align = "v",
-    # labels = c("A", "B", "C", "D", "E", "F"),
-    # # labels = 'AUTO',
-    # label_size = 10,
-    hjust = -1,
-    nrow = 1,
-    ncol = 2,
-    rel_widths = c(0.05, 1),
-    rel_heighs = c(1, 1)
-  )
-
-  fig2_l_plot_grid2
-}
-
-
-plot_labor_levels_gaps <- function(
-  main_path,
-  save_path,
-  ref_labor_demog_yr,
-  refining_mortality,
-  pop_ratios
-) {
-  # ## calc 2020 pop by demographic
-  # pop_2020 <- refining_mortality %>%
-  #   filter(year == 2020) %>%
-  #   select(census_tract, year, pop) %>%
-  #   unique() %>%
-  #   left_join(pop_ratios) %>%
-  #   as.data.table()
-  #
-  # pop_2020[, demo_pop := pop * pct]
-  #
-  # ## summarize by demographic group
-  # pop_2020 <- pop_2020[, .(pop_2020 = sum(demo_pop)),
-  #                      by = .(demo_group, demo_cat)]
-  #
-  ## labor outputs
-  l_gaps_df <- copy(ref_labor_demog_yr)
-  l_gaps_df <- l_gaps_df[oil_price_scenario == "reference case", ]
-
-  ## change scenario names, factor
-  l_gaps_df[,
-    scenario := paste0(demand_scenario, " demand - ", refining_scenario)
-  ]
-  # gaps_df[, scenario := gsub('BAU', 'Reference', scenario)]
-  l_gaps_df[, scenario := gsub("LC1.", "Low ", scenario)]
-
-  ## add scenario title
-  l_gaps_df[, scenario_title := scenario]
-  l_gaps_df[, scenario_title := str_replace(scenario_title, " - ", "\n")]
-
-  ## change historic to historical
-  l_gaps_df[,
-    refining_scenario := str_replace(
-      refining_scenario,
-      "historic",
-      "historical"
-    )
-  ]
-  l_gaps_df[, scenario := str_replace(scenario, "historic", "historical")]
-  l_gaps_df[,
-    scenario_title := str_replace(scenario_title, "historic", "historical")
-  ]
-
-  ## scenarios for filtering
-  remove_scen <- c(
-    "Low demand - historical production",
-    "BAU demand - historical production"
-  )
-
-  l_gaps_df$scenario <- factor(
-    l_gaps_df$scenario,
-    levels = c(
-      "BAU demand - historical production",
-      "BAU demand - historical exports",
-      "BAU demand - low exports",
-      "Low demand - historical exports",
-      "Low demand - low exports",
-      "Low demand - historical production"
-    )
-  )
-
-  l_gaps_df$scenario_title <- factor(
-    l_gaps_df$scenario_title,
-    levels = c(
-      "BAU demand\nhistorical production",
-      "BAU demand\nhistorical exports",
-      "BAU demand\nlow exports",
-      "Low demand\nhistorical exports",
-      "Low demand\nlow exports",
-      "Low demand\nhistorical production"
-    )
-  )
-
-  # ## sum for state
-  # l_gaps_df <- l_gaps_df[, .(sum_demo_emp = sum(demo_emp)),
-  #   by = .(
-  #     year, demand_scenario, refining_scenario, oil_price_scenario,
-  #     scenario, scenario_title, demo_cat, demo_group, title
-  #   )
-  # ]
-
-  ## calculate gaps (BAU - scenario)
-  l_bau_gaps_df <- l_gaps_df[scenario == "BAU demand - historical production"]
-  l_bau_gaps_df <- l_bau_gaps_df[, c(
-    "year",
-    "demo_cat",
-    "demo_group",
-    "title",
-    "sum_demo_emp"
-  )]
-  setnames(l_bau_gaps_df, "sum_demo_emp", "bau_sum_demo_emp")
-
-  l_gaps_df <- merge(
-    l_gaps_df,
-    l_bau_gaps_df,
-    by = c("year", "demo_cat", "demo_group", "title"),
-    all.x = T
-  )
-
-  l_gaps_df[, gap_emp := sum_demo_emp - bau_sum_demo_emp]
-
-  # ## normalize if needed
-  # ## merge with 2020 pop
-  # l_gaps_df <- merge(l_gaps_df, pop_2020,
-  #                    by = c("demo_cat", "demo_group"),
-  #                    all.x = T)
-  #
-  # ## calculate per capita
-  # l_gaps_df[, demo_emp_pc := sum_demo_emp / pop_2020]
-  #
-  # ## select columns
-  l_gaps_df <- l_gaps_df[, .(
-    year,
-    demand_scenario,
-    refining_scenario,
-    oil_price_scenario,
-    scenario,
-    scenario_title,
-    demo_cat,
-    demo_group,
-    title,
-    sum_demo_emp,
-    gap_emp
-  )]
-
-  ## save figure inputs
-  fwrite(
-    l_gaps_df,
-    file.path(
-      main_path,
-      save_path,
-      "fig-csv-files",
-      "state_labor_levels_fig_gaps_inputs.csv"
-    )
-  )
-  # fwrite(l_gaps_df, file.path(main_path, "outputs/academic-out/refining/figures/2024-08-beta-adj/fig-csv-files/", "state_labor_levels_fig_gaps_inputs.csv"))
-
-  ## figure a
-  fig_title_vec <- c("Black", "Asian", "white", "Hispanic")
-
-  labor_gap_fig_a <- ggplot(
-    l_gaps_df %>%
-      filter(
-        !scenario %in% remove_scen,
-        title %in% fig_title_vec,
-        demo_cat == "Race",
-        oil_price_scenario == "reference case"
-      ) %>%
-      mutate(
-        title = factor(title, levels = c("Black", "Asian", "white", "Hispanic"))
-      ),
-    aes(x = year, y = gap_emp / 1000, color = title)
-  ) +
-    geom_line(linewidth = 1, alpha = 0.8) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    facet_grid(demo_cat ~ scenario_title) +
-    scale_color_manual(
-      name = "",
-      values = race_col_pal
-    ) +
-    labs(
-      x = NULL,
-      y = NULL
-    ) +
-    # ylim(c(-40, 0)) +
-    scale_x_continuous(
-      breaks = c(2020, 2045), # Specify tick mark positions
-      labels = c(2020, 2045)
-    ) + # Specify tick mark labels
-    theme_line +
-    theme(
-      legend.position = "bottom",
-      legend.title = element_blank(),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      strip.text.x = element_blank(),
-      # axis.text.x = element_blank(),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  # legend_figa <- health_gap_fig_a + theme(legend.position = "right")
-  #
-  # legend_a <- get_legend(
-  #   legend_figa +
-  #     theme(legend.text = element_text(size = 8)))
-
-  ##
-  labor_gap_fig_b <- ggplot(
-    l_gaps_df %>%
-      filter(
-        !scenario %in% remove_scen,
-        demo_cat == "DAC",
-        oil_price_scenario == "reference case"
-      ),
-    aes(x = year, y = gap_emp / 1000, lty = title)
-  ) +
-    geom_line(linewidth = 1, alpha = 0.8) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    facet_grid(demo_cat ~ scenario_title) +
-    scale_linetype_manual(values = dac_lty) +
-    labs(
-      x = NULL,
-      y = NULL
-    ) +
-    # ylim(c(-40, 0)) +
-    scale_x_continuous(
-      breaks = c(2020, 2045), # Specify tick mark positions
-      labels = c(2020, 2045)
-    ) + # Specify tick mark labels
-    theme_line +
-    # ylim(c(-3500, 0)) +
-    theme(
-      legend.position = "bottom",
-      legend.title = element_blank(),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      # strip.text.x = element_blank(),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      # axis.text.x = element_blank(),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  # legend_figb <- health_gap_fig_b + theme(legend.position = "right")
-  #
-  # legend_b <- get_legend(
-  #   legend_figb +
-  #     theme(legend.text = element_text(size = 8)))
-
-  ##
-  labor_gap_fig_c <- ggplot(
-    l_gaps_df %>%
-      filter(
-        !scenario %in% remove_scen,
-        demo_cat == "Poverty",
-        oil_price_scenario == "reference case"
-      ) %>%
-      mutate(
-        title = factor(
-          title,
-          levels = c("Below poverty line", "Above poverty line")
-        )
-      ),
-    aes(x = year, y = gap_emp / 1000, lty = title)
-  ) +
-    geom_line(linewidth = 1, alpha = 0.8, color = "black") +
-    scale_linetype_manual(values = poverty_lty) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    facet_grid(demo_cat ~ scenario_title) +
-    labs(
-      x = NULL,
-      y = NULL
-    ) +
-    # ylim(c(-40, 0)) +
-    scale_x_continuous(
-      breaks = c(2020, 2045), # Specify tick mark positions
-      labels = c(2020, 2045)
-    ) + # Specify tick mark labels
-    theme_line +
-    theme(
-      legend.position = "bottom",
-      legend.title = element_blank(),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      legend.key.width = unit(10, "mm"),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      strip.text.x = element_blank(),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-  #
-  #   legend_figc <- health_gap_fig_c + theme(legend.position = "right")
-  #
-  #   legend_c <- get_legend(
-  #     legend_figc +
-  #       theme(legend.text = element_text(size = 8)))
-
-  ## shared y lab
-  yaxis_lab <- ggdraw() +
-    draw_label(
-      "Labor: FTE-jobs, difference from reference (thousand)",
-      size = 8,
-      angle = 90
-    )
-
-  # ## plot together
-  # fig2l_a <- plot_grid(
-  #   health_level_fig_a,
-  #   legend_a,
-  #   align = 'h',
-  #   # labels = c("A", "B", "C", "D", "E", "F"),
-  #   # # labels = 'AUTO',
-  #   # label_size = 10,
-  #   hjust = -1,
-  #   nrow = 1,
-  #   ncol = 2,
-  #   rel_widths = c(0.95, 0.5),
-  #   rel_heighs = c(1, 1)
-  # )
-  #
-  #
-
-  l_gaps_plot_grid <- plot_grid(
-    labor_gap_fig_b,
-    labor_gap_fig_c,
-    labor_gap_fig_a,
-    align = "v",
-    # labels = c("A", "B", "C", "D", "E", "F"),
-    # # labels = 'AUTO',
-    # label_size = 10,
-    hjust = -1,
-    nrow = 3,
-    ncol = 1,
-    rel_widths = c(1, 1, 1, 1),
-    rel_heighs = c(1, 1, 1, 1.05)
-  )
-
-  l_gaps_plot_grid2 <- plot_grid(
-    yaxis_lab,
-    l_gaps_plot_grid,
-    align = "v",
-    # labels = c("A", "B", "C", "D", "E", "F"),
-    # # labels = 'AUTO',
-    # label_size = 10,
-    hjust = -1,
-    nrow = 1,
-    ncol = 2,
-    rel_widths = c(0.05, 1),
-    rel_heighs = c(1, 1)
-  )
-
-  l_gaps_plot_grid2
-}
-#
-
-plot_labor_levels_gaps_pmil <- function(
-  main_path,
-  save_path,
-  ref_labor_demog_yr,
-  refining_mortality,
-  pop_ratios
-) {
-  ## calc 2020 pop by demographic
-  pop_2020 <- refining_mortality %>%
-    filter(year == 2020) %>%
-    select(census_tract, year, pop) %>%
-    unique() %>%
-    left_join(pop_ratios) %>%
-    as.data.table()
-
-  pop_2020[, demo_pop := pop * pct]
-
-  ## summarize by demographic group
-  pop_2020 <- pop_2020[,
-    .(pop_2020 = sum(demo_pop)),
-    by = .(demo_group, demo_cat)
-  ]
-
-  ## labor outputs
-  l_gaps_df <- copy(ref_labor_demog_yr)
-
-  ## change scenario names, factor
-  l_gaps_df[,
-    scenario := paste0(demand_scenario, " demand - ", refining_scenario)
-  ]
-  # gaps_df[, scenario := gsub('BAU', 'Reference', scenario)]
-  l_gaps_df[, scenario := gsub("LC1.", "Low ", scenario)]
-
-  ## add scenario title
-  l_gaps_df[, scenario_title := scenario]
-  l_gaps_df[, scenario_title := str_replace(scenario_title, " - ", "\n")]
-
-  ## change historic to historical
-  l_gaps_df[,
-    refining_scenario := str_replace(
-      refining_scenario,
-      "historic",
-      "historical"
-    )
-  ]
-  l_gaps_df[, scenario := str_replace(scenario, "historic", "historical")]
-  l_gaps_df[,
-    scenario_title := str_replace(scenario_title, "historic", "historical")
-  ]
-
-  ## scenarios for filtering
-  remove_scen <- c(
-    "Low demand - historical production",
-    "BAU demand - historical production"
-  )
-
-  l_gaps_df$scenario <- factor(
-    l_gaps_df$scenario,
-    levels = c(
-      "BAU demand - historical production",
-      "BAU demand - historical exports",
-      "BAU demand - low exports",
-      "Low demand - historical exports",
-      "Low demand - low exports",
-      "Low demand - historical production"
-    )
-  )
-
-  l_gaps_df$scenario_title <- factor(
-    l_gaps_df$scenario_title,
-    levels = c(
-      "BAU demand\nhistorical production",
-      "BAU demand\nhistorical exports",
-      "BAU demand\nlow exports",
-      "Low demand\nhistorical exports",
-      "Low demand\nlow exports",
-      "Low demand\nhistorical production"
-    )
-  )
-  # ## sum for state
-  # l_gaps_df <- l_gaps_df[, .(sum_demo_emp = sum(demo_emp)),
-  #   by = .(
-  #     year, demand_scenario, refining_scenario, oil_price_scenario,
-  #     scenario, scenario_title, demo_cat, demo_group, title
-  #   )
-  # ]
-
-  ## select columns
-  l_gaps_df <- l_gaps_df[, .(
-    year,
-    demand_scenario,
-    refining_scenario,
-    oil_price_scenario,
-    scenario,
-    scenario_title,
-    demo_cat,
-    demo_group,
-    title,
-    sum_demo_emp
-  )]
-
-  l_gaps_df <- l_gaps_df[oil_price_scenario == "reference case", ]
-
-  ## calculate gaps (BAU - scenario)
-  l_bau_gaps_df <- l_gaps_df[scenario == "BAU demand - historical production"]
-  l_bau_gaps_df <- l_bau_gaps_df[, c(
-    "year",
-    "demo_cat",
-    "demo_group",
-    "title",
-    "sum_demo_emp"
-  )]
-  setnames(l_bau_gaps_df, "sum_demo_emp", "bau_sum_demo_emp")
-
-  l_gaps_df <- merge(
-    l_gaps_df,
-    l_bau_gaps_df,
-    by = c("year", "demo_cat", "demo_group", "title"),
-    all.x = T
-  )
-
-  l_gaps_df[, gap_emp := sum_demo_emp - bau_sum_demo_emp]
-
-  ## merge with 2020 pop
-  l_gaps_df <- merge(
-    l_gaps_df,
-    pop_2020,
-    by = c("demo_cat", "demo_group"),
-    all.x = T
-  )
-
-  ## calculate per capita
-  l_gaps_df[, gap_emp_pc := gap_emp / pop_2020]
-  l_gaps_df[, gap_emp_pmil := gap_emp_pc * 1e6]
-
-  ## save figure inputs
-  fwrite(
-    l_gaps_df,
-    file.path(
-      main_path,
-      save_path,
-      "fig-csv-files",
-      "state_labor_levels_fig_gaps_pmil_inputs.csv"
-    )
-  )
-  # fwrite(l_gaps_df, file.path(main_path, "outputs/academic-out/refining/figures/2024-08-beta-adj/fig-csv-files/", "state_labor_levels_fig_gaps_pmil_inputs.csv"))
-
-  ## figure a
-  fig_title_vec <- c("Black", "Asian", "white", "Hispanic")
-
-  labor_gap_fig_a <- ggplot(
-    l_gaps_df %>%
-      filter(
-        !scenario %in% remove_scen,
-        title %in% fig_title_vec,
-        demo_cat == "Race",
-        oil_price_scenario == "reference case"
-      ) %>%
-      mutate(
-        title = factor(title, levels = c("Black", "Asian", "white", "Hispanic"))
-      ),
-    aes(x = year, y = gap_emp_pmil, color = title)
-  ) +
-    geom_line(linewidth = 1, alpha = 0.8) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    facet_grid(demo_cat ~ scenario_title) +
-    scale_color_manual(
-      name = "",
-      values = race_col_pal
-    ) +
-    labs(
-      x = NULL,
-      y = NULL
-    ) +
-    # ylim(-2500, 0) +
-    scale_x_continuous(
-      breaks = c(2020, 2045), # Specify tick mark positions
-      labels = c(2020, 2045)
-    ) + # Specify tick mark labels
-    theme_line +
-    theme(
-      legend.position = "bottom",
-      legend.title = element_blank(),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      strip.text.x = element_blank(),
-      # axis.text.x = element_blank(),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  # legend_figa <- health_gap_fig_a + theme(legend.position = "right")
-  #
-  # legend_a <- get_legend(
-  #   legend_figa +
-  #     theme(legend.text = element_text(size = 8)))
-
-  ##
-  labor_gap_fig_b <- ggplot(
-    l_gaps_df %>%
-      filter(
-        !scenario %in% remove_scen,
-        demo_cat == "DAC",
-        oil_price_scenario == "reference case"
-      ),
-    aes(x = year, y = gap_emp_pmil, lty = title)
-  ) +
-    geom_line(linewidth = 1, alpha = 0.8) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    facet_grid(demo_cat ~ scenario_title) +
-    scale_linetype_manual(values = dac_lty) +
-    labs(
-      x = NULL,
-      y = NULL
-    ) +
-    # ylim(-2500, 0) +
-    scale_x_continuous(
-      breaks = c(2020, 2045), # Specify tick mark positions
-      labels = c(2020, 2045)
-    ) + # Specify tick mark labels
-    theme_line +
-    # ylim(c(-3500, 0)) +
-    theme(
-      legend.position = "bottom",
-      legend.title = element_blank(),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      # strip.text.x = element_blank(),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      # axis.text.x = element_blank(),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  # legend_figb <- health_gap_fig_b + theme(legend.position = "right")
-  #
-  # legend_b <- get_legend(
-  #   legend_figb +
-  #     theme(legend.text = element_text(size = 8)))
-
-  ##
-  labor_gap_fig_c <- ggplot(
-    l_gaps_df %>%
-      filter(
-        !scenario %in% remove_scen,
-        demo_cat == "Poverty",
-        oil_price_scenario == "reference case"
-      ) %>%
-      mutate(
-        title = factor(
-          title,
-          levels = c("Below poverty line", "Above poverty line")
-        )
-      ),
-    aes(x = year, y = gap_emp_pmil, lty = title)
-  ) +
-    geom_line(linewidth = 1, alpha = 0.8, color = "black") +
-    scale_linetype_manual(values = poverty_lty) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    facet_grid(demo_cat ~ scenario_title) +
-    labs(
-      x = NULL,
-      y = NULL
-    ) +
-    scale_x_continuous(
-      breaks = c(2020, 2045), # Specify tick mark positions
-      labels = c(2020, 2045)
-    ) + # Specify tick mark labels
-    # ylim(-2500, 0) +
-    theme_line +
-    theme(
-      legend.position = "bottom",
-      legend.title = element_blank(),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      legend.key.width = unit(10, "mm"),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      strip.text.x = element_blank(),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-  #
-  #   legend_figc <- health_gap_fig_c + theme(legend.position = "right")
-  #
-  #   legend_c <- get_legend(
-  #     legend_figc +
-  #       theme(legend.text = element_text(size = 8)))
-
-  ## shared y lab
-  yaxis_lab <- ggdraw() +
-    draw_label(
-      "Labor: FTE-jobs, difference from reference per million people",
-      size = 8,
-      angle = 90
-    )
-
-  # ## plot together
-  # fig2l_a <- plot_grid(
-  #   health_level_fig_a,
-  #   legend_a,
-  #   align = 'h',
-  #   # labels = c("A", "B", "C", "D", "E", "F"),
-  #   # # labels = 'AUTO',
-  #   # label_size = 10,
-  #   hjust = -1,
-  #   nrow = 1,
-  #   ncol = 2,
-  #   rel_widths = c(0.95, 0.5),
-  #   rel_heighs = c(1, 1)
-  # )
-  #
-  #
-
-  l_gaps_plot_grid <- plot_grid(
-    labor_gap_fig_b,
-    labor_gap_fig_c,
-    labor_gap_fig_a,
-    align = "v",
-    # labels = c("A", "B", "C", "D", "E", "F"),
-    # # labels = 'AUTO',
-    # label_size = 10,
-    hjust = -1,
-    nrow = 3,
-    ncol = 1,
-    rel_widths = c(1, 1, 1, 1),
-    rel_heighs = c(1, 1, 1, 1.05)
-  )
-
-  l_gaps_plot_grid2 <- plot_grid(
-    yaxis_lab,
-    l_gaps_plot_grid,
-    align = "v",
-    # labels = c("A", "B", "C", "D", "E", "F"),
-    # # labels = 'AUTO',
-    # label_size = 10,
-    hjust = -1,
-    nrow = 1,
-    ncol = 2,
-    rel_widths = c(0.05, 1),
-    rel_heighs = c(1, 1)
-  )
-
-  l_gaps_plot_grid2
-}
-
-
-############################################################################
-############################################################################
-
-plot_hl_levels_df <- function(
-  main_path,
-  save_path,
-  ref_mortality_demog,
-  ref_labor_demog,
-  state_ghg_output,
-  dt_ghg_2019
-) {
-  health_df <- copy(ref_mortality_demog)
-
-  ## group by scenario, demo_cat, demo_group, title, and sum
-  health_df <- health_df[,
-    .(
-      sum_cost_2019_pv = sum(demo_cost_2019_PV, na.rm = T), ## constant VSL
-      sum_cost_pv = sum(demo_cost_PV, na.rm = T)
-    ), ## changing VSL
-    by = .(
-      scen_id,
-      demand_scenario,
-      refining_scenario,
-      demo_cat,
-      demo_group,
-      title
-    )
-  ]
-
-  ## multiply by -1
-  health_df[, sum_cost_2019_pv := sum_cost_2019_pv * -1]
-  health_df[, sum_cost_pv := sum_cost_pv * -1]
-
-  ## add ghg emission reduction ----------------------------
-  ## 2019 ghg
-  ghg_2019_val <- dt_ghg_2019$mtco2e[1]
-
-  ## 2045 vs 2019 ghg
-  ghg_2045 <- state_ghg_output[year == 2045 & source == "total"]
-  setnames(ghg_2045, "value", "ghg_kg")
-  ghg_2045[, ghg_2045 := (ghg_kg / 1000) / 1e6]
-  ghg_2045[, ghg_2019 := ghg_2019_val]
-  ghg_2045[, perc_diff := (ghg_2045 - ghg_2019) / ghg_2019]
-
-  perc_diff_df <- ghg_2045[, .(
-    demand_scenario,
-    refining_scenario,
-    ghg_2045,
-    ghg_2019,
-    perc_diff
-  )]
-
-  ## summarize by scenario, filter for total
-  state_ghg_df <- state_ghg_output[
-    source == "total",
-    .(total_ghg = sum(value)),
-    by = .(demand_scenario, refining_scenario)
-  ]
-
-  state_ghg_df[, total_ghg_mmt := (total_ghg / 1000) / 1e6]
-
-  ## reference
-  ref_df <- state_ghg_df[
-    demand_scenario == "BAU" & refining_scenario == "historic production",
-    .(total_ghg_mmt)
-  ]
-  setnames(ref_df, "total_ghg_mmt", "ref_ghg_mmt")
-  ref_value <- ref_df$ref_ghg_mmt[1]
-
-  ## merge with summarized df
-  state_ghg_df[, ref_ghg := ref_value]
-  state_ghg_df[, avoided_ghg := (total_ghg_mmt - ref_value) * -1]
-
-  ## merge with health
-  health_ghg_df <- merge(
-    health_df,
-    state_ghg_df[, .(
-      demand_scenario,
-      refining_scenario,
-      total_ghg_mmt,
-      ref_ghg,
-      avoided_ghg
-    )],
-    by = c("demand_scenario", "refining_scenario"),
-    all.x = T
-  )
-
-  ## labor
-  labor_df <- copy(ref_labor_demog)
-  labor_df <- labor_df[oil_price_scenario == "reference case", ]
-
-  ## summarize across years
-  labor_df <- labor_df[,
-    .(
-      sum_demo_emp = sum(sum_demo_emp),
-      sum_demo_comp_pv_h = sum(sum_demo_comp_pv_h),
-      sum_demo_comp_pv_l = sum(sum_demo_comp_pv_l)
-    ),
-    by = .(demand_scenario, refining_scenario, demo_cat, demo_group, title)
-  ]
-
-  ## ref labor
-  ref_labor <- labor_df[
-    demand_scenario == "BAU" & refining_scenario == "historic production"
-  ]
-  setnames(
-    ref_labor,
-    c("sum_demo_emp", "sum_demo_comp_pv_h", "sum_demo_comp_pv_l"),
-    c("ref_total_emp", "ref_total_comp_pv_h", "ref_total_comp_pv_l")
-  )
-  ref_labor <- ref_labor[, .(
-    demo_cat,
-    demo_group,
-    title,
-    ref_total_emp,
-    ref_total_comp_pv_h,
-    ref_total_comp_pv_l
-  )]
-
-  ## add values to labor
-  labor_df <- merge(
-    labor_df,
-    ref_labor,
-    by = c("demo_cat", "demo_group", "title")
-  )
-
-  ## calculate difference
-  labor_df[, forgone_wages_h := (sum_demo_comp_pv_h - ref_total_comp_pv_h)]
-  labor_df[, forgone_wages_l := (sum_demo_comp_pv_l - ref_total_comp_pv_l)]
-
-  ## merge with health and ghg
-  health_labor_ghg_df <- merge(
-    health_ghg_df,
-    labor_df[, .(
-      demand_scenario,
-      refining_scenario,
-      demo_cat,
-      demo_group,
-      title,
-      sum_demo_comp_pv_h,
-      sum_demo_comp_pv_l,
-      ref_total_comp_pv_h,
-      ref_total_comp_pv_l,
-      forgone_wages_h,
-      forgone_wages_l
-    )],
-    by = c(
-      "demand_scenario",
-      "refining_scenario",
-      "demo_cat",
-      "demo_group",
-      "title"
-    ),
-    all.x = T
-  )
-
-  ## add ghg perc reduction
-  health_labor_ghg_df <- merge(
-    health_labor_ghg_df,
-    perc_diff_df,
-    by = c("demand_scenario", "refining_scenario"),
-    all.x = T
-  )
-
-  ## prepare to plot
-  plot_df <- health_labor_ghg_df[, .(
-    scen_id,
-    demand_scenario,
-    refining_scenario,
-    demo_cat,
-    demo_group,
-    title,
-    sum_cost_pv,
-    sum_cost_2019_pv,
-    forgone_wages_h,
-    forgone_wages_l,
-    avoided_ghg,
-    perc_diff
-  )]
-
-  setnames(plot_df, "perc_diff", "ghg_perc_diff")
-
-  ## pivot longer
-  plot_df <- plot_df %>%
-    select(
-      scen_id:title,
-      ghg_perc_diff,
-      sum_cost_pv,
-      sum_cost_2019_pv,
-      forgone_wages_h,
-      forgone_wages_l
-    ) %>%
-    pivot_longer(
-      sum_cost_pv:forgone_wages_l,
-      names_to = "metric",
-      values_to = "value"
-    )
-
-  ## add column for vsl
-  plot_df_health <- plot_df %>%
-    filter(metric %in% c("sum_cost_pv", "sum_cost_2019_pv")) %>%
-    mutate(
-      segment = "health",
-      unit_desc = ifelse(
-        metric == "sum_cost_2019_pv",
-        "USD (2019 VSL)",
-        "USD (annual VSL)"
-      ),
-      metric_desc = "avoided_health_cost"
-    )
-
-  plot_df_labor <- plot_df %>%
-    filter(metric %in% c("forgone_wages_h", "forgone_wages_l")) %>%
-    mutate(
-      segment = "labor",
-      unit_desc = "USD",
-      metric_desc = "forgone_wages"
-    )
-
-  plot_df_long <- rbind(plot_df_health, plot_df_labor)
-
-  plot_df_long <- plot_df_long %>%
-    mutate(
-      seg_title = ifelse(
-        segment == "health",
-        "Health: avoided mortality",
-        "Labor: forgone wages"
-      )
-    )
-
-  ## rename
-  setDT(plot_df_long)
-  plot_df_long[,
-    scenario := paste0(demand_scenario, " demand - ", refining_scenario)
-  ]
-  # plot_df_long[, scenario := gsub('BAU', 'Reference', scenario)]
-  plot_df_long[, scenario := gsub("LC1.", "Low ", scenario)]
-  # plot_df_long[, short_scen := gsub('BAU', 'Reference', short_scen)]
-  # plot_df_long[, short_scen := gsub('Low C.', 'Low carbon', short_scen)]
-
-  ## change historic to historical
-  plot_df_long[, scen_id := str_replace(scen_id, "historic", "historical")]
-  plot_df_long[,
-    refining_scenario := str_replace(
-      refining_scenario,
-      "historic",
-      "historical"
-    )
-  ]
-  plot_df_long[, scenario := str_replace(scenario, "historic", "historical")]
-
-  ## refactor
-  plot_df_long$scenario <- factor(
-    plot_df_long$scenario,
-    levels = c(
-      "BAU demand - historical production",
-      "BAU demand - historical exports",
-      "BAU demand - low exports",
-      "Low demand - historical exports",
-      "Low demand - low exports",
-      "Low demand - historical production"
-    )
-  )
-
-  ## titles for plotting
-  plot_df_long[,
-    demand_title := ifelse(demand_scenario == "BAU", "BAU demand", "Low demand")
-  ]
-  plot_df_long[,
-    scen_title := paste0(demand_title, "\n", str_to_sentence(refining_scenario))
-  ]
-
-  plot_df_long$scen_title <- factor(
-    plot_df_long$scen_title,
-    levels = c(
-      "BAU demand\nHistorical production",
-      "BAU demand\nHistorical exports",
-      "BAU demand\nLow exports",
-      "Low demand\nHistorical exports",
-      "Low demand\nLow exports",
-      "Low demand\nHistorical production"
-    )
-  )
-
-  ## save figure inputs
-  fwrite(
-    plot_df_long,
-    file.path(
-      main_path,
-      save_path,
-      "fig-csv-files",
-      "state_disaggregated_npv_fig_inputs.csv"
-    )
-  )
-  # fwrite(plot_df_long, file.path(main_path, "outputs/academic-out/refining/figures/2024-08-beta-adj/fig-csv-files/", "state_disaggregated_npv_fig_inputs.csv"))
-
-  return(plot_df_long)
-}
-
-
-plot_hl_levels <- function(demographic_npv_df) {
-  plot_df_long <- copy(demographic_npv_df)
-
-  ## create the figure ---------------------------------------------
-  ## ---------------------------------------------------------------
-
-  ## scenarios for filtering
-  remove_scen <- c("LC1 historical production", "BAU historical production")
-  bau_scen <- "BAU historical production"
-
-  fig_title_vec <- c("Asian", "Black", "Hispanic", "white")
-
-  ## add column for defining shapes
-  plot_df_long[, demo_grp_metric := paste0(demo_group, "_", metric)]
-
-  ## health fig - race
-  health_level_fig_a <- ggplot() +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    geom_point(
-      data = plot_df_long %>%
-        filter(
-          !scen_id %in% remove_scen,
-          demo_cat == "Race",
-          unit_desc == "USD (2019 VSL)",
-          title %in% fig_title_vec
-        ) %>%
-        mutate(
-          title = factor(
-            title,
-            levels = c("Black", "Asian", "white", "Hispanic")
-          )
-        ),
-      aes(x = scen_title, y = value / 1e9, color = title),
-      size = 3,
-      alpha = 0.8
-    ) +
-    facet_wrap(~seg_title) +
-    scale_color_manual(
-      name = "",
-      values = race_col_pal
-    ) +
-    # ylim(0, 12) +
-    labs(
-      y = "NPV (USD billion)",
-      x = NULL,
-      color = NULL
-    ) +
-    theme_line +
-    theme(
-      legend.position = "none",
-      legend.title = element_blank(),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  ## labor fig - race
-  labor_level_fig_a <- ggplot() +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    geom_point(
-      data = plot_df_long %>%
-        filter(
-          !scen_id %in% remove_scen,
-          demo_cat == "Race",
-          segment == "labor",
-          title %in% fig_title_vec
-        ) %>%
-        mutate(
-          title = factor(
-            title,
-            levels = c("Black", "Asian", "white", "Hispanic")
-          )
-        ),
-      aes(x = scen_title, y = value / 1e9, color = title, shape = metric),
-      size = 3,
-      alpha = 0.8
-    ) +
-    facet_wrap(~seg_title) +
-    scale_color_manual(
-      name = "",
-      values = race_col_pal
-    ) +
-    scale_shape_manual(
-      name = "",
-      values = race_shape_ptc,
-      labels = high_low_labs
-    ) +
-    # ylim(-25, 0) +
-    labs(
-      y = "NPV (USD billion)",
-      x = NULL,
-      color = NULL
-    ) +
-    theme_line +
-    theme(
-      legend.position = "none",
-      legend.title = element_blank(),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  ## health fig - poverty
-  health_level_fig_b <- ggplot() +
-    geom_point(
-      data = plot_df_long %>%
-        filter(
-          !scen_id %in% remove_scen,
-          demo_cat == "Poverty",
-          unit_desc == "USD (2019 VSL)"
-        ) %>%
-        mutate(
-          title = factor(
-            title,
-            levels = c("Below poverty line", "Above poverty line")
-          )
-        ),
-      aes(x = scen_title, y = value / 1e9, shape = title),
-      color = "black",
-      size = 3,
-      alpha = 0.8
-    ) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    scale_shape_manual(values = poverty_ptc_h) +
-    facet_wrap(~seg_title) +
-    labs(
-      y = "NPV (USD billion)",
-      x = NULL,
-      color = NULL
-    ) +
-    theme_line +
-    # ylim(0, 20) +
-    theme(
-      legend.position = "none",
-      legend.title = element_blank(),
-      # axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  ## labor fig - poverty
-  labor_level_fig_b <- ggplot() +
-    geom_point(
-      data = plot_df_long %>%
-        filter(
-          !scen_id %in% remove_scen,
-          demo_cat == "Poverty",
-          segment == "labor"
-        ) %>%
-        mutate(
-          title = factor(
-            title,
-            levels = c("Below poverty line", "Above poverty line")
-          )
-        ),
-      aes(x = scen_title, y = value / 1e9, shape = demo_grp_metric),
-      color = "black",
-      size = 3,
-      alpha = 0.8
-    ) +
-    scale_shape_manual(
-      values = poverty_ptc_l,
-      labels = poverty_hl_labs
-    ) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    facet_wrap(~seg_title) +
-    labs(
-      y = "NPV (USD billion)",
-      x = NULL,
-      color = NULL
-    ) +
-    # ylim(-50, 0) +
-    theme_line +
-    theme(
-      legend.position = "none",
-      legend.title = element_blank(),
-      # axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  ## health fig - DAC
-  health_level_fig_c <- ggplot() +
-    geom_point(
-      data = plot_df_long %>%
-        filter(
-          !scen_id %in% remove_scen,
-          demo_cat == "DAC",
-          unit_desc == "USD (2019 VSL)"
-        ),
-      aes(x = scen_title, y = value / 1e9, shape = title),
-      color = "black",
-      size = 3,
-      alpha = 0.8
-    ) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    scale_shape_manual(values = dac_ptc) +
-    facet_wrap(~seg_title) +
-    labs(
-      y = "NPV (USD billion)",
-      x = NULL,
-      color = NULL
-    ) +
-    # ylim(0, 15) +
-    theme_line +
-    theme(
-      legend.position = "none",
-      legend.title = element_blank(),
-      # axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  ## labor fig - DAC
-  labor_level_fig_c <- ggplot() +
-    geom_point(
-      data = plot_df_long %>%
-        filter(
-          !scen_id %in% remove_scen,
-          demo_cat == "DAC",
-          segment == "labor"
-        ),
-      aes(x = scen_title, y = value / 1e9, shape = title),
-      color = "black",
-      size = 3,
-      alpha = 0.8
-    ) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    scale_shape_manual(
-      values = dac_ptc
-    ) +
-    facet_wrap(~seg_title) +
-    labs(
-      y = "NPV (USD billion)",
-      x = NULL,
-      color = NULL
-    ) +
-    # ylim(-40, 0) +
-    theme_line +
-    theme(
-      legend.position = "none",
-      legend.title = element_blank(),
-      # axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  ## labor fig - DAC
-  labor_level_fig_c <- ggplot() +
-    geom_point(
-      data = plot_df_long %>%
-        filter(
-          !scen_id %in% remove_scen,
-          demo_cat == "DAC",
-          segment == "labor"
-        ),
-      aes(x = scen_title, y = value / 1e9, shape = demo_grp_metric),
-      color = "black",
-      size = 3,
-      alpha = 0.8
-    ) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    scale_shape_manual(
-      values = dac_hl_ptc,
-      labels = dac_hl_labs
-    ) +
-    facet_wrap(~seg_title) +
-    labs(
-      y = "NPV (USD billion)",
-      x = NULL,
-      color = NULL
-    ) +
-    # ylim(-40, 0) +
-    theme_line +
-    theme(
-      legend.position = "none",
-      legend.title = element_blank(),
-      # axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  ## combine figure
-  ## ---------------------------------
-
-  fig_text_size <- 12
-
-  ## health
-  health_column_fig_nl <- plot_grid(
-    health_level_fig_c +
-      theme(
-        axis.text.x = element_blank(),
-        legend.position = "none",
-        plot.margin = margin(1, 1, 20, 1),
-        strip.text.x = element_text(size = fig_text_size),
-        axis.text.y = element_text(size = fig_text_size),
-        axis.title.y = element_text(size = fig_text_size)
-      ),
-    health_level_fig_b +
-      theme(
-        axis.text.x = element_blank(),
-        strip.text.x = element_blank(),
-        axis.text.y = element_text(size = fig_text_size),
-        axis.title.y = element_text(size = fig_text_size),
-        legend.position = "none",
-        plot.margin = margin(1, 1, 20, 1)
-      ),
-    health_level_fig_a +
-      theme(
-        strip.text.x = element_blank(),
-        axis.text.y = element_text(size = fig_text_size),
-        axis.title.y = element_text(size = fig_text_size),
-        axis.text.x = element_text(size = fig_text_size),
-        legend.position = "none"
-      ),
-    align = "vh",
-    labels = c("A", "B", "C"),
-    # # labels = 'AUTO',
-    label_size = 10,
-    hjust = -1,
-    nrow = 3
-    # rel_widths = c(1, 0.25, 1),
-    # rel_heights = c(1, 0.1, 1, 0.1, 1)
-  )
-
-  ## labor
-  labor_column_fig_nl <- plot_grid(
-    labor_level_fig_c +
-      labs(y = NULL) +
-      theme(
-        axis.text.x = element_blank(),
-        strip.text.x = element_text(size = fig_text_size),
-        axis.text.y = element_text(size = fig_text_size),
-        legend.position = "none",
-        plot.margin = margin(1, 1, 20, 1)
-      ),
-    labor_level_fig_b +
-      labs(y = NULL) +
-      theme(
-        axis.text.x = element_blank(),
-        axis.text.y = element_text(size = fig_text_size),
-        strip.text.x = element_blank(),
-        legend.position = "none",
-        plot.margin = margin(1, 1, 20, 1)
-      ),
-    labor_level_fig_a +
-      labs(y = NULL) +
-      theme(
-        strip.text.x = element_blank(),
-        axis.text.y = element_text(size = fig_text_size),
-        axis.text.x = element_text(size = fig_text_size),
-        legend.position = "none"
-      ),
-    align = "vh",
-    labels = c("D", "E", "F"),
-    # # labels = 'AUTO',
-    label_size = 10,
-    hjust = -1,
-    nrow = 3
-    # rel_widths = c(1, 0.25, 1),
-    # rel_heights = c(1, 0.1, 1, 0.1, 1)
-  )
-
-  ## all together now
-  hl_pc_plot_grid_nl <- plot_grid(
-    health_column_fig_nl,
-    labor_column_fig_nl,
-    align = "h",
-    # labels = c("(A)", "(B)", "(C)", ""),
-    # # labels = 'AUTO',
-    # label_size = 10,
-    # hjust = -1,
-    ncol = 2,
-    rel_widths = c(1, 1)
-    # rel_widths = c(1, 1, 1)
-  )
-
-  return(hl_pc_plot_grid_nl)
-}
-
-plot_hl_levels_pc <- function(
-  demographic_npv_df,
-  refining_mortality,
-  pop_ratios,
-  main_path,
-  save_path
-) {
-  ## copy npv results
-  plot_df_long <- copy(demographic_npv_df)
-
-  ## add column for defining shapes
-  plot_df_long[, demo_grp_metric := paste0(demo_group, "_", metric)]
-
-  ## calc 2020 pop by demographic
-  pop_2020 <- refining_mortality %>%
-    filter(year == 2020) %>%
-    select(census_tract, year, pop) %>%
-    unique() %>%
-    left_join(pop_ratios) %>%
-    as.data.table()
-
-  pop_2020[, demo_pop := pop * pct]
-
-  ## summarize by demographic group
-  pop_2020 <- pop_2020[,
-    .(pop_2020 = sum(demo_pop)),
-    by = .(demo_group, demo_cat)
-  ]
-
-  ## merge population back with results
-  plot_df_long <- merge(
-    plot_df_long,
-    pop_2020,
-    by = c("demo_group", "demo_cat"),
-    all.x = T
-  )
-
-  ## calculate per capita
-  plot_df_long[, value := value / pop_2020]
-
-  ## save figure inputs
-  fwrite(
-    plot_df_long,
-    file.path(
-      main_path,
-      save_path,
-      "fig-csv-files",
-      "state_disaggregated_npv_pc_fig_inputs.csv"
-    )
-  )
-  # fwrite(plot_df_long, file.path(main_path, "outputs/academic-out/refining/figures/2024-08-beta-adj/fig-csv-files/", "state_disaggregated_npv_pc_fig_inputs.csv"))
-
-  ## create the figure ---------------------------------------------
-  ## ---------------------------------------------------------------
-
-  ## scenarios for filtering
-  remove_scen <- c("LC1 historical production", "BAU historical production")
-  bau_scen <- "BAU historical production"
-
-  fig_title_vec <- c("Asian", "Black", "Hispanic", "white")
-
-  ## health fig - race
-  health_level_fig_a <- ggplot() +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    geom_point(
-      data = plot_df_long %>%
-        filter(
-          !scen_id %in% remove_scen,
-          demo_cat == "Race",
-          unit_desc == "USD (2019 VSL)",
-          title %in% fig_title_vec
-        ) %>%
-        mutate(
-          title = factor(
-            title,
-            levels = c("Black", "Hispanic", "Asian", "white")
-          )
-        ),
-      aes(x = scen_title, y = value, color = title),
-      size = 3,
-      alpha = 0.8
-    ) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    facet_wrap(~seg_title) +
-    scale_color_manual(
-      name = "",
-      values = race_col_pal
-    ) +
-    labs(
-      y = "NPV per capita (USD)",
-      x = NULL,
-      color = NULL
-    ) +
-    scale_y_continuous(label = comma, limits = c(0, 3000)) +
-    theme_line +
-    theme(
-      legend.position = "none",
-      legend.title = element_blank(),
-      strip.text = element_blank(),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  ## labor fig - race
-  labor_level_fig_a <- ggplot() +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    geom_point(
-      data = plot_df_long %>%
-        filter(
-          !scen_id %in% remove_scen,
-          demo_cat == "Race",
-          segment == "labor",
-          title %in% fig_title_vec
-        ) %>%
-        mutate(
-          title = factor(
-            title,
-            levels = c("Black", "Hispanic", "Asian", "white")
-          )
-        ),
-      aes(x = scen_title, y = value, color = title, shape = metric),
-      size = 3,
-      alpha = 0.8
-    ) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    facet_wrap(~seg_title) +
-    scale_color_manual(
-      name = "",
-      values = race_col_pal
-    ) +
-    scale_shape_manual(
-      name = "",
-      values = race_shape_ptc,
-      labels = high_low_labs
-    ) +
-    labs(
-      y = " ",
-      x = NULL,
-      color = NULL
-    ) +
-    scale_y_continuous(label = comma, limits = c(-3000, 0)) +
-    theme_line +
-    theme(
-      legend.position = "none",
-      legend.title = element_blank(),
-      strip.text = element_blank(),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  # ## legend
-  # legend_figa <- labor_level_fig_a + theme(legend.position = "bottom")
-  #
-  # legend_a <- get_legend(
-  #   legend_figa +
-  #     theme(legend.text = element_text(size = 12)) +
-  #     guides(color = guide_legend(order = 1), shape = guide_legend(order = 2)))
-
-  # ## save version for presentation
-  # hl_plot_grid_a_pres <- plot_grid(
-  #   health_level_fig_a +
-  #     theme(axis.title.y = element_text(size = 12),
-  #           axis.text.x = element_text(size = 9),
-  #           axis.text.y = element_text(size = 12),
-  #           strip.text =  element_text(size = 12),
-  #           plot.margin = unit(c(0, 0, 0.25, 0.1), "cm")),
-  #   labor_level_fig_a + labs(y = NULL) +
-  #     theme(axis.title.y = element_text(size = 12),
-  #           axis.text.x = element_text(size = 9),
-  #           axis.text.y = element_text(size = 12),
-  #           strip.text =  element_text(size = 12),
-  #           plot.margin = unit(c(0, 0, 0.25, 0.1), "cm")),
-  #   align = 'vh',
-  #   # labels = c("A", "B", "C", "D", "E", "F"),
-  #   # # labels = 'AUTO',
-  #   # label_size = 10,
-  #   hjust = -1,
-  #   nrow = 1,
-  #   rel_widths = c(1, 1)
-  # )
-  #
-  # ## add legend
-  # hl_plot_grid_a_pres <- plot_grid(
-  #   hl_plot_grid_a_pres,
-  #   legend_a,
-  #   ncol = 1,
-  #   rel_heights = c(0.95, 0.05)
-  # )
-  #
-  #
-  #
-  # ggsave(plot = hl_plot_grid_a_pres,
-  #        filename = paste0(main_path, "outputs/academic-out/refining/figures/2025-health-revisions/presentation-figs/fig5-race.jpeg"),
-  #        device = "jpeg",
-  #        width = 9,
-  #        height = 4,
-  #        units= "in",
-  #        dpi = 300)
-  #
-
-  ## health fig - poverty
-  health_level_fig_b <- ggplot() +
-    geom_point(
-      data = plot_df_long %>%
-        filter(
-          !scen_id %in% remove_scen,
-          demo_cat == "Poverty",
-          unit_desc == "USD (2019 VSL)"
-        ) %>%
-        mutate(
-          title = factor(
-            title,
-            levels = c("Below poverty line", "Above poverty line")
-          )
-        ),
-      aes(x = scen_title, y = value, shape = title),
-      color = "black",
-      size = 3,
-      alpha = 0.8
-    ) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    scale_shape_manual(values = poverty_ptc_h) +
-    facet_wrap(~seg_title) +
-    labs(
-      y = "NPV per capita (USD)",
-      x = NULL,
-      color = NULL
-    ) +
-    theme_line +
-    scale_y_continuous(label = comma, limits = c(0, 3000)) +
-    theme(
-      legend.position = "none",
-      legend.title = element_blank(),
-      # axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  ## labor fig - poverty
-  labor_level_fig_b <- ggplot() +
-    geom_point(
-      data = plot_df_long %>%
-        filter(
-          !scen_id %in% remove_scen,
-          demo_cat == "Poverty",
-          segment == "labor"
-        ) %>%
-        mutate(
-          title = factor(
-            title,
-            levels = c("Below poverty line", "Above poverty line")
-          )
-        ),
-      aes(x = scen_title, y = value, shape = demo_grp_metric),
-      color = "black",
-      size = 3,
-      alpha = 0.8
-    ) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    scale_shape_manual(
-      values = poverty_ptc_l,
-      labels = poverty_hl_labs
-    ) +
-    facet_wrap(~seg_title) +
-    labs(
-      y = " ",
-      x = NULL,
-      color = NULL
-    ) +
-    scale_y_continuous(label = comma, limits = c(-3000, 0)) +
-    theme_line +
-    theme(
-      legend.position = "none",
-      legend.title = element_blank(),
-      # axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  # legend_figb <- labor_level_fig_b +
-  #   theme(legend.position = "bottom")
-  #
-  # legend_b <- get_legend(
-  #   legend_figb +
-  #     theme(legend.text = element_text(size = 12)))
-  #
-
-  # ## save version for presentation
-  # hl_plot_grid_b_pres <- plot_grid(
-  #   health_level_fig_b +
-  #     theme(axis.title.y = element_text(size = 12),
-  #           axis.text.x = element_text(size = 9),
-  #           axis.text.y = element_text(size = 12),
-  #           strip.text =  element_text(size = 12),
-  #           plot.margin = unit(c(0, 0, 0.25, 0.1), "cm")),
-  #   labor_level_fig_b + labs(y = NULL) +
-  #     theme(axis.title.y = element_text(size = 12),
-  #           axis.text.x = element_text(size = 9),
-  #           axis.text.y = element_text(size = 12),
-  #           strip.text =  element_text(size = 12),
-  #           plot.margin = unit(c(0, 0, 0.25, 0.1), "cm")),
-  #   align = 'vh',
-  #   # labels = c("A", "B", "C", "D", "E", "F"),
-  #   # # labels = 'AUTO',
-  #   # label_size = 10,
-  #   hjust = -1,
-  #   nrow = 1,
-  #   rel_widths = c(1, 1)
-  # )
-  #
-  # ## add legend
-  # hl_plot_grid_b_pres <- plot_grid(
-  #   hl_plot_grid_b_pres,
-  #   legend_b,
-  #   ncol = 1,
-  #   rel_heights = c(0.95, 0.05)
-  # )
-  #
-  # ggsave(plot = hl_plot_grid_b_pres,
-  #        filename = paste0(main_path, "outputs/academic-out/refining/figures/2025-health-revisions/presentation-figs/fig5-income.jpeg"),
-  #        device = "jpeg",
-  #        width = 9,
-  #        height = 4,
-  #        units= "in",
-  #        dpi = 300)
-
-  ## health fig - DAC
-  health_level_fig_c <- ggplot() +
-    geom_point(
-      data = plot_df_long %>%
-        filter(
-          !scen_id %in% remove_scen,
-          demo_cat == "DAC",
-          unit_desc == "USD (2019 VSL)"
-        ),
-      aes(x = scen_title, y = value, shape = title),
-      color = "black",
-      size = 3,
-      alpha = 0.8
-    ) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    scale_shape_manual(values = dac_ptc) +
-    facet_wrap(~seg_title) +
-    labs(
-      y = "NPV per capita (USD)",
-      x = NULL,
-      color = NULL
-    ) +
-    theme_line +
-    scale_y_continuous(label = comma, limits = c(0, 3000)) +
-    theme(
-      legend.position = "none",
-      legend.title = element_blank(),
-      # axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  ## labor fig - DAC
-  labor_level_fig_c <- ggplot() +
-    geom_point(
-      data = plot_df_long %>%
-        filter(
-          !scen_id %in% remove_scen,
-          demo_cat == "DAC",
-          segment == "labor"
-        ),
-      aes(x = scen_title, y = value, shape = demo_grp_metric),
-      color = "black",
-      size = 3,
-      alpha = 0.8
-    ) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    scale_shape_manual(
-      values = dac_hl_ptc,
-      labels = dac_hl_labs
-    ) +
-    facet_wrap(~seg_title) +
-    labs(
-      y = " ",
-      x = NULL,
-      color = NULL
-    ) +
-    scale_y_continuous(label = comma, limits = c(-3000, 0)) +
-    theme_line +
-    theme(
-      legend.position = "none",
-      legend.title = element_blank(),
-      # axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  # legend_figc <- labor_level_fig_c + theme(legend.position = "bottom")
-  #
-  # legend_c <- get_legend(
-  #   legend_figc +
-  #     theme(legend.text = element_text(size = 12)))
-
-  # ## save version for presentation
-  # hl_plot_grid_c_pres <- plot_grid(
-  #   health_level_fig_c +
-  #     theme(axis.title.y = element_text(size = 12),
-  #           axis.text.x = element_text(size = 9),
-  #           axis.text.y = element_text(size = 12),
-  #           strip.text =  element_text(size = 12),
-  #           plot.margin = unit(c(0, 0, 0.25, 0.1), "cm")),
-  #   labor_level_fig_c + labs(y = NULL) +
-  #     theme(axis.title.y = element_text(size = 12),
-  #           axis.text.x = element_text(size = 9),
-  #           axis.text.y = element_text(size = 12),
-  #           strip.text =  element_text(size = 12),
-  #           plot.margin = unit(c(0, 0, 0.25, 0.1), "cm")),
-  #   align = 'vh',
-  #   # labels = c("A", "B", "C", "D", "E", "F"),
-  #   # # labels = 'AUTO',
-  #   # label_size = 10,
-  #   hjust = -1,
-  #   nrow = 1,
-  #   rel_widths = c(1, 1)
-  # )
-  #
-  # ## add legend
-  # hl_plot_grid_c_pres <- plot_grid(
-  #   hl_plot_grid_c_pres,
-  #   legend_c,
-  #   ncol = 1,
-  #   rel_heights = c(0.95, 0.05)
-  # )
-  #
-  #
-  # ggsave(plot = hl_plot_grid_c_pres,
-  #        filename = paste0(main_path, "outputs/academic-out/refining/figures/2025-health-revisions/presentation-figs/fig5-dac.jpeg"),
-  #        device = "jpeg",
-  #        width = 9,
-  #        height = 4,
-  #        units= "in",
-  #        dpi = 300)
-
-  ## combine figure
-  ## ---------------------------------
-
-  fig_text_size <- 12
-
-  ## health
-  health_column_fig_nl <- plot_grid(
-    health_level_fig_c +
-      theme(
-        axis.text.x = element_blank(),
-        legend.position = "none",
-        plot.margin = margin(1, 1, 20, 1),
-        strip.text.x = element_text(size = fig_text_size),
-        axis.text.y = element_text(size = fig_text_size),
-        axis.title.y = element_text(size = fig_text_size)
-      ),
-    health_level_fig_b +
-      theme(
-        axis.text.x = element_blank(),
-        strip.text.x = element_blank(),
-        axis.text.y = element_text(size = fig_text_size),
-        axis.title.y = element_text(size = fig_text_size),
-        legend.position = "none",
-        plot.margin = margin(1, 1, 20, 1)
-      ),
-    health_level_fig_a +
-      theme(
-        strip.text.x = element_blank(),
-        axis.text.y = element_text(size = fig_text_size),
-        axis.title.y = element_text(size = fig_text_size),
-        axis.text.x = element_text(size = fig_text_size),
-        legend.position = "none"
-      ),
-    align = "vh",
-    labels = c("A", "B", "C"),
-    # # labels = 'AUTO',
-    label_size = 10,
-    hjust = -1,
-    nrow = 3
-    # rel_widths = c(1, 0.25, 1),
-    # rel_heights = c(1, 0.1, 1, 0.1, 1)
-  )
-
-  ## labor
-  labor_column_fig_nl <- plot_grid(
-    labor_level_fig_c +
-      labs(y = NULL) +
-      theme(
-        axis.text.x = element_blank(),
-        strip.text.x = element_text(size = fig_text_size),
-        axis.text.y = element_text(size = fig_text_size),
-        legend.position = "none",
-        plot.margin = margin(1, 1, 20, 1)
-      ),
-    labor_level_fig_b +
-      labs(y = NULL) +
-      theme(
-        axis.text.x = element_blank(),
-        axis.text.y = element_text(size = fig_text_size),
-        strip.text.x = element_blank(),
-        legend.position = "none",
-        plot.margin = margin(1, 1, 20, 1)
-      ),
-    labor_level_fig_a +
-      labs(y = NULL) +
-      theme(
-        strip.text.x = element_blank(),
-        axis.text.y = element_text(size = fig_text_size),
-        axis.text.x = element_text(size = fig_text_size),
-        legend.position = "none"
-      ),
-    align = "vh",
-    labels = c("D", "E", "F"),
-    # # labels = 'AUTO',
-    label_size = 10,
-    hjust = -1,
-    nrow = 3
-    # rel_widths = c(1, 0.25, 1),
-    # rel_heights = c(1, 0.1, 1, 0.1, 1)
-  )
-
-  ## all together now
-  hl_pc_plot_grid_nl <- plot_grid(
-    health_column_fig_nl,
-    labor_column_fig_nl,
-    align = "h",
-    # labels = c("(A)", "(B)", "(C)", ""),
-    # # labels = 'AUTO',
-    # label_size = 10,
-    # hjust = -1,
-    ncol = 2,
-    rel_widths = c(1, 1)
-    # rel_widths = c(1, 1, 1)
-  )
-
-  hl_pc_plot_grid_nl
-
-  # ## race
-  # hl_plot_grid_a <- plot_grid(
-  #   health_level_fig_a + theme(strip.text.x = element_blank()),
-  #   labor_level_fig_a + labs(y = NULL) + theme(strip.text.x = element_blank()),
-  #   align = 'vh',
-  #   # labels = c("A", "B", "C", "D", "E", "F"),
-  #   # # labels = 'AUTO',
-  #   # label_size = 10,
-  #   hjust = -1,
-  #   nrow = 1,
-  #   rel_widths = c(1, 1)
-  # )
-  #
-  # ## add race legend
-  # hl_plot_grid_a <- plot_grid(
-  #   hl_plot_grid_a,
-  #   legend_a,
-  #   ncol = 1,
-  #   rel_heights = c(0.95, 0.05)
-  # )
-  #
-  # ## poverty
-  # hl_plot_grid_b <- plot_grid(
-  #   health_level_fig_b + theme(axis.text.x = element_blank(),
-  #                              strip.text.x = element_blank()),
-  #   labor_level_fig_b + labs(y = NULL) + theme(axis.text.x = element_blank(),
-  #                                              strip.text.x = element_blank()),
-  #   align = 'vh',
-  #   # labels = c("A", "B", "C", "D", "E", "F"),
-  #   # # labels = 'AUTO',
-  #   # label_size = 10,
-  #   hjust = -1,
-  #   nrow = 1,
-  #   rel_widths = c(1, 1)
-  # )
-  #
-  # ## add poverty legend
-  # hl_plot_grid_b <- plot_grid(
-  #   hl_plot_grid_b,
-  #   legend_b,
-  #   ncol = 1,
-  #   rel_heights = c(0.95, 0.05)
-  # )
-  #
-  # ## DAC
-  # hl_plot_grid_c <- plot_grid(
-  #   health_level_fig_c + theme(axis.text.x = element_blank()),
-  #   labor_level_fig_c + labs(y = NULL) + theme(axis.text.x = element_blank()),
-  #   align = 'vh',
-  #   # labels = c("A", "B", "C", "D", "E", "F"),
-  #   # # labels = 'AUTO',
-  #   # label_size = 10,
-  #   hjust = -1,
-  #   nrow = 1,
-  #   rel_widths = c(1, 1)
-  # )
-  #
-  # ## add DAC legend
-  # hl_plot_grid_c <- plot_grid(
-  #   hl_plot_grid_c,
-  #   legend_c,
-  #   ncol = 1,
-  #   rel_heights = c(0.95, 0.05)
-  # )
-  #
-  #
-  # ## all together now
-  # hl_plot_grid_pc <- plot_grid(
-  #   hl_plot_grid_c,
-  #   NULL,
-  #   hl_plot_grid_b,
-  #   NULL,
-  #   hl_plot_grid_a,
-  #   align = "v",
-  #   # labels = c("(A)", "(B)", "(C)", ""),
-  #   # # labels = 'AUTO',
-  #   # label_size = 10,
-  #   # hjust = -1,
-  #   ncol = 1,
-  #   rel_heights = c(1, 0.1, 1, 0.1, 1)
-  #   # rel_widths = c(1, 1, 1)
-  # )
-  #
-
-  return(hl_pc_plot_grid_nl)
-}
-
-
-## npv shares
-## ----------------------------------------------------------------------------
-
-plot_hl_shares <- function(
-  main_path,
-  save_path,
-  demographic_npv_df,
-  state_pop_ratios
-) {
-  plot_df_long <- copy(demographic_npv_df)
-
-  ## calculate shares
-  plot_df_long[,
-    total_value := sum(value),
-    by = .(
-      scen_id,
-      demand_scenario,
-      refining_scenario,
-      demo_cat,
-      metric,
-      segment,
-      unit_desc,
-      metric_desc,
-      seg_title,
-      scenario,
-      demand_title,
-      scen_title
-    )
-  ]
-
-  plot_df_long[, share := value / total_value]
-
-  ## shares
-  pct_df <- copy(state_pop_ratios)
-
-  pct_df[, scen_title := "population"]
-
-  ## create one df for plotting
-  share_df <- plot_df_long[, .(
-    scen_id,
-    demand_scenario,
-    refining_scenario,
-    demo_cat,
-    title,
-    metric,
-    unit_desc,
-    segment,
-    metric_desc,
-    seg_title,
-    scenario,
-    demand_title,
-    scen_title,
-    share
-  )]
-
-  pop_share_df <- copy(share_df)
-  pop_share_df[, `:=`(
-    scen_id = "Population",
-    demand_scenario = NA,
-    refining_scenario = NA,
-    metric = "general_pop_share",
-    unit_desc = NA,
-    segment = "general",
-    metric_desc = "general",
-    seg_title = "State",
-    scenario = "State",
-    demand_title = NA,
-    scen_title = "State\npopulation",
-    share = NULL
-  )]
-
-  pop_share_df <- unique(pop_share_df)
-
-  ## merge
-  pop_share_df <- merge(
-    pop_share_df,
-    pct_df[, .(demo_cat, title, pct)],
-    by = c("demo_cat", "title")
-  )
-
-  setnames(pop_share_df, "pct", "share")
-
-  ## bind
-  share_df <- rbind(share_df, pop_share_df)
-
-  ## add column for defining shapes
-  share_df[, demo_grp_metric := paste0(title, "_", metric)]
-
-  ## save figure inputs
-  fwrite(
-    share_df,
-    file.path(
-      main_path,
-      save_path,
-      "fig-csv-files",
-      "state_disaggreated_npv_share_fig_inputs.csv"
-    )
-  )
-  # fwrite(share_df, file.path(main_path, "outputs/academic-out/refining/figures/2024-08-beta-adj/fig-csv-files/", "state_disaggreated_npv_share_fig_inputs.csv"))
-
-  ## create the figure ---------------------------------------------
-  ## ---------------------------------------------------------------
-
-  ## scenarios for filtering
-  remove_scen <- c("LC1 historical production", "BAU historical production")
-  bau_scen <- "BAU historical production"
-
-  fig_title_vec <- c("Asian", "Black", "Hispanic", "white")
-
-  ## health fig - race
-  health_share_fig_a <- ggplot() +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    geom_point(
-      data = share_df %>%
-        filter(
-          !scen_id %in% remove_scen,
-          demo_cat == "Race",
-          unit_desc == "USD (2019 VSL)",
-          title %in% fig_title_vec
-        ) %>%
-        mutate(
-          title = factor(
-            title,
-            levels = c("Black", "Hispanic", "Asian", "white")
-          )
-        ),
-      aes(x = scen_title, y = share, color = title),
-      size = 3,
-      alpha = 0.8
-    ) +
-    facet_wrap(~seg_title) +
-    scale_color_manual(
-      name = "",
-      values = race_col_pal
-    ) +
-    # ylim(0, 0.5) +
-    labs(
-      y = "NPV share",
-      x = NULL,
-      color = NULL
-    ) +
-    theme_line +
-    theme(
-      legend.position = "bottom",
-      legend.title = element_blank(),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  ## labor fig - race
-  labor_share_fig_a <- ggplot() +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    geom_point(
-      data = share_df %>%
-        filter(
-          !scen_id %in% remove_scen,
-          demo_cat == "Race",
-          segment == "labor",
-          title %in% fig_title_vec
-        ) %>%
-        mutate(
-          title = factor(
-            title,
-            levels = c("Black", "Hispanic", "Asian", "white")
-          )
-        ),
-      aes(x = scen_title, y = share, color = title, shape = metric),
-      size = 3,
-      alpha = 0.8
-    ) +
-    facet_wrap(~seg_title) +
-    scale_color_manual(
-      name = "",
-      values = race_col_pal
-    ) +
-    scale_shape_manual(
-      name = "",
-      values = race_shape_ptc,
-      labels = high_low_labs
-    ) +
-    # ylim(0, 0.6) +
-    labs(
-      y = "NPV share",
-      x = NULL,
-      color = "with re-emp:"
-    ) +
-    theme_line +
-    theme(
-      legend.position = "bottom",
-      legend.title = element_text(size = 8),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    ) +
-    guides(shape = "none")
-
-  # legend_fig_labor_h <- ggplot() +
-  #   geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-  #   geom_point(
-  #     data = share_df %>% filter(
-  #       !scen_id %in% remove_scen,
-  #       demo_cat == "Race",
-  #       segment == "labor",
-  #       title %in% fig_title_vec,
-  #       metric == "forgone_wages_h"
-  #     ) %>%
-  #       mutate(title = factor(title, levels = c("Black", "Hispanic", "Asian", "white"))),
-  #     aes(x = scen_title, y = share, color = title, shape = metric),
-  #     size = 3, alpha = 0.8
-  #   ) +
-  #   facet_wrap(~seg_title) +
-  #   scale_color_manual(
-  #     name = "no re-emp:",
-  #     values = race_col_pal
-  #   ) +
-  #   scale_shape_manual(
-  #     name = "",
-  #     values = race_shape_ptc,
-  #     labels = high_low_labs
-  #   ) +
-  #   # ylim(0, 0.6) +
-  #   labs(
-  #     y = "NPV share",
-  #     x = NULL,
-  #     color = "no re-emp:"
-  #   ) +
-  #   theme_line +
-  #   theme(
-  #     legend.position = "bottom",
-  #     legend.title = element_text(size = 8),
-  #     axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-  #     plot.margin = unit(c(0, 0, 0, 0), "cm"),
-  #     axis.ticks.length.y = unit(0.1, "cm"),
-  #     axis.ticks.length.x = unit(0.1, "cm")
-  #   ) +
-  #   guides(
-  #     color = guide_legend(override.aes = list(shape = 21)),
-  #     shape = "none"
-  #   )
-  #
-  #
-  # legend_a_h <- get_legend(
-  #   legend_fig_labor_h +
-  #     theme(legend.text = element_text(size = 12))
-  # )
-
-  # ## add no-emp legend to labor fig
-  # labor_share_fig_a2 <- plot_grid(
-  #   labor_share_fig_a,
-  #   legend_a_h,
-  #   ncol = 1,
-  #   rel_heights = c(0.95, 0.05),
-  #   rel_widths = c(1, 1)
-  # )
-
-  ## state fig - race
-  state_share_fig_a <- ggplot() +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    geom_point(
-      data = share_df %>%
-        filter(
-          !scen_id %in% remove_scen,
-          demo_cat == "Race",
-          segment == "general",
-          title %in% fig_title_vec
-        ) %>%
-        mutate(
-          title = factor(
-            title,
-            levels = c("Black", "Hispanic", "Asian", "white")
-          )
-        ),
-      aes(x = scen_title, y = share, color = title),
-      size = 3,
-      alpha = 0.8
-    ) +
-    facet_wrap(~seg_title) +
-    scale_color_manual(
-      name = "",
-      values = race_col_pal
-    ) +
-    # ylim(0, 0.5) +
-    labs(
-      y = " ",
-      x = NULL,
-      color = NULL
-    ) +
-    theme_line +
-    theme(
-      legend.position = "none",
-      legend.title = element_blank(),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  ## health fig - poverty
-  health_share_fig_b <- ggplot() +
-    geom_point(
-      data = share_df %>%
-        filter(
-          !scen_id %in% remove_scen,
-          demo_cat == "Poverty",
-          unit_desc == "USD (2019 VSL)"
-        ) %>%
-        mutate(
-          title = factor(
-            title,
-            levels = c("Below poverty line", "Above poverty line")
-          )
-        ),
-      aes(x = scen_title, y = share, shape = title),
-      color = "black",
-      size = 3,
-      alpha = 0.8
-    ) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    facet_wrap(~seg_title) +
-    scale_shape_manual(values = poverty_ptc_h) +
-    labs(
-      y = "NPV share",
-      x = NULL,
-      color = NULL
-    ) +
-    theme_line +
-    # ylim(0, 0.9) +
-    theme(
-      legend.position = "bottom",
-      legend.title = element_blank(),
-      # axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    ) +
-    guides(shape = guide_legend(nrow = 2, byrow = TRUE))
-
-  labor_share_fig_b <- ggplot() +
-    geom_point(
-      data = share_df %>%
-        filter(
-          !scen_id %in% remove_scen,
-          demo_cat == "Poverty",
-          segment == "labor"
-        ) %>%
-        mutate(
-          title = factor(
-            title,
-            levels = c("Below poverty line", "Above poverty line")
-          ),
-          demo_grp_metric = factor(
-            demo_grp_metric,
-            levels = c(
-              "Above poverty line_forgone_wages_l",
-              "Below poverty line_forgone_wages_l",
-              "Above poverty line_forgone_wages_h",
-              "Below poverty line_forgone_wages_h"
-            )
-          )
-        ),
-      aes(x = scen_title, y = share, shape = demo_grp_metric),
-      color = "black",
-      size = 3,
-      alpha = 0.8
-    ) +
-    scale_shape_manual(
-      values = poverty_pt_share_l,
-      labels = poverty_hl_share_labs
-    ) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    facet_wrap(~seg_title) +
-    # ylim(0, 0.9) +
-    labs(
-      y = "NPV share",
-      x = NULL,
-      color = NULL
-    ) +
-    theme_line +
-    theme(
-      legend.position = "bottom",
-      legend.title = element_blank(),
-      # axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    ) +
-    guides(shape = guide_legend(nrow = 2, byrow = TRUE))
-
-  ## state - poverty
-  state_share_fig_b <- ggplot() +
-    geom_point(
-      data = share_df %>%
-        filter(
-          !scen_id %in% remove_scen,
-          demo_cat == "Poverty",
-          segment == "general"
-        ) %>%
-        mutate(
-          title = factor(
-            title,
-            levels = c("Below poverty line", "Above poverty line")
-          )
-        ),
-      aes(x = scen_title, y = share, shape = title),
-      color = "black",
-      size = 3,
-      alpha = 0.8
-    ) +
-    scale_shape_manual(values = poverty_ptc_h) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    facet_wrap(~seg_title) +
-    # ylim(0, 0.9) +
-    labs(
-      y = " ",
-      x = NULL,
-      color = NULL
-    ) +
-    theme_line +
-    theme(
-      legend.position = "none",
-      legend.title = element_blank(),
-      # axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  ## health fig - DAC
-  health_share_fig_c <- ggplot() +
-    geom_point(
-      data = share_df %>%
-        filter(
-          !scen_id %in% remove_scen,
-          demo_cat == "DAC",
-          unit_desc == "USD (2019 VSL)"
-        ),
-      aes(x = scen_title, y = share, shape = title),
-      color = "black",
-      size = 3,
-      alpha = 0.8
-    ) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    scale_shape_manual(values = dac_ptc) +
-    facet_wrap(~seg_title) +
-    # ylim(0, 0.85) +
-    labs(
-      y = "NPV share",
-      x = NULL,
-      color = NULL
-    ) +
-    theme_line +
-    theme(
-      legend.position = "bottom",
-      legend.title = element_blank(),
-      # axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    ) +
-    guides(shape = guide_legend(nrow = 2, byrow = TRUE))
-
-  ## labor fig - DAC
-  labor_share_fig_c <- ggplot() +
-    geom_point(
-      data = share_df %>%
-        filter(
-          !scen_id %in% remove_scen,
-          demo_cat == "DAC",
-          segment == "labor"
-        ) %>%
-        mutate(
-          demo_grp_metric = factor(
-            demo_grp_metric,
-            levels = c(
-              "DAC_forgone_wages_l",
-              "Non-DAC_forgone_wages_l",
-              "DAC_forgone_wages_h",
-              "Non-DAC_forgone_wages_h"
-            )
-          )
-        ),
-      aes(x = scen_title, y = share, shape = demo_grp_metric),
-      color = "black",
-      size = 3,
-      alpha = 0.8
-    ) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    scale_shape_manual(
-      values = dac_hl_ptc_share,
-      labels = dac_hl_labs_share
-    ) +
-    facet_wrap(~seg_title) +
-    # ylim(0, 0.85) +
-    labs(
-      y = "NPV share",
-      x = NULL,
-      color = NULL
-    ) +
-    # ylim(-15, 0) +
-    theme_line +
-    theme(
-      legend.position = "bottom",
-      legend.title = element_blank(),
-      # axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    ) +
-    guides(shape = guide_legend(nrow = 2, byrow = TRUE))
-
-  ## general fig - DAC
-  state_share_fig_c <- ggplot() +
-    geom_point(
-      data = share_df %>%
-        filter(
-          !scen_id %in% remove_scen,
-          demo_cat == "DAC",
-          segment == "general"
-        ),
-      aes(x = scen_title, y = share, shape = title),
-      color = "black",
-      size = 3,
-      alpha = 0.8
-    ) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    scale_shape_manual(values = dac_ptc) +
-    facet_wrap(~seg_title) +
-    # ylim(0, 0.85) +
-    labs(
-      y = " ",
-      x = NULL,
-      color = NULL
-    ) +
-    # ylim(-15, 0) +
-    theme_line +
-    theme(
-      legend.position = "none",
-      legend.title = element_blank(),
-      # axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  ## save legends
-  ## -----------------------------------------------------------------------
-
-  health_dac_legend <- get_legend(
-    health_share_fig_c +
-      theme(legend.text = element_text(size = 12))
-  )
-
-  health_poverty_legend <- get_legend(
-    health_share_fig_b +
-      theme(legend.text = element_text(size = 12))
-  )
-
-  health_race_legend <- get_legend(
-    health_share_fig_a +
-      theme(legend.text = element_text(size = 12))
-  )
-
-  ## save legends
-  ggsave(
-    plot = health_dac_legend,
-    device = "pdf",
-    filename = "health_dac_legend.pdf",
-    path = file.path(main_path, save_path, "legends"),
-    dpi = 600
-  )
-
-  ## save legends
-  ggsave(
-    plot = health_poverty_legend,
-    device = "pdf",
-    filename = "health_poverty_legend.pdf",
-    path = file.path(main_path, save_path, "legends"),
-    dpi = 600
-  )
-
-  ## save legends
-  ggsave(
-    plot = health_race_legend,
-    device = "pdf",
-    filename = "health_race_legend.pdf",
-    path = file.path(main_path, save_path, "legends"),
-    dpi = 600
-  )
-
-  labor_dac_legend <- get_legend(
-    labor_share_fig_c +
-      theme(legend.text = element_text(size = 12))
-  )
-
-  labor_poverty_legend <- get_legend(
-    labor_share_fig_b +
-      theme(legend.text = element_text(size = 12))
-  )
-
-  labor_race_legend <- get_legend(
-    labor_share_fig_a +
-      theme(legend.text = element_text(size = 12))
-  )
-
-  ## save legends
-  ggsave(
-    plot = labor_dac_legend,
-    device = "pdf",
-    filename = "labor_dac_legend.pdf",
-    path = file.path(main_path, save_path, "legends"),
-    dpi = 600
-  )
-
-  ## save legends
-  ggsave(
-    plot = labor_poverty_legend,
-    device = "pdf",
-    filename = "labor_poverty_legend.pdf",
-    path = file.path(main_path, save_path, "legends"),
-    dpi = 600
-  )
-
-  ## save legends
-  ggsave(
-    plot = labor_race_legend,
-    device = "pdf",
-    filename = "labor_race_legend.pdf",
-    path = file.path(main_path, save_path, "legends"),
-    dpi = 600
-  )
-
-  ## save legends
-  ggsave(
-    plot = legend_a_h,
-    device = "pdf",
-    filename = "labor_race_legend_no_re-emp.pdf",
-    path = file.path(main_path, save_path, "legends"),
-    dpi = 600
-  )
-
-  ## combine figure
-  ## ---------------------------------
-
-  fig_text_size <- 12
-
-  ## health
-  health_column_fig <- plot_grid(
-    health_share_fig_c +
-      ylim(0, 1) +
-      theme(
-        axis.text.x = element_blank(),
-        legend.position = "none",
-        plot.margin = margin(1, 1, 20, 1),
-        strip.text.x = element_text(size = fig_text_size),
-        axis.text.y = element_text(size = fig_text_size),
-        axis.title.y = element_text(size = fig_text_size)
-      ),
-    health_share_fig_b +
-      ylim(0, 1) +
-      theme(
-        axis.text.x = element_blank(),
-        strip.text.x = element_blank(),
-        axis.text.y = element_text(size = fig_text_size),
-        axis.title.y = element_text(size = fig_text_size),
-        legend.position = "none",
-        plot.margin = margin(1, 1, 20, 1)
-      ),
-    health_share_fig_a +
-      ylim(0, 1) +
-      theme(
-        strip.text.x = element_blank(),
-        axis.text.y = element_text(size = fig_text_size),
-        axis.title.y = element_text(size = fig_text_size),
-        axis.text.x = element_text(size = fig_text_size),
-        legend.position = "none"
-      ),
-    align = "vh",
-    labels = c("A", "B", "C"),
-    # # labels = 'AUTO',
-    label_size = 10,
-    hjust = -1,
-    nrow = 3
-    # rel_widths = c(1, 0.25, 1),
-    # rel_heights = c(1, 0.1, 1, 0.1, 1)
-  )
-
-  ## state
-  state_column_fig <- plot_grid(
-    state_share_fig_c +
-      ylim(0, 1) +
-      theme(
-        axis.text.x = element_blank(),
-        legend.position = "none",
-        plot.margin = margin(1, 1, 20, 1),
-        strip.text.x = element_text(size = fig_text_size),
-        axis.title.y = element_text(size = fig_text_size),
-        # axis.text.y = element_text(size = fig_text_size),
-        axis.text.y = element_blank()
-      ),
-    state_share_fig_b +
-      ylim(0, 1) +
-      theme(
-        axis.text.x = element_blank(),
-        strip.text.x = element_blank(),
-        axis.title.y = element_text(size = fig_text_size),
-        legend.position = "none",
-        plot.margin = margin(1, 1, 20, 1),
-        # axis.text.y = element_text(size = fig_text_size),
-        axis.text.y = element_blank()
-      ),
-    state_share_fig_a +
-      ylim(0, 1) +
-      theme(
-        strip.text.x = element_blank(),
-        axis.title.y = element_text(size = fig_text_size),
-        axis.text.x = element_text(size = fig_text_size),
-        legend.position = "none",
-        # axis.text.y = element_text(size = fig_text_size),
-        axis.text.y = element_blank()
-      ),
-    align = "vh",
-    labels = c("D", "E", "F"),
-    # # labels = 'AUTO',
-    label_size = 10,
-    hjust = -1,
-    nrow = 3
-    # rel_widths = c(1, 0.25, 1),
-    # rel_heights = c(1, 0.1, 1, 0.1, 1)
-  )
-
-  ## labor
-  labor_column_fig <- plot_grid(
-    labor_share_fig_c +
-      ylim(0, 1) +
-      labs(y = " ") +
-      theme(
-        axis.text.x = element_blank(),
-        strip.text.x = element_text(size = fig_text_size),
-        legend.position = "none",
-        plot.margin = margin(1, 1, 20, 1),
-        # axis.text.y = element_text(size = fig_text_size),
-        axis.text.y = element_blank()
-      ),
-    labor_share_fig_b +
-      ylim(0, 1) +
-      labs(y = " ") +
-      theme(
-        axis.text.x = element_blank(),
-        strip.text.x = element_blank(),
-        legend.position = "none",
-        plot.margin = margin(1, 1, 20, 1),
-        # axis.text.y = element_text(size = fig_text_size),
-        axis.text.y = element_blank()
-      ),
-    labor_share_fig_a +
-      ylim(0, 1) +
-      labs(y = " ") +
-      theme(
-        strip.text.x = element_blank(),
-        axis.text.x = element_text(size = fig_text_size),
-        legend.position = "none",
-        # axis.text.y = element_text(size = fig_text_size),
-        axis.text.y = element_blank()
-      ),
-    align = "vh",
-    labels = c("G", "H", "I"),
-    # # labels = 'AUTO',
-    label_size = 10,
-    hjust = -1,
-    nrow = 3
-    # rel_widths = c(1, 0.25, 1),
-    # rel_heights = c(1, 0.1, 1, 0.1, 1)
-  )
-
-  ## all together now
-  hl_pc_plot_grid_nl <- plot_grid(
-    health_column_fig,
-    state_column_fig,
-    labor_column_fig,
-    align = "h",
-    # labels = c("(A)", "(B)", "(C)", ""),
-    # # labels = 'AUTO',
-    # label_size = 10,
-    # hjust = -1,
-    ncol = 3,
-    rel_widths = c(1, 0.3, 0.9)
-    # rel_widths = c(1, 1, 1)
-  )
-
-  return(hl_pc_plot_grid_nl)
-}
-
-create_health_labor_table <- function(
-  main_path,
-  save_path,
-  demographic_npv_df,
-  ref_labor_demog,
-  pop_ratios,
-  refining_mortality
-) {
-  ## create table of total health benefit (NPV), labor loss (NPV), change in job years,
-  ## and avoided premature mortality (scenario x demographic group, state)
-
-  ## NPV values
-  npv_out <- demographic_npv_df[, .(
-    demand_scenario,
-    refining_scenario,
-    demo_cat,
-    demo_group,
-    title,
-    segment,
-    metric,
-    metric_desc,
-    unit_desc,
-    value
-  )]
-
-  ## fte-job-years
-  emp_out <- ref_labor_demog %>%
-    # select(-sum_demo_comp_pv_h, -sum_demo_comp_pv_l) %>%
-    select(demand_scenario:title, sum_demo_emp) %>%
-    rename(value = sum_demo_emp) %>%
-    group_by(
-      demand_scenario,
-      refining_scenario,
-      oil_price_scenario,
-      demo_cat,
-      demo_group,
-      title
-    ) %>%
-    summarise(value = sum(value)) %>%
-    # summarise(value = sum(value),
-    #           value_revised = sum(value_revised)) %>%
-    ungroup() %>%
-    mutate(
-      segment = "labor",
-      metric = "fte_job_years",
-      metric_desc = "job_loss",
-      unit_desc = "fte-jobs"
-    )
-
-  ## bau scen
-  bau_emp_out <- emp_out %>%
-    filter(
-      demand_scenario == "BAU",
-      refining_scenario == "historic production"
-    ) %>%
-    select(oil_price_scenario, demo_cat, demo_group, title, value) %>%
-    rename(bau_value = value)
-  # ,
-  #          bau_revised = value_revised)
-
-  ## difference
-  emp_out <- merge(
-    emp_out,
-    bau_emp_out,
-    by = c("oil_price_scenario", "demo_cat", "demo_group", "title"),
-    all.x = T
-  )
-
-  emp_out <- emp_out %>%
-    mutate(
-      delta_value = value - bau_value
-      # ,
-      # delta_revised_value = value_revised - bau_revised
-    ) %>%
-    select(
-      oil_price_scenario:refining_scenario,
-      segment,
-      metric,
-      metric_desc,
-      unit_desc,
-      delta_value
-    ) %>%
-    rename(value = delta_value) %>%
-    mutate(
-      refining_scenario = str_replace(
-        refining_scenario,
-        "historic",
-        "historical"
-      )
-    ) %>%
-    filter(oil_price_scenario == "reference case") %>%
-    select(-oil_price_scenario)
-
-  ## avoided mortality
-  avoid_m_out <- copy(refining_mortality) %>% as.data.table()
-
-  ## select columns
-  avoid_m_out <- avoid_m_out %>%
-    select(
-      census_tract,
-      demand_scenario,
-      refining_scenario,
-      year,
-      mortality_delta
-    )
-
-  ## merge with pop ratios
-  avoid_m_out <- merge(
-    avoid_m_out,
-    pop_ratios,
-    by = "census_tract",
-    all.x = TRUE,
-    allow.cartesian = TRUE
-  )
-
-  setDT(avoid_m_out)
-
-  ## calc value by demographic group
-  avoid_m_out[, value := mortality_delta * pct]
-
-  avoid_m_out_total <- avoid_m_out[,
-    .(value = sum(value)),
-    by = .(
-      demand_scenario,
-      refining_scenario,
-      demo_cat,
-      demo_group,
-      title
-    )
-  ]
-
-  avoid_m_out_total <- avoid_m_out_total %>%
-    mutate(
-      refining_scenario = str_replace(
-        refining_scenario,
-        "historic",
-        "historical"
-      ),
-      segment = "health",
-      metric = "avoided_mortality",
-      metric_desc = "avoided_mortality",
-      unit_desc = "persons"
-    ) %>%
-    select(
-      demand_scenario,
-      refining_scenario,
-      demo_cat,
-      demo_group,
-      title,
-      segment,
-      metric,
-      metric_desc,
-      unit_desc,
-      value
-    )
-
-  ## bind
-  result_output <- rbind(npv_out, emp_out, avoid_m_out_total)
-
-  ## save figure inputs
-  fwrite(
-    result_output,
-    file.path(
-      main_path,
-      save_path,
-      "fig-csv-files",
-      "state_health_labor_ouputs.csv"
-    )
-  )
-  # fwrite(result_output, file.path(main_path, "outputs/academic-out/refining/figures/2024-08-beta-adj/fig-csv-files/", "state_health_labor_ouputs.csv"))
-
-  return(result_output)
-}
-
-
-fig4_hl <- function(
-  health_grp,
-  ref_labor_demog_yr,
-  refining_mortality,
-  pop_ratios
-) {
-  gaps_df <- copy(health_grp)
-
-  ## change scenario names, factor
-  gaps_df[,
-    scenario := paste0(demand_scenario, " demand - ", refining_scenario)
-  ]
-  # gaps_df[, scenario := gsub('BAU', 'Reference', scenario)]
-  gaps_df[, scenario := gsub("LC1.", "Low ", scenario)]
-
-  ## refactor
-  gaps_df[, scenario_title := scenario]
-  gaps_df[, scenario_title := str_replace(scenario_title, " - ", "\n")]
-
-  ## calculate gaps (BAU - scenario)
-  bau_gaps_df <- gaps_df[scen_id == "BAU historic production"]
-  bau_gaps_df <- bau_gaps_df[, c(
-    "year",
-    "demo_cat",
-    "demo_group",
-    "title",
-    "mortality_level_dem"
-  )]
-  setnames(bau_gaps_df, "mortality_level_dem", "bau_mortality_level_dem")
-
-  gaps_df <- merge(
-    gaps_df,
-    bau_gaps_df,
-    by = c("year", "demo_cat", "demo_group", "title"),
-    all.x = T
-  )
-
-  gaps_df[, gap := mortality_level_dem - bau_mortality_level_dem]
-
-  ## change historic to historical
-  gaps_df[, scen_id := str_replace(scen_id, "historic", "historical")]
-  gaps_df[,
-    refining_scenario := str_replace(
-      refining_scenario,
-      "historic",
-      "historical"
-    )
-  ]
-  gaps_df[, scenario := str_replace(scenario, "historic", "historical")]
-  gaps_df[,
-    scenario_title := str_replace(scenario_title, "historic", "historical")
-  ]
-
-  ## refactor
-  gaps_df$scenario <- factor(
-    gaps_df$scenario,
-    levels = c(
-      "BAU demand - historical production",
-      "BAU demand - historical exports",
-      "BAU demand - low exports",
-      "Low demand - historical exports",
-      "Low demand - low exports",
-      "Low demand - historical production"
-    )
-  )
-
-  gaps_df$scenario_title <- factor(
-    gaps_df$scenario_title,
-    levels = c(
-      "BAU demand\nhistorical production",
-      "BAU demand\nhistorical exports",
-      "BAU demand\nlow exports",
-      "Low demand\nhistorical exports",
-      "Low demand\nlow exports",
-      "Low demand\nhistorical production"
-    )
-  )
-
-  ## make figures
-  ## ---------------------------------------------------------
-
-  ## scenarios for filtering
-  remove_scen <- c("LC1 historical production", "BAU historical production")
-
-  ## figure a
-  fig_title_vec <- c("Asian", "Black", "Hispanic", "white")
-
-  health_gap_fig_a <- ggplot(
-    gaps_df %>%
-      filter(
-        !scen_id %in% remove_scen,
-        title %in% fig_title_vec,
-        demo_cat == "Race"
-      ) %>%
-      mutate(
-        title = factor(title, levels = c("Black", "Hispanic", "Asian", "white"))
-      ),
-    aes(x = year, y = gap, color = title)
-  ) +
-    geom_line(linewidth = 1, alpha = 0.8) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    facet_grid(demo_cat ~ scenario_title) +
-    scale_color_manual(
-      name = "",
-      values = race_col_pal
-    ) +
-    labs(
-      x = NULL,
-      y = NULL
-    ) +
-    scale_x_continuous(
-      breaks = c(2020, 2045), # Specify tick mark positions
-      labels = c(2020, 2045)
-    ) + # Specify tick mark labels
-    theme_line +
-    # ylim(c(-0.31, 0)) +
-    theme(
-      legend.position = "none",
-      legend.title = element_blank(),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      strip.text.x = element_blank(),
-      # axis.text.x = element_blank(),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  ## figure b
-  health_gap_fig_b <- ggplot(
-    gaps_df %>%
-      filter(
-        !scen_id %in% remove_scen,
-        demo_cat == "DAC"
-      ),
-    aes(x = year, y = gap, lty = title)
-  ) +
-    geom_line(linewidth = 1, alpha = 0.8) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    facet_grid(demo_cat ~ scenario_title) +
-    labs(
-      x = NULL,
-      y = NULL
-    ) +
-    # ylim(c(-0.31, 0)) +
-    scale_x_continuous(
-      breaks = c(2020, 2045), # Specify tick mark positions
-      labels = c(2020, 2045)
-    ) + # Specify tick mark labels
-    scale_linetype_manual(values = dac_lty) +
-    theme_line +
-    theme(
-      legend.position = "none",
-      legend.title = element_blank(),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      # strip.text.x = element_blank(),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      # axis.text.x = element_blank(),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  ## figure c
-  health_gap_fig_c <- ggplot(
-    gaps_df %>%
-      filter(
-        !scen_id %in% remove_scen,
-        demo_cat == "Poverty"
-      ) %>%
-      mutate(
-        title = factor(
-          title,
-          levels = c("Below poverty line", "Above poverty line")
-        )
-      ),
-    aes(x = year, y = gap, lty = title)
-  ) +
-    geom_line(linewidth = 1, alpha = 0.8, color = "black") +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    facet_grid(demo_cat ~ scenario_title) +
-    labs(
-      x = NULL,
-      y = NULL
-    ) +
-    scale_linetype_manual(values = poverty_lty) +
-    scale_x_continuous(
-      breaks = c(2020, 2045), # Specify tick mark positions
-      labels = c(2020, 2045)
-    ) + # Specify tick mark labels
-    # ylim(c(-0.31, 0)) +
-    theme_line +
-    theme(
-      legend.position = "none",
-      legend.title = element_blank(),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      legend.key.width = unit(10, "mm"),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      strip.text.x = element_blank(),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  ## shared y lab
-  # yaxis_lab <- ggdraw() + draw_label(expression(paste("PM"[2.5], " (",mu,"g ", m^{-3},")", " per person, difference from reference")),
-  #                                    size = 8, angle = 90)
-
-  yaxis_lab <- ggdraw() +
-    draw_label(
-      "Avoided mortalities, difference from reference",
-      size = 8,
-      angle = 90
-    )
-
-  gaps_plot_grid_h <- plot_grid(
-    health_gap_fig_b,
-    health_gap_fig_c,
-    health_gap_fig_a,
-    align = "v",
-    # labels = c("A", "B", "C", "D", "E", "F"),
-    # # labels = 'AUTO',
-    # label_size = 10,
-    hjust = -1,
-    nrow = 3,
-    ncol = 1,
-    rel_widths = c(1, 1, 1),
-    rel_heights = c(1.1, 0.9, 0.9)
-  )
-
-  gaps_plot_grid2 <- plot_grid(
-    yaxis_lab,
-    gaps_plot_grid_h,
-    align = "v",
-    # labels = c("A", "B", "C", "D", "E", "F"),
-    # # labels = 'AUTO',
-    # label_size = 10,
-    hjust = -1,
-    nrow = 1,
-    ncol = 2,
-    rel_widths = c(0.05, 1),
-    rel_heights = c(1, 1)
-  )
-
-  ## labor
-  ## -------------------------------------------------------
-
-  # ## calc 2020 pop by demographic
-  # pop_2020 <- refining_mortality %>%
-  #   filter(year == 2020) %>%
-  #   select(census_tract, year, pop) %>%
-  #   unique() %>%
-  #   left_join(pop_ratios) %>%
-  #   as.data.table()
-  #
-  # pop_2020[, demo_pop := pop * pct]
-  #
-  # ## summarize by demographic group
-  # pop_2020 <- pop_2020[, .(pop_2020 = sum(demo_pop)),
-  #                      by = .(demo_group, demo_cat)]
-  #
-  ## labor outputs
-  l_gaps_df <- copy(ref_labor_demog_yr)
-
-  ## change scenario names, factor
-  l_gaps_df[,
-    scenario := paste0(demand_scenario, " demand - ", refining_scenario)
-  ]
-  # gaps_df[, scenario := gsub('BAU', 'Reference', scenario)]
-  l_gaps_df[, scenario := gsub("LC1.", "Low ", scenario)]
-
-  ## add scenario title
-  l_gaps_df[, scenario_title := scenario]
-  l_gaps_df[, scenario_title := str_replace(scenario_title, " - ", "\n")]
-
-  ## change historic to historical
-  l_gaps_df[,
-    refining_scenario := str_replace(
-      refining_scenario,
-      "historic",
-      "historical"
-    )
-  ]
-  l_gaps_df[, scenario := str_replace(scenario, "historic", "historical")]
-  l_gaps_df[,
-    scenario_title := str_replace(scenario_title, "historic", "historical")
-  ]
-
-  ## scenarios for filtering
-  remove_scen <- c(
-    "Low demand - historical production",
-    "BAU demand - historical production"
-  )
-
-  l_gaps_df$scenario <- factor(
-    l_gaps_df$scenario,
-    levels = c(
-      "BAU demand - historical production",
-      "BAU demand - historical exports",
-      "BAU demand - low exports",
-      "Low demand - historical exports",
-      "Low demand - low exports",
-      "Low demand - historical production"
-    )
-  )
-
-  l_gaps_df$scenario_title <- factor(
-    l_gaps_df$scenario_title,
-    levels = c(
-      "BAU demand\nhistorical production",
-      "BAU demand\nhistorical exports",
-      "BAU demand\nlow exports",
-      "Low demand\nhistorical exports",
-      "Low demand\nlow exports",
-      "Low demand\nhistorical production"
-    )
-  )
-
-  ## sum for state
-  l_gaps_df <- l_gaps_df[,
-    .(sum_demo_emp = sum(sum_demo_emp)),
-    by = .(
-      year,
-      demand_scenario,
-      refining_scenario,
-      oil_price_scenario,
-      scenario,
-      scenario_title,
-      demo_cat,
-      demo_group,
-      title
-    )
-  ]
-
-  # ## merge with 2020 pop
-  # l_gaps_df <- merge(l_gaps_df, pop_2020,
-  #                    by = c("demo_cat", "demo_group"),
-  #                    all.x = T)
-  #
-  # ## calculate per capita
-  # l_gaps_df[, demo_emp_pc := sum_demo_emp / pop_2020]
-
-  ## select columns
-  l_gaps_df <- l_gaps_df[, .(
-    year,
-    demand_scenario,
-    refining_scenario,
-    oil_price_scenario,
-    scenario,
-    scenario_title,
-    demo_cat,
-    demo_group,
-    title,
-    sum_demo_emp
-  )]
-
-  # filter for oil px == reference case
-  l_gaps_df <- l_gaps_df[oil_price_scenario == "reference case"]
-
-  ## calculate gaps (BAU - scenario)
-  l_bau_gaps_df <- l_gaps_df[scenario == "BAU demand - historical production"]
-  l_bau_gaps_df <- l_bau_gaps_df[, c(
-    "year",
-    "demo_cat",
-    "demo_group",
-    "title",
-    "sum_demo_emp"
-  )]
-  setnames(l_bau_gaps_df, "sum_demo_emp", "bau_sum_demo_emp")
-  # setnames(l_bau_gaps_df, "demo_emp_pc", "bau_demo_emp_pc")
-
-  l_gaps_df <- merge(
-    l_gaps_df,
-    l_bau_gaps_df,
-    by = c("year", "demo_cat", "demo_group", "title"),
-    all.x = T
-  )
-
-  l_gaps_df[, gap_emp := sum_demo_emp - bau_sum_demo_emp]
-  # l_gaps_df[, gap_emp_pc :=  demo_emp_pc - bau_demo_emp_pc]
-
-  ## figure labor a
-  labor_gap_fig_a <- ggplot(
-    l_gaps_df %>%
-      filter(
-        !scenario %in% remove_scen,
-        title %in% fig_title_vec,
-        demo_cat == "Race"
-      ) %>%
-      mutate(
-        title = factor(title, levels = c("Black", "Hispanic", "Asian", "white"))
-      ),
-    aes(x = year, y = gap_emp / 1000, color = title)
-  ) +
-    geom_line(linewidth = 1, alpha = 0.8) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    facet_grid(demo_cat ~ scenario_title) +
-    scale_color_manual(
-      name = "",
-      values = race_col_pal
-    ) +
-    labs(
-      x = NULL,
-      y = NULL
-    ) +
-    # ylim(-35, 0) +
-    scale_x_continuous(
-      breaks = c(2020, 2045), # Specify tick mark positions
-      labels = c(2020, 2045)
-    ) + # Specify tick mark labels
-    theme_line +
-    theme(
-      legend.position = "bottom",
-      legend.title = element_blank(),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      strip.text.x = element_blank(),
-      # axis.text.x = element_blank(),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  ## legend a
-  legend_a <- labor_gap_fig_a +
-    geom_line(linewidth = 1, alpha = 0.9) +
-    theme(legend.position = "right")
-
-  legend_a <- get_legend(
-    legend_a +
-      theme(legend.text = element_text(size = 6))
-  )
-
-  ## labor b
-  labor_gap_fig_b <- ggplot(
-    l_gaps_df %>%
-      filter(
-        !scenario %in% remove_scen,
-        demo_cat == "DAC"
-      ),
-    aes(x = year, y = gap_emp / 1000, lty = title)
-  ) +
-    geom_line(linewidth = 1, alpha = 0.8) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    facet_grid(demo_cat ~ scenario_title) +
-    scale_linetype_manual(values = dac_lty) +
-    labs(
-      x = NULL,
-      y = NULL
-    ) +
-    # ylim(c(-35, 0)) +
-    scale_x_continuous(
-      breaks = c(2020, 2045), # Specify tick mark positions
-      labels = c(2020, 2045)
-    ) + # Specify tick mark labels
-    theme_line +
-    theme(
-      legend.position = "bottom",
-      legend.title = element_blank(),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      # strip.text.x = element_blank(),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      # axis.text.x = element_blank(),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  ## legend b
-  legend_b <- labor_gap_fig_b +
-    geom_line(linewidth = 1, alpha = 0.9) +
-    theme(legend.position = "right")
-
-  legend_b <- get_legend(
-    legend_b +
-      theme(legend.text = element_text(size = 6))
-  )
-
-  ## labor c
-  labor_gap_fig_c <- ggplot(
-    l_gaps_df %>%
-      filter(
-        !scenario %in% remove_scen,
-        demo_cat == "Poverty"
-      ) %>%
-      mutate(
-        title = factor(
-          title,
-          levels = c("Below poverty line", "Above poverty line")
-        )
-      ),
-    aes(x = year, y = gap_emp / 1000, lty = title)
-  ) +
-    geom_line(linewidth = 1, alpha = 0.8, color = "black") +
-    scale_linetype_manual(values = poverty_lty) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    facet_grid(demo_cat ~ scenario_title) +
-    labs(
-      x = NULL,
-      y = NULL
-    ) +
-    scale_x_continuous(
-      breaks = c(2020, 2045), # Specify tick mark positions
-      labels = c(2020, 2045)
-    ) + # Specify tick mark labels
-    theme_line +
-    # ylim(-35, 0) +
-    theme(
-      legend.position = "bottom",
-      legend.title = element_blank(),
-      axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-      legend.key.width = unit(10, "mm"),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      strip.text.x = element_blank(),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  ## legend c
-  legend_c <- labor_gap_fig_c +
-    geom_line(linewidth = 1, alpha = 0.9) +
-    theme(legend.position = "right")
-
-  legend_c <- get_legend(
-    legend_c +
-      theme(legend.text = element_text(size = 6))
-  )
-
-  ## shared y lab
-  # yaxis_lab <- ggdraw() + draw_label("Labor: FTE job-years, difference from reference", size = 8, angle = 90)
-  yaxis_lab <- ggdraw() +
-    draw_label(
-      "Labor: FTE job-years, difference from reference (thousand)",
-      size = 8,
-      angle = 90
-    )
-
-  l_gaps_plot_grid <- plot_grid(
-    labor_gap_fig_b + theme(legend.position = "none"),
-    labor_gap_fig_c + theme(legend.position = "none"),
-    labor_gap_fig_a + theme(legend.position = "none"),
-    align = "v",
-    # labels = c("A", "B", "C", "D", "E", "F"),
-    # # labels = 'AUTO',
-    # label_size = 10,
-    hjust = -1,
-    nrow = 3,
-    ncol = 1,
-    rel_widths = c(1, 1, 1),
-    rel_heights = c(1.05, 0.9, 0.9)
-  )
-
-  l_gaps_plot_grid2 <- plot_grid(
-    yaxis_lab,
-    l_gaps_plot_grid,
-    align = "v",
-    # labels = c("A", "B", "C", "D", "E", "F"),
-    # # labels = 'AUTO',
-    # label_size = 10,
-    hjust = -1,
-    nrow = 1,
-    ncol = 2,
-    rel_widths = c(0.05, 1),
-    rel_heights = c(1, 1)
-  )
-
-  l_gaps_plot_grid2
-
-  ## plot legends
-  legends <- plot_grid(
-    legend_b,
-    legend_c,
-    legend_a,
-    align = "v",
-    # labels = c("A", "B", "C", "D", "E", "F"),
-    # # labels = 'AUTO',
-    # label_size = 10,
-    hjust = -1,
-    nrow = 3,
-    ncol = 1,
-    rel_widths = c(1, 1, 1),
-    rel_heights = c(1, 1, 1)
-  )
-
-  ## plot side by side
-  ## ----------------------------------------------
-
-  health_labor_plot <- plot_grid(
-    gaps_plot_grid2,
-    l_gaps_plot_grid2,
-    NULL,
-    legends,
-    align = "v",
-    # labels = c("A", "B", "C", "D", "E", "F"),
-    # # labels = 'AUTO',
-    # label_size = 10,
-    hjust = -1,
-    nrow = 1,
-    ncol = 4,
-    rel_widths = c(1, 1, 0.05, 0.2),
-    rel_heights = c(1, 1, 1, 1)
-  )
-
-  return(health_labor_plot)
-}
-
-
-fig4_hl_pmil <- function(
-  health_grp,
-  ref_labor_demog_yr,
-  refining_mortality,
-  pop_ratios
-) {
-  ## calc 2020 pop by demographic
-  pop_2020 <- refining_mortality %>%
-    filter(year == 2020) %>%
-    select(census_tract, year, pop) %>%
-    unique() %>%
-    left_join(pop_ratios) %>%
-    as.data.table()
-
-  pop_2020[, demo_pop := pop * pct]
-
-  ## summarize by demographic group
-  pop_2020 <- pop_2020[,
-    .(pop_2020 = sum(demo_pop)),
-    by = .(demo_group, demo_cat)
-  ]
-
-  gaps_df <- copy(health_grp)
-
-  ## change scenario names, factor
-  gaps_df[,
-    scenario := paste0(demand_scenario, " demand - ", refining_scenario)
-  ]
-  # gaps_df[, scenario := gsub('BAU', 'Reference', scenario)]
-  gaps_df[, scenario := gsub("LC1.", "Low ", scenario)]
-
-  ## refactor
-  gaps_df[, scenario_title := scenario]
-  gaps_df[, scenario_title := str_replace(scenario_title, " - ", "\n")]
-
-  ## calculate gaps (BAU - scenario)
-  bau_gaps_df <- gaps_df[scen_id == "BAU historic production"]
-  bau_gaps_df <- bau_gaps_df[, c(
-    "year",
-    "demo_cat",
-    "demo_group",
-    "title",
-    "mortality_level_dem"
-  )]
-  setnames(bau_gaps_df, "mortality_level_dem", "bau_mortality_level_dem")
-
-  gaps_df <- merge(
-    gaps_df,
-    bau_gaps_df,
-    by = c("year", "demo_cat", "demo_group", "title"),
-    all.x = T
-  )
-
-  gaps_df[, gap := mortality_level_dem - bau_mortality_level_dem]
-
-  ## convert to per million
-  gaps_df <- merge(
-    gaps_df,
-    pop_2020,
-    by = c("demo_group", "demo_cat"),
-    all.x = T
-  )
-
-  ## calculate per capita
-  gaps_df[, value := gap / pop_2020]
-  gaps_df[, value_pmil := value * 1e6]
-
-  ## change historic to historical
-  gaps_df[, scen_id := str_replace(scen_id, "historic", "historical")]
-  gaps_df[,
-    refining_scenario := str_replace(
-      refining_scenario,
-      "historic",
-      "historical"
-    )
-  ]
-  gaps_df[, scenario := str_replace(scenario, "historic", "historical")]
-  gaps_df[,
-    scenario_title := str_replace(scenario_title, "historic", "historical")
-  ]
-
-  ## refactor
-  gaps_df$scenario <- factor(
-    gaps_df$scenario,
-    levels = c(
-      "BAU demand - historical production",
-      "BAU demand - historical exports",
-      "BAU demand - low exports",
-      "Low demand - historical exports",
-      "Low demand - low exports",
-      "Low demand - historical production"
-    )
-  )
-
-  gaps_df$scenario_title <- factor(
-    gaps_df$scenario_title,
-    levels = c(
-      "BAU demand\nhistorical production",
-      "BAU demand\nhistorical exports",
-      "BAU demand\nlow exports",
-      "Low demand\nhistorical exports",
-      "Low demand\nlow exports",
-      "Low demand\nhistorical production"
-    )
-  )
-
-  ## make figures
-  ## ---------------------------------------------------------
-
-  ## scenarios for filtering
-  remove_scen <- c("LC1 historical production", "BAU historical production")
-
-  ## figure a
-  fig_title_vec <- c("Asian", "Black", "Hispanic", "white")
-
-  fig_text_size <- 12
-
-  health_gap_fig_a <- ggplot(
-    gaps_df %>%
-      filter(
-        !scen_id %in% remove_scen,
-        title %in% fig_title_vec,
-        demo_cat == "Race"
-      ) %>%
-      mutate(
-        title = factor(title, levels = c("Black", "Hispanic", "Asian", "white"))
-      ),
-    aes(x = year, y = value_pmil, color = title)
-  ) +
-    geom_line(linewidth = 1, alpha = 0.8) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    facet_grid(demo_cat ~ scenario_title) +
-    scale_color_manual(
-      name = "",
-      values = race_col_pal
-    ) +
-    labs(
-      x = NULL,
-      y = NULL
-    ) +
-    scale_x_continuous(
-      breaks = c(2020, 2045), # Specify tick mark positions
-      labels = c(2020, 2045)
-    ) + # Specify tick mark labels
-    theme_line +
-    # ylim(c(-30, 0)) +
-    theme(
-      legend.position = "none",
-      legend.title = element_blank(),
-      axis.text.x = element_text(
-        vjust = 0.5,
-        hjust = 0.5,
-        size = fig_text_size
-      ),
-      axis.text.y = element_text(size = fig_text_size),
-      strip.text.y = element_text(size = fig_text_size),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      strip.text.x = element_blank(),
-      # axis.text.x = element_blank(),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  ## figure b
-  health_gap_fig_b <- ggplot(
-    gaps_df %>%
-      filter(
-        !scen_id %in% remove_scen,
-        demo_cat == "DAC"
-      ),
-    aes(x = year, y = value_pmil, lty = title)
-  ) +
-    geom_line(linewidth = 1, alpha = 0.8) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    facet_grid(demo_cat ~ scenario_title) +
-    labs(
-      x = NULL,
-      y = NULL
-    ) +
-    # ylim(c(-30, 0)) +
-    scale_x_continuous(
-      breaks = c(2020, 2045), # Specify tick mark positions
-      labels = c(2020, 2045)
-    ) + # Specify tick mark labels
-    scale_linetype_manual(values = dac_lty) +
-    theme_line +
-    theme(
-      legend.position = "none",
-      legend.title = element_blank(),
-      axis.text.x = element_text(
-        vjust = 0.5,
-        hjust = 0.5,
-        size = fig_text_size
-      ),
-      axis.text.y = element_text(size = fig_text_size),
-      strip.text = element_text(size = fig_text_size),
-      # strip.text.x = element_blank(),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      # axis.text.x = element_blank(),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  ## figure c
-  health_gap_fig_c <- ggplot(
-    gaps_df %>%
-      filter(
-        !scen_id %in% remove_scen,
-        demo_cat == "Poverty"
-      ) %>%
-      mutate(
-        title = factor(
-          title,
-          levels = c("Below poverty line", "Above poverty line")
-        )
-      ),
-    aes(x = year, y = value_pmil, lty = title)
-  ) +
-    geom_line(linewidth = 1, alpha = 0.8, color = "black") +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    facet_grid(demo_cat ~ scenario_title) +
-    labs(
-      x = NULL,
-      y = NULL
-    ) +
-    scale_linetype_manual(values = poverty_lty) +
-    scale_x_continuous(
-      breaks = c(2020, 2045), # Specify tick mark positions
-      labels = c(2020, 2045)
-    ) + # Specify tick mark labels
-    # ylim(c(-30, 0)) +
-    theme_line +
-    theme(
-      legend.position = "none",
-      legend.title = element_blank(),
-      axis.text.x = element_text(
-        vjust = 0.5,
-        hjust = 0.5,
-        size = fig_text_size
-      ),
-      axis.text.y = element_text(size = fig_text_size),
-      strip.text.y = element_text(size = fig_text_size),
-      legend.key.width = unit(10, "mm"),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      strip.text.x = element_blank(),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  ## shared y lab
-  # yaxis_lab <- ggdraw() + draw_label(expression(paste("PM"[2.5], " (",mu,"g ", m^{-3},")", " per person, difference from reference")),
-  #                                    size = 8, angle = 90)
-
-  yaxis_lab <- ggdraw() +
-    draw_label(
-      "Health: Avoided mortalities per million people (difference from reference)",
-      size = fig_text_size,
-      angle = 90
-    )
-
-  gaps_plot_grid_h <- plot_grid(
-    NULL,
-    health_gap_fig_b,
-    NULL,
-    health_gap_fig_c,
-    NULL,
-    health_gap_fig_a,
-    align = "v",
-    labels = c("", "A", "", "B", "", "C"),
-    # # labels = 'AUTO',
-    # label_size = 10,
-    hjust = -0.5,
-    vjust = 0.25,
-    nrow = 6,
-    ncol = 1,
-    rel_widths = c(1, 1, 1, 1, 1, 1),
-    rel_heights = c(0.15, 1.1, 0.15, 0.9, 0.15, 0.9)
-  )
-  gaps_plot_grid_h
-
-  gaps_plot_grid2 <- plot_grid(
-    yaxis_lab,
-    gaps_plot_grid_h,
-    align = "v",
-    # labels = c("A", "B", "C", "D", "E", "F"),
-    # # labels = 'AUTO',
-    # label_size = 10,
-    hjust = -1,
-    nrow = 1,
-    ncol = 2,
-    rel_widths = c(0.05, 1),
-    rel_heights = c(1, 1)
-  )
-
-  ## labor
-  ## -------------------------------------------------------
-
-  # ## calc 2020 pop by demographic
-  # pop_2020 <- refining_mortality %>%
-  #   filter(year == 2020) %>%
-  #   select(census_tract, year, pop) %>%
-  #   unique() %>%
-  #   left_join(pop_ratios) %>%
-  #   as.data.table()
-  #
-  # pop_2020[, demo_pop := pop * pct]
-  #
-  # ## summarize by demographic group
-  # pop_2020 <- pop_2020[, .(pop_2020 = sum(demo_pop)),
-  #   by = .(demo_group, demo_cat)
-  # ]
-
-  ## labor outputs
-  l_gaps_df <- copy(ref_labor_demog_yr)
-
-  l_gaps_df <- l_gaps_df[oil_price_scenario == "reference case", ]
-
-  ## change scenario names, factor
-  l_gaps_df[,
-    scenario := paste0(demand_scenario, " demand - ", refining_scenario)
-  ]
-  # gaps_df[, scenario := gsub('BAU', 'Reference', scenario)]
-  l_gaps_df[, scenario := gsub("LC1.", "Low ", scenario)]
-
-  ## add scenario title
-  l_gaps_df[, scenario_title := scenario]
-  l_gaps_df[, scenario_title := str_replace(scenario_title, " - ", "\n")]
-
-  ## change historic to historical
-  l_gaps_df[,
-    refining_scenario := str_replace(
-      refining_scenario,
-      "historic",
-      "historical"
-    )
-  ]
-  l_gaps_df[, scenario := str_replace(scenario, "historic", "historical")]
-  l_gaps_df[,
-    scenario_title := str_replace(scenario_title, "historic", "historical")
-  ]
-
-  ## scenarios for filtering
-  remove_scen <- c(
-    "Low demand - historical production",
-    "BAU demand - historical production"
-  )
-
-  l_gaps_df$scenario <- factor(
-    l_gaps_df$scenario,
-    levels = c(
-      "BAU demand - historical production",
-      "BAU demand - historical exports",
-      "BAU demand - low exports",
-      "Low demand - historical exports",
-      "Low demand - low exports",
-      "Low demand - historical production"
-    )
-  )
-
-  l_gaps_df$scenario_title <- factor(
-    l_gaps_df$scenario_title,
-    levels = c(
-      "BAU demand\nhistorical production",
-      "BAU demand\nhistorical exports",
-      "BAU demand\nlow exports",
-      "Low demand\nhistorical exports",
-      "Low demand\nlow exports",
-      "Low demand\nhistorical production"
-    )
-  )
-
-  # ## sum for state
-  # l_gaps_df <- l_gaps_df[, .(sum_demo_emp = sum(demo_emp)),
-  #   by = .(
-  #     year, demand_scenario, refining_scenario,
-  #     scenario, scenario_title, demo_cat, demo_group, title
-  #   )
-  # ]
-
-  ## select columns
-  l_gaps_df <- l_gaps_df[, .(
-    year,
-    demand_scenario,
-    refining_scenario,
-    scenario,
-    scenario_title,
-    demo_cat,
-    demo_group,
-    title,
-    sum_demo_emp
-  )]
-
-  ## calculate gaps (BAU - scenario)
-  l_bau_gaps_df <- l_gaps_df[scenario == "BAU demand - historical production"]
-  l_bau_gaps_df <- l_bau_gaps_df[, c(
-    "year",
-    "demo_cat",
-    "demo_group",
-    "title",
-    "sum_demo_emp"
-  )]
-  setnames(l_bau_gaps_df, "sum_demo_emp", "bau_sum_demo_emp")
-
-  l_gaps_df <- merge(
-    l_gaps_df,
-    l_bau_gaps_df,
-    by = c("year", "demo_cat", "demo_group", "title"),
-    all.x = T
-  )
-
-  l_gaps_df[, gap_emp := sum_demo_emp - bau_sum_demo_emp]
-
-  ## merge with 2020 pop
-  l_gaps_df <- merge(
-    l_gaps_df,
-    pop_2020,
-    by = c("demo_cat", "demo_group"),
-    all.x = T
-  )
-
-  ## calculate per capita
-  l_gaps_df[, gap_emp_pc := gap_emp / pop_2020]
-  l_gaps_df[, gap_emp_pmil := gap_emp_pc * 1e6]
-
-  ## figure labor a
-  labor_gap_fig_a <- ggplot(
-    l_gaps_df %>%
-      filter(
-        !scenario %in% remove_scen,
-        title %in% fig_title_vec,
-        demo_cat == "Race"
-      ) %>%
-      mutate(
-        title = factor(title, levels = c("Black", "Hispanic", "Asian", "white"))
-      ),
-    aes(x = year, y = gap_emp_pmil, color = title)
-  ) +
-    geom_line(linewidth = 1, alpha = 0.8) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    facet_grid(demo_cat ~ scenario_title) +
-    scale_color_manual(
-      name = "",
-      values = race_col_pal
-    ) +
-    labs(
-      x = NULL,
-      y = NULL
-    ) +
-    scale_x_continuous(
-      breaks = c(2020, 2045), # Specify tick mark positions
-      labels = c(2020, 2045)
-    ) + # Specify tick mark labels
-    theme_line +
-    theme(
-      legend.position = "bottom",
-      legend.title = element_blank(),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      axis.text.x = element_text(
-        vjust = 0.5,
-        hjust = 0.5,
-        size = fig_text_size
-      ),
-      axis.text.y = element_text(size = fig_text_size),
-      strip.text.y = element_text(size = fig_text_size),
-      strip.text.x = element_blank(),
-      # axis.text.x = element_blank(),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  ## legend a
-  legend_a <- labor_gap_fig_a +
-    geom_line(linewidth = 1, alpha = 0.9) +
-    theme(legend.position = "right")
-
-  legend_a <- get_legend(
-    legend_a +
-      theme(legend.text = element_text(size = fig_text_size))
-  )
-
-  ## labor b
-  labor_gap_fig_b <- ggplot(
-    l_gaps_df %>%
-      filter(
-        !scenario %in% remove_scen,
-        demo_cat == "DAC"
-      ),
-    aes(x = year, y = gap_emp_pmil, lty = title)
-  ) +
-    geom_line(linewidth = 1, alpha = 0.8) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    facet_grid(demo_cat ~ scenario_title) +
-    scale_linetype_manual(values = dac_lty) +
-    labs(
-      x = NULL,
-      y = NULL
-    ) +
-    # ylim(c(-0.31, 0)) +
-    scale_x_continuous(
-      breaks = c(2020, 2045), # Specify tick mark positions
-      labels = c(2020, 2045)
-    ) + # Specify tick mark labels
-    theme_line +
-    theme(
-      legend.position = "bottom",
-      legend.title = element_blank(),
-      axis.text.x = element_text(
-        vjust = 0.5,
-        hjust = 0.5,
-        size = fig_text_size
-      ),
-      axis.text.y = element_text(size = fig_text_size),
-      strip.text.y = element_text(size = fig_text_size),
-      strip.text.x = element_text(size = fig_text_size),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      # axis.text.x = element_blank(),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  ## legend b
-  legend_b <- labor_gap_fig_b +
-    geom_line(linewidth = 1, alpha = 0.9) +
-    theme(legend.position = "right")
-
-  legend_b <- get_legend(
-    legend_b +
-      theme(legend.text = element_text(size = fig_text_size))
-  )
-
-  ## labor c
-  labor_gap_fig_c <- ggplot(
-    l_gaps_df %>%
-      filter(
-        !scenario %in% remove_scen,
-        demo_cat == "Poverty"
-      ) %>%
-      mutate(
-        title = factor(
-          title,
-          levels = c("Below poverty line", "Above poverty line")
-        )
-      ),
-    aes(x = year, y = gap_emp_pmil, lty = title)
-  ) +
-    geom_line(linewidth = 1, alpha = 0.8, color = "black") +
-    scale_linetype_manual(values = poverty_lty) +
-    geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-    facet_grid(demo_cat ~ scenario_title) +
-    labs(
-      x = NULL,
-      y = NULL
-    ) +
-    scale_x_continuous(
-      breaks = c(2020, 2045), # Specify tick mark positions
-      labels = c(2020, 2045)
-    ) + # Specify tick mark labels
-    theme_line +
-    # ylim(-70, 0) +
-    theme(
-      legend.position = "bottom",
-      legend.title = element_blank(),
-      axis.text.x = element_text(
-        vjust = 0.5,
-        hjust = 0.5,
-        size = fig_text_size
-      ),
-      axis.text.y = element_text(size = fig_text_size),
-      strip.text.y = element_text(size = fig_text_size),
-      legend.key.width = unit(10, "mm"),
-      plot.margin = unit(c(0, 0, 0, 0), "cm"),
-      strip.text.x = element_blank(),
-      axis.ticks.length.y = unit(0.1, "cm"),
-      axis.ticks.length.x = unit(0.1, "cm")
-    )
-
-  ## legend c
-  legend_c <- labor_gap_fig_c +
-    geom_line(linewidth = 1, alpha = 0.9) +
-    theme(legend.position = "right")
-
-  legend_c <- get_legend(
-    legend_c +
-      theme(legend.text = element_text(size = fig_text_size))
-  )
-
-  ## shared y lab
-  # yaxis_lab <- ggdraw() + draw_label("Labor: FTE job-years, difference from reference", size = 8, angle = 90)
-  yaxis_lab <- ggdraw() +
-    draw_label(
-      "Labor: FTE employment changes per million people (difference from reference)",
-      size = fig_text_size,
-      angle = 90
-    )
-
-  l_gaps_plot_grid <- plot_grid(
-    NULL,
-    labor_gap_fig_b + theme(legend.position = "none"),
-    NULL,
-    labor_gap_fig_c + theme(legend.position = "none"),
-    NULL,
-    labor_gap_fig_a + theme(legend.position = "none"),
-    align = "v",
-    labels = c("", "D", "", "E", "", "F"),
-    # # labels = 'AUTO',
-    # label_size = 10,
-    hjust = -0.5,
-    vjust = 0.25,
-    nrow = 6,
-    ncol = 1,
-    rel_widths = c(1, 1, 1, 1, 1, 1),
-    rel_heights = c(0.15, 1.1, 0.15, 0.9, 0.15, 0.9)
-  )
-
-  l_gaps_plot_grid2 <- plot_grid(
-    yaxis_lab,
-    l_gaps_plot_grid,
-    align = "v",
-    # labels = c("A", "B", "C", "D", "E", "F"),
-    # # labels = 'AUTO',
-    # label_size = 10,
-    hjust = -1,
-    nrow = 1,
-    ncol = 2,
-    rel_widths = c(0.05, 1),
-    rel_heights = c(1, 1)
-  )
-
-  l_gaps_plot_grid2
-
-  ## plot legends
-  legends <- plot_grid(
-    NULL,
-    legend_b,
-    NULL,
-    legend_c,
-    NULL,
-    legend_a,
-    align = "v",
-    # labels = c("A", "B", "C", "D", "E", "F"),
-    # # labels = 'AUTO',
-    # label_size = 10,
-    hjust = -1,
-    nrow = 6,
-    ncol = 1,
-    rel_widths = c(1, 1, 1, 1, 1, 1),
-    rel_heights = c(0.15, 1, 0.15, 1, 0.15, 1)
-  )
-
-  ## plot side by side
-  ## ----------------------------------------------
-
-  health_labor_plot <- plot_grid(
-    gaps_plot_grid2,
-    l_gaps_plot_grid2,
-    NULL,
-    legends,
-    align = "v",
-    # labels = c("A", "B", "C", "D", "E", "F"),
-    # # labels = 'AUTO',
-    # label_size = 10,
-    hjust = -1,
-    nrow = 1,
-    ncol = 4,
-    rel_widths = c(1, 1, 0.05, 0.2),
-    rel_heights = c(1, 1, 1, 1)
-  )
-
-  return(health_labor_plot)
-}
diff --git a/before-targets/archive/labor_functions.R b/before-targets/archive/labor_functions.R
deleted file mode 100644
index 8c759ff..0000000
--- a/before-targets/archive/labor_functions.R
+++ /dev/null
@@ -1,1088 +0,0 @@
-## labor functions ##
-
-create_prod_px_spread <- function(proc_oil_px_df) {
-  crack_spread <- tibble(
-    product = c("gasoline", "jet_fuel", "diesel"),
-    spread = c(23, 20, 23)
-  )
-
-  crack_spread_ex <- expand.grid(
-    year = c(2019:2045),
-    product = unique(crack_spread$product)
-  )
-
-  crack_spread <- merge(crack_spread_ex, crack_spread,
-    all.x = T
-  )
-
-  prod_price <- copy(proc_oil_px_df)
-  # prod_price <- proc_oil_px_df[oil_price_scenario == "reference case"]
-
-  prod_price <- merge(crack_spread, prod_price,
-    by = c("year"),
-    all = T,
-    allow.cartesian = T
-  )
-
-  setDT(prod_price)
-
-  prod_price[, product_price := oil_price_usd_per_bbl + spread]
-
-  prod_price
-}
-
-
-calc_labor_outputs <- function(main_path,
-                               indiv_prod_output,
-                               dt_refcap,
-                               product_px,
-                               cpi2019,
-                               cpi2020,
-                               discount_rate,
-                               alpha_comp,
-                               alpha_emp,
-                               refin_locs_ct,
-                               dt_direct_multipliers) {
-  ## add product for calculating price
-  county_out_refining <- copy(indiv_prod_output)
-
-  county_out_refining[, fuel := as.character(fuel)]
-
-  county_out_refining[, product := fifelse(
-    fuel %chin% c("gasoline", "drop-in gasoline"), "gasoline",
-    fifelse(fuel %chin% c("diesel", "renewable diesel"), "diesel", "jet_fuel")
-  )]
-
-  ## merge with counties
-  county_df <- dt_refcap[, .(site_id, county)]
-  county_df[, site_id := as.character(site_id)]
-
-
-  county_out_refining <- merge(county_out_refining, county_df,
-    by = c("site_id"),
-    all.x = T
-  )
-
-  # fill in missing counties
-  county_out_refining[, county := fifelse(
-    site_id == "342-2", "Contra Costa",
-    fifelse(
-      site_id == "99999", "Kern",
-      fifelse(site_id == "t-800", "Los Angeles", county)
-    )
-  )]
-
-  ## merge with prices
-  product_df <- copy(product_px)
-  product_df <- product_df[, .(year, oil_price_scenario, product, product_price)]
-
-  county_out_refining <- merge(county_out_refining, product_df,
-    by = c("year", "product"),
-    all.x = T,
-    allow.cartesian = T
-  )
-
-  ## calculate revenue
-  county_out_refining[, revenue := value * product_price]
-
-  ## merge with census tracts
-  ct_out_refining <- merge(county_out_refining,
-                           refin_locs_ct[, .(GEOID, site_id)],
-                           by = c("site_id"),
-                           all.x = T)
-  
-  ## step 1: summarize at the census tract level
-  ct_out_refining_summary <- ct_out_refining[, .(
-    production_bbl = sum(value),
-    revenue = sum(revenue)
-  ), by = .(
-    demand_scenario, refining_scenario,
-    oil_price_scenario, year, county, GEOID
-  )]
-  
-  ## fix columns
-  ct_out_refining_summary[, county := fifelse(county == "Solano County", "Solano", county)]
-  
-  ct_out_refining_summary[, w_tract_geocode := substr(GEOID, 1, 11)]
-  
-  ## step 2: merge with direct multipliers on the "w_tract_geocode"
-  ct_out_refining_direct <- merge(ct_out_refining_summary, 
-                                  dt_direct_multipliers,
-                                  by = c("w_tract_geocode"),
-                                  all.x = TRUE,
-                                  allow.cartesian = TRUE)
-  
-  ## step 3: divide revenue by $1 million and multiply it separately 
-  ## by emp.rev (employment multiplier) and ec.rev (compensation multiplier).
-  ct_out_refining_direct <- ct_out_refining_direct[, `:=` (empl_direct_impact = (revenue / 1e6) * emp.rev,
-                                                           comp_direct_impact = (revenue / 1e6) * ec.rev)]
-  
-  
-  ## step 4: summarize by “h” census tract. This is what you will use for 
-  ## Figure 1 D, Figure 4 D-F, and Figure 5 D-F.
-  
-  ct_out_refining_direct <- ct_out_refining_direct[, .(
-    total_emp = sum(empl_direct_impact),
-    total_comp = sum(comp_direct_impact)
-  ), .(demand_scenario, refining_scenario, oil_price_scenario, year, h_tract_geocode)]
-
-  
-  ct_out_refining_direct <- ct_out_refining_direct[, .(h_tract_geocode,
-                                                       demand_scenario,
-                                                       refining_scenario,
-                                                       oil_price_scenario,
-                                                       year,
-                                                       total_emp,
-                                                       total_comp)]
-  
-  ## convert comp to 2019 dollars
-  ct_out_refining_direct[, total_comp_usd19 := total_comp * cpi2019 / cpi2020]
-  
-  ## calc PV
-  ct_out_refining_direct[, total_comp_PV := total_comp_usd19 / ((1 + discount_rate)^(year - 2019))]
-  
-  ## rename columns
-  setnames(ct_out_refining_direct, c("total_comp", "total_comp_usd19", "total_comp_PV"), c("total_comp_h", "total_comp_usd19_h", "total_comp_PV_h"))
-  
-  
-  ## revision: unclear if we need to do the following
-  ## --------------------------------------------------------------------------
-  
-  ## calculate the lower bound value
-  ct_out_refining_direct <- ct_out_refining_direct %>%
-    rename(census_tract = h_tract_geocode) %>%
-    arrange(demand_scenario, refining_scenario, oil_price_scenario, census_tract, year) %>%
-    group_by(demand_scenario, refining_scenario, oil_price_scenario, census_tract) %>%
-    mutate(
-      prev_emp = ifelse(year == 2020, NA, lag(total_emp)),
-      total_emp_revised = ifelse(year == 2020, total_emp, total_emp - ((1 - alpha_emp) * prev_emp)),
-      prev_comp_usd19h = ifelse(year == 2020, NA, lag(total_comp_usd19_h)),
-      total_comp_usd19_l = ifelse(year == 2020, total_comp_usd19_h, total_comp_usd19_h - ((1 - alpha_comp) * prev_comp_usd19h))
-    ) %>%
-    ungroup() %>%
-    as.data.table()
-  
-  review_df <- ct_out_refining_direct %>%
-    select(demand_scenario, 
-           refining_scenario, 
-           oil_price_scenario, 
-           census_tract, 
-           year, 
-           total_comp_usd19_h, 
-           prev_comp_usd19h, 
-           total_comp_usd19_l, 
-           total_emp, 
-           prev_emp,
-           total_emp_revised)
-  
-  ## save for review
-  write_csv(review_df, file.path(main_path, "outputs/academic-out/refining/figures/2025-update/fig-csv-files/labor_result_for_review.csv"))
-  # write_csv(review_df, file.path(main_path, "outputs/academic-out/refining/figures/2024-08-beta-adj/fig-csv-files/labor_result_for_review.csv"))
-  
-  ## calc discounted low
-  ct_out_refining_direct[, total_comp_PV_l := total_comp_usd19_l / ((1 + discount_rate)^(year - 2019))]
-  
-  ## select columns
-  ct_out_refining_direct <- ct_out_refining_direct[, .(
-    demand_scenario, refining_scenario, oil_price_scenario, census_tract, year, total_emp, total_emp_revised, total_comp_h,
-    total_comp_usd19_h, total_comp_usd19_l, total_comp_PV_h, total_comp_PV_l
-  )]
-  
-  
-  return(ct_out_refining_direct)
-  
-  # ## summarize at the county level
-  # county_out_refining_summary <- county_out_refining[, .(
-  #   production_bbl = sum(value),
-  #   revenue = sum(revenue)
-  # ), by = .(
-  #   demand_scenario, refining_scenario,
-  #   oil_price_scenario, year, county
-  # )]
-# 
-#   ## calculate labor impacts
-#   county_out_refining_summary[, county := fifelse(county == "Solano County", "Solano", county)]
-# 
-# 
-#   ## merge with labor multipliers, calculate labor vals
-#   county_out_labor <- merge(county_out_refining_summary, proc_labor_dest_df,
-#     by = c("county"),
-#     all.x = T,
-#     allow.cartesian = T
-#   )
-
-  # county_out_labor[, ':=' (c.dire_emp = (revenue / (10 ^ 6)) * dire_emp_mult,
-  #                          c.indi_emp = (revenue / (10 ^ 6)) * indi_emp_mult,
-  #                          c.indu_emp = (revenue / (10 ^ 6)) * indu_emp_mult,
-  #                          c.dire_comp = (revenue / (10 ^ 6)) * dire_comp_mult,
-  #                          c.indi_comp = (revenue / (10 ^ 6)) * ip.indi_comp_mult,
-  #                          c.indu_comp = (revenue / (10 ^ 6)) * ip.indu_comp_mult)]
-  #
-  # county_out_labor[, ':=' (total_emp = c.dire_emp + c.indi_emp + c.indu_emp,
-  #                          total_comp = c.dire_comp + c.indi_comp + c.indu_comp)]
-# 
-#   county_out_labor[, ":="(c.emp = (revenue / (10^6)) * employment,
-#     c.comp = (revenue / (10^6)) * emp_comp)]
-# 
-#   county_out_labor <- county_out_labor[, .(
-#     total_production_bbl = sum(production_bbl),
-#     total_revenue = sum(revenue),
-#     total_emp = sum(c.emp),
-#     total_comp = sum(c.comp)
-#   ), .(demand_scenario, refining_scenario, oil_price_scenario, year, destination)]
-
-  # ## convert to 2019 dollars
-  # county_out_labor[, total_comp_usd19 := total_comp * cpi2019 / cpi2020]
-  # 
-  # ## calc PV
-  # county_out_labor[, total_comp_PV := total_comp_usd19 / ((1 + discount_rate)^(year - 2019))]
-  # 
-  # ## rename columns
-  # setnames(county_out_labor, c("total_comp", "total_comp_usd19", "total_comp_PV"), c("total_comp_h", "total_comp_usd19_h", "total_comp_PV_h"))
-  # 
-  # ## calculate the lower bound value
-  # county_out_labor <- county_out_labor %>%
-  #   arrange(demand_scenario, refining_scenario, oil_price_scenario, destination, year) %>%
-  #   group_by(demand_scenario, refining_scenario, oil_price_scenario, destination) %>%
-  #   mutate(
-  #     prev_emp = ifelse(year == 2020, NA, lag(total_emp)),
-  #     total_emp_revised = ifelse(year == 2020, total_emp, total_emp - ((1 - alpha_emp) * prev_emp)),
-  #     prev_comp_usd19h = ifelse(year == 2020, NA, lag(total_comp_usd19_h)),
-  #     total_comp_usd19_l = ifelse(year == 2020, total_comp_usd19_h, total_comp_usd19_h - ((1 - alpha_comp) * prev_comp_usd19h))
-  #   ) %>%
-  #   ungroup() %>%
-  #   as.data.table()
-  # 
-  # review_df <- county_out_labor %>%
-  #   select(demand_scenario, refining_scenario, oil_price_scenario, destination, year, total_production_bbl, total_revenue, total_comp_usd19_h, prev_comp_usd19h, total_comp_usd19_l, total_emp, total_emp_revised)
-  # 
-  # ## save for review
-  # write_csv(review_df, file.path(main_path, "outputs/academic-out/refining/figures/2024-08-update/fig-csv-files/labor_result_for_review.csv"))
-  # # write_csv(review_df, file.path(main_path, "outputs/academic-out/refining/figures/2025-08-beta-adj/fig-csv-files/labor_result_for_review.csv"))
-  # 
-  # ## calc discounted low
-  # county_out_labor[, total_comp_PV_l := total_comp_usd19_l / ((1 + discount_rate)^(year - 2019))]
-  # 
-  # ## select columns
-  # county_out_labor <- county_out_labor[, .(
-  #   demand_scenario, refining_scenario, oil_price_scenario, destination, year, total_emp, total_emp_revised, total_comp_h,
-  #   total_comp_usd19_h, total_comp_usd19_l, total_comp_PV_h, total_comp_PV_l
-  # )]
-  # 
-  # county_out_labor
-}
-
-
-calc_state_direct_impacts <- function(annual_direct_labor) {
-  
-  dt <- annual_direct_labor
-  
-  dt_state <- dt[, .(
-    state_emp_h = sum(total_emp),
-    state_emp_l = sum(total_emp_revised),
-    state_comp_h = sum(total_comp_h),
-    state_comp_usd19_h = sum(total_comp_usd19_h),
-    state_comp_PV_h = sum(total_comp_PV_h),
-    state_comp_usd19_l = sum(total_comp_usd19_l),
-    state_comp_PV_l = sum(total_comp_PV_l)), 
-    .(demand_scenario, refining_scenario, oil_price_scenario, year)]
-  
-  return(dt_state)
-  
-}
-
-
-
-
-calc_labor_all_impacts_outputs <- function(main_path,
-                                           state_annual_direct_impacts,
-                                           indiv_prod_output,
-                                           dt_refcap,
-                                           product_px,
-                                           cpi2019,
-                                           cpi2020,
-                                           discount_rate,
-                                           alpha_comp,
-                                           alpha_emp,
-                                           dt_indirect_state_multipliers) {
-  ## add product for calculating price
-  county_out_refining <- copy(indiv_prod_output)
-
-  county_out_refining[, fuel := as.character(fuel)]
-
-  county_out_refining[, product := fifelse(
-    fuel %chin% c("gasoline", "drop-in gasoline"), "gasoline",
-    fifelse(fuel %chin% c("diesel", "renewable diesel"), "diesel", "jet_fuel")
-  )]
-
-  ## merge with counties
-  county_df <- dt_refcap[, .(site_id, county)]
-  county_df[, site_id := as.character(site_id)]
-
-
-  county_out_refining <- merge(county_out_refining, county_df,
-    by = c("site_id"),
-    all.x = T
-  )
-
-  # fill in missing counties
-  county_out_refining[, county := fifelse(
-    site_id == "342-2", "Contra Costa",
-    fifelse(
-      site_id == "99999", "Kern",
-      fifelse(site_id == "t-800", "Los Angeles", county)
-    )
-  )]
-
-  ## merge with prices
-  product_df <- copy(product_px)
-  product_df <- product_df[, .(year, oil_price_scenario, product, product_price)]
-
-  county_out_refining <- merge(county_out_refining, product_df,
-    by = c("year", "product"),
-    all.x = T,
-    allow.cartesian = T
-  )
-
-  ## calculate revenue
-  county_out_refining[, revenue := value * product_price]
-  
-  ## step 1: summarize at the state level
-  state_out_refining_summary <- county_out_refining[, .(
-    production_bbl = sum(value),
-    revenue = sum(revenue)
-  ), by = .(
-    demand_scenario, refining_scenario,
-    oil_price_scenario, year
-  )]
-
-  
-  ## step 2: process induced df
-  ## step 3: skip
-  total_indir_induc_multipliers <- dt_indirect_state_multipliers[, .(emp.rev = sum(emp.rev),
-                                                                     ec.rev = sum(ec.rev),
-                                                                     emp.li = sum(emp.li, na.rm = T),
-                                                                     ec.li = sum(ec.li, na.rm = T)), by = .(DestinationRegion)]
-  
-  ## step 4: divide revenue by $1 million. 
-  ## multiple the resulting number separately by emp.rev and ec.rev. 
-  ## You should now have the combined indirect and induced impacts by year and scenario.
-  state_out_refining_summary[, `:=` (empl_indir_induc_impact = (revenue / 1e6) * total_indir_induc_multipliers$emp.rev[1],
-                                     comp_indir_induc_impact = (revenue / 1e6) * total_indir_induc_multipliers$ec.rev[1])]
-  
-  
-  
-  ## step 5: merge with state direct impacts
-  state_out_refining_all_impacts <- merge(state_out_refining_summary, state_annual_direct_impacts,
-                                      by = c("year", 
-                                             "demand_scenario", 
-                                             "refining_scenario", 
-                                             "oil_price_scenario"),
-                                      all = T)
-  
-  ## step 6: sum direct and indirct/induced impacts
-  state_out_refining_all_impacts <- state_out_refining_all_impacts[, state_comp_all_impacts := comp_indir_induc_impact + state_comp_h]
-
-  state_out_refining_all_impacts <- state_out_refining_all_impacts[, .(demand_scenario,
-                                                                       refining_scenario,
-                                                                       oil_price_scenario,
-                                                                       year,
-                                                                       production_bbl,
-                                                                       revenue,
-                                                                       state_comp_all_impacts)]
-  
-  
-  ## merge with BAU to compute relative impact and induced and indirect impacts
-  state_out_refining_all_impacts_bau <- filter(state_out_refining_all_impacts,
-                                               demand_scenario=="BAU" & refining_scenario=="historic production" & oil_price_scenario=="reference case")
-
-  state_out_refining_all_impacts_bau <- state_out_refining_all_impacts_bau[, .(year,
-                                                                               state_comp_all_impacts)]
-  
-  setnames(state_out_refining_all_impacts_bau, c("state_comp_all_impacts"), c("state_comp_all_impacts_bau"))
-  
-  ## step 7: lag state_comp_all_impacts by one year.
-  ## step 8: multiply lagged state_comp_all_impacts by 0.8 and 
-  ## then separately by emp.li and ec.li. This will leave you with 
-  ## induced employment and compensation effects from rehires’ labor 
-  ## income at their new jobs for each year and scenario. 
-  state_out_refining_all_impacts <- merge(state_out_refining_all_impacts, state_out_refining_all_impacts_bau,
-                                          by=c("year"),
-                                          all.x = T) %>%
-    arrange(demand_scenario, refining_scenario, oil_price_scenario, year) %>%
-    group_by(demand_scenario, refining_scenario, oil_price_scenario) %>%
-    mutate(
-      prev_comp = ifelse(year == 2020, NA, lag(state_comp_all_impacts)),
-      prev_comp_bau = ifelse(year == 2020, NA, lag(state_comp_all_impacts_bau)),
-      state_comp_all_impacts_l = ifelse(year == 2020, state_comp_all_impacts, state_comp_all_impacts - ((1 - alpha_comp) * prev_comp)),
-      state_comp_all_impacts_l_bau = ifelse(year == 2020, state_comp_all_impacts_bau, state_comp_all_impacts_bau - ((1 - alpha_comp) * prev_comp_bau)),
-      state_comp_all_impacts_l_relative = state_comp_all_impacts_l - state_comp_all_impacts_l_bau,
-      state_comp_all_impacts_l_relative_adj = ifelse(state_comp_all_impacts_l_relative > 0, NA, state_comp_all_impacts_l_relative),
-      prev_comp_l = ifelse(year == 2020, NA, lag(state_comp_all_impacts_l_relative_adj)),
-      state_comp_emp_li = ifelse(year == 2020, NA, 
-                                 (prev_comp_l / 1e6) * total_indir_induc_multipliers$emp.li[1]),
-      state_comp_ec_li = ifelse(year == 2020, NA, 
-                                 (prev_comp_l / 1e6) * total_indir_induc_multipliers$ec.li[1])
-    ) %>%
-    ungroup() %>%
-    as.data.table()
-  
-  ## save for review
-  write_csv(state_out_refining_all_impacts, file.path(main_path, "outputs/academic-out/refining/figures/2025-update/fig-csv-files/step_8_output_for_review.csv"))
-  
-  ## step 9: calc revised statewide indirect and induced impact that is equal to
-  ## the indirect and induced impact from step 4 - direct and indirect impact from 
-  ## step 7.
-  state_out_refining_summary <- merge(state_out_refining_summary, 
-                                      state_out_refining_all_impacts[, .(demand_scenario,
-                                                                         refining_scenario,
-                                                                         oil_price_scenario,
-                                                                         year,
-                                                                         state_comp_all_impacts,
-                                                                         state_comp_emp_li,
-                                                                         state_comp_ec_li)],
-                                      by = c("demand_scenario", 
-                                             "refining_scenario", 
-                                             "oil_price_scenario",
-                                             "year"),
-                                      all = T)
-  
-  
-  ## subtract indirect and induced values computed in step 8 (if year 2020, use original 2020 value)
-  state_out_refining_summary <- arrange(state_out_refining_summary,
-                                        demand_scenario, refining_scenario, oil_price_scenario, year) %>%
-    group_by(demand_scenario, refining_scenario, oil_price_scenario) %>%
-    mutate(
-      prev_comp = ifelse(year == 2020, NA, lag(comp_indir_induc_impact)),
-      prev_empl = ifelse(year == 2020, NA, lag(empl_indir_induc_impact)),
-      comp_indir_induc_impact_l = ifelse(year == 2020, comp_indir_induc_impact, comp_indir_induc_impact - ((1 - alpha_comp) * prev_comp)),
-      empl_indir_induc_impact_l = ifelse(year == 2020, empl_indir_induc_impact, empl_indir_induc_impact - ((1 - alpha_emp) * prev_empl))
-      ) %>%
-    as.data.table()
-  
-  state_out_refining_summary[, `:=` (empl_indir_induc_impact_l = 
-                                       fifelse(year == 2020, empl_indir_induc_impact_l, empl_indir_induc_impact_l - state_comp_emp_li),
-                                     comp_indir_induc_impact_l = 
-                                       fifelse(year == 2020, comp_indir_induc_impact_l, comp_indir_induc_impact_l - state_comp_ec_li))]
-  
-  ## create df with low and high induced and indirect impacts, convert to 2019, calc pv
-  state_out_labor_induc_indir <- state_out_refining_summary[, .(demand_scenario,
-                                                                refining_scenario,
-                                                                oil_price_scenario,
-                                                                year,
-                                                                empl_indir_induc_impact,
-                                                                empl_indir_induc_impact_l,
-                                                                comp_indir_induc_impact,
-                                                                comp_indir_induc_impact_l)]
-  
-  setnames(state_out_labor_induc_indir, 
-           c("empl_indir_induc_impact", "comp_indir_induc_impact"),
-           c("empl_indir_induc_impact_h", "comp_indir_induc_impact_h"))
-  
-  ## convert into to 2019 dollars
-  state_out_labor_induc_indir[, `:=` (comp_indir_induc_impact_h_usd19 = comp_indir_induc_impact_h * cpi2019 / cpi2020,
-                                      comp_indir_induc_impact_l_usd19 = comp_indir_induc_impact_l * cpi2019 / cpi2020)]
-  
-  ## calc PV
-  state_out_labor_induc_indir[, `:=` (comp_indir_induc_impact_h_PV = comp_indir_induc_impact_h_usd19 / ((1 + discount_rate)^(year - 2019)),
-                                      comp_indir_induc_impact_l_PV = comp_indir_induc_impact_l_usd19 / ((1 + discount_rate)^(year - 2019)))]
-   
-
-  ## merge with direct impact 
-  state_out_labor_induc_indir <- merge(state_out_labor_induc_indir, 
-                                       state_annual_direct_impacts[, .(demand_scenario,
-                                                                    refining_scenario,
-                                                                    oil_price_scenario,
-                                                                    year,
-                                                                    state_emp_h,
-                                                                    state_emp_l,
-                                                                    state_comp_h,
-                                                                    state_comp_usd19_h,
-                                                                    state_comp_PV_h,
-                                                                    state_comp_usd19_l,
-                                                                    state_comp_PV_l)])
-  
-  ## compute total impact, high and low
-  state_out_labor_induc_indir[, `:=` (empl_all_impacts_h = state_emp_h + empl_indir_induc_impact_h,
-                                      emp_all_impacts_l = state_emp_l + empl_indir_induc_impact_l,
-                                      comp_all_impacts_h = comp_indir_induc_impact_h + state_comp_h,
-                                      comp_all_impacts_usd19_h = comp_indir_induc_impact_h_usd19 + state_comp_usd19_h,
-                                      comp_all_impacts_PV_h = comp_indir_induc_impact_h_PV + state_comp_PV_h,
-                                      comp_all_impacts_usd19_l = comp_indir_induc_impact_l_usd19 + state_comp_usd19_l,
-                                      comp_all_impacts_PV_l = comp_indir_induc_impact_l_PV + state_comp_PV_l)]
-  
-  
-  ## final df
-  state_out_labor_all_impacts <- state_out_labor_induc_indir[, .(demand_scenario,
-                                                                 refining_scenario,
-                                                                 oil_price_scenario,
-                                                                 year,
-                                                                 state_emp_h,
-                                                                 empl_indir_induc_impact_h,
-                                                                 empl_all_impacts_h,
-                                                                 state_emp_l,
-                                                                 empl_indir_induc_impact_l,
-                                                                 emp_all_impacts_l,
-                                                                 state_comp_h,
-                                                                 comp_indir_induc_impact_h,
-                                                                 comp_all_impacts_h,
-                                                                 state_comp_usd19_h,
-                                                                 comp_indir_induc_impact_h_usd19,
-                                                                 comp_all_impacts_usd19_h,
-                                                                 state_comp_PV_h,
-                                                                 comp_indir_induc_impact_h_PV,
-                                                                 comp_all_impacts_PV_h,
-                                                                 state_comp_usd19_l,
-                                                                 comp_indir_induc_impact_l_usd19,
-                                                                 comp_all_impacts_usd19_l,
-                                                                 state_comp_PV_l,
-                                                                 comp_indir_induc_impact_l_PV,
-                                                                 comp_all_impacts_PV_l)]
-  
-  # state_out_labor_all_impacts <- state_out_labor_all_impacts |>
-  #   rename(comp_dir_impact_h = state_comp_h,
-  #          comp_dir_usd19_h = state_comp_usd19_h,
-  #          comp_dir_PV_h = state_comp_PV_h,
-  #          comp_dir_usd19_l = state_comp_usd19_l,
-  #          comp_dir_PV_l  = state_comp_PV_l) |>
-  #   as.data.table()
-  
-  ## save for review
-  write_csv(state_out_labor_all_impacts, file.path(main_path, "outputs/academic-out/refining/figures/2025-update/fig-csv-files/state_out_labor_all_impacts.csv"))
-  
-  return(state_out_labor_all_impacts)
-  
-  # ## summarize at the county level
-  # county_out_refining_summary <- county_out_refining[, .(
-  #   production_bbl = sum(value),
-  #   revenue = sum(revenue)
-  # ), by = .(
-  #   demand_scenario, refining_scenario,
-  #   oil_price_scenario, year, county
-  # )]
-  # 
-  # 
-  # ## calculate labor impacts
-  # county_out_refining_summary[, county := fifelse(county == "Solano County", "Solano", county)]
-  # 
-  # 
-  # ## merge with labor multipliers, calculate labor vals
-  # county_out_labor <- merge(county_out_refining_summary, proc_labor_dest_df,
-  #   by = c("county"),
-  #   all.x = T,
-  #   allow.cartesian = T
-  # )
-  # 
-  # # county_out_labor[, ':=' (c.dire_emp = (revenue / (10 ^ 6)) * dire_emp_mult,
-  # #                          c.indi_emp = (revenue / (10 ^ 6)) * indi_emp_mult,
-  # #                          c.indu_emp = (revenue / (10 ^ 6)) * indu_emp_mult,
-  # #                          c.dire_comp = (revenue / (10 ^ 6)) * dire_comp_mult,
-  # #                          c.indi_comp = (revenue / (10 ^ 6)) * ip.indi_comp_mult,
-  # #                          c.indu_comp = (revenue / (10 ^ 6)) * ip.indu_comp_mult)]
-  # #
-  # # county_out_labor[, ':=' (total_emp = c.dire_emp + c.indi_emp + c.indu_emp,
-  # #                          total_comp = c.dire_comp + c.indi_comp + c.indu_comp)]
-  # 
-  # 
-  # county_out_labor[, ":="(c.emp = (revenue / (10^6)) * employment,
-  #   c.comp = (revenue / (10^6)) * emp_comp)]
-  # 
-  # county_out_labor <- county_out_labor[, .(
-  #   total_production_bbl = sum(production_bbl),
-  #   total_revenue = sum(revenue),
-  #   total_emp = sum(c.emp),
-  #   total_comp = sum(c.comp)
-  # ), .(
-  #   demand_scenario,
-  #   refining_scenario,
-  #   oil_price_scenario,
-  #   impact_type,
-  #   year,
-  #   destination
-  # )]
-  # 
-  # ## convert to 2019 dollars
-  # county_out_labor[, total_comp_usd19 := total_comp * cpi2019 / cpi2020]
-  # 
-  # ## calc PV
-  # county_out_labor[, total_comp_PV := total_comp_usd19 / ((1 + discount_rate)^(year - 2019))]
-  # 
-  # ## rename columns
-  # setnames(county_out_labor, c("total_comp", "total_comp_usd19", "total_comp_PV"), c("total_comp_h", "total_comp_usd19_h", "total_comp_PV_h"))
-  # 
-  # ## calculate the lower bound value
-  # county_out_labor <- county_out_labor %>%
-  #   arrange(demand_scenario, refining_scenario, oil_price_scenario, impact_type, destination, year) %>%
-  #   group_by(demand_scenario, refining_scenario, oil_price_scenario, impact_type, destination) %>%
-  #   mutate(
-  #     prev_emp = ifelse(year == 2020, NA, lag(total_emp)),
-  #     total_emp_revised = ifelse(year == 2020, total_emp, total_emp - ((1 - alpha_emp) * prev_emp)),
-  #     prev_comp_usd19h = ifelse(year == 2020, NA, lag(total_comp_usd19_h)),
-  #     total_comp_usd19_l = ifelse(year == 2020, total_comp_usd19_h, total_comp_usd19_h - ((1 - alpha_comp) * prev_comp_usd19h))
-  #   ) %>%
-  #   ungroup() %>%
-  #   as.data.table()
-  # 
-  # review_df <- county_out_labor %>%
-  #   select(
-  #     demand_scenario, refining_scenario, oil_price_scenario, impact_type,
-  #     destination, year, total_production_bbl, total_revenue, total_comp_usd19_h,
-  #     prev_comp_usd19h, total_comp_usd19_l, total_emp, total_emp_revised
-  #   )
-  # 
-  # ## save file
-  # write_csv(review_df, file.path(main_path, "outputs/academic-out/refining/figures/2025-08-update/fig-csv-files/labor_result_x_impact_type_for_review.csv"))
-  # # write_csv(review_df, file.path(main_path, "outputs/academic-out/refining/figures/2025-08-beta-adj/fig-csv-files/labor_result_x_impact_type_for_review.csv"))
-  # 
-  # 
-  # ## calc discounted low
-  # county_out_labor[, total_comp_PV_l := total_comp_usd19_l / ((1 + discount_rate)^(year - 2019))]
-  # 
-  # ## select columns
-  # county_out_labor <- county_out_labor[, .(
-  #   demand_scenario, refining_scenario, oil_price_scenario, impact_type, destination, year, total_emp, total_emp_revised, total_comp_h,
-  #   total_comp_usd19_h, total_comp_usd19_l, total_comp_PV_h, total_comp_PV_l
-  # )]
-  # 
-  # 
-  # ## save file
-  # write_csv(county_out_labor, file.path(main_path, "outputs/academic-out/refining/figures/2025-08-update/fig-csv-files/labor_result_x_impact_type.csv"))
-  # # write_csv(county_out_labor, file.path(main_path, "outputs/academic-out/refining/figures/2025-08-beta-adj/fig-csv-files/labor_result_x_impact_type.csv"))
-  # 
-  # 
-  # county_out_labor
-}
-
-
-
-
-
-## labor results grouped by demographic
-
-calculate_labor_x_demg_annual <- function(main_path,
-                                          annual_direct_labor,
-                                          pop_ratios) {
-  
-  # ## census pop
-  # census_pop <- refining_mortality %>%
-  #   ungroup() %>%
-  #   filter(year == 2020) %>%
-  #   select(census_tract, pop) %>%
-  #   unique() %>%
-  #   rename(total_pop = pop) %>%
-  #   as.data.table()
-  
-  ## merge with demographic info
-  ct_out_demo <- merge(annual_direct_labor,
-                       pop_ratios,
-                       by = c("census_tract"),
-                       all.x = T,
-                       allow.cartesian = T)
-  
-  ct_out_demo <- ct_out_demo[, .(demand_scenario,
-                                 refining_scenario,
-                                 oil_price_scenario,
-                                 census_tract,
-                                 demo_cat,
-                                 demo_group,
-                                 title,
-                                 pct,
-                                 year,
-                                 total_emp,
-                                 total_emp_revised,
-                                 total_comp_h,
-                                 total_comp_usd19_h,
-                                 total_comp_usd19_l,
-                                 total_comp_PV_h,
-                                 total_comp_PV_l)]
-  
-  # 
-  # ## merge with population
-  # ct_out_demo <- merge(ct_out_demo, census_pop,
-  #                      by = c("census_tract"),
-  #                      all.x = T)
-  
-  ## multiply by pct
-  ct_out_demo[, `:=` (demo_emp = total_emp * pct,
-                      demo_emp_revised = total_emp_revised * pct,
-                      demo_comp_h = total_comp_h * pct,
-                      demo_comp_usd19_h = total_comp_usd19_h * pct,
-                      demo_comp_usd19_l = total_comp_usd19_l * pct,
-                      demo_comp_PV_h = total_comp_PV_h * pct,
-                      demo_comp_PV_l = total_comp_PV_l * pct)]
-  
-  ## summarize at the state level
-  state_demo_labor_out <- ct_out_demo[, .(sum_demo_emp = sum(demo_emp),
-                                          sum_demo_emp_revised = sum(demo_emp_revised),
-                                          sum_demo_usd19_h = sum(demo_comp_usd19_h),
-                                          sum_demo_usd19_l = sum(demo_comp_usd19_l),
-                                          sum_demo_comp_pv_h = sum(demo_comp_PV_h),
-                                          sum_demo_comp_pv_l = sum(demo_comp_PV_l)
-                                      ),
-                                      by = .(demand_scenario, refining_scenario, oil_price_scenario, year, demo_cat, demo_group, title)]
-  
-  
-  state_demo_labor_out <- state_demo_labor_out[, .(demand_scenario,
-                                                   refining_scenario,
-                                                   oil_price_scenario,
-                                                   demo_cat,
-                                                   demo_group,
-                                                   title,
-                                                   year,
-                                                   sum_demo_emp,
-                                                   sum_demo_emp_revised,
-                                                   sum_demo_usd19_h,
-                                                   sum_demo_usd19_l,
-                                                   sum_demo_comp_pv_h,
-                                                   sum_demo_comp_pv_l)]
-  
-  ## save for review
-  write_csv(state_demo_labor_out, file.path(main_path, "outputs/academic-out/refining/figures/2025-update/fig-csv-files/state_labor_direct_impacts_demo_annual.csv"))
-  
-  
-  return(state_demo_labor_out)
-  
-  
-  # 
-  # ## get county and census tracts
-  # c_ct_df <- raw_pop_income_2021[state == "California"]
-  # c_ct_df[, census_tract := as.character(substr(geoid, 10, nchar(geoid)))]
-  # c_ct_df <- c_ct_df[, .(county, census_tract)]
-  # c_ct_df[, county := str_remove(county, " County")]
-  # 
-  # c_ct_df <- merge(c_ct_df, ca_regions,
-  #   by = c("county"),
-  #   allow.cartesian = T
-  # )
-# 
-#   ## get pop from refining_mortality
-#   pop_df <- refining_mortality %>%
-#     select(census_tract, year, pop) 
-# 
-#   # ## get
-  # ratio_df <- copy(county_grp_pop_ratios)
-  # setnames(ratio_df, "region", "destination")
-  # 
-  # ## merge
-  # labor_pct_df <- merge(annual_labor, ratio_df,
-  #   by = c("destination"),
-  #   allow.cartesian = T
-  # )
-  # 
-  # ## multiply by pct
-  # # labor_pct_df[, demo_emp := total_emp_revised * pct]
-  # labor_pct_df[, demo_emp := total_emp * pct]
-  # labor_pct_df[, demo_comp_pv_h := total_comp_PV_h * pct]
-  # labor_pct_df[, demo_comp_pv_l := total_comp_PV_l * pct]
-# 
-#   ## merge with population
-#   labor_pct_df <- merge(labor_pct_df, pop_df,
-#     by = c("destination", "year"),
-#     all.x = T
-#   )
-# 
-#   ## rename pop column
-#   setnames(labor_pct_df, "pop", "region_pop")
-# 
-#   return(labor_pct_df)
-}
-
-
-## function for summarizing labor data
-calculate_labor_x_demg <- function(ref_labor_demog_yr) {
-  labor_pct <- copy(ref_labor_demog_yr)
-
-  # ## county pop by demographic group
-  # labor_pct[, demo_pop := region_pop * pct]
-
-  ## summarise over years
-  labor_pct <- labor_pct[, .(
-    sum_demo_emp = sum(sum_demo_emp),
-    sum_demo_comp_pv_h = sum(sum_demo_comp_pv_h),
-    sum_demo_comp_pv_l = sum(sum_demo_comp_pv_l)
-  ),
-  by = .(demand_scenario, refining_scenario, oil_price_scenario, demo_cat, demo_group, title)
-  ]
-
-  return(labor_pct)
-}
-
-## function for creating labor output df
-calculate_annual_labor_x_demg_hl <- function(main_path,
-                                             ref_labor_demog_yr,
-                                             refining_mortality,
-                                             pop_ratios) {
-  labor_pct <- copy(ref_labor_demog_yr)
-
-  # ## county pop by demographic group
-  # labor_pct[, demo_emp_revised := total_emp_revised * pct]
-# 
-#   ## summarise over years
-#   labor_pct <- labor_pct[, .(
-#     sum_demo_emp = sum(demo_emp),
-#     sum_demo_emp_revised = sum(demo_emp_revised),
-#     sum_demo_comp_pv_h = sum(demo_comp_pv_h),
-#     sum_demo_comp_pv_l = sum(demo_comp_pv_l)
-#   ),
-#   by = .(demand_scenario, refining_scenario, oil_price_scenario, demo_cat, demo_group, title, year)
-#   ]
-
-  ## change scenario names, factor
-  labor_pct[, scenario := paste0(demand_scenario, " demand - ", refining_scenario)]
-  labor_pct[, scenario := gsub("LC1.", "Low ", scenario)]
-
-  ## change historic to historical
-  labor_pct[, refining_scenario := str_replace(refining_scenario, "historic", "historical")]
-  labor_pct[, scenario := str_replace(scenario, "historic", "historical")]
-
-  # ## select columns
-  # labor_pct <- labor_pct[, .(
-  #   scenario, demand_scenario, refining_scenario, oil_price_scenario, year, demo_cat, demo_group,
-  #   title, sum_demo_emp, sum_demo_emp_revised, sum_demo_comp_pv_h, sum_demo_comp_pv_l
-  # )]
-
-  ## compute per million stat
-  ## ---------------------------------------------------------
-
-  ## calc 2020 pop by demographic
-  pop_2020 <- refining_mortality %>%
-    filter(year == 2020) %>%
-    select(census_tract, year, pop) %>%
-    unique() %>%
-    left_join(pop_ratios) %>%
-    as.data.table()
-
-  pop_2020[, demo_pop := pop * pct]
-
-  ## summarize by demographic group
-  pop_2020 <- pop_2020[, .(pop_2020 = sum(demo_pop)),
-    by = .(demo_group, demo_cat)
-  ]
-
-
-  ## merge with 2020 pop
-  labor_pct <- merge(labor_pct, pop_2020,
-    by = c("demo_cat", "demo_group"),
-    all.x = T
-  )
-
-  ## calculate per capita
-  labor_pct[, demo_emp_pc_h := sum_demo_emp / pop_2020]
-  labor_pct[, demo_emp_pmil_h := demo_emp_pc_h * 1e6]
-  labor_pct[, demo_emp_pc_l := sum_demo_emp_revised / pop_2020]
-  labor_pct[, demo_emp_pmil_l := demo_emp_pc_l * 1e6]
-  labor_pct[, demo_comp_pc_h := sum_demo_comp_pv_h / pop_2020]
-  labor_pct[, demo_comp_pc_pmil_h := demo_comp_pc_h * 1e6]
-  labor_pct[, demo_comp_pc_l := sum_demo_comp_pv_l / pop_2020]
-  labor_pct[, demo_comp_pc_pmil_l := demo_comp_pc_l * 1e6]
-  ## remove pop
-  labor_pct[, pop_2020 := NULL]
-  labor_pct[, title := NULL]
-
-  ## for renaming
-  high_est_vec <- c("sum_demo_emp", "demo_emp_pc_h", "demo_emp_pmil_h", "sum_demo_comp_pv_h", "demo_comp_pc_h", "demo_comp_pc_pmil_h")
-  low_est_vec <- c("sum_demo_emp_revised", "demo_emp_pc_l", "demo_emp_pmil_l", "sum_demo_comp_pv_l", "demo_comp_pc_l", "demo_comp_pc_pmil_l")
-
-  labor_metric_df <- tibble(
-    metric = c(high_est_vec, low_est_vec),
-    metric_name = c(
-      "employment",
-      "employment_pc",
-      "employment_pmil",
-      "compensation_pv",
-      "compensation_pv_pc",
-      "compensation_pv_pmil",
-      "employment",
-      "employment_pc",
-      "employment_pmil",
-      "compensation_pv",
-      "compensation_pv_pc",
-      "compensation_pv_pmil"
-    )
-  )
-
-  ## pivot longer
-  labor_pct_long <- melt(labor_pct, id.vars = c("demo_cat", "demo_group", "scenario", "demand_scenario", "refining_scenario", "oil_price_scenario", "year"), variable.name = "metric", value.name = "value")
-  labor_pct_long[, estimate := fifelse(metric %in% high_est_vec, "high", "low")]
-
-  ## merge
-  labor_pct_long <- merge(labor_pct_long, labor_metric_df,
-    by = "metric",
-    all.x = T
-  )
-
-  labor_pct_long <- labor_pct_long[, .(
-    demo_cat, demo_group, scenario, demand_scenario,
-    refining_scenario, oil_price_scenario, year, metric_name, estimate, value
-  )]
-
-
-  ## save df
-  fwrite(labor_pct_long, file.path(main_path, "outputs/academic-out/refining/figures/2025-update/fig-csv-files/", "labor_high_low_annual_outputs.csv"))
-  # fwrite(labor_pct_long, file.path(main_path, "outputs/academic-out/refining/figures/2025-08-beta-adj/fig-csv-files", "labor_high_low_annual_outputs.csv"))
-
-
-  return(labor_pct_long)
-}
-
-calc_county_level_outputs <- function(main_path,
-                                      annual_direct_labor,
-                                      refining_mortality,
-                                      raw_pop_income_2021,
-                                      pop_ratios) {
-  
-  
-  
-  ## merge with demographic info
-  ct_out_demo <- merge(annual_direct_labor,
-                       pop_ratios,
-                       by = c("census_tract"),
-                       all.x = T,
-                       allow.cartesian = T)
-  
-  ct_out_demo <- ct_out_demo[, .(demand_scenario,
-                                 refining_scenario,
-                                 oil_price_scenario,
-                                 census_tract,
-                                 demo_cat,
-                                 demo_group,
-                                 title,
-                                 pct,
-                                 year,
-                                 total_emp,
-                                 total_emp_revised,
-                                 total_comp_h,
-                                 total_comp_usd19_h,
-                                 total_comp_usd19_l,
-                                 total_comp_PV_h,
-                                 total_comp_PV_l)]
-  
-  ## multiply by pct
-  ct_out_demo[, `:=` (demo_emp = total_emp * pct,
-                      demo_emp_revised = total_emp_revised * pct,
-                      demo_comp_h = total_comp_h * pct,
-                      demo_comp_usd19_h = total_comp_usd19_h * pct,
-                      demo_comp_usd19_l = total_comp_usd19_l * pct,
-                      demo_comp_PV_h = total_comp_PV_h * pct,
-                      demo_comp_PV_l = total_comp_PV_l * pct)]
-  
-  
-
-  ## compute county populations
-  pop_2020 <- refining_mortality %>%
-    filter(year == 2020) %>%
-    select(census_tract, year, pop) %>%
-    unique() %>%
-    as.data.table()
-
-  ## census tract x county
-  c_ct_df <- raw_pop_income_2021[state == "California"]
-  c_ct_df[, census_tract := as.character(substr(geoid, 10, nchar(geoid)))]
-  c_ct_df <- c_ct_df[, .(county, census_tract)]
-  c_ct_df[, county := str_remove(county, " County")]
-
-  ## merge with counties
-  pop_2020 <- merge(pop_2020, c_ct_df,
-    by = c("census_tract"),
-    all.x = T
-  )
-  
-  pop_2020 <- pop_2020[, .(census_tract, pop, county)]
-
-  ## merge
-  labor_county_out <- merge(ct_out_demo, pop_2020,
-                    by = c("census_tract"),
-                    all.x = T
-  )
-  
-  
-  ## direct impact
-  ## make labor outputs long and sum by county region
-  labor_county_out <- labor_county_out %>%
-    select(
-      demand_scenario, 
-      refining_scenario,
-      oil_price_scenario, 
-      demo_cat, 
-      demo_group, 
-      title, 
-      census_tract,
-      county,
-      pop,
-      year, 
-      demo_emp, 
-      demo_emp_revised, 
-      demo_comp_PV_h, 
-      demo_comp_PV_l)
-  
-
-  ## summarize by county
-  labor_county_out <- labor_county_out[, .(county_pop = sum(pop),
-                                           demo_emp = sum(demo_emp),
-                                           demo_emp_revised = sum(demo_emp_revised),
-                                           demo_comp_PV_h = sum(demo_comp_PV_h),
-                                           demo_comp_PV_l = sum(demo_comp_PV_l)), 
-                                       by = .(demand_scenario,
-                                                     refining_scenario,
-                                                     oil_price_scenario,
-                                                     demo_cat,
-                                                     demo_group,
-                                                     title,
-                                                     county)]
-
-  
-  # ## test
-  # test_df <- labor_county_region_out %>%
-  #   group_by(county, demand_scenario, refining_scenario, demo_cat, metric) %>%
-  #   summarize(sum_value = sum(value)) %>%
-  #   ungroup()
-  #
-  # ## total for county
-  # total_county_test <- ref_labor_demog_yr %>%
-  #   select(demand_scenario, refining_scenario, destination, year, total_emp, total_emp_revised,
-  #          total_comp_PV_h, total_comp_PV_l) %>%
-  #   unique() %>%
-  #   rename(region = destination) %>%
-  #   full_join(county_region_ratio) %>%
-  #   mutate(total_emp_h_county = total_emp * county_ratio,
-  #          total_emp_l_county = total_emp_revised * county_ratio,
-  #          total_comp_PV_h_county = total_comp_PV_h * county_ratio,
-  #          total_comp_PV_l_county = total_comp_PV_l * county_ratio) %>%
-  #   select(county, demand_scenario, refining_scenario, year, total_emp_h_county,
-  #          total_emp_l_county, total_comp_PV_l_county, total_comp_PV_h_county) %>%
-  #   pivot_longer(total_emp_h_county:total_comp_PV_h_county, names_to = "metric", values_to = "value") %>%
-  #   group_by(county, demand_scenario, refining_scenario, metric) %>%
-  #   summarise(value = sum(value)) %>%
-  #   ungroup()
-  #
-
-
-  ## for renaming
-  high_est_vec <- c("demo_emp", "demo_comp_PV_h")
-  low_est_vec <- c("demo_emp_revised","demo_comp_PV_l")
-
-  labor_metric_df <- tibble(
-    metric = c(high_est_vec, low_est_vec),
-    metric_name = c(
-      "employment",
-      "compensation_pv",
-      "employment",
-      "compensation_pv"
-    )
-  )
-
-  labor_county_out_df <- labor_county_out %>%
-    pivot_longer(demo_emp:demo_comp_PV_l, names_to = "metric", values_to = "value") %>%
-    left_join(labor_metric_df) %>%
-    mutate(estimate = ifelse(metric %in% high_est_vec, "high", "low")) %>%
-    select(demand_scenario:county_pop, metric_name, estimate, value) %>%
-    as.data.table()
-
-
-  ## save df
-  fwrite(labor_county_out_df, file.path(main_path, "outputs/academic-out/refining/figures/2025-update/fig-csv-files/", "labor_county_outputs.csv"))
-  # fwrite(labor_county_out_df, file.path(main_path, "outputs/academic-out/refining/figures/2024-08-beta-adj/fig-csv-files", "labor_county_outputs.csv"))
-
-
-  return(labor_county_out_df)
-}
diff --git a/before-targets/exploratory/labor_revisions_prep.R b/before-targets/data-prep/labor_revisions_prep.R
similarity index 100%
rename from before-targets/exploratory/labor_revisions_prep.R
rename to before-targets/data-prep/labor_revisions_prep.R
diff --git a/before-targets/exploratory/compare-health-npv-plot.R b/before-targets/exploratory/compare-health-npv-plot.R
deleted file mode 100644
index 7018867..0000000
--- a/before-targets/exploratory/compare-health-npv-plot.R
+++ /dev/null
@@ -1,16 +0,0 @@
-## compare health npv plot outputs
-
-library(data.table)
-library(tidyverse)
-
-main_path <- "/Users/tracey/Library/CloudStorage/GoogleDrive-tmangin@ucsb.edu/Shared drives/emlab/projects/current-projects/calepa-cn/outputs/academic-out/refining/figures/"
-
-submission_df <- "2024-orig-submission/fig-csv-files/state_npv_fig_inputs_health.csv"
-newer_df <- "2025-health-revisions/fig-csv-files/state_npv_fig_inputs_health.csv"
-
-orig_df <- fread(paste0(main_path, submission_df))
-
-new_df <- fread(paste0(main_path, newer_df)) |>
-  rename(new_val = value) |>
-  left_join(orig_df) |>
-  mutate(diff = new_val - value)
diff --git a/before-targets/exploratory/compare_demo.R b/before-targets/exploratory/compare_demo.R
deleted file mode 100644
index 66d89e9..0000000
--- a/before-targets/exploratory/compare_demo.R
+++ /dev/null
@@ -1,80 +0,0 @@
-library(data.table)
-library(ggplot2)
-library(tidyr)
-library(dplyr)
-library(cowplot)
-
-# $tar_target(name = file_population, command = file.path(main_path, "data/benmap/processed/ct_inc_45.csv"), format = "file"),
-# tar_target(name = file_dt_ct_inc_pop, command = file.path(main_path, "data/health/processed/ct_inc_45_2020.csv"), format = "file"),
-
-path <- c("G://Shared drives/emlab/projects/current-projects/calepa-cn")
-
-old <- fread(paste0(path, "/data/benmap/processed/ct_inc_45.csv"))
-new_old <- fread(paste0(path, "/data/health/processed/ct_inc_45_2020_old.csv"))
-new <- fread(paste0(path, "/data/health/processed/ct_inc_45_2020.csv"))
-
-# First compare total pop in 2020
-
-old %>%
-  filter(year == 2020) %>%
-  summarize(pop = sum(pop) / 1000000)
-new_old %>%
-  filter(year == 2020) %>%
-  summarize(pop = sum(pop))
-new %>%
-  filter(year == 2020) %>%
-  summarize(pop = sum(pop))
-
-# Compare age groups
-old %>%
-  select(lower_age, upper_age) %>%
-  distinct()
-new_old %>%
-  select(start_age, end_age) %>%
-  distinct()
-new %>%
-  select(start_age, end_age) %>%
-  distinct()
-
-# figs by age group
-
-old_fig <- old %>%
-  filter(year > 2019) %>%
-  mutate(age_group = ifelse(lower_age < 30, "<30", ">=30")) %>%
-  group_by(age_group, year) %>%
-  summarise(pop = sum(pop) / 1000000) %>%
-  ggplot(aes(x = year, y = pop, color = age_group, group = age_group)) +
-  geom_line() +
-  # scale_y_continuous(limits = c(0,25), breaks = c(0,5,10,15,20,25))+
-  scale_y_continuous(limits = c(10, 30), breaks = c(10, 15, 20, 25, 30)) +
-  theme_cowplot() +
-  labs(y = "Population (millions)", x = "Year") +
-  guides(color = "none")
-old_fig
-
-new_old_fig <- new_old %>%
-  filter(year > 2019) %>%
-  mutate(age_group = ifelse(start_age < 30, "<30", ">=30")) %>%
-  group_by(age_group, year) %>%
-  summarise(pop = sum(pop) / 1000000) %>%
-  ggplot(aes(x = year, y = pop, color = age_group, group = age_group)) +
-  geom_line() +
-  # scale_y_continuous(limits = c(0,25), breaks = c(0,5,10,15,20,25))+
-  scale_y_continuous(limits = c(10, 30), breaks = c(10, 15, 20, 25, 30)) +
-  theme_cowplot() +
-  labs(y = NULL, x = "Year") +
-  guides(color = "none")
-new_old_fig
-
-new_fig <- new %>%
-  mutate(age_group = ifelse(start_age < 30, "<30", ">=30")) %>%
-  group_by(age_group, year) %>%
-  summarise(pop = sum(pop) / 1000000) %>%
-  ggplot(aes(x = year, y = pop, color = age_group, group = age_group)) +
-  geom_line() +
-  theme_cowplot() +
-  scale_y_continuous(limits = c(10, 30), breaks = c(10, 15, 20, 25, 30)) +
-  labs(y = "Population (millions)", color = "Age group", x = "Year")
-new_fig
-
-plot_grid(old_fig, new_old_fig, new_fig, labels = c("A. Old population data", "B. New (old) population data", "C. New population data"))
diff --git a/before-targets/exploratory/cumlutative_number_health.R b/before-targets/exploratory/cumlutative_number_health.R
deleted file mode 100644
index 1c84258..0000000
--- a/before-targets/exploratory/cumlutative_number_health.R
+++ /dev/null
@@ -1,57 +0,0 @@
-### Cumulative NPV for health
-
-# setwd("G://Shared drives/emlab/projects/current-projects/calepa-cn/outputs/academic-out/refining/figures/2024-08-update/fig-csv-files")
-setwd("H://Shared drives/emlab/projects/current-projects/calepa-cn/outputs/academic-out/refining/figures/2024-08-update/fig-csv-files")
-
-cum_h <- fread("state_npv_fig_inputs_health.csv", stringsAsFactors = F)
-
-cum_h %>%
-  filter(metric %in% "avoided_health_cost") %>%
-  filter(scen_id %in% c("BAU historical production", "BAU historical exports", "LC1 low exports")) %>%
-  select(scen_id, value, unit_desc, unit)
-
-# cumulative mortality and pop
-
-# cum_m <- fread("cumulative_avoided_mortality.csv"  , stringsAsFactors = F) ; cum_m
-#
-# cum_m%>%
-#   filter(demo_cat %in% c("Race"))%>%
-#   group_by(scen_id)%>%
-#   summarise(cumul_mort_level = sum(cumul_mort_level ))
-# filter(scen_id %in% c("BAU historical production","LC1 historical exports","LC1 low exports", "BAU historical exports"))
-
-# cumulative pop
-
-# fread("state_disaggregated_npv_pc_fig_inputs.csv"  , stringsAsFactors = F)
-
-### SENSITIVITY ANALYSIS TO DIFF BETA
-
-# Krewski et al Beta (0.0058) and SE (0.0009628)
-
-# 90% CI [5 percentile; 95 percentile]
-
-beta_95 <- 0.0058 + 1.64 * 0.000963
-beta_95
-beta_5 <- 0.0058 - 1.64 * 0.000963
-beta_5
-
-## High
-
-setwd("H://Shared drives/emlab/projects/current-projects/calepa-cn/outputs/academic-out/refining/figures/2024-08-beta-adj-high/fig-csv-files")
-cum_h <- fread("state_npv_fig_inputs_health.csv", stringsAsFactors = F)
-
-cum_h %>%
-  filter(metric %in% "avoided_health_cost") %>%
-  filter(scen_id %in% c("BAU historical production", "BAU historical exports", "LC1 low exports")) %>%
-  select(scen_id, value, unit_desc, unit)
-
-
-## High
-
-setwd("H://Shared drives/emlab/projects/current-projects/calepa-cn/outputs/academic-out/refining/figures/2024-08-beta-adj-low/fig-csv-files")
-cum_h <- fread("state_npv_fig_inputs_health.csv", stringsAsFactors = F)
-
-cum_h %>%
-  filter(metric %in% "avoided_health_cost") %>%
-  filter(scen_id %in% c("BAU historical production", "BAU historical exports", "LC1 low exports")) %>%
-  select(scen_id, value, unit_desc, unit)
diff --git a/before-targets/exploratory/debug_refinery_level_emission_factors.R b/before-targets/exploratory/debug_refinery_level_emission_factors.R
deleted file mode 100644
index 842e86c..0000000
--- a/before-targets/exploratory/debug_refinery_level_emission_factors.R
+++ /dev/null
@@ -1,72 +0,0 @@
-
-
-renewables_info_altair <- tar_read(renewables_info_altair)
-dt_ef_ref <- tar_read(dt_ef_ref)
-dt_ef <- tar_read(dt_ef)
-dt_refcap <- tar_read(dt_refcap)
-
-ref_ei <- fread("H:/Shared drives/emlab/projects/current-projects/calepa-cn/data-staged-for-deletion/health/processed/refinery_emission_factor.csv"
-                , stringsAsFactors = F)
-
-
-#refining <- copy(refining_sites_cons_ghg_2019_2045)
-refining <- fread("C:/Users/vince/Desktop/refining_sites_cons_ghg_2019_2045_for_review.csv", stringsAsFactors = F)
-
-cluster_cw <- dt_refcap %>%
-  dplyr::select(site_id, region) %>%
-  mutate(site_id = as.character(site_id)) %>%
-  bind_rows(renewables_info_altair %>% dplyr::select(site_id, region)) %>%
-  distinct()
-
-refining <- merge(refining, cluster_cw, by = "site_id", all.x = T, allow.cartesian = T, no.dups = T)
-
-refining[, site_id := ifelse(site_id == "t-800", "800", site_id)]
-refining[, site_id := ifelse(site_id == "342-2", "34222", site_id)]
-refining[, site_id := as.numeric(site_id)]
-
-# Cluster-level emission factors -----------------------------------------
-dt_ef <- dt_ef %>%
-  mutate(ton_bbl = kg_bbl / 1000) %>%
-  dplyr::select(-kg_bbl) %>%
-  spread(pollutant_code, ton_bbl)
-
-# refining <- merge(refining, dt_ef, by.x = "region", by.y = "cluster", all.x = T, allow.cartesian = T, no.dups = T)
-# 
-# refining <- refining %>%
-#   mutate(
-#     nh3 = bbls_consumed * NH3,
-#     nox = bbls_consumed * NOX,
-#     pm25 = bbls_consumed * `PM25-PRI`,
-#     sox = bbls_consumed * SO2,
-#     voc = bbls_consumed * VOC
-#   ) %>%
-#   dplyr::select(-NH3:-VOC)
-
-# Refinery-level emission factors -----------------------------------------
-
-dt_ef_ref <- dt_ef_ref %>%
-  mutate(ton_bbl = kg_bbl / 1000) %>%
-  dplyr::select(-kg_bbl) %>%
-  spread(pollutant_code, ton_bbl)%>% #0 for NH3 for facility 271 (San Joaquin Refining Company Inc., Bakersfield Refinery)
-  mutate(NH3 = replace_na(NH3,0)) # didnt report any for NEI 2011, 2014 and 2017  
-
-refining <- merge(refining, dt_ef_ref, by.x = "site_id", by.y = "id1", all.x = T, allow.cartesian = T, no.dups = T)
-refining <- merge(refining, dt_ef, by.x = "region", by.y = "cluster", all.x = T, allow.cartesian = T, no.dups = T)
-
-#Assign cluster-specific EF for renewable fuel refineries
-refining <- refining%>%
-  mutate(NH3 = coalesce(NH3.x,NH3.y),
-         NOX = coalesce(NOX.x,NOX.y),
-         `PM25-PRI` = coalesce(`PM25-PRI.x`,`PM25-PRI.y`),
-         SO2 = coalesce(SO2.x,SO2.y),
-         VOC = coalesce(VOC.x,VOC.y))
-
-refining <- refining %>%
-  mutate(
-    nh3 = bbls_consumed * NH3,
-    nox = bbls_consumed * NOX,
-    pm25 = bbls_consumed * `PM25-PRI`,
-    sox = bbls_consumed * SO2,
-    voc = bbls_consumed * VOC
-  ) %>%
-  dplyr::select(-NH3.x:-VOC)
\ No newline at end of file
diff --git a/before-targets/exploratory/exploratory-figs/srm_ratio_120pm25_pm25.png b/before-targets/exploratory/exploratory-figs/srm_ratio_120pm25_pm25.png
deleted file mode 100644
index ebf90ff..0000000
Binary files a/before-targets/exploratory/exploratory-figs/srm_ratio_120pm25_pm25.png and /dev/null differ
diff --git a/before-targets/exploratory/exploratory-figs/srm_ratio_120pm25_pm25_no_outliers.png b/before-targets/exploratory/exploratory-figs/srm_ratio_120pm25_pm25_no_outliers.png
deleted file mode 100644
index 77b9876..0000000
Binary files a/before-targets/exploratory/exploratory-figs/srm_ratio_120pm25_pm25_no_outliers.png and /dev/null differ
diff --git a/before-targets/exploratory/exploratory-figs/srm_ratio_nh3_pm25.png b/before-targets/exploratory/exploratory-figs/srm_ratio_nh3_pm25.png
deleted file mode 100644
index fa98949..0000000
Binary files a/before-targets/exploratory/exploratory-figs/srm_ratio_nh3_pm25.png and /dev/null differ
diff --git a/before-targets/exploratory/exploratory-figs/srm_ratio_pollutant_pm25.png b/before-targets/exploratory/exploratory-figs/srm_ratio_pollutant_pm25.png
deleted file mode 100644
index 91d027e..0000000
Binary files a/before-targets/exploratory/exploratory-figs/srm_ratio_pollutant_pm25.png and /dev/null differ
diff --git a/before-targets/exploratory/exploratory_health_figs.R b/before-targets/exploratory/exploratory_health_figs.R
deleted file mode 100644
index f78022e..0000000
--- a/before-targets/exploratory/exploratory_health_figs.R
+++ /dev/null
@@ -1,50 +0,0 @@
-# setwd("G://Shared drives/emlab/projects/current-projects/calepa-cn/outputs/refining-2024/health")
-setwd("H://Shared drives/emlab/projects/current-projects/calepa-cn/outputs/refining-2024/health")
-
-mort <- fread("refining_mortality_2023.csv", stringsAsFactors = F)
-
-## pop-weighted pollution exposure
-
-mort %>%
-  group_by(disadvantaged, year) %>%
-  mutate(weight = pop / sum(pop, na.rm = F)) %>%
-  ungroup() %>%
-  group_by(scen_id, disadvantaged, year) %>%
-  summarise(per_cap = sum(total_pm25 * weight, na.rm = T)) %>%
-  ungroup() %>%
-  ggplot(aes(x = year, y = per_cap, group = disadvantaged, color = disadvantaged)) +
-  facet_wrap(~scen_id) +
-  geom_line() +
-  geom_point() +
-  theme_cowplot() +
-  labs(y = "PM2.5 (pop-weighted)", color = "DAC")
-
-## pop-weighted health cost per cap
-
-mort %>%
-  mutate(per_cap = cost_2019_PV / pop) %>%
-  group_by(scen_id, disadvantaged, year) %>%
-  summarise(per_cap = mean(per_cap, na.rm = T)) %>%
-  ungroup() %>%
-  ggplot(aes(x = year, y = per_cap, group = disadvantaged, color = disadvantaged)) +
-  facet_wrap(~scen_id) +
-  geom_line() +
-  geom_point() +
-  theme_cowplot() +
-  labs(y = "NPV per cap (unweighted)", color = "DAC")
-
-## pop-unweighted health cost per cap
-
-mort %>%
-  group_by(disadvantaged, year, scen_id) %>%
-  summarize(
-    cost_2019_PV = sum(cost_2019_PV, na.rm = F),
-    pop = sum(pop, na.rm = F)
-  ) %>%
-  ungroup() %>%
-  ggplot(aes(x = year, y = cost_2019_PV / pop, group = disadvantaged, color = disadvantaged)) +
-  facet_wrap(~scen_id) +
-  geom_line() +
-  geom_point() +
-  theme_cowplot() +
-  labs(y = "NPV per cap (pop-weighted)", color = "DAC")
diff --git a/before-targets/exploratory/labor_check.R b/before-targets/exploratory/labor_check.R
deleted file mode 100644
index 9fdf384..0000000
--- a/before-targets/exploratory/labor_check.R
+++ /dev/null
@@ -1,424 +0,0 @@
-###### CHECK LABOR OUTPUTS
-
-rm(list = ls())
-
-list.of.packages <- c("dplyr", "data.table", "lubridate", "tidyr", "readxl", "fixest", "modelsummary", "flextable")
-new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[, "Package"])]
-if (length(new.packages)) install.packages(new.packages)
-
-library(lubridate)
-library(tidyr)
-library(dplyr)
-library(data.table)
-library(readxl)
-library(stringr)
-library(readr)
-library(fixest)
-library(modelsummary)
-library(flextable)
-library(svglite)
-library(sf)
-library(ggplot2)
-library(tigris)
-library(cowplot)
-library(janitor)
-
-
-# setwd('C:/Users/mall0065/Dropbox/calepa/refining-labor')
-setwd("~/Dropbox/calepa/refining-labor")
-
-### define function for "not in"
-"%!in%" <- function(x, y) !("%in%"(x, y))
-
-# Check that string doesn't match any non-letter
-letters_only <- function(x) !grepl("[^A-Za-z*-]", x)
-
-
-# READ IN LABOR MULTIPLIERS
-
-multipliers <- fread("20240605-census_regions-Detail Economic Indicators.csv")
-
-multipliers <- janitor::clean_names(multipliers)
-
-multipliers <- multipliers[, .(
-  employment = sum(employment),
-  emp_comp = sum(employee_compensation)
-), .(origin_region, destination_region, impact_type)]
-
-multipliers[, impact_type := tolower(impact_type)]
-
-multipliers[, county := str_remove(origin_region, " County, CA Group")]
-
-setnames(multipliers, c("origin_region", "destination_region"), c("origin", "destination"))
-
-multipliers <- multipliers[, .(county, origin, destination, impact_type, employment, emp_comp)]
-
-str(multipliers)
-
-multipliers %>%
-  filter(destination == "Los Angeles County, CA") %>%
-  summary()
-
-multipliers %>%
-  filter(destination != "Los Angeles County, CA") %>%
-  summary()
-
-# READ IN LABOR IMPACTS
-
-df <- fread("labor_result_for_review (2).csv")
-str(df)
-
-## CALCULATE DIFFERENCE FROM BAU AND AGGREGATE
-
-df.agg <- group_by(
-  df,
-  demand_scenario, refining_scenario, year
-) %>%
-  summarize(
-    total_comp_usd19_h = sum(total_comp_usd19_h, na.rm = TRUE),
-    total_comp_usd19_l = sum(total_comp_usd19_l, na.rm = TRUE),
-    total_emp = sum(total_emp, na.rm = TRUE),
-    total_emp_revised = sum(total_emp_revised, na.rm = TRUE),
-    total_revenue = sum(total_revenue, na.rm = TRUE),
-    total_production_bbl = sum(total_production_bbl, na.rm = TRUE)
-  ) %>%
-  ungroup()
-summary(df.agg$total_comp_usd19_h)
-summary(df.agg$total_comp_usd19_l)
-summary(df.agg$total_emp)
-summary(df.agg$total_emp_revised)
-
-
-df.bau <- filter(
-  df,
-  demand_scenario == "BAU" & refining_scenario == "historic production"
-) %>%
-  group_by(demand_scenario, refining_scenario, year) %>%
-  summarize(
-    total_comp_usd19_h = sum(total_comp_usd19_h, na.rm = TRUE),
-    total_comp_usd19_l = sum(total_comp_usd19_l, na.rm = TRUE),
-    total_emp = sum(total_emp, na.rm = TRUE),
-    total_emp_revised = sum(total_emp_revised, na.rm = TRUE),
-    total_revenue = sum(total_revenue, na.rm = TRUE),
-    total_production_bbl = sum(total_production_bbl, na.rm = TRUE)
-  ) %>%
-  rename(
-    total_comp_usd19_h_bau = total_comp_usd19_h,
-    total_comp_usd19_l_bau = total_comp_usd19_l,
-    total_emp_bau = total_emp,
-    total_emp_revised_bau = total_emp_revised,
-    total_revenue_bau = total_revenue,
-    total_production_bbl_bau = total_production_bbl
-  ) %>%
-  ungroup() %>%
-  dplyr::select(-demand_scenario, -refining_scenario)
-
-df.county.bau <- filter(
-  df,
-  demand_scenario == "BAU" & refining_scenario == "historic production"
-) %>%
-  group_by(demand_scenario, refining_scenario, year, destination) %>%
-  summarize(
-    total_comp_usd19_h = sum(total_comp_usd19_h, na.rm = TRUE),
-    total_comp_usd19_l = sum(total_comp_usd19_l, na.rm = TRUE),
-    total_emp = sum(total_emp, na.rm = TRUE),
-    total_emp_revised = sum(total_emp_revised, na.rm = TRUE),
-    total_revenue = sum(total_revenue, na.rm = TRUE),
-    total_production_bbl = sum(total_production_bbl, na.rm = TRUE)
-  ) %>%
-  rename(
-    total_comp_usd19_h_bau = total_comp_usd19_h,
-    total_comp_usd19_l_bau = total_comp_usd19_l,
-    total_emp_bau = total_emp,
-    total_emp_revised_bau = total_emp_revised,
-    total_revenue_bau = total_revenue,
-    total_production_bbl_bau = total_production_bbl
-  ) %>%
-  ungroup() %>%
-  dplyr::select(-demand_scenario, -refining_scenario)
-
-
-df.agg <- left_join(df.agg, df.bau, by = c("year")) %>%
-  mutate(
-    total_comp_usd19_h = total_comp_usd19_h - total_comp_usd19_h_bau,
-    total_comp_usd19_l = total_comp_usd19_l - total_comp_usd19_l_bau,
-    total_emp = total_emp - total_emp_bau,
-    total_emp_revised = total_emp_revised - total_emp_revised_bau
-  )
-
-summary(df.agg$total_comp_usd19_h)
-summary(df.agg$total_comp_usd19_l)
-summary(df.agg$total_emp)
-summary(df.agg$total_emp_revised)
-summary(df.agg$total_revenue)
-summary(df.agg$total_production_bbl)
-
-df.county <- left_join(df, df.county.bau, by = c("destination", "year")) %>%
-  mutate(
-    total_comp_usd19_h = total_comp_usd19_h - total_comp_usd19_h_bau,
-    total_comp_usd19_l = total_comp_usd19_l - total_comp_usd19_l_bau,
-    total_emp = total_emp - total_emp_bau,
-    total_emp_revised = total_emp_revised - total_emp_revised_bau,
-    total_revenue_gap = total_revenue - total_revenue_bau,
-    total_production_bbl_gap = total_production_bbl - total_production_bbl_bau
-  )
-
-### PATHWAYS
-
-#### FIG 1: COMPENSATION LOWER BOUND
-fig1 <- filter(
-  df.agg,
-  demand_scenario == "LC1" & refining_scenario == "low exports"
-) %>%
-  ggplot(aes(y = total_comp_usd19_l / 1000, x = year)) +
-  geom_line(size = 1, color = "#841617") +
-  theme_cowplot(12) +
-  scale_x_continuous(limits = c(2020, 2045)) +
-  # scale_y_continuous(limits = c(0,20)) +
-  labs(y = "Compensation (2019 USD)", x = "Year", color = "", linetype = "") +
-  theme(
-    panel.background = element_blank(),
-    legend.position = "top", legend.justification = "center",
-    panel.grid.major.y = element_line(color = "gray", size = 0.5),
-    panel.grid.major.x = element_blank(),
-    panel.grid.minor.x = element_blank(),
-    axis.text = element_text(size = 14),
-    axis.title = element_text(size = 14)
-  )
-fig1
-
-#### FIG 2: COMPENSATION UPPER BOUND
-fig2 <- filter(
-  df.agg,
-  demand_scenario == "LC1" & refining_scenario == "low exports"
-) %>%
-  ggplot(aes(y = total_comp_usd19_h / 1000, x = year)) +
-  geom_line(size = 1, color = "#841617") +
-  theme_cowplot(12) +
-  scale_x_continuous(limits = c(2020, 2045)) +
-  # scale_y_continuous(limits = c(0,20)) +
-  labs(y = "Compensation (2019 USD)", x = "Year", color = "", linetype = "") +
-  theme(
-    panel.background = element_blank(),
-    legend.position = "top", legend.justification = "center",
-    panel.grid.major.y = element_line(color = "gray", size = 0.5),
-    panel.grid.major.x = element_blank(),
-    panel.grid.minor.x = element_blank(),
-    axis.text = element_text(size = 14),
-    axis.title = element_text(size = 14)
-  )
-fig2
-
-#### FIG 3: FTE JOBS
-fig3 <- filter(
-  df.agg,
-  demand_scenario == "LC1" & refining_scenario == "low exports"
-) %>%
-  ggplot(aes(y = total_emp_revised / 1000, x = year)) +
-  geom_line(size = 1, color = "#841617") +
-  theme_cowplot(12) +
-  scale_x_continuous(limits = c(2020, 2045)) +
-  # scale_y_continuous(limits = c(-60,60)) +
-  labs(y = "Labor FTE Job-years (Thousands)", x = "Year", color = "", linetype = "") +
-  theme(
-    panel.background = element_blank(),
-    legend.position = "top", legend.justification = "center",
-    panel.grid.major.y = element_line(color = "gray", size = 0.5),
-    panel.grid.major.x = element_blank(),
-    panel.grid.minor.x = element_blank(),
-    axis.text = element_text(size = 14),
-    axis.title = element_text(size = 14)
-  )
-fig3
-
-
-## FIG 4: FTE JOBS BY COUNTY
-
-fig4 <- filter(
-  df.county,
-  demand_scenario == "LC1" & refining_scenario == "low exports"
-) %>%
-  ggplot(aes(y = total_emp_revised / 1000, x = year, color = destination)) +
-  geom_line(size = 1, aes(color = destination)) +
-  theme_cowplot(12) +
-  scale_x_continuous(limits = c(2020, 2045)) +
-  # scale_y_continuous(limits = c(-60,60)) +
-  labs(y = "Labor FTE Job-years (Thousands)", x = "Year", color = "", linetype = "") +
-  theme(
-    panel.background = element_blank(),
-    legend.position = "top", legend.justification = "center",
-    panel.grid.major.y = element_line(color = "gray", size = 0.5),
-    panel.grid.major.x = element_blank(),
-    panel.grid.minor.x = element_blank(),
-    axis.text = element_text(size = 14),
-    axis.title = element_text(size = 14)
-  )
-fig4
-
-ggsave("emp_check.png", fig4, width = 10, height = 5, units = "in", dpi = 300, bg = "white")
-
-
-## FIG 5: Compensation BY COUNTY
-
-fig5 <- filter(
-  df.county,
-  demand_scenario == "LC1" & refining_scenario == "low exports"
-) %>%
-  ggplot(aes(y = total_comp_usd19_l / 1000000, x = year, color = destination)) +
-  geom_line(size = 1, aes(color = destination)) +
-  theme_cowplot(12) +
-  scale_x_continuous(limits = c(2020, 2045)) +
-  # scale_y_continuous(limits = c(-60,60)) +
-  labs(y = "Compensation (Millions)", x = "Year", color = "", linetype = "") +
-  theme(
-    panel.background = element_blank(),
-    legend.position = "top", legend.justification = "center",
-    panel.grid.major.y = element_line(color = "gray", size = 0.5),
-    panel.grid.major.x = element_blank(),
-    panel.grid.minor.x = element_blank(),
-    axis.text = element_text(size = 14),
-    axis.title = element_text(size = 14)
-  )
-fig5
-
-ggsave("comp_check.png", fig5, width = 10, height = 5, units = "in", dpi = 300, bg = "white")
-
-
-#### FIG 6: COMPENSATION LOWER BOUND
-fig6 <- filter(
-  df.county,
-  demand_scenario == "LC1" & refining_scenario == "low exports" & year > 2022 & destination == "Siskiyou County, CA"
-) %>%
-  ggplot(aes(y = total_comp_usd19_l / 1000000, x = year)) +
-  geom_line(size = 1, color = "#841617") +
-  theme_cowplot(12) +
-  scale_x_continuous(limits = c(2020, 2045)) +
-  # scale_y_continuous(limits = c(0,20)) +
-  labs(y = "Compensation (2019 USD)", x = "Year", color = "", linetype = "") +
-  theme(
-    panel.background = element_blank(),
-    legend.position = "top", legend.justification = "center",
-    panel.grid.major.y = element_line(color = "gray", size = 0.5),
-    panel.grid.major.x = element_blank(),
-    panel.grid.minor.x = element_blank(),
-    axis.text = element_text(size = 14),
-    axis.title = element_text(size = 14)
-  )
-fig6
-
-#### FIG 7: FTE JOBS
-fig7 <- filter(
-  df.county,
-  demand_scenario == "LC1" & refining_scenario == "historic exports"
-) %>%
-  ggplot(aes(y = total_emp_revised / 1000, x = year)) +
-  geom_line(size = 1, color = "#841617") +
-  theme_cowplot(12) +
-  scale_x_continuous(limits = c(2020, 2045)) +
-  # scale_y_continuous(limits = c(-60,60)) +
-  labs(y = "Labor FTE Job-years (Thousands)", x = "Year", color = "", linetype = "") +
-  theme(
-    panel.background = element_blank(),
-    legend.position = "top", legend.justification = "center",
-    panel.grid.major.y = element_line(color = "gray", size = 0.5),
-    panel.grid.major.x = element_blank(),
-    panel.grid.minor.x = element_blank(),
-    axis.text = element_text(size = 14),
-    axis.title = element_text(size = 14)
-  )
-fig7
-
-
-#### FIG 8: REVENUE
-fig8 <- filter(
-  df.county,
-  demand_scenario == "LC1" & refining_scenario == "low exports"
-) %>%
-  ggplot(aes(y = total_revenue_gap / 10^9, x = year, color = destination)) +
-  geom_line(size = 1, aes(color = destination)) +
-  theme_cowplot(12) +
-  scale_x_continuous(limits = c(2020, 2045)) +
-  # scale_y_continuous(limits = c(-60,60)) +
-  labs(y = "Revenue (Billions of Dollars, Difference with BAU)", x = "Year", color = "", linetype = "") +
-  theme(
-    panel.background = element_blank(),
-    legend.position = "top", legend.justification = "center",
-    panel.grid.major.y = element_line(color = "gray", size = 0.5),
-    panel.grid.major.x = element_blank(),
-    panel.grid.minor.x = element_blank(),
-    axis.text = element_text(size = 14),
-    axis.title = element_text(size = 14)
-  )
-fig8
-
-ggsave("revenue_check.png", fig8, width = 10, height = 5, units = "in", dpi = 300, bg = "white")
-
-#### FIG 9: PRODUCTION GAP
-
-fig9 <- filter(
-  df.county,
-  demand_scenario == "LC1" & refining_scenario == "low exports"
-) %>%
-  ggplot(aes(y = total_production_bbl_gap / 10^9, x = year, color = destination)) +
-  geom_line(size = 1, aes(color = destination)) +
-  theme_cowplot(12) +
-  scale_x_continuous(limits = c(2020, 2045)) +
-  # scale_y_continuous(limits = c(-60,60)) +
-  labs(y = "Total Production (Billions of Barrels, Difference with BAU)", x = "Year", color = "", linetype = "") +
-  theme(
-    panel.background = element_blank(),
-    legend.position = "top", legend.justification = "center",
-    panel.grid.major.y = element_line(color = "gray", size = 0.5),
-    panel.grid.major.x = element_blank(),
-    panel.grid.minor.x = element_blank(),
-    axis.text = element_text(size = 14),
-    axis.title = element_text(size = 14)
-  )
-fig9
-
-#### FIG 10: PRODUCTION LEVEL
-fig10 <- filter(
-  df.agg,
-  demand_scenario == "LC1" & refining_scenario == "historic exports"
-) %>%
-  ggplot(aes(y = total_production_bbl_bau / 10^9, x = year)) +
-  geom_line(size = 1, color = "#841617") +
-  geom_line(aes(y = total_production_bbl / 10^9), size = 1, color = "blue") +
-  theme_cowplot(12) +
-  scale_x_continuous(limits = c(2020, 2045)) +
-  # scale_y_continuous(limits = c(0,20)) +
-  labs(y = "Production (Billions of Barrels)", x = "Year", color = "", linetype = "") +
-  theme(
-    panel.background = element_blank(),
-    legend.position = "top", legend.justification = "center",
-    panel.grid.major.y = element_line(color = "gray", size = 0.5),
-    panel.grid.major.x = element_blank(),
-    panel.grid.minor.x = element_blank(),
-    axis.text = element_text(size = 14),
-    axis.title = element_text(size = 14)
-  )
-fig10
-
-
-#### FIG 11: REVENUE GAP, JUST LA
-fig11 <- filter(
-  df.county,
-  demand_scenario == "LC1" & refining_scenario == "low exports" & destination == "Los Angeles"
-) %>%
-  ggplot(aes(y = total_revenue / 10^9, x = year)) +
-  geom_line(size = 1, color = "#841617") +
-  geom_line(aes(y = total_revenue_bau / 10^9), size = 1, color = "blue") +
-  theme_cowplot(12) +
-  scale_x_continuous(limits = c(2020, 2045)) +
-  # scale_y_continuous(limits = c(-60,60)) +
-  labs(y = "Revenue (Billions of Dollars)", x = "Year", color = "", linetype = "") +
-  theme(
-    panel.background = element_blank(),
-    legend.position = "top", legend.justification = "center",
-    panel.grid.major.y = element_line(color = "gray", size = 0.5),
-    panel.grid.major.x = element_blank(),
-    panel.grid.minor.x = element_blank(),
-    axis.text = element_text(size = 14),
-    axis.title = element_text(size = 14)
-  )
-fig11
diff --git a/before-targets/exploratory/labor_text_numbers.R b/before-targets/exploratory/labor_text_numbers.R
deleted file mode 100644
index 4de8954..0000000
--- a/before-targets/exploratory/labor_text_numbers.R
+++ /dev/null
@@ -1,434 +0,0 @@
-###### CHECK LABOR OUTPUTS
-
-rm(list = ls())
-
-list.of.packages <- c("dplyr", "data.table", "lubridate", "tidyr", "readxl", "fixest", "modelsummary", "flextable")
-new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[, "Package"])]
-if (length(new.packages)) install.packages(new.packages)
-
-library(lubridate)
-library(tidyr)
-library(dplyr)
-library(data.table)
-library(readxl)
-library(stringr)
-library(readr)
-library(fixest)
-library(modelsummary)
-library(flextable)
-library(svglite)
-library(sf)
-library(ggplot2)
-library(tigris)
-library(cowplot)
-library(janitor)
-
-
-# setwd('G:/Shared drives/emlab/projects/current-projects/calepa-cn/outputs/academic-out/refining/figures')
-setwd("~/Library/CloudStorage/GoogleDrive-cmalloy@ucsb.edu/Shared drives/emlab/projects/current-projects/calepa-cn/outputs/academic-out/refining/figures")
-
-### define function for "not in"
-"%!in%" <- function(x, y) !("%in%"(x, y))
-
-# Check that string doesn't match any non-letter
-letters_only <- function(x) !grepl("[^A-Za-z*-]", x)
-
-# IMPORT DISAGGREGATED RESULTS
-
-df <- fread("2024-08-update/fig-csv-files/labor_high_low_annual_outputs.csv")
-str(df)
-summary(df$sum_demo_emp)
-summary(df$sum_demo_emp_revised)
-
-# IMPORT COUNTY LEVEL RESULTS
-
-df.county <- fread("2024-08-update/fig-csv-files/labor_county_outputs.csv") %>%
-  filter(oil_price_scenario == "reference case")
-str(df.county)
-
-# CALCULATE PERCENT REDUCTION RELATIVE TO BAU
-
-## AGGREGATE IMPACTS ACROSS DEMOGRAPHIC GROUPS AND YEARS, SELECT 1 DEMOGRAPHIC CATEGORY SO IMPACTS ARENT DUPLICATED
-df <- pivot_wider(df,
-  id_cols = c("scenario", "demo_cat", "demo_group", "demand_scenario", "refining_scenario", "oil_price_scenario", "year"),
-  names_from = c("metric_name", "estimate"),
-  values_from = value
-) %>%
-  filter(oil_price_scenario == "reference case")
-str(df)
-
-df.agg <- filter(
-  df,
-  demo_cat == "DAC"
-) %>%
-  group_by(scenario, year) %>%
-  summarize(
-    sum_demo_emp = sum(employment_high),
-    sum_demo_emp_revised = sum(employment_low),
-    sum_demo_comp_pv_h = sum(compensation_pv_high),
-    sum_demo_comp_pv_l = sum(compensation_pv_low)
-  ) %>%
-  ungroup()
-
-## SUBTRACT BAU FROM SCENARIO IMPACT AND DIVIDE BY BAU FOR PERCENT REDUCTION
-
-df.lc1 <- filter(
-  df.agg,
-  scenario == "Low demand - low exports"
-)
-
-df.bau <- filter(
-  df.agg,
-  scenario == "BAU demand - historical production"
-) %>%
-  rename(
-    sum_demo_emp_bau = sum_demo_emp,
-    sum_demo_emp_revised_bau = sum_demo_emp_revised,
-    sum_demo_comp_pv_h_bau = sum_demo_comp_pv_h,
-    sum_demo_comp_pv_l_bau = sum_demo_comp_pv_l
-  ) %>%
-  dplyr::select(-scenario)
-
-df.agg <- filter(
-  df.agg,
-  scenario != "BAU demand - historical production"
-) %>%
-  left_join(df.bau, by = "year") %>%
-  group_by(scenario) %>%
-  summarize(
-    sum_demo_emp = sum(sum_demo_emp),
-    sum_demo_emp_revised = sum(sum_demo_emp_revised),
-    sum_demo_comp_pv_h = sum(sum_demo_comp_pv_h),
-    sum_demo_comp_pv_l = sum(sum_demo_comp_pv_l),
-    sum_demo_emp_bau = sum(sum_demo_emp_bau),
-    sum_demo_emp_revised_bau = sum(sum_demo_emp_revised_bau),
-    sum_demo_comp_pv_h_bau = sum(sum_demo_comp_pv_h_bau),
-    sum_demo_comp_pv_l_bau = sum(sum_demo_comp_pv_l_bau)
-  ) %>%
-  ungroup() %>%
-  mutate(
-    perc_emp = 1 - sum_demo_emp / sum_demo_emp_bau,
-    perc_emp_revised = 1 - sum_demo_emp_revised / sum_demo_emp_revised_bau,
-    perc_comp_h = 1 - sum_demo_comp_pv_h / sum_demo_comp_pv_h_bau,
-    perc_comp_l = 1 - sum_demo_comp_pv_l / sum_demo_comp_pv_l_bau,
-    gap_emp = sum_demo_emp - sum_demo_emp_bau,
-    gap_emp_revised = sum_demo_emp_revised - sum_demo_emp_revised_bau,
-    gap_comp_h = sum_demo_comp_pv_h - sum_demo_comp_pv_h_bau,
-    gap_comp_l = sum_demo_comp_pv_l - sum_demo_comp_pv_l_bau
-  )
-
-summary(df.agg$perc_emp)
-summary(df.agg$perc_emp_revised)
-summary(df.agg$perc_comp_h)
-summary(df.agg$perc_comp_l)
-
-
-# CALCULATE SHARE OF TOTAL IMPACT FROM EACH COUNTY
-
-df.total <- group_by(
-  df.county,
-  demand_scenario, refining_scenario, metric_name, estimate
-) %>%
-  summarize(total_value = sum(value)) %>%
-  ungroup()
-
-
-df.county.agg <- group_by(
-  df.county,
-  county, demand_scenario, refining_scenario, metric_name, estimate
-) %>%
-  summarize(value = sum(value)) %>%
-  ungroup()
-
-df.county.agg <- left_join(df.county.agg, df.total, by = c("demand_scenario", "refining_scenario", "metric_name", "estimate")) %>%
-  mutate(share = value / total_value) %>%
-  arrange(demand_scenario, refining_scenario, metric_name, estimate, -share)
-summary(df.county.agg$share)
-
-
-# CALCULATE SHARE OF IMPACT RELATIVE TO BAU FOR EACH COUNTY
-
-df.total.bau <- filter(
-  df.county,
-  demand_scenario == "BAU" & refining_scenario == "historic production"
-) %>%
-  group_by(metric_name, estimate) %>%
-  summarize(total_value_bau = sum(value)) %>%
-  ungroup()
-
-df.total2 <- filter(
-  df.county,
-  demand_scenario != "BAU" & refining_scenario != "historic production"
-) %>%
-  group_by(demand_scenario, refining_scenario, metric_name, estimate) %>%
-  summarize(total_value = sum(value)) %>%
-  ungroup() %>%
-  left_join(df.total.bau, by = c("metric_name", "estimate")) %>%
-  mutate(gap_total_value = total_value - total_value_bau)
-
-
-df.county.agg.bau <- filter(
-  df.county,
-  demand_scenario == "BAU" & refining_scenario == "historic production"
-) %>%
-  group_by(county, metric_name, estimate) %>%
-  summarize(value_bau = sum(value)) %>%
-  ungroup()
-
-df.county.agg2 <- filter(
-  df.county,
-  demand_scenario != "BAU" & refining_scenario != "historic production"
-) %>%
-  group_by(county, demand_scenario, refining_scenario, metric_name, estimate) %>%
-  summarize(value = sum(value)) %>%
-  ungroup() %>%
-  left_join(df.county.agg.bau, by = c("county", "metric_name", "estimate")) %>%
-  mutate(gap_value = value - value_bau) %>%
-  left_join(df.total2, by = c("demand_scenario", "refining_scenario", "metric_name", "estimate")) %>%
-  mutate(share = gap_value / gap_total_value) %>%
-  arrange(demand_scenario, refining_scenario, metric_name, estimate, -share)
-
-
-
-# AVERAGE ANNUAL JOB LOSSES (DAN AND NON-DAC) SHOULD THIS BE ADJUSTED FOR FIRST FEW YRS?
-
-df.agg2 <- filter(
-  df,
-  demo_cat == "Poverty" 
-) %>%
-  group_by(scenario, year, demo_group) %>%
-  summarize(
-    sum_demo_emp = sum(employment_pc_high),
-    sum_demo_emp_revised = sum(employment_pc_low),
-    sum_demo_comp_pv_h = sum(compensation_pv_pc_high),
-    sum_demo_comp_pv_l = sum(compensation_pv_pc_low)
-  ) %>%
-  ungroup()
-
-df.bau2 <- filter(
-  df,
-  demo_cat == "Poverty" &
-    scenario == "BAU demand - historical production" 
-) %>%
-  group_by(year, demo_group) %>%
-  summarize(
-    sum_demo_emp_bau = sum(employment_pc_high),
-    sum_demo_emp_revised_bau = sum(employment_pc_low),
-    sum_demo_comp_pv_h_bau = sum(compensation_pv_pc_high),
-    sum_demo_comp_pv_l_bau = sum(compensation_pv_pc_low)
-  ) %>%
-  ungroup()
-
-
-
-# TREND IN EMPLOYMENT IMPACT DIFFERENCE BETWEEN DAC AND NON-DAC
-df.year <- filter(
-  df.agg2,
-  scenario != "BAU demand - historical production"
-) %>%
-  left_join(df.bau2, by = c("year", "demo_group")) %>%
-  mutate(
-    gap_emp = sum_demo_emp - sum_demo_emp_bau,
-    gap_emp_revised = sum_demo_emp_revised - sum_demo_emp_revised_bau,
-    gap_comp_h = sum_demo_comp_pv_h - sum_demo_comp_pv_h_bau,
-    gap_comp_l = sum_demo_comp_pv_l - sum_demo_comp_pv_l_bau
-  )
-
-
-fig1 <- filter(df.year, scenario == "Low demand - low exports") %>%
-  ggplot(aes(y = gap_emp, x = year, group = demo_group)) +
-  geom_line(aes(color = demo_group))
-fig1
-
-
-## COMPARE TO FIG 4 CSV FILE
-
-df.f4 <- fread("2022-12-update/fig-csv-files/state_labor_levels_fig_gaps_inputs.csv")
-
-# AVERAGE ANNUAL JOB LOSSES (DAN AND NON-DAC) SHOULD THIS BE ADJUSTED FOR FIRST FEW YRS?
-
-df.agg2 <- filter(
-  df.agg2,
-  scenario != "BAU demand - historical production"
-) %>%
-  left_join(df.bau2, by = c("year", "demo_group")) %>%
-  mutate(
-    gap_emp = sum_demo_emp - sum_demo_emp_bau,
-    gap_emp_revised = sum_demo_emp_revised - sum_demo_emp_revised_bau,
-    gap_comp_h = sum_demo_comp_pv_h - sum_demo_comp_pv_h_bau,
-    gap_comp_l = sum_demo_comp_pv_l - sum_demo_comp_pv_l_bau
-  ) %>%
-  group_by(scenario, demo_group) %>%
-  summarize(
-    gap_emp = sum(gap_emp),
-    gap_emp_revised = sum(gap_emp_revised),
-    gap_comp_h = sum(gap_comp_h),
-    gap_comp_l = sum(gap_comp_l)
-  ) %>%
-  ungroup()
-
-
-
-
-# IMPORT DISAGGREGATED RESULTS WITH 3 DIFFERENT OIL PRICE PATHWAYS
-
-df <- fread("2024-08-update/fig-csv-files/labor_high_low_annual_outputs.csv")
-str(df)
-summary(df$sum_demo_emp)
-summary(df$sum_demo_emp_revised)
-
-
-# KEEP JUST 2045 AND COMPUTE GAP BETWEEN SCENARIO AND BAU FOR EACH OIL PRICE PATHWAY
-
-# df <- filter(df,year==2045)
-
-## AGGREGATE IMPACTS ACROSS DEMOGRAPHIC GROUPS AND YEARS, SELECT 1 DEMOGRAPHIC CATEGORY SO IMPACTS ARENT DUPLICATED
-df <- pivot_wider(df,
-  id_cols = c("scenario", "demo_cat", "demo_group", "demand_scenario", "refining_scenario", "oil_price_scenario", "year"),
-  names_from = c("metric_name", "estimate"),
-  values_from = value
-) %>%
-  filter(demo_cat == "DAC") %>%
-  group_by(scenario, oil_price_scenario, year) %>%
-  summarize(
-    compensation_pv_high = sum(compensation_pv_high),
-    compensation_pv_low = sum(compensation_pv_low),
-    employment_high = sum(employment_high),
-    employment_low = sum(employment_low)
-  ) %>%
-  ungroup()
-
-## BAU
-
-df.bau <- filter(df, scenario == "BAU demand - historical production") %>%
-  rename(
-    compensation_pv_high_bau = compensation_pv_high,
-    compensation_pv_low_bau = compensation_pv_low,
-    employment_high_bau = employment_high,
-    employment_low_bau = employment_low
-  ) %>%
-  dplyr::select(-scenario)
-
-
-## JOIN BAU TO DF AND COMPUTE GAPS
-
-df <- filter(df, scenario != "BAU demand - historical production") %>%
-  left_join(df.bau, by = c("oil_price_scenario", "year")) %>%
-  group_by(scenario, oil_price_scenario) %>%
-  summarize(
-    compensation_pv_high = sum(compensation_pv_high),
-    compensation_pv_low = sum(compensation_pv_low),
-    employment_high = sum(employment_high),
-    employment_low = sum(employment_low),
-    compensation_pv_high_bau = sum(compensation_pv_high_bau),
-    compensation_pv_low_bau = sum(compensation_pv_low_bau),
-    employment_high_bau = sum(employment_high_bau),
-    employment_low_bau = sum(employment_low_bau)
-  ) %>%
-  ungroup() %>%
-  mutate(
-    gap_comp_pv_high = compensation_pv_high - compensation_pv_high_bau,
-    gap_comp_pv_low = compensation_pv_low - compensation_pv_low_bau,
-    gap_emp_high = employment_high - employment_high_bau,
-    gap_emp_low = employment_low - employment_low_bau
-  )
-
-
-## NEEDS: COMP AND EMP IN 2019 (FROM IMPLAN), COMP AND EMP IN 2045 HIGH AND LOW ESTIMATES FOR EACH SCENARIO
-
-## FROM IMPLAN: SPREADSHEETS ARE AT /Dropbox/calepa/refining-labor/2019-baseline-ica
-fte.per.emp <- 0.991150442477876
-
-direct.emp.2019 <- 11456.6380044314 * fte.per.emp
-indirect.emp.2019 <- 70118.2031857474 * fte.per.emp
-induced.emp.2019 <- 43395.822784118 * fte.per.emp
-total.emp.2019 <- direct.emp.2019 + indirect.emp.2019 + induced.emp.2019
-
-direct.comp.2019 <- 3028965833
-indirect.comp.2019 <- 4812737405.89
-induced.comp.2019 <- 2273210360.05
-total.comp.2019 <- direct.comp.2019 + indirect.comp.2019 + induced.comp.2019
-
-
-## READ IN IMPACTS
-
-# IMPORT DISAGGREGATED RESULTS ADD ACROSS COUNTIES
-
-df <- fread("2024-08-update/fig-csv-files/labor_result_for_review.csv") %>%
-  filter(oil_price_scenario == "reference case") %>%
-  group_by(demand_scenario, refining_scenario, year) %>%
-  summarize(
-    total_emp = sum(total_emp),
-    total_emp_revised = sum(total_emp_revised),
-    total_comp_usd19_h = sum(total_comp_usd19_h),
-    total_comp_usd19_l = sum(total_comp_usd19_l)
-  ) %>%
-  ungroup()
-str(df)
-
-
-# KEEP JUST 2045 AND COMPUTE 1-SHARE OF 2019 EMPLOMENT OR COMPENSATION FOR W/REEMPLOYMENT (HIGH)
-
-df.2045 <- filter(df, year == 2045)
-
-df.high <- mutate(df.2045,
-  emp_perc_h = 1 - (total_emp / total.emp.2019),
-  emp_comp_h = 1 - (total_comp_usd19_h / total.comp.2019)
-)
-
-
-
-# DIRECT,INDIRECT,INDUCED SHARES RELATIVE TO 2019
-
-df <- fread("2024-08-update/fig-csv-files/labor_result_x_impact_type.csv")
-str(df)
-unique(df$oil_price_scenario)
-
-df.review <- fread("2024-08-update/fig-csv-files/labor_result_x_impact_type_for_review.csv")
-str(df.review)
-unique(df.review$oil_price_scenario)
-
-df <- filter(
-  df,
-  oil_price_scenario == "reference case"
-) %>%
-  group_by(demand_scenario, refining_scenario, impact_type, year) %>%
-  summarize(
-    total_emp = sum(total_emp),
-    total_emp_revised = sum(total_emp_revised),
-    total_comp_usd19_h = sum(total_comp_usd19_h),
-    total_comp_usd19_l = sum(total_comp_usd19_l)
-  ) %>%
-  ungroup()
-
-# KEEP JUST 2045 AND COMPUTE 1-SHARE OF 2019 EMPLOMENT OR COMPENSATION FOR W/REEMPLOYMENT (HIGH)
-
-df.2045 <- filter(df, year == 2045)
-
-df.high <- mutate(df.2045,
-  emp_perc_h = ifelse(impact_type == "direct", 1 - (total_emp / direct.emp.2019),
-    ifelse(impact_type == "indirect", 1 - (total_emp / indirect.emp.2019),
-      ifelse(impact_type == "induced", 1 - (total_emp / induced.emp.2019), NA)
-    )
-  ),
-  comp_perc_h = ifelse(impact_type == "direct", 1 - (total_comp_usd19_h / direct.comp.2019),
-    ifelse(impact_type == "indirect", 1 - (total_comp_usd19_h / indirect.comp.2019),
-      ifelse(impact_type == "induced", 1 - (total_comp_usd19_h / induced.comp.2019), NA)
-    )
-  )
-)
-
-
-# TOTAL IMPACT LEVEL BY SCENARIO
-
-df.cumu <- group_by(
-  df,
-  demand_scenario, refining_scenario
-) %>%
-  summarize(
-    total_emp = sum(total_emp),
-    total_emp_revised = sum(total_emp_revised),
-    total_comp_usd19_h = sum(total_comp_usd19_h),
-    total_comp_usd19_l = sum(total_comp_usd19_l)
-  ) %>%
-  ungroup()
diff --git a/before-targets/exploratory/ncomms_labor_multipliers.R b/before-targets/exploratory/ncomms_labor_multipliers.R
deleted file mode 100644
index 4cede8c..0000000
--- a/before-targets/exploratory/ncomms_labor_multipliers.R
+++ /dev/null
@@ -1,515 +0,0 @@
-###### NCOMMS REVISIONS 
-
-rm(list = ls())
-
-list.of.packages <- c("dplyr", "data.table", "lubridate", "tidyr", "readxl", "fixest", "modelsummary", "flextable")
-new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[, "Package"])]
-if (length(new.packages)) install.packages(new.packages)
-
-library(lubridate)
-library(tidyr)
-library(dplyr)
-library(data.table)
-library(readxl)
-library(stringr)
-library(readr)
-library(fixest)
-library(modelsummary)
-library(flextable)
-library(svglite)
-library(sf)
-library(ggplot2)
-library(tigris)
-library(cowplot)
-library(janitor)
-
-
-# setwd('G:/Shared drives/emlab/projects/current-projects/calepa-cn/outputs/academic-out/refining/figures')
-setwd("~/Dropbox/ou/ncomms-revisions")
-
-### define function for "not in"
-"%!in%" <- function(x, y) !("%in%"(x, y))
-
-# Check that string doesn't match any non-letter
-letters_only <- function(x) !grepl("[^A-Za-z*-]", x)
-
-
-# IMPORT LODES DATA--FILTERED USING CENSUS BLOCK IDs
-
-df <- fread('ca_od_main_JT02_2020.csv') %>%
-  filter(w_geocode == 60379800021006 |
-           w_geocode == 60379800301000 |
-           w_geocode == 60133780001024 |
-           w_geocode == 60952521022025 |
-           w_geocode == 60290024011034 |
-           w_geocode == 60379800141083 |
-           w_geocode == 60379800051007 |
-           w_geocode == 60290005072025 |
-           w_geocode == 60133200011067 |
-           w_geocode == 60133150001075 |
-           w_geocode == 60133580005003 |
-           w_geocode == 60379800151000 |
-           w_geocode == 60790123061000 | 
-           w_geocode == 60375535021000 |
-           w_geocode == 60290005072077) %>%
-  dplyr::select(w_geocode,h_geocode,S000,SI01) %>% 
-  mutate(refinery_name = ifelse(w_geocode == 60379800021006,"Marathon Carson",
-                                ifelse(w_geocode == 60379800301000,"Chevron El Segundo",
-                                       ifelse(w_geocode == 60133780001024,"Chevron Richmond",
-                                              ifelse(w_geocode == 60952521022025,"Valero Benicia",
-                                                     ifelse(w_geocode == 60290024011034,"Kern Oil Bakersfield",
-                                                            ifelse(w_geocode == 60379800141083,"Valero Wilmington",
-                                                                   ifelse(w_geocode == 60379800051007,"PBF Torrance",
-                                                                          ifelse(w_geocode == 60290005072025,"San Joaquin Bakersfield",
-                                                                                 ifelse(w_geocode == 60133200011067,"PBF Martinez",
-                                                                                        ifelse(w_geocode == 60133150001075,"Marathon Golden Eagle",
-                                                                                               ifelse(w_geocode == 60133580005003,"Phillips 66 Rodeo",
-                                                                                                      ifelse(w_geocode == 60379800151000,"Phillips 66 Wilmington",
-                                                                                                             ifelse(w_geocode == 60790123061000,"Phillips 66 Santa Maria",
-                                                                                                                    ifelse(w_geocode == 60375535021000,"AltAir Paramount",
-                                                                                                                           ifelse(w_geocode == 60290005072077,"Global Clean Energy",NA))))))))))))))))
-str(df)
-summary(df$SI01)
-summary(df$S000)
-
-df.refinery <- group_by(df,refinery_name) %>% 
-  summarize(SI01 = sum(SI01), 
-            S000 = sum(S000)) %>% 
-  ungroup()
-summary(df.refinery$SI01)
-summary(df.refinery$S000)
-
-
-
-# IMPORT LODES -- FILTER USING CENSUS TRACT IDS
-
-df.tract <- fread('ca_od_main_JT02_2020.csv') %>%
-  filter(substr(as.character(w_geocode),1,10) == 6037980002 |
-           substr(as.character(w_geocode),1,10) == 6037980030 |
-           substr(as.character(w_geocode),1,10) == 6013378000 |
-           substr(as.character(w_geocode),1,10) == 6095252102 |
-           substr(as.character(w_geocode),1,10) == 6029002401 |
-           substr(as.character(w_geocode),1,10) == 6037980014 |
-           substr(as.character(w_geocode),1,10) == 6037980005 |
-           substr(as.character(w_geocode),1,10) == 6029000507 |
-           substr(as.character(w_geocode),1,10) == 6013320001 |
-           substr(as.character(w_geocode),1,10) == 6013315000 |
-           substr(as.character(w_geocode),1,10) == 6013358000 |
-           substr(as.character(w_geocode),1,10) == 6037980015 |
-           substr(as.character(w_geocode),1,10) == 6079012306 | 
-           substr(as.character(w_geocode),1,10) == 6037553502 |
-           substr(as.character(w_geocode),1,10) == 6029000507) %>%
-  mutate(w_tract_geocode = substr(as.character(w_geocode),1,10), 
-         h_tract_geocode = substr(as.character(h_geocode),1,10)) %>%
-  dplyr::select(w_geocode,h_geocode,w_tract_geocode,h_tract_geocode,S000,SI01) %>% 
-  mutate(refinery_name = ifelse(substr(as.character(w_geocode),1,10) == 6037980002,"Marathon Carson",
-                                ifelse(substr(as.character(w_geocode),1,10) == 6037980030,"Chevron El Segundo",
-                                       ifelse(substr(as.character(w_geocode),1,10) == 6013378000,"Chevron Richmond",
-                                              ifelse(substr(as.character(w_geocode),1,10) == 6095252102,"Valero Benicia",
-                                                     ifelse(substr(as.character(w_geocode),1,10) == 6029002401,"Kern Oil Bakersfield",
-                                                            ifelse(substr(as.character(w_geocode),1,10) == 6037980014,"Valero Wilmington",
-                                                                   ifelse(substr(as.character(w_geocode),1,10) == 6037980005,"PBF Torrance",
-                                                                          ifelse(substr(as.character(w_geocode),1,10) == 6029000507,"San Joaquin Bakersfield",
-                                                                                 ifelse(substr(as.character(w_geocode),1,10) == 6013320001,"PBF Martinez",
-                                                                                        ifelse(substr(as.character(w_geocode),1,10) == 6013315000,"Marathon Golden Eagle",
-                                                                                               ifelse(substr(as.character(w_geocode),1,10) == 6013358000,"Phillips 66 Rodeo",
-                                                                                                      ifelse(substr(as.character(w_geocode),1,10) == 6037980015,"Phillips 66 Wilmington",
-                                                                                                             ifelse(substr(as.character(w_geocode),1,10) == 6079012306,"Phillips 66 Santa Maria",
-                                                                                                                    ifelse(substr(as.character(w_geocode),1,10) == 6037553502,"AltAir Paramount",
-                                                                                                                           ifelse(substr(as.character(w_geocode),1,10) == 6029000507,"Global Clean Energy",NA))))))))))))))))
-str(df)
-summary(df$SI01)
-summary(df$S000)
-
-df.refinery <- group_by(df.tract,refinery_name) %>% 
-  summarize(SI01 = sum(SI01), 
-            S000 = sum(S000)) %>% 
-  ungroup()
-summary(df.refinery$SI01)
-summary(df.refinery$S000)
-
-
-# ADD UP JOBS BY WORK-RESIDENCE TRACT PAIRS 
-
-df.tract <- group_by(df.tract,
-                     w_tract_geocode,h_tract_geocode) %>% 
-  summarize(SI01 = sum(SI01),
-            refinery_name = first(refinery_name)) %>% 
-  filter(SI01 > 0) %>%
-  ungroup()
-
-df.tract <- mutate(df.tract,
-                   w_tract_geocode = str_pad(as.character(w_tract_geocode),11,pad="0"),
-                   h_tract_geocode = str_pad(as.character(h_tract_geocode),11,pad="0"))
-
-# IMPORT CENSUS TRACTS FOR CA 
-
-ca.tracts <- tracts(state="CA",year=2020) 
-
-# JOIN SHAPEFILE TO LODES BY RESIDENTIAL TRACT 
-
-df.tract.r <- left_join(df.tract,ca.tracts,by=c("h_tract_geocode"="GEOID"))
-unique(is.na(df.tract.r$STATEFP)) # FALSE FOR ALL OBSERVATIONS 
-
-df.tract.r <- st_as_sf(df.tract.r,
-                       crs=4269,
-                       sf_column_name = "geometry")
-
-
-sf1 <- filter(df.tract.r, refinery_name=="Marathon Carson" | refinery_name=="Chevron El Segundo" | 
-                refinery_name=="Valero Wilmington" | refinery_name=="PBF Torrance" | 
-                refinery_name=="Phillips 66 Wilmington" | refinery_name=="AltAir Paramount") %>% 
-  mutate(refinery_name = as.factor(refinery_name)) %>% 
-  rename(South_Cluster=SI01)
-sf2 <- filter(df.tract.r, refinery_name!="Marathon Carson" & refinery_name!="Chevron El Segundo" & 
-                refinery_name!="Valero Wilmington" & refinery_name!="PBF Torrance" & 
-                refinery_name!="Phillips 66 Wilmington" & refinery_name!="AltAir Paramount") %>% 
-  mutate(refinery_name = as.factor(refinery_name)) %>% 
-  rename(North_Cluster=SI01)
-
-library(ggnewscale)
-fig1 <- ggplot(data=sf1) + 
-  geom_sf(aes(color=South_Cluster)) +
-  scale_color_viridis_c(option = "D") +
-  new_scale_color() + 
-  geom_sf(data=sf2,aes(color=North_Cluster)) +
-  scale_color_viridis_c(option = "C") +
-  scale_fill_gradient() +
-  geom_sf(data=ca.tracts,fill=NA) +
-  theme_bw() +
-  theme(panel.border = element_blank(),
-        panel.grid = element_blank(),
-        axis.ticks.x = element_blank(),
-        axis.text.x = element_blank(),
-        axis.ticks.y = element_blank(),
-        axis.text.y = element_blank(),
-        axis.title = element_blank()) 
-fig1          
-
-sf1 <- mutate(sf1, South_Cluster = 1,North_Cluster=0)
-sf2 <- mutate(sf2, North_Cluster = 1,South_Cluster=0)
-
-sf3 <- bind_rows(sf1,sf2)
-
-fig2 <- ggplot(data=sf3) + 
-  geom_sf(aes(fill=as.factor(South_Cluster))) +
-  geom_sf(data=ca.tracts,fill=NA) +
-  theme_bw() +
-  theme(panel.border = element_blank(),
-        panel.grid = element_blank(),
-        axis.ticks.x = element_blank(),
-        axis.text.x = element_blank(),
-        axis.ticks.y = element_blank(),
-        axis.text.y = element_blank(),
-        axis.title = element_blank()) 
-fig2  
-
-sf4 <- filter(df.tract.r, SI01>1)
-
-sf1 <- filter(sf4, refinery_name=="Marathon Carson" | refinery_name=="Chevron El Segundo" | 
-                refinery_name=="Valero Wilmington" | refinery_name=="PBF Torrance" | 
-                refinery_name=="Phillips 66 Wilmington" | refinery_name=="AltAir Paramount") %>% 
-  mutate(refinery_name = as.factor(refinery_name)) %>% 
-  rename(South_Cluster=SI01)
-sf2 <- filter(sf4, refinery_name!="Marathon Carson" & refinery_name!="Chevron El Segundo" & 
-                refinery_name!="Valero Wilmington" & refinery_name!="PBF Torrance" & 
-                refinery_name!="Phillips 66 Wilmington" & refinery_name!="AltAir Paramount") %>% 
-  mutate(refinery_name = as.factor(refinery_name)) %>% 
-  rename(North_Cluster=SI01)
-
-sf1 <- mutate(sf1, South_Cluster = 1,North_Cluster=0)
-sf2 <- mutate(sf2, North_Cluster = 1,South_Cluster=0)
-
-sf3 <- bind_rows(sf1,sf2)
-
-fig3 <- ggplot(data=sf3) + 
-  geom_sf(aes(fill=as.factor(South_Cluster))) +
-  geom_sf(data=ca.tracts,fill=NA) +
-  theme_bw() +
-  theme(panel.border = element_blank(),
-        panel.grid = element_blank(),
-        axis.ticks.x = element_blank(),
-        axis.text.x = element_blank(),
-        axis.ticks.y = element_blank(),
-        axis.text.y = element_blank(),
-        axis.title = element_blank()) 
-fig3  
-
-ggsave('ns_emp_map.png',fig1)
-ggsave('ns_emp_map_v2.png',fig2)
-ggsave('ns_emp_map_v3.png',fig3)
-rm(fig1,fig2,fig3,sf1,sf2,sf3,sf4,df.tract.r,df.tract,df.refinery,df,ca.tracts)
-
-# SHARE OF ZIP CODE OUTPUT VALUE FROM REFINING 
-filenames <- list.files(".", pattern="Industry Detail_9*", full.names=TRUE)
-zips <- as.numeric(gsub(".*?([0-9]+).*", "\\1", filenames))
-zips <- rep(zips,times=c(rep(528,12)))
-ldf <- lapply(filenames, fread)
-df <- rbindlist(ldf) %>% 
-  filter(is.na(V1)==FALSE)
-df$zcta <- zips 
-names(df) <- make.names(names(df))
-str(df)
-rm(zips,filenames,ldf)
-
-df <- mutate(df,
-             Total.Output = str_remove_all(Total.Output,"[$,]"),
-             Total.Output = as.numeric(Total.Output)) 
-
-df_total <- group_by(df,
-                     zcta) %>% 
-  summarize(Total.Output = sum(Total.Output)) %>% 
-  ungroup()
-
-df_refining <- group_by(df,
-                        zcta) %>% 
-  filter(Industry.Code==146) %>% 
-  summarize(refinery.output = sum(Total.Output)) %>% 
-  ungroup() %>% 
-  left_join(df_total,by="zcta") %>% 
-  mutate(share = refinery.output/Total.Output)
-
-summary(df_refining$share)
-
-rm(df,df_total)
-
-# SHARE OF CLUSTER AND STATEWIDE OUTPUT 
-
-df_nc <- fread('Industry Detail_north_cluster.csv') 
-names(df_nc) <- make.names(names(df_nc))
-df_nc <- filter(df_nc,
-                Industry.Code==146 & is.na(V1)==FALSE) %>% 
-  mutate(Total.Output = str_remove_all(Total.Output,"[$,]"),
-         Total.Output = as.numeric(Total.Output)) %>% 
-  summarize(total_north = sum(Total.Output))
-str(df_nc)
-
-df_sc <- fread('Industry Detail_south_cluster.csv') 
-names(df_sc) <- make.names(names(df_sc))
-df_sc <- filter(df_sc,
-                Industry.Code==146 & is.na(V1)==FALSE) %>% 
-  mutate(Total.Output = str_remove_all(Total.Output,"[$,]"),
-         Total.Output = as.numeric(Total.Output)) %>% 
-  summarize(total_south = sum(Total.Output))
-str(df_sc)
-
-
-df_refining$total_north <- rep(df_nc$total_north,12)
-df_refining$total_south <- rep(df_sc$total_south,12)
-df_refining <- mutate(df_refining, 
-                      share_north = refinery.output/total_north,
-                      share_south = refinery.output/total_south,
-                      share_state = refinery.output/(total_north+total_south))
-summary(df_refining$share_north)
-summary(df_refining$share_south)
-summary(df_refining$share_state)
-
-
-
-
-### SHARE OF WORKERS IN ESTABLISHMENT TRACT THAT LIVE IN HOME TRACT 
-
-df.tract <- fread('ca_od_main_JT02_2020.csv') %>%
-  mutate(w_tract_geocode = substr(as.character(w_geocode),1,10), 
-         h_tract_geocode = substr(as.character(h_geocode),1,10)) %>% 
-  dplyr::select(w_tract_geocode, h_tract_geocode,S000,SI01,SI02,SI03) %>%
-  group_by(w_tract_geocode,h_tract_geocode) %>% 
-  summarize(S000 = sum(S000,na.rm=TRUE),
-            SI01 = sum(SI01,na.rm=TRUE),
-            SI02 = sum(SI02,na.rm=TRUE),
-            SI03 = sum(SI03,na.rm=TRUE)) %>%
-  ungroup() 
-
-df.work.total <- group_by(df.tract,
-                          w_tract_geocode) %>%
-  summarize(S000_w = sum(S000,na.rm=TRUE),
-            SI01_w = sum(SI01,na.rm=TRUE),
-            SI02_w = sum(SI02,na.rm=TRUE),
-            SI03_w = sum(SI03,na.rm=TRUE)) %>%
-  ungroup()
-
-df.tract <- left_join(df.tract,df.work.total, by="w_tract_geocode") %>%
-  mutate(S000_share = S000/S000_w,
-         SI01_share = SI01/SI01_w,
-         SI02_share = SI02/SI02_w,
-         SI03_share = SI03/SI03_w)
-summary(df.tract$S000_share)
-summary(df.tract$SI01_share)
-summary(df.tract$SI02_share)
-summary(df.tract$SI03_share)
-
-df.tract <- mutate(df.tract,
-                   w_tract_geocode = str_pad(as.character(w_tract_geocode),11,pad="0"),
-                   h_tract_geocode = str_pad(as.character(h_tract_geocode),11,pad="0"))
-
-######################################## 
-# Census Tract to Zip Code Xwalk
-
-df <- read_excel('TRACT_ZIP_122024.xlsx') %>% 
-  filter(USPS_ZIP_PREF_STATE=="CA") %>% 
-  dplyr::select(TRACT,ZIP,BUS_RATIO) 
-str(df)
-summary(df$BUS_RATIO)
-
-
-# Crosswalk work census tract to work zip code 
-
-df2 <- left_join(df.tract,df,by=c("w_tract_geocode"="TRACT")) 
-
-df2 <- mutate(df2, 
-              S000 = ifelse(is.na(BUS_RATIO)==FALSE,S000*BUS_RATIO,S000),
-              SI01 = ifelse(is.na(BUS_RATIO)==FALSE,SI01*BUS_RATIO,SI01),
-              SI02 = ifelse(is.na(BUS_RATIO)==FALSE,SI02*BUS_RATIO,SI02),
-              SI03 = ifelse(is.na(BUS_RATIO)==FALSE,SI03*BUS_RATIO,SI03)) %>% 
-  filter(is.na(ZIP)==FALSE) %>% 
-  group_by(ZIP,h_tract_geocode) %>% 
-  summarize(S000 = sum(S000,na.rm=TRUE),
-            SI01 = sum(SI01,na.rm=TRUE),
-            SI02 = sum(SI02,na.rm=TRUE),
-            SI03 = sum(SI03,na.rm=TRUE)) %>% 
-  ungroup()
-
-total.zip <- group_by(df2,
-                      ZIP) %>% 
-  summarize(S000_w = sum(S000,na.rm=TRUE),
-            SI01_w = sum(SI01,na.rm=TRUE),
-            SI02_w = sum(SI02,na.rm=TRUE),
-            SI03_w = sum(SI03,na.rm=TRUE)) %>% 
-  ungroup()
-
-df2 <- left_join(df2,total.zip, by="ZIP") %>%
-  mutate(S000_share = S000/S000_w,
-         SI01_share = SI01/SI01_w,
-         SI02_share = SI02/SI02_w,
-         SI03_share = SI03/SI03_w)
-summary(df2$S000_share)
-summary(df2$SI01_share)
-summary(df2$SI02_share)
-summary(df2$SI03_share)
-
-######################################## 
-# IMPLAN zip code tables 
-
-load.implan.zip.agg <- function(x){
-  df <- fread(paste0("implan-zip/",x)) 
-  names(df) <- make.names(names(df))
-  separated.string <- str_split(x,"_")
-  
-  df <- dplyr::select(df,
-                      Industry.Code,Description,Total.Output) %>% 
-    filter(Description != "" & Description != "* Employment and payroll of federal govt, military" & Description != "* Employment and payroll of federal govt, non-military") %>% 
-    mutate(zip = str_extract(separated.string[[1]][4],pat <- "(\\d)+"),
-           county = separated.string[[1]][3],
-           Total.Output = as.numeric(str_remove_all(Total.Output,"[$,]"))) 
-  
-  
-  df <- left_join(df,implan.naics.xwalk, by=c("Industry.Code"="Implan546Index")) %>% 
-    mutate(ind_type = ifelse((substr(as.character(`2017NaicsCode`),1,2)=="11" | 
-                                substr(as.character(`2017NaicsCode`),1,2)=="21" | 
-                                substr(as.character(`2017NaicsCode`),1,2)=="23" | 
-                                substr(as.character(`2017NaicsCode`),1,2)=="31" | 
-                                substr(as.character(`2017NaicsCode`),1,2)=="32" | 
-                                substr(as.character(`2017NaicsCode`),1,2)=="33" | 
-                                Description == "Construction of new commercial structures, including farm structures" | 
-                                Description == "Construction of new multifamily residential structures" | 
-                                Description == "Construction of other new residential structures" | 
-                                Description == "Maintenance and repair construction of nonresidential structures" | 
-                                Description == "Maintenance and repair construction of residential structures" | 
-                                Description == "Maintenance and repair construction of highways, streets, bridges, and tunnels" | 
-                                Description == "Dog and cat food manufacturing" | 
-                                Description == "Other animal food manufacturing" | 
-                                Description == "Flour milling" | 
-                                Description == "Rice milling" | 
-                                Description == "Malt manufacturing" | 
-                                Description == "Construction of other new nonresidential structures" | 
-                                Description == "Construction of new single-family residential structures"), "SI01",
-                             ifelse((substr(as.character(`2017NaicsCode`),1,2)=="42" | 
-                                       substr(as.character(`2017NaicsCode`),1,2)=="44" | 
-                                       substr(as.character(`2017NaicsCode`),1,2)=="45" | 
-                                       substr(as.character(`2017NaicsCode`),1,2)=="48" | 
-                                       substr(as.character(`2017NaicsCode`),1,2)=="49" |
-                                       substr(as.character(`2017NaicsCode`),1,2)=="22"), "SI02","SI03")),
-           ind_type = ifelse(Description=="Veterinary services","SI03",ind_type),
-           cluster = ifelse((county == "Ventura County" | 
-                               county == "Los Angeles County" | 
-                               county == "San Bernardino County" | 
-                               county == "Orange County" |
-                               county == "Riverside County" |
-                               county == "Imperial County" |
-                               county == "San Diego County"), "south", "north")) %>% 
-    dplyr::select(-`2017NaicsCode`,-Description,-Industry.Code,-NaicsDescription) %>% 
-    group_by(zip,county,cluster,ind_type) %>% 
-    summarize(Total.Output = sum(Total.Output,na.rm=TRUE)) %>% 
-    ungroup() %>% 
-    pivot_wider(id_cols=c("zip","county","cluster"),values_from = "Total.Output", names_from = "ind_type",names_prefix = "output_")
-  
-}
-
-
-# disaggregated by IMPLAN industry and zip 
-
-load.implan.zip <- function(x){
-  df <- fread(paste0("implan-zip/",x)) 
-  names(df) <- make.names(names(df))
-  separated.string <- str_split(x,"_")
-  
-  df <- dplyr::select(df,
-                      Industry.Code,Description,Total.Output) %>% 
-    filter(Description != "" & Description != "* Employment and payroll of federal govt, military" & Description != "* Employment and payroll of federal govt, non-military") %>% 
-    mutate(zip = str_extract(separated.string[[1]][4],pat <- "(\\d)+"),
-           county = separated.string[[1]][3],
-           Total.Output = as.numeric(str_remove_all(Total.Output,"[$,]"))) 
-  
-  
-  df <- left_join(df,implan.naics.xwalk, by=c("Industry.Code"="Implan546Index")) %>% 
-    mutate(ind_type = ifelse((substr(as.character(`2017NaicsCode`),1,2)=="11" | 
-                                substr(as.character(`2017NaicsCode`),1,2)=="21" | 
-                                substr(as.character(`2017NaicsCode`),1,2)=="23" | 
-                                substr(as.character(`2017NaicsCode`),1,2)=="31" | 
-                                substr(as.character(`2017NaicsCode`),1,2)=="32" | 
-                                substr(as.character(`2017NaicsCode`),1,2)=="33" | 
-                                Description == "Construction of new commercial structures, including farm structures" | 
-                                Description == "Construction of new multifamily residential structures" | 
-                                Description == "Construction of other new residential structures" | 
-                                Description == "Maintenance and repair construction of nonresidential structures" | 
-                                Description == "Maintenance and repair construction of residential structures" | 
-                                Description == "Maintenance and repair construction of highways, streets, bridges, and tunnels" | 
-                                Description == "Dog and cat food manufacturing" | 
-                                Description == "Other animal food manufacturing" | 
-                                Description == "Flour milling" | 
-                                Description == "Rice milling" | 
-                                Description == "Malt manufacturing" | 
-                                Description == "Construction of other new nonresidential structures" | 
-                                Description == "Construction of new single-family residential structures"), "SI01",
-                             ifelse((substr(as.character(`2017NaicsCode`),1,2)=="42" | 
-                                       substr(as.character(`2017NaicsCode`),1,2)=="44" | 
-                                       substr(as.character(`2017NaicsCode`),1,2)=="45" | 
-                                       substr(as.character(`2017NaicsCode`),1,2)=="48" | 
-                                       substr(as.character(`2017NaicsCode`),1,2)=="49" |
-                                       substr(as.character(`2017NaicsCode`),1,2)=="22"), "SI02","SI03")),
-           ind_type = ifelse(Description=="Veterinary services","SI03",ind_type),
-           cluster = ifelse((county == "Ventura County" | 
-                               county == "Los Angeles County" | 
-                               county == "San Bernardino County" | 
-                               county == "Orange County" |
-                               county == "Riverside County" |
-                               county == "Imperial County" |
-                               county == "San Diego County"), "south", "north")) %>% 
-    dplyr::select(-`2017NaicsCode`) 
-}
-
-file.names <- list.files(path="implan-zip",pattern="*.csv")
-implan.naics.xwalk <- read_xlsx("Bridge_2017NaicsToImplan546.xlsx") %>% 
-  dplyr::select(Implan546Index,`2017NaicsCode`) %>% 
-  mutate(`2017NaicsCode` = substr(`2017NaicsCode`,1,2)) %>% 
-  group_by(Implan546Index,`2017NaicsCode`) %>% 
-  summarize() %>% 
-  ungroup()
-
-
-output <- lapply(file.names,load.implan.zip) %>% 
-  bind_rows()
-
-fwrite(output,'implan_zip_output.csv')
-
-
diff --git a/before-targets/exploratory/ncomms_labor_revisions.R b/before-targets/exploratory/ncomms_labor_revisions.R
deleted file mode 100644
index da62e5b..0000000
--- a/before-targets/exploratory/ncomms_labor_revisions.R
+++ /dev/null
@@ -1,687 +0,0 @@
-###### NCOMMS REVISIONS 
-
-rm(list = ls())
-
-list.of.packages <- c("dplyr", "data.table", "lubridate", "tidyr", "readxl", "fixest", "modelsummary", "flextable")
-new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[, "Package"])]
-if (length(new.packages)) install.packages(new.packages)
-
-library(lubridate)
-library(tidyr)
-library(dplyr)
-library(data.table)
-library(readxl)
-library(stringr)
-library(readr)
-library(fixest)
-library(modelsummary)
-library(flextable)
-library(svglite)
-library(sf)
-library(ggplot2)
-library(tigris)
-library(cowplot)
-library(janitor)
-
-
-# setwd('G:/Shared drives/emlab/projects/current-projects/calepa-cn/outputs/academic-out/refining/figures')
-setwd("~/Dropbox/ou/ncomms-revisions")
-
-### define function for "not in"
-"%!in%" <- function(x, y) !("%in%"(x, y))
-
-# Check that string doesn't match any non-letter
-letters_only <- function(x) !grepl("[^A-Za-z*-]", x)
-
-
-# EMPLOYMENT MULTIPLIERS (FROM IMPLAN)
-rev.mrio <- fread('multipliers/mrio/output-value/20250402-ns_cluster_mrio-Detail Economic Indicators.csv') %>% 
-  mutate(cluster = ifelse(DestinationRegion=="north_cluster (2023)","north","south")) %>% 
-  dplyr::select(OriginRegion,DestinationRegion,IndustryCode,ImpactType,Employment,EmployeeCompensation,cluster) %>% 
-  rename(emp.rev = Employment, 
-         ec.rev = EmployeeCompensation)
-li.mrio <- fread('multipliers/mrio/labor-income/20250402-ns_cluster_mrio-Detail Economic Indicators.csv') %>% 
-  mutate(cluster = ifelse(DestinationRegion=="north_cluster (2023)","north","south")) %>% 
-  dplyr::select(OriginRegion,DestinationRegion,IndustryCode,ImpactType,Employment,EmployeeCompensation,cluster) %>% 
-  rename(emp.li = Employment, 
-         ec.li = EmployeeCompensation)
-mrio <- left_join(rev.mrio,li.mrio, by=c("OriginRegion","DestinationRegion","IndustryCode","ImpactType","cluster"))
-
-rev.not.mrio <- fread('multipliers/non-mrio/output-value/20250402-ns_cluster-Detail Economic Indicators.csv') %>% 
-  mutate(cluster = ifelse(DestinationRegion=="north_cluster (2023)","north","south")) %>% 
-  dplyr::select(OriginRegion,DestinationRegion,IndustryCode,ImpactType,Employment,EmployeeCompensation,cluster) %>% 
-  rename(emp.rev = Employment, 
-         ec.rev = EmployeeCompensation)
-li.not.mrio <- fread('multipliers/non-mrio/labor-income/20250402-ns_cluster-Detail Economic Indicators.csv') %>% 
-  mutate(cluster = ifelse(DestinationRegion=="north_cluster (2023)","north","south")) %>% 
-  dplyr::select(OriginRegion,DestinationRegion,IndustryCode,ImpactType,Employment,EmployeeCompensation,cluster) %>% 
-  rename(emp.li = Employment, 
-         ec.li = EmployeeCompensation)
-not.mrio <- left_join(rev.not.mrio,li.not.mrio, by=c("OriginRegion","DestinationRegion","IndustryCode","ImpactType","cluster"))
-
-rev.statewide <- fread('multipliers/statewide/output-value/20250402-statewide_ca_refining-Detail Economic Indicators.csv') %>% 
-  dplyr::select(OriginRegion,DestinationRegion,IndustryCode,ImpactType,Employment,EmployeeCompensation) %>% 
-  rename(emp.rev = Employment, 
-         ec.rev = EmployeeCompensation)
-li.statewide <- fread('multipliers/statewide/labor-income/20250402-statewide_ca_refining-Detail Economic Indicators.csv') %>% 
-  dplyr::select(OriginRegion,DestinationRegion,IndustryCode,ImpactType,Employment,EmployeeCompensation) %>% 
-  rename(emp.li = Employment, 
-         ec.li = EmployeeCompensation)
-statewide <- left_join(rev.statewide,li.statewide, by=c("OriginRegion","DestinationRegion","IndustryCode","ImpactType"))
-
-rm(rev.mrio,li.mrio,rev.not.mrio,li.not.mrio,rev.statewide,li.statewide)
-
-
-### ADD UP WORKERS FROM BLOCK TO TRACT LEVEL
-
-df.tract <- fread('ca_od_main_JT02_2020.csv') %>%
-  mutate(w_tract_geocode = substr(as.character(w_geocode),1,10), 
-         h_tract_geocode = substr(as.character(h_geocode),1,10)) %>% 
-  dplyr::select(w_tract_geocode, h_tract_geocode,S000,SI01,SI02,SI03) %>%
-  group_by(w_tract_geocode,h_tract_geocode) %>% 
-  summarize(S000 = sum(S000,na.rm=TRUE),
-            SI01 = sum(SI01,na.rm=TRUE),
-            SI02 = sum(SI02,na.rm=TRUE),
-            SI03 = sum(SI03,na.rm=TRUE)) %>%
-  ungroup() 
-
-df.tract <- mutate(df.tract,
-       w_tract_geocode = str_pad(as.character(w_tract_geocode),11,pad="0"),
-       h_tract_geocode = str_pad(as.character(h_tract_geocode),11,pad="0"))
-
-######################################## 
-# Census Tract to Zip Code Xwalk
-
-df <- read_excel('TRACT_ZIP_122024.xlsx') %>% 
-  filter(USPS_ZIP_PREF_STATE=="CA") %>% 
-  dplyr::select(TRACT,ZIP,BUS_RATIO) 
-str(df)
-summary(df$BUS_RATIO)
-
-
-# Crosswalk work census tract to work zip code 
-
-df.zip <- left_join(df.tract,df,by=c("w_tract_geocode"="TRACT")) 
-
-df.zip <- mutate(df.zip, 
-              S000 = ifelse(is.na(BUS_RATIO)==FALSE,S000*BUS_RATIO,S000),
-              SI01 = ifelse(is.na(BUS_RATIO)==FALSE,SI01*BUS_RATIO,SI01),
-              SI02 = ifelse(is.na(BUS_RATIO)==FALSE,SI02*BUS_RATIO,SI02),
-              SI03 = ifelse(is.na(BUS_RATIO)==FALSE,SI03*BUS_RATIO,SI03)) %>% 
-  filter(is.na(ZIP)==FALSE) %>% 
-  group_by(ZIP,h_tract_geocode) %>% 
-  summarize(S000 = sum(S000,na.rm=TRUE),
-            SI01 = sum(SI01,na.rm=TRUE),
-            SI02 = sum(SI02,na.rm=TRUE),
-            SI03 = sum(SI03,na.rm=TRUE)) %>% 
-  ungroup()
-
-total.zip <- group_by(df.zip,
-                      ZIP) %>% 
-  summarize(S000_w = sum(S000,na.rm=TRUE),
-            SI01_w = sum(SI01,na.rm=TRUE),
-            SI02_w = sum(SI02,na.rm=TRUE),
-            SI03_w = sum(SI03,na.rm=TRUE)) %>% 
-  ungroup()
-
-df.zip <- left_join(df.zip,total.zip, by="ZIP") %>%
-  mutate(S000_share = ifelse(S000_w > 0, S000/S000_w, 0),
-         SI01_share = ifelse(SI01_w > 0, SI01/SI01_w, 0),
-         SI02_share = ifelse(SI02_w > 0, SI02/SI02_w, 0),
-         SI03_share = ifelse(SI03_w > 0, SI03/SI03_w, 0))
-summary(df.zip$S000_share)
-summary(df.zip$SI01_share)
-summary(df.zip$SI02_share)
-summary(df.zip$SI03_share)
-
-rm(total.zip)
-
-######################################## 
-# IMPLAN zip code tables 
-# 
-# load.implan.zip.agg <- function(x){
-#   df <- fread(paste0("implan-zip/",x)) 
-#   names(df) <- make.names(names(df))
-#   separated.string <- str_split(x,"_")
-#   
-#   df <- dplyr::select(df,
-#                       Industry.Code,Description,Total.Output) %>% 
-#     filter(Description != "" & Description != "* Employment and payroll of federal govt, military" & Description != "* Employment and payroll of federal govt, non-military") %>% 
-#     mutate(zip = str_extract(separated.string[[1]][4],pat <- "(\\d)+"),
-#            county = separated.string[[1]][3],
-#            Total.Output = as.numeric(str_remove_all(Total.Output,"[$,]"))) 
-#   
-# 
-#   df <- left_join(df,implan.naics.xwalk, by=c("Industry.Code"="Implan546Index")) %>% 
-#     mutate(ind_type = ifelse((substr(as.character(`2017NaicsCode`),1,2)=="11" | 
-#                                 substr(as.character(`2017NaicsCode`),1,2)=="21" | 
-#                                 substr(as.character(`2017NaicsCode`),1,2)=="23" | 
-#                                 substr(as.character(`2017NaicsCode`),1,2)=="31" | 
-#                                 substr(as.character(`2017NaicsCode`),1,2)=="32" | 
-#                                 substr(as.character(`2017NaicsCode`),1,2)=="33" | 
-#                                 Description == "Construction of new commercial structures, including farm structures" | 
-#                                 Description == "Construction of new multifamily residential structures" | 
-#                                 Description == "Construction of other new residential structures" | 
-#                                 Description == "Maintenance and repair construction of nonresidential structures" | 
-#                                 Description == "Maintenance and repair construction of residential structures" | 
-#                                 Description == "Maintenance and repair construction of highways, streets, bridges, and tunnels" | 
-#                                 Description == "Dog and cat food manufacturing" | 
-#                                 Description == "Other animal food manufacturing" | 
-#                                 Description == "Flour milling" | 
-#                                 Description == "Rice milling" | 
-#                                 Description == "Malt manufacturing" | 
-#                                 Description == "Construction of other new nonresidential structures" | 
-#                                 Description == "Construction of new single-family residential structures"), "SI01",
-#                              ifelse((substr(as.character(`2017NaicsCode`),1,2)=="42" | 
-#                                        substr(as.character(`2017NaicsCode`),1,2)=="44" | 
-#                                        substr(as.character(`2017NaicsCode`),1,2)=="45" | 
-#                                        substr(as.character(`2017NaicsCode`),1,2)=="48" | 
-#                                        substr(as.character(`2017NaicsCode`),1,2)=="49" |
-#                                        substr(as.character(`2017NaicsCode`),1,2)=="22"), "SI02","SI03")),
-#            ind_type = ifelse(Description=="Veterinary services","SI03",ind_type),
-#            cluster = ifelse((county == "Ventura County" | 
-#                                county == "Los Angeles County" | 
-#                                county == "San Bernardino County" | 
-#                                county == "Orange County" |
-#                                county == "Riverside County" |
-#                                county == "Imperial County" |
-#                                county == "San Diego County"), "south", "north")) %>% 
-#     dplyr::select(-`2017NaicsCode`,-Description,-Industry.Code,-NaicsDescription) %>% 
-#     group_by(zip,county,cluster,ind_type) %>% 
-#     summarize(Total.Output = sum(Total.Output,na.rm=TRUE)) %>% 
-#     ungroup() %>% 
-#     pivot_wider(id_cols=c("zip","county","cluster"),values_from = "Total.Output", names_from = "ind_type",names_prefix = "output_")
-#   
-# }
-# 
-# 
-# # disaggregated by IMPLAN industry and zip 
-# 
-# load.implan.zip <- function(x){
-#   df <- fread(paste0("implan-zip/",x)) 
-#   names(df) <- make.names(names(df))
-#   separated.string <- str_split(x,"_")
-#   
-#   df <- dplyr::select(df,
-#                       Industry.Code,Description,Total.Output) %>% 
-#     filter(Description != "" & Description != "* Employment and payroll of federal govt, military" & Description != "* Employment and payroll of federal govt, non-military") %>% 
-#     mutate(zip = str_extract(separated.string[[1]][4],pat <- "(\\d)+"),
-#            county = separated.string[[1]][3],
-#            Total.Output = as.numeric(str_remove_all(Total.Output,"[$,]"))) 
-#   
-#   
-#   df <- left_join(df,implan.naics.xwalk, by=c("Industry.Code"="Implan546Index")) %>% 
-#     mutate(ind_type = ifelse((substr(as.character(`2017NaicsCode`),1,2)=="11" | 
-#                                 substr(as.character(`2017NaicsCode`),1,2)=="21" | 
-#                                 substr(as.character(`2017NaicsCode`),1,2)=="23" | 
-#                                 substr(as.character(`2017NaicsCode`),1,2)=="31" | 
-#                                 substr(as.character(`2017NaicsCode`),1,2)=="32" | 
-#                                 substr(as.character(`2017NaicsCode`),1,2)=="33" | 
-#                                 Description == "Construction of new commercial structures, including farm structures" | 
-#                                 Description == "Construction of new multifamily residential structures" | 
-#                                 Description == "Construction of other new residential structures" | 
-#                                 Description == "Maintenance and repair construction of nonresidential structures" | 
-#                                 Description == "Maintenance and repair construction of residential structures" | 
-#                                 Description == "Maintenance and repair construction of highways, streets, bridges, and tunnels" | 
-#                                 Description == "Dog and cat food manufacturing" | 
-#                                 Description == "Other animal food manufacturing" | 
-#                                 Description == "Flour milling" | 
-#                                 Description == "Rice milling" | 
-#                                 Description == "Malt manufacturing" | 
-#                                 Description == "Construction of other new nonresidential structures" | 
-#                                 Description == "Construction of new single-family residential structures"), "SI01",
-#                              ifelse((substr(as.character(`2017NaicsCode`),1,2)=="42" | 
-#                                        substr(as.character(`2017NaicsCode`),1,2)=="44" | 
-#                                        substr(as.character(`2017NaicsCode`),1,2)=="45" | 
-#                                        substr(as.character(`2017NaicsCode`),1,2)=="48" | 
-#                                        substr(as.character(`2017NaicsCode`),1,2)=="49" |
-#                                        substr(as.character(`2017NaicsCode`),1,2)=="22"), "SI02","SI03")),
-#            ind_type = ifelse(Description=="Veterinary services","SI03",ind_type),
-#            cluster = ifelse((county == "Ventura County" | 
-#                                county == "Los Angeles County" | 
-#                                county == "San Bernardino County" | 
-#                                county == "Orange County" |
-#                                county == "Riverside County" |
-#                                county == "Imperial County" |
-#                                county == "San Diego County"), "south", "north")) %>% 
-#     dplyr::select(-`2017NaicsCode`) 
-# }
-# 
-# file.names <- list.files(path="implan-zip",pattern="*.csv")
-# implan.naics.xwalk <- read_xlsx("Bridge_2017NaicsToImplan546.xlsx") %>% 
-#   dplyr::select(Implan546Index,`2017NaicsCode`) %>% 
-#   mutate(`2017NaicsCode` = substr(`2017NaicsCode`,1,2)) %>% 
-#   group_by(Implan546Index,`2017NaicsCode`) %>% 
-#   summarize() %>% 
-#   ungroup()
-# 
-# 
-# output <- lapply(file.names,load.implan.zip) %>% 
-#   bind_rows()
-# 
-# fwrite(output,'implan_zip_output.csv')
-
-## IMPORT ZIP CODE TABLES, CALCULATE SHARE OF CLUSTER OR STATEWIDE OUTPUT ATTRIBUTABLE TO EACH ZIP CODE 
-zip.output <- fread('implan_zip_output.csv')
-
-cluster.output <- group_by(zip.output,
-                           cluster,Industry.Code) %>% 
-  summarize(cluster_output = sum(Total.Output,na.rm=TRUE)) %>% 
-  ungroup()
-
-state.output <- group_by(zip.output,
-                         Industry.Code) %>% 
-  summarize(state_output = sum(Total.Output,na.rm=TRUE)) %>% 
-  ungroup()
-
-zip.output <- left_join(zip.output,cluster.output,by=c("cluster","Industry.Code")) %>% 
-  mutate(share_output = ifelse(cluster_output > 0, Total.Output/cluster_output,0)) %>% 
-  dplyr::select(-cluster_output)
-summary(zip.output$share_output)
-
-zip.output <- left_join(zip.output,state.output,by=c("Industry.Code")) %>% 
-  mutate(share_output_state = ifelse(state_output > 0, Total.Output/state_output,0)) %>% 
-  dplyr::select(-state_output)
-summary(zip.output$share_output_state)
-
-rm(cluster.output,state.output)
-
-## JOIN ZIP CODE SHARE OF OUTPUT TO IMPLAN MULTIPLIERS, CALCULATE EFFECT FOR EACH ZIP CODE, ADD UP ACROSS INDUSTRIES
-
-### MRIO 
-mrio.multipliers <- left_join(mrio,zip.output,by=c("IndustryCode"="Industry.Code","cluster")) %>% 
-  mutate(emp.rev = emp.rev*share_output,
-         ec.rev = ec.rev*share_output,
-         emp.li = emp.li*share_output,
-         ec.li = ec.li*share_output) %>% 
-  group_by(OriginRegion,cluster,zip,ImpactType,county,ind_type) %>% 
-  summarize(emp.rev = sum(emp.rev, na.rm=TRUE),
-            ec.rev = sum(ec.rev, na.rm=TRUE),
-            emp.li = sum(emp.li, na.rm=TRUE),
-            ec.li = sum(ec.li, na.rm=TRUE)) %>% 
-  ungroup()
-
-
-mrio.multipliers <- pivot_wider(mrio.multipliers,
-                    id_cols=c("zip","county","cluster","OriginRegion","ImpactType"),values_from = c("emp.rev","ec.rev","emp.li","ec.li"), names_from = "ind_type") %>% 
-  mutate(emp.rev_SI01 = ifelse(is.na(emp.rev_SI01)==TRUE,0,emp.rev_SI01),
-         emp.rev_SI02 = ifelse(is.na(emp.rev_SI02)==TRUE,0,emp.rev_SI02),
-         emp.rev_SI03 = ifelse(is.na(emp.rev_SI03)==TRUE,0,emp.rev_SI03),
-         ec.rev_SI01 = ifelse(is.na(ec.rev_SI01)==TRUE,0,ec.rev_SI01),
-         ec.rev_SI02 = ifelse(is.na(ec.rev_SI02)==TRUE,0,ec.rev_SI02),
-         ec.rev_SI03 = ifelse(is.na(ec.rev_SI03)==TRUE,0,ec.rev_SI03),
-         emp.li_SI01 = ifelse(is.na(emp.li_SI01)==TRUE,0,emp.li_SI01),
-         emp.li_SI02 = ifelse(is.na(emp.li_SI02)==TRUE,0,emp.li_SI02),
-         emp.li_SI03 = ifelse(is.na(emp.li_SI03)==TRUE,0,emp.li_SI03),
-         ec.li_SI01 = ifelse(is.na(ec.li_SI01)==TRUE,0,ec.li_SI01),
-         ec.li_SI02 = ifelse(is.na(ec.li_SI02)==TRUE,0,ec.li_SI02),
-         ec.li_SI03 = ifelse(is.na(ec.li_SI03)==TRUE,0,ec.li_SI03))
-
-### NOT MRIO 
-not.mrio.multipliers <- left_join(not.mrio,zip.output,by=c("IndustryCode"="Industry.Code","cluster")) %>% 
-  mutate(emp.rev = emp.rev*share_output,
-         ec.rev = ec.rev*share_output,
-         emp.li = emp.li*share_output,
-         ec.li = ec.li*share_output) %>% 
-  group_by(OriginRegion,cluster,zip,ImpactType,county,ind_type) %>% 
-  summarize(emp.rev = sum(emp.rev, na.rm=TRUE),
-            ec.rev = sum(ec.rev, na.rm=TRUE),
-            emp.li = sum(emp.li, na.rm=TRUE),
-            ec.li = sum(ec.li, na.rm=TRUE)) %>% 
-  ungroup()
-
-
-not.mrio.multipliers <- pivot_wider(not.mrio.multipliers,
-                                id_cols=c("zip","county","cluster","OriginRegion","ImpactType"),values_from = c("emp.rev","ec.rev","emp.li","ec.li"), names_from = "ind_type") %>% 
-  mutate(emp.rev_SI01 = ifelse(is.na(emp.rev_SI01)==TRUE,0,emp.rev_SI01),
-         emp.rev_SI02 = ifelse(is.na(emp.rev_SI02)==TRUE,0,emp.rev_SI02),
-         emp.rev_SI03 = ifelse(is.na(emp.rev_SI03)==TRUE,0,emp.rev_SI03),
-         ec.rev_SI01 = ifelse(is.na(ec.rev_SI01)==TRUE,0,ec.rev_SI01),
-         ec.rev_SI02 = ifelse(is.na(ec.rev_SI02)==TRUE,0,ec.rev_SI02),
-         ec.rev_SI03 = ifelse(is.na(ec.rev_SI03)==TRUE,0,ec.rev_SI03),
-         emp.li_SI01 = ifelse(is.na(emp.li_SI01)==TRUE,0,emp.li_SI01),
-         emp.li_SI02 = ifelse(is.na(emp.li_SI02)==TRUE,0,emp.li_SI02),
-         emp.li_SI03 = ifelse(is.na(emp.li_SI03)==TRUE,0,emp.li_SI03),
-         ec.li_SI01 = ifelse(is.na(ec.li_SI01)==TRUE,0,ec.li_SI01),
-         ec.li_SI02 = ifelse(is.na(ec.li_SI02)==TRUE,0,ec.li_SI02),
-         ec.li_SI03 = ifelse(is.na(ec.li_SI03)==TRUE,0,ec.li_SI03))
-
-### STATEWIDE 
-
-state.multipliers <- left_join(statewide,zip.output,by=c("IndustryCode"="Industry.Code")) %>% 
-  mutate(emp.rev = emp.rev*share_output_state,
-         ec.rev = ec.rev*share_output_state,
-         emp.li = emp.li*share_output_state,
-         ec.li = ec.li*share_output_state) %>% 
-  group_by(OriginRegion,zip,ImpactType,county,ind_type) %>% 
-  summarize(emp.rev = sum(emp.rev, na.rm=TRUE),
-            ec.rev = sum(ec.rev, na.rm=TRUE),
-            emp.li = sum(emp.li, na.rm=TRUE),
-            ec.li = sum(ec.li, na.rm=TRUE)) %>% 
-  ungroup()
-
-
-state.multipliers <- pivot_wider(state.multipliers,
-                                id_cols=c("zip","county","OriginRegion","ImpactType"),values_from = c("emp.rev","ec.rev","emp.li","ec.li"), names_from = "ind_type") %>% 
-  mutate(emp.rev_SI01 = ifelse(is.na(emp.rev_SI01)==TRUE,0,emp.rev_SI01),
-         emp.rev_SI02 = ifelse(is.na(emp.rev_SI02)==TRUE,0,emp.rev_SI02),
-         emp.rev_SI03 = ifelse(is.na(emp.rev_SI03)==TRUE,0,emp.rev_SI03),
-         ec.rev_SI01 = ifelse(is.na(ec.rev_SI01)==TRUE,0,ec.rev_SI01),
-         ec.rev_SI02 = ifelse(is.na(ec.rev_SI02)==TRUE,0,ec.rev_SI02),
-         ec.rev_SI03 = ifelse(is.na(ec.rev_SI03)==TRUE,0,ec.rev_SI03),
-         emp.li_SI01 = ifelse(is.na(emp.li_SI01)==TRUE,0,emp.li_SI01),
-         emp.li_SI02 = ifelse(is.na(emp.li_SI02)==TRUE,0,emp.li_SI02),
-         emp.li_SI03 = ifelse(is.na(emp.li_SI03)==TRUE,0,emp.li_SI03),
-         ec.li_SI01 = ifelse(is.na(ec.li_SI01)==TRUE,0,ec.li_SI01),
-         ec.li_SI02 = ifelse(is.na(ec.li_SI02)==TRUE,0,ec.li_SI02),
-         ec.li_SI03 = ifelse(is.na(ec.li_SI03)==TRUE,0,ec.li_SI03))
-
-rm(zip.output)
-
-## JOIN ZIP CODE EFFECTS TO LODES DATA AND MULTIPLY BY THE SHARE OF WORKERS IN EACH ZIP CODE WORKING IN DIFFERENT CENSUS TRACTS 
-df.zip <- mutate(df.zip,
-                 ZIP = as.numeric(ZIP))
-
-### MRIO
-mrio.multipliers <- left_join(mrio.multipliers,df.zip,by=c("zip"="ZIP")) %>% 
-  mutate(emp.rev_SI01 = emp.rev_SI01*SI01_share,
-         emp.rev_SI02 = emp.rev_SI02*SI02_share,
-         emp.rev_SI03 = emp.rev_SI03*SI03_share,
-         ec.rev_SI01 = ec.rev_SI01*SI01_share,
-         ec.rev_SI02 = ec.rev_SI02*SI02_share,
-         ec.rev_SI03 = ec.rev_SI03*SI03_share,
-         emp.rev = emp.rev_SI01+emp.rev_SI02+emp.rev_SI03,
-         ec.rev = ec.rev_SI01+ec.rev_SI02+ec.rev_SI03,
-         emp.li_SI01 = emp.li_SI01*SI01_share,
-         emp.li_SI02 = emp.li_SI02*SI02_share,
-         emp.li_SI03 = emp.li_SI03*SI03_share,
-         ec.li_SI01 = ec.li_SI01*SI01_share,
-         ec.li_SI02 = ec.li_SI02*SI02_share,
-         ec.li_SI03 = ec.li_SI03*SI03_share,
-         emp.li = emp.li_SI01+emp.li_SI02+emp.li_SI03,
-         ec.li = ec.li_SI01+ec.li_SI02+ec.li_SI03) %>%
-  group_by(OriginRegion,cluster,h_tract_geocode,county,ImpactType) %>% 
-  summarize(emp.rev = sum(emp.rev,na.rm=TRUE),
-            ec.rev = sum(ec.rev,na.rm=TRUE),
-            emp.li = sum(emp.li,na.rm=TRUE),
-            ec.li = sum(ec.li,na.rm=TRUE)) 
-
-### NOT MRIO 
-not.mrio.multipliers <- left_join(not.mrio.multipliers,df.zip,by=c("zip"="ZIP")) %>% 
-  mutate(emp.rev_SI01 = emp.rev_SI01*SI01_share,
-         emp.rev_SI02 = emp.rev_SI02*SI02_share,
-         emp.rev_SI03 = emp.rev_SI03*SI03_share,
-         ec.rev_SI01 = ec.rev_SI01*SI01_share,
-         ec.rev_SI02 = ec.rev_SI02*SI02_share,
-         ec.rev_SI03 = ec.rev_SI03*SI03_share,
-         emp.rev = emp.rev_SI01+emp.rev_SI02+emp.rev_SI03,
-         ec.rev = ec.rev_SI01+ec.rev_SI02+ec.rev_SI03,
-         emp.li_SI01 = emp.li_SI01*SI01_share,
-         emp.li_SI02 = emp.li_SI02*SI02_share,
-         emp.li_SI03 = emp.li_SI03*SI03_share,
-         ec.li_SI01 = ec.li_SI01*SI01_share,
-         ec.li_SI02 = ec.li_SI02*SI02_share,
-         ec.li_SI03 = ec.li_SI03*SI03_share,
-         emp.li = emp.li_SI01+emp.li_SI02+emp.li_SI03,
-         ec.li = ec.li_SI01+ec.li_SI02+ec.li_SI03) %>%
-  group_by(OriginRegion,cluster,h_tract_geocode,county,ImpactType) %>% 
-  summarize(emp.rev = sum(emp.rev,na.rm=TRUE),
-            ec.rev = sum(ec.rev,na.rm=TRUE),
-            emp.li = sum(emp.li,na.rm=TRUE),
-            ec.li = sum(ec.li,na.rm=TRUE))
-
-### STATEWIDE 
-state.multipliers <- left_join(state.multipliers,df.zip,by=c("zip"="ZIP")) %>% 
-  mutate(emp.rev_SI01 = emp.rev_SI01*SI01_share,
-         emp.rev_SI02 = emp.rev_SI02*SI02_share,
-         emp.rev_SI03 = emp.rev_SI03*SI03_share,
-         ec.rev_SI01 = ec.rev_SI01*SI01_share,
-         ec.rev_SI02 = ec.rev_SI02*SI02_share,
-         ec.rev_SI03 = ec.rev_SI03*SI03_share,
-         emp.rev = emp.rev_SI01+emp.rev_SI02+emp.rev_SI03,
-         ec.rev = ec.rev_SI01+ec.rev_SI02+ec.rev_SI03,
-         emp.li_SI01 = emp.li_SI01*SI01_share,
-         emp.li_SI02 = emp.li_SI02*SI02_share,
-         emp.li_SI03 = emp.li_SI03*SI03_share,
-         ec.li_SI01 = ec.li_SI01*SI01_share,
-         ec.li_SI02 = ec.li_SI02*SI02_share,
-         ec.li_SI03 = ec.li_SI03*SI03_share,
-         emp.li = emp.li_SI01+emp.li_SI02+emp.li_SI03,
-         ec.li = ec.li_SI01+ec.li_SI02+ec.li_SI03) %>%
-  group_by(OriginRegion,h_tract_geocode,county,ImpactType) %>% 
-  summarize(emp.rev = sum(emp.rev,na.rm=TRUE),
-            ec.rev = sum(ec.rev,na.rm=TRUE),
-            emp.li = sum(emp.li,na.rm=TRUE),
-            ec.li = sum(ec.li,na.rm=TRUE))
-
-
-###########################################################################
-
-# DIRECT IMPACTS--IMPLAN MULTIPLIER * SHARE OF OUTPUT AT EACH REFINERY CENSUS TRACT * LODES SHARES AT TRACT LEVEL
-## IMPLAN MULTIPLIERS
-mrio <- filter(mrio, ImpactType=="Direct")
-statewide <- filter(statewide,ImpactType=="Direct")
-
-## SHARE WORKERS IN EACH REFINERY CENSUS TRACT LIVING IN RESIDENTIAL CENSUS TRACT R
-
-w.tract.total <- group_by(df.tract,
-                          w_tract_geocode) %>% 
-  summarize(S000_w = sum(S000,na.rm=TRUE),
-            SI01_w = sum(SI01,na.rm=TRUE)) %>% 
-  ungroup()
-
-wh.tract.share <- left_join(df.tract,w.tract.total,by="w_tract_geocode") %>% 
-  mutate(S000_share = ifelse(S000_w > 0, S000/S000_w, 0),
-         SI01_share = ifelse(SI01_w > 0, SI01/SI01_w, 0)) %>% 
-  filter(w_tract_geocode == "06037980002" |
-           w_tract_geocode == "06037980030" |
-           w_tract_geocode == "06013378000" |
-           w_tract_geocode == "06095252102" |
-           w_tract_geocode == "06029002401" |
-           w_tract_geocode == "06037980014" |
-           w_tract_geocode == "06037980005" |
-           w_tract_geocode == "06029000507" |
-           w_tract_geocode == "06013320001" |
-           w_tract_geocode == "06013315000" |
-           w_tract_geocode == "06013358000" |
-           w_tract_geocode == "06037980015" |
-           w_tract_geocode == "06079012306" | 
-           w_tract_geocode == "06037553502" |
-           w_tract_geocode == "06029000507") %>%
-  dplyr::select(w_tract_geocode,h_tract_geocode,S000_share,SI01_share) %>% 
-  mutate(refinery_name = ifelse(w_tract_geocode == "06037980002","Marathon Carson",
-                                ifelse(w_tract_geocode == "06037980030","Chevron El Segundo",
-                                       ifelse(w_tract_geocode == "06013378000","Chevron Richmond",
-                                              ifelse(w_tract_geocode == "06095252102","Valero Benicia",
-                                                     ifelse(w_tract_geocode == "06029002401","Kern Oil Bakersfield",
-                                                            ifelse(w_tract_geocode == "06037980014","Valero Wilmington",
-                                                                   ifelse(w_tract_geocode == "06037980005","PBF Torrance",
-                                                                          ifelse(w_tract_geocode == "06029000507","San Joaquin Bakersfield",
-                                                                                 ifelse(w_tract_geocode == "06013320001","PBF Martinez",
-                                                                                        ifelse(w_tract_geocode == "06013315000","Marathon Golden Eagle",
-                                                                                               ifelse(w_tract_geocode == "06013358000","Phillips 66 Rodeo",
-                                                                                                      ifelse(w_tract_geocode == "06037980015","Phillips 66 Wilmington",
-                                                                                                             ifelse(w_tract_geocode == "06079012306","Phillips 66 Santa Maria",
-                                                                                                                    ifelse(w_tract_geocode == "06037553502","AltAir Paramount",
-                                                                                                                           ifelse(w_tract_geocode == "06029000507","Global Clean Energy",NA))))))))))))))),
-         cluster = ifelse((refinery_name=="Marathon Carson" | refinery_name=="Chevron El Segundo" | refinery_name=="Valero Wilmington" | 
-                             refinery_name=="PBF Torrance" | refinery_name=="Phillips 66 Wilmington" | refinery_name=="AltAir Paramount"),"south","north"))
-summary(wh.tract.share$S000_share)
-summary(wh.tract.share$SI01_share)
-
-mrio <- dplyr::select(mrio,
-                      emp.rev,ec.rev,cluster) %>% 
-  rename(emp.rev.mrio = emp.rev, 
-         ec.rev.mrio = ec.rev)
-direct.multipliers <- left_join(wh.tract.share,mrio,by="cluster") 
-direct.multipliers$emp.rev.state <- statewide$emp.rev
-direct.multipliers$ec.rev.state <- statewide$ec.rev
-
-direct.multipliers <- mutate(direct.multipliers,
-                             emp.rev.mrio = emp.rev.mrio*SI01_share,
-                             ec.rev.mrio = ec.rev.mrio*SI01_share,
-                             emp.rev.state = emp.rev.state*SI01_share,
-                             ec.rev.state = ec.rev.state*SI01_share) %>% 
-  dplyr::select(-S000_share, -SI01_share)
-
-summary(direct.multipliers$emp.rev.mrio)
-summary(direct.multipliers$ec.rev.mrio)
-summary(direct.multipliers$emp.rev.state)
-summary(direct.multipliers$ec.rev.state)
-
-
-
-# COMPARE TO OLD MULTIPLIERS
-
-mrio.multipliers <- filter(mrio.multipliers,
-                           ImpactType != "Direct") %>% 
-  ungroup() %>% 
-  mutate(county = str_remove_all(county," County"),
-         region = ifelse((county=="Butte" | county=="Colusa" | county=="El Dorado" | county=="Glenn" | county=="Lassen" | 
-                            county=="Modoc" | county=="Nevada" | county=="Placer" | county=="Plumas" | county=="Sacramento" |
-                            county=="Shasta" | county=="Sierra" | county=="Siskiyou" | county=="Sutter" | county=="Tehama" |
-                            county=="Yolo" | county=="Yuba"),"1",
-                          ifelse((county=="Del Norte" | county=="Humboldt" | county=="Lake" | county=="Mendocino" | county=="Napa" |
-                                    county=="Sonoma" | county=="Trinity"),"2",
-                                 ifelse((county=="Alameda" | county=="Marin" | county=="San Francisco" | county=="San Mateo" | county=="Santa Clara"),"3",
-                                        ifelse((county=="Alpine" | county=="Amador" | county=="Calaveras" | county=="Madera" | county=="Mariposa" |
-                                                  county=="Merced" | county=="Mono" | county=="San Joaquin" | county=="Stanislaus" | county=="Tuolumne"),"4",
-                                               ifelse((county=="Monterey" | county=="San Benito" | county=="Santa Barbara" | county=="Santa Cruz" | county=="Ventura"),"5",
-                                                       ifelse((county=="Fresno" | county=="Inyo" | county=="Kings" | county=="Tulare"),"6",
-                                                              ifelse((county=="Riverside" | county=="San Bernardino"),"7",
-                                                                     ifelse(county=="Orange","9",
-                                                                            ifelse((county=="Imperial" | county=="San Diego"),"10",county)))))))))) %>% 
-  group_by(region,ImpactType) %>% 
-  summarize(emp.rev = sum(emp.rev,na.rm = FALSE),
-            ec.rev = sum(ec.rev,na.rm = FALSE)) %>% 
-  ungroup()
-  
-
-not.mrio.multipliers <- filter(not.mrio.multipliers,
-                           ImpactType != "Direct") %>% 
-  ungroup() %>% 
-  mutate(county = str_remove_all(county," County"),
-         region = ifelse((county=="Butte" | county=="Colusa" | county=="El Dorado" | county=="Glenn" | county=="Lassen" | 
-                            county=="Modoc" | county=="Nevada" | county=="Placer" | county=="Plumas" | county=="Sacramento" |
-                            county=="Shasta" | county=="Sierra" | county=="Siskiyou" | county=="Sutter" | county=="Tehama" |
-                            county=="Yolo" | county=="Yuba"),"1",
-                         ifelse((county=="Del Norte" | county=="Humboldt" | county=="Lake" | county=="Mendocino" | county=="Napa" |
-                                   county=="Sonoma" | county=="Trinity"),"2",
-                                ifelse((county=="Alameda" | county=="Marin" | county=="San Francisco" | county=="San Mateo" | county=="Santa Clara"),"3",
-                                       ifelse((county=="Alpine" | county=="Amador" | county=="Calaveras" | county=="Madera" | county=="Mariposa" |
-                                                 county=="Merced" | county=="Mono" | county=="San Joaquin" | county=="Stanislaus" | county=="Tuolumne"),"4",
-                                              ifelse((county=="Monterey" | county=="San Benito" | county=="Santa Barbara" | county=="Santa Cruz" | county=="Ventura"),"5",
-                                                     ifelse((county=="Fresno" | county=="Inyo" | county=="Kings" | county=="Tulare"),"6",
-                                                            ifelse((county=="Riverside" | county=="San Bernardino"),"7",
-                                                                   ifelse(county=="Orange","9",
-                                                                          ifelse((county=="Imperial" | county=="San Diego"),"10",county)))))))))) %>% 
-  group_by(region,ImpactType) %>% 
-  summarize(emp.rev = sum(emp.rev,na.rm = FALSE),
-            ec.rev = sum(ec.rev,na.rm = FALSE)) %>% 
-  ungroup()
-
-state.multipliers <- filter(state.multipliers,
-                           ImpactType != "Direct") %>% 
-  ungroup() %>% 
-  mutate(county = str_remove_all(county," County"),
-         region = ifelse((county=="Butte" | county=="Colusa" | county=="El Dorado" | county=="Glenn" | county=="Lassen" | 
-                            county=="Modoc" | county=="Nevada" | county=="Placer" | county=="Plumas" | county=="Sacramento" |
-                            county=="Shasta" | county=="Sierra" | county=="Siskiyou" | county=="Sutter" | county=="Tehama" |
-                            county=="Yolo" | county=="Yuba"),"1",
-                         ifelse((county=="Del Norte" | county=="Humboldt" | county=="Lake" | county=="Mendocino" | county=="Napa" |
-                                   county=="Sonoma" | county=="Trinity"),"2",
-                                ifelse((county=="Alameda" | county=="Marin" | county=="San Francisco" | county=="San Mateo" | county=="Santa Clara"),"3",
-                                       ifelse((county=="Alpine" | county=="Amador" | county=="Calaveras" | county=="Madera" | county=="Mariposa" |
-                                                 county=="Merced" | county=="Mono" | county=="San Joaquin" | county=="Stanislaus" | county=="Tuolumne"),"4",
-                                              ifelse((county=="Monterey" | county=="San Benito" | county=="Santa Barbara" | county=="Santa Cruz" | county=="Ventura"),"5",
-                                                     ifelse((county=="Fresno" | county=="Inyo" | county=="Kings" | county=="Tulare"),"6",
-                                                            ifelse((county=="Riverside" | county=="San Bernardino"),"7",
-                                                                   ifelse(county=="Orange","9",
-                                                                          ifelse((county=="Imperial" | county=="San Diego"),"10",county)))))))))) %>% 
-  group_by(region,ImpactType) %>% 
-  summarize(emp.rev = sum(emp.rev,na.rm = FALSE),
-            ec.rev = sum(ec.rev,na.rm = FALSE)) %>% 
-  ungroup()
-  
-
-lodes.xwalk <- fread('ca_xwalk.csv') %>% 
-  dplyr::select(trct,ctyname) %>% 
-  mutate(trct = str_pad(as.character(trct),11,pad="0"),
-         ctyname = str_remove_all(ctyname," County,CA"))
-str(lodes.xwalk)
-
-## where are new impacts actually felt? all 58 counties 
-direct.v2 <- inner_join(direct.multipliers,lodes.xwalk,by=c("h_tract_geocode"="trct")) %>% 
-  group_by(ctyname) %>% 
-  summarize(emp.rev.mrio = sum(emp.rev.mrio,na.rm = FALSE),
-            ec.rev.mrio = sum(ec.rev.mrio,na.rm = FALSE),
-            emp.rev.state = sum(emp.rev.state,na.rm = FALSE),
-            ec.rev.state = sum(ec.rev.state,na.rm = FALSE)) %>% 
-  ungroup() %>% 
-  mutate(ec.rev.mrio.per.worker = ec.rev.mrio/emp.rev.mrio)
-summary(direct.v2$ec.rev.mrio.per.worker)
-
-fwrite(direct.v2, 'direct_impact_by_home_county.csv')
-
-direct.multipliers <- mutate(direct.multipliers,
-               county = ifelse((refinery_name=="Marathon Golden Eagle" | refinery_name=="Chevron Richmond" | 
-                                  refinery_name=="PBF Martinez" | refinery_name=="Phillips 66 Rodeo"),"Contra Costa",
-                                ifelse((refinery_name=="Marathon Carson" | refinery_name=="Chevron El Segundo" |
-                                          refinery_name=="Valero Wilmington" | refinery_name=="PBF Torrance" |
-                                          refinery_name=="Phillips 66 Wilmington" | refinery_name=="AltAir Paramount"), "Los Angeles",
-                                       ifelse((refinery_name=="Valero Benicia"),"Solano",
-                                              ifelse((refinery_name=="Kern Oil Bakersfield" | refinery_name=="San Joaquin Bakersfield"),"Kern",
-                                                     ifelse(refinery_name=="Phillips 66 Santa Maria","San Luis Obispo",NA)))))) %>% 
-  group_by(county) %>% 
-  summarize(emp.rev.mrio = sum(emp.rev.mrio,na.rm = FALSE),
-            ec.rev.mrio = sum(ec.rev.mrio,na.rm = FALSE),
-            emp.rev.state = sum(emp.rev.state,na.rm = FALSE),
-            ec.rev.state = sum(ec.rev.state,na.rm = FALSE)) %>% 
-  mutate(n_refineries = ifelse(county=="Contra Costa",4,
-                               ifelse(county=="Los Angeles",6,
-                                      ifelse(county=="Solano",1,
-                                             ifelse(county=="Kern",2,
-                                                    ifelse(county=="San Luis Obispo",1,NA)))))) %>% 
-  ungroup()
-
-### COMPARISON IS OLD MULTIPLIERS 
-old.mult <- fread('old_multipliers.csv') %>% 
-  filter(ImpactType=="Direct") %>% 
-  rename(old_emp = Employment, 
-         old_ec = EmployeeCompensation) %>% 
-  mutate(county = str_remove_all(DestinationRegion," County, CA"),
-         county = trimws(county)) %>% 
-  dplyr::select(county, old_emp, old_ec) 
-str(old.mult)
-
-old.mult.indir.indu <- fread('old_multipliers.csv') %>% 
-  filter(ImpactType=="Indirect" | ImpactType=="Induced") %>% 
-  rename(old_emp = Employment, 
-         old_ec = EmployeeCompensation) %>% 
-  mutate(county = str_remove_all(DestinationRegion," County, CA"),
-         county = trimws(county)) %>% 
-  dplyr::select(county,ImpactType, old_emp, old_ec) %>% 
-  group_by(county,ImpactType) %>% 
-  summarize(old_emp = sum(old_emp,na.rm=TRUE),
-            old_ec = sum(old_ec, na.rm=TRUE)) %>% 
-  ungroup()
-str(old.mult.indir.indu)
-
-comparison.df <- left_join(direct.multipliers,old.mult,by="county")
-fwrite(comparison.df,'multiplier_comparison.csv')
-
-comparison.df.indir.indu <- left_join(mrio.multipliers,old.mult.indir.indu,by=c("region"="county","ImpactType"))
-
-
-statewide.indir.indu <- filter(rev.statewide, ImpactType != "Direct") %>% 
-  group_by(DestinationRegion,ImpactType) %>% 
-  summarize(emp.rev = sum(emp.rev, na.rm=TRUE),
-            ec.rev = sum(ec.rev, na.rm=TRUE))
-
-li.statewide <- filter(statewide, ImpactType=="Induced") %>% 
-  group_by(DestinationRegion) %>% 
-  summarize(emp.li = sum(emp.li, na.rm=TRUE),
-            ec.li = sum(ec.li, na.rm=TRUE))
-
-
-  
diff --git a/before-targets/exploratory/srm-120-exploration.R b/before-targets/exploratory/srm-120-exploration.R
deleted file mode 100644
index d43dd16..0000000
--- a/before-targets/exploratory/srm-120-exploration.R
+++ /dev/null
@@ -1,426 +0,0 @@
-## tracey mangin
-## july 7, 2025
-## srm exploration for refinery 120
-
-## attach libraries
-library(data.table)
-library(sf)
-library(rnaturalearth)
-library(rnaturalearthhires)
-library(tidyverse)
-
-## user
-user <- "vincent-home"
-
-## paths
-## -------------------------------------------
-
-# list paths
-list_paths <- c(
-    "tracey-laptop" = "/Users/traceymangin/Library/CloudStorage/GoogleDrive-tmangin@ucsb.edu/Shared\ drives/emlab/projects/current-projects/calepa-cn/",
-    "tracey-desktop" = "/Users/tracey/Library/CloudStorage/GoogleDrive-tmangin@ucsb.edu/Shared\ drives/emlab/projects/current-projects/calepa-cn/",
-    "vincent-work" = "H://Shared drives/emlab/projects/current-projects/calepa-cn",
-    "vincent-home" = "G://Shared drives/emlab/projects/current-projects/calepa-cn",
-    "meas" = "data"
-  )
-
-# set main path
-main_path <- list_paths[user]
-
-file_inmap_re <- file.path(main_path, 
-                           "data-staged-for-deletion/health/source_receptor_matrix/inmap_processed_srm/refining")
-
-file_refin_locs <- file.path(main_path, 
-                             "/data-staged-for-deletion/stocks-flows/processed/refinery_lat_long_revised.csv")
-
-file_refin_locs_orig <- file.path(main_path,
-                                  "data-staged-for-deletion/GIS/raw/Petroleum_Refineries_US_EIA/Petroleum_Refineries_US_2019_v2.shp"
-)
-
-ca_crs <- 3310
-
-## functions
-## --------------------------------------------
-
-read_inmap_data <- function(inmap_path, bsite) {
-  
-  if (bsite != 120) {
-  
-  nh3 <- fread(
-    paste0(inmap_path, "/nh3/srm_nh3_site", bsite, ".csv"),
-    header = TRUE,
-    colClasses = c(GEOID = "character")
-  )
-  } else {nh3 <- data.table()}
-  
-  nox <- fread(
-    paste0(inmap_path, "/nox/srm_nox_site", bsite, ".csv"),
-    header = TRUE,
-    colClasses = c(GEOID = "character")
-  )
-  pm25 <- fread(
-    paste0(inmap_path, "/pm25/srm_pm25_site", bsite, ".csv"),
-    header = TRUE,
-    colClasses = c(GEOID = "character")
-  )
-  sox <- fread(
-    paste0(inmap_path, "/sox/srm_sox_site", bsite, ".csv"),
-    header = TRUE,
-    colClasses = c(GEOID = "character")
-  )
-  voc <- fread(
-    paste0(inmap_path, "/voc/srm_voc_site", bsite, ".csv"),
-    header = TRUE,
-    colClasses = c(GEOID = "character")
-  )
-  
-  nh3[, pollutant := "nh3"]
-  nox[, pollutant := "nox"]
-  pm25[, pollutant := "pm25"]
-  sox[, pollutant := "sox"]
-  voc[, pollutant := "voc"]
-  
-  all_pollutants <- rbind(nox, pm25, sox, voc, nh3, fill = TRUE)
-  all_pollutants[, site := bsite]
-  
-}
-
-process_weighted_pm25 <- function(dt_inmap_re) {
-  dt <- copy(dt_inmap_re)
-  setnames(dt, "totalpm25_aw", "weighted_totalpm25")
-
-  dt_wide <- dcast(
-    dt,
-    GEOID + site ~ pollutant,
-    value.var = "weighted_totalpm25"
-  )
-  setnames(
-    dt_wide,
-    c("nh3", "nox", "pm25", "sox", "voc"),
-    paste0("weighted_totalpm25_", c("nh3", "nox", "pm25", "sox", "voc"))
-  )
-  
-  dt_wide <- dt_wide[!is.na(GEOID)]
-  
-  setnames(dt_wide, "site", "site_id")
-
-  dt_wide <- unique(dt_wide)
-
-  dt_wide
-}
-
-read_refin_locs <- function(file_refin_locs, file_refin_locs_orig, ca_crs) {
-  refin_crs <- st_crs(st_read(file_refin_locs_orig))
-  
-  ## Refineries plus
-  refin_new_locations <- fread(file_refin_locs) %>%
-    mutate(coords = gsub("^c\\(|\\)$", "", geometry)) %>%
-    separate(coords, c("lon", "lat"), sep = ",") %>%
-    select(-geometry) %>%
-    st_as_sf(
-      coords = c("lon", "lat"),
-      crs = refin_crs
-    ) %>%
-    st_transform(ca_crs)
-}
-
-
-## read data
-## --------------------------------------------
-
-buff_sites <- c(
-  97,
-  119,
-  120,
-  164,
-  202,
-  209,
-  226,
-  271,
-  279,
-  332,
-  342,
-  343,
-  800,
-  3422,
-  34222,
-  99999
-)
-
-# refinery locations
-refin_locs <- read_refin_locs(file_refin_locs,
-  file_refin_locs_orig,
-  ca_crs
-)
-
-# read inputs
-dt_inmap_re <-  rbindlist(lapply(
-  buff_sites,
-  read_inmap_data,
-  inmap_path = file_inmap_re
-))
-
-# process srm
-srm_weighted_pm25 <- process_weighted_pm25(dt_inmap_re)
-
-## find closest refineries
-## --------------------------------------------------
-
-# Select the reference point (e.g., id == "A")
-ref_point <- refin_locs[refin_locs$site_id == 120, ]
-
-# Compute distances from the reference point to all others
-distances <- st_distance(ref_point, refin_locs)
-
-# Optional: add to the original data
-refin_locs$distance_to_120 <- as.numeric(distances)  # units in meters by default
-
-# rank
-refin_locs$dist_rank <- rank(refin_locs$distance_to_120, ties.method = "min")
-
-# disance in km
-refin_locs$dist_km <- refin_locs$distance_to_120 / 1000
-
-## modify so that 120 doesn't count
-refin_locs <- refin_locs |>
-  mutate(dist_rank = ifelse(site_id == 120, 0, dist_rank - 1),
-         site_name = paste0(site_id, " dist = ", round(dist_km, 1), " km"))
-
-refin_locs <- refin_locs %>%
-  mutate(site_name = fct_reorder(site_name, dist_rank))
-
-# usa <- ne_states(country = "united states of america", returnclass = "sf")
-# california <- usa[usa$name == "California", ]
-# 
-# california <- st_transform(california, ca_crs)
-# 
-# refinery_fig <- ggplot() +
-#   geom_sf(data = california, fill = NA, color = "black") +
-#   geom_sf(data = refin_locs, aes(color = site_id), alpha = 0.7, size = 1) +    # point locations
-#   # geom_sf_text(data = refin_locs, aes(label = site_id), nudge_y = 0.05) +  # labels
-#   theme_minimal()
-# 
-# plotly::ggplotly(refinery_fig)
-
-## compute ratio, NH3 to PM2.5
-## --------------------------------------------
-
-srm_ratio_df_nh3 <- srm_weighted_pm25 |>
-  select(GEOID, site_id, weighted_totalpm25_nh3)
-
-srm_ratio_df <- srm_weighted_pm25 |>
-  pivot_longer(weighted_totalpm25_nh3:weighted_totalpm25_voc, 
-               names_to = "pollutant",
-               values_to = "concentration") |>
-  filter(pollutant != "weighted_totalpm25_nh3") |>
-  left_join(srm_ratio_df_nh3) |>
-  mutate(site_id = as.character(site_id),
-         ratio_nh3_pol = weighted_totalpm25_nh3 / concentration) |>
-  left_join(refin_locs |>
-              select(site_id, cluster, distance_to_120, dist_rank, site_name, geometry)) |>
-  mutate(pollutant_name = str_remove(pattern = "weighted_totalpm25_", pollutant))
-
-allboxplot <- ggplot(srm_ratio_df |> filter(site_id != 120,
-                              cluster == "Bay Area"), aes(x = pollutant_name, y = ratio_nh3_pol)) +
-  geom_boxplot() +
-  facet_wrap(~ site_name, scales = "fixed") +  # or facet_grid(pollutant ~ site_id)
-  scale_fill_brewer(palette = "Set2") +  # or manual colors
-  labs(y = "concentration ratio: NH3 to pollutant",
-       x = NULL) +
-  theme_minimal() +
-  theme(
-    axis.text.x = element_text(angle = 45, hjust = 1),
-    strip.background = element_rect(fill = "white", color = NA),
-    strip.text = element_text(face = "bold")
-  )
-
-ggsave(filename = "srm_ratio_pollutant_pm25.png",
-       plot = allboxplot,
-       path = here::here("outputs", "exploratory", "exploratory-figs"),
-       width = 8,
-       height = 6,
-       dpi = 300)
-
-
-boxplot_summary_df <- srm_ratio_df |> 
-  filter(!site_id %in% c(120, 3422),
-         cluster == "Bay Area",
-         pollutant_name == "pm25") |>
-  group_by(site_id, site_name, pollutant) |>
-  summarise(
-    Q1 = quantile(ratio_nh3_pol, 0.25, na.rm = TRUE),
-    Median = quantile(ratio_nh3_pol, 0.5, na.rm = TRUE),
-    Q3 = quantile(ratio_nh3_pol, 0.75, na.rm = TRUE),
-    IQR = IQR(ratio_nh3_pol, na.rm = TRUE),
-    Lower_Whisker = max(min(ratio_nh3_pol), Q1 - 1.5 * IQR),
-    Upper_Whisker = min(max(ratio_nh3_pol), Q3 + 1.5 * IQR)
-  )
-
-ratio_fig <- ggplot(srm_ratio_df |> filter(!site_id %in% c(120, 3422),
-                                           cluster == "Bay Area",
-                                           pollutant_name == "pm25"), 
-                    aes(x = site_name, y = ratio_nh3_pol)) +
-  geom_boxplot() +
-  geom_text(data = boxplot_summary_df, aes(x = site_name, y = Median, label = round(Median, 2)),
-            vjust = -0.5, size = 3, color = "black") +
-  labs(y = "concentration ratio: NH3 to PM2.5",
-       x = NULL) +
-  theme_minimal() +
-  theme(axis.text.x = element_text(angle = 45, hjust = 1),
-        strip.background = element_rect(fill = "white", color = NA),
-        strip.text = element_text(face = "bold")
-  )
-
-ggsave(filename = "srm_ratio_nh3_pm25.png",
-       plot = ratio_fig,
-       path = here::here("exploratory", "exploratory", "exploratory-figs"),
-       width = 8,
-       height = 6,
-       dpi = 300)
-
-## repeat, 120 PM2.5 to other site PM2.5
-## --------------------------------------------
-
-srm_ratio_df_120 <- srm_weighted_pm25 |>
-  filter(site_id == 120) |>
-  select(GEOID, pm25_120 = weighted_totalpm25_pm25) 
-  
-srm_ratio_pm25_df <- srm_weighted_pm25 |>
-  filter(site_id != 120) |>
-  select(GEOID, site_id, pm25 = weighted_totalpm25_pm25) |>
-  left_join(srm_ratio_df_120) |>
-  mutate(site_id = as.character(site_id),
-         ratio_pm25 = pm25_120 / pm25) |>
-  left_join(refin_locs |>
-            select(site_id, cluster, distance_to_120, dist_rank, site_name, geometry)) 
-
-pm25_boxplot_summary_df <- srm_ratio_pm25_df |> 
-  filter(!site_id %in% c(120, 3422),
-         cluster == "Bay Area") |>
-  group_by(site_id, site_name) |>
-  summarise(
-    Q1 = quantile(ratio_pm25, 0.25, na.rm = TRUE),
-    Median = quantile(ratio_pm25, 0.5, na.rm = TRUE),
-    Q3 = quantile(ratio_pm25, 0.75, na.rm = TRUE),
-    IQR = IQR(ratio_pm25, na.rm = TRUE),
-    Lower_Whisker = max(min(ratio_pm25), Q1 - 1.5 * IQR),
-    Upper_Whisker = min(max(ratio_pm25), Q3 + 1.5 * IQR)
-  )
-
-pm25_ratio_fig <- ggplot(srm_ratio_pm25_df |> filter(!site_id %in% c(120, 3422),
-                                           cluster == "Bay Area"), 
-                         aes(x = site_name, y = ratio_pm25)) +
-  geom_boxplot() +
-  geom_text(data = pm25_boxplot_summary_df, aes(x = site_name, y = Median, label = round(Median, 2)),
-            vjust = -0.5, size = 3, color = "black") +
-  labs(y = "Concentration ratio: site 120 PM2.5 to PM2.5",
-       x = NULL) +
-  theme_minimal() +
-  theme(axis.text.x = element_text(angle = 45, hjust = 1),
-        strip.background = element_rect(fill = "white", color = NA),
-        strip.text = element_text(face = "bold")
-  )
-
-ggsave(filename = "srm_ratio_120pm25_pm25.png",
-       plot = pm25_ratio_fig,
-       path = here::here("exploratory", "exploratory", "exploratory-figs"),
-       width = 8,
-       height = 6,
-       dpi = 300)
-
-pm25_ratio_fig_no <- ggplot(srm_ratio_pm25_df |> filter(!site_id %in% c(120, 3422),
-                                                     cluster == "Bay Area"), 
-                         aes(x = site_name, y = ratio_pm25)) +
-  geom_boxplot(outlier.shape = NA) +
-  geom_text(data = pm25_boxplot_summary_df, aes(x = site_name, y = Median, label = round(Median, 2)),
-            vjust = -0.2, size = 3, color = "black") +
-  labs(y = "Concentration ratio: site 120 PM2.5 to PM2.5",
-       x = NULL) +
-  ylim(0.75, 1.3) +
-  theme_minimal() +
-  theme(axis.text.x = element_text(angle = 45, hjust = 1),
-        strip.background = element_rect(fill = "white", color = NA),
-        strip.text = element_text(face = "bold")
-  )
-
-ggsave(filename = "srm_ratio_120pm25_pm25_no_outliers.png",
-       plot = pm25_ratio_fig_no,
-       path = here::here("exploratory", "exploratory", "exploratory-figs"),
-       width = 8,
-       height = 6,
-       dpi = 300)
-
-#### importance of NH3
-
-#across sites
-total <- srm_weighted_pm25 %>%
-  #filter(!(site_id %in% "120"))%>%
-  group_by(site_id)%>%
-  mutate(across(starts_with("weighted"), sum))%>%
-  select(-GEOID)%>%
-  ungroup()%>%
-  unique()%>%
-  gather("poll", "total_pm25", -site_id)%>%
-  mutate(poll = toupper(gsub("^.*_", "", poll)))
-
-#refinery specific EF
-ef <- fread(paste0(main_path,"/data-staged-for-deletion/health/processed/refinery_emission_factor.csv"), 
-      stringsAsFactors =  F)%>%
-  mutate(ton_bbl = 0.001*kg_bbl)%>%
-  select(-kg_bbl)
-
-poll_importance <- ef %>% 
-  mutate(pollutant_code = ifelse(pollutant_code == "SO2", "SOX", pollutant_code),
-         pollutant_code = ifelse(pollutant_code == "PM25-PRI", "PM25", pollutant_code))%>%
-  full_join(total, by = c("id1"="site_id", "pollutant_code"= "poll"))%>%
-  mutate(total_pm25_bbl = ton_bbl*total_pm25)
-
-#cluster level EF
-ef_cluster <- fread(paste0(main_path,"/data-staged-for-deletion/health/processed/cluster_emission_factor_v2.csv"), 
-            stringsAsFactors =  F)%>%
-  mutate(ton_bbl = 0.001*kg_bbl)%>%
-  select(-kg_bbl)
-
-site_cluster <- fread(paste0(main_path,"/data-staged-for-deletion/health/raw/ref_match/ref_to_match_eia.csv"), 
-     stringsAsFactors =  F)%>%
-  select(site_id, cluster)
-
-
-poll_importance <- ef_cluster %>% 
-  mutate(pollutant_code = ifelse(pollutant_code == "SO2", "SOX", pollutant_code),
-         pollutant_code = ifelse(pollutant_code == "PM25-PRI", "PM25", pollutant_code))%>%
-  right_join(total %>% left_join(site_cluster, by = c("site_id")) %>% drop_na(cluster), 
-                                by = c("cluster", "pollutant_code"="poll"))%>%
-  mutate(total_pm25_bbl = ton_bbl*total_pm25)
-
-#tot pm25 dispersed per barrel
-poll_importance %>%
-  group_by(site_id)%>%
-  mutate(total_site_pm25 = sum(total_pm25_bbl,na.rm = T),
-         share = total_pm25_bbl/total_site_pm25)%>%
-  ungroup()%>%
-  ggplot(aes(x= as.factor(site_id), y = share))+
-  geom_point()+
-  facet_wrap(~pollutant_code)+
-  labs(x = "Site ID", y = "Share of total dispersed secondary PM2.5 dispersed per site per barrel")+
-  theme_gray(16)
-
-#tot pm25 dispersed per ton of precursor
-poll_importance %>%
-  group_by(site_id)%>%
-  mutate(total_site_pm25 = sum(total_pm25,na.rm = T),
-         share = total_pm25/total_site_pm25)%>%
-  ungroup()%>%
-  ggplot(aes(x= as.factor(id1), y = share))+
-  geom_point()+
-  facet_wrap(~pollutant_code)+
-  labs(x = "Site ID", y = "Share of total secondary PM2.5 dispersed per site per ton emitted")+
-  theme_gray(16)
-
-#what is site 800? its a renewable fuel
-
-ref_analysis <- fread(paste0(main_path,"/data-staged-for-deletion/stocks-flows/processed/refinery_loc_cap_manual.csv"),
-                      stringsAsFactors = F) 
-
-ref <- fread(paste0(main_path,"/data-staged-for-deletion/stocks-flows/processed/refinery_lat_long.csv"),
-                      stringsAsFactors = F) 
diff --git a/extras/health_results.R b/extras/health_results.R
deleted file mode 100644
index 9fd66fc..0000000
--- a/extras/health_results.R
+++ /dev/null
@@ -1,157 +0,0 @@
-# CalEPA: Summary stats from health results
-# vincent.thivierge@uottawa.ca
-# created: 07/08/2025
-# updated: 07/08/2025
-
-# set up environment
-
-rm(list = ls())
-`%notin%` <- Negate(`%in%`)
-gc()
-
-options(scipen = 999)
-
-## Packages
-
-packages <- c(
-  "data.table", "dplyr", "janitor", "stringr", "ggplot2", "cowplot",
-  "forcats", "readxl", "forcats", "tidyr"
-)
-
-for (i in packages) {
-  if (require(i, character.only = TRUE) == FALSE) {
-    install.packages(i, repos = "http://cran.us.r-project.org")
-  } else {
-    require(i, character.only = TRUE)
-  }
-}
-
-## Directory
-
-wd <- c("C:\\git\\ca-refining\\outputs\\rev-submission\\cuf=0.6_beta-scenario=main\\tables\\health") #Vincent's WD
-setwd(wd)
-getwd()
-
-## State-wide mortality results
-
-fread("./cumulative_avoided_mortality.csv", stringsAsFactors = F)%>%
-  filter(demo_cat%in%"DAC")%>%
-  group_by(scen_id)%>%
-  summarise(cumul_mort_level = sum(cumul_mort_level))%>%
-  ungroup()%>%
-  mutate(diff = cumul_mort_level[scen_id == "BAU historic production"]-cumul_mort_level,
-         percent = (cumul_mort_level/cumul_mort_level[scen_id == "BAU historic production"])-1)
-
-## State-wide monetized mortality results
-
-fread("./cumulative_health_x_county.csv", stringsAsFactors = F)%>%
-  filter(demo_cat%in%"DAC")%>%
-  group_by(scen_id)%>%
-  summarise(mortality_pv = sum(mortality_pv_dem)/1000000000)%>%
-  ungroup()%>%
-  mutate(diff_pv = mortality_pv[scen_id == "BAU historic production"]-mortality_pv)
-
-  
-## State-wide monetized mortality by county (new results)
-
-fread("./cumulative_health_x_county.csv", stringsAsFactors = F)%>%
-  filter(demo_cat%in%"DAC")%>%
-  filter(scen_id %in% c("BAU historic production","LC1 low exports"))%>%
-  group_by(scen_id,NAME)%>%
-  summarize(mortality_pv_dem = sum(mortality_pv_dem))%>%
-  ungroup()%>%  
-  group_by(scen_id)%>%
-  mutate(mortality_pv = sum(mortality_pv_dem))%>%
-  ungroup()%>%
-  mutate(diff_pv_state = mortality_pv[scen_id == "BAU historic production"]-mortality_pv,
-         diff_pv = mortality_pv_dem[scen_id == "BAU historic production"]-mortality_pv_dem)%>%
-  mutate(share_pv = (diff_pv/diff_pv_state))%>%
-  #filter(scen_id %in% c("LC1 low exports"))%>%
-  arrange(-share_pv)
-
-## By demographic
-
-fig5 <- fread("C:\\git\\ca-refining\\outputs\\rev-submission\\cuf=0.6_beta-scenario=main\\results\\figures\\figure-5\\state_disaggregated_npv_pc_fig_inputs.csv",
-              stringsAsFactors = F) 
-
-fread("C:\\git\\ca-refining\\outputs\\rev-submission\\cuf=0.6_beta-scenario=main\\results\\figures\\figure-5\\state_disaggregated_npv_pc_fig_inputs.csv",
-      stringsAsFactors = F)%>%
-  filter(scen_id %in% c("BAU historical production","LC1 low exports") & demo_cat %in% "DAC" & segment %in% "health")
-
-## Sensitivity analyses
-
-#Refinery-level
-fread("./state_npv_fig_inputs_health_ref.csv", stringsAsFactors = F)%>%
-  filter(unit_desc %in% "USD billion (2019 VSL)")%>%
-  #filter(scen_id %in% c("BAU historical production","LC1 low exports") & unit_desc %in% "USD billion (2019 VSL)")%>%
-  select(scen_id,value)%>%
-  distinct()
-  
-#Growing age-based VSL
-fread("C:\\git\\ca-refining\\outputs\\rev-submission\\cuf=0.6_beta-scenario=main\\results\\figures\\figure-3\\state_npv_fig_inputs_health.csv",
-      stringsAsFactors = F)%>%
-  filter(scen_id %in% c("BAU historical production","LC1 low exports") & unit_desc %in% "USD billion (annual VSL)")
-  
-#Constant VSL
-fread("./state_npv_fig_inputs_health_non_age_vsl.csv", stringsAsFactors = F)%>%
-  filter(scen_id %in% c("BAU historical production","LC1 low exports") & unit_desc %in% "USD billion (2019 VSL)")
-
-########################################################################################################################
-############# DEBUGGGING ########################################################################################################################
-########################################################################################################################
-
-
-fread("./cumulative_health_x_county.csv", stringsAsFactors = F)%>%
-  filter(demo_cat%in%"DAC")%>%
-  filter(scen_id %in% c("BAU historic production","LC1 low exports"))%>%
-  group_by(scen_id,NAME)%>%
-  summarize(mortality_pv_dem = sum(mortality_pv_dem),
-            mortality_level_dem = sum(mortality_level_dem))%>%
-  ungroup()%>%  
-  group_by(scen_id)%>%
-  mutate(mortality_pv = sum(mortality_pv_dem))%>%
-  ungroup()%>%
-  mutate(diff_pv_state = mortality_pv[scen_id == "BAU historic production"]-mortality_pv,
-         diff_pv = mortality_pv_dem[scen_id == "BAU historic production"]-mortality_pv_dem)%>%
-  mutate(share_pv = (diff_pv/diff_pv_state))%>%
-  #filter(scen_id %in% c("LC1 low exports"))%>%
-  arrange(-share_pv)%>%
-  filter(NAME %in% c("Solano","San Bernardino"))
-  
-
-## State-wide monetized mortality by county (old results)
-
-fread("C:/Users/vince/Desktop/cumulative_health_x_county.csv", stringsAsFactors = F)%>%
-  filter(demo_cat%in%"DAC")%>%
-  filter(scen_id %in% c("BAU historic production","LC1 low exports"))%>%
-  group_by(scen_id,NAME)%>%
-  summarize(mortality_pv_dem = sum(mortality_pv_dem),
-            mortality_level_dem = sum(mortality_level_dem))%>%
-  ungroup()%>%  
-  group_by(scen_id)%>%
-  mutate(mortality_pv = sum(mortality_pv_dem))%>%
-  ungroup()%>%
-  mutate(diff_pv_state = mortality_pv[scen_id == "BAU historic production"]-mortality_pv,
-         diff_pv = mortality_pv_dem[scen_id == "BAU historic production"]-mortality_pv_dem)%>%
-  mutate(share_pv = (diff_pv/diff_pv_state))%>%
-  #filter(scen_id %in% c("LC1 low exports"))%>%
-  arrange(-share_pv)
-
-fread("C:/Users/vince/Desktop/cumulative_health_x_county.csv", stringsAsFactors = F)%>%
-  filter(demo_cat%in%"DAC")%>%
-  filter(scen_id %in% c("BAU historic production","LC1 low exports"))%>%
-  group_by(scen_id,NAME)%>%
-  summarize(mortality_pv_dem = sum(mortality_pv_dem),
-            mortality_level_dem = sum(mortality_level_dem))%>%
-  ungroup()%>%  
-  group_by(scen_id)%>%
-  mutate(mortality_pv = sum(mortality_pv_dem))%>%
-  ungroup()%>%
-  mutate(diff_pv_state = mortality_pv[scen_id == "BAU historic production"]-mortality_pv,
-         diff_pv = mortality_pv_dem[scen_id == "BAU historic production"]-mortality_pv_dem)%>%
-  mutate(share_pv = (diff_pv/diff_pv_state))%>%
-  #filter(scen_id %in% c("LC1 low exports"))%>%
-  arrange(-share_pv)%>%
-  filter(NAME %in% c("Solano","San Bernardino"))
-
-
diff --git a/extras/labor_plots.R b/extras/labor_plots.R
deleted file mode 100644
index f11c33e..0000000
--- a/extras/labor_plots.R
+++ /dev/null
@@ -1,184 +0,0 @@
-main_path <- "/Users/tracey/Library/CloudStorage/GoogleDrive-tmangin@ucsb.edu/Shared\ drives/emlab/projects/current-projects/calepa-cn/"
-
-tar_load(annual_labor)
-tar_load(county_pop_ratios)
-
-labor_tmp <- annual_labor %>%
-  mutate(scenario = paste0(demand_scenario, "-", refining_scenario))
-
-
-ggplot(labor_tmp, aes(x = year, y = revenue / 1e9, color = county)) +
-  geom_line() +
-  facet_wrap(~scenario) +
-  labs(
-    x = NULL,
-    y = "Revenue (USD billion)",
-    color = NULL
-  ) +
-  theme_line
-
-## dac
-## --------------------------------------------------------------
-
-tar_load(ref_labor_demog_yr)
-
-dac_df <- copy(ref_labor_demog_yr)
-
-## change scenario names, factor
-dac_df[, scenario := paste0(demand_scenario, " demand - ", refining_scenario)]
-dac_df[, scenario := gsub("LC1.", "Low ", scenario)]
-
-## scenarios for filtering
-remove_scen <- c("Low demand - historic production")
-
-## add scenario title
-dac_df[, scenario_title := str_replace(scenario, " - ", "\n")]
-
-## refactor
-dac_df$scenario_title <- factor(dac_df$scenario_title, levels = c(
-  "BAU demand\nhistoric production",
-  "BAU demand\nhistoric exports",
-  "BAU demand\nlow exports",
-  "Low demand\nhistoric exports",
-  "Low demand\nlow exports",
-  "Low demand\nhistoric production"
-))
-
-
-## refactor
-dac_df$scenario <- factor(dac_df$scenario, levels = c(
-  "BAU demand - historic production",
-  "BAU demand - historic exports",
-  "BAU demand - low exports",
-  "Low demand - historic exports",
-  "Low demand - low exports",
-  "Low demand - historic production"
-))
-
-dac_fig <- ggplot(dac_df %>% filter(
-  !scenario %in% remove_scen,
-  demo_cat == "DAC"
-), aes(x = year, y = demo_emp / 1000, lty = title, color = county)) +
-  geom_line(linewidth = 0.8, alpha = 0.7) +
-  geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-  facet_grid(demo_cat ~ scenario_title) +
-  labs(
-    x = NULL,
-    y = "Labor: FTE jobs (thousand)"
-  ) +
-  # ylim(c(0, 35)) +
-  scale_x_continuous(
-    breaks = c(2020, 2045), # Specify tick mark positions
-    labels = c(2020, 2045)
-  ) + # Specify tick mark labels
-  theme_line +
-  theme(
-    legend.position = "bottom",
-    legend.title = element_blank(),
-    axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-    # strip.text.x = element_blank(),
-    plot.margin = unit(c(0, 0, 0, 0), "cm"),
-    # axis.text.x = element_blank(),
-    axis.ticks.length.y = unit(0.1, "cm"),
-    axis.ticks.length.x = unit(0.1, "cm")
-  )
-
-ggsave(
-  plot = dac_fig,
-  device = "png",
-  filename = "dac_labor_levels.png",
-  path = paste0(main_path, "outputs/academic-out/refining/figures/2022-12-update/labor/"),
-  width = 12,
-  height = 4,
-  dpi = 600
-)
-
-
-
-## gaps
-## -------------------------------------------------------------
-
-dac_gaps_df <- copy(ref_labor_demog_yr)
-
-## change scenario names, factor
-dac_gaps_df[, scenario := paste0(demand_scenario, " demand - ", refining_scenario)]
-dac_gaps_df[, scenario := gsub("LC1.", "Low ", scenario)]
-
-## scenarios for filtering
-remove_scen <- c("Low demand - historic production", "BAU demand - historic production")
-
-## refactor
-dac_gaps_df[, scenario_title := scenario]
-dac_gaps_df[, scenario_title := str_replace(scenario_title, " - ", "\n")]
-
-dac_gaps_df$scenario <- factor(dac_gaps_df$scenario, levels = c(
-  "BAU demand - historic production",
-  "BAU demand - historic exports",
-  "BAU demand - low exports",
-  "Low demand - historic exports",
-  "Low demand - low exports",
-  "Low demand - historic production"
-))
-
-dac_gaps_df$scenario_title <- factor(dac_gaps_df$scenario_title, levels = c(
-  "BAU demand\nhistoric production",
-  "BAU demand\nhistoric exports",
-  "BAU demand\nlow exports",
-  "Low demand\nhistoric exports",
-  "Low demand\nlow exports",
-  "Low demand\nhistoric production"
-))
-## filter for dac
-dac_gaps_df <- dac_gaps_df[demo_cat == "DAC"]
-
-
-## calculate gaps (BAU - scenario)
-dac_bau_gaps_df <- dac_gaps_df[scenario == "BAU demand - historic production"]
-dac_bau_gaps_df <- dac_bau_gaps_df[, c("county", "year", "demo_cat", "demo_group", "title", "demo_emp")]
-setnames(dac_bau_gaps_df, "demo_emp", "bau_demo_emp")
-
-dac_gaps_df <- merge(dac_gaps_df, dac_bau_gaps_df,
-  by = c("county", "year", "demo_cat", "demo_group", "title"),
-  all.x = T
-)
-
-dac_gaps_df[, gap := demo_emp - bau_demo_emp]
-
-## fig
-dac_gap_fig_b <- ggplot(dac_gaps_df %>% filter(
-  !scenario %in% remove_scen,
-  demo_cat == "DAC"
-), aes(x = year, y = gap / 1000, lty = title, color = county)) +
-  geom_line(linewidth = 0.8, alpha = 0.7) +
-  geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) +
-  facet_grid(demo_cat ~ scenario_title) +
-  labs(
-    x = NULL,
-    y = "Labor: FTE jobs difference from reference (thousand)"
-  ) +
-  # ylim(c(-0.31, 0)) +
-  scale_x_continuous(
-    breaks = c(2020, 2045), # Specify tick mark positions
-    labels = c(2020, 2045)
-  ) + # Specify tick mark labels
-  theme_line +
-  theme(
-    legend.position = "bottom",
-    legend.title = element_blank(),
-    axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
-    # strip.text.x = element_blank(),
-    plot.margin = unit(c(0, 0, 0, 0), "cm"),
-    # axis.text.x = element_blank(),
-    axis.ticks.length.y = unit(0.1, "cm"),
-    axis.ticks.length.x = unit(0.1, "cm")
-  )
-
-ggsave(
-  plot = dac_gap_fig_b,
-  device = "png",
-  filename = "dac_labor_gaps.png",
-  path = paste0(main_path, "outputs/academic-out/refining/figures/2022-12-update/labor/"),
-  width = 12,
-  height = 4,
-  dpi = 600
-)
diff --git a/stata/0_data_prep.do b/stata/0_data_prep.do
deleted file mode 100644
index 07bfe65..0000000
--- a/stata/0_data_prep.do
+++ /dev/null
@@ -1,198 +0,0 @@
-
-********************************************************************************
-********************************************************************************
-global startDir "/Users/paigeweber/Dropbox/Refining"
-cd $startDir
-
-
-
-
-
-
-
-********************************************************************************
-********************************************************************************
-
-cd data/
-
-********************************************************************************
-* refinery bbl processed
-import delimited "reg_refin_crude_receipts.csv", clear 
-
-gen ym = ym(year,month)
-format ym %tm
-
-destring bbls, replace force
-
-keep ym bbls region
-
-sort ym region
-bysort ym region: egen bbl_r = sum(bbls)
-bysort ym region: gen count = _n
-
-replace bbl_r = bbl_r/10^6
-label variable bbl_r "bbl millions"
-drop bbls 
-keep if count ==1
-drop count
-rename bbl_r bbl
-reshape wide bbl, i(ym) j(region) string
-
-
-
-
-reshape long bbl, i(ym) j(product) string
-
-sort ym product
-
-gen tho_bbl = bbl*10^3
-label variable tho_bbl "thousands of barrels"
-
-drop bbl
-
-save "reg_refin_ym.dta", replace
-
-isid ym product
-********************************************************************************
-
-
-clear
-import delimited "Cushing_OK_WTI_Spot_Price_FOB.csv", clear 
-
-rename cushing* WTI
-
-split(month), parse("-")
-drop month
-
-rename month2 year
-destring year, replace
-
-gen year2 = 2000 + year
-
-replace year2 = year2-100 if year2>2023
-tab year2
-
-
-drop year
-rename year2 year
-
-rename month1 month
-
-gen month2 = month(date(month,"M"))
-drop month
-rename month2 month
-gen ym = ym(year,month)
-format ym %tm
-
-save "WTI_monthly_1986_2023.dta", replace
-********************************************************************************
-
-import excel "Finished_Products_Movements.xlsx", sheet("Gasoline Chart Data") cellrange(A5:FB24) clear
-** UNITS IN THIS SHEET ARE THOUSANDS OF BARRELS **
-duplicates drop
-
-rename A product
-
-drop if product ==""
-
-
-foreach v of varlist C-FB{
-	
-	rename `v' date_`v'
-}
-
-drop B
-
-reshape long date, i(product) j(ym) string
-
-rename date quantity
-
-egen seq month = seq(), f(1) t(12)
-egen seq year = seq(), f(2007) t(2019) b(12)
-
-drop ym
-
-gen date = mdy(month, 1, year)
-format date %td
-
-gen ym = ym(year, month)
-format ym %tm
-
-drop date
-
-*reshape wide to construct net variables
-drop month year
-br
-
-isid ym product
-
-rename quantity tho_bbl
-
-* clean up product names
-replace product = trim(product)
-
-replace product = "NC_FEx" if product =="NC Foreign Export"
-replace product = "NC_FIm" if product =="NC Foreign Import"
-replace product = "NC_NetIm" if product =="NC Net Imports"
-replace product = "NC_InterEx" if product =="NC Interstate Export"
-replace product = "NC_InterIm" if product =="NC Interstate Import"
-replace product = "NC_Reno" if product =="NC Reno (North)"
-replace product = "N_to_S" if product =="North to South"
-replace product = "S_to_N" if product =="South to North"
-
-replace product = "Gas_BlndStck" if product =="Product: Gasoline & Blendstocks"
-replace product = "SC_CalEvn_W_Adj" if product =="SC CalNEv P/L (West) Adjusted"
-replace product = "SC_FEx" if product =="SC Foreign Export"
-replace product = "SC_FIm" if product =="SC Foreign Import"
-replace product = "SC_InterEx" if product =="SC Interstate Export"
-replace product = "SC_InterIm" if product =="SC Interstate Import"
-
-replace product = "SC_W_Adj" if product =="SC Phoenix (West) Adjusted"
-replace product = "Pipeline_KM" if product =="Source: Pipeline (KM Export Spreadsheet)"
-
-replace product = "SC_W_Adj" if product =="SC Phoenix (West) Adjusted"
-replace product = "Pipeline_SC" if product =="Total SC Pipeline"
-
-
-replace product = "SC_NetIm" if product =="SC Net Imports"
-
-
-isid ym product
-
-
-
-
-merge m:1 ym using "WTI_monthly_1986_2023"
-
-keep if _merge ==3
-
-tab year
-
-drop _merge
-
-append using "reg_refin_ym"
-
-drop year
-gen date = dofm(ym)
-format date %d
-gen year = year(date)
-
-keep if year >2006
-
-sort ym product
-
-order ym product
-
-drop month
-
-gen month = month(date)
-
-bysort ym: carryforward WTI, replace
-
-tab year
-
-isid ym product
-
-drop if tho_bbl ==.
-save "temp.dta", replace
-
diff --git a/stata/1_regressions_figures_stats.do b/stata/1_regressions_figures_stats.do
deleted file mode 100644
index d547366..0000000
--- a/stata/1_regressions_figures_stats.do
+++ /dev/null
@@ -1,304 +0,0 @@
-
-
-
-
-
-********************************************************************************
-********************************************************************************
-global startDir "/Users/paigeweber/Dropbox/Refining"
-cd $startDir
-
-
-
-
-
-
-
-********************************************************************************
-********************************************************************************
-
-
-*run code/0_data_prep.do // this creates temp.dta for analysis
-
-
-use "data/temp.dta", clear
-
-drop if tho_bbl ==.
-
-isid ym product
-
-rename tho_bbl bbl
- 
-reshape wide bbl, i(ym) j(product) string
-
-rename bbl* *
-
-********************************************************************************
-********************************************************************************
-
-
-
-
-gen NC_net = NC_FEx + NC_FIm
-gen SC_net = SC_FEx + SC_FIm
-
-
-replace NC_net = -1*NC_net
-replace SC_net = -1*SC_net
-
-su NC_net, detail
-scalar NCmean = r(mean)
-
-su SC_net, detail // mean is negative
-scalar SCmean = r(mean)
-
-* rescaling so both are above zero
-
-scalar pct = 100*(SCmean - NCmean)/SCmean
-
-di "percent larger exports in NC is:"
-di pct 
-
-gen pct = 100*(SC_net - NC_net)/SC_net
-
-su pct, detail
-
-
-bysort year: egen sum_pct = mean(pct)
-
-gen pct_mean = .
-gen pct_high = .
-gen pct_low = .
-
-su pct, detail
-gen pct_all = r(mean)
-
-foreach y of numlist 2007(1)2020{
-	
-	su pct if year ==`y', detail
-	replace pct_mean = r(mean) if year ==`y'
-	replace pct_high = r(mean) + r(sd) if year == `y'
-	replace pct_low = r(mean) - r(sd) if year == `y'
-
-	}
-
-set scheme lean1
-
-twoway (scatter pct_mean year, msymbol(o) mcolor(blue%60)) ///
-		(rcap pct_high pct_low year) ///
-		(line pct_all year, lpattern(dash)), ///
-		 xtitle("") ytitle("Percent", size(medsmall)) ///
-		xscale(r(2007 2020)) xlabel(2010(2)2020) ///
-		legend(pos(6) col(3) label(1 "Annual mean") label( 2 "95 Pct. CI") label(3 "Average all years")) ///
-		title("NC exports compared to SC exports", size(medsmall)) 
-
-	
-
-* sum to year since model does not include monthly
-
-
-bysort year: egen yr_NC_net = sum(NC_net)
-bysort year: egen yr_SC_net = sum(SC_net)
-
-bysort year: egen yr_north = sum(north)
-bysort year: egen yr_south = sum(south)
-
-bysort year: egen yr_WTI = mean(WTI)
-
-bysort year: gen count = _n
-count if count ==1
-
-gen northXWTI = north*WTI
-gen southXWTI = south*WTI
-
-gen WTIsq = WTI*WTI
-gen northsq = north*north
-gen southsq = south*south
-
-gen yearsq = year*year
-
-
-****************************************************
-
-
-reghdfe NC_net north
-estimates store m0
-
-reghdfe NC_net north south WTI
-estimates store m1
-
-reghdfe NC_net north south WTI northXWTI southXWTI 
-estimates store m2
-
-reghdfe NC_net northsq southsq WTI WTIsq northXWTI southXWTI north south
-estimates store m3
-
-reghdfe NC_net northsq southsq WTI WTIsq northXWTI southXWTI north south year yearsq
-estimates store m3b
-
-reghdfe NC_net northsq southsq WTI WTIsq northXWTI southXWTI north south year yearsq N_to_S S_to_N
-estimates store m3c
-
-
-
-reghdfe SC_net south
-estimates store m4a
-
-reghdfe SC_net north south WTI
-estimates store m4
-
-reghdfe SC_net north south WTI northXWTI southXWTI 
-estimates store m5
-
-reghdfe SC_net north south northsq southsq WTI WTIsq northXWTI southXWTI
-estimates store m6 
-
-reghdfe SC_net north south northsq southsq WTI WTIsq northXWTI southXWTI year yearsq
-estimates store m6b
-
-
-reghdfe NC_net northsq southsq WTI WTIsq northXWTI southXWTI north south year yearsq N_to_S S_to_N
-estimates store m6c
-
-
-esttab m6b m6c m3b m3c, stats(N r2 stars) p
-
-
-
-
-
-
-********************************************************************************
-********************************************************************************
-
-gen CA_net = NC_net + SC_net 
-gen CA = north + south
-gen CAXWTI = CA*WTI
-
-gen CAsq = CA*CA
-
-reghdfe CA_net CA WTI, absorb(i.year i.month)
-estimates store m7
-
-reghdfe CA_net CA WTI CAXWTI, absorb(i.year i.month)
-estimates store m8
-
-reghdfe CA_net CA CAsq WTI WTIsq CAXWTI, absorb(i.year i.month)
-estimates store m9
-
-esttab m7 m8 m9, stats(N r2 stars) p
-
-
-reg CA_net WTI WTIsq i.year i.month
-predict hat
-
-gen resid = CA_net - hat
-
-binscatter resid CA
-
-binscatter CA CA_net
-
-binscatter north NC_net
-
-binscatter south SC_net
-
-
-twoway (scatter CA_net CA) ///
-		(lfit CA_net CA, lpattern(solid) lwidth(medthick) lcolor(midblue)), ///
-		legend(off) ///
-		xtitle("In state consumption (tho BBL)", size(small)) ytitle("Net exports (tho BBL)", size(small)) ///
-		title("California", size(small)) xlabel(,labsize(small)) ylabel(,labsize(small))
-
-graph save figures/CA_net.gph, replace
-
-twoway (scatter NC_net north) ///
-		(lfit NC_net north, lpattern(solid) lwidth(medthick) lcolor(emerald)), ///
-		legend(off) ///
-		xtitle("In state consumption (tho BBL)", size(small)) ytitle("Net exports (tho BBL)", size(small)) ///
-		title("Northern CA", size(small)) xlabel(,labsize(small)) ylabel(,labsize(small)) yline(0, lpattern(dash))
-
-graph save figures/NC_net.gph, replace
-
-twoway (scatter SC_net north) ///
-		(lfit SC_net north, lpattern(solid) lwidth(medthick) lcolor(purple)), ///
-		legend(off) ///
-		xtitle("In state consumption (tho BBL)", size(small)) ytitle("Net exports (tho BBL)", size(small)) ///
-		title("Southern CA", size(small)) xlabel(,labsize(small)) ylabel(,labsize(small)) yline(0, lpattern(dash))
-
-graph save figures/SC_net.gph, replace
-
-
-
-graph combine figures/NC_net.gph figures/SC_net.gph, rows(1) ycommon 
-
-
-********************************************************************************
-********************************************************************************
-
-* norht to south export share
-
-gen NoverS = NC_net/SC_net
-su NoverS, detail
-
-* monthly 
-twoway (line NC_net ym, lcolor(midblue)) ///
-		(line SC_net ym, lcolor(gs8)), ///
-		legend(pos(6) col(2) label(1 "North CA") label(2 "South CA")) ///
-		yline(0, lcolor(black)) xtitle("") title("Net exports", size(medsmall)) ///
-		ytitle("BBL (tho)")
-		
-* annual average
-
-
-twoway (line yr_NC_net year, lcolor(midblue)) ///
-		(line yr_north year, lcolor(gs8) yaxis(2) ytitle(BBL (tho))), ///
-		legend(pos(6) col(2) label(1 "North CA net exports") label(2 "North comsumption")) ///
-		xtitle("") title("North: Net exports and consumption", size(medsmall)) ///
-		ytitle("BBL (tho)")
-
-save figures/north_net_consumption.gph, replace
-
-	
-twoway (line yr_SC_net year, lcolor(green%60)) ///
-		(line yr_south year, lcolor(gs8) yaxis(2) ytitle(BBL (tho))), ///
-		legend(pos(6) col(2) label(1 "South CA net exports") label(2 "South comsumption")) ///
-		xtitle("") title("South: Net exports and consumption", size(medsmall)) ///
-		ytitle("BBL (tho)")
-
-save figures/south_net_consumption.gph, replace
-
-cd figures/
-graph combine south_net_consumption.gph  north_net_consumption.gph
-
-
-
-twoway (line yr_NC_net year, lcolor(midblue)) ///
-		(line yr_SC_net year, lcolor(gs8) ), ///
-		legend(pos(6) col(2) label(1 "North CA") label(2 "South CA")) ///
-		yline(0, lcolor(black)) xtitle("") title("Net exports", size(medsmall)) ///
-		ytitle("BBL (tho)")
-		
-twoway (line yr_SC_net year, lcolor(green%50)) ///
-		(line yr_south year, lcolor(gs8) yaxis(2)), ///
-		legend(pos(6) col(2) label(1 "North CA") label(2 "South CA")) ///
-		yline(0, lcolor(black)) xtitle("") title("Net exports", size(medsmall)) ///
-		ytitle("BBL (tho)")
-
-
-
-twoway (line NC_net ym, lcolor(midblue) lpattern(dash)) ///
-		(line NC_hat_3 ym, lcolor(gs6) lpattern(solid)), ///
-		legend(pos(6) col(2) label(1 "Actuals") label(2 "Predicted")) ///
-		yline(0, lcolor(black)) xtitle("") title("North, net exports", size(medsmall)) ///
-		ytitle("BBL (tho)")
-		
-twoway (line SC_net ym, lcolor(green%60) lpattern(dash)) ///
-		(line SC_hat_6 ym, lcolor(gs6) lpattern(solid)), ///
-		legend(pos(6) col(2) label(1 "Actuals") label(2 "Predicted")) ///
-		yline(0, lcolor(black)) xtitle("") title("South, net exports", size(medsmall)) ///
-		ytitle("BBL (tho)")
-
-
-
-
-
diff --git a/stata/clean_acs5_race_refining.do b/stata/clean_acs5_race_refining.do
deleted file mode 100644
index a5bc124..0000000
--- a/stata/clean_acs5_race_refining.do
+++ /dev/null
@@ -1,60 +0,0 @@
-/***************************   HEADER **********************************/
-	clear all
-
-	//set local directory
-	if regexm("`c(pwd)'","")==1  { //Paige's machine
-		global startDir ""
-		global repoDir ""
-	}
-	if regexm("`c(pwd)'","/Users/kylemeng")==1  { //Kyle's machine
-		global startDir ""
-		global repoDir ""
-		//sysdir set PLUS $startDir/../../toolbox/STATA_toolbox/plus
-		sysdir set PLUS $repoDir/scripts/toolbox/STATA/plus 
-	}
-	else if regexm(c(os),"Windows")==1 { //Danae's machine
-		global startDir "C:\Users\dhern125\Dropbox\UCSB-PhD\emLab\CALEPA\data_refining_paper"
-		global repoDir "C:\Users\Danae\Documents\GitHub\us_ej_disparities"
-	}
-	if "$startDir"=="" exit 
-	disp "local path is $startDir"
-
-	//set subfolders
-	global rawDir "$startDir/raw"
-	global processedDir "$startDir/processed"
-	global tempDir "$startDir/temp"
-	global tablesDir "$repoDir/tables"
-	global figuresDir "$repoDir/figures"
-	
-	
-	***************2020
-	import delimited using $rawDir/nhgis0024_csv/nhgis0024_ds249_20205_tract, clear
-	
-	gen total_pop=amp3e001
-	gen hispanic=amp3e012
-	gen white=amp3e003
-	gen black=amp3e004
-	gen aialnative=amp3e005
-	gen asian=amp3e006
-	
-	gen median_income=amr8e001
-	
-	keep gisjoin total_pop hispanic white black aialnative asian median_income geoid year
-	
-	saveold $processedDir/pop_income_2020, replace
-	
-	***************2021
-	import delimited using $rawDir/nhgis0024_csv/nhgis0024_ds254_20215_tract, clear
-	
-	gen total_pop=aooce001
-	gen hispanic=aooce012
-	gen white=aooce003
-	gen black=aooce004
-	gen aialnative=aooce005
-	gen asian=aooce006
-	
-	gen median_income=aoqie001
-	rename geo_id geoid
-	keep gisjoin total_pop hispanic white black aialnative asian median_income geoid year
-	
-	saveold $processedDir/pop_income_2021, replace	
\ No newline at end of file
diff --git a/stata/merge_acs_refining.do b/stata/merge_acs_refining.do
deleted file mode 100644
index 48c45ea..0000000
--- a/stata/merge_acs_refining.do
+++ /dev/null
@@ -1,273 +0,0 @@
-global dataDir "C:\Users\dhern125\Dropbox\UCSB-PhD\emLab\CALEPA\data_refining_paper"
-global figuresDir "C:\Users\dhern125\Dropbox\UCSB-PhD\emLab\CALEPA\data_refining_paper\figures"
-
-********************************************************************************
-import delimited $dataDir/raw/refining-2023/refining_health_income_2023.csv, clear
-
-	saveold $dataDir/processed/refining_health_income_2023, replace
-	
-	
-	
-use $dataDir/processed/pop_income_2020, clear
-*2020: 14000US01001020100
-	gen census_tract=substr(geoid,8,.)
-	destring census_tract, replace
-		drop year
-	duplicates tag census_tract, gen(dup)
-	sort geoid
-	browse if dup==1
-	
-	saveold $dataDir/processed/pop_income_2020_mod, replace
-	
-use $dataDir/processed/refining_health_income_2023, clear
-	merge m:1 census_tract using $dataDir/processed/pop_income_2020_mod
-	/*
-		*issue here
-    Result                           # of obs.
-    -----------------------------------------
-    not matched                       269,189
-        from master                   190,674  (_merge==1)
-        from using                     78,515  (_merge==2)
-
-    matched                         1,114,560  (_merge==3)
-    -----------------------------------------
-	*/
-	drop if _merge==2
-	
-	
-/*
-	rename et1001 total_pop
-	gen minority_pct=(et2002+et2003+et2004+et2005+et2006+et2007+et2008+et2009+et2010)/total_pop
-	gen black_pct=et2002/total_pop
-	gen hispanic_pct=(et2006+et2007+et2008+et2009+et2010)/total_pop
-	sum minority_pct, det
-*/	
-	
-	gen minority_pct=(hispanic+black+aialnative+asian)/total_pop
-	gen black_pct=black/total_pop
-	gen hispanic_pct=hispanic/total_pop
-	gen asian_pct=asian/total_pop
-	gen aialnative_pct=aialnative/total_pop
-
-	rename total_pm25 totalpm25
-	
-	gen white_num=totalpm25*(1-minority_pct)*total_pop
-	gen white_den=(1-minority_pct)*total_pop
-
-local group "minority black hispanic asian aialnative"
-
-foreach g in `group'{
-	gen `g'_num=totalpm25*`g'_pct*total_pop
-	gen `g'_den=`g'_pct*total_pop
-}
-collapse (sum) white_num white_den minority_num minority_den black_num black_den hispanic_num hispanic_den asian_num asian_den aialnative_num aialnative_den, by(scen_id demand_scenario refining_scenario year)
-
-gen W=white_num/white_den
-gen B=black_num/black_den
-gen H=hispanic_num/hispanic_den
-gen M=minority_num/minority_den
-gen A=asian_num/asian_den
-gen AIAL=aialnative_num/aialnative_den
-
-gen stat_BW=B-W
-gen stat_MW=M-W
-gen stat_HW=H-W
-gen stat_AW=A-W
-gen stat_AIALW=AIAL-W
-
-*FIGURE 1
-preserve
-keep if scen_id=="BAU historic exports"
-	twoway (line B year, lwidth(medthick) lcolor(gs10) lpattern(solid) xlabel(2020(5)2045)) (line H year, lwidth(medthick) lcolor(sky) lpattern(solid) xlabel(2020(5)2045, labsize(medium)) ) (line W year, lwidth(medthick) lcolor(black) lpattern(solid) xlabel(2020(5)2045)) (line A year, lwidth(medthick) lcolor(orange) lpattern(solid) xlabel(2020(5)2045))  (line AIAL year, lwidth(medthick) lcolor(green) lpattern(solid) xlabel(2020(5)2045)), xtitle("")  ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Black" 2 "Hispanic" 3 "white" 4 "Asian" 5 "American Indian" ) size(medsmall)) title("BAU historic exports")
-		graph export $figuresDir/bau_his_exports.png, as(png)
-restore
-
-*FIGURE 2
-preserve
-keep if scen_id=="BAU historic production"
-	twoway (line B year, lwidth(medthick) lcolor(gs10) lpattern(solid) xlabel(2020(5)2045)) (line H year, lwidth(medthick) lcolor(sky) lpattern(solid) xlabel(2020(5)2045, labsize(medium)) ) (line W year, lwidth(medthick) lcolor(black) lpattern(solid) xlabel(2020(5)2045)) (line A year, lwidth(medthick) lcolor(orange) lpattern(solid) xlabel(2020(5)2045))  (line AIAL year, lwidth(medthick) lcolor(green) lpattern(solid) xlabel(2020(5)2045)), xtitle("")  ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Black" 2 "Hispanic" 3 "white" 4 "Asian" 5 "American Indian" ) size(medsmall)) title("BAU historic production")
-		graph export $figuresDir/bau_his_prod.png, as(png)
-restore
-
-*FIGURE 3
-preserve
-keep if scen_id=="BAU low exports"
-	twoway (line B year, lwidth(medthick) lcolor(gs10) lpattern(solid) xlabel(2020(5)2045)) (line H year, lwidth(medthick) lcolor(sky) lpattern(solid) xlabel(2020(5)2045, labsize(medium)) ) (line W year, lwidth(medthick) lcolor(black) lpattern(solid) xlabel(2020(5)2045)) (line A year, lwidth(medthick) lcolor(orange) lpattern(solid) xlabel(2020(5)2045))  (line AIAL year, lwidth(medthick) lcolor(green) lpattern(solid) xlabel(2020(5)2045)), xtitle("")  ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Black" 2 "Hispanic" 3 "white" 4 "Asian" 5 "American Indian" ) size(medsmall)) title("BAU low exports")
-		graph export $figuresDir/bau_low_exports.png, as(png)
-restore
-
-*FIGURE 4
-preserve
-keep if scen_id=="LC1 historic exports"
-	twoway (line B year, lwidth(medthick) lcolor(gs10) lpattern(solid) xlabel(2020(5)2045)) (line H year, lwidth(medthick) lcolor(sky) lpattern(solid) xlabel(2020(5)2045, labsize(medium)) ) (line W year, lwidth(medthick) lcolor(black) lpattern(solid) xlabel(2020(5)2045)) (line A year, lwidth(medthick) lcolor(orange) lpattern(solid) xlabel(2020(5)2045))  (line AIAL year, lwidth(medthick) lcolor(green) lpattern(solid) xlabel(2020(5)2045)), xtitle("")  ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Black" 2 "Hispanic" 3 "white" 4 "Asian" 5 "American Indian" ) size(medsmall)) title("LC1 historic exports")
-		graph export $figuresDir/lc1_his_exports.png, as(png)
-restore
-
-*FIGURE 5
-preserve
-keep if scen_id=="LC1 historic production"
-	twoway (line B year, lwidth(medthick) lcolor(gs10) lpattern(solid) xlabel(2020(5)2045)) (line H year, lwidth(medthick) lcolor(sky) lpattern(solid) xlabel(2020(5)2045, labsize(medium)) ) (line W year, lwidth(medthick) lcolor(black) lpattern(solid) xlabel(2020(5)2045)) (line A year, lwidth(medthick) lcolor(orange) lpattern(solid) xlabel(2020(5)2045))  (line AIAL year, lwidth(medthick) lcolor(green) lpattern(solid) xlabel(2020(5)2045)), xtitle("")  ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Black" 2 "Hispanic" 3 "white" 4 "Asian" 5 "American Indian" ) size(medsmall)) title("LC1 historic production")
-		graph export $figuresDir/lc1_his_prod.png, as(png)
-restore
-
-*FIGURE 6
-preserve
-keep if scen_id=="LC1 low exports"
-	twoway (line B year, lwidth(medthick) lcolor(gs10) lpattern(solid) xlabel(2020(5)2045)) (line H year, lwidth(medthick) lcolor(sky) lpattern(solid) xlabel(2020(5)2045, labsize(medium)) ) (line W year, lwidth(medthick) lcolor(black) lpattern(solid) xlabel(2020(5)2045)) (line A year, lwidth(medthick) lcolor(orange) lpattern(solid) xlabel(2020(5)2045))  (line AIAL year, lwidth(medthick) lcolor(green) lpattern(solid) xlabel(2020(5)2045)), xtitle("")  ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Black" 2 "Hispanic" 3 "white" 4 "Asian" 5 "American Indian" ) size(medsmall)) title("LC1 low exports")
-		graph export $figuresDir/lc1_low_exports.png, as(png)
-restore
-
-
-***GAPS
-*FIGURE 1
-preserve
-keep if scen_id=="BAU historic exports"
-	twoway (line stat_BW year, lwidth(medthick) lcolor(gs10) lpattern(dash) xlabel(2020(5)2045)) (line stat_HW year, lwidth(medthick) lcolor(sky) lpattern(dash) xlabel(2020(5)2045, labsize(medium)) ) (line stat_AW year, lwidth(medthick) lcolor(orange) lpattern(dash) xlabel(2020(5)2045))  (line stat_AIALW year, lwidth(medthick) lcolor(green) lpattern(dash) xlabel(2020(5)2045)), xtitle("")  ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Bw gap" 2 "Hw gap" 3 "Aw gap" 5 "AIALw gap" ) size(medsmall)) title("BAU historic exports")
-		graph export $figuresDir/bau_his_exports_gap.png, as(png)
-restore
-
-*FIGURE 2
-preserve
-keep if scen_id=="BAU historic production"
-	twoway (line stat_BW year, lwidth(medthick) lcolor(gs10) lpattern(dash) xlabel(2020(5)2045)) (line stat_HW year, lwidth(medthick) lcolor(sky) lpattern(dash) xlabel(2020(5)2045, labsize(medium)) ) (line stat_AW year, lwidth(medthick) lcolor(orange) lpattern(dash) xlabel(2020(5)2045))  (line stat_AIALW year, lwidth(medthick) lcolor(green) lpattern(dash) xlabel(2020(5)2045)), xtitle("")  ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Bw gap" 2 "Hw gap" 3 "Aw gap" 5 "AIALw gap" ) size(medsmall)) title("BAU historic production")
-		graph export $figuresDir/bau_his_prod_gap.png, as(png)
-restore
-
-*FIGURE 3
-preserve
-keep if scen_id=="BAU low exports"
-	twoway (line stat_BW year, lwidth(medthick) lcolor(gs10) lpattern(dash) xlabel(2020(5)2045)) (line stat_HW year, lwidth(medthick) lcolor(sky) lpattern(dash) xlabel(2020(5)2045, labsize(medium)) ) (line stat_AW year, lwidth(medthick) lcolor(orange) lpattern(dash) xlabel(2020(5)2045))  (line stat_AIALW year, lwidth(medthick) lcolor(green) lpattern(dash) xlabel(2020(5)2045)), xtitle("")  ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Bw gap" 2 "Hw gap" 3 "Aw gap" 5 "AIALw gap" ) size(medsmall)) title("BAU low exports")
-		graph export $figuresDir/bau_low_exports_gap.png, as(png)
-restore
-
-*FIGURE 4
-preserve
-keep if scen_id=="LC1 historic exports"
-	twoway (line stat_BW year, lwidth(medthick) lcolor(gs10) lpattern(dash) xlabel(2020(5)2045)) (line stat_HW year, lwidth(medthick) lcolor(sky) lpattern(dash) xlabel(2020(5)2045, labsize(medium)) ) (line stat_AW year, lwidth(medthick) lcolor(orange) lpattern(dash) xlabel(2020(5)2045))  (line stat_AIALW year, lwidth(medthick) lcolor(green) lpattern(dash) xlabel(2020(5)2045)), xtitle("")  ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Bw gap" 2 "Hw gap" 3 "Aw gap" 5 "AIALw gap" ) size(medsmall)) title("LC1 historic exports")
-		graph export $figuresDir/lc1_his_exports_gap.png, as(png)
-restore
-
-*FIGURE 5
-preserve
-keep if scen_id=="LC1 historic production"
-	twoway (line stat_BW year, lwidth(medthick) lcolor(gs10) lpattern(dash) xlabel(2020(5)2045)) (line stat_HW year, lwidth(medthick) lcolor(sky) lpattern(dash) xlabel(2020(5)2045, labsize(medium)) ) (line stat_AW year, lwidth(medthick) lcolor(orange) lpattern(dash) xlabel(2020(5)2045))  (line stat_AIALW year, lwidth(medthick) lcolor(green) lpattern(dash) xlabel(2020(5)2045)), xtitle("")  ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Bw gap" 2 "Hw gap" 3 "Aw gap" 5 "AIALw gap" ) size(medsmall)) title("LC1 historic production")
-		graph export $figuresDir/lc1_his_prod_gap.png, as(png)
-restore
-
-*FIGURE 6
-preserve
-keep if scen_id=="LC1 low exports"
-	twoway (line stat_BW year, lwidth(medthick) lcolor(gs10) lpattern(dash) xlabel(2020(5)2045)) (line stat_HW year, lwidth(medthick) lcolor(sky) lpattern(dash) xlabel(2020(5)2045, labsize(medium)) ) (line stat_AW year, lwidth(medthick) lcolor(orange) lpattern(dash) xlabel(2020(5)2045))  (line stat_AIALW year, lwidth(medthick) lcolor(green) lpattern(dash) xlabel(2020(5)2045)), xtitle("")  ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Bw gap" 2 "Hw gap" 3 "Aw gap" 5 "AIALw gap" ) size(medsmall)) title("LC1 low exports")
-		graph export $figuresDir/lc1_low_exports_gap.png, as(png)
-restore
-
-
-
-	
-*******************************************************************************	
-	***INCOME
-use $dataDir/processed/pop_income_2021, clear	
-*2021: 1400000US01001020100
-	gen census_tract=substr(geoid,10,.)
-	destring census_tract, replace
-		drop year
-	duplicates tag census_tract, gen(dup)
-	sort geoid
-	browse if dup==1
-	
-	saveold $dataDir/processed/pop_income_2021_mod, replace
-	
-use $dataDir/processed/refining_health_income_2023, clear
-	merge m:1 census_tract using $dataDir/processed/pop_income_2021_mod
-	
-/*
-*issue here
-
-    -----------------------------------------
-    not matched                       269,189
-        from master                   190,674  (_merge==1)
-        from using                     78,515  (_merge==2)
-
-    matched                         1,114,560  (_merge==3)
-    -----------------------------------------
-*/	
-	
-	
-	
-egen deciles_inc=xtile(median_hh_income), nq(10) by(year)
-
-forvalues d=1/10{
-	gen dec_`d'=(deciles_inc==`d')
-	gen deciles_num_`d'=total_pm25*dec_`d'*total_pop
-	gen deciles_den_`d'=dec_`d'*total_pop
-}
-
-egen quantiles_inc=xtile(median_income), nq(4) by(year)
-
-forvalues d=1/4{
-	gen quan_`d'=(deciles_inc==`d')
-	gen quantiles_num_`d'=total_pm25*quan_`d'*total_pop
-	gen quantiles_den_`d'=quan_`d'*total_pop
-}
-
-collapse (sum) deciles_num_* deciles_den_* quantiles_den_* quantiles_num_*, by(scen_id demand_scenario refining_scenario year)
-
-gen D1=deciles_num_1/deciles_den_1
-gen D10=deciles_num_10/deciles_den_10
-gen Q1=quantiles_num_1/quantiles_den_1
-gen Q2=quantiles_num_2/quantiles_den_2
-gen Q3=quantiles_num_3/quantiles_den_3
-gen Q4=quantiles_num_4/quantiles_den_4
-
-
-gen stat_deciles=D1-D10	
-gen stat_quantiles=Q1-Q4
-
-la var stat_deciles "Decile 1-Decile 10 Gap"
-la var stat_deciles "Q 1-Q 10 Gap"
-
-
-*FIGURE 1
-preserve
-keep if scen_id=="BAU historic exports"
-	twoway (line Q1 year, lwidth(medthick) lcolor(gs10) lpattern(solid) xlabel(2020(5)2045)) (line Q2 year, lwidth(medthick) lcolor(sky) lpattern(solid) xlabel(2020(5)2045, labsize(medium)) ) (line Q3 year, lwidth(medthick) lcolor(black) lpattern(solid) xlabel(2020(5)2045)) (line Q4 year, lwidth(medthick) lcolor(orange) lpattern(solid) xlabel(2020(5)2045)), xtitle("")  ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Q1" 2 "Q2" 3 "Q3" 4 "Q4" ) size(medsmall)) title("BAU historic exports")
-		graph export $figuresDir/bau_his_exports_inc.png, as(png) replace
-restore
-
-*FIGURE 2
-preserve
-keep if scen_id=="BAU historic production"
-	twoway (line Q1 year, lwidth(medthick) lcolor(gs10) lpattern(solid) xlabel(2020(5)2045)) (line Q2 year, lwidth(medthick) lcolor(sky) lpattern(solid) xlabel(2020(5)2045, labsize(medium)) ) (line Q3 year, lwidth(medthick) lcolor(black) lpattern(solid) xlabel(2020(5)2045)) (line Q4 year, lwidth(medthick) lcolor(orange) lpattern(solid) xlabel(2020(5)2045)), xtitle("")  ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Q1" 2 "Q2" 3 "Q3" 4 "Q4" ) size(medsmall)) title("BAU historic production")
-		graph export $figuresDir/bau_his_prod_inc.png, as(png) replace
-restore
-
-*FIGURE 3
-preserve
-keep if scen_id=="BAU low exports"
-	twoway (line Q1 year, lwidth(medthick) lcolor(gs10) lpattern(solid) xlabel(2020(5)2045)) (line Q2 year, lwidth(medthick) lcolor(sky) lpattern(solid) xlabel(2020(5)2045, labsize(medium)) ) (line Q3 year, lwidth(medthick) lcolor(black) lpattern(solid) xlabel(2020(5)2045)) (line Q4 year, lwidth(medthick) lcolor(orange) lpattern(solid) xlabel(2020(5)2045)), xtitle("")  ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Q1" 2 "Q2" 3 "Q3" 4 "Q4" ) size(medsmall))  title("BAU low exports")
-		graph export $figuresDir/bau_low_exports_inc.png, as(png) replace
-restore
-
-*FIGURE 4
-preserve
-keep if scen_id=="LC1 historic exports"
-	twoway (line Q1 year, lwidth(medthick) lcolor(gs10) lpattern(solid) xlabel(2020(5)2045)) (line Q2 year, lwidth(medthick) lcolor(sky) lpattern(solid) xlabel(2020(5)2045, labsize(medium)) ) (line Q3 year, lwidth(medthick) lcolor(black) lpattern(solid) xlabel(2020(5)2045)) (line Q4 year, lwidth(medthick) lcolor(orange) lpattern(solid) xlabel(2020(5)2045)), xtitle("")  ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Q1" 2 "Q2" 3 "Q3" 4 "Q4" ) size(medsmall))  title("LC1 historic exports")
-		graph export $figuresDir/lc1_his_exports_inc.png, as(png) replace
-restore
-
-*FIGURE 5
-preserve
-keep if scen_id=="LC1 historic production"
-	twoway (line Q1 year, lwidth(medthick) lcolor(gs10) lpattern(solid) xlabel(2020(5)2045)) (line Q2 year, lwidth(medthick) lcolor(sky) lpattern(solid) xlabel(2020(5)2045, labsize(medium)) ) (line Q3 year, lwidth(medthick) lcolor(black) lpattern(solid) xlabel(2020(5)2045)) (line Q4 year, lwidth(medthick) lcolor(orange) lpattern(solid) xlabel(2020(5)2045)), xtitle("")  ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Q1" 2 "Q2" 3 "Q3" 4 "Q4" ) size(medsmall))  title("LC1 historic production")
-		graph export $figuresDir/lc1_his_prod_inc.png, as(png) replace
-restore
-
-*FIGURE 6
-preserve
-keep if scen_id=="LC1 low exports"
-	twoway (line Q1 year, lwidth(medthick) lcolor(gs10) lpattern(solid) xlabel(2020(5)2045)) (line Q2 year, lwidth(medthick) lcolor(sky) lpattern(solid) xlabel(2020(5)2045, labsize(medium)) ) (line Q3 year, lwidth(medthick) lcolor(black) lpattern(solid) xlabel(2020(5)2045)) (line Q4 year, lwidth(medthick) lcolor(orange) lpattern(solid) xlabel(2020(5)2045)), xtitle("")  ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Q1" 2 "Q2" 3 "Q3" 4 "Q4" ) size(medsmall))  title("LC1 low exports")
-		graph export $figuresDir/lc1_low_exports_inc.png, as(png) replace
-restore
-
-
-
diff --git a/stata/repo_clean_acs5_race_refining.do b/stata/repo_clean_acs5_race_refining.do
deleted file mode 100644
index f368c2c..0000000
--- a/stata/repo_clean_acs5_race_refining.do
+++ /dev/null
@@ -1,67 +0,0 @@
-/***************************   HEADER **********************************/
-	clear all
-
-	//set local directory
-	if regexm("`c(pwd)'","")==1  { //Paige's machine
-		global startDir ""
-		global repoDir ""
-	}
-	else if regexm(c(os),"Windows")==1 { //Danae's machine
-		global startDir "drives/emlab/projects/current-projects/calepa-cn"
-		global repoDir "C:\Users\Danae\Documents\GitHub\ca-refining"
-	}
-	if "$startDir"=="" exit 
-	disp "local path is $startDir"
-
-	//set subfolders
-	global rawDir "$startDir/data/Census/nhgis_2020"
-	global processedDir "$startDir/data/Census"
-	
-	***************2020
-	import delimited using $rawDir/nhgis0024_csv/nhgis0024_ds249_20205_tract, clear
-	
-	gen total_pop=amp3e001
-	gen hispanic=amp3e012
-	gen white=amp3e003
-	gen black=amp3e004
-	gen aialnative=amp3e005
-	gen asian=amp3e006
-	
-	gen median_income=amr8e001
-	
-	keep gisjoin total_pop hispanic white black aialnative asian median_income geoid year state
-	
-	saveold $processedDir/pop_income_2020, replace
-		keep if state=="California"
-		export delimited $processedDir/pop_CA_geoid.csv, replace
-	
-	***************2021
-	import delimited using $rawDir/nhgis0024_csv/nhgis0024_ds254_20215_tract, clear
-	
-	gen total_pop=aooce001
-	gen hispanic=aooce012
-	gen white=aooce003
-	gen black=aooce004
-	gen aialnative=aooce005
-	gen asian=aooce006
-	
-	gen median_income=aoqie001
-	rename geo_id geoid
-	keep gisjoin total_pop hispanic white black aialnative asian median_income geoid year
-	
-	saveold $processedDir/pop_income_2021, replace	
-	
-	
-	****************2017-2021 poverty 
-	import delimited using $rawDir/nhgis0029_csv/nhgis0029_csv/nhgis0029_ds254_20215_tract.csv
-	
-	
-	rename aoqge001 total_pop 
-	rename aoqge002 total_below_poverty
-	rename aoqge003 total_above_poverty
-
-	rename geo_id geoid
-	keep gisjoin geoid year total_above_poverty total_below_poverty total_pop
-	
-	saveold $processedDir/pop_poverty_2020, replace
-	
\ No newline at end of file
diff --git a/stata/repo_merge_acs_refining_poverty_updated_ct.do b/stata/repo_merge_acs_refining_poverty_updated_ct.do
deleted file mode 100644
index cc937ef..0000000
--- a/stata/repo_merge_acs_refining_poverty_updated_ct.do
+++ /dev/null
@@ -1,177 +0,0 @@
-/***************************   HEADER **********************************/
-	clear all
-
-	//set local directory
-	if regexm("`c(pwd)'","")==1  { //Paige's machine
-		global startDir ""
-		global repoDir ""
-	}
-
-	else if regexm(c(os),"Windows")==1 { //Danae's machine
-		global startDir "drives/emlab/projects/current-projects/calepa-cn"
-		global repoDir "C:\Users\Danae\Documents\GitHub\ca-refining"
-	}
-	if "$startDir"=="" exit 
-	disp "local path is $startDir"
-
-	//set subfolders
-	global rawDir "$startDir/data/Census/nhgis_2020"
-	global processedDir "$startDir/data/Census"
-	global concentrationsDir "$startDir/outputs/refining-2023/health"
-	global figuresDir "$repoDir/stata/figures"
-	global graphDir "$repoDir/stata/gph"
-*******************************************************************************	
-	***INCOME
-use $processedDir/pop_poverty_2020, clear	
-	gen census_tract=substr(geoid,10,.)
-	destring census_tract, replace
-		drop year
-	duplicates tag census_tract, gen(dup)
-	sort geoid
-	browse if dup==1
-	
-	saveold $processedDir/pop_poverty_2021_mod, replace
-	
-use $concentrationsDir/refining_health_census_tract, clear
-	merge m:1 census_tract using $processedDir/pop_poverty_2021_mod
-	
-/*
-    Result                           # of obs.
-    -----------------------------------------
-    not matched                        76,286
-        from master                         0  (_merge==1)
-        from using                     76,286  (_merge==2)
-
-    matched                         1,475,658  (_merge==3)
-    -----------------------------------------
-
-*/	
-
-gen total_below_poverty_pct=total_below_poverty/total_pop
-gen total_above_poverty_pct=total_above_poverty/total_pop
-rename total_pm25 totalpm25
-	
-local group "total_above_poverty total_below_poverty"
-
-foreach g in `group'{
-	gen `g'_num=totalpm25*`g'_pct*total_pop
-	gen `g'_den=`g'_pct*total_pop
-}
-collapse (sum) total_above_poverty_num total_above_poverty_den total_below_poverty_num total_below_poverty_den, by(scen_id demand_scenario refining_scenario year)
-
-gen NP=total_above_poverty_num/total_above_poverty_den
-gen P=total_below_poverty_num/total_below_poverty_den
-
-gen stat_PNP=P-NP
-
-
-*FIGURE 1
-preserve
-keep if scen_id=="BAU historic exports"
-	twoway (line P year, lwidth(medthick) lcolor(gs10) lpattern(solid) xlabel(2020(5)2045)) (line NP year, lwidth(medthick) lcolor(sky) lpattern(solid) xlabel(2020(5)2045, labsize(medium)) ), xtitle("")  ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Below poverty" 2 "Above poverty" ) size(medsmall)) title("BAU historic exports")
-		graph export $figuresDir/bau_his_exports_pov.png, as(png) replace
-restore
-
-*FIGURE 2
-preserve
-keep if scen_id=="BAU historic production"
-	twoway (line P year, lwidth(medthick) lcolor(gs10) lpattern(solid) xlabel(2020(5)2045)) (line NP year, lwidth(medthick) lcolor(sky) lpattern(solid) xlabel(2020(5)2045, labsize(medium)) ), xtitle("")  ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Below poverty" 2 "Above poverty" ) size(medsmall)) title("BAU historic production")
-		graph export $figuresDir/bau_his_prod_pov.png, as(png) replace
-restore
-
-*FIGURE 3
-preserve
-keep if scen_id=="BAU low exports"
-	twoway (line P year, lwidth(medthick) lcolor(gs10) lpattern(solid) xlabel(2020(5)2045)) (line NP year, lwidth(medthick) lcolor(sky) lpattern(solid) xlabel(2020(5)2045, labsize(medium)) ), xtitle("")  ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Below poverty" 2 "Above poverty" ) size(medsmall)) title("BAU low exports")
-		graph export $figuresDir/bau_low_exports_pov.png, as(png) replace
-restore
-
-*FIGURE 4
-preserve
-keep if scen_id=="LC1 historic exports"
-	twoway (line P year, lwidth(medthick) lcolor(gs10) lpattern(solid) xlabel(2020(5)2045)) (line NP year, lwidth(medthick) lcolor(sky) lpattern(solid) xlabel(2020(5)2045, labsize(medium)) ), xtitle("")  ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Below poverty" 2 "Above poverty" ) size(medsmall))   title("LC1 historic exports")
-		graph export $figuresDir/lc1_his_exports_pov.png, as(png) replace
-restore
-
-*FIGURE 5
-preserve
-keep if scen_id=="LC1 historic production"
-	twoway (line P year, lwidth(medthick) lcolor(gs10) lpattern(solid) xlabel(2020(5)2045)) (line NP year, lwidth(medthick) lcolor(sky) lpattern(solid) xlabel(2020(5)2045, labsize(medium)) ), xtitle("")  ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Below poverty" 2 "Above poverty" ) size(medsmall))  title("LC1 historic production")
-		graph export $figuresDir/lc1_his_prod_pov.png, as(png) replace
-restore
-
-*FIGURE 6
-preserve
-keep if scen_id=="LC1 low exports"
-	twoway (line P year, lwidth(medthick) lcolor(gs10) lpattern(solid) xlabel(2020(5)2045)) (line NP year, lwidth(medthick) lcolor(sky) lpattern(solid) xlabel(2020(5)2045, labsize(medium)) ), xtitle("")  ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Below poverty" 2 "Above poverty" ) size(medsmall))  title("LC1 low exports")
-		graph export $figuresDir/lc1_low_exports_pov.png, as(png) replace
-restore
-
-
-
-*GAP
-*FIGURE 1
-preserve
-keep if scen_id=="BAU historic exports"
-	twoway (line stat_PNP year, lwidth(medthick) lcolor(turquoise) lpattern(dash) xlabel(2020(5)2045)), xtitle("")  ytitle("Gap in PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Gap Below poverty and Above poverty" ) size(medsmall)) ylabel(0(0.01)0.04) title("BAU historic exports") saving($graphDir/bau_his_exp_pov_gap.gph, replace)
-		graph export $figuresDir/bau_his_exports_pov_gap.png, as(png) replace
-restore
-
-*FIGURE 2
-preserve
-keep if scen_id=="BAU historic production"
-	twoway (line stat_PNP year, lwidth(medthick) lcolor(turquoise) lpattern(dash) xlabel(2020(5)2045)), xtitle("")  ytitle("Gap in PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Gap Below poverty and Above poverty" ) size(medsmall)) ylabel(0(0.01)0.04) title("BAU historic production") saving($graphDir/bau_his_prod_pov_gap.gph, replace)
-		graph export $figuresDir/bau_his_prod_pov_gap.png, as(png) replace
-restore
-
-*FIGURE 3
-preserve
-keep if scen_id=="BAU low exports"
-	twoway (line stat_PNP year, lwidth(medthick) lcolor(turquoise) lpattern(dash) xlabel(2020(5)2045)), xtitle("")  ytitle("Gap in PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Gap Below poverty and Above poverty" ) size(medsmall)) ylabel(0(0.01)0.04) title("BAU low exports") saving($graphDir/bau_low_exp_pov_gap.gph, replace)
-		graph export $figuresDir/bau_low_exports_pov_gap.png, as(png) replace
-restore
-
-*FIGURE 4
-preserve
-keep if scen_id=="LC1 historic exports"
-	twoway (line stat_PNP year, lwidth(medthick) lcolor(turquoise) lpattern(dash) xlabel(2020(5)2045)), xtitle("")  ytitle("Gap in PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Gap Below poverty and Above poverty" ) size(medsmall)) ylabel(0(0.01)0.04) title("LC1 historic exports") saving($graphDir/lc1_his_exp_pov_gap.gph, replace)
-		graph export $figuresDir/lc1_his_exports_pov_gap.png, as(png) replace
-restore
-
-*FIGURE 5
-preserve
-keep if scen_id=="LC1 historic production"
-	twoway (line stat_PNP year, lwidth(medthick) lcolor(turquoise) lpattern(dash) xlabel(2020(5)2045)), xtitle("")  ytitle("Gap in PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Gap Below poverty and Above poverty" ) size(medsmall)) ylabel(0(0.01)0.04) title("LC1 historic production") saving($graphDir/lc1_his_prod_pov_gap.gph, replace)
-		graph export $figuresDir/lc1_his_prod_pov_gap.png, as(png) replace
-restore
-
-*FIGURE 6
-preserve
-keep if scen_id=="LC1 low exports"
-	twoway (line stat_PNP year, lwidth(medthick) lcolor(turquoise) lpattern(dash) xlabel(2020(5)2045)), xtitle("")  ytitle("Gap in PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Gap Below poverty and Above poverty" ) size(medsmall)) ylabel(0(0.01)0.04) title("LC1 low exports") saving($graphDir/lc1_low_exp_pov_gap.gph, replace)
-		graph export $figuresDir/lc1_low_exports_pov_gap.png, as(png) replace
-restore
-
-*FINAL FIGURE WITH RELEVANT SCENARIOS
-
-grc1leg $graphDir/bau_his_prod_pov_gap.gph $graphDir/bau_his_exp_pov_gap.gph $graphDir/lc1_low_exp_pov_gap.gph, col(2)
-
-	graph export $figuresDir/all_gap_poverty.png, as(png) replace
-	
-************************************************************	
-keep if year==2045
-
-keep year scen_id stat_*
-
-sdf
-reshape long stat_, i(scen_id year) j(group, string)
-
-rename stat_ gap
-
-
-
-la var gap "Gap value in end point (2045)"
-gen groupt=" Poor-no poor gap" if group=="PNP"
-
-save $dataDir/processed/gaps_2045_poor, replace
-graph hbar (mean) gap, over(scen_id)  asyvars over(groupt)	
diff --git a/stata/repo_merge_acs_refining_updated_ct.do b/stata/repo_merge_acs_refining_updated_ct.do
deleted file mode 100644
index f1e088b..0000000
--- a/stata/repo_merge_acs_refining_updated_ct.do
+++ /dev/null
@@ -1,283 +0,0 @@
-/***************************   HEADER **********************************/
-	clear all
-
-	//set local directory
-	if regexm("`c(pwd)'","")==1  { //Paige's machine
-		global startDir ""
-		global repoDir ""
-	}
-
-	else if regexm(c(os),"Windows")==1 { //Danae's machine
-		global startDir "drives/emlab/projects/current-projects/calepa-cn"
-		global repoDir "C:\Users\Danae\Documents\GitHub\ca-refining"
-	}
-	if "$startDir"=="" exit 
-	disp "local path is $startDir"
-
-	//set subfolders
-	global rawDir "$startDir/data/Census/nhgis_2020"
-	global processedDir "$startDir/data/Census"
-	global concentrationsDir "$startDir/outputs/refining-2023/health"
-	global figuresDir "$repoDir/stata/figures"
-	global graphDir "$repoDir/stata/gph"
-********************************************************************************
-import delimited $concentrationsDir/refining_health_census_tract.csv, clear
-
-	saveold $concentrationsDir/refining_health_census_tract, replace
-	
-	
-	
-use $dataDir/processed/pop_income_2020, clear
-*2020: 14000US01001020100
-	gen census_tract=substr(geoid,8,.)
-	destring census_tract, replace
-		drop year
-	duplicates tag census_tract, gen(dup)
-	sort geoid
-	browse if dup==1
-	
-	saveold $processedDir/pop_income_2020_mod, replace
-	
-use $concentrationsDir/refining_health_census_tract, clear
-	merge m:1 census_tract using $processedDir/pop_income_2020_mod
-	/*
-
-    Result                           # of obs.
-    -----------------------------------------
-    not matched                        76,286
-        from master                         0  (_merge==1)
-        from using                     76,286  (_merge==2)
-
-    matched                         1,475,658  (_merge==3)
-    -----------------------------------------
-
-	*/
-	drop if _merge==2	
-	
-	gen minority_pct=(hispanic+black+aialnative+asian)/total_pop
-	gen black_pct=black/total_pop
-	gen hispanic_pct=hispanic/total_pop
-	gen asian_pct=asian/total_pop
-	gen aialnative_pct=aialnative/total_pop
-
-	rename total_pm25 totalpm25
-	
-	gen white_num=totalpm25*(1-minority_pct)*total_pop
-	gen white_den=(1-minority_pct)*total_pop
-
-local group "minority black hispanic asian aialnative"
-
-foreach g in `group'{
-	gen `g'_num=totalpm25*`g'_pct*total_pop
-	gen `g'_den=`g'_pct*total_pop
-}
-
-*creating dac index
-gen dac_population=total_pop if disadvantaged=="Yes"
-	replace dac_population=0 if disadvantaged=="No"
-gen dac_num=totalpm25*dac_population
-	gen dac_den=dac_population
-gen nodac_population=total_pop if disadvantaged=="No"
-	replace nodac_population=0 if disadvantaged=="Yes"
-gen nodac_num=totalpm25*nodac_population
-	gen nodac_den=nodac_population
-	
-
-collapse (sum) white_num white_den minority_num minority_den black_num black_den hispanic_num hispanic_den asian_num asian_den aialnative_num aialnative_den dac_num dac_den nodac_num nodac_den, by(scen_id demand_scenario refining_scenario year)
-
-gen W=white_num/white_den
-gen B=black_num/black_den
-gen H=hispanic_num/hispanic_den
-gen M=minority_num/minority_den
-gen A=asian_num/asian_den
-gen AIAL=aialnative_num/aialnative_den
-
-gen DAC=dac_num/dac_den
-gen nDAC=nodac_num/nodac_den
-
-gen stat_BW=B-W
-gen stat_MW=M-W
-gen stat_HW=H-W
-gen stat_AW=A-W
-gen stat_AIALW=AIAL-W
-gen stat_DAC=DAC-nDAC
-
-
-*LEVELS
-*FIGURE 1
-preserve
-keep if scen_id=="BAU historic exports"
-	twoway (line B year, lwidth(medthick) lcolor(gs10) lpattern(solid) xlabel(2020(5)2045)) (line H year, lwidth(medthick) lcolor(sky) lpattern(solid) xlabel(2025(10)2045, labsize(medium)) ) (line W year, lwidth(medthick) lcolor(black) lpattern(solid) xlabel(2020(5)2045)) (line A year, lwidth(medthick) lcolor(orange) lpattern(solid) xlabel(2020(5)2045))  (line AIAL year, lwidth(medthick) lcolor(green) lpattern(solid) xlabel(2020(10)2045)), xtitle("")  ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Black" 2 "Hispanic" 3 "white" 4 "Asian" 5 "American Indian" ) size(medsmall)) title("BAU historic exports") saving($graphDir/bau_his_exports.gph, replace)
-		graph export $figuresDir/bau_his_exports.png, as(png) replace
-restore
-
-*FIGURE 2
-preserve
-keep if scen_id=="BAU historic production"
-	twoway (line B year, lwidth(medthick) lcolor(gs10) lpattern(solid) xlabel(2020(5)2045)) (line H year, lwidth(medthick) lcolor(sky) lpattern(solid) xlabel(2020(10)2045, labsize(medium)) ) (line W year, lwidth(medthick) lcolor(black) lpattern(solid) xlabel(2020(5)2045)) (line A year, lwidth(medthick) lcolor(orange) lpattern(solid) xlabel(2020(5)2045))  (line AIAL year, lwidth(medthick) lcolor(green) lpattern(solid) xlabel(2020(10)2045)), xtitle("")  ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Black" 2 "Hispanic" 3 "white" 4 "Asian" 5 "American Indian" ) size(medsmall)) title("BAU historic production") saving($graphDir/bau_his_prod.gph, replace)
-		graph export $figuresDir/bau_his_prod.png, as(png) replace
-restore
-
-*FIGURE 3
-preserve
-keep if scen_id=="BAU low exports"
-	twoway (line B year, lwidth(medthick) lcolor(gs10) lpattern(solid) xlabel(2020(5)2045)) (line H year, lwidth(medthick) lcolor(sky) lpattern(solid) xlabel(2020(10)2045, labsize(medium)) ) (line W year, lwidth(medthick) lcolor(black) lpattern(solid) xlabel(2020(5)2045)) (line A year, lwidth(medthick) lcolor(orange) lpattern(solid) xlabel(2020(5)2045))  (line AIAL year, lwidth(medthick) lcolor(green) lpattern(solid) xlabel(2020(10)2045)), xtitle("")  ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Black" 2 "Hispanic" 3 "white" 4 "Asian" 5 "American Indian" ) size(medsmall)) title("BAU low exports") saving($graphDir/bau_low_exports.gph, replace)
-		graph export $figuresDir/bau_low_exports.png, as(png) replace
-restore
-
-*FIGURE 4
-preserve
-keep if scen_id=="LC1 historic exports"
-	twoway (line B year, lwidth(medthick) lcolor(gs10) lpattern(solid) xlabel(2020(5)2045)) (line H year, lwidth(medthick) lcolor(sky) lpattern(solid) xlabel(2020(5)2045, labsize(medium)) ) (line W year, lwidth(medthick) lcolor(black) lpattern(solid) xlabel(2020(5)2045)) (line A year, lwidth(medthick) lcolor(orange) lpattern(solid) xlabel(2020(5)2045))  (line AIAL year, lwidth(medthick) lcolor(green) lpattern(solid) xlabel(2020(5)2045)), xtitle("")  ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Black" 2 "Hispanic" 3 "white" 4 "Asian" 5 "American Indian" ) size(medsmall)) title("LC1 historic exports")
-		graph export $figuresDir/lc1_his_exports.png, as(png) replace
-restore
-
-*FIGURE 5
-preserve
-keep if scen_id=="LC1 historic production"
-	twoway (line B year, lwidth(medthick) lcolor(gs10) lpattern(solid) xlabel(2020(5)2045)) (line H year, lwidth(medthick) lcolor(sky) lpattern(solid) xlabel(2020(5)2045, labsize(medium)) ) (line W year, lwidth(medthick) lcolor(black) lpattern(solid) xlabel(2020(5)2045)) (line A year, lwidth(medthick) lcolor(orange) lpattern(solid) xlabel(2020(5)2045))  (line AIAL year, lwidth(medthick) lcolor(green) lpattern(solid) xlabel(2020(5)2045)), xtitle("")  ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Black" 2 "Hispanic" 3 "white" 4 "Asian" 5 "American Indian" ) size(medsmall)) title("LC1 historic production")
-		graph export $figuresDir/lc1_his_prod.png, as(png) replace
-restore
-
-*FIGURE 6
-preserve
-keep if scen_id=="LC1 low exports"
-	twoway (line B year, lwidth(medthick) lcolor(gs10) lpattern(solid) xlabel(2020(5)2045)) (line H year, lwidth(medthick) lcolor(sky) lpattern(solid) xlabel(2020(5)2045, labsize(medium)) ) (line W year, lwidth(medthick) lcolor(black) lpattern(solid) xlabel(2020(5)2045)) (line A year, lwidth(medthick) lcolor(orange) lpattern(solid) xlabel(2020(5)2045))  (line AIAL year, lwidth(medthick) lcolor(green) lpattern(solid) xlabel(2020(5)2045)), xtitle("")  ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Black" 2 "Hispanic" 3 "white" 4 "Asian" 5 "American Indian" ) size(medsmall)) title("LC1 low exports") saving($graphDir/lc1_low_exports.gph, replace)
-		graph export $figuresDir/lc1_low_exports.png, as(png) replace
-restore
-
-*FINAL FIGURE WITH RELEVANT SCENARIOS
-grc1leg $graphDir/bau_his_prod.gph $graphDir/bau_his_exports.gph $graphDir/lc1_low_exports.gph, col(2)
-	graph export $figuresDir/all_race.png, as(png) replace
-
-
-
-
-***GAPS
-*FIGURE 1
-preserve
-keep if scen_id=="BAU historic exports"
-	twoway (line stat_BW year, lwidth(medthick) lcolor(gs10) lpattern(dash) xlabel(2020(5)2045)) (line stat_HW year, lwidth(medthick) lcolor(sky) lpattern(dash) xlabel(2020(5)2045, labsize(medium)) ) (line stat_AW year, lwidth(medthick) lcolor(orange) lpattern(dash) xlabel(2020(15)2045))  (line stat_AIALW year, lwidth(medthick) lcolor(green) lpattern(dash) xlabel(2020(10)2045)), xtitle("")  ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Bw gap" 2 "Hw gap" 3 "Aw gap" 4 "AIALw gap" ) size(medsmall)) title("BAU historic exports") saving($graphDir/bau_his_exp_gap.gph, replace)
-		graph export $figuresDir/bau_his_exports_gap.png, as(png) replace
-restore
-
-*FIGURE 2
-preserve
-keep if scen_id=="BAU historic production"
-	twoway (line stat_BW year, lwidth(medthick) lcolor(gs10) lpattern(dash) xlabel(2020(5)2045)) (line stat_HW year, lwidth(medthick) lcolor(sky) lpattern(dash) xlabel(2020(5)2045, labsize(medium)) ) (line stat_AW year, lwidth(medthick) lcolor(orange) lpattern(dash) xlabel(2020(5)2045))  (line stat_AIALW year, lwidth(medthick) lcolor(green) lpattern(dash) xlabel(2020(10)2045)), xtitle("")  ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Bw gap" 2 "Hw gap" 3 "Aw gap" 4 "AIALw gap" ) size(medsmall)) title("BAU historic production") saving($graphDir/bau_his_prod_gap.gph, replace)
-		graph export $figuresDir/bau_his_prod_gap.png, as(png) replace
-restore
-
-*FIGURE 3
-preserve
-keep if scen_id=="BAU low exports"
-	twoway (line stat_BW year, lwidth(medthick) lcolor(gs10) lpattern(dash) xlabel(2020(5)2045)) (line stat_HW year, lwidth(medthick) lcolor(sky) lpattern(dash) xlabel(2020(5)2045, labsize(medium)) ) (line stat_AW year, lwidth(medthick) lcolor(orange) lpattern(dash) xlabel(2020(5)2045))  (line stat_AIALW year, lwidth(medthick) lcolor(green) lpattern(dash) xlabel(2020(10)2045)), xtitle("")  ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Bw gap" 2 "Hw gap" 3 "Aw gap" 4 "AIALw gap" ) size(medsmall)) title("BAU low exports") saving($graphDir/bau_low_exp_gap.gph, replace)
-		graph export $figuresDir/bau_low_exports_gap.png, as(png) replace
-restore
-
-*grc1leg $graphDir/bau_his_exp_gap.gph $graphDir/bau_his_prod_gap.gph $graphDir/bau_low_exp_gap.gph, col(3)
-
-*FIGURE 4
-preserve
-keep if scen_id=="LC1 historic exports"
-	twoway (line stat_BW year, lwidth(medthick) lcolor(gs10) lpattern(dash) xlabel(2020(5)2045)) (line stat_HW year, lwidth(medthick) lcolor(sky) lpattern(dash) xlabel(2020(5)2045, labsize(medium)) ) (line stat_AW year, lwidth(medthick) lcolor(orange) lpattern(dash) xlabel(2020(5)2045))  (line stat_AIALW year, lwidth(medthick) lcolor(green) lpattern(dash) xlabel(2020(10)2045)), xtitle("")  ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Bw gap" 2 "Hw gap" 3 "Aw gap" 4 "AIALw gap" ) size(medsmall)) title("LC1 historic exports") saving($graphDir/lc1_his_exp_gap.gph, replace)
-		graph export $figuresDir/lc1_his_exp_gap.png, as(png) replace
-restore
-
-*FIGURE 5
-preserve
-keep if scen_id=="LC1 historic production"
-	twoway (line stat_BW year, lwidth(medthick) lcolor(gs10) lpattern(dash) xlabel(2020(5)2045)) (line stat_HW year, lwidth(medthick) lcolor(sky) lpattern(dash) xlabel(2020(5)2045, labsize(medium)) ) (line stat_AW year, lwidth(medthick) lcolor(orange) lpattern(dash) xlabel(2020(5)2045))  (line stat_AIALW year, lwidth(medthick) lcolor(green) lpattern(dash) xlabel(2020(10)2045)), xtitle("")  ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Bw gap" 2 "Hw gap" 3 "Aw gap" 4 "AIALw gap" ) size(medsmall)) title("LC1 historic production") saving($graphDir/lc1_his_prod_gap.gph, replace)
-		graph export $figuresDir/lc1_his_prod_gap.png, as(png) replace
-restore
-
-*FIGURE 6
-preserve
-keep if scen_id=="LC1 low exports"
-	twoway (line stat_BW year, lwidth(medthick) lcolor(gs10) lpattern(dash) xlabel(2020(5)2045)) (line stat_HW year, lwidth(medthick) lcolor(sky) lpattern(dash) xlabel(2020(5)2045, labsize(medium)) ) (line stat_AW year, lwidth(medthick) lcolor(orange) lpattern(dash) xlabel(2020(5)2045))  (line stat_AIALW year, lwidth(medthick) lcolor(green) lpattern(dash) xlabel(2020(10)2045)), xtitle("")  ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Bw gap" 2 "Hw gap" 3 "Aw gap" 4 "AIALw gap" ) size(medsmall)) title("LC1 low exports") saving($graphDir/lc1_low_exp_gap.gph, replace)
-		graph export $figuresDir/lc1_low_exp_gap.png, as(png) replace
-restore
-
-*FINAL FIGURE WITH RELEVANT SCENARIOS
-
-grc1leg $graphDir/bau_his_prod_gap.gph $graphDir/bau_his_exp_gap.gph $graphDir/lc1_low_exp_gap.gph, col(2)
-	graph export $figuresDir/all_gap_race.png, as(png) replace
-
-
-
-
-
-
-***GAPS DAC AND NON DAC
-*FIGURE 1
-preserve
-keep if scen_id=="BAU historic exports"
-	twoway (line stat_DAC year, lwidth(medthick) lcolor(sea) lpattern(dash) xlabel(2020(5)2045)), xtitle("")  ytitle("Gap in PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Gap DAC and non-DAC" ) size(medsmall)) ylabel(0(.1).4) title("BAU historic exports") saving($graphDir/bau_his_exp_dac_gap.gph, replace)
-		graph export $figuresDir/bau_his_exports_dac_gap.png, as(png) replace
-restore
-
-*FIGURE 2
-preserve
-keep if scen_id=="BAU historic production"
-	twoway (line stat_DAC year, lwidth(medthick) lcolor(sea) lpattern(dash) xlabel(2020(5)2045)), xtitle("")  ytitle("Gap in PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Gap DAC and non-DAC" ) size(medsmall)) ylabel(0(.1).4) title("BAU historic production") saving($graphDir/bau_his_prod_dac_gap.gph, replace)
-		graph export $figuresDir/bau_his_prod_dac_gap.png, as(png) replace
-restore
-
-*FIGURE 3
-preserve
-keep if scen_id=="BAU low exports"
-	twoway (line stat_DAC year, lwidth(medthick) lcolor(sea) lpattern(dash) xlabel(2020(5)2045)), xtitle("")  ytitle("Gap in PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Gap DAC and non-DAC" ) size(medsmall)) ylabel(0(.1).4) title("BAU low exports") saving($graphDir/bau_low_exp_dac_gap.gph, replace)
-		graph export $figuresDir/bau_low_exports_dac_gap.png, as(png) replace
-restore
-
-*FIGURE 4
-preserve
-keep if scen_id=="LC1 historic exports"
-	twoway (line stat_DAC year, lwidth(medthick) lcolor(sea) lpattern(dash) xlabel(2020(5)2045)), xtitle("")  ytitle("Gap in PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Gap DAC and non-DAC" ) size(medsmall)) ylabel(0(.1).4) title("LC1 historic exports") saving($graphDir/lc1_his_exp_dac_gap.gph, replace)
-		graph export $figuresDir/lc1_his_exports_dac_gap.png, as(png) replace
-restore
-
-*FIGURE 5
-preserve
-keep if scen_id=="LC1 historic production"
-	twoway (line stat_DAC year, lwidth(medthick) lcolor(sea) lpattern(dash) xlabel(2020(5)2045)), xtitle("")  ytitle("Gap in PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Gap DAC and non-DAC") size(medsmall)) ylabel(0(.1).4) title("LC1 historic production") saving($graphDir/lc1_his_prod_dac_gap.gph, replace)
-		graph export $figuresDir/lc1_his_prod_dac_gap.png, as(png) replace
-restore
-
-*FIGURE 6
-preserve
-keep if scen_id=="LC1 low exports"
-	twoway (line stat_DAC year, lwidth(medthick) lcolor(sea) lpattern(dash) xlabel(2020(5)2045)), xtitle("")  ytitle("Gap in PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Gap DAC and non-DAC") size(medsmall))  ylabel(0(.1).4) title("LC1 low exports") saving($graphDir/lc1_low_exp_dac_gap.gph, replace)
-		graph export $figuresDir/lc1_low_exports_dac_gap.png, as(png) replace
-restore
-
-*FINAL FIGURE WITH RELEVANT SCENARIOS
-
-grc1leg $graphDir/bau_his_prod_dac_gap.gph $graphDir/bau_his_exp_dac_gap.gph $graphDir/lc1_low_exp_dac_gap.gph, col(2)
-
-	graph export $figuresDir/all_gap_dac.png, as(png) replace
-
-*/
-
-
-*******************************
-
-keep if year==2045
-
-keep year scen_id stat_*
-
-reshape long stat_, i(scen_id year) j(group, string)
-
-rename stat_ gap
-
-la var gap "Gap value in end point (2045)"
-gen groupt=" Black-white gap" if group=="BW"
-	replace groupt="Asian-white gap" if group=="AW"
-	replace groupt="AIAN-white gap" if group=="AIALW"
-	replace groupt="Hispanic-white gap" if group=="HW"
-	replace groupt="Minority-white gap" if group=="MW"
-	replace groupt="DAC-nonDAC gap" if group=="DAC"
-	
-append using $processedDir/gaps_2045_poor	
-graph hbar (mean) gap, over(scen_id)  asyvars over(groupt)
-sdf
\ No newline at end of file