diff --git a/R/figures/fig1.R b/R/figures/fig1.R index 52529b0..e3a79c7 100644 --- a/R/figures/fig1.R +++ b/R/figures/fig1.R @@ -45,7 +45,6 @@ create_figure_1 <- function( raw_ct_2019, health_weighted, refining_mortality, - labor_2019, ca_regions, raw_pop_income_2021, cpi2020, @@ -1554,11 +1553,6 @@ create_figure_1 <- function( ## merge counties to census tracts ## ----------------------------------------------------------------- - # ## join with spatial data 2019 - # census_tract_labor_2019_sp <- raw_ct_2019 |> - # rename(census_tract = GEOID) |> - # left_join(census_tract_labor_2020) - # Create the expanded data census_tracts_l_expanded <- expand_grid( census_tracts, diff --git a/R/figures/health_labor_revised_figs.R b/R/figures/health_labor_revised_figs.R index 16412af..90963f4 100644 --- a/R/figures/health_labor_revised_figs.R +++ b/R/figures/health_labor_revised_figs.R @@ -1300,8 +1300,8 @@ plot_npv_health_labor <- function( x = "GHG emissions reduction (%, 2045 vs 2019)" ) + scale_y_continuous( - limits = c(0, 50), - breaks = seq(0, 50, by = 10) + limits = c(0, 40), + breaks = seq(0, 40, by = 10) ) + xlim(0, 80) + scale_color_manual( @@ -1380,8 +1380,8 @@ plot_npv_health_labor <- function( x = "GHG emissions reduction (%, 2045 vs 2019)" ) + scale_y_continuous( - limits = c(-50, 0), - breaks = seq(-50, 0, by = 10) + limits = c(-40, 0), + breaks = seq(-40, 0, by = 10) ) + xlim(0, 80) + scale_color_manual( @@ -1451,8 +1451,8 @@ plot_npv_health_labor <- function( x = "GHG emissions reduction (%, 2045 vs 2019)" ) + scale_y_continuous( - limits = c(-50, 0), - breaks = seq(-50, 0, by = 10) + limits = c(-40, 0), + breaks = seq(-40, 0, by = 10) ) + xlim(0, 80) + scale_color_manual( @@ -1523,8 +1523,8 @@ plot_npv_health_labor <- function( x = "GHG emissions reduction (%, 2045 vs 2019)" ) + scale_y_continuous( - limits = c(-50, 0), - breaks = seq(-50, 0, by = 10) + limits = c(-40, 0), + breaks = seq(-40, 0, by = 10) ) + xlim(0, 80) + scale_color_manual( @@ -1580,7 +1580,7 @@ plot_npv_health_labor <- function( y = NULL, x = "GHG emissions reduction (%, 2045 vs 2019)" ) + - ylim(-50, 0) + + ylim(-40, 0) + xlim(0, 80) + scale_color_manual( values = refin_colors, @@ -1655,7 +1655,7 @@ plot_npv_health_labor <- function( y = NULL, x = "GHG emissions reduction (%, 2045 vs 2019)" ) + - ylim(-50, 0) + + ylim(-40, 0) + xlim(0, 80) + scale_color_manual( values = refin_colors, @@ -4727,8 +4727,8 @@ plot_npv_health_labor_non_age_vsl <- function( x = "GHG emissions reduction (%, 2045 vs 2019)" ) + scale_y_continuous( - limits = c(-50, 0), - breaks = seq(-50, 0, by = 10) + limits = c(-40, 0), + breaks = seq(-40, 0, by = 10) ) + xlim(0, 80) + scale_color_manual( diff --git a/README.md b/README.md index c300b83..1af2f44 100644 --- a/README.md +++ b/README.md @@ -1,48 +1,90 @@ # Distributional and climate impacts of low-carbon transition pathways for California's oil refining (repo) ## Setting up -This repo relies on the R package ``targets`` to maintain the pipeline of the scripts and the reproducibility of the project. -Install the package if you have not already done so: -``` +This repo relies on the R package `targets` to maintain the pipeline of the scripts and the reproducibility of the project. Install the package if you have not already done so: + +``` install.packages("targets") ``` Load the package: -``` +``` library(targets) ``` -All of the functions for the pipeline are in the ``R/`` folder. -To open the ``_targets.R`` script (which is where the workflow is built and specified), run: +All of the functions for the pipeline are in the `R/` folder. To open the `_targets.R` script (which is where the workflow is built and specified), run: -``` +``` tar_edit() ``` -## Changing the file directory - -**IMPORTANT**: Before running the pipeline, one thing needs to be changed -- the path to the ``calepa-cn`` folder. -Look for the ``user`` target: - -``` -tar_target(name = user, "meas"), -``` - -And replace the name in the quotations above with a specified user. +## Changing important user-specific `targets` + +**IMPORTANT**: Before running the pipeline, several things should be modified +to reflect user-specific specifications. The `setup_data_paths.R` file, which is +sources in the main `_targets.R` folder, should auto-configure paths. Make sure +the data folder has been moved into your main repo (note that this folder is not +tracked by git and therefore is not pulled into the repo). + +Next, set the `confidential_data_access` target option based on whether or not +you have access to the confidential datasets. The code is as follows: + +``` +tar_target(name = confidential_data_access, + command = FALSE), +``` + +where `FALSE` indicates that the user does not have access and `TRUE` indicates +access. Users with access will have a subfolder within the data folder called +`confidential-data`. + +Next, set the following target to reflect which values for `beta` and `cuf` +you are running: + +``` +## set module settings for specific run (cuf and beta) + tar_target(name = beta_scenario, command = "main"), ## UPDATE WITH ("main", "high", or "low") + tar_target( + name = beta, + command = ifelse( + beta_scenario == "low", + 0.00422068, + ifelse(beta_scenario == "high", 0.00737932, 0.00582) + ) + ), + + # Coefficient from Krewski et al (2009) for mortality impact + tar_target(name = ref_threshold, command = 0.6), + ``` + + where the target `beta_scenario` can be "main", "low", or "high" and + will be used for file saving name conventions and sets the `beta` target below. + + Finally, specify the `version` target to determine folder names for saving outputs: + + ``` + # list save paths (UPDATE VERSION AS NEEDED) + tar_target(name = version, command = "test-no-conf-data"), + +``` +where in this example will create a folder with `test-no-conf-data` in the name. + ## Using the repo to recreate the analysis ### Debugging the pipeline + In order to check the pipeline is engineered properly, run the following command: -``` +``` tar_manifest(fields=command) ``` The output should look something like: -``` + +``` # A tibble: 90 × 2 name command @@ -59,19 +101,20 @@ The output should look something like: # … with 80 more rows # ℹ Use `print(n = ...)` to see more rows ``` + If there are any issues (missing targets, bugs, etc), you should receive an error message. ### Running the pipeline To build and run the pipeline (this will execute everything), run: -``` +``` tar_make() ``` If you are running this for the first time, it should take a few minutes, but the outputs should look something like: -``` +``` • start target ei_crude • built target ei_crude [0.019 seconds] • start target ei_diesel @@ -87,23 +130,23 @@ If you are running this for the first time, it should take a few minutes, but th ... ``` -Assuming none of the targets change, the next time(s) you run ``tar_make()``, ``targets`` will skip building targets that are already up-to-date. +Assuming none of the targets change, the next time(s) you run `tar_make()`, `targets` will skip building targets that are already up-to-date. ### Viewing and loading targets -If you are new to ``targets`` you might be confused that there are no objects in your environment. That's because the objects are stored locally in a folder called ``_targets`` (in your local repo). +If you are new to `targets` you might be confused that there are no objects in your environment. That's because the objects are stored locally in a folder called `_targets` (in your local repo). -But let's say you want to inspect a specific object, like ``dt_its``. If you want to just view it in your console, you can enter: +But let's say you want to inspect a specific object, like `dt_its`. If you want to just view it in your console, you can enter: -``` +``` tar_read(dt_its) ``` -And that should print the ``data.table``. +And that should print the `data.table`. -If you want to load the ``data.table`` into your environment, you can run the following instead: +If you want to load the `data.table` into your environment, you can run the following instead: -``` +``` tar_load(dt_its) ``` @@ -111,7 +154,7 @@ You'll notice the object is in your environment. You can also view plots. Running the following line should either load the plot in your Plots window or open a new window with the plot: -``` +``` tar_read(fig_demand) ``` @@ -119,58 +162,57 @@ tar_read(fig_demand) If you want to visualize the pipeline, run: -``` +``` tar_visnetwork() ``` -You'll notice the diagram is very small -- you can use your mouse to zoom in on the objects if you'd like. If you make changes to the targets/pipeline and run ``tar_visnetwork()`` before running ``tar_make``, you can see the colors of the objects change. +You'll notice the diagram is very small -- you can use your mouse to zoom in on the objects if you'd like. If you make changes to the targets/pipeline and run `tar_visnetwork()` before running `tar_make`, you can see the colors of the objects change. ## Example of target changes and impacts on the pipeline Want an example of what happens when a target is changed? Here's an easy one: -1. Find the target ``ei_crude`` in ``_targets.R``: +1. Find the target `ei_crude` in `_targets.R`: -``` +``` tar_target(name = ei_crude, command = 5.698) ``` -2. Change the command value to something else, say 10 for example: +2. Change the command value to something else, say 10 for example: -``` +``` tar_target(name = ei_crude, command = 10) ``` -3. Save the script. Then run: +3. Save the script. Then run: -``` +``` tar_visnetwork() ``` -4. You'll see the diagram now looks different, with a few lines and points assigned a different color, representing "Outdated". These are the targets affected by the updated ``ei_crude``. Run ``tar_make()`` to rerun the pipeline with the new ``ei_crude`` value: +4. You'll see the diagram now looks different, with a few lines and points assigned a different color, representing "Outdated". These are the targets affected by the updated `ei_crude`. Run `tar_make()` to rerun the pipeline with the new `ei_crude` value: -``` +``` tar_make() ``` In the outputs you'll see that the targets that are affected are being updated, while the ones that are unaffected are not being rebuilt. -If you run ``tar_visnetwork()`` everything should be up-to-date now in the diagram. +If you run `tar_visnetwork()` everything should be up-to-date now in the diagram. **Remember to change the value of the target back to normal (by ctrl + z for example).** ## Output Structure and Git Tracking -This repository uses a standardized output structure defined in `structure.md` and `output_structure.csv`. -The `output_structure.csv` file specifies: +This repository uses a standardized output structure defined in `structure.md` and `output_structure.csv`. The `output_structure.csv` file specifies: -- `file_name`: The name of each output file -- `relative_path`: The path where the file should be saved (relative to `save_path`) -- `tracked`: Whether the file should be tracked in git (`YES` or `NO`) +- `file_name`: The name of each output file +- `relative_path`: The path where the file should be saved (relative to `save_path`) +- `tracked`: Whether the file should be tracked in git (`YES` or `NO`) ### Directory Structure -```text +``` text outputs/ version/ iteration/ @@ -195,14 +237,14 @@ outputs/ Two utility scripts help manage output files and git tracking: -1. `update_gitignore.R`: Updates all `.gitignore` files based on `output_structure.csv` -2. `verify_file_paths.R`: Verifies that all files in `_targets.R` are saved in the correct locations +1. `update_gitignore.R`: Updates all `.gitignore` files based on `output_structure.csv` +2. `verify_file_paths.R`: Verifies that all files in `_targets.R` are saved in the correct locations ### File Saving Conventions All file-producing targets in `_targets.R` should use the `simple_fwrite_repo` function: -```r +``` r simple_fwrite_repo( data = your_data, folder_path = NULL, # Not needed when using save_path and file_type diff --git a/_targets.R b/_targets.R index beeba7a..1f519a8 100644 --- a/_targets.R +++ b/_targets.R @@ -1,5 +1,6 @@ # Load packages required to define the pipeline: library(targets) +library(tarchetypes) library(data.table) library(tidyr) library(tidyverse) @@ -50,6 +51,12 @@ source("setup_data_paths.R") # Replace the target list below with your own: list( + + ## indicate if you have access to confidential data (data/confidential-data folder) + ## TRUE indicates access, FALSE indicates that user does not have access + tar_target(name = confidential_data_access, + command = FALSE), + # data path (automatically configured) tar_target( name = main_path, @@ -65,11 +72,13 @@ list( 0.00422068, ifelse(beta_scenario == "high", 0.00737932, 0.00582) ) - ), # Coefficient from Krewski et al (2009) for mortality impact + ), + + # Coefficient from Krewski et al (2009) for mortality impact tar_target(name = ref_threshold, command = 0.6), # list save paths (UPDATE VERSION AS NEEDED) - tar_target(name = version, command = "rev-submission"), + tar_target(name = version, command = "test-no-conf-data"), tar_target( name = iteration, command = paste0("cuf=", ref_threshold, "_beta-scenario=", beta_scenario) @@ -221,7 +230,7 @@ list( name = file_raw_its, command = file.path( main_path, - "data-staged-for-deletion/stocks-flows/raw/Study 1 - Preliminary Fuel Volumes BAU & LC1.xlsx" + "data-files/stocks-flows/raw/Study 1 - Preliminary Fuel Volumes BAU & LC1.xlsx" ), format = "file" ), @@ -229,7 +238,7 @@ list( name = file_raw_avgas, command = file.path( main_path, - "data-staged-for-deletion/stocks-flows/raw/Distillates 10-10.xlsx" + "data-files/stocks-flows/raw/Distillates 10-10.xlsx" ), format = "file" ), @@ -237,7 +246,7 @@ list( name = file_raw_cec_jet, command = file.path( main_path, - "data-staged-for-deletion/stocks-flows/raw/5-20 Jet Fuel Demand.xlsx" + "data-files/stocks-flows/raw/5-20 Jet Fuel Demand.xlsx" ), format = "file" ), @@ -245,7 +254,7 @@ list( name = file_raw_mil_jet, command = file.path( main_path, - "data-staged-for-deletion/stocks-flows/raw/California Transportion Fuel Consumption - Summary 2020-06-01 GDS_rename.xlsx" + "data-files/stocks-flows/raw/California Transportion Fuel Consumption - Summary 2020-06-01 GDS_rename.xlsx" ), format = "file" ), @@ -253,7 +262,7 @@ list( name = file_raw_fpm, command = file.path( main_path, - "data-staged-for-deletion/stocks-flows/raw/Finished_Products_Movements.xlsx" + "data-files/stocks-flows/raw/Finished_Products_Movements.xlsx" ), format = "file" ), @@ -261,7 +270,7 @@ list( name = file_refcap, command = file.path( main_path, - "data-staged-for-deletion/stocks-flows/processed/refinery_loc_cap_manual.csv" + "data-files/stocks-flows/processed/refinery_loc_cap_manual.csv" ), format = "file" ), # this is a manually created file @@ -269,7 +278,7 @@ list( name = file_rediesel, command = file.path( main_path, - "data-staged-for-deletion/stocks-flows/processed/CARB_RE_fuels_CA_imports_figure10_053120.xlsx" + "data-files/stocks-flows/processed/CARB_RE_fuels_CA_imports_figure10_053120.xlsx" ), format = "file" ), @@ -277,7 +286,7 @@ list( name = file_renref, command = file.path( main_path, - "data-staged-for-deletion/stocks-flows/processed/renewable_refinery_capacity.xlsx" + "data-files/stocks-flows/processed/renewable_refinery_capacity.xlsx" ), format = "file" ), # this is a manually created file @@ -285,7 +294,7 @@ list( name = file_altair, command = file.path( main_path, - "data-staged-for-deletion/stocks-flows/raw/altair_refinery_capacity.xlsx" + "data-files/stocks-flows/raw/altair_refinery_capacity.xlsx" ), format = "file" ), # this is a manually created file @@ -294,7 +303,7 @@ list( name = file_raw_ces, command = file.path( main_path, - "data-staged-for-deletion/health/raw/ces3results.xlsx" + "data-files/health/raw/ces3results.xlsx" ), format = "file" ), @@ -302,7 +311,7 @@ list( name = file_raw_dac, command = file.path( main_path, - "data-staged-for-deletion/health/raw/SB535DACresultsdatadictionary_F_2022/SB535DACresultsdatadictionary_F_2022.xlsx" + "data-files/health/raw/SB535DACresultsdatadictionary_F_2022/SB535DACresultsdatadictionary_F_2022.xlsx" ), format = "file" ), @@ -310,7 +319,7 @@ list( name = file_raw_income_house, command = file.path( main_path, - "data-staged-for-deletion/Census/ca-median-house-income.csv" + "data-files/Census/ca-median-house-income.csv" ), format = "file" ), # remove from workflow @@ -318,7 +327,7 @@ list( name = file_raw_income_county, command = file.path( main_path, - "data-staged-for-deletion/Census/ca-median-house-income-county.csv" + "data-files/Census/ca-median-house-income-county.csv" ), format = "file" ), # remove from workflow @@ -326,14 +335,14 @@ list( name = file_inmap_re, command = file.path( main_path, - "data-staged-for-deletion/health/source_receptor_matrix/inmap_processed_srm/refining" + "data-files/health/source_receptor_matrix/inmap_processed_srm/refining" ) ), # these were created upstream tar_target( name = file_dt_ef, command = file.path( main_path, - "data-staged-for-deletion/health/processed/ref_emission_factor.csv" + "data-files/health/processed/ref_emission_factor.csv" ), format = "file" ), #cluster-level emission factors @@ -341,7 +350,7 @@ list( name = file_dt_ef_ref, command = file.path( main_path, - "data-staged-for-deletion/health/processed/refinery_emission_factor.csv" + "data-files/health/processed/refinery_emission_factor.csv" ), format = "file" ), #refinery-level emission factors @@ -349,7 +358,7 @@ list( name = file_dt_age_vsl, command = file.path( main_path, - "data-staged-for-deletion/health/processed/age_based_VSL_2019.csv" + "data-files/health/processed/age_based_VSL_2019.csv" ), format = "file" ), @@ -357,7 +366,7 @@ list( name = file_dt_ct_inc_pop, command = file.path( main_path, - "data-staged-for-deletion/health/processed/ct_inc_45_2020.csv" + "data-files/health/processed/ct_inc_45_2020.csv" ), format = "file" ), @@ -365,7 +374,7 @@ list( name = file_dt_growth_cap_rate, command = file.path( main_path, - "data-staged-for-deletion/benmap/processed/growth_per_cap.csv" + "data-files/benmap/processed/growth_per_cap.csv" ), format = "file" ), @@ -373,7 +382,7 @@ list( name = file_dt_health_income, command = file.path( main_path, - "outputs-staged-for-deletion/refining-2023/health/refining_health_income_2023.csv" + "output-files/refining-2023/health/refining_health_income_2023.csv" ), format = "file" ), @@ -381,7 +390,7 @@ list( name = file_raw_ct_2019, command = file.path( main_path, - "data-staged-for-deletion/GIS/raw/ct-cartographic-boundaries/cb_2019_06_tract_500k/cb_2019_06_tract_500k.shp" + "data-files/GIS/raw/ct-cartographic-boundaries/cb_2019_06_tract_500k/cb_2019_06_tract_500k.shp" ), format = "file" ), @@ -389,7 +398,7 @@ list( name = file_raw_ct_2020, command = file.path( main_path, - "data-staged-for-deletion/GIS/raw/ct-cartographic-boundaries/nhgis0030_shapefile_tl2020_us_tract_2020/US_tract_2020.shp" + "data-files/GIS/raw/ct-cartographic-boundaries/nhgis0030_shapefile_tl2020_us_tract_2020/US_tract_2020.shp" ), format = "file" ), @@ -397,7 +406,7 @@ list( name = file_raw_census_2020, command = file.path( main_path, - "data-staged-for-deletion/Census/nhgis_2020/nhgis0024_csv/nhgis0024_ds249_20205_tract.csv" + "data-files/Census/nhgis_2020/nhgis0024_csv/nhgis0024_ds249_20205_tract.csv" ), format = "file" ), @@ -405,7 +414,7 @@ list( name = file_raw_census_2021, command = file.path( main_path, - "data-staged-for-deletion/Census/nhgis_2020/nhgis0024_csv/nhgis0024_ds254_20215_tract.csv" + "data-files/Census/nhgis_2020/nhgis0024_csv/nhgis0024_ds254_20215_tract.csv" ), format = "file" ), @@ -413,7 +422,7 @@ list( name = file_raw_ct_race, command = file.path( main_path, - "data-staged-for-deletion/Census/nhgis0039_csv/nhgis0039_ds258_2020_tract.csv" + "data-files/Census/nhgis0039_csv/nhgis0039_ds258_2020_tract.csv" ), format = "file" ), @@ -421,7 +430,7 @@ list( name = file_raw_census_poverty, command = file.path( main_path, - "data-staged-for-deletion/Census/nhgis_2020/nhgis0029_csv/nhgis0029_csv/nhgis0029_ds254_20215_tract.csv" + "data-files/Census/nhgis_2020/nhgis0029_csv/nhgis0029_csv/nhgis0029_ds254_20215_tract.csv" ), format = "file" ), @@ -429,33 +438,52 @@ list( name = file_df_ca_regions, command = file.path( main_path, - "data-staged-for-deletion/labor/raw/ca_regions.csv" + "data-files/labor/raw/ca_regions.csv" ), format = "file" ), - # tar_target(name = file_df_labor, command = file.path(main_path, "data-staged-for-deletion/labor/processed/implan-results/academic-paper-multipliers/processed/ica_multipliers_v2.xlsx"), format = "file"), - # tar_target(name = file_df_labor_dest, command = file.path(main_path, "data-staged-for-deletion/labor/processed/implan-results/academic-paper-multipliers/processed/20240524-1million_la-Detail Economic Indicators.csv"), format = "file"), tar_target( name = file_direct_multipliers, - command = file.path( - main_path, - "data-staged-for-deletion/labor/ncomms-revisions/direct_multipliers_tract.csv" + command = { + if (!isTRUE(confidential_data_access)) { + file.path(main_path, + "intermediate-outputs/direct_multipliers_tract_blank.csv") + } else { + file.path( + main_path, + "confidential-data/direct_multipliers_tract.csv") + } + }, + format = "file" ), - format = "file" - ), tar_target( name = file_indirect_state_multipliers, - command = file.path( - main_path, - "data-staged-for-deletion/labor/ncomms-revisions/indirect_induced_multipliers_state.csv" - ), - format = "file" - ), + command = { + if (!isTRUE(confidential_data_access)) { + file.path( + main_path, + "intermediate-outputs/indirect_induced_multipliers_state_blank.csv") + } else { + file.path( + main_path, + "confidential-data/indirect_induced_multipliers_state.csv") + } + }, + format = "file" + ), + # tar_target( + # name = file_indirect_state_multipliers, + # command = file.path( + # main_path, + # "confidential-data/indirect_induced_multipliers_state.csv" + # ), + # format = "file" + # ), tar_target( name = file_df_labor_dest, command = file.path( main_path, - "data-staged-for-deletion/labor/processed/implan-results/academic-paper-multipliers/processed/20240623-census_regions-Detail Economic Indicators.csv" + "data-files/labor/processed/implan-results/academic-paper-multipliers/processed/20240623-census_regions-Detail Economic Indicators.csv" ), format = "file" ), @@ -463,7 +491,7 @@ list( name = file_df_labor_fte, command = file.path( main_path, - "data-staged-for-deletion/labor/processed/implan-results/academic-paper-multipliers/processed/Emp_FTE and W&S_EC_546 Industry Scheme.xlsx" + "data-files/labor/processed/implan-results/academic-paper-multipliers/processed/Emp_FTE and W&S_EC_546 Industry Scheme.xlsx" ), format = "file" ), @@ -471,7 +499,7 @@ list( name = file_oil_px, command = file.path( main_path, - "data-staged-for-deletion/stocks-flows/processed/oil_price_projections_revised.xlsx" + "data-files/stocks-flows/processed/oil_price_projections_revised.xlsx" ), format = "file" ), @@ -479,7 +507,7 @@ list( name = file_ca_counties_sp, command = file.path( main_path, - "data-staged-for-deletion/GIS/raw/CA_counties_noislands/CA_Counties_TIGER2016_noislands.shp" + "data-files/GIS/raw/CA_counties_noislands/CA_Counties_TIGER2016_noislands.shp" ), format = "file" ), @@ -487,7 +515,7 @@ list( name = file_refin_locs_orig, command = file.path( main_path, - "data-staged-for-deletion/GIS/raw/Petroleum_Refineries_US_EIA/Petroleum_Refineries_US_2019_v2.shp" + "data-files/GIS/raw/Petroleum_Refineries_US_EIA/Petroleum_Refineries_US_2019_v2.shp" ), format = "file" ), @@ -495,7 +523,7 @@ list( name = file_refin_locs, command = file.path( main_path, - "/data-staged-for-deletion/stocks-flows/processed/refinery_lat_long_revised.csv" + "/data-files/stocks-flows/processed/refinery_lat_long_revised.csv" ), format = "file" ), @@ -503,15 +531,7 @@ list( name = file_refin_locs_ct, command = file.path( main_path, - "data-staged-for-deletion/labor/ncomms-revisions/refinery_cluster_tract.csv" - ), - format = "file" - ), - tar_target( - name = file_labor_2019, - command = file.path( - main_path, - "/data-staged-for-deletion/labor/implan/20241010-census_regions_2019-Detail Economic Indicators.csv" + "data-files/labor/ncomms-revisions/refinery_cluster_tract.csv" ), format = "file" ), @@ -692,12 +712,34 @@ list( ), tar_target( name = dt_direct_multipliers, - command = read_labor_direct_mult_inputs(file_direct_multipliers) + command = { + if (!isTRUE(confidential_data_access)) { + fread(file_direct_multipliers) + } else { + + read_labor_direct_mult_inputs(file_direct_multipliers) + } + } ), tar_target( name = dt_indirect_state_multipliers, - command = read_labor_indirect_mult_inputs(file_indirect_state_multipliers) + command = { + if (!isTRUE(confidential_data_access)) { + fread(file_indirect_state_multipliers) + } else { + + read_labor_indirect_mult_inputs(file_indirect_state_multipliers) + } + } ), + # tar_target( + # name = dt_direct_multipliers, + # command = read_labor_direct_mult_inputs(file_direct_multipliers) + # ), + # tar_target( + # name = dt_indirect_state_multipliers, + # command = read_labor_indirect_mult_inputs(file_indirect_state_multipliers) + # ), tar_target( name = proc_oil_px_df, command = read_oil_px( @@ -722,7 +764,7 @@ list( name = file_ghg_emissions, command = file.path( main_path, - "outputs-staged-for-deletion/stocks-flows/refinery_ghg_emissions.csv" + "output-files/stocks-flows/refinery_ghg_emissions.csv" ), format = "file" ), @@ -730,11 +772,10 @@ list( file_hydrogen_facilities, command = file.path( main_path, - "data-staged-for-deletion/stocks-flows/raw/hydrogen_facilities_list.xlsx" + "data-files/stocks-flows/raw/hydrogen_facilities_list.xlsx" ), format = "file" ), - tar_target(name = labor_2019, command = fread(file_labor_2019)), # GHG factor calculation targets tar_target( @@ -845,7 +886,7 @@ list( command = read_and_bind_csv_files( file.path( main_path, - 'data-staged-for-deletion/stocks-flows/processed/ghg_mrr' + 'data-files/stocks-flows/processed/ghg_mrr' ), ".csv" ) @@ -862,7 +903,7 @@ list( name = file_fpm, command = file.path( main_path, - "data-staged-for-deletion/stocks-flows/processed/finished_product_movements_weekly_cec.csv" + "data-files/stocks-flows/processed/finished_product_movements_weekly_cec.csv" ), format = "file" ), @@ -870,7 +911,7 @@ list( name = file_fw, command = file.path( main_path, - "data-staged-for-deletion/stocks-flows/processed/fuel_watch_data.csv" + "data-files/stocks-flows/processed/fuel_watch_data.csv" ), format = "file" ), @@ -878,7 +919,7 @@ list( name = file_processed_ces3, command = file.path( main_path, - "data-staged-for-deletion/health/processed/ces3_data.csv" + "data-files/health/processed/ces3_data.csv" ), format = "file" ), @@ -1526,45 +1567,85 @@ list( save_path ) ), + tar_target( name = annual_direct_labor, - command = calc_labor_outputs( - main_path, - save_path, - indiv_prod_output, - dt_refcap, - product_px, - cpi2019, - cpi2020, - discount_rate, - alpha_comp, - alpha_emp, - refin_locs_ct, - dt_direct_multipliers - ) - ), + command = { + + if (!isTRUE(confidential_data_access)) { + + fread('data/intermediate-outputs/annual_labor_outputs.csv', + colClasses = list(character = "census_tract")) + } else { + + calc_labor_outputs( + main_path, + save_path, + indiv_prod_output, + dt_refcap, + product_px, + cpi2019, + cpi2020, + discount_rate, + alpha_comp, + alpha_emp, + refin_locs_ct, + dt_direct_multipliers) + } + } + ), + tar_target( name = state_annual_direct_impacts, command = calc_state_direct_impacts(annual_direct_labor) ), + tar_target( name = annual_all_impacts_labor, - command = calc_labor_all_impacts_outputs( - main_path, - save_path, - state_annual_direct_impacts, - indiv_prod_output, - dt_refcap, - product_px, - cpi2019, - cpi2020, - discount_rate, - alpha_comp, - alpha_emp, - dt_indirect_state_multipliers, - indirect_induced_mult - ) - ), + command = { + + if (!isTRUE(confidential_data_access)) { + + fread('data/intermediate-outputs/state_annual_labor_outputs.csv') + } else { + + calc_labor_all_impacts_outputs( + main_path, + save_path, + state_annual_direct_impacts, + indiv_prod_output, + dt_refcap, + product_px, + cpi2019, + cpi2020, + discount_rate, + alpha_comp, + alpha_emp, + dt_indirect_state_multipliers, + indirect_induced_mult + ) + } + } + ), + # tar_target( + # name = annual_all_impacts_labor, + # command = calc_labor_all_impacts_outputs( + # main_path, + # save_path, + # state_annual_direct_impacts, + # indiv_prod_output, + # dt_refcap, + # product_px, + # cpi2019, + # cpi2020, + # discount_rate, + # alpha_comp, + # alpha_emp, + # dt_indirect_state_multipliers, + # indirect_induced_mult + # ) + # ), + tar_target( ref_labor_demog_yr, command = calculate_labor_x_demg_annual( @@ -1939,7 +2020,6 @@ list( raw_ct_2019, health_weighted, refining_mortality, - labor_2019, ca_regions, raw_pop_income_2021, cpi2020, @@ -2472,13 +2552,18 @@ list( ), tar_target( name = save_state_labor_annual, - command = simple_fwrite_repo( + command = { + + if (!isTRUE(confidential_data_access)) targets::tar_cancel() + + simple_fwrite_repo( annual_all_impacts_labor, NULL, "state_annual_labor_outputs.csv", save_path = save_path, file_type = "labor" - ), + ) + }, format = "file" ), tar_target( @@ -3040,6 +3125,8 @@ list( tar_target( save_annual_direct_labor, command = { + if (!isTRUE(confidential_data_access)) targets::tar_cancel() + { simple_fwrite_repo( data = annual_direct_labor, folder_path = NULL, @@ -3047,6 +3134,7 @@ list( save_path = save_path, file_type = "labor" ) + } }, format = "file" ), diff --git a/before-targets/analysis/age_vsl_test.R b/before-targets/analysis/age_vsl_test.R deleted file mode 100644 index 71b9543..0000000 --- a/before-targets/analysis/age_vsl_test.R +++ /dev/null @@ -1,173 +0,0 @@ -tar_load(ct_inc_45) -tar_load(growth_cap_rates) -tar_load(vsl_2019) -tar_load(health_weighted) -tar_load(beta) -tar_load(dt_age_vsl) -tar_load(income_elasticity_mort) -tar_load(discount_rate) - -############################################################### - -# for monetary mortality impact - growth in income for use in WTP function -growth_rates <- growth_cap_rates %>% - filter(year > 2019) %>% - mutate(cum_growth = cumprod(1 + growth_2035)) %>% - select(-growth_2035)%>% - drop_na(year) - -# Function to grow WTP -future_WTP <- function(elasticity, growth_rate, WTP) { - return(elasticity * growth_rate * WTP + WTP) -} - -#### VSL age cross-walk - -vsl_cross_walk <- dt_age_vsl %>% - filter(age_min > 29)%>% - select(-year)%>% - fuzzyjoin::fuzzy_left_join( - ct_inc_45 %>% - select(start_age, end_age) %>% - distinct()%>% - filter(start_age > 29), - by = c("age_min" = "start_age", - "age_max" = "end_age"), - match_fun = list(`>=`, `<=`))%>% - group_by(start_age)%>% - summarise(end_age = first(end_age), - age_VSL_2019 = mean(age_VSL_2019))%>% - drop_na(start_age)%>% - ungroup() - - -########### Add VSL 2019 by age group - -ct_inc_45_temp <- ct_inc_45 %>% - select(GEO_ID:end_age, year, pop, incidence_2015) %>% - filter(start_age > 29)%>% - left_join(vsl_cross_walk, by = c("start_age", "end_age"))%>% - mutate(age_VSL_2019 = ifelse(start_age>64, - dt_age_vsl %>% filter(age_min>61) %>% select(age_VSL_2019) %>% pull(), - age_VSL_2019)) - -########### Grow VSL - -# for monetary mortality impact - growth in income for use in WTP function -growth_rates <- growth_cap_rates %>% - filter(year > 2019) %>% - mutate(cum_growth = cumprod(1 + growth_2035)) %>% - select(-growth_2035)%>% - drop_na(year) - -# Function to grow WTP -future_WTP <- function(elasticity, growth_rate, WTP) { - return(elasticity * growth_rate * WTP + WTP) -} - -# Merge growth factors -ct_inc_45_temp <- ct_inc_45_temp%>% - mutate(VSL_2019 = vsl_2019) %>% - left_join(growth_rates, by = c("year" = "year")) %>% - mutate( - VSL = future_WTP( - income_elasticity_mort, - (cum_growth - 1), - VSL_2019 - ), - age_VSL = future_WTP( - income_elasticity_mort, - (cum_growth - 1), - age_VSL_2019 - )) - -########### Aggregate at census tract and year level - -ct_inc_pop_45_weighted <- ct_inc_45_temp %>% - group_by(GEO_ID, year) %>% - mutate( - ct_pop = sum(pop, na.rm = T), - share = pop / ct_pop, - weighted_incidence = sum(share * incidence_2015, na.rm = T), - weighted_monetized_incidence = sum(share * incidence_2015 * VSL, na.rm = T), - weighted_monetized_age_incidence = sum(share * incidence_2015 * age_VSL, na.rm = T), - weighted_monetized_age_incidence_2019 = sum(share * incidence_2015 * age_VSL_2019, na.rm = T)) %>% - summarize( - weighted_incidence = unique(weighted_incidence), - weighted_monetized_incidence = unique(weighted_monetized_incidence), - weighted_monetized_age_incidence = unique(weighted_monetized_age_incidence), - weighted_monetized_age_incidence_2019 = unique(weighted_monetized_age_incidence), - pop = unique(ct_pop) - ) %>% - ungroup() %>% - mutate(GEO_ID = str_remove(GEO_ID, "US")) - -# Delta of pollution change ###################################### - -# refining pm25 BAU -refining_BAU <- subset(health_weighted, (scen_id == "BAU historic production")) %>% - # refining_BAU<-subset(health_income,(scen_id=="BAU historic production"))%>% - rename(bau_total_pm25 = total_pm25) # %>% -# mutate(census_tract = paste0("0",census_tract)) - -# refining pm25 difference -deltas_refining <- health_weighted %>% - # deltas_refining<- health_income%>% - # mutate(census_tract = paste0("0",census_tract))%>% - # left_join(refining_BAU %>% select(-scen_id,-demand_scenario,-refining_scenario,-population:-median_hh_income),by=c("census_tract", "year"))%>% - left_join(refining_BAU %>% select(-scen_id, -demand_scenario, -refining_scenario, -ces4_score, -disadvantaged), by = c("census_tract", "year")) %>% - mutate(delta_total_pm25 = total_pm25 - bau_total_pm25) %>% - select(census_tract, scen_id:year, total_pm25:delta_total_pm25) - -## Merge demographic data to pollution scenarios - -ct_incidence_ca_poll <- deltas_refining %>% - right_join(ct_inc_pop_45_weighted, by = c("census_tract" = "GEO_ID", "year" = "year")) %>% - drop_na(scen_id) # CURRENTLY DROPPING ALL THE MISMATCHED 2010/2022 GEOIDs - -# Monetized mortality for adults (>=29 years old) by age-based VSL - -ct_mort_cost <- ct_incidence_ca_poll %>% - mutate( - mortality_delta = ((exp(beta * delta_total_pm25) - 1)) * weighted_incidence * pop, - mortality_level = ((exp(beta * total_pm25) - 1)) * weighted_incidence * pop, - benefit_delta = ((exp(beta * delta_total_pm25) - 1)) * weighted_monetized_incidence * pop, - benefit_level = ((exp(beta * total_pm25) - 1)) * weighted_monetized_incidence * pop, - benefit_age_delta = ((exp(beta * delta_total_pm25) - 1)) * weighted_monetized_age_incidence * pop, - benefit_age_delta_2019 = ((exp(beta * delta_total_pm25) - 1)) * weighted_monetized_age_incidence_2019 * pop, - benefit_age_level = ((exp(beta * total_pm25) - 1)) * weighted_monetized_age_incidence * pop - )%>% - group_by(year) %>% - mutate( - cost_2019_PV = benefit_age_delta_2019 / ((1 + discount_rate)^(year - 2019)), - cost_PV = benefit_age_delta / ((1 + discount_rate)^(year - 2019)) - )%>% - ungroup() - -# # #### Calculate the cost per premature mortality (old way)################ -# -# ct_health <- ct_incidence_ca_poll %>% -# mutate( -# mortality_delta = ((exp(beta * delta_total_pm25) - 1)) * weighted_incidence * pop, -# mortality_level = ((exp(beta * total_pm25) - 1)) * weighted_incidence * pop -# ) -# -# ct_mort_cost <- ct_health %>% -# mutate(VSL_2019 = vsl_2019) %>% -# left_join(growth_rates, by = c("year" = "year")) %>% -# mutate( -# VSL = future_WTP( -# income_elasticity_mort, -# (cum_growth - 1), -# VSL_2019 -# ), -# cost_2019 = mortality_delta * VSL_2019, -# cost = mortality_delta * VSL -# ) %>% -# group_by(year) %>% -# mutate( -# cost_2019_PV = cost_2019 / ((1 + discount_rate)^(year - 2019)), -# cost_PV = cost / ((1 + discount_rate)^(year - 2019)) -# ) - - diff --git a/before-targets/analysis/compare-labor-out.R b/before-targets/analysis/compare-labor-out.R deleted file mode 100644 index 2ad02c6..0000000 --- a/before-targets/analysis/compare-labor-out.R +++ /dev/null @@ -1,63 +0,0 @@ -## compare outputs - -orig <- fread(file.path(main_path, "outputs/academic-out/refining/figures/2025-update/fig-csv-files/labor_result_for_review.csv"), - colClasses = c(census_tract = "character")) - -compare <- merge(ct_out_refining_direct %>% filter(product_scenario == "changing prices"), - orig, - by = c("demand_scenario", - "refining_scenario", - "oil_price_scenario", - "census_tract", - "year"), suffixes = c("_orig", "_new")) |> - ## compute absolute difference - mutate(diff_total_comp_usd19_h = total_comp_usd19_h_orig - total_comp_usd19_h_new, - diff_prev_comp_usd19h = prev_comp_usd19h_orig - prev_comp_usd19h_new, - diff_total_comp_usd19_l = total_comp_usd19_l_orig - total_comp_usd19_l_new, - diff_total_emp = total_emp_orig - total_emp_new, - diff_prev_emp = prev_emp_orig - prev_emp_new, - diff_r = total_emp_revised_orig - total_emp_revised_new) - - -orig2 <- fread(file.path(main_path, "outputs/academic-out/refining/figures/2025-update/fig-csv-files/state_out_labor_all_impacts.csv")) - -orig2 <- orig2 |> pivot_longer(state_emp_h:comp_all_impacts_PV_l, names_to = "metric", values_to = "orig_value") - -compare2 <- state_out_labor_all_impacts |> pivot_longer(state_emp_h:comp_all_impacts_PV_l, names_to = "metric", values_to = "value") |> - filter(product_scenario == "changing prices") |> - full_join(orig2) |> - mutate(diff = orig_value - value) - -orig3 <- fread(file.path(main_path, "outputs/academic-out/refining/figures/2025-update/fig-csv-files/state_labor_direct_impacts_demo_annual.csv")) |> - pivot_longer(sum_demo_emp:sum_demo_comp_pv_l, names_to = "metric", values_to = "orig_value") - -compare3 <- state_demo_labor_out |> pivot_longer(sum_demo_emp:sum_demo_comp_pv_l, names_to = "metric", values_to = "value") |> - filter(product_scenario == "changing prices") |> - full_join(orig3) |> - mutate(diff = orig_value - value) - -orig4 <- fread(file.path(main_path, "outputs/academic-out/refining/figures/2025-update/fig-csv-files/", "labor_high_low_annual_outputs.csv")) |> - rename(orig_value = value) - -compare4 <- labor_pct_long |> - filter(product_scenario == "changing prices") |> - full_join(orig4) |> - mutate(diff = orig_value - value) - -orig5 <- fread(file.path(main_path, "outputs/academic-out/refining/figures/2025-update/fig-csv-files/state_labor_direct_impacts_demo_annual.csv")) |> - pivot_longer(sum_demo_emp:sum_demo_comp_pv_l, names_to = "metric", values_to = "orig_value") - -compared5 <- state_demo_labor_out |> - filter(product_scenario == "changing prices") |> - pivot_longer(sum_demo_emp:sum_demo_comp_pv_l, names_to = "metric", values_to = "value") |> - full_join(orig5) |> - mutate(diff = orig_value - value) - -npv_comp <- fread(file.path(main_path, "outputs/academic-out/refining/figures/2025-update/fig-csv-files/", "state_npv_fig_inputs_labor.csv")) - -comp6 <- plot_df_labor |> - filter(produc) - - - - \ No newline at end of file diff --git a/before-targets/analysis/create_ct_xwalk.R b/before-targets/analysis/create_ct_xwalk.R deleted file mode 100644 index df686af..0000000 --- a/before-targets/analysis/create_ct_xwalk.R +++ /dev/null @@ -1,59 +0,0 @@ -## Tracey Mangin -## May 27, 2023 -## Create crosswalk - -## libraries -library(tidyverse) -library(sf) -library(data.table) -library(plotly) - -## paths -main_path <- "/Volumes/GoogleDrive/Shared drives/emlab/projects/current-projects/calepa-cn/" -main_path <- "/Users/traceymangin/Library/CloudStorage/GoogleDrive-tmangin@ucsb.edu/Shared\ drives/emlab/projects/current-projects/calepa-cn/" -sp_data_path <- paste0(main_path, "data/GIS/raw/") -save_path <- paste0(main_path, "project-materials/refining-paper/model-prep/census-xwalk/") - -## file names - use cartographic boundaries, not tigerlines -prev_ct <- "ct-cartographic-boundaries/cb_2019_06_tract_500k/cb_2019_06_tract_500k.shp" -ct_2020 <- "ct-cartographic-boundaries/nhgis0030_shapefile_tl2020_us_tract_2020/US_tract_2020.shp" - -## crs NAD83 / California Albers -ca_crs <- 3310 - -## previous version of cts -census_tract19 <- read_sf(paste0(sp_data_path, prev_ct)) %>% - select(GEOID) %>% - st_transform(crs = ca_crs) - -## california id -ca_code <- "06" - -census_tract20 <- read_sf(paste0(sp_data_path, ct_2020)) %>% - filter(STATEFP == ca_code) %>% - select(GEOID) %>% - st_transform(crs = ca_crs) - -census_tract20 <- st_make_valid(census_tract20) - -## make a crosswalk for census tracts, tract percentage overlap -## ------------------------------- - -## 2020 cts merged with 2019, all matches -ct_merged <- census_tract20 %>% - rename(GEOID_2020 = GEOID) %>% - mutate(GEOID_2020_area = st_area(.)) %>% - st_intersection(census_tract19) %>% - mutate(intersect_area = st_area(.)) %>% - arrange(GEOID_2020, intersect_area) %>% - group_by(GEOID_2020) %>% - mutate(sum_intersect_area = sum(intersect_area)) %>% - ungroup() %>% - mutate(rel_intersect = intersect_area / sum_intersect_area) %>% - rename(GEOID_2019 = GEOID) %>% - mutate(rel_intersect = units::drop_units(rel_intersect)) %>% - select(GEOID_2020, GEOID_2020_area, GEOID_2019, intersect_area, sum_intersect_area, rel_intersect) %>% - st_drop_geometry() - -## save -fwrite(ct_merged, paste0(save_path, "ct_xwalk.csv")) diff --git a/before-targets/archive/health_labor.R b/before-targets/archive/health_labor.R deleted file mode 100644 index 0d2eb43..0000000 --- a/before-targets/archive/health_labor.R +++ /dev/null @@ -1,10906 +0,0 @@ -## health and labor figures - -## labor SI figure -## ----------------------------------------------------------------------------- - -## NPV figure -plot_npv_labor_oilpx <- function( - main_path, - save_path, - state_ghg_output, - dt_ghg_2019, - annual_all_impacts_labor -) { - ## add ghg emission reduction - ## 2019 ghg - ghg_2019_val <- dt_ghg_2019$mtco2e[1] - - ## 2045 vs 2019 ghg - ghg_2045 <- state_ghg_output[year == 2045 & source == "total"] - setnames(ghg_2045, "value", "ghg_kg") - ghg_2045[, ghg_2045 := (ghg_kg / 1000) / 1e6] - ghg_2045[, ghg_2019 := ghg_2019_val] - ghg_2045[, perc_diff := (ghg_2045 - ghg_2019) / ghg_2019] - - perc_diff_df <- ghg_2045[, .( - demand_scenario, - refining_scenario, - ghg_2045, - ghg_2019, - perc_diff - )] - - ## summarize by scenario, filter for total - state_ghg_df <- state_ghg_output[ - source == "total", - .(total_ghg = sum(value)), - by = .(demand_scenario, refining_scenario) - ] - - state_ghg_df[, total_ghg_mmt := (total_ghg / 1000) / 1e6] - - ## reference - ref_df <- state_ghg_df[ - demand_scenario == "BAU" & refining_scenario == "historic production", - .(total_ghg_mmt) - ] - setnames(ref_df, "total_ghg_mmt", "ref_ghg_mmt") - ref_value <- ref_df$ref_ghg_mmt[1] - - ## merge with summarized df - state_ghg_df[, ref_ghg := ref_value] - state_ghg_df[, avoided_ghg := (total_ghg_mmt - ref_value) * -1] - - ## summarize labor for state - state_labor <- annual_all_impacts_labor[, - .( - # sum_total_emp = sum(total_emp), - sum_total_comp_pv_h = sum(comp_all_impacts_PV_h), - sum_total_comp_pv_l = sum(comp_all_impacts_PV_l, na.rm = T) - ), - by = .(demand_scenario, refining_scenario, oil_price_scenario) - ] - - ## ref labor - ref_labor <- state_labor[ - demand_scenario == "BAU" & refining_scenario == "historic production" - ] - setnames( - ref_labor, - c("sum_total_comp_pv_h", "sum_total_comp_pv_l"), - c("ref_total_comp_pv_h", "ref_total_comp_pv_l") - ) - ref_labor <- ref_labor[, .( - oil_price_scenario, - ref_total_comp_pv_l, - ref_total_comp_pv_h - )] - - ## add values to labor - state_labor_oil_px <- merge( - state_labor, - ref_labor, - by = c("oil_price_scenario") - ) - - ## compute forgone wages high and low - state_labor_oil_px[, - forgone_wages_bil_h := (sum_total_comp_pv_h - ref_total_comp_pv_h) / 1e9 - ] - state_labor_oil_px[, - forgone_wages_bil_l := (sum_total_comp_pv_l - ref_total_comp_pv_l) / 1e9 - ] - - ## merge with health and ghg - labor_ghg_df <- merge( - state_labor_oil_px[, .( - demand_scenario, - refining_scenario, - oil_price_scenario, - sum_total_comp_pv_h, - ref_total_comp_pv_h, - forgone_wages_bil_h, - sum_total_comp_pv_l, - ref_total_comp_pv_l, - forgone_wages_bil_l - )], - state_ghg_df, - by = c("demand_scenario", "refining_scenario"), - all.x = T - ) - - ## add ghg perc reduction - labor_ghg_df <- merge( - labor_ghg_df, - perc_diff_df, - by = c("demand_scenario", "refining_scenario"), - all.x = T - ) - - ## add scen id - labor_ghg_df[, scen_id := paste(demand_scenario, refining_scenario)] - - ## prepare to plot - plot_df <- labor_ghg_df[, .( - scen_id, - demand_scenario, - refining_scenario, - oil_price_scenario, - forgone_wages_bil_h, - forgone_wages_bil_l, - avoided_ghg, - perc_diff - )] - - setnames(plot_df, "perc_diff", "ghg_perc_diff") - - plot_df[, `:=`( - forgone_wages_bil_h_ghg = forgone_wages_bil_h / avoided_ghg, - forgone_wages_bil_l_ghg = forgone_wages_bil_l / avoided_ghg - )] - - plot_df_labor <- plot_df %>% - select( - scen_id, - demand_scenario, - refining_scenario, - oil_price_scenario, - ghg_perc_diff, - forgone_wages_bil_h, - forgone_wages_bil_l, - forgone_wages_bil_h_ghg, - forgone_wages_bil_l_ghg - ) %>% - pivot_longer( - forgone_wages_bil_h:forgone_wages_bil_l_ghg, - names_to = "metric", - values_to = "value" - ) %>% - mutate( - segment = "labor", - unit_desc = ifelse( - metric %in% c("forgone_wages_bil_h", "forgone_wages_bil_l"), - "USD billion", - "USD billion per GHG" - ), - estimate = ifelse( - metric %in% c("forgone_wages_bil_h", "forgone_wages_bil_h_ghg"), - "high", - "low" - ), - metric = ifelse( - metric %in% c("forgone_wages_bil_h", "forgone_wages_bil_l"), - "forgone_wages_bil", - "forgone_wages_bil_ghg" - ) - ) %>% - select( - scen_id, - demand_scenario, - refining_scenario, - oil_price_scenario, - ghg_perc_diff, - segment, - metric, - unit_desc, - estimate, - value - ) %>% - pivot_wider(names_from = estimate, values_from = value) - - ## prepare labor ---------------------- - plot_df_labor <- plot_df_labor %>% - mutate( - title = ifelse( - metric == "forgone_wages_bil", - "Labor: forgone wages", - "Labor: forgone wages per avoided GHG" - ) - ) - - plot_df_labor$title <- factor( - plot_df_labor$title, - levels = c("Labor: forgone wages", "Labor: forgone wages per avoided GHG") - ) - - ## rename - setDT(plot_df_labor) - plot_df_labor[, - scenario := paste0(demand_scenario, " demand - ", refining_scenario) - ] - plot_df_labor[, scenario := gsub("LC1.", "Low ", scenario)] - - ## refactor - plot_df_labor$scenario <- factor( - plot_df_labor$scenario, - levels = c( - "BAU demand - historic production", - "BAU demand - historic exports", - "BAU demand - low exports", - "Low demand - historic exports", - "Low demand - low exports", - "Low demand - historic production" - ) - ) - - ## convert value of scaled outputs (by ghg) to millions, add unit column - plot_df_labor[, - high := fifelse( - metric %in% c("avoided_health_cost_ghg", "forgone_wages_bil_ghg"), - high * 1000, - high - ) - ] - plot_df_labor[, - low := fifelse( - metric %in% c("avoided_health_cost_ghg", "forgone_wages_bil_ghg"), - low * 1000, - low - ) - ] - plot_df_labor[, - metric := fifelse( - metric == "forgone_wages_bil_ghg", - "forgone_wages_ghg", - metric - ) - ] - plot_df_labor[, - unit := fifelse( - metric %in% c("avoided_health_cost_ghg", "forgone_wages_ghg"), - "NPV per avoided GHG MtCO2e\n(2019 USD million / MtCO2e)", - "NPV (2019 USD billion)" - ) - ] - - ## change historic to historical - plot_df_labor[, scen_id := str_replace(scen_id, "historic", "historical")] - plot_df_labor[, - refining_scenario := str_replace( - refining_scenario, - "historic", - "historical" - ) - ] - plot_df_labor[, scenario := str_replace(scenario, "historic", "historical")] - - ## has oil price label - plot_df_labor[, - oil_px_label := ifelse( - oil_price_scenario == "reference case", - "Reference", - ifelse(oil_price_scenario == "high oil price", "High", "Low") - ) - ] - - plot_df_labor$oil_px_label <- factor( - plot_df_labor$oil_px_label, - levels = c("Low", "Reference", "High") - ) - - ## save figure inputs - fwrite( - plot_df_labor, - file.path( - main_path, - save_path, - "fig-csv-files", - "state_npv_fig_inputs_labor_all_oilpx.csv" - ) - ) - # fwrite(plot_df_labor, file.path(main_path, "outputs/academic-out/refining/figures/2024-08-beta-adj/fig-csv-files/", "state_npv_fig_inputs_labor.csv")) - - ## scenarios for filtering - remove_scen <- c("LC1 historical production", "BAU historical production") - bau_scen <- "BAU historical production" - - ## make the plot - ## --------------------------------------------------- - - ## color for refining scenario - refin_colors <- c( - "LC1 low exports" = "#729b79", - "LC1 historical exports" = "#2F4858", - "BAU low exports" = "#F6AE2D", - "BAU historical exports" = "#F26419" - ) - - refin_labs <- c( - "LC1 low exports" = "Low demand, low exports", - "LC1 historical exports" = "Low demand, historical exports", - "BAU low exports" = "BAU demand, low exports", - "BAU historical exports" = "BAU demand, historical exports" - ) - - ## figs - make each separately - ## ------------------------------------------------------------------- - - hist_prod <- as.data.table( - plot_df_labor %>% - filter( - scen_id == bau_scen, - oil_price_scenario == "reference case", - unit == "NPV (2019 USD billion)" - ) - ) - - ## - forgone_wages_all_oil_px_fig <- ggplot() + - geom_hline(yintercept = 0, color = "darkgray", size = 0.5) + - geom_vline( - xintercept = hist_prod$ghg_perc_diff * -100, - color = "darkgray", - lty = 2 - ) + - # geom_vline(xintercept = hist_prod[title == "Labor: forgone wages", ghg_perc_diff * -100], color = "darkgray", lty = 2) + - # geom_linerange(data = plot_df_labor %>% filter(!scen_id %in% remove_scen, - # refining_scenario != "historical production", - # metric == "forgone_wages_bil"), aes(x = ghg_perc_diff * -100, ymin = high, ymax = low, color = scen_id), linewidth = 0.5, alpha = 0.8) + - geom_point( - data = plot_df_labor %>% - filter( - !scen_id %in% remove_scen, - refining_scenario != "historical production", - metric == "forgone_wages_bil" - ), - aes(x = ghg_perc_diff * -100, y = low, color = scen_id), - shape = 16, - size = 3, - alpha = 0.9 - ) + - geom_point( - data = plot_df_labor %>% - filter( - !scen_id %in% remove_scen, - refining_scenario != "historical production", - metric == "forgone_wages_bil" - ), - aes(x = ghg_perc_diff * -100, y = high, color = scen_id), - shape = 1, - size = 3, - alpha = 0.9 - ) + - facet_wrap(~oil_px_label) + - labs( - color = NULL, - title = "Labor: forgone wages", - y = "NPV (2019 USD billion)", - x = "GHG emissions reduction (%, 2045 vs 2019)" - ) + - ylim(-90, 10) + - xlim(0, 80) + - scale_color_manual( - values = refin_colors, - labels = refin_labs - ) + - theme_line + - theme( - legend.position = "bottom", - legend.text = element_text(size = 10), - plot.title = element_text(hjust = 0.5, size = 12), - axis.title.y = element_text(size = 12), - axis.title.x = element_text(size = 11), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm"), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5, size = 11), - axis.text.y = element_text(vjust = 0.5, hjust = 0.5, size = 11) - ) + - guides(color = guide_legend(nrow = 2)) - - return(forgone_wages_all_oil_px_fig) -} - - -## ----------------------------------------------------------------------------- -## NPV figure: main text -## ----------------------------------------------------------------------------- - -## NPV figure -plot_npv_health_labor <- function( - main_path, - save_path, - refining_mortality, - state_ghg_output, - dt_ghg_2019, - annual_all_impacts_labor -) { - npv_df <- refining_mortality %>% as.data.table() - - ## state level - state_npv_df <- npv_df[, - .( - sum_cost_2019_pv = sum(cost_2019_PV), ## constant VSL - sum_cost_pv = sum(cost_PV) - ), ## changing VSL - by = .(scen_id, demand_scenario, refining_scenario) - ] - - ## add column - state_npv_df[, sum_cost_2019_pv_b := sum_cost_2019_pv / 1e9] - state_npv_df[, sum_cost_pv_b := sum_cost_pv / 1e9] - - ## add ghg emission reduction - ## 2019 ghg - ghg_2019_val <- dt_ghg_2019$mtco2e[1] - - ## 2045 vs 2019 ghg - ghg_2045 <- state_ghg_output[year == 2045 & source == "total"] - setnames(ghg_2045, "value", "ghg_kg") - ghg_2045[, ghg_2045 := (ghg_kg / 1000) / 1e6] - ghg_2045[, ghg_2019 := ghg_2019_val] - ghg_2045[, perc_diff := (ghg_2045 - ghg_2019) / ghg_2019] - - perc_diff_df <- ghg_2045[, .( - demand_scenario, - refining_scenario, - ghg_2045, - ghg_2019, - perc_diff - )] - - ## summarize by scenario, filter for total - state_ghg_df <- state_ghg_output[ - source == "total", - .(total_ghg = sum(value)), - by = .(demand_scenario, refining_scenario) - ] - - state_ghg_df[, total_ghg_mmt := (total_ghg / 1000) / 1e6] - - ## reference - ref_df <- state_ghg_df[ - demand_scenario == "BAU" & refining_scenario == "historic production", - .(total_ghg_mmt) - ] - setnames(ref_df, "total_ghg_mmt", "ref_ghg_mmt") - ref_value <- ref_df$ref_ghg_mmt[1] - - ## merge with summarized df - state_ghg_df[, ref_ghg := ref_value] - state_ghg_df[, avoided_ghg := (total_ghg_mmt - ref_value) * -1] - - ## merge with health - health_ghg_df <- merge( - state_npv_df, - state_ghg_df[, .( - demand_scenario, - refining_scenario, - total_ghg_mmt, - ref_ghg, - avoided_ghg - )], - by = c("demand_scenario", "refining_scenario"), - all.x = T - ) - - ## summarize labor for state - state_labor <- annual_all_impacts_labor[, - .( - # sum_total_emp = sum(total_emp), - sum_total_comp_pv_h = sum(comp_all_impacts_PV_h), - sum_total_comp_pv_l = sum(comp_all_impacts_PV_l, na.rm = T) - ), - by = .(demand_scenario, refining_scenario, oil_price_scenario) - ] - - state_labor <- state_labor[oil_price_scenario == "reference case", ] - - ## ref labor - ref_labor <- state_labor[ - demand_scenario == "BAU" & refining_scenario == "historic production" - ] - setnames( - ref_labor, - c("sum_total_comp_pv_h", "sum_total_comp_pv_l"), - c("ref_total_comp_pv_h", "ref_total_comp_pv_l") - ) - # setnames(ref_labor, c("sum_total_emp", "sum_total_comp_pv_h", "sum_total_comp_pv_l"), c("ref_total_emp", "ref_total_comp_pv_h", "ref_total_comp_pv_l")) - - ## add values to labor - state_labor[, `:=`( - # ref_total_emp = ref_labor$ref_total_emp[1], - ref_total_comp_pv_h = ref_labor$ref_total_comp_pv_h[1], - ref_total_comp_pv_l = ref_labor$ref_total_comp_pv_l[1] - )] - - state_labor[, - forgone_wages_bil_h := (sum_total_comp_pv_h - ref_total_comp_pv_h) / 1e9 - ] - state_labor[, - forgone_wages_bil_l := (sum_total_comp_pv_l - ref_total_comp_pv_l) / 1e9 - ] - - ## merge with health and ghg - health_labor_ghg_df <- merge( - health_ghg_df, - state_labor[, .( - demand_scenario, - refining_scenario, - oil_price_scenario, - sum_total_comp_pv_h, - ref_total_comp_pv_h, - forgone_wages_bil_h, - sum_total_comp_pv_l, - ref_total_comp_pv_l, - forgone_wages_bil_l - )], - by = c("demand_scenario", "refining_scenario"), - all.x = T - ) - - ## add ghg perc reduction - health_labor_ghg_df <- merge( - health_labor_ghg_df, - perc_diff_df, - by = c("demand_scenario", "refining_scenario"), - all.x = T - ) - - ## prepare to plot - plot_df <- health_labor_ghg_df[, .( - scen_id, - demand_scenario, - refining_scenario, - oil_price_scenario, - sum_cost_pv_b, - sum_cost_2019_pv_b, - forgone_wages_bil_h, - forgone_wages_bil_l, - avoided_ghg, - perc_diff - )] - - setnames(plot_df, "perc_diff", "ghg_perc_diff") - - ## add values / avoided ghgs - plot_df[, avoided_health_cost := sum_cost_2019_pv_b * -1] - plot_df[, avoided_health_cost_annual_vsl := sum_cost_pv_b * -1] - plot_df[, sum_cost_2019_pv_b := NULL] - plot_df[, sum_cost_pv_b := NULL] - - plot_df[, `:=`( - avoided_health_cost_ghg = avoided_health_cost / avoided_ghg, - avoided_health_cost_ghg_vsl2 = avoided_health_cost_annual_vsl / avoided_ghg, - forgone_wages_bil_h_ghg = forgone_wages_bil_h / avoided_ghg, - forgone_wages_bil_l_ghg = forgone_wages_bil_l / avoided_ghg - )] - - plot_df_health <- plot_df %>% - select( - scen_id, - demand_scenario, - refining_scenario, - ghg_perc_diff, - avoided_health_cost, - avoided_health_cost_annual_vsl, - avoided_health_cost_ghg, - avoided_health_cost_ghg_vsl2 - ) %>% - pivot_longer( - avoided_health_cost:avoided_health_cost_ghg_vsl2, - names_to = "metric", - values_to = "value" - ) - - ## add column for vsl - plot_df_health <- plot_df_health %>% - mutate( - segment = "health", - unit_desc = ifelse( - metric == "avoided_health_cost", - "USD billion (2019 VSL)", - ifelse( - metric == "avoided_health_cost_annual_vsl", - "USD billion (annual VSL)", - ifelse( - metric == "avoided_health_cost_ghg", - "USD billion per GHG (2019 VSL)", - "USD billion per GHG (annual VSL)" - ) - ) - ), - metric = ifelse( - metric %in% c("avoided_health_cost", "avoided_health_cost_annual_vsl"), - "avoided_health_cost", - "avoided_health_cost_ghg" - ) - ) - - plot_df_labor <- plot_df %>% - select( - scen_id, - demand_scenario, - refining_scenario, - oil_price_scenario, - ghg_perc_diff, - forgone_wages_bil_h, - forgone_wages_bil_l, - forgone_wages_bil_h_ghg, - forgone_wages_bil_l_ghg - ) %>% - pivot_longer( - forgone_wages_bil_h:forgone_wages_bil_l_ghg, - names_to = "metric", - values_to = "value" - ) %>% - mutate( - segment = "labor", - unit_desc = ifelse( - metric %in% c("forgone_wages_bil_h", "forgone_wages_bil_l"), - "USD billion", - "USD billion per GHG" - ), - estimate = ifelse( - metric %in% c("forgone_wages_bil_h", "forgone_wages_bil_h_ghg"), - "high", - "low" - ), - metric = ifelse( - metric %in% c("forgone_wages_bil_h", "forgone_wages_bil_l"), - "forgone_wages_bil", - "forgone_wages_bil_ghg" - ) - ) %>% - select( - scen_id, - demand_scenario, - refining_scenario, - ghg_perc_diff, - segment, - metric, - unit_desc, - estimate, - value - ) %>% - pivot_wider(names_from = estimate, values_from = value) - - # plot_df_long <- rbind(plot_df_health, plot_df_labor) - - ## prepare health for plotting ------------------------------ - plot_df_health <- plot_df_health %>% - mutate( - title = ifelse( - metric == "avoided_health_cost", - "Health: avoided mortality", - "Health: avoided mortality per avoided GHG" - ) - ) - - plot_df_health$title <- factor( - plot_df_health$title, - levels = c( - "Health: avoided mortality", - "Health: avoided mortality per avoided GHG" - ) - ) - - ## rename - setDT(plot_df_health) - plot_df_health[, - scenario := paste0(demand_scenario, " demand - ", refining_scenario) - ] - plot_df_health[, scenario := gsub("LC1.", "Low ", scenario)] - # plot_df_long[, scenario := gsub('BAU', 'Reference', scenario)] - # plot_df_long[, short_scen := gsub('BAU', 'Reference', short_scen)] - # plot_df_long[, short_scen := gsub('Low C.', 'Low carbon', short_scen)] - - ## refactor - plot_df_health$scenario <- factor( - plot_df_health$scenario, - levels = c( - "BAU demand - historic production", - "BAU demand - historic exports", - "BAU demand - low exports", - "Low demand - historic exports", - "Low demand - low exports", - "Low demand - historic production" - ) - ) - - ## convert value of scaled outputs (by ghg) to millions, add unit column - plot_df_health[, - value := fifelse( - metric %in% c("avoided_health_cost_ghg", "forgone_wages_bil_ghg"), - value * 1000, - value - ) - ] - plot_df_health[, - metric := fifelse( - metric == "forgone_wages_bil_ghg", - "forgone_wages_ghg", - metric - ) - ] - plot_df_health[, - unit := fifelse( - metric %in% c("avoided_health_cost_ghg", "forgone_wages_ghg"), - "NPV per avoided GHG MtCO2e\n(2019 USD million / MtCO2e)", - "NPV (2019 USD billion)" - ) - ] - - ## change historic to historical - plot_df_health[, scen_id := str_replace(scen_id, "historic", "historical")] - plot_df_health[, - refining_scenario := str_replace( - refining_scenario, - "historic", - "historical" - ) - ] - plot_df_health[, scenario := str_replace(scenario, "historic", "historical")] - - ## save figure inputs - fwrite( - plot_df_health, - file.path( - main_path, - save_path, - "fig-csv-files", - "state_npv_fig_inputs_health.csv" - ) - ) - # fwrite(plot_df_health, file.path(main_path, "outputs/academic-out/refining/figures/2024-08-beta-adj/fig-csv-files/", "state_npv_fig_inputs_health.csv")) - - ## prepare labor ---------------------- - plot_df_labor <- plot_df_labor %>% - mutate( - title = ifelse( - metric == "forgone_wages_bil", - "Labor: forgone wages", - "Labor: forgone wages per avoided GHG" - ) - ) - - plot_df_labor$title <- factor( - plot_df_labor$title, - levels = c("Labor: forgone wages", "Labor: forgone wages per avoided GHG") - ) - - ## rename - setDT(plot_df_labor) - plot_df_labor[, - scenario := paste0(demand_scenario, " demand - ", refining_scenario) - ] - plot_df_labor[, scenario := gsub("LC1.", "Low ", scenario)] - # plot_df_long[, scenario := gsub('BAU', 'Reference', scenario)] - # plot_df_long[, short_scen := gsub('BAU', 'Reference', short_scen)] - # plot_df_long[, short_scen := gsub('Low C.', 'Low carbon', short_scen)] - - ## refactor - plot_df_labor$scenario <- factor( - plot_df_labor$scenario, - levels = c( - "BAU demand - historic production", - "BAU demand - historic exports", - "BAU demand - low exports", - "Low demand - historic exports", - "Low demand - low exports", - "Low demand - historic production" - ) - ) - - ## convert value of scaled outputs (by ghg) to millions, add unit column - plot_df_labor[, - high := fifelse( - metric %in% c("avoided_health_cost_ghg", "forgone_wages_bil_ghg"), - high * 1000, - high - ) - ] - plot_df_labor[, - low := fifelse( - metric %in% c("avoided_health_cost_ghg", "forgone_wages_bil_ghg"), - low * 1000, - low - ) - ] - plot_df_labor[, - metric := fifelse( - metric == "forgone_wages_bil_ghg", - "forgone_wages_ghg", - metric - ) - ] - plot_df_labor[, - unit := fifelse( - metric %in% c("avoided_health_cost_ghg", "forgone_wages_ghg"), - "NPV per avoided GHG MtCO2e\n(2019 USD million / MtCO2e)", - "NPV (2019 USD billion)" - ) - ] - - ## change historic to historical - plot_df_labor[, scen_id := str_replace(scen_id, "historic", "historical")] - plot_df_labor[, - refining_scenario := str_replace( - refining_scenario, - "historic", - "historical" - ) - ] - plot_df_labor[, scenario := str_replace(scenario, "historic", "historical")] - - ## save figure inputs - fwrite( - plot_df_labor, - file.path( - main_path, - save_path, - "fig-csv-files", - "state_npv_fig_inputs_labor.csv" - ) - ) - # fwrite(plot_df_labor, file.path(main_path, "outputs/academic-out/refining/figures/2024-08-beta-adj/fig-csv-files/", "state_npv_fig_inputs_labor.csv")) - - ## scenarios for filtering - remove_scen <- c("LC1 historical production", "BAU historical production") - bau_scen <- "BAU historical production" - - ## make the plot - ## --------------------------------------------------- - - ## color for refining scenario - refin_colors <- c( - "LC1 low exports" = "#729b79", - "LC1 historical exports" = "#2F4858", - "BAU low exports" = "#F6AE2D", - "BAU historical exports" = "#F26419" - ) - - refin_labs <- c( - "LC1 low exports" = "Low demand, low exports", - "LC1 historical exports" = "Low demand, historical exports", - "BAU low exports" = "BAU demand, low exports", - "BAU historical exports" = "BAU demand, historical exports" - ) - - ## refactor - # plot_df_health$scen_id <- factor(plot_df_health$scen_id, levels = c('LC1 low exports', - # 'LC1 historical production', - # 'BAU demand\nlow exports', - # 'Low demand\nhistorical exports', - # 'Low demand\nlow exports', - # 'Low demand\nhistorical production')) - # - - ## figs - make each separately - ## ------------------------------------------------------------------- - - hist_prod <- as.data.table( - plot_df_health %>% - filter( - scen_id == bau_scen, - unit == "NPV (2019 USD billion)", - unit_desc == "USD billion (2019 VSL)" - ) - ) - - fig_bxm_a <- ggplot() + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - geom_vline( - xintercept = hist_prod[ - title == "Health: avoided mortality", - ghg_perc_diff * -100 - ], - color = "darkgray", - lty = 2 - ) + - geom_point( - data = plot_df_health %>% - filter( - !scen_id %in% remove_scen, - title == "Health: avoided mortality", - unit == "NPV (2019 USD billion)", - unit_desc == "USD billion (2019 VSL)", - !refining_scenario == "historical production" - ), - aes(x = ghg_perc_diff * -100, y = value, color = scen_id), - shape = 16, - size = 3, - alpha = 0.9 - ) + - labs( - color = NULL, - title = "Health: avoided mortality", - y = "NPV (2019 USD billion)", - x = "GHG emissions reduction (%, 2045 vs 2019)" - ) + - ylim(0, 60) + - xlim(0, 80) + - scale_color_manual( - values = refin_colors, - labels = refin_labs - ) + - theme_line + - theme( - legend.position = "bottom", - legend.text = element_text(size = 10), - plot.title = element_text(hjust = 0.5, size = 12), - axis.title.y = element_text(size = 12), - axis.title.x = element_text(size = 11), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm"), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5, size = 11), - axis.text.y = element_text(vjust = 0.5, hjust = 0.5, size = 11) - ) + - guides(color = guide_legend(nrow = 2)) - - # ## make separete df for labor high and low for plotting - # plot_df_labor_pts <- plot_df_labor %>% - # filter(!scen_id %in% remove_scen, - # title == "Labor: forgone wages", - # unit == "NPV (2019 USD billion)", - # refining_scenario != "historical production") %>% - # select(scen_id, demand_scenario, refining_scenario, scenario, ghg_perc_diff, high, low) %>% - # pivot_longer(high:low, names_to = "estimate", values_to = "npv_2019_usd_billion") - # - fig_bxm_b <- ggplot() + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - geom_vline( - xintercept = hist_prod[ - title == "Health: avoided mortality", - ghg_perc_diff * -100 - ], - color = "darkgray", - lty = 2 - ) + - # geom_vline(xintercept = hist_prod[title == "Labor: forgone wages", ghg_perc_diff * -100], color = "darkgray", lty = 2) + - # geom_linerange(data = plot_df_labor %>% filter(!scen_id %in% remove_scen, - # refining_scenario != "historical production", - # metric == "forgone_wages_bil"), aes(x = ghg_perc_diff * -100, ymin = high, ymax = low, color = scen_id), linewidth = 0.5, alpha = 0.8) + - geom_point( - data = plot_df_labor %>% - filter( - !scen_id %in% remove_scen, - refining_scenario != "historical production", - metric == "forgone_wages_bil" - ), - aes(x = ghg_perc_diff * -100, y = low, color = scen_id), - shape = 16, - size = 3, - alpha = 0.9 - ) + - geom_point( - data = plot_df_labor %>% - filter( - !scen_id %in% remove_scen, - refining_scenario != "historical production", - metric == "forgone_wages_bil" - ), - aes(x = ghg_perc_diff * -100, y = high, color = scen_id), - shape = 1, - size = 3, - alpha = 0.9 - ) + - labs( - color = NULL, - title = "Labor: forgone wages", - y = NULL, - x = "GHG emissions reduction (%, 2045 vs 2019)" - ) + - ylim(-60, 0) + - xlim(0, 80) + - scale_color_manual( - values = refin_colors, - labels = refin_labs - ) + - theme_line + - theme( - legend.position = "bottom", - legend.text = element_text(size = 10), - plot.title = element_text(hjust = 0.5, size = 12), - axis.title.y = element_text(size = 12), - axis.title.x = element_text(size = 11), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm"), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5, size = 11), - axis.text.y = element_text(vjust = 0.5, hjust = 0.5, size = 11) - ) + - guides(color = guide_legend(nrow = 2)) - - ## legends - low_legend_fig <- ggplot() + - geom_hline(yintercept = 0, color = "darkgray", size = 0.5) + - geom_vline( - xintercept = hist_prod[ - title == "Health: avoided mortality", - ghg_perc_diff * -100 - ], - color = "darkgray", - lty = 2 - ) + - # geom_vline(xintercept = hist_prod[title == "Labor: forgone wages", ghg_perc_diff * -100], color = "darkgray", lty = 2) + - # geom_linerange(data = plot_df_labor %>% filter(!scen_id %in% remove_scen, - # refining_scenario != "historic production"), aes(x = ghg_perc_diff * -100, ymin = high, ymax = low, color = scen_id), size = 0.5, alpha = 0.8) + - geom_point( - data = plot_df_labor %>% - filter( - !scen_id %in% remove_scen, - refining_scenario != "historic production", - metric == "forgone_wages_bil" - ), - aes(x = ghg_perc_diff * -100, y = low, color = scen_id), - shape = 16, - size = 3, - alpha = 1 - ) + - # geom_point(data = plot_df_labor %>% filter(!scen_id %in% remove_scen, - # refining_scenario != "historic production", - # metric == "forgone_wages_bil"), aes(x = ghg_perc_diff * -100, y = high, color = scen_id), shape = 1, size = 3, alpha = 0.8) + - labs( - color = "with re-emp:", - title = "Labor: forgone wages", - y = NULL, - x = "GHG emissions reduction (%, 2045 vs 2019)" - ) + - ylim(-60, 0) + - xlim(0, 80) + - scale_color_manual( - values = refin_colors, - labels = refin_labs - ) + - theme_line + - theme( - legend.position = "bottom", - legend.text = element_text(size = 10), - legend.title = element_text(size = 10), - plot.title = element_text(hjust = 0.5, size = 12), - axis.title.y = element_text(size = 12), - axis.title.x = element_text(size = 11), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm"), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5, size = 11), - axis.text.y = element_text(vjust = 0.5, hjust = 0.5, size = 11) - ) + - guides(color = guide_legend(nrow = 1)) - - low_legend <- get_legend( - low_legend_fig - ) - - ## save legends - ggsave( - plot = low_legend, - device = "pdf", - filename = "fig3_low_legend.pdf", - path = file.path(main_path, save_path, "legends"), - dpi = 600 - ) - - ## legends - high_legend_fig <- ggplot() + - geom_hline(yintercept = 0, color = "darkgray", size = 0.5) + - geom_vline( - xintercept = hist_prod[ - title == "Health: avoided mortality", - ghg_perc_diff * -100 - ], - color = "darkgray", - lty = 2 - ) + - # geom_vline(xintercept = hist_prod[title == "Labor: forgone wages", ghg_perc_diff * -100], color = "darkgray", lty = 2) + - # geom_linerange(data = plot_df_labor %>% filter(!scen_id %in% remove_scen, - # refining_scenario != "historic production"), aes(x = ghg_perc_diff * -100, ymin = high, ymax = low, color = scen_id), size = 0.5, alpha = 0.8) + - # geom_point(data = plot_df_labor %>% filter(!scen_id %in% remove_scen, - # refining_scenario != "historic production", - # metric == "forgone_wages_bil"), aes(x = ghg_perc_diff * -100, y = low, color = scen_id), shape = 16, size = 3, alpha = 1) + - geom_point( - data = plot_df_labor %>% - filter( - !scen_id %in% remove_scen, - refining_scenario != "historic production", - metric == "forgone_wages_bil" - ), - aes(x = ghg_perc_diff * -100, y = high, color = scen_id), - shape = 1, - size = 3, - alpha = 0.8 - ) + - labs( - color = "no re-emp:", - title = "Labor: forgone wages", - y = NULL, - x = "GHG emissions reduction (%, 2045 vs 2019)" - ) + - ylim(-60, 0) + - xlim(0, 80) + - scale_color_manual( - values = refin_colors, - labels = refin_labs - ) + - theme_line + - theme( - legend.position = "bottom", - legend.text = element_text(size = 10), - legend.title = element_text(size = 10), - plot.title = element_text(hjust = 0.5, size = 12), - axis.title.y = element_text(size = 12), - axis.title.x = element_text(size = 11), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm"), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5, size = 11), - axis.text.y = element_text(vjust = 0.5, hjust = 0.5, size = 11) - ) + - guides(color = guide_legend(nrow = 1)) - - high_legend <- get_legend( - high_legend_fig - ) - - ## save legends - ggsave( - plot = high_legend, - device = "pdf", - filename = "fig3_high_legend.pdf", - path = file.path(main_path, save_path, "legends"), - dpi = 600 - ) - - # fig_bxm_c <- ggplot() + - # geom_hline(yintercept = 0, color = "darkgray", size = 0.5) + - # geom_vline(xintercept = hist_prod[title == "Climate: avoided damage", ghg_perc_diff * -100], color = "darkgray", lty = 2) + - # geom_point(data = plot_df_long %>% filter(!scen_id %in% remove_scen, - # title == "Climate: avoided damage", - # unit == "NPV (2019 USD billion)", - # !refining_scenario == "historic production"), aes(x = ghg_perc_diff * -100, y = value, color = refining_scenario, shape = demand_scenario), size = 3, alpha = 0.8) + - # geom_hline(yintercept = 0, color = "darkgray", size = 0.5) + - # labs(color = "Policy", - # title = "C. Climate: avoided damage", - # y = NULL, - # x = NULL) + - # ylim(-1, 20) + - # xlim(0, 80) + - # scale_color_manual(values = refin_colors) + - # theme_line + - # theme(legend.position = "none", - # plot.title = element_text(hjust = 0), - # axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - # axis.ticks.length.y = unit(0.1, 'cm'), - # axis.ticks.length.x = unit(0.1, 'cm')) - - # fig_bxm_c <- ggplot() + - # geom_hline(yintercept = 0, color = "darkgray", size = 0.5) + - # geom_vline(xintercept = hist_prod[title == "Health: avoided mortality", ghg_perc_diff * -100], color = "darkgray", lty = 2) + - # geom_point(data = plot_df_long %>% filter(!scen_id %in% remove_scen, - # title == "Health: avoided mortality per avoided GHG", - # unit == "NPV per avoided GHG MtCO2e\n(2019 USD million / MtCO2e)", - # unit_desc == "USD billion per GHG (2019 VSL)", - # !refining_scenario == "historic production"), aes(x = ghg_perc_diff * -100, y = value, color = refining_scenario, shape = demand_scenario), size = 3, alpha = 0.8) + - # labs(color = "Policy", - # title = "C.", - # y = bquote('NPV (2019 USD million)\nper avoided GHG MtCO'[2]~e), - # x = "GHG emissions reduction (%, 2045 vs 2019)") + - # scale_color_manual(values = refin_colors) + - # ylim(0, 125) + - # xlim(0, 80) + - # theme_line + - # theme(legend.position = "none", - # axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - # axis.ticks.length.y = unit(0.1, 'cm'), - # axis.ticks.length.x = unit(0.1, 'cm')) - # - # fig_bxm_d <- ggplot() + - # geom_hline(yintercept = 0, color = "darkgray", size = 0.5) + - # geom_vline(xintercept = hist_prod[title == "Labor: forgone wages per avoided GHG", ghg_perc_diff * -100], color = "darkgray", lty = 2) + - # geom_point(data = plot_df_long %>% filter(!scen_id %in% remove_scen, - # title == "Labor: forgone wages per avoided GHG", - # unit == "NPV per avoided GHG MtCO2e\n(2019 USD million / MtCO2e)", - # !refining_scenario == "historic production"), aes(x = ghg_perc_diff * -100, y = value, color = refining_scenario, shape = demand_scenario), size = 3, alpha = 0.8) + - # labs(color = "Policy", - # title = "D.", - # y = NULL, - # # y = paste("NPV per avoied GHG ", bquotelab, "(2020 USD million / ", bquotelab), - # x = "GHG emissions reduction (%, 2045 vs 2019)") + - # scale_color_manual(values = refin_colors) + - # theme_line + - # xlim(0, 80) + - # ylim(-125, 0) + - # theme(legend.position = "none", - # axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - # axis.ticks.length.y = unit(0.1, 'cm'), - # axis.ticks.length.x = unit(0.1, 'cm')) - - # fig_bxm_f <- ggplot() + - # geom_hline(yintercept = 0, color = "darkgray", size = 0.5) + - # geom_vline(xintercept = hist_prod[title == "Health: avoided mortality", ghg_2045_perc_reduction], color = "darkgray", lty = 2) + - # geom_point(data = npv_dt %>% filter(!scen_id %in% bau_scens, - # title == "Climate: avoided damage", - # measure == "NPV per avoided GHG MtCO2e\n(2019 USD million / MtCO2e)", - # !refining_scenario == "historic production"), aes(x = ghg_2045_perc_reduction, y = value, color = refining_scenario, shape = demand_scenario), size = 3, alpha = 0.8) + - # labs(color = "Policy", - # title = "F.", - # y = NULL, - # # y = paste("NPV per avoied GHG ", bquotelab, "(2020 USD million / ", bquotelab), - # x = "GHG emissions reduction target (%, 2045 vs 2019)") + - # scale_color_manual(values = refin_colors) + - # theme_line + - # ylim(0, 80) + - # xlim(0, 80) + - # theme(legend.position = "none", - # axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - # axis.ticks.length.y = unit(0.1, 'cm'), - # axis.ticks.length.x = unit(0.1, 'cm')) - # - ## extract legend - # legend_fig <- ggplot() + - # geom_hline(yintercept = 0, color = "darkgray", size = 0.5) + - # geom_point(data = npv_dt %>% filter(!scen_id %in% bau_scens, - # title == "Labor: forgone wages", - # measure == "NPV per avoided GHG MtCO2e\n(2019 USD million / MtCO2e)"), aes(x = ghg_2045_perc_reduction, y = value, color = scenario, shape = scenario), size = 3, alpha = 0.8) + - # labs(title = "", - # y = NULL, - # # y = paste("NPV per avoied GHG ", bquotelab, "(2020 USD million / ", bquotelab), - # x = "GHG emissions reduction target (%, 2045 vs 2019)", - # color = NULL, - # shape = NULL) + - # # scale_shape_manual(values = c(16, 16, 16, 17, 17)) + - # scale_color_manual(name = "", - # labels = c("BAU demand - historic exports", - # "BAU demand - historic production", - # "BAU demand - low exports", - # "Low C. demand - historic exports", - # "Low C. demand - low exports"), - # values = c("BAU demand - historic exports" = "#2F4858", - # "BAU demand - historic production" = "#F6AE2D", - # "BAU demand - low exports" = "#F26419", - # "Low C. demand - historic exports" = "#2F4858", - # "Low C. demand - low exports" = "#F26419")) + - # scale_shape_manual(name = "", - # labels = c("BAU demand - historic exports", - # "BAU demand - historic production", - # "BAU demand - low exports", - # "Low C. demand - historic exports", - # "Low C. demand - low exports"), - # values = c(16, 16, 16, 17, 17)) + - # theme_line + - # theme(legend.position = "bottom", - # axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - # axis.ticks.length.y = unit(0.1, 'cm'), - # axis.ticks.length.x = unit(0.1, 'cm')) + - # guides(color = guide_legend(nrow = 2, byrow = TRUE)) - - ## combine figure - ## --------------------------------- - - ## shared x axis - xaxis_lab <- ggdraw() + - draw_label("GHG emissions reduction (%, 2045 vs 2019)", size = 12) - - fig3_plot_grid_ab <- plot_grid( - fig_bxm_a, - fig_bxm_b, - align = "vh", - labels = c("A", "B"), - # # labels = 'AUTO', - # label_size = 10, - hjust = -1, - nrow = 1, - rel_widths = c(1, 1) - ) - - # fig3_plot_grid2 <- plot_grid( - # fig3_plot_grid_ab, - # align = "v", - # # labels = c("(A)", "(B)", "(C)", ""), - # # # labels = 'AUTO', - # # label_size = 10, - # # hjust = -1, - # ncol = 1, - # rel_heights = c(0.95, 0.05) - # # rel_widths = c(1, 1), - # ) - - return(fig3_plot_grid_ab) -} - - -## ----------------------------------------------------------------------------- -## NPV figure: refinery level emission factors -## ----------------------------------------------------------------------------- - -## NPV figure -plot_npv_health_labor_ref <- function( - main_path, - save_path, - refining_mortality, - state_ghg_output, - dt_ghg_2019, - annual_labor -) { - npv_df <- refining_mortality %>% as.data.table() - - ## state level - state_npv_df <- npv_df[, - .( - sum_cost_2019_pv = sum(cost_2019_PV), ## constant VSL - sum_cost_pv = sum(cost_PV) - ), ## changing VSL - by = .(scen_id, demand_scenario, refining_scenario) - ] - - ## add column - state_npv_df[, sum_cost_2019_pv_b := sum_cost_2019_pv / 1e9] - state_npv_df[, sum_cost_pv_b := sum_cost_pv / 1e9] - - ## add ghg emission reduction - ## 2019 ghg - ghg_2019_val <- dt_ghg_2019$mtco2e[1] - - ## 2045 vs 2019 ghg - ghg_2045 <- state_ghg_output[year == 2045 & source == "total"] - setnames(ghg_2045, "value", "ghg_kg") - ghg_2045[, ghg_2045 := (ghg_kg / 1000) / 1e6] - ghg_2045[, ghg_2019 := ghg_2019_val] - ghg_2045[, perc_diff := (ghg_2045 - ghg_2019) / ghg_2019] - - perc_diff_df <- ghg_2045[, .( - demand_scenario, - refining_scenario, - ghg_2045, - ghg_2019, - perc_diff - )] - - ## summarize by scenario, filter for total - state_ghg_df <- state_ghg_output[ - source == "total", - .(total_ghg = sum(value)), - by = .(demand_scenario, refining_scenario) - ] - - state_ghg_df[, total_ghg_mmt := (total_ghg / 1000) / 1e6] - - ## reference - ref_df <- state_ghg_df[ - demand_scenario == "BAU" & refining_scenario == "historic production", - .(total_ghg_mmt) - ] - setnames(ref_df, "total_ghg_mmt", "ref_ghg_mmt") - ref_value <- ref_df$ref_ghg_mmt[1] - - ## merge with summarized df - state_ghg_df[, ref_ghg := ref_value] - state_ghg_df[, avoided_ghg := (total_ghg_mmt - ref_value) * -1] - - ## merge with health - health_ghg_df <- merge( - state_npv_df, - state_ghg_df[, .( - demand_scenario, - refining_scenario, - total_ghg_mmt, - ref_ghg, - avoided_ghg - )], - by = c("demand_scenario", "refining_scenario"), - all.x = T - ) - - ## summarize labor for state - state_labor <- annual_labor[, - .( - sum_total_emp = sum(total_emp), - sum_total_comp_pv_h = sum(total_comp_PV_h), - sum_total_comp_pv_l = sum(total_comp_PV_l) - ), - by = .(demand_scenario, refining_scenario, oil_price_scenario) - ] - - state_labor <- state_labor[oil_price_scenario == "reference case", ] - - ## ref labor - ref_labor <- state_labor[ - demand_scenario == "BAU" & refining_scenario == "historic production" - ] - setnames( - ref_labor, - c("sum_total_emp", "sum_total_comp_pv_h", "sum_total_comp_pv_l"), - c("ref_total_emp", "ref_total_comp_pv_h", "ref_total_comp_pv_l") - ) - - ## add values to labor - state_labor[, `:=`( - ref_total_emp = ref_labor$ref_total_emp[1], - ref_total_comp_pv_h = ref_labor$ref_total_comp_pv_h[1], - ref_total_comp_pv_l = ref_labor$ref_total_comp_pv_l[1] - )] - - state_labor[, - forgone_wages_bil_h := (sum_total_comp_pv_h - ref_total_comp_pv_h) / 1e9 - ] - state_labor[, - forgone_wages_bil_l := (sum_total_comp_pv_l - ref_total_comp_pv_l) / 1e9 - ] - - ## merge with health and ghg - health_labor_ghg_df <- merge( - health_ghg_df, - state_labor[, .( - demand_scenario, - refining_scenario, - oil_price_scenario, - sum_total_comp_pv_h, - ref_total_comp_pv_h, - forgone_wages_bil_h, - sum_total_comp_pv_l, - ref_total_comp_pv_l, - forgone_wages_bil_l - )], - by = c("demand_scenario", "refining_scenario"), - all.x = T - ) - - ## add ghg perc reduction - health_labor_ghg_df <- merge( - health_labor_ghg_df, - perc_diff_df, - by = c("demand_scenario", "refining_scenario"), - all.x = T - ) - - ## prepare to plot - plot_df <- health_labor_ghg_df[, .( - scen_id, - demand_scenario, - refining_scenario, - oil_price_scenario, - sum_cost_pv_b, - sum_cost_2019_pv_b, - forgone_wages_bil_h, - forgone_wages_bil_l, - avoided_ghg, - perc_diff - )] - - setnames(plot_df, "perc_diff", "ghg_perc_diff") - - ## add values / avoided ghgs - plot_df[, avoided_health_cost := sum_cost_2019_pv_b * -1] - plot_df[, avoided_health_cost_annual_vsl := sum_cost_pv_b * -1] - plot_df[, sum_cost_2019_pv_b := NULL] - plot_df[, sum_cost_pv_b := NULL] - - plot_df[, `:=`( - avoided_health_cost_ghg = avoided_health_cost / avoided_ghg, - avoided_health_cost_ghg_vsl2 = avoided_health_cost_annual_vsl / avoided_ghg, - forgone_wages_bil_h_ghg = forgone_wages_bil_h / avoided_ghg, - forgone_wages_bil_l_ghg = forgone_wages_bil_l / avoided_ghg - )] - - plot_df_health <- plot_df %>% - select( - scen_id, - demand_scenario, - refining_scenario, - ghg_perc_diff, - avoided_health_cost, - avoided_health_cost_annual_vsl, - avoided_health_cost_ghg, - avoided_health_cost_ghg_vsl2 - ) %>% - pivot_longer( - avoided_health_cost:avoided_health_cost_ghg_vsl2, - names_to = "metric", - values_to = "value" - ) - - ## add column for vsl - plot_df_health <- plot_df_health %>% - mutate( - segment = "health", - unit_desc = ifelse( - metric == "avoided_health_cost", - "USD billion (2019 VSL)", - ifelse( - metric == "avoided_health_cost_annual_vsl", - "USD billion (annual VSL)", - ifelse( - metric == "avoided_health_cost_ghg", - "USD billion per GHG (2019 VSL)", - "USD billion per GHG (annual VSL)" - ) - ) - ), - metric = ifelse( - metric %in% c("avoided_health_cost", "avoided_health_cost_annual_vsl"), - "avoided_health_cost", - "avoided_health_cost_ghg" - ) - ) - - plot_df_labor <- plot_df %>% - select( - scen_id, - demand_scenario, - refining_scenario, - oil_price_scenario, - ghg_perc_diff, - forgone_wages_bil_h, - forgone_wages_bil_l, - forgone_wages_bil_h_ghg, - forgone_wages_bil_l_ghg - ) %>% - pivot_longer( - forgone_wages_bil_h:forgone_wages_bil_l_ghg, - names_to = "metric", - values_to = "value" - ) %>% - mutate( - segment = "labor", - unit_desc = ifelse( - metric %in% c("forgone_wages_bil_h", "forgone_wages_bil_l"), - "USD billion", - "USD billion per GHG" - ), - estimate = ifelse( - metric %in% c("forgone_wages_bil_h", "forgone_wages_bil_h_ghg"), - "high", - "low" - ), - metric = ifelse( - metric %in% c("forgone_wages_bil_h", "forgone_wages_bil_l"), - "forgone_wages_bil", - "forgone_wages_bil_ghg" - ) - ) %>% - select( - scen_id, - demand_scenario, - refining_scenario, - ghg_perc_diff, - segment, - metric, - unit_desc, - estimate, - value - ) %>% - pivot_wider(names_from = estimate, values_from = value) - - # plot_df_long <- rbind(plot_df_health, plot_df_labor) - - ## prepare health for plotting ------------------------------ - plot_df_health <- plot_df_health %>% - mutate( - title = ifelse( - metric == "avoided_health_cost", - "Health: avoided mortality", - "Health: avoided mortality per avoided GHG" - ) - ) - - plot_df_health$title <- factor( - plot_df_health$title, - levels = c( - "Health: avoided mortality", - "Health: avoided mortality per avoided GHG" - ) - ) - - ## rename - setDT(plot_df_health) - plot_df_health[, - scenario := paste0(demand_scenario, " demand - ", refining_scenario) - ] - plot_df_health[, scenario := gsub("LC1.", "Low ", scenario)] - # plot_df_long[, scenario := gsub('BAU', 'Reference', scenario)] - # plot_df_long[, short_scen := gsub('BAU', 'Reference', short_scen)] - # plot_df_long[, short_scen := gsub('Low C.', 'Low carbon', short_scen)] - - ## refactor - plot_df_health$scenario <- factor( - plot_df_health$scenario, - levels = c( - "BAU demand - historic production", - "BAU demand - historic exports", - "BAU demand - low exports", - "Low demand - historic exports", - "Low demand - low exports", - "Low demand - historic production" - ) - ) - - ## convert value of scaled outputs (by ghg) to millions, add unit column - plot_df_health[, - value := fifelse( - metric %in% c("avoided_health_cost_ghg", "forgone_wages_bil_ghg"), - value * 1000, - value - ) - ] - plot_df_health[, - metric := fifelse( - metric == "forgone_wages_bil_ghg", - "forgone_wages_ghg", - metric - ) - ] - plot_df_health[, - unit := fifelse( - metric %in% c("avoided_health_cost_ghg", "forgone_wages_ghg"), - "NPV per avoided GHG MtCO2e\n(2019 USD million / MtCO2e)", - "NPV (2019 USD billion)" - ) - ] - - ## change historic to historical - plot_df_health[, scen_id := str_replace(scen_id, "historic", "historical")] - plot_df_health[, - refining_scenario := str_replace( - refining_scenario, - "historic", - "historical" - ) - ] - plot_df_health[, scenario := str_replace(scenario, "historic", "historical")] - - ## save figure inputs - fwrite( - plot_df_health, - file.path( - main_path, - save_path, - "fig-csv-files", - "state_npv_fig_inputs_health_ref.csv" - ) - ) - # fwrite(plot_df_health, file.path(main_path, "outputs/academic-out/refining/figures/2024-08-beta-adj/fig-csv-files/", "state_npv_fig_inputs_health.csv")) - - ## prepare labor ---------------------- - plot_df_labor <- plot_df_labor %>% - mutate( - title = ifelse( - metric == "forgone_wages_bil", - "Labor: forgone wages", - "Labor: forgone wages per avoided GHG" - ) - ) - - plot_df_labor$title <- factor( - plot_df_labor$title, - levels = c("Labor: forgone wages", "Labor: forgone wages per avoided GHG") - ) - - ## rename - setDT(plot_df_labor) - plot_df_labor[, - scenario := paste0(demand_scenario, " demand - ", refining_scenario) - ] - plot_df_labor[, scenario := gsub("LC1.", "Low ", scenario)] - # plot_df_long[, scenario := gsub('BAU', 'Reference', scenario)] - # plot_df_long[, short_scen := gsub('BAU', 'Reference', short_scen)] - # plot_df_long[, short_scen := gsub('Low C.', 'Low carbon', short_scen)] - - ## refactor - plot_df_labor$scenario <- factor( - plot_df_labor$scenario, - levels = c( - "BAU demand - historic production", - "BAU demand - historic exports", - "BAU demand - low exports", - "Low demand - historic exports", - "Low demand - low exports", - "Low demand - historic production" - ) - ) - - ## convert value of scaled outputs (by ghg) to millions, add unit column - plot_df_labor[, - high := fifelse( - metric %in% c("avoided_health_cost_ghg", "forgone_wages_bil_ghg"), - high * 1000, - high - ) - ] - plot_df_labor[, - low := fifelse( - metric %in% c("avoided_health_cost_ghg", "forgone_wages_bil_ghg"), - low * 1000, - low - ) - ] - plot_df_labor[, - metric := fifelse( - metric == "forgone_wages_bil_ghg", - "forgone_wages_ghg", - metric - ) - ] - plot_df_labor[, - unit := fifelse( - metric %in% c("avoided_health_cost_ghg", "forgone_wages_ghg"), - "NPV per avoided GHG MtCO2e\n(2019 USD million / MtCO2e)", - "NPV (2019 USD billion)" - ) - ] - - ## change historic to historical - plot_df_labor[, scen_id := str_replace(scen_id, "historic", "historical")] - plot_df_labor[, - refining_scenario := str_replace( - refining_scenario, - "historic", - "historical" - ) - ] - plot_df_labor[, scenario := str_replace(scenario, "historic", "historical")] - - ## scenarios for filtering - remove_scen <- c("LC1 historical production", "BAU historical production") - bau_scen <- "BAU historical production" - - ## make the plot - ## --------------------------------------------------- - - ## color for refining scenario - refin_colors <- c( - "LC1 low exports" = "#729b79", - "LC1 historical exports" = "#2F4858", - "BAU low exports" = "#F6AE2D", - "BAU historical exports" = "#F26419" - ) - - refin_labs <- c( - "LC1 low exports" = "Low demand, low exports", - "LC1 historical exports" = "Low demand, historical exports", - "BAU low exports" = "BAU demand, low exports", - "BAU historical exports" = "BAU demand, historical exports" - ) - - ## refactor - # plot_df_health$scen_id <- factor(plot_df_health$scen_id, levels = c('LC1 low exports', - # 'LC1 historical production', - # 'BAU demand\nlow exports', - # 'Low demand\nhistorical exports', - # 'Low demand\nlow exports', - # 'Low demand\nhistorical production')) - # - - ## figs - make each separately - ## ------------------------------------------------------------------- - - hist_prod <- as.data.table( - plot_df_health %>% - filter( - scen_id == bau_scen, - unit == "NPV (2019 USD billion)", - unit_desc == "USD billion (2019 VSL)" - ) - ) - - fig_bxm_a <- ggplot() + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - geom_vline( - xintercept = hist_prod[ - title == "Health: avoided mortality", - ghg_perc_diff * -100 - ], - color = "darkgray", - lty = 2 - ) + - geom_point( - data = plot_df_health %>% - filter( - !scen_id %in% remove_scen, - title == "Health: avoided mortality", - unit == "NPV (2019 USD billion)", - unit_desc == "USD billion (2019 VSL)", - !refining_scenario == "historical production" - ), - aes(x = ghg_perc_diff * -100, y = value, color = scen_id), - shape = 16, - size = 3, - alpha = 0.9 - ) + - labs( - color = NULL, - title = "Health: avoided mortality", - y = "NPV (2019 USD billion)", - x = "GHG emissions reduction (%, 2045 vs 2019)" - ) + - ylim(0, 50) + - xlim(0, 80) + - scale_color_manual( - values = refin_colors, - labels = refin_labs - ) + - theme_line + - theme( - legend.position = "bottom", - legend.text = element_text(size = 10), - plot.title = element_text(hjust = 0.5, size = 12), - axis.title.y = element_text(size = 12), - axis.title.x = element_text(size = 11), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm"), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5, size = 11), - axis.text.y = element_text(vjust = 0.5, hjust = 0.5, size = 11) - ) + - guides(color = guide_legend(nrow = 2)) - - # ## make separete df for labor high and low for plotting - # plot_df_labor_pts <- plot_df_labor %>% - # filter(!scen_id %in% remove_scen, - # title == "Labor: forgone wages", - # unit == "NPV (2019 USD billion)", - # refining_scenario != "historical production") %>% - # select(scen_id, demand_scenario, refining_scenario, scenario, ghg_perc_diff, high, low) %>% - # pivot_longer(high:low, names_to = "estimate", values_to = "npv_2019_usd_billion") - # - fig_bxm_b <- ggplot() + - geom_hline(yintercept = 0, color = "darkgray", size = 0.5) + - geom_vline( - xintercept = hist_prod[ - title == "Health: avoided mortality", - ghg_perc_diff * -100 - ], - color = "darkgray", - lty = 2 - ) + - # geom_vline(xintercept = hist_prod[title == "Labor: forgone wages", ghg_perc_diff * -100], color = "darkgray", lty = 2) + - # geom_linerange(data = plot_df_labor %>% filter(!scen_id %in% remove_scen, - # refining_scenario != "historical production", - # metric == "forgone_wages_bil"), aes(x = ghg_perc_diff * -100, ymin = high, ymax = low, color = scen_id), linewidth = 0.5, alpha = 0.8) + - geom_point( - data = plot_df_labor %>% - filter( - !scen_id %in% remove_scen, - refining_scenario != "historical production", - metric == "forgone_wages_bil" - ), - aes(x = ghg_perc_diff * -100, y = low, color = scen_id), - shape = 16, - size = 3, - alpha = 0.9 - ) + - geom_point( - data = plot_df_labor %>% - filter( - !scen_id %in% remove_scen, - refining_scenario != "historical production", - metric == "forgone_wages_bil" - ), - aes(x = ghg_perc_diff * -100, y = high, color = scen_id), - shape = 1, - size = 3, - alpha = 0.9 - ) + - labs( - color = NULL, - title = "Labor: forgone wages", - y = NULL, - x = "GHG emissions reduction (%, 2045 vs 2019)" - ) + - ylim(-50, 0) + - xlim(0, 80) + - scale_color_manual( - values = refin_colors, - labels = refin_labs - ) + - theme_line + - theme( - legend.position = "bottom", - legend.text = element_text(size = 10), - plot.title = element_text(hjust = 0.5, size = 12), - axis.title.y = element_text(size = 12), - axis.title.x = element_text(size = 11), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm"), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5, size = 11), - axis.text.y = element_text(vjust = 0.5, hjust = 0.5, size = 11) - ) + - guides(color = guide_legend(nrow = 2)) - - ## legends - low_legend_fig <- ggplot() + - geom_hline(yintercept = 0, color = "darkgray", size = 0.5) + - geom_vline( - xintercept = hist_prod[ - title == "Health: avoided mortality", - ghg_perc_diff * -100 - ], - color = "darkgray", - lty = 2 - ) + - # geom_vline(xintercept = hist_prod[title == "Labor: forgone wages", ghg_perc_diff * -100], color = "darkgray", lty = 2) + - # geom_linerange(data = plot_df_labor %>% filter(!scen_id %in% remove_scen, - # refining_scenario != "historic production"), aes(x = ghg_perc_diff * -100, ymin = high, ymax = low, color = scen_id), size = 0.5, alpha = 0.8) + - geom_point( - data = plot_df_labor %>% - filter( - !scen_id %in% remove_scen, - refining_scenario != "historic production", - metric == "forgone_wages_bil" - ), - aes(x = ghg_perc_diff * -100, y = low, color = scen_id), - shape = 16, - size = 3, - alpha = 1 - ) + - # geom_point(data = plot_df_labor %>% filter(!scen_id %in% remove_scen, - # refining_scenario != "historic production", - # metric == "forgone_wages_bil"), aes(x = ghg_perc_diff * -100, y = high, color = scen_id), shape = 1, size = 3, alpha = 0.8) + - labs( - color = "with re-emp:", - title = "Labor: forgone wages", - y = NULL, - x = "GHG emissions reduction (%, 2045 vs 2019)" - ) + - ylim(-50, 0) + - xlim(0, 80) + - scale_color_manual( - values = refin_colors, - labels = refin_labs - ) + - theme_line + - theme( - legend.position = "bottom", - legend.text = element_text(size = 10), - legend.title = element_text(size = 10), - plot.title = element_text(hjust = 0.5, size = 12), - axis.title.y = element_text(size = 12), - axis.title.x = element_text(size = 11), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm"), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5, size = 11), - axis.text.y = element_text(vjust = 0.5, hjust = 0.5, size = 11) - ) + - guides(color = guide_legend(nrow = 1)) - - low_legend <- get_legend( - low_legend_fig - ) - - ## legends - high_legend_fig <- ggplot() + - geom_hline(yintercept = 0, color = "darkgray", size = 0.5) + - geom_vline( - xintercept = hist_prod[ - title == "Health: avoided mortality", - ghg_perc_diff * -100 - ], - color = "darkgray", - lty = 2 - ) + - # geom_vline(xintercept = hist_prod[title == "Labor: forgone wages", ghg_perc_diff * -100], color = "darkgray", lty = 2) + - # geom_linerange(data = plot_df_labor %>% filter(!scen_id %in% remove_scen, - # refining_scenario != "historic production"), aes(x = ghg_perc_diff * -100, ymin = high, ymax = low, color = scen_id), size = 0.5, alpha = 0.8) + - # geom_point(data = plot_df_labor %>% filter(!scen_id %in% remove_scen, - # refining_scenario != "historic production", - # metric == "forgone_wages_bil"), aes(x = ghg_perc_diff * -100, y = low, color = scen_id), shape = 16, size = 3, alpha = 1) + - geom_point( - data = plot_df_labor %>% - filter( - !scen_id %in% remove_scen, - refining_scenario != "historic production", - metric == "forgone_wages_bil" - ), - aes(x = ghg_perc_diff * -100, y = high, color = scen_id), - shape = 1, - size = 3, - alpha = 0.8 - ) + - labs( - color = "no re-emp:", - title = "Labor: forgone wages", - y = NULL, - x = "GHG emissions reduction (%, 2045 vs 2019)" - ) + - ylim(-50, 0) + - xlim(0, 80) + - scale_color_manual( - values = refin_colors, - labels = refin_labs - ) + - theme_line + - theme( - legend.position = "bottom", - legend.text = element_text(size = 10), - legend.title = element_text(size = 10), - plot.title = element_text(hjust = 0.5, size = 12), - axis.title.y = element_text(size = 12), - axis.title.x = element_text(size = 11), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm"), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5, size = 11), - axis.text.y = element_text(vjust = 0.5, hjust = 0.5, size = 11) - ) + - guides(color = guide_legend(nrow = 1)) - - high_legend <- get_legend( - high_legend_fig - ) - - ## combine figure - ## --------------------------------- - - ## shared x axis - xaxis_lab <- ggdraw() + - draw_label("GHG emissions reduction (%, 2045 vs 2019)", size = 12) - - fig3_plot_grid_ab <- plot_grid( - fig_bxm_a, - fig_bxm_b, - align = "vh", - labels = c("A", "B"), - # # labels = 'AUTO', - # label_size = 10, - hjust = -1, - nrow = 1, - rel_widths = c(1, 1) - ) - - fig3_plot_grid2 <- plot_grid( - fig3_plot_grid_ab, - xaxis_lab, - align = "v", - # labels = c("(A)", "(B)", "(C)", ""), - # # labels = 'AUTO', - # label_size = 10, - # hjust = -1, - ncol = 1, - rel_heights = c(0.95, 0.05) - # rel_widths = c(1, 1), - ) - - return(fig3_plot_grid_ab) -} - - -## ----------------------------------------------------------------------------- -## NPV figure: constant VSL -## ----------------------------------------------------------------------------- - -## NPV figure -plot_npv_health_labor_constant_vsl <- function( - main_path, - save_path, - refining_mortality, - state_ghg_output, - dt_ghg_2019, - annual_labor -) { - npv_df <- refining_mortality %>% as.data.table() - - ## state level - state_npv_df <- npv_df[, - .( - sum_cost_2019_pv = sum(cost_2019_PV), ## constant VSL - sum_cost_pv = sum(cost_PV) - ), ## changing VSL - by = .(scen_id, demand_scenario, refining_scenario) - ] - - ## add column - state_npv_df[, sum_cost_2019_pv_b := sum_cost_2019_pv / 1e9] - state_npv_df[, sum_cost_pv_b := sum_cost_pv / 1e9] - - ## add ghg emission reduction - ## 2019 ghg - ghg_2019_val <- dt_ghg_2019$mtco2e[1] - - ## 2045 vs 2019 ghg - ghg_2045 <- state_ghg_output[year == 2045 & source == "total"] - setnames(ghg_2045, "value", "ghg_kg") - ghg_2045[, ghg_2045 := (ghg_kg / 1000) / 1e6] - ghg_2045[, ghg_2019 := ghg_2019_val] - ghg_2045[, perc_diff := (ghg_2045 - ghg_2019) / ghg_2019] - - perc_diff_df <- ghg_2045[, .( - demand_scenario, - refining_scenario, - ghg_2045, - ghg_2019, - perc_diff - )] - - ## summarize by scenario, filter for total - state_ghg_df <- state_ghg_output[ - source == "total", - .(total_ghg = sum(value)), - by = .(demand_scenario, refining_scenario) - ] - - state_ghg_df[, total_ghg_mmt := (total_ghg / 1000) / 1e6] - - ## reference - ref_df <- state_ghg_df[ - demand_scenario == "BAU" & refining_scenario == "historic production", - .(total_ghg_mmt) - ] - setnames(ref_df, "total_ghg_mmt", "ref_ghg_mmt") - ref_value <- ref_df$ref_ghg_mmt[1] - - ## merge with summarized df - state_ghg_df[, ref_ghg := ref_value] - state_ghg_df[, avoided_ghg := (total_ghg_mmt - ref_value) * -1] - - ## merge with health - health_ghg_df <- merge( - state_npv_df, - state_ghg_df[, .( - demand_scenario, - refining_scenario, - total_ghg_mmt, - ref_ghg, - avoided_ghg - )], - by = c("demand_scenario", "refining_scenario"), - all.x = T - ) - - ## summarize labor for state - state_labor <- annual_labor[, - .( - sum_total_emp = sum(total_emp), - sum_total_comp_pv_h = sum(total_comp_PV_h), - sum_total_comp_pv_l = sum(total_comp_PV_l) - ), - by = .(demand_scenario, refining_scenario, oil_price_scenario) - ] - - state_labor <- state_labor[oil_price_scenario == "reference case", ] - - ## ref labor - ref_labor <- state_labor[ - demand_scenario == "BAU" & refining_scenario == "historic production" - ] - setnames( - ref_labor, - c("sum_total_emp", "sum_total_comp_pv_h", "sum_total_comp_pv_l"), - c("ref_total_emp", "ref_total_comp_pv_h", "ref_total_comp_pv_l") - ) - - ## add values to labor - state_labor[, `:=`( - ref_total_emp = ref_labor$ref_total_emp[1], - ref_total_comp_pv_h = ref_labor$ref_total_comp_pv_h[1], - ref_total_comp_pv_l = ref_labor$ref_total_comp_pv_l[1] - )] - - state_labor[, - forgone_wages_bil_h := (sum_total_comp_pv_h - ref_total_comp_pv_h) / 1e9 - ] - state_labor[, - forgone_wages_bil_l := (sum_total_comp_pv_l - ref_total_comp_pv_l) / 1e9 - ] - - ## merge with health and ghg - health_labor_ghg_df <- merge( - health_ghg_df, - state_labor[, .( - demand_scenario, - refining_scenario, - oil_price_scenario, - sum_total_comp_pv_h, - ref_total_comp_pv_h, - forgone_wages_bil_h, - sum_total_comp_pv_l, - ref_total_comp_pv_l, - forgone_wages_bil_l - )], - by = c("demand_scenario", "refining_scenario"), - all.x = T - ) - - ## add ghg perc reduction - health_labor_ghg_df <- merge( - health_labor_ghg_df, - perc_diff_df, - by = c("demand_scenario", "refining_scenario"), - all.x = T - ) - - ## prepare to plot - plot_df <- health_labor_ghg_df[, .( - scen_id, - demand_scenario, - refining_scenario, - oil_price_scenario, - sum_cost_pv_b, - sum_cost_2019_pv_b, - forgone_wages_bil_h, - forgone_wages_bil_l, - avoided_ghg, - perc_diff - )] - - setnames(plot_df, "perc_diff", "ghg_perc_diff") - - ## add values / avoided ghgs - plot_df[, avoided_health_cost := sum_cost_2019_pv_b * -1] - plot_df[, avoided_health_cost_annual_vsl := sum_cost_pv_b * -1] - plot_df[, sum_cost_2019_pv_b := NULL] - plot_df[, sum_cost_pv_b := NULL] - - plot_df[, `:=`( - avoided_health_cost_ghg = avoided_health_cost / avoided_ghg, - avoided_health_cost_ghg_vsl2 = avoided_health_cost_annual_vsl / avoided_ghg, - forgone_wages_bil_h_ghg = forgone_wages_bil_h / avoided_ghg, - forgone_wages_bil_l_ghg = forgone_wages_bil_l / avoided_ghg - )] - - plot_df_health <- plot_df %>% - select( - scen_id, - demand_scenario, - refining_scenario, - ghg_perc_diff, - avoided_health_cost, - avoided_health_cost_annual_vsl, - avoided_health_cost_ghg, - avoided_health_cost_ghg_vsl2 - ) %>% - pivot_longer( - avoided_health_cost:avoided_health_cost_ghg_vsl2, - names_to = "metric", - values_to = "value" - ) - - ## add column for vsl - plot_df_health <- plot_df_health %>% - mutate( - segment = "health", - unit_desc = ifelse( - metric == "avoided_health_cost", - "USD billion (2019 VSL)", - ifelse( - metric == "avoided_health_cost_annual_vsl", - "USD billion (annual VSL)", - ifelse( - metric == "avoided_health_cost_ghg", - "USD billion per GHG (2019 VSL)", - "USD billion per GHG (annual VSL)" - ) - ) - ), - metric = ifelse( - metric %in% c("avoided_health_cost", "avoided_health_cost_annual_vsl"), - "avoided_health_cost", - "avoided_health_cost_ghg" - ) - ) - - plot_df_labor <- plot_df %>% - select( - scen_id, - demand_scenario, - refining_scenario, - oil_price_scenario, - ghg_perc_diff, - forgone_wages_bil_h, - forgone_wages_bil_l, - forgone_wages_bil_h_ghg, - forgone_wages_bil_l_ghg - ) %>% - pivot_longer( - forgone_wages_bil_h:forgone_wages_bil_l_ghg, - names_to = "metric", - values_to = "value" - ) %>% - mutate( - segment = "labor", - unit_desc = ifelse( - metric %in% c("forgone_wages_bil_h", "forgone_wages_bil_l"), - "USD billion", - "USD billion per GHG" - ), - estimate = ifelse( - metric %in% c("forgone_wages_bil_h", "forgone_wages_bil_h_ghg"), - "high", - "low" - ), - metric = ifelse( - metric %in% c("forgone_wages_bil_h", "forgone_wages_bil_l"), - "forgone_wages_bil", - "forgone_wages_bil_ghg" - ) - ) %>% - select( - scen_id, - demand_scenario, - refining_scenario, - ghg_perc_diff, - segment, - metric, - unit_desc, - estimate, - value - ) %>% - pivot_wider(names_from = estimate, values_from = value) - - # plot_df_long <- rbind(plot_df_health, plot_df_labor) - - ## prepare health for plotting ------------------------------ - plot_df_health <- plot_df_health %>% - mutate( - title = ifelse( - metric == "avoided_health_cost", - "Health: avoided mortality", - "Health: avoided mortality per avoided GHG" - ) - ) - - plot_df_health$title <- factor( - plot_df_health$title, - levels = c( - "Health: avoided mortality", - "Health: avoided mortality per avoided GHG" - ) - ) - - ## rename - setDT(plot_df_health) - plot_df_health[, - scenario := paste0(demand_scenario, " demand - ", refining_scenario) - ] - plot_df_health[, scenario := gsub("LC1.", "Low ", scenario)] - # plot_df_long[, scenario := gsub('BAU', 'Reference', scenario)] - # plot_df_long[, short_scen := gsub('BAU', 'Reference', short_scen)] - # plot_df_long[, short_scen := gsub('Low C.', 'Low carbon', short_scen)] - - ## refactor - plot_df_health$scenario <- factor( - plot_df_health$scenario, - levels = c( - "BAU demand - historic production", - "BAU demand - historic exports", - "BAU demand - low exports", - "Low demand - historic exports", - "Low demand - low exports", - "Low demand - historic production" - ) - ) - - ## convert value of scaled outputs (by ghg) to millions, add unit column - plot_df_health[, - value := fifelse( - metric %in% c("avoided_health_cost_ghg", "forgone_wages_bil_ghg"), - value * 1000, - value - ) - ] - plot_df_health[, - metric := fifelse( - metric == "forgone_wages_bil_ghg", - "forgone_wages_ghg", - metric - ) - ] - plot_df_health[, - unit := fifelse( - metric %in% c("avoided_health_cost_ghg", "forgone_wages_ghg"), - "NPV per avoided GHG MtCO2e\n(2019 USD million / MtCO2e)", - "NPV (2019 USD billion)" - ) - ] - - ## change historic to historical - plot_df_health[, scen_id := str_replace(scen_id, "historic", "historical")] - plot_df_health[, - refining_scenario := str_replace( - refining_scenario, - "historic", - "historical" - ) - ] - plot_df_health[, scenario := str_replace(scenario, "historic", "historical")] - - ## save figure inputs - fwrite( - plot_df_health, - file.path( - main_path, - save_path, - "fig-csv-files", - "state_npv_fig_inputs_health_constant_vsl.csv" - ) - ) - # fwrite(plot_df_health, file.path(main_path, "outputs/academic-out/refining/figures/2024-08-beta-adj/fig-csv-files/", "state_npv_fig_inputs_health.csv")) - - ## prepare labor ---------------------- - plot_df_labor <- plot_df_labor %>% - mutate( - title = ifelse( - metric == "forgone_wages_bil", - "Labor: forgone wages", - "Labor: forgone wages per avoided GHG" - ) - ) - - plot_df_labor$title <- factor( - plot_df_labor$title, - levels = c("Labor: forgone wages", "Labor: forgone wages per avoided GHG") - ) - - ## rename - setDT(plot_df_labor) - plot_df_labor[, - scenario := paste0(demand_scenario, " demand - ", refining_scenario) - ] - plot_df_labor[, scenario := gsub("LC1.", "Low ", scenario)] - # plot_df_long[, scenario := gsub('BAU', 'Reference', scenario)] - # plot_df_long[, short_scen := gsub('BAU', 'Reference', short_scen)] - # plot_df_long[, short_scen := gsub('Low C.', 'Low carbon', short_scen)] - - ## refactor - plot_df_labor$scenario <- factor( - plot_df_labor$scenario, - levels = c( - "BAU demand - historic production", - "BAU demand - historic exports", - "BAU demand - low exports", - "Low demand - historic exports", - "Low demand - low exports", - "Low demand - historic production" - ) - ) - - ## convert value of scaled outputs (by ghg) to millions, add unit column - plot_df_labor[, - high := fifelse( - metric %in% c("avoided_health_cost_ghg", "forgone_wages_bil_ghg"), - high * 1000, - high - ) - ] - plot_df_labor[, - low := fifelse( - metric %in% c("avoided_health_cost_ghg", "forgone_wages_bil_ghg"), - low * 1000, - low - ) - ] - plot_df_labor[, - metric := fifelse( - metric == "forgone_wages_bil_ghg", - "forgone_wages_ghg", - metric - ) - ] - plot_df_labor[, - unit := fifelse( - metric %in% c("avoided_health_cost_ghg", "forgone_wages_ghg"), - "NPV per avoided GHG MtCO2e\n(2019 USD million / MtCO2e)", - "NPV (2019 USD billion)" - ) - ] - - ## change historic to historical - plot_df_labor[, scen_id := str_replace(scen_id, "historic", "historical")] - plot_df_labor[, - refining_scenario := str_replace( - refining_scenario, - "historic", - "historical" - ) - ] - plot_df_labor[, scenario := str_replace(scenario, "historic", "historical")] - - ## scenarios for filtering - remove_scen <- c("LC1 historical production", "BAU historical production") - bau_scen <- "BAU historical production" - - ## make the plot - ## --------------------------------------------------- - - ## color for refining scenario - refin_colors <- c( - "LC1 low exports" = "#729b79", - "LC1 historical exports" = "#2F4858", - "BAU low exports" = "#F6AE2D", - "BAU historical exports" = "#F26419" - ) - - refin_labs <- c( - "LC1 low exports" = "Low demand, low exports", - "LC1 historical exports" = "Low demand, historical exports", - "BAU low exports" = "BAU demand, low exports", - "BAU historical exports" = "BAU demand, historical exports" - ) - - ## refactor - # plot_df_health$scen_id <- factor(plot_df_health$scen_id, levels = c('LC1 low exports', - # 'LC1 historical production', - # 'BAU demand\nlow exports', - # 'Low demand\nhistorical exports', - # 'Low demand\nlow exports', - # 'Low demand\nhistorical production')) - # - - ## figs - make each separately - ## ------------------------------------------------------------------- - - hist_prod <- as.data.table( - plot_df_health %>% - filter( - scen_id == bau_scen, - unit == "NPV (2019 USD billion)", - unit_desc == "USD billion (2019 VSL)" - ) - ) - - fig_bxm_a <- ggplot() + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - geom_vline( - xintercept = hist_prod[ - title == "Health: avoided mortality", - ghg_perc_diff * -100 - ], - color = "darkgray", - lty = 2 - ) + - geom_point( - data = plot_df_health %>% - filter( - !scen_id %in% remove_scen, - title == "Health: avoided mortality", - unit == "NPV (2019 USD billion)", - unit_desc == "USD billion (2019 VSL)", - !refining_scenario == "historical production" - ), - aes(x = ghg_perc_diff * -100, y = value, color = scen_id), - shape = 16, - size = 3, - alpha = 0.9 - ) + - labs( - color = NULL, - title = "Health: avoided mortality", - y = "NPV (2019 USD billion)", - x = "GHG emissions reduction (%, 2045 vs 2019)" - ) + - ylim(0, 50) + - xlim(0, 80) + - scale_color_manual( - values = refin_colors, - labels = refin_labs - ) + - theme_line + - theme( - legend.position = "bottom", - legend.text = element_text(size = 10), - plot.title = element_text(hjust = 0.5, size = 12), - axis.title.y = element_text(size = 12), - axis.title.x = element_text(size = 11), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm"), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5, size = 11), - axis.text.y = element_text(vjust = 0.5, hjust = 0.5, size = 11) - ) + - guides(color = guide_legend(nrow = 2)) - - # ## make separete df for labor high and low for plotting - # plot_df_labor_pts <- plot_df_labor %>% - # filter(!scen_id %in% remove_scen, - # title == "Labor: forgone wages", - # unit == "NPV (2019 USD billion)", - # refining_scenario != "historical production") %>% - # select(scen_id, demand_scenario, refining_scenario, scenario, ghg_perc_diff, high, low) %>% - # pivot_longer(high:low, names_to = "estimate", values_to = "npv_2019_usd_billion") - # - fig_bxm_b <- ggplot() + - geom_hline(yintercept = 0, color = "darkgray", size = 0.5) + - geom_vline( - xintercept = hist_prod[ - title == "Health: avoided mortality", - ghg_perc_diff * -100 - ], - color = "darkgray", - lty = 2 - ) + - # geom_vline(xintercept = hist_prod[title == "Labor: forgone wages", ghg_perc_diff * -100], color = "darkgray", lty = 2) + - # geom_linerange(data = plot_df_labor %>% filter(!scen_id %in% remove_scen, - # refining_scenario != "historical production", - # metric == "forgone_wages_bil"), aes(x = ghg_perc_diff * -100, ymin = high, ymax = low, color = scen_id), linewidth = 0.5, alpha = 0.8) + - geom_point( - data = plot_df_labor %>% - filter( - !scen_id %in% remove_scen, - refining_scenario != "historical production", - metric == "forgone_wages_bil" - ), - aes(x = ghg_perc_diff * -100, y = low, color = scen_id), - shape = 16, - size = 3, - alpha = 0.9 - ) + - geom_point( - data = plot_df_labor %>% - filter( - !scen_id %in% remove_scen, - refining_scenario != "historical production", - metric == "forgone_wages_bil" - ), - aes(x = ghg_perc_diff * -100, y = high, color = scen_id), - shape = 1, - size = 3, - alpha = 0.9 - ) + - labs( - color = NULL, - title = "Labor: forgone wages", - y = NULL, - x = "GHG emissions reduction (%, 2045 vs 2019)" - ) + - ylim(-50, 0) + - xlim(0, 80) + - scale_color_manual( - values = refin_colors, - labels = refin_labs - ) + - theme_line + - theme( - legend.position = "bottom", - legend.text = element_text(size = 10), - plot.title = element_text(hjust = 0.5, size = 12), - axis.title.y = element_text(size = 12), - axis.title.x = element_text(size = 11), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm"), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5, size = 11), - axis.text.y = element_text(vjust = 0.5, hjust = 0.5, size = 11) - ) + - guides(color = guide_legend(nrow = 2)) - - ## legends - low_legend_fig <- ggplot() + - geom_hline(yintercept = 0, color = "darkgray", size = 0.5) + - geom_vline( - xintercept = hist_prod[ - title == "Health: avoided mortality", - ghg_perc_diff * -100 - ], - color = "darkgray", - lty = 2 - ) + - # geom_vline(xintercept = hist_prod[title == "Labor: forgone wages", ghg_perc_diff * -100], color = "darkgray", lty = 2) + - # geom_linerange(data = plot_df_labor %>% filter(!scen_id %in% remove_scen, - # refining_scenario != "historic production"), aes(x = ghg_perc_diff * -100, ymin = high, ymax = low, color = scen_id), size = 0.5, alpha = 0.8) + - geom_point( - data = plot_df_labor %>% - filter( - !scen_id %in% remove_scen, - refining_scenario != "historic production", - metric == "forgone_wages_bil" - ), - aes(x = ghg_perc_diff * -100, y = low, color = scen_id), - shape = 16, - size = 3, - alpha = 1 - ) + - # geom_point(data = plot_df_labor %>% filter(!scen_id %in% remove_scen, - # refining_scenario != "historic production", - # metric == "forgone_wages_bil"), aes(x = ghg_perc_diff * -100, y = high, color = scen_id), shape = 1, size = 3, alpha = 0.8) + - labs( - color = "with re-emp:", - title = "Labor: forgone wages", - y = NULL, - x = "GHG emissions reduction (%, 2045 vs 2019)" - ) + - ylim(-50, 0) + - xlim(0, 80) + - scale_color_manual( - values = refin_colors, - labels = refin_labs - ) + - theme_line + - theme( - legend.position = "bottom", - legend.text = element_text(size = 10), - legend.title = element_text(size = 10), - plot.title = element_text(hjust = 0.5, size = 12), - axis.title.y = element_text(size = 12), - axis.title.x = element_text(size = 11), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm"), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5, size = 11), - axis.text.y = element_text(vjust = 0.5, hjust = 0.5, size = 11) - ) + - guides(color = guide_legend(nrow = 1)) - - low_legend <- get_legend( - low_legend_fig - ) - - ## save legends - ggsave( - plot = low_legend, - device = "pdf", - filename = "fig3_low_legend.pdf", - path = file.path(main_path, save_path, "legends"), - dpi = 600 - ) - - ## legends - high_legend_fig <- ggplot() + - geom_hline(yintercept = 0, color = "darkgray", size = 0.5) + - geom_vline( - xintercept = hist_prod[ - title == "Health: avoided mortality", - ghg_perc_diff * -100 - ], - color = "darkgray", - lty = 2 - ) + - # geom_vline(xintercept = hist_prod[title == "Labor: forgone wages", ghg_perc_diff * -100], color = "darkgray", lty = 2) + - # geom_linerange(data = plot_df_labor %>% filter(!scen_id %in% remove_scen, - # refining_scenario != "historic production"), aes(x = ghg_perc_diff * -100, ymin = high, ymax = low, color = scen_id), size = 0.5, alpha = 0.8) + - # geom_point(data = plot_df_labor %>% filter(!scen_id %in% remove_scen, - # refining_scenario != "historic production", - # metric == "forgone_wages_bil"), aes(x = ghg_perc_diff * -100, y = low, color = scen_id), shape = 16, size = 3, alpha = 1) + - geom_point( - data = plot_df_labor %>% - filter( - !scen_id %in% remove_scen, - refining_scenario != "historic production", - metric == "forgone_wages_bil" - ), - aes(x = ghg_perc_diff * -100, y = high, color = scen_id), - shape = 1, - size = 3, - alpha = 0.8 - ) + - labs( - color = "no re-emp:", - title = "Labor: forgone wages", - y = NULL, - x = "GHG emissions reduction (%, 2045 vs 2019)" - ) + - ylim(-50, 0) + - xlim(0, 80) + - scale_color_manual( - values = refin_colors, - labels = refin_labs - ) + - theme_line + - theme( - legend.position = "bottom", - legend.text = element_text(size = 10), - legend.title = element_text(size = 10), - plot.title = element_text(hjust = 0.5, size = 12), - axis.title.y = element_text(size = 12), - axis.title.x = element_text(size = 11), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm"), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5, size = 11), - axis.text.y = element_text(vjust = 0.5, hjust = 0.5, size = 11) - ) + - guides(color = guide_legend(nrow = 1)) - - high_legend <- get_legend( - high_legend_fig - ) - - ## save legends - ggsave( - plot = high_legend, - device = "pdf", - filename = "fig3_high_legend.pdf", - path = file.path(main_path, save_path, "legends"), - dpi = 600 - ) - - ## combine figure - ## --------------------------------- - - ## shared x axis - xaxis_lab <- ggdraw() + - draw_label("GHG emissions reduction (%, 2045 vs 2019)", size = 12) - - fig3_plot_grid_ab <- plot_grid( - fig_bxm_a, - fig_bxm_b, - align = "vh", - labels = c("A", "B"), - # # labels = 'AUTO', - # label_size = 10, - hjust = -1, - nrow = 1, - rel_widths = c(1, 1) - ) - - fig3_plot_grid2 <- plot_grid( - fig3_plot_grid_ab, - xaxis_lab, - align = "v", - # labels = c("(A)", "(B)", "(C)", ""), - # # labels = 'AUTO', - # label_size = 10, - # hjust = -1, - ncol = 1, - rel_heights = c(0.95, 0.05) - # rel_widths = c(1, 1), - ) -} - - -## ----------------------------------------------------------------------------- -## NPV figure: growing VSL, age-based vsl, cluser EFs -## ----------------------------------------------------------------------------- - -## NPV figure -plot_npv_health_labor_growing_vsl <- function( - main_path, - save_path, - refining_mortality, - state_ghg_output, - dt_ghg_2019, - annual_labor -) { - npv_df <- refining_mortality %>% as.data.table() - - ## state level - state_npv_df <- npv_df[, - .( - sum_cost_2019_pv = sum(cost_2019_PV), ## constant VSL - sum_cost_pv = sum(cost_PV) - ), ## changing VSL - by = .(scen_id, demand_scenario, refining_scenario) - ] - - ## add column - state_npv_df[, sum_cost_2019_pv_b := sum_cost_2019_pv / 1e9] - state_npv_df[, sum_cost_pv_b := sum_cost_pv / 1e9] - - ## add ghg emission reduction - ## 2019 ghg - ghg_2019_val <- dt_ghg_2019$mtco2e[1] - - ## 2045 vs 2019 ghg - ghg_2045 <- state_ghg_output[year == 2045 & source == "total"] - setnames(ghg_2045, "value", "ghg_kg") - ghg_2045[, ghg_2045 := (ghg_kg / 1000) / 1e6] - ghg_2045[, ghg_2019 := ghg_2019_val] - ghg_2045[, perc_diff := (ghg_2045 - ghg_2019) / ghg_2019] - - perc_diff_df <- ghg_2045[, .( - demand_scenario, - refining_scenario, - ghg_2045, - ghg_2019, - perc_diff - )] - - ## summarize by scenario, filter for total - state_ghg_df <- state_ghg_output[ - source == "total", - .(total_ghg = sum(value)), - by = .(demand_scenario, refining_scenario) - ] - - state_ghg_df[, total_ghg_mmt := (total_ghg / 1000) / 1e6] - - ## reference - ref_df <- state_ghg_df[ - demand_scenario == "BAU" & refining_scenario == "historic production", - .(total_ghg_mmt) - ] - setnames(ref_df, "total_ghg_mmt", "ref_ghg_mmt") - ref_value <- ref_df$ref_ghg_mmt[1] - - ## merge with summarized df - state_ghg_df[, ref_ghg := ref_value] - state_ghg_df[, avoided_ghg := (total_ghg_mmt - ref_value) * -1] - - ## merge with health - health_ghg_df <- merge( - state_npv_df, - state_ghg_df[, .( - demand_scenario, - refining_scenario, - total_ghg_mmt, - ref_ghg, - avoided_ghg - )], - by = c("demand_scenario", "refining_scenario"), - all.x = T - ) - - ## summarize labor for state - state_labor <- annual_labor[, - .( - sum_total_emp = sum(total_emp), - sum_total_comp_pv_h = sum(total_comp_PV_h), - sum_total_comp_pv_l = sum(total_comp_PV_l) - ), - by = .(demand_scenario, refining_scenario, oil_price_scenario) - ] - - state_labor <- state_labor[oil_price_scenario == "reference case", ] - - ## ref labor - ref_labor <- state_labor[ - demand_scenario == "BAU" & refining_scenario == "historic production" - ] - setnames( - ref_labor, - c("sum_total_emp", "sum_total_comp_pv_h", "sum_total_comp_pv_l"), - c("ref_total_emp", "ref_total_comp_pv_h", "ref_total_comp_pv_l") - ) - - ## add values to labor - state_labor[, `:=`( - ref_total_emp = ref_labor$ref_total_emp[1], - ref_total_comp_pv_h = ref_labor$ref_total_comp_pv_h[1], - ref_total_comp_pv_l = ref_labor$ref_total_comp_pv_l[1] - )] - - state_labor[, - forgone_wages_bil_h := (sum_total_comp_pv_h - ref_total_comp_pv_h) / 1e9 - ] - state_labor[, - forgone_wages_bil_l := (sum_total_comp_pv_l - ref_total_comp_pv_l) / 1e9 - ] - - ## merge with health and ghg - health_labor_ghg_df <- merge( - health_ghg_df, - state_labor[, .( - demand_scenario, - refining_scenario, - oil_price_scenario, - sum_total_comp_pv_h, - ref_total_comp_pv_h, - forgone_wages_bil_h, - sum_total_comp_pv_l, - ref_total_comp_pv_l, - forgone_wages_bil_l - )], - by = c("demand_scenario", "refining_scenario"), - all.x = T - ) - - ## add ghg perc reduction - health_labor_ghg_df <- merge( - health_labor_ghg_df, - perc_diff_df, - by = c("demand_scenario", "refining_scenario"), - all.x = T - ) - - ## prepare to plot - plot_df <- health_labor_ghg_df[, .( - scen_id, - demand_scenario, - refining_scenario, - oil_price_scenario, - sum_cost_pv_b, - sum_cost_2019_pv_b, - forgone_wages_bil_h, - forgone_wages_bil_l, - avoided_ghg, - perc_diff - )] - - setnames(plot_df, "perc_diff", "ghg_perc_diff") - - ## add values / avoided ghgs - plot_df[, avoided_health_cost := sum_cost_2019_pv_b * -1] - plot_df[, avoided_health_cost_annual_vsl := sum_cost_pv_b * -1] - plot_df[, sum_cost_2019_pv_b := NULL] - plot_df[, sum_cost_pv_b := NULL] - - plot_df[, `:=`( - avoided_health_cost_ghg = avoided_health_cost / avoided_ghg, - avoided_health_cost_ghg_vsl2 = avoided_health_cost_annual_vsl / avoided_ghg, - forgone_wages_bil_h_ghg = forgone_wages_bil_h / avoided_ghg, - forgone_wages_bil_l_ghg = forgone_wages_bil_l / avoided_ghg - )] - - plot_df_health <- plot_df %>% - select( - scen_id, - demand_scenario, - refining_scenario, - ghg_perc_diff, - avoided_health_cost, - avoided_health_cost_annual_vsl, - avoided_health_cost_ghg, - avoided_health_cost_ghg_vsl2 - ) %>% - pivot_longer( - avoided_health_cost:avoided_health_cost_ghg_vsl2, - names_to = "metric", - values_to = "value" - ) - - ## add column for vsl - plot_df_health <- plot_df_health %>% - mutate( - segment = "health", - unit_desc = ifelse( - metric == "avoided_health_cost", - "USD billion (2019 VSL)", - ifelse( - metric == "avoided_health_cost_annual_vsl", - "USD billion (annual VSL)", - ifelse( - metric == "avoided_health_cost_ghg", - "USD billion per GHG (2019 VSL)", - "USD billion per GHG (annual VSL)" - ) - ) - ), - metric = ifelse( - metric %in% c("avoided_health_cost", "avoided_health_cost_annual_vsl"), - "avoided_health_cost", - "avoided_health_cost_ghg" - ) - ) - - plot_df_labor <- plot_df %>% - select( - scen_id, - demand_scenario, - refining_scenario, - oil_price_scenario, - ghg_perc_diff, - forgone_wages_bil_h, - forgone_wages_bil_l, - forgone_wages_bil_h_ghg, - forgone_wages_bil_l_ghg - ) %>% - pivot_longer( - forgone_wages_bil_h:forgone_wages_bil_l_ghg, - names_to = "metric", - values_to = "value" - ) %>% - mutate( - segment = "labor", - unit_desc = ifelse( - metric %in% c("forgone_wages_bil_h", "forgone_wages_bil_l"), - "USD billion", - "USD billion per GHG" - ), - estimate = ifelse( - metric %in% c("forgone_wages_bil_h", "forgone_wages_bil_h_ghg"), - "high", - "low" - ), - metric = ifelse( - metric %in% c("forgone_wages_bil_h", "forgone_wages_bil_l"), - "forgone_wages_bil", - "forgone_wages_bil_ghg" - ) - ) %>% - select( - scen_id, - demand_scenario, - refining_scenario, - ghg_perc_diff, - segment, - metric, - unit_desc, - estimate, - value - ) %>% - pivot_wider(names_from = estimate, values_from = value) - - # plot_df_long <- rbind(plot_df_health, plot_df_labor) - - ## prepare health for plotting ------------------------------ - plot_df_health <- plot_df_health %>% - mutate( - title = ifelse( - metric == "avoided_health_cost", - "Health: avoided mortality", - "Health: avoided mortality per avoided GHG" - ) - ) - - plot_df_health$title <- factor( - plot_df_health$title, - levels = c( - "Health: avoided mortality", - "Health: avoided mortality per avoided GHG" - ) - ) - - ## rename - setDT(plot_df_health) - plot_df_health[, - scenario := paste0(demand_scenario, " demand - ", refining_scenario) - ] - plot_df_health[, scenario := gsub("LC1.", "Low ", scenario)] - # plot_df_long[, scenario := gsub('BAU', 'Reference', scenario)] - # plot_df_long[, short_scen := gsub('BAU', 'Reference', short_scen)] - # plot_df_long[, short_scen := gsub('Low C.', 'Low carbon', short_scen)] - - ## refactor - plot_df_health$scenario <- factor( - plot_df_health$scenario, - levels = c( - "BAU demand - historic production", - "BAU demand - historic exports", - "BAU demand - low exports", - "Low demand - historic exports", - "Low demand - low exports", - "Low demand - historic production" - ) - ) - - ## convert value of scaled outputs (by ghg) to millions, add unit column - plot_df_health[, - value := fifelse( - metric %in% c("avoided_health_cost_ghg", "forgone_wages_bil_ghg"), - value * 1000, - value - ) - ] - plot_df_health[, - metric := fifelse( - metric == "forgone_wages_bil_ghg", - "forgone_wages_ghg", - metric - ) - ] - plot_df_health[, - unit := fifelse( - metric %in% c("avoided_health_cost_ghg", "forgone_wages_ghg"), - "NPV per avoided GHG MtCO2e\n(2019 USD million / MtCO2e)", - "NPV (2019 USD billion)" - ) - ] - - ## change historic to historical - plot_df_health[, scen_id := str_replace(scen_id, "historic", "historical")] - plot_df_health[, - refining_scenario := str_replace( - refining_scenario, - "historic", - "historical" - ) - ] - plot_df_health[, scenario := str_replace(scenario, "historic", "historical")] - - ## save figure inputs - fwrite( - plot_df_health, - file.path( - main_path, - save_path, - "fig-csv-files", - "state_npv_fig_inputs_health_growing_vsl.csv" - ) - ) - # fwrite(plot_df_health, file.path(main_path, "outputs/academic-out/refining/figures/2024-08-beta-adj/fig-csv-files/", "state_npv_fig_inputs_health.csv")) - - ## prepare labor ---------------------- - plot_df_labor <- plot_df_labor %>% - mutate( - title = ifelse( - metric == "forgone_wages_bil", - "Labor: forgone wages", - "Labor: forgone wages per avoided GHG" - ) - ) - - plot_df_labor$title <- factor( - plot_df_labor$title, - levels = c("Labor: forgone wages", "Labor: forgone wages per avoided GHG") - ) - - ## rename - setDT(plot_df_labor) - plot_df_labor[, - scenario := paste0(demand_scenario, " demand - ", refining_scenario) - ] - plot_df_labor[, scenario := gsub("LC1.", "Low ", scenario)] - # plot_df_long[, scenario := gsub('BAU', 'Reference', scenario)] - # plot_df_long[, short_scen := gsub('BAU', 'Reference', short_scen)] - # plot_df_long[, short_scen := gsub('Low C.', 'Low carbon', short_scen)] - - ## refactor - plot_df_labor$scenario <- factor( - plot_df_labor$scenario, - levels = c( - "BAU demand - historic production", - "BAU demand - historic exports", - "BAU demand - low exports", - "Low demand - historic exports", - "Low demand - low exports", - "Low demand - historic production" - ) - ) - - ## convert value of scaled outputs (by ghg) to millions, add unit column - plot_df_labor[, - high := fifelse( - metric %in% c("avoided_health_cost_ghg", "forgone_wages_bil_ghg"), - high * 1000, - high - ) - ] - plot_df_labor[, - low := fifelse( - metric %in% c("avoided_health_cost_ghg", "forgone_wages_bil_ghg"), - low * 1000, - low - ) - ] - plot_df_labor[, - metric := fifelse( - metric == "forgone_wages_bil_ghg", - "forgone_wages_ghg", - metric - ) - ] - plot_df_labor[, - unit := fifelse( - metric %in% c("avoided_health_cost_ghg", "forgone_wages_ghg"), - "NPV per avoided GHG MtCO2e\n(2019 USD million / MtCO2e)", - "NPV (2019 USD billion)" - ) - ] - - ## change historic to historical - plot_df_labor[, scen_id := str_replace(scen_id, "historic", "historical")] - plot_df_labor[, - refining_scenario := str_replace( - refining_scenario, - "historic", - "historical" - ) - ] - plot_df_labor[, scenario := str_replace(scenario, "historic", "historical")] - - ## scenarios for filtering - remove_scen <- c("LC1 historical production", "BAU historical production") - bau_scen <- "BAU historical production" - - ## make the plot - ## --------------------------------------------------- - - ## color for refining scenario - refin_colors <- c( - "LC1 low exports" = "#729b79", - "LC1 historical exports" = "#2F4858", - "BAU low exports" = "#F6AE2D", - "BAU historical exports" = "#F26419" - ) - - refin_labs <- c( - "LC1 low exports" = "Low demand, low exports", - "LC1 historical exports" = "Low demand, historical exports", - "BAU low exports" = "BAU demand, low exports", - "BAU historical exports" = "BAU demand, historical exports" - ) - - ## refactor - # plot_df_health$scen_id <- factor(plot_df_health$scen_id, levels = c('LC1 low exports', - # 'LC1 historical production', - # 'BAU demand\nlow exports', - # 'Low demand\nhistorical exports', - # 'Low demand\nlow exports', - # 'Low demand\nhistorical production')) - # - - ## figs - make each separately - ## ------------------------------------------------------------------- - - hist_prod <- as.data.table( - plot_df_health %>% - filter( - scen_id == bau_scen, - unit == "NPV (2019 USD billion)", - unit_desc == "USD billion (annual VSL)" - ) - ) - - fig_bxm_a <- ggplot() + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - geom_vline( - xintercept = hist_prod[ - title == "Health: avoided mortality", - ghg_perc_diff * -100 - ], - color = "darkgray", - lty = 2 - ) + - geom_point( - data = plot_df_health %>% - filter( - !scen_id %in% remove_scen, - title == "Health: avoided mortality", - unit == "NPV (2019 USD billion)", - unit_desc == "USD billion (annual VSL)", - !refining_scenario == "historical production" - ), - aes(x = ghg_perc_diff * -100, y = value, color = scen_id), - shape = 16, - size = 3, - alpha = 0.9 - ) + - labs( - color = NULL, - title = "Health: avoided mortality", - y = "NPV (2019 USD billion)", - x = "GHG emissions reduction (%, 2045 vs 2019)" - ) + - ylim(0, 50) + - xlim(0, 80) + - scale_color_manual( - values = refin_colors, - labels = refin_labs - ) + - theme_line + - theme( - legend.position = "bottom", - legend.text = element_text(size = 10), - plot.title = element_text(hjust = 0.5, size = 12), - axis.title.y = element_text(size = 12), - axis.title.x = element_text(size = 11), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm"), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5, size = 11), - axis.text.y = element_text(vjust = 0.5, hjust = 0.5, size = 11) - ) + - guides(color = guide_legend(nrow = 2)) - - # ## make separete df for labor high and low for plotting - # plot_df_labor_pts <- plot_df_labor %>% - # filter(!scen_id %in% remove_scen, - # title == "Labor: forgone wages", - # unit == "NPV (2019 USD billion)", - # refining_scenario != "historical production") %>% - # select(scen_id, demand_scenario, refining_scenario, scenario, ghg_perc_diff, high, low) %>% - # pivot_longer(high:low, names_to = "estimate", values_to = "npv_2019_usd_billion") - # - fig_bxm_b <- ggplot() + - geom_hline(yintercept = 0, color = "darkgray", size = 0.5) + - geom_vline( - xintercept = hist_prod[ - title == "Health: avoided mortality", - ghg_perc_diff * -100 - ], - color = "darkgray", - lty = 2 - ) + - # geom_vline(xintercept = hist_prod[title == "Labor: forgone wages", ghg_perc_diff * -100], color = "darkgray", lty = 2) + - # geom_linerange(data = plot_df_labor %>% filter(!scen_id %in% remove_scen, - # refining_scenario != "historical production", - # metric == "forgone_wages_bil"), aes(x = ghg_perc_diff * -100, ymin = high, ymax = low, color = scen_id), linewidth = 0.5, alpha = 0.8) + - geom_point( - data = plot_df_labor %>% - filter( - !scen_id %in% remove_scen, - refining_scenario != "historical production", - metric == "forgone_wages_bil" - ), - aes(x = ghg_perc_diff * -100, y = low, color = scen_id), - shape = 16, - size = 3, - alpha = 0.9 - ) + - geom_point( - data = plot_df_labor %>% - filter( - !scen_id %in% remove_scen, - refining_scenario != "historical production", - metric == "forgone_wages_bil" - ), - aes(x = ghg_perc_diff * -100, y = high, color = scen_id), - shape = 1, - size = 3, - alpha = 0.9 - ) + - labs( - color = NULL, - title = "Labor: forgone wages", - y = NULL, - x = "GHG emissions reduction (%, 2045 vs 2019)" - ) + - ylim(-50, 0) + - xlim(0, 80) + - scale_color_manual( - values = refin_colors, - labels = refin_labs - ) + - theme_line + - theme( - legend.position = "bottom", - legend.text = element_text(size = 10), - plot.title = element_text(hjust = 0.5, size = 12), - axis.title.y = element_text(size = 12), - axis.title.x = element_text(size = 11), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm"), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5, size = 11), - axis.text.y = element_text(vjust = 0.5, hjust = 0.5, size = 11) - ) + - guides(color = guide_legend(nrow = 2)) - - ## legends - low_legend_fig <- ggplot() + - geom_hline(yintercept = 0, color = "darkgray", size = 0.5) + - geom_vline( - xintercept = hist_prod[ - title == "Health: avoided mortality", - ghg_perc_diff * -100 - ], - color = "darkgray", - lty = 2 - ) + - # geom_vline(xintercept = hist_prod[title == "Labor: forgone wages", ghg_perc_diff * -100], color = "darkgray", lty = 2) + - # geom_linerange(data = plot_df_labor %>% filter(!scen_id %in% remove_scen, - # refining_scenario != "historic production"), aes(x = ghg_perc_diff * -100, ymin = high, ymax = low, color = scen_id), size = 0.5, alpha = 0.8) + - geom_point( - data = plot_df_labor %>% - filter( - !scen_id %in% remove_scen, - refining_scenario != "historic production", - metric == "forgone_wages_bil" - ), - aes(x = ghg_perc_diff * -100, y = low, color = scen_id), - shape = 16, - size = 3, - alpha = 1 - ) + - # geom_point(data = plot_df_labor %>% filter(!scen_id %in% remove_scen, - # refining_scenario != "historic production", - # metric == "forgone_wages_bil"), aes(x = ghg_perc_diff * -100, y = high, color = scen_id), shape = 1, size = 3, alpha = 0.8) + - labs( - color = "with re-emp:", - title = "Labor: forgone wages", - y = NULL, - x = "GHG emissions reduction (%, 2045 vs 2019)" - ) + - ylim(-50, 0) + - xlim(0, 80) + - scale_color_manual( - values = refin_colors, - labels = refin_labs - ) + - theme_line + - theme( - legend.position = "bottom", - legend.text = element_text(size = 10), - legend.title = element_text(size = 10), - plot.title = element_text(hjust = 0.5, size = 12), - axis.title.y = element_text(size = 12), - axis.title.x = element_text(size = 11), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm"), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5, size = 11), - axis.text.y = element_text(vjust = 0.5, hjust = 0.5, size = 11) - ) + - guides(color = guide_legend(nrow = 1)) - - low_legend <- get_legend( - low_legend_fig - ) - - ## save legends - ggsave( - plot = low_legend, - device = "pdf", - filename = "fig3_low_legend.pdf", - path = file.path(main_path, save_path, "legends"), - dpi = 600 - ) - - ## legends - high_legend_fig <- ggplot() + - geom_hline(yintercept = 0, color = "darkgray", size = 0.5) + - geom_vline( - xintercept = hist_prod[ - title == "Health: avoided mortality", - ghg_perc_diff * -100 - ], - color = "darkgray", - lty = 2 - ) + - # geom_vline(xintercept = hist_prod[title == "Labor: forgone wages", ghg_perc_diff * -100], color = "darkgray", lty = 2) + - # geom_linerange(data = plot_df_labor %>% filter(!scen_id %in% remove_scen, - # refining_scenario != "historic production"), aes(x = ghg_perc_diff * -100, ymin = high, ymax = low, color = scen_id), size = 0.5, alpha = 0.8) + - # geom_point(data = plot_df_labor %>% filter(!scen_id %in% remove_scen, - # refining_scenario != "historic production", - # metric == "forgone_wages_bil"), aes(x = ghg_perc_diff * -100, y = low, color = scen_id), shape = 16, size = 3, alpha = 1) + - geom_point( - data = plot_df_labor %>% - filter( - !scen_id %in% remove_scen, - refining_scenario != "historic production", - metric == "forgone_wages_bil" - ), - aes(x = ghg_perc_diff * -100, y = high, color = scen_id), - shape = 1, - size = 3, - alpha = 0.8 - ) + - labs( - color = "no re-emp:", - title = "Labor: forgone wages", - y = NULL, - x = "GHG emissions reduction (%, 2045 vs 2019)" - ) + - ylim(-50, 0) + - xlim(0, 80) + - scale_color_manual( - values = refin_colors, - labels = refin_labs - ) + - theme_line + - theme( - legend.position = "bottom", - legend.text = element_text(size = 10), - legend.title = element_text(size = 10), - plot.title = element_text(hjust = 0.5, size = 12), - axis.title.y = element_text(size = 12), - axis.title.x = element_text(size = 11), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm"), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5, size = 11), - axis.text.y = element_text(vjust = 0.5, hjust = 0.5, size = 11) - ) + - guides(color = guide_legend(nrow = 1)) - - high_legend <- get_legend( - high_legend_fig - ) - - ## save legends - ggsave( - plot = high_legend, - device = "pdf", - filename = "fig3_high_legend.pdf", - path = file.path(main_path, save_path, "legends"), - dpi = 600 - ) - - ## combine figure - ## --------------------------------- - - ## shared x axis - xaxis_lab <- ggdraw() + - draw_label("GHG emissions reduction (%, 2045 vs 2019)", size = 12) - - fig3_plot_grid_ab <- plot_grid( - fig_bxm_a, - fig_bxm_b, - align = "vh", - labels = c("A", "B"), - # # labels = 'AUTO', - # label_size = 10, - hjust = -1, - nrow = 1, - rel_widths = c(1, 1) - ) - - fig3_plot_grid2 <- plot_grid( - fig3_plot_grid_ab, - xaxis_lab, - align = "v", - # labels = c("(A)", "(B)", "(C)", ""), - # # labels = 'AUTO', - # label_size = 10, - # hjust = -1, - ncol = 1, - rel_heights = c(0.95, 0.05) - # rel_widths = c(1, 1), - ) -} - - -# ## compute county-level outputs for low demand low export scenario -# ## --------------------------------------------------------------------- -# -# create_county_health_labor_df <- function(main_path, -# refining_mortality, -# state_ghg_output, -# annual_labor, -# raw_ct_2020_all, -# raw_counties) { -# -# -# ## 2020 population -# ##--------------------------------------------------------------------- -# -# ## geoid to census tract -# county_df <- raw_ct_2020_all %>% -# select(census_tract = GEOID, COUNTYFP) %>% -# st_drop_geometry() -# -# county_names <- raw_counties %>% -# select(COUNTYFP, NAME) %>% -# st_drop_geometry() %>% -# unique() -# -# ## calc 2020 pop by demographic -# pop_2020 <- refining_mortality %>% -# filter(year == 2020) %>% -# select(census_tract, year, pop) %>% -# unique() %>% -# left_join(county_df) %>% -# as.data.table() -# -# ## summarize by county -# pop_2020_county <- pop_2020[, .(pop_2020 = sum(pop)), -# by = .(COUNTYFP)] -# -# -# -# ## make maps for c and d -# ## --------------------------------------------------------------------------- -# -# ## health -# health_map_df <- refining_mortality %>% -# filter(demand_scenario == "LC1" & refining_scenario == "low exports") %>% -# as.data.table() -# -# ## rename -# health_map_df[, scenario := paste0(demand_scenario, " demand - ", refining_scenario)] -# health_map_df[, scenario := gsub('LC1.', 'Low ', scenario)] -# health_map_df[, scenario := str_replace(scenario, "historic", "historical")] -# health_map_df[, scen_id := str_replace(scen_id, "historic", "historical")] -# -# ## group by scenario and census tract, sum cost_2019_pv -# health_map_npv_df <- health_map_df[, .(sum_cost_2019_pv = sum(cost_2019_PV)), -# by = .(census_tract, scenario, scen_id)] -# -# # ## save figure inputs -# # fwrite(health_map_npv_df, file.path(main_path, save_path, "fig-csv-files", "ct_health_npv_fig_inputs.csv")) -# -# -# ## health -# health_map_npv_df <- merge(health_map_npv_df %>% rename(GEOID = census_tract), raw_ct_2020_all %>% select(GEOID, COUNTYFP, ALAND), -# by = "GEOID") -# -# health_map_npv_df <- health_map_npv_df %>% -# mutate(sum_cost_2019_pv = sum_cost_2019_pv * -1) -# -# ## merge with county codes -# health_map_npv_df <- merge(health_map_npv_df, raw_counties %>% select(COUNTYFP, NAME) %>% st_drop_geometry(), -# by = "COUNTYFP", -# allow.cartesian = T, -# all.x = T) -# -# ## summarise by county, rank by sum of sum_cost_2019_pv -# health_county_df <- health_map_npv_df[ALAND > 0, .(sum_cost_2019_pv = sum(sum_cost_2019_pv)), -# by = .(NAME, COUNTYFP, scenario, scen_id)] -# -# -# -# health_county_df <- health_county_df %>% -# select(scenario, scen_id, NAME, COUNTYFP, sum_cost_2019_pv) %>% -# left_join(pop_2020_county) %>% -# rename(npv_health_av_mort = sum_cost_2019_pv) %>% -# mutate(npv_health_av_mort_pc = npv_health_av_mort / pop_2020) %>% -# arrange(-npv_health_av_mort_pc) -# -# # ## save figure inputs -# # fwrite(health_county_df, file.path(main_path, save_path, "fig-csv-files", "county_health_npv_fig_inputs.csv")) -# -# -# ## for plotting health -# health_map_npv_df <- st_as_sf(health_map_npv_df) -# -# health_map_npv_df <- st_transform(health_map_npv_df, crs = "EPSG:4269") -# -# health_map_npv_df <- st_make_valid(health_map_npv_df) -# -# -# ## consrtuct labor df -# labor_map_df <- copy(annual_labor) -# -# labor_map_df[, scen_id := paste(demand_scenario, refining_scenario)] -# labor_map_df[, scen_id := str_replace(scen_id, "historic", "historical")] -# labor_map_df[, scenario := paste0(demand_scenario, " demand - ", refining_scenario)] -# labor_map_df[, scenario := gsub('LC1.', 'Low ', scenario)] -# labor_map_df[, scenario := str_replace(scenario, "historic", "historical")] -# -# labor_map_df <- labor_map_df[scenario == "Low demand - low exports" | scenario == "BAU demand - historical production"] -# -# ## group by scenario and census tract, sum cost_2019_pv -# labor_map_df <- labor_map_df[, .(sum_total_comp_usd19_h = sum(total_comp_usd19_h), -# sum_total_comp_usd19_l = sum(total_comp_usd19_l)), -# by = .(destination, scenario, scen_id)] -# -# -# -# labor_map_df[, ref := ifelse(scenario == "BAU demand - historical production", "bau", "alt")] -# -# ## low -# labor_map_df_l <- labor_map_df %>% -# mutate(county = str_remove(destination, " County, CA")) %>% -# select(county, ref, sum_total_comp_usd19_l) %>% -# pivot_wider(names_from = ref, values_from = sum_total_comp_usd19_l) %>% -# mutate(diff = alt - bau) %>% -# rename(delta_total_comp_usd19_l = diff) %>% -# mutate(scenario = "Low demand - low exports") %>% -# select(scenario, county, delta_total_comp_usd19_l) %>% -# rename(NAME = county) %>% -# left_join(county_names) %>% -# left_join(pop_2020_county) %>% -# mutate(delta_total_comp_usd19_pc_l = delta_total_comp_usd19_l / pop_2020) %>% -# select(scenario, NAME, COUNTYFP, pop_2020, delta_total_comp_usd19_l, delta_total_comp_usd19_pc_l) -# -# labor_map_df_h <- labor_map_df %>% -# mutate(county = str_remove(destination, " County, CA")) %>% -# select(county, ref, sum_total_comp_usd19_h) %>% -# pivot_wider(names_from = ref, values_from = sum_total_comp_usd19_h) %>% -# mutate(diff = alt - bau) %>% -# rename(delta_total_comp_usd19_h = diff) %>% -# mutate(scenario = "Low demand - low exports") %>% -# select(scenario, county, delta_total_comp_usd19_h) %>% -# rename(NAME = county) %>% -# left_join(county_names) %>% -# left_join(pop_2020_county) %>% -# mutate(delta_total_comp_usd19_pc_h = delta_total_comp_usd19_h / pop_2020) %>% -# select(scenario, NAME, COUNTYFP, pop_2020, delta_total_comp_usd19_h, delta_total_comp_usd19_pc_h) -# -# ## merge -# health_map_df_hl <- merge(labor_map_df_l, labor_map_df_h, -# by = c("scenario", "NAME", "COUNTYFP", "pop_2020"), -# all.x = T) -# -# ## join with county-level health -# county_map_df <- merge(health_county_df, health_map_df_hl %>% select(NAME, delta_total_comp_usd19_l, delta_total_comp_usd19_pc_l, -# delta_total_comp_usd19_h, delta_total_comp_usd19_pc_h), -# by = "NAME", -# all.x = T) -# -# county_map_df <- county_map_df %>% -# select(scenario, scen_id, county = NAME, COUNTYFP, pop_2020, npv_health_av_mort, npv_health_av_mort_pc, -# delta_total_comp_usd19_l, delta_total_comp_usd19_pc_l, -# delta_total_comp_usd19_h, delta_total_comp_usd19_pc_h) -# -# -# ## save county-level inputs -# fwrite(county_map_df, file.path(main_path, save_path, "fig-csv-files", "county_health_labor_npv_npv_pc.csv")) -# -# # ## make the maps -# # ##---------------------------------------------------------------------------- -# # -# # california <- st_as_sf(maps::map("state", plot = FALSE, fill = TRUE)) %>% -# # filter(ID == "california") %>% -# # st_transform(crs = "EPSG:4269") -# # -# # -# # # # filter for census tracts that do not experience health benefits -# # # neg_npv_health <- health_map_npv_df %>% -# # # filter(sum_cost_2019_pv < 0) %>% -# # # filter(ALAND > 0) -# # # -# # # ggplot() + -# # # geom_sf(data = neg_npv_health, mapping = aes(geometry = geometry, fill = sum_cost_2019_pv / 1000), lwd = 0.05, alpha = 1, color = "grey", show.legend = TRUE) + -# # # geom_sf(data = california, fill = "transparent", color = "black") -# # # -# # ## filter refining output df for ct's with negative outputs -# # # neg_health_out <- health_map_df %>% -# # # filter(census_tract %in% unique(neg_npv_health$GEOID)) %>% -# # # select(census_tract, year, delta_total_pm25, mortality_delta, cost_2019_PV) %>% -# # # pivot_longer(delta_total_pm25:cost_2019_PV, names_to = "metric", values_to = "value") %>% -# # # mutate(lab = ifelse(metric == "delta_total_pm25", "pm25 - difference from reference", -# # # ifelse(metric == "mortality_delta", "avoided mortality - difference from reference", "avoided mortality - difference from reference (USD)"))) -# # # -# # # ggplot(neg_health_out, aes(x = year, y = value, group = census_tract)) + -# # # geom_line(alpha = 0.4) + -# # # facet_wrap(~lab, scales = "free_y", nrow = 3) + -# # # theme_bw() -# # -# # -# # -# # -# # -# # ## health maps -# # ## --------------------------------------------------------------------------- -# # -# # ## bounding box 1 -# # bay_bb <- st_bbox(c(xmin = -123, ymin = 37.5, xmax = -121, ymax = 38.5), crs = st_crs(health_map_df)) -# # bay_bb <- st_as_sfc(bay_bb) -# # -# # la_bb <- st_bbox(c(xmin = -119, ymin = 33, xmax = -117, ymax = 34.5), crs = st_crs(health_map_df)) -# # la_bb <- st_as_sfc(la_bb) -# # -# # -# # # ## filter for bay area -# # # health_map_df2 <- health_map_df %>% -# # # mutate(bay = st_intersects(geometry, bay_bb), -# # # la = st_intersects(geometry, la_bb)) -# # -# # -# # -# # -# # health_fig_bay <- ggplot() + -# # geom_sf(data = health_map_npv_df %>% filter(ALAND > 0), mapping = aes(geometry = geometry, fill = sum_cost_2019_pv / 1000), lwd = 0.05, alpha = 1, color = "grey", show.legend = TRUE) + -# # # geom_sf(data = california, fill = "transparent", color = "black") + -# # scale_fill_gradient2(high = "navy", mid = "#FAFAFA", low = "#A84268", -# # # high = "navy", mid = "#FAFAFA", low = "#A84268", -# # midpoint = 0, space = "Lab", -# # # limits = c(min(health_map_df$sum_cost_2019_pv / 1000), max(health_map_df$sum_cost_2019_pv / 1000)), -# # limits = c(-2500, 99000), -# # breaks = c(-2500, 0, 49500, 99000), -# # labels = c(-2500, 0, 49500, 99000), -# # na.value = "grey50" -# # # , -# # # labels=function(x) format(x, big.mark = ",", scientific = FALSE) -# # ) + -# # coord_sf(xlim = c(-123, -121), ylim = c(37.5, 38.5)) + -# # labs(fill = "NPV (thousand USD 2019)", -# # color = NULL, -# # x = NULL, -# # y = NULL) + -# # theme_bw() + -# # theme( -# # # legend.justification defines the edge of the legend that the legend.position coordinates refer to -# # # legend.justification = c(0, 1), -# # # Set the legend flush with the left side of the plot, and just slightly below the top of the plot -# # legend.position = "none", -# # legend.key.width = unit(2, "line"), -# # legend.key.height = unit(1, "line"), -# # legend.title = element_text(size = 8), -# # legend.text = element_text(size = 8), -# # plot.margin = margin(0, 2, 0, 8), -# # plot.title = element_text(face = 'bold', size = 10), -# # plot.subtitle = element_text(face = 'bold', size = 8), -# # panel.grid.major = element_blank(), -# # panel.grid.minor = element_blank(), -# # panel.background = element_blank(), -# # axis.text = element_text(size = 8)) + -# # guides(fill = guide_colourbar(title.position="top", -# # title.hjust = 0, -# # direction = "horizontal", -# # ticks.colour = "black", frame.colour = "black"), -# # color = "none") -# # -# # -# # health_fig_la <- ggplot() + -# # geom_sf(data = health_map_npv_df %>% filter(ALAND > 0), mapping = aes(geometry = geometry, fill = sum_cost_2019_pv / 1000), lwd = 0.05, alpha = 1, color = "grey", show.legend = TRUE) + -# # # geom_sf(data = california, fill = "transparent", color = "black") + -# # scale_fill_gradient2(high = "navy", mid = "#FAFAFA", low = "#A84268", -# # # high = "navy", mid = "#FAFAFA", low = "#A84268", -# # midpoint = 0, space = "Lab", -# # # limits = c(min(health_map_df$sum_cost_2019_pv / 1000), max(health_map_df$sum_cost_2019_pv / 1000)), -# # limits = c(-2500, 99000), -# # breaks = c(-2500, 0, 49500, 99000), -# # labels = c(-2500, 0, 49500, 99000), -# # na.value = "grey50" -# # # , -# # # labels=function(x) format(x, big.mark = ",", scientific = FALSE) -# # ) + -# # coord_sf(xlim = c(-119, -117), ylim = c(33.2, 34.4)) + -# # labs(fill = "NPV (thousand USD 2019)", -# # color = NULL, -# # x = NULL, -# # y = NULL) + -# # theme_bw() + -# # theme( -# # # legend.justification defines the edge of the legend that the legend.position coordinates refer to -# # # legend.justification = c(0, 1), -# # # Set the legend flush with the left side of the plot, and just slightly below the top of the plot -# # legend.position = "none", -# # legend.key.width = unit(2, "line"), -# # legend.key.height = unit(1, "line"), -# # legend.title = element_text(size = 8), -# # legend.text = element_text(size = 8), -# # plot.margin = margin(0, 2, 0, 8), -# # plot.title = element_text(face = 'bold', size = 10), -# # plot.subtitle = element_text(face = 'bold', size = 8), -# # panel.grid.major = element_blank(), -# # panel.grid.minor = element_blank(), -# # panel.background = element_blank(), -# # axis.text = element_text(size = 8)) + -# # guides(fill = guide_colourbar(title.position="top", -# # title.hjust = 0, -# # direction = "horizontal", -# # ticks.colour = "black", frame.colour = "black"), -# # color = "none") -# # -# # fig3c_leg <- ggplot() + -# # geom_sf(data = health_map_df %>% filter(ALAND > 0), mapping = aes(geometry = geometry, fill = sum_cost_2019_pv / 1000), lwd = 0.05, alpha = 1, color = "grey", show.legend = TRUE) + -# # # geom_sf(data = california, fill = "transparent", color = "black") + -# # scale_fill_gradient2(low = "black", mid = "#FAFAFA", high = "#A84268", -# # # high = "navy", mid = "#FAFAFA", low = "#A84268", -# # midpoint = 0, space = "Lab", -# # # limits = c(min(health_map_df$sum_cost_2019_pv / 1000), max(health_map_df$sum_cost_2019_pv / 1000)), -# # limits = c(-2500, 99000), -# # breaks = c(-2500, 0, 49500, 99000), -# # labels = c(-2500, 0, 49500, 99000), -# # na.value = "grey50" -# # # , -# # # labels=function(x) format(x, big.mark = ",", scientific = FALSE) -# # ) + -# # coord_sf(xlim = c(-123, -121), ylim = c(37.5, 38.5)) + -# # labs(fill = "NPV (thousand USD 2019)", -# # color = NULL, -# # x = NULL, -# # y = NULL) + -# # theme_bw() + -# # theme( -# # # legend.justification defines the edge of the legend that the legend.position coordinates refer to -# # # legend.justification = c(0, 1), -# # # Set the legend flush with the left side of the plot, and just slightly below the top of the plot -# # legend.position = "bottom", -# # legend.key.width = unit(2, "line"), -# # legend.key.height = unit(1, "line"), -# # legend.title = element_text(size = 8), -# # legend.text = element_text(size = 8), -# # plot.margin = margin(0, 2, 0, 8), -# # plot.title = element_text(face = 'bold', size = 10), -# # plot.subtitle = element_text(face = 'bold', size = 8), -# # panel.grid.major = element_blank(), -# # panel.grid.minor = element_blank(), -# # panel.background = element_blank(), -# # axis.text = element_text(size = 8)) + -# # guides(fill = guide_colourbar(title.position="top", -# # title.hjust = 0, -# # direction = "horizontal", -# # ticks.colour = "black", frame.colour = "black"), -# # color = "none") -# # -# # -# # -# # # # ## quantile for plotting -# # # # numclas <- 12 -# # # # qbrks_h <- seq(0, 1, length.out = numclas + 1) -# # # # qbrks_h -# # # # -# # # # health_map_df <- health_map_df %>% -# # # # mutate(valq = cut(sum_cost_2019_pv, breaks = quantile(sum_cost_2019_pv, breaks = qbrks_h), -# # # # include.lowest = T)) -# # # # -# # # # -# # # fig3c_v2 <- ggplot() + -# # # geom_sf(data = health_map_df %>% filter(ALAND > 0), mapping = aes(geometry = geometry, fill = valq), lwd = 0, alpha = 1, color = "darkgrey", show.legend = TRUE) + -# # # # geom_sf(data = california, fill = "transparent", color = "black") + -# # # scale_fill_discrete(labels=function(x) format(x, big.mark = ",", scientific = FALSE)) + -# # # labs(fill = "NPV (USD 2019)", -# # # color = NULL, -# # # x = NULL, -# # # y = NULL) + -# # # theme_bw() + -# # # theme( -# # # # legend.justification defines the edge of the legend that the legend.position coordinates refer to -# # # # legend.justification = c(0, 1), -# # # # Set the legend flush with the left side of the plot, and just slightly below the top of the plot -# # # legend.position = "bottom", -# # # legend.key.width = unit(2, "line"), -# # # legend.key.height = unit(1, "line"), -# # # legend.title = element_text(size = 8), -# # # legend.text = element_text(size = 8), -# # # plot.margin = margin(0, 2, 0, 8), -# # # plot.title = element_text(face = 'bold', size = 10), -# # # plot.subtitle = element_text(face = 'bold', size = 8), -# # # panel.grid.major = element_blank(), -# # # panel.grid.minor = element_blank(), -# # # panel.background = element_blank(), -# # # axis.text = element_text(size = 8)) + -# # # guides(fill = guide_colourbar(title.position="top", -# # # title.hjust = 0, -# # # direction = "horizontal", -# # # ticks.colour = "black", frame.colour = "black"), -# # # color = "none") -# # -# # -# # ## labor -# # labor_map_df <- merge(raw_ca_counties_sp %>% select(NAME), labor_map_df %>% rename(NAME = county), -# # by = "NAME", -# # all.x = T) -# # -# # labor_map_df <- st_as_sf(labor_map_df) -# # -# # labor_map_df <- st_transform(labor_map_df, crs = "EPSG:4269") -# # -# # labor_map_df <- st_make_valid(labor_map_df) -# # -# # ## labor -# # blues_pal <- c("#FAFAFA", "#778DA9", "#415A77", "#1B263B", "#0D1B2A") -# # -# # fig3d <- ggplot() + -# # geom_sf(data = labor_map_df, mapping = aes(geometry = geometry, fill = delta_total_comp_usd19 / 1e9), lwd = 0.05, alpha = 1, color = "grey", show.legend = TRUE) + -# # # geom_sf(data = california, fill = "transparent", color = "black") + -# # scale_fill_gradientn(colors = rev(blues_pal), -# # na.value = "#FAFAFA") + -# # # coord_sf(xlim = c(-123, -116), ylim = c(33, 39)) + -# # labs(fill = "NPV (billion USD 2019)", -# # color = NULL, -# # x = NULL, -# # y = NULL) + -# # theme_bw() + -# # theme( -# # # legend.justification defines the edge of the legend that the legend.position coordinates refer to -# # # legend.justification = c(0, 1), -# # # Set the legend flush with the left side of the plot, and just slightly below the top of the plot -# # legend.position = "none", -# # legend.key.width = unit(2, "line"), -# # legend.key.height = unit(1, "line"), -# # legend.title = element_text(size = 8), -# # legend.text = element_text(size = 8), -# # plot.margin = margin(0, 2, 0, 8), -# # plot.title = element_text(face = 'bold', size = 10), -# # plot.subtitle = element_text(face = 'bold', size = 8), -# # panel.grid.major = element_blank(), -# # panel.grid.minor = element_blank(), -# # panel.background = element_blank(), -# # axis.text = element_text(size = 8)) + -# # guides(fill = guide_colourbar(title.position="top", -# # title.hjust = 0, -# # direction = "horizontal", -# # ticks.colour = "black", frame.colour = "black"), -# # color = "none") -# # -# # fig3d_leg <- ggplot() + -# # geom_sf(data = labor_map_df, mapping = aes(geometry = geometry, fill = delta_total_comp_usd19 / 1e9), lwd = 0.05, alpha = 1, color = "grey", show.legend = TRUE) + -# # # geom_sf(data = california, fill = "transparent", color = "black") + -# # scale_fill_gradientn(colors = rev(blues_pal), -# # na.value = "#FAFAFA") + -# # # coord_sf(xlim = c(-123, -116), ylim = c(33, 39)) + -# # labs(fill = "NPV (billion USD 2019)", -# # color = NULL, -# # x = NULL, -# # y = NULL) + -# # theme_bw() + -# # theme( -# # # legend.justification defines the edge of the legend that the legend.position coordinates refer to -# # # legend.justification = c(0, 1), -# # # Set the legend flush with the left side of the plot, and just slightly below the top of the plot -# # legend.position = "bottom", -# # legend.key.width = unit(2, "line"), -# # legend.key.height = unit(1, "line"), -# # legend.title = element_text(size = 8), -# # legend.text = element_text(size = 8), -# # plot.margin = margin(0, 2, 0, 8), -# # plot.title = element_text(face = 'bold', size = 10), -# # plot.subtitle = element_text(face = 'bold', size = 8), -# # panel.grid.major = element_blank(), -# # panel.grid.minor = element_blank(), -# # panel.background = element_blank(), -# # axis.text = element_text(size = 8)) + -# # guides(fill = guide_colourbar(title.position="top", -# # title.hjust = 0, -# # direction = "horizontal", -# # ticks.colour = "black", frame.colour = "black"), -# # color = "none") -# # -# # -# # -# # -# # -# # -# # legend_fig_3c <- get_legend( -# # fig3c_leg + -# # theme(legend.title = element_text(size = 8), -# # legend.text = element_text(size = 8)) -# # -# # ) -# # -# # legend_fig_3d <- get_legend( -# # fig3d_leg + -# # theme(legend.title = element_text(size = 8), -# # legend.text = element_text(size = 8)) -# # -# # ) -# # -# # -# -# -# } - -calc_county_pm25 <- function( - main_path, - save_path, - health_weighted, - raw_counties, - raw_ct_2020_all, - refining_mortality -) { - ## calc 2020 pop by demographic - pop_2020 <- refining_mortality %>% - filter(year == 2020) %>% - select(census_tract, year, pop) %>% - unique() %>% - as.data.table() - - health_df <- copy(health_weighted) - - health_df <- health_df[ - year == 2019 & - scen_id == "BAU historic exports" - ] - - county_names <- raw_counties %>% - select(COUNTYFP, NAME) %>% - st_drop_geometry() %>% - unique() - - ## geoid to census tract - county_df <- raw_ct_2020_all %>% - filter(STATEFP == "06") %>% - select(census_tract = GEOID, COUNTYFP, ALAND) %>% - st_drop_geometry() %>% - left_join(county_names) %>% - left_join(pop_2020) %>% - select(census_tract, COUNTYFP, NAME, pop, ALAND) - - health_df <- merge(health_df, county_df, by = "census_tract", all.x = T) - - health_county_df <- health_df %>% - group_by(NAME, COUNTYFP, year) %>% - summarise( - avg_pm25_areaw = weighted.mean(total_pm25, ALAND), - avg_pm25_popw = weighted.mean(total_pm25, pop) - ) %>% - ungroup() %>% - arrange(-avg_pm25_popw) - - fwrite( - health_county_df, - file.path(main_path, save_path, "fig-csv-files", "avg_pm25_county_2019.csv") - ) - # fwrite(health_county_df, file.path(main_path, "outputs/academic-out/refining/figures/2024-08-beta-adj/fig-csv-files/", "avg_pm25_county_2019.csv")) - - return(health_county_df) -} - - -plot_health_levels <- function(main_path, save_path, health_grp) { - fig2_df <- copy(health_grp) - - ## change scenario names, factor - fig2_df[, - scenario := paste0(demand_scenario, " demand - ", refining_scenario) - ] - # fig2_df[, scenario := gsub('BAU', 'Reference', scenario)] - fig2_df[, scenario := gsub("LC1.", "Low ", scenario)] - - ## add scenario title - fig2_df[, scenario_title := str_replace(scenario, " - ", "\n")] - - ## change historic to historical - fig2_df[, scen_id := str_replace(scen_id, "historic", "historical")] - fig2_df[, - refining_scenario := str_replace( - refining_scenario, - "historic", - "historical" - ) - ] - fig2_df[, scenario := str_replace(scenario, "historic", "historical")] - fig2_df[, - scenario_title := str_replace(scenario_title, "historic", "historical") - ] - - ## refactor - fig2_df$scenario_title <- factor( - fig2_df$scenario_title, - levels = c( - "BAU demand\nhistorical production", - "BAU demand\nhistorical exports", - "BAU demand\nlow exports", - "Low demand\nhistorical exports", - "Low demand\nlow exports", - "Low demand\nhistorical production" - ) - ) - - ## refactor - fig2_df$scenario <- factor( - fig2_df$scenario, - levels = c( - "BAU demand - historical production", - "BAU demand - historical exports", - "BAU demand - low exports", - "Low demand - historical exports", - "Low demand - low exports", - "Low demand - historical production" - ) - ) - - ## scenarios for filtering - # remove_scen <- c('LC1 historic production', 'BAU low exports', 'LC1 historic exports') - remove_scen <- c("LC1 historical production") - - ## save figure inputs - fwrite( - fig2_df, - file.path( - main_path, - save_path, - "fig-csv-files", - "state_levels_fig_inputs.csv" - ) - ) - # fwrite(fig2_df, file.path(main_path, "outputs/academic-out/refining/figures/2024-08-beta-adj/fig-csv-files/", "state_levels_fig_inputs.csv")) - - # health_level_fig <- ggplot(fig2_df %>% filter(!scen_id %in% remove_scen), aes(x = year, y = num_over_den, color = group)) + - # geom_line(linewidth = 1, alpha = 0.8) + - # facet_grid(type ~ scenario) + - # labs(x = NULL, - # y = "num_over_den") + - # theme_line + - # theme(legend.position = "bottom", - # axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - # axis.ticks.length.y = unit(0.1, 'cm'), - # axis.ticks.length.x = unit(0.1, 'cm')) - - fig_title_vec <- c("Asian", "Black", "Hispanic", "white") - - health_level_fig_a <- ggplot( - fig2_df %>% - filter( - !scen_id %in% remove_scen, - title %in% fig_title_vec, - demo_cat == "Race" - ) %>% - mutate( - title = factor(title, levels = c("Black", "Hispanic", "Asian", "white")) - ), - aes(x = year, y = mortality_level_dem, color = title) - ) + - geom_line(linewidth = 1, alpha = 0.8) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - facet_grid(demo_cat ~ scenario_title) + - scale_color_manual( - name = "", - values = race_col_pal - ) + - labs( - x = NULL, - y = NULL - ) + - # ylim(c(0, 0.4)) + - scale_x_continuous( - breaks = c(2020, 2045), # Specify tick mark positions - labels = c(2020, 2045) - ) + # Specify tick mark labels - theme_line + - theme( - legend.position = "bottom", - legend.title = element_blank(), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - strip.text.x = element_blank(), - # axis.text.x = element_blank(), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - legend_figa <- health_level_fig_a + theme(legend.position = "right") - - legend_a <- get_legend( - legend_figa + - theme(legend.text = element_text(size = 8)) - ) - - ## - health_level_fig_b <- ggplot( - fig2_df %>% - filter( - !scen_id %in% remove_scen, - demo_cat == "DAC" - ), - aes(x = year, y = mortality_level_dem, lty = title) - ) + - geom_line(linewidth = 1, alpha = 0.8) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - facet_grid(demo_cat ~ scenario_title) + - labs( - x = NULL, - y = NULL - ) + - # ylim(c(0, 0.4)) + - scale_linetype_manual(values = dac_lty) + - scale_x_continuous( - breaks = c(2020, 2045), # Specify tick mark positions - labels = c(2020, 2045) - ) + # Specify tick mark labels - theme_line + - theme( - legend.position = "bottom", - legend.title = element_blank(), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - # strip.text.x = element_blank(), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - # axis.text.x = element_blank(), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - legend_figb <- health_level_fig_b + theme(legend.position = "right") - - legend_b <- get_legend( - legend_figb + - theme(legend.text = element_text(size = 8)) - ) - - ## - health_level_fig_c <- ggplot( - fig2_df %>% - filter( - !scen_id %in% remove_scen, - demo_cat == "Poverty" - ) %>% - mutate( - title = factor( - title, - levels = c("Below poverty line", "Above poverty line") - ) - ), - aes(x = year, y = mortality_level_dem, lty = title) - ) + - geom_line(linewidth = 1, alpha = 0.8, color = "black") + - scale_linetype_manual(values = poverty_lty) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - facet_grid(demo_cat ~ scenario) + - labs( - x = NULL, - y = NULL - ) + - # ylim(c(0, 0.4)) + - scale_x_continuous( - breaks = c(2020, 2045), # Specify tick mark positions - labels = c(2020, 2045) - ) + # Specify tick mark labels - theme_line + - theme( - legend.position = "bottom", - legend.title = element_blank(), - # axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - legend.key.width = unit(10, "mm"), - strip.text.x = element_blank(), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - legend_figc <- health_level_fig_c + theme(legend.position = "right") - - legend_c <- get_legend( - legend_figc + - theme(legend.text = element_text(size = 8)) - ) - - ## shared y lab - # yaxis_lab <- ggdraw() + draw_label(expression(paste("PM"[2.5], " (",mu,"g ", m^{-3},")", " per person")), - # size = 8, angle = 90) - - yaxis_lab <- ggdraw() + draw_label("Mortalities", size = 8, angle = 90) - - # ## plot together - # fig2l_a <- plot_grid( - # health_level_fig_a, - # legend_a, - # align = 'h', - # # labels = c("A", "B", "C", "D", "E", "F"), - # # # labels = 'AUTO', - # # label_size = 10, - # hjust = -1, - # nrow = 1, - # ncol = 2, - # rel_widths = c(0.95, 0.5), - # rel_heighs = c(1, 1) - # ) - # - # - - fig2_plot_grid <- plot_grid( - health_level_fig_b, - health_level_fig_c, - health_level_fig_a, - align = "v", - # labels = c("A", "B", "C", "D", "E", "F"), - # # labels = 'AUTO', - # label_size = 10, - hjust = -1, - nrow = 3, - ncol = 1, - rel_widths = c(1, 1, 1, 1), - rel_heights = c(1, 1, 1, 1.05) - ) - - fig2_plot_grid2 <- plot_grid( - yaxis_lab, - fig2_plot_grid, - align = "v", - # labels = c("A", "B", "C", "D", "E", "F"), - # # labels = 'AUTO', - # label_size = 10, - hjust = -1, - nrow = 1, - ncol = 2, - rel_widths = c(0.05, 1), - rel_heighs = c(1, 1) - ) - - fig2_plot_grid2 -} - - -plot_health_levels_pc <- function( - main_path, - save_path, - health_grp, - refining_mortality, - pop_ratios -) { - mort_pc_df <- copy(health_grp) - - ## calc 2020 pop by demographic - pop_2020 <- refining_mortality %>% - filter(year == 2020) %>% - select(census_tract, year, pop) %>% - unique() %>% - left_join(pop_ratios) %>% - as.data.table() - - pop_2020[, demo_pop := pop * pct] - - ## summarize by demographic group - pop_2020 <- pop_2020[, - .(pop_2020 = sum(demo_pop)), - by = .(demo_group, demo_cat) - ] - - ## merge population back with results - mort_pc_df <- merge( - mort_pc_df, - pop_2020, - by = c("demo_group", "demo_cat"), - all.x = T - ) - - ## calculate per capita - mort_pc_df[, value := mortality_level_dem / pop_2020] - mort_pc_df[, value_pmil := value * 1e6] - - ## change scenario names, factor - mort_pc_df[, - scenario := paste0(demand_scenario, " demand - ", refining_scenario) - ] - # fig2_df[, scenario := gsub('BAU', 'Reference', scenario)] - mort_pc_df[, scenario := gsub("LC1.", "Low ", scenario)] - - ## add scenario title - mort_pc_df[, scenario_title := str_replace(scenario, " - ", "\n")] - - ## change historic to historical - mort_pc_df[, scen_id := str_replace(scen_id, "historic", "historical")] - mort_pc_df[, - refining_scenario := str_replace( - refining_scenario, - "historic", - "historical" - ) - ] - mort_pc_df[, scenario := str_replace(scenario, "historic", "historical")] - mort_pc_df[, - scenario_title := str_replace(scenario_title, "historic", "historical") - ] - - ## refactor - mort_pc_df$scenario_title <- factor( - mort_pc_df$scenario_title, - levels = c( - "BAU demand\nhistorical production", - "BAU demand\nhistorical exports", - "BAU demand\nlow exports", - "Low demand\nhistorical exports", - "Low demand\nlow exports", - "Low demand\nhistorical production" - ) - ) - - ## refactor - mort_pc_df$scenario <- factor( - mort_pc_df$scenario, - levels = c( - "BAU demand - historical production", - "BAU demand - historical exports", - "BAU demand - low exports", - "Low demand - historical exports", - "Low demand - low exports", - "Low demand - historical production" - ) - ) - - ## scenarios for filtering - # remove_scen <- c('LC1 historic production', 'BAU low exports', 'LC1 historic exports') - remove_scen <- c("LC1 historical production") - - ## save figure inputs - fwrite( - mort_pc_df, - file.path( - main_path, - save_path, - "fig-csv-files", - "state_levels_pmil_fig_inputs.csv" - ) - ) - # fwrite(mort_pc_df, file.path(main_path, "outputs/academic-out/refining/figures/2024-08-beta-adj/fig-csv-files/", "state_levels_pmil_fig_inputs.csv")) - - # health_level_fig <- ggplot(fig2_df %>% filter(!scen_id %in% remove_scen), aes(x = year, y = num_over_den, color = group)) + - # geom_line(linewidth = 1, alpha = 0.8) + - # facet_grid(type ~ scenario) + - # labs(x = NULL, - # y = "num_over_den") + - # theme_line + - # theme(legend.position = "bottom", - # axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - # axis.ticks.length.y = unit(0.1, 'cm'), - # axis.ticks.length.x = unit(0.1, 'cm')) - - fig_title_vec <- c("Asian", "Black", "Hispanic", "white") - - health_level_fig_a <- ggplot( - mort_pc_df %>% - filter( - !scen_id %in% remove_scen, - title %in% fig_title_vec, - demo_cat == "Race" - ) %>% - mutate( - title = factor(title, levels = c("Black", "Hispanic", "Asian", "white")) - ), - aes(x = year, y = value_pmil, color = title) - ) + - geom_line(linewidth = 1, alpha = 0.8) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - facet_grid(demo_cat ~ scenario_title) + - scale_color_manual( - name = "", - values = race_col_pal - ) + - labs( - x = NULL, - y = NULL - ) + - # ylim(c(0, 40)) + - scale_x_continuous( - breaks = c(2020, 2045), # Specify tick mark positions - labels = c(2020, 2045) - ) + # Specify tick mark labels - theme_line + - theme( - legend.position = "bottom", - legend.title = element_blank(), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - strip.text.x = element_blank(), - # axis.text.x = element_blank(), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - legend_figa <- health_level_fig_a + theme(legend.position = "right") - - legend_a <- get_legend( - legend_figa + - theme(legend.text = element_text(size = 8)) - ) - - ## - health_level_fig_b <- ggplot( - mort_pc_df %>% - filter( - !scen_id %in% remove_scen, - demo_cat == "DAC" - ), - aes(x = year, y = value_pmil, lty = title) - ) + - geom_line(linewidth = 1, alpha = 0.8) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - facet_grid(demo_cat ~ scenario_title) + - labs( - x = NULL, - y = NULL - ) + - # ylim(c(0, 40)) + - scale_linetype_manual(values = dac_lty) + - scale_x_continuous( - breaks = c(2020, 2045), # Specify tick mark positions - labels = c(2020, 2045) - ) + # Specify tick mark labels - theme_line + - theme( - legend.position = "bottom", - legend.title = element_blank(), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - # strip.text.x = element_blank(), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - # axis.text.x = element_blank(), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - legend_figb <- health_level_fig_b + theme(legend.position = "right") - - legend_b <- get_legend( - legend_figb + - theme(legend.text = element_text(size = 8)) - ) - - ## - health_level_fig_c <- ggplot( - mort_pc_df %>% - filter( - !scen_id %in% remove_scen, - demo_cat == "Poverty" - ) %>% - mutate( - title = factor( - title, - levels = c("Below poverty line", "Above poverty line") - ) - ), - aes(x = year, y = value_pmil, lty = title) - ) + - geom_line(linewidth = 1, alpha = 0.8, color = "black") + - scale_linetype_manual(values = poverty_lty) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - facet_grid(demo_cat ~ scenario) + - labs( - x = NULL, - y = NULL - ) + - # ylim(c(0, 40)) + - scale_x_continuous( - breaks = c(2020, 2045), # Specify tick mark positions - labels = c(2020, 2045) - ) + # Specify tick mark labels - theme_line + - theme( - legend.position = "bottom", - legend.title = element_blank(), - # axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - legend.key.width = unit(10, "mm"), - strip.text.x = element_blank(), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - legend_figc <- health_level_fig_c + theme(legend.position = "right") - - legend_c <- get_legend( - legend_figc + - theme(legend.text = element_text(size = 8)) - ) - - ## shared y lab - # yaxis_lab <- ggdraw() + draw_label(expression(paste("PM"[2.5], " (",mu,"g ", m^{-3},")", " per person")), - # size = 8, angle = 90) - - yaxis_lab <- ggdraw() + - draw_label("Mortalities per million people", size = 8, angle = 90) - - # ## plot together - # fig2l_a <- plot_grid( - # health_level_fig_a, - # legend_a, - # align = 'h', - # # labels = c("A", "B", "C", "D", "E", "F"), - # # # labels = 'AUTO', - # # label_size = 10, - # hjust = -1, - # nrow = 1, - # ncol = 2, - # rel_widths = c(0.95, 0.5), - # rel_heighs = c(1, 1) - # ) - # - # - - fig2_plot_grid <- plot_grid( - health_level_fig_b, - health_level_fig_c, - health_level_fig_a, - align = "v", - # labels = c("A", "B", "C", "D", "E", "F"), - # # labels = 'AUTO', - # label_size = 10, - hjust = -1, - nrow = 3, - ncol = 1, - rel_widths = c(1, 1, 1, 1), - rel_heighs = c(1, 1, 1, 1.05) - ) - - fig2_plot_grid2 <- plot_grid( - yaxis_lab, - fig2_plot_grid, - align = "v", - # labels = c("A", "B", "C", "D", "E", "F"), - # # labels = 'AUTO', - # label_size = 10, - hjust = -1, - nrow = 1, - ncol = 2, - rel_widths = c(0.05, 1), - rel_heighs = c(1, 1) - ) - - fig2_plot_grid2 -} - - -plot_health_levels_pm25 <- function(main_path, save_path, health_grp) { - fig2_df <- copy(health_grp) - - ## change scenario names, factor - fig2_df[, - scenario := paste0(demand_scenario, " demand - ", refining_scenario) - ] - # fig2_df[, scenario := gsub('BAU', 'Reference', scenario)] - fig2_df[, scenario := gsub("LC1.", "Low ", scenario)] - - ## add scenario title - fig2_df[, scenario_title := str_replace(scenario, " - ", "\n")] - - ## change historic to historical - fig2_df[, scen_id := str_replace(scen_id, "historic", "historical")] - fig2_df[, - refining_scenario := str_replace( - refining_scenario, - "historic", - "historical" - ) - ] - fig2_df[, scenario := str_replace(scenario, "historic", "historical")] - fig2_df[, - scenario_title := str_replace(scenario_title, "historic", "historical") - ] - - ## refactor - fig2_df$scenario_title <- factor( - fig2_df$scenario_title, - levels = c( - "BAU demand\nhistorical production", - "BAU demand\nhistorical exports", - "BAU demand\nlow exports", - "Low demand\nhistorical exports", - "Low demand\nlow exports", - "Low demand\nhistorical production" - ) - ) - - ## refactor - fig2_df$scenario <- factor( - fig2_df$scenario, - levels = c( - "BAU demand - historical production", - "BAU demand - historical exports", - "BAU demand - low exports", - "Low demand - historical exports", - "Low demand - low exports", - "Low demand - historical production" - ) - ) - - ## save figure inputs - fwrite( - fig2_df, - file.path( - main_path, - save_path, - "fig-csv-files", - "state_levels_pm25_inputs.csv" - ) - ) - # fwrite(fig2_df, file.path(main_path, "outputs/academic-out/refining/figures/2024-08-beta-adj/fig-csv-files/", "state_levels_pm25_inputs.csv")) - - ## scenarios for filtering - # remove_scen <- c('LC1 historic production', 'BAU low exports', 'LC1 historic exports') - remove_scen <- c("LC1 historical production") - - # health_level_fig <- ggplot(fig2_df %>% filter(!scen_id %in% remove_scen), aes(x = year, y = num_over_den, color = group)) + - # geom_line(linewidth = 1, alpha = 0.8) + - # facet_grid(type ~ scenario) + - # labs(x = NULL, - # y = "num_over_den") + - # theme_line + - # theme(legend.position = "bottom", - # axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - # axis.ticks.length.y = unit(0.1, 'cm'), - # axis.ticks.length.x = unit(0.1, 'cm')) - - fig_title_vec <- c("Asian", "Black", "Hispanic", "white") - - health_level_fig_a <- ggplot( - fig2_df %>% - filter( - !scen_id %in% remove_scen, - title %in% fig_title_vec, - demo_cat == "Race" - ) %>% - mutate( - title = factor(title, levels = c("Black", "Hispanic", "Asian", "white")) - ), - aes(x = year, y = num_over_den, color = title) - ) + - geom_line(linewidth = 1, alpha = 0.8) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - facet_grid(demo_cat ~ scenario_title) + - scale_color_manual( - name = "", - values = race_col_pal - ) + - labs( - x = NULL, - y = NULL - ) + - # ylim(c(0, 0.4)) + - scale_x_continuous( - breaks = c(2020, 2045), # Specify tick mark positions - labels = c(2020, 2045) - ) + # Specify tick mark labels - theme_line + - theme( - legend.position = "bottom", - legend.title = element_blank(), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - strip.text.x = element_blank(), - # axis.text.x = element_blank(), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - legend_figa <- health_level_fig_a + theme(legend.position = "right") - - legend_a <- get_legend( - legend_figa + - theme(legend.text = element_text(size = 8)) - ) - - ## - health_level_fig_b <- ggplot( - fig2_df %>% - filter( - !scen_id %in% remove_scen, - demo_cat == "DAC" - ), - aes(x = year, y = num_over_den, lty = title) - ) + - geom_line(linewidth = 1, alpha = 0.8) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - facet_grid(demo_cat ~ scenario_title) + - labs( - x = NULL, - y = NULL - ) + - # ylim(c(0, 0.4)) + - scale_linetype_manual(values = dac_lty) + - scale_x_continuous( - breaks = c(2020, 2045), # Specify tick mark positions - labels = c(2020, 2045) - ) + # Specify tick mark labels - theme_line + - theme( - legend.position = "bottom", - legend.title = element_blank(), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - # strip.text.x = element_blank(), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - # axis.text.x = element_blank(), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - legend_figb <- health_level_fig_b + theme(legend.position = "right") - - legend_b <- get_legend( - legend_figb + - theme(legend.text = element_text(size = 8)) - ) - - ## - health_level_fig_c <- ggplot( - fig2_df %>% - filter( - !scen_id %in% remove_scen, - demo_cat == "Poverty" - ) %>% - mutate( - title = factor( - title, - levels = c("Below poverty line", "Above poverty line") - ) - ), - aes(x = year, y = num_over_den, lty = title) - ) + - geom_line(linewidth = 1, alpha = 0.8, color = "black") + - scale_linetype_manual(values = poverty_lty) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - facet_grid(demo_cat ~ scenario) + - labs( - x = NULL, - y = NULL - ) + - # ylim(c(0, 0.4)) + - scale_x_continuous( - breaks = c(2020, 2045), # Specify tick mark positions - labels = c(2020, 2045) - ) + # Specify tick mark labels - theme_line + - theme( - legend.position = "bottom", - legend.title = element_blank(), - # axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - legend.key.width = unit(10, "mm"), - strip.text.x = element_blank(), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - legend_figc <- health_level_fig_c + theme(legend.position = "right") - - legend_c <- get_legend( - legend_figc + - theme(legend.text = element_text(size = 8)) - ) - - ## shared y lab - yaxis_lab <- ggdraw() + - draw_label( - expression(paste( - "PM"[2.5], - " (", - mu, - "g ", - m^{ - -3 - }, - ")", - " per person" - )), - size = 8, - angle = 90 - ) - - # ## plot together - # fig2l_a <- plot_grid( - # health_level_fig_a, - # legend_a, - # align = 'h', - # # labels = c("A", "B", "C", "D", "E", "F"), - # # # labels = 'AUTO', - # # label_size = 10, - # hjust = -1, - # nrow = 1, - # ncol = 2, - # rel_widths = c(0.95, 0.5), - # rel_heighs = c(1, 1) - # ) - # - # - - fig2_plot_grid <- plot_grid( - health_level_fig_b, - health_level_fig_c, - health_level_fig_a, - align = "v", - # labels = c("A", "B", "C", "D", "E", "F"), - # # labels = 'AUTO', - # label_size = 10, - hjust = -1, - nrow = 3, - ncol = 1, - rel_widths = c(1, 1, 1, 1), - rel_heighs = c(1, 1, 1, 1.05) - ) - - fig2_plot_grid2 <- plot_grid( - yaxis_lab, - fig2_plot_grid, - align = "v", - # labels = c("A", "B", "C", "D", "E", "F"), - # # labels = 'AUTO', - # label_size = 10, - hjust = -1, - nrow = 1, - ncol = 2, - rel_widths = c(0.05, 1), - rel_heighs = c(1, 1) - ) - - fig2_plot_grid2 -} - - -plot_health_levels_gaps <- function(main_path, save_path, health_grp) { - gaps_df <- copy(health_grp) - - ## change scenario names, factor - gaps_df[, - scenario := paste0(demand_scenario, " demand - ", refining_scenario) - ] - # gaps_df[, scenario := gsub('BAU', 'Reference', scenario)] - gaps_df[, scenario := gsub("LC1.", "Low ", scenario)] - - ## refactor - gaps_df[, scenario_title := scenario] - gaps_df[, scenario_title := str_replace(scenario_title, " - ", "\n")] - - ## calculate gaps (BAU - scenario) - bau_gaps_df <- gaps_df[scen_id == "BAU historic production"] - bau_gaps_df <- bau_gaps_df[, c( - "year", - "demo_cat", - "demo_group", - "title", - "mortality_level_dem" - )] - setnames(bau_gaps_df, "mortality_level_dem", "bau_mortality_level_dem") - - gaps_df <- merge( - gaps_df, - bau_gaps_df, - by = c("year", "demo_cat", "demo_group", "title"), - all.x = T - ) - - gaps_df[, gap := mortality_level_dem - bau_mortality_level_dem] - - ## change historic to historical - gaps_df[, scen_id := str_replace(scen_id, "historic", "historical")] - gaps_df[, - refining_scenario := str_replace( - refining_scenario, - "historic", - "historical" - ) - ] - gaps_df[, scenario := str_replace(scenario, "historic", "historical")] - gaps_df[, - scenario_title := str_replace(scenario_title, "historic", "historical") - ] - - gaps_df$scenario <- factor( - gaps_df$scenario, - levels = c( - "BAU demand - historical production", - "BAU demand - historical exports", - "BAU demand - low exports", - "Low demand - historical exports", - "Low demand - low exports", - "Low demand - historical production" - ) - ) - - gaps_df$scenario_title <- factor( - gaps_df$scenario_title, - levels = c( - "BAU demand\nhistorical production", - "BAU demand\nhistorical exports", - "BAU demand\nlow exports", - "Low demand\nhistorical exports", - "Low demand\nlow exports", - "Low demand\nhistorical production" - ) - ) - - ## save figure inputs - fwrite( - gaps_df, - file.path( - main_path, - save_path, - "fig-csv-files", - "state_levels_fig_gaps_inputs.csv" - ) - ) - # fwrite(gaps_df, file.path(main_path, "outputs/academic-out/refining/figures/2024-08-beta-adj/fig-csv-files/", "state_levels_fig_gaps_inputs.csv")) - - ## make figures - ## --------------------------------------------------------- - - ## scenarios for filtering - remove_scen <- c("LC1 historical production", "BAU historical production") - - ## figure a - fig_title_vec <- c("Asian", "Black", "Hispanic", "white") - - health_gap_fig_a <- ggplot( - gaps_df %>% - filter( - !scen_id %in% remove_scen, - title %in% fig_title_vec, - demo_cat == "Race" - ) %>% - mutate( - title = factor(title, levels = c("Black", "Hispanic", "Asian", "white")) - ), - aes(x = year, y = gap, color = title) - ) + - geom_line(linewidth = 1, alpha = 0.8) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - facet_grid(demo_cat ~ scenario_title) + - scale_color_manual( - name = "", - values = race_col_pal - ) + - labs( - x = NULL, - y = NULL - ) + - scale_x_continuous( - breaks = c(2020, 2045), # Specify tick mark positions - labels = c(2020, 2045) - ) + # Specify tick mark labels - theme_line + - # ylim(c(-0.31, 0)) + - theme( - legend.position = "bottom", - legend.title = element_blank(), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - strip.text.x = element_blank(), - # axis.text.x = element_blank(), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - legend_figa <- health_gap_fig_a + theme(legend.position = "right") - - legend_a <- get_legend( - legend_figa + - theme(legend.text = element_text(size = 8)) - ) - - ## - health_gap_fig_b <- ggplot( - gaps_df %>% - filter( - !scen_id %in% remove_scen, - demo_cat == "DAC" - ), - aes(x = year, y = gap, lty = title) - ) + - geom_line(linewidth = 1, alpha = 0.8) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - facet_grid(demo_cat ~ scenario_title) + - labs( - x = NULL, - y = NULL - ) + - # ylim(c(-0.31, 0)) + - scale_x_continuous( - breaks = c(2020, 2045), # Specify tick mark positions - labels = c(2020, 2045) - ) + # Specify tick mark labels - scale_linetype_manual(values = dac_lty) + - theme_line + - theme( - legend.position = "bottom", - legend.title = element_blank(), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - # strip.text.x = element_blank(), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - # axis.text.x = element_blank(), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - legend_figb <- health_gap_fig_b + theme(legend.position = "right") - - legend_b <- get_legend( - legend_figb + - theme(legend.text = element_text(size = 8)) - ) - - ## - health_gap_fig_c <- ggplot( - gaps_df %>% - filter( - !scen_id %in% remove_scen, - demo_cat == "Poverty" - ) %>% - mutate( - title = factor( - title, - levels = c("Below poverty line", "Above poverty line") - ) - ), - aes(x = year, y = gap, lty = title) - ) + - geom_line(linewidth = 1, alpha = 0.8, color = "black") + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - facet_grid(demo_cat ~ scenario_title) + - labs( - x = NULL, - y = NULL - ) + - scale_linetype_manual(values = poverty_lty) + - scale_x_continuous( - breaks = c(2020, 2045), # Specify tick mark positions - labels = c(2020, 2045) - ) + # Specify tick mark labels - # ylim(c(-0.31, 0)) + - theme_line + - theme( - legend.position = "bottom", - legend.title = element_blank(), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - legend.key.width = unit(10, "mm"), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - strip.text.x = element_blank(), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - legend_figc <- health_gap_fig_c + theme(legend.position = "right") - - legend_c <- get_legend( - legend_figc + - theme(legend.text = element_text(size = 8)) - ) - - ## shared y lab - # yaxis_lab <- ggdraw() + draw_label(expression(paste("PM"[2.5], " (",mu,"g ", m^{-3},")", " per person, difference from reference")), - # size = 8, angle = 90) - - yaxis_lab <- ggdraw() + - draw_label( - "Avoided mortalities, difference from reference", - size = 8, - angle = 90 - ) - - # ## plot together - # fig2l_a <- plot_grid( - # health_level_fig_a, - # legend_a, - # align = 'h', - # # labels = c("A", "B", "C", "D", "E", "F"), - # # # labels = 'AUTO', - # # label_size = 10, - # hjust = -1, - # nrow = 1, - # ncol = 2, - # rel_widths = c(0.95, 0.5), - # rel_heighs = c(1, 1) - # ) - # - # - - gaps_plot_grid <- plot_grid( - health_gap_fig_b, - health_gap_fig_c, - health_gap_fig_a, - align = "v", - # labels = c("A", "B", "C", "D", "E", "F"), - # # labels = 'AUTO', - # label_size = 10, - hjust = -1, - nrow = 3, - ncol = 1, - rel_widths = c(1, 1, 1, 1), - rel_heighs = c(1, 1, 1, 1.05) - ) - - gaps_plot_grid2 <- plot_grid( - yaxis_lab, - gaps_plot_grid, - align = "v", - # labels = c("A", "B", "C", "D", "E", "F"), - # # labels = 'AUTO', - # label_size = 10, - hjust = -1, - nrow = 1, - ncol = 2, - rel_widths = c(0.05, 1), - rel_heighs = c(1, 1) - ) - - gaps_plot_grid2 -} - - -plot_health_levels_gaps_pmil <- function( - main_path, - save_path, - health_grp, - refining_mortality, - pop_ratios -) { - ## calc 2020 pop by demographic - pop_2020 <- refining_mortality %>% - filter(year == 2020) %>% - select(census_tract, year, pop) %>% - unique() %>% - left_join(pop_ratios) %>% - as.data.table() - - pop_2020[, demo_pop := pop * pct] - - ## summarize by demographic group - pop_2020 <- pop_2020[, - .(pop_2020 = sum(demo_pop)), - by = .(demo_group, demo_cat) - ] - - ## compute gaps - ## ---------------------------------------------------- - gaps_df <- copy(health_grp) - - ## change scenario names, factor - gaps_df[, - scenario := paste0(demand_scenario, " demand - ", refining_scenario) - ] - # gaps_df[, scenario := gsub('BAU', 'Reference', scenario)] - gaps_df[, scenario := gsub("LC1.", "Low ", scenario)] - - ## refactor - gaps_df[, scenario_title := scenario] - gaps_df[, scenario_title := str_replace(scenario_title, " - ", "\n")] - - ## calculate gaps (BAU - scenario) - bau_gaps_df <- gaps_df[scen_id == "BAU historic production"] - bau_gaps_df <- bau_gaps_df[, c( - "year", - "demo_cat", - "demo_group", - "title", - "mortality_level_dem" - )] - setnames(bau_gaps_df, "mortality_level_dem", "bau_mortality_level_dem") - - gaps_df <- merge( - gaps_df, - bau_gaps_df, - by = c("year", "demo_cat", "demo_group", "title"), - all.x = T - ) - - gaps_df[, gap := mortality_level_dem - bau_mortality_level_dem] - - ## convert to per million - gaps_df <- merge( - gaps_df, - pop_2020, - by = c("demo_group", "demo_cat"), - all.x = T - ) - - ## calculate per capita - gaps_df[, value := gap / pop_2020] - gaps_df[, value_pmil := value * 1e6] - - ## change historic to historical - gaps_df[, scen_id := str_replace(scen_id, "historic", "historical")] - gaps_df[, - refining_scenario := str_replace( - refining_scenario, - "historic", - "historical" - ) - ] - gaps_df[, scenario := str_replace(scenario, "historic", "historical")] - gaps_df[, - scenario_title := str_replace(scenario_title, "historic", "historical") - ] - - gaps_df$scenario <- factor( - gaps_df$scenario, - levels = c( - "BAU demand - historical production", - "BAU demand - historical exports", - "BAU demand - low exports", - "Low demand - historical exports", - "Low demand - low exports", - "Low demand - historical production" - ) - ) - - gaps_df$scenario_title <- factor( - gaps_df$scenario_title, - levels = c( - "BAU demand\nhistorical production", - "BAU demand\nhistorical exports", - "BAU demand\nlow exports", - "Low demand\nhistorical exports", - "Low demand\nlow exports", - "Low demand\nhistorical production" - ) - ) - - ## save figure inputs - fwrite( - gaps_df, - file.path( - main_path, - save_path, - "fig-csv-files", - "state_levels_fig_gaps_pmil_inputs.csv" - ) - ) - # fwrite(gaps_df, file.path(main_path, "outputs/academic-out/refining/figures/2024-08-beta-adj/fig-csv-files/", "state_levels_fig_gaps_pmil_inputs.csv")) - - ## make figures - ## --------------------------------------------------------- - - ## scenarios for filtering - remove_scen <- c("LC1 historical production", "BAU historical production") - - ## figure a - fig_title_vec <- c("Asian", "Black", "Hispanic", "white") - - health_gap_fig_a <- ggplot( - gaps_df %>% - filter( - !scen_id %in% remove_scen, - title %in% fig_title_vec, - demo_cat == "Race" - ) %>% - mutate( - title = factor(title, levels = c("Black", "Hispanic", "Asian", "white")) - ), - aes(x = year, y = value_pmil, color = title) - ) + - geom_line(linewidth = 1, alpha = 0.8) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - facet_grid(demo_cat ~ scenario_title) + - scale_color_manual( - name = "", - values = race_col_pal - ) + - labs( - x = NULL, - y = NULL - ) + - scale_x_continuous( - breaks = c(2020, 2045), # Specify tick mark positions - labels = c(2020, 2045) - ) + # Specify tick mark labels - theme_line + - # ylim(c(-30, 0)) + - theme( - legend.position = "bottom", - legend.title = element_blank(), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - strip.text.x = element_blank(), - # axis.text.x = element_blank(), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - legend_figa <- health_gap_fig_a + theme(legend.position = "right") - - legend_a <- get_legend( - legend_figa + - theme(legend.text = element_text(size = 8)) - ) - - ## - health_gap_fig_b <- ggplot( - gaps_df %>% - filter( - !scen_id %in% remove_scen, - demo_cat == "DAC" - ), - aes(x = year, y = value_pmil, lty = title) - ) + - geom_line(linewidth = 1, alpha = 0.8) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - facet_grid(demo_cat ~ scenario_title) + - labs( - x = NULL, - y = NULL - ) + - # ylim(c(-30, 0)) + - scale_x_continuous( - breaks = c(2020, 2045), # Specify tick mark positions - labels = c(2020, 2045) - ) + # Specify tick mark labels - scale_linetype_manual(values = dac_lty) + - theme_line + - theme( - legend.position = "bottom", - legend.title = element_blank(), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - # strip.text.x = element_blank(), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - # axis.text.x = element_blank(), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - legend_figb <- health_gap_fig_b + theme(legend.position = "right") - - legend_b <- get_legend( - legend_figb + - theme(legend.text = element_text(size = 8)) - ) - - ## - health_gap_fig_c <- ggplot( - gaps_df %>% - filter( - !scen_id %in% remove_scen, - demo_cat == "Poverty" - ) %>% - mutate( - title = factor( - title, - levels = c("Below poverty line", "Above poverty line") - ) - ), - aes(x = year, y = value_pmil, lty = title) - ) + - geom_line(linewidth = 1, alpha = 0.8, color = "black") + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - facet_grid(demo_cat ~ scenario_title) + - labs( - x = NULL, - y = NULL - ) + - scale_linetype_manual(values = poverty_lty) + - scale_x_continuous( - breaks = c(2020, 2045), # Specify tick mark positions - labels = c(2020, 2045) - ) + # Specify tick mark labels - # ylim(c(-30, 0)) + - theme_line + - theme( - legend.position = "bottom", - legend.title = element_blank(), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - legend.key.width = unit(10, "mm"), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - strip.text.x = element_blank(), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - legend_figc <- health_gap_fig_c + theme(legend.position = "right") - - legend_c <- get_legend( - legend_figc + - theme(legend.text = element_text(size = 8)) - ) - - ## shared y lab - # yaxis_lab <- ggdraw() + draw_label(expression(paste("PM"[2.5], " (",mu,"g ", m^{-3},")", " per person, difference from reference")), - # size = 8, angle = 90) - - yaxis_lab <- ggdraw() + - draw_label( - "Avoided mortalities per million people, difference from reference", - size = 8, - angle = 90 - ) - - # ## plot together - # fig2l_a <- plot_grid( - # health_level_fig_a, - # legend_a, - # align = 'h', - # # labels = c("A", "B", "C", "D", "E", "F"), - # # # labels = 'AUTO', - # # label_size = 10, - # hjust = -1, - # nrow = 1, - # ncol = 2, - # rel_widths = c(0.95, 0.5), - # rel_heighs = c(1, 1) - # ) - # - # - - gaps_plot_grid <- plot_grid( - health_gap_fig_b, - health_gap_fig_c, - health_gap_fig_a, - align = "v", - # labels = c("A", "B", "C", "D", "E", "F"), - # # labels = 'AUTO', - # label_size = 10, - hjust = -1, - nrow = 3, - ncol = 1, - rel_widths = c(1, 1, 1, 1), - rel_heighs = c(1, 1, 1, 1.05) - ) - - gaps_plot_grid2 <- plot_grid( - yaxis_lab, - gaps_plot_grid, - align = "v", - # labels = c("A", "B", "C", "D", "E", "F"), - # # labels = 'AUTO', - # label_size = 10, - hjust = -1, - nrow = 1, - ncol = 2, - rel_widths = c(0.05, 1), - rel_heighs = c(1, 1) - ) - - gaps_plot_grid2 -} - - -## plot health pm2.5 gaps - -plot_health_levels_gaps_pm25 <- function(main_path, save_path, health_grp) { - gaps_df <- copy(health_grp) - - ## change scenario names, factor - gaps_df[, - scenario := paste0(demand_scenario, " demand - ", refining_scenario) - ] - # gaps_df[, scenario := gsub('BAU', 'Reference', scenario)] - gaps_df[, scenario := gsub("LC1.", "Low ", scenario)] - - ## refactor - gaps_df[, scenario_title := scenario] - gaps_df[, scenario_title := str_replace(scenario_title, " - ", "\n")] - - ## calculate gaps (BAU - scenario) - bau_gaps_df <- gaps_df[scen_id == "BAU historic production"] - bau_gaps_df <- bau_gaps_df[, c( - "year", - "demo_cat", - "demo_group", - "title", - "num_over_den" - )] - setnames(bau_gaps_df, "num_over_den", "bau_num_over_den") - - gaps_df <- merge( - gaps_df, - bau_gaps_df, - by = c("year", "demo_cat", "demo_group", "title"), - all.x = T - ) - - gaps_df[, gap := num_over_den - bau_num_over_den] - - ## change historic to historical - gaps_df[, scen_id := str_replace(scen_id, "historic", "historical")] - gaps_df[, - refining_scenario := str_replace( - refining_scenario, - "historic", - "historical" - ) - ] - gaps_df[, scenario := str_replace(scenario, "historic", "historical")] - gaps_df[, - scenario_title := str_replace(scenario_title, "historic", "historical") - ] - - gaps_df$scenario <- factor( - gaps_df$scenario, - levels = c( - "BAU demand - historical production", - "BAU demand - historical exports", - "BAU demand - low exports", - "Low demand - historical exports", - "Low demand - low exports", - "Low demand - historical production" - ) - ) - - gaps_df$scenario_title <- factor( - gaps_df$scenario_title, - levels = c( - "BAU demand\nhistorical production", - "BAU demand\nhistorical exports", - "BAU demand\nlow exports", - "Low demand\nhistorical exports", - "Low demand\nlow exports", - "Low demand\nhistorical production" - ) - ) - - ## save figure inputs - fwrite( - gaps_df, - file.path( - main_path, - save_path, - "fig-csv-files", - "state_levels_fig_gaps_pm25_inputs.csv" - ) - ) - # fwrite(gaps_df, file.path(main_path, "outputs/academic-out/refining/figures/2024-08-beta-adj/fig-csv-files/", "state_levels_fig_gaps_pm25_inputs.csv")) - - ## make figures - ## --------------------------------------------------------- - - ## scenarios for filtering - remove_scen <- c("LC1 historical production", "BAU historical production") - - ## figure a - fig_title_vec <- c("Asian", "Black", "Hispanic", "white") - - health_gap_fig_a <- ggplot( - gaps_df %>% - filter( - !scen_id %in% remove_scen, - title %in% fig_title_vec, - demo_cat == "Race" - ) %>% - mutate( - title = factor(title, levels = c("Black", "Hispanic", "Asian", "white")) - ), - aes(x = year, y = gap, color = title) - ) + - geom_line(linewidth = 1, alpha = 0.8) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - facet_grid(demo_cat ~ scenario_title) + - scale_color_manual( - name = "", - values = race_col_pal - ) + - labs( - x = NULL, - y = NULL - ) + - scale_x_continuous( - breaks = c(2020, 2045), # Specify tick mark positions - labels = c(2020, 2045) - ) + # Specify tick mark labels - theme_line + - # ylim(c(-0.31, 0)) + - theme( - legend.position = "bottom", - legend.title = element_blank(), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - strip.text.x = element_blank(), - # axis.text.x = element_blank(), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - legend_figa <- health_gap_fig_a + theme(legend.position = "right") - - legend_a <- get_legend( - legend_figa + - theme(legend.text = element_text(size = 8)) - ) - - ## - health_gap_fig_b <- ggplot( - gaps_df %>% - filter( - !scen_id %in% remove_scen, - demo_cat == "DAC" - ), - aes(x = year, y = gap, lty = title) - ) + - geom_line(linewidth = 1, alpha = 0.8) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - facet_grid(demo_cat ~ scenario_title) + - labs( - x = NULL, - y = NULL - ) + - # ylim(c(-0.31, 0)) + - scale_x_continuous( - breaks = c(2020, 2045), # Specify tick mark positions - labels = c(2020, 2045) - ) + # Specify tick mark labels - scale_linetype_manual(values = dac_lty) + - theme_line + - theme( - legend.position = "bottom", - legend.title = element_blank(), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - # strip.text.x = element_blank(), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - # axis.text.x = element_blank(), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - legend_figb <- health_gap_fig_b + theme(legend.position = "right") - - legend_b <- get_legend( - legend_figb + - theme(legend.text = element_text(size = 8)) - ) - - ## - health_gap_fig_c <- ggplot( - gaps_df %>% - filter( - !scen_id %in% remove_scen, - demo_cat == "Poverty" - ) %>% - mutate( - title = factor( - title, - levels = c("Below poverty line", "Above poverty line") - ) - ), - aes(x = year, y = gap, lty = title) - ) + - geom_line(linewidth = 1, alpha = 0.8, color = "black") + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - facet_grid(demo_cat ~ scenario_title) + - labs( - x = NULL, - y = NULL - ) + - scale_linetype_manual(values = poverty_lty) + - scale_x_continuous( - breaks = c(2020, 2045), # Specify tick mark positions - labels = c(2020, 2045) - ) + # Specify tick mark labels - # ylim(c(-0.31, 0)) + - theme_line + - theme( - legend.position = "bottom", - legend.title = element_blank(), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - legend.key.width = unit(10, "mm"), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - strip.text.x = element_blank(), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - legend_figc <- health_gap_fig_c + theme(legend.position = "right") - - legend_c <- get_legend( - legend_figc + - theme(legend.text = element_text(size = 8)) - ) - - ## shared y lab - yaxis_lab <- ggdraw() + - draw_label( - expression(paste( - "PM"[2.5], - " (", - mu, - "g ", - m^{ - -3 - }, - ")", - " per person, difference from reference" - )), - size = 8, - angle = 90 - ) - - # ## plot together - # fig2l_a <- plot_grid( - # health_level_fig_a, - # legend_a, - # align = 'h', - # # labels = c("A", "B", "C", "D", "E", "F"), - # # # labels = 'AUTO', - # # label_size = 10, - # hjust = -1, - # nrow = 1, - # ncol = 2, - # rel_widths = c(0.95, 0.5), - # rel_heighs = c(1, 1) - # ) - # - # - - gaps_plot_grid <- plot_grid( - health_gap_fig_b, - health_gap_fig_c, - health_gap_fig_a, - align = "v", - # labels = c("A", "B", "C", "D", "E", "F"), - # # labels = 'AUTO', - # label_size = 10, - hjust = -1, - nrow = 3, - ncol = 1, - rel_widths = c(1, 1, 1, 1), - rel_heighs = c(1, 1, 1, 1.05) - ) - - gaps_plot_grid2 <- plot_grid( - yaxis_lab, - gaps_plot_grid, - align = "v", - # labels = c("A", "B", "C", "D", "E", "F"), - # # labels = 'AUTO', - # label_size = 10, - hjust = -1, - nrow = 1, - ncol = 2, - rel_widths = c(0.05, 1), - rel_heighs = c(1, 1) - ) - - gaps_plot_grid2 -} - -########################################################################### -## plot labor -########################################################################### - -plot_labor_levels <- function( - main_path, - save_path, - ref_labor_demog_yr, - refining_mortality, - pop_ratios -) { - # ## calc 2020 pop by demographic - # pop_2020 <- refining_mortality %>% - # filter(year == 2020) %>% - # select(census_tract, year, pop) %>% - # unique() %>% - # left_join(pop_ratios) %>% - # as.data.table() - # - # pop_2020[, demo_pop := pop * pct] - # - # ## summarize by demographic group - # pop_2020 <- pop_2020[, .(pop_2020 = sum(demo_pop)), - # by = .(demo_group, demo_cat)] - - fig2_l_df <- copy(ref_labor_demog_yr) - - ## change scenario names, factor - fig2_l_df[, - scenario := paste0(demand_scenario, " demand - ", refining_scenario) - ] - # fig2_l_df[, scenario := gsub('BAU', 'Reference', scenario)] - fig2_l_df[, scenario := gsub("LC1.", "Low ", scenario)] - - ## scenarios for filtering - # remove_scen <- c('LC1 historic production', 'BAU low exports', 'LC1 historic exports') - remove_scen <- c("Low demand - historical production") - - ## add scenario title - fig2_l_df[, scenario_title := str_replace(scenario, " - ", "\n")] - - # ## sum for state - # fig2_l_df <- fig2_l_df[, .( - # sum_demo_emp = sum(demo_emp), - # sum_demo_comp_pv_h = sum(demo_comp_pv_h), - # sum_demo_comp_pv_l = sum(demo_comp_pv_l) - # ), - # by = .( - # year, demand_scenario, refining_scenario, oil_price_scenario, - # scenario, scenario_title, demo_cat, demo_group, title - # ) - # ] - - # ## merge with 2020 pop - # fig2_l_df <- merge(fig2_l_df, pop_2020, - # by = c("demo_cat", "demo_group"), - # all.x = T) - - # ## calculate per capita - # fig2_l_df[, demo_emp_pc := sum_demo_emp / pop_2020] - # fig2_l_df[, demo_comp_pc_h := sum_demo_comp_pv_h / pop_2020] - # fig2_l_df[, demo_comp_pc_l := sum_demo_comp_pv_l / pop_2020] - # - # ## select columns - # fig2_l_df <- fig2_l_df[, .(year, demand_scenario, refining_scenario, - # scenario, scenario_title, demo_cat, demo_group, title, sum_demo_emp, - # demo_emp_pc, sum_demo_comp_pv_h, sum_demo_comp_pv_l, demo_comp_pc_h, demo_comp_pc_l)] - - ## change historic to historical - fig2_l_df[, - refining_scenario := str_replace( - refining_scenario, - "historic", - "historical" - ) - ] - fig2_l_df[, scenario := str_replace(scenario, "historic", "historical")] - fig2_l_df[, - scenario_title := str_replace(scenario_title, "historic", "historical") - ] - - ## refactor - fig2_l_df$scenario_title <- factor( - fig2_l_df$scenario_title, - levels = c( - "BAU demand\nhistorical production", - "BAU demand\nhistorical exports", - "BAU demand\nlow exports", - "Low demand\nhistorical exports", - "Low demand\nlow exports", - "Low demand\nhistorical production" - ) - ) - - ## refactor - fig2_l_df$scenario <- factor( - fig2_l_df$scenario, - levels = c( - "BAU demand - historical production", - "BAU demand - historical exports", - "BAU demand - low exports", - "Low demand - historical exports", - "Low demand - low exports", - "Low demand - historical production" - ) - ) - - # ## test to see if this matches fig 5 outputs - # test_state <- fig2_l_df %>% - # group_by(demand_scenario, refining_scenario, scenario, scenario_title, demo_cat, demo_group) %>% - # summarise(sum_demo_comp_pv = sum(sum_demo_comp_pv), - # demo_comp_pc = sum(demo_comp_pc)) %>% - # ungroup() - # - # bau_state <- test_state %>% - # filter(demand_scenario == "BAU" & refining_scenario == "historical production") %>% - # select(demo_cat, demo_group, sum_demo_comp_pv, demo_comp_pc) %>% - # rename(bau_comp = sum_demo_comp_pv, - # bau_comp_pc = demo_comp_pc) - # - # test_state <- test_state %>% - # left_join(bau_state) %>% - # mutate(diff = sum_demo_comp_pv - bau_comp, nj, - # diff_pc = demo_comp_pc - bau_comp_pc) - # - # ggplot(test_state %>% filter(!scenario_title %in% c("BAU demand\nhistorical production", - # "Low demand\nhistorical production")), aes(y = diff_pc, x = scenario_title, color = demo_group)) + - # geom_point() + - # facet_wrap(~demo_cat, nrow = 3) - # - - ## save figure inputs - fwrite( - fig2_l_df, - file.path( - main_path, - save_path, - "fig-csv-files", - "state_levels_labor_fig_inputs.csv" - ) - ) - # fwrite(fig2_l_df, file.path(main_path, "outputs/academic-out/refining/figures/2024-08-beta-adj/fig-csv-files/", "state_levels_labor_fig_inputs.csv")) - - ## labor figure - fig_title_vec <- c("Asian", "Black", "Hispanic", "white") - - # ## make the df longer - split in two, rbind - # fig2_l_df_h <- fig2_l_df %>% - # select(-sum_demo_comp_pv_l, -demo_comp_pc_l) %>% - # pivot_longer(sum_demo_comp_pv_h:demo_comp_pc_h, names_to = "comp_metric", values_to = "high_est") %>% - # mutate(comp_metric = substr(comp_metric, 1, nchar(comp_metric) - 2)) - # - # fig2_l_df <- fig2_l_df %>% - # select(-sum_demo_comp_pv_h, -demo_comp_pc_h) %>% - # pivot_longer(sum_demo_comp_pv_l:demo_comp_pc_l, names_to = "comp_metric", values_to = "low_est") %>% - # mutate(comp_metric = substr(comp_metric, 1, nchar(comp_metric) - 2)) %>% - # left_join(fig2_l_df_h) - # - - ## labor fig a - labor_level_fig_a <- ggplot( - fig2_l_df %>% - filter( - !scenario %in% remove_scen, - title %in% fig_title_vec, - demo_cat == "Race", - oil_price_scenario == "reference case" - ) %>% - mutate( - title = factor(title, levels = c("Hispanic", "white", "Asian", "Black")) - ), - aes(x = year, y = sum_demo_emp / 1000, color = title, group = title) - ) + - geom_line(linewidth = 1, alpha = 0.8) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - facet_grid(demo_cat ~ scenario_title) + - scale_color_manual( - name = "", - values = race_col_pal - ) + - labs( - x = NULL, - y = NULL - ) + - # ylim(c(0, 45)) + - scale_x_continuous( - breaks = c(2020, 2045), # Specify tick mark positions - labels = c(2020, 2045) - ) + # Specify tick mark labels - theme_line + - theme( - legend.position = "bottom", - legend.title = element_blank(), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - strip.text.x = element_blank(), - # axis.text.x = element_blank(), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - # legend_figa <- labor_level_fig_a + theme(legend.position = "right") - # - # legend_a <- get_legend( - # legend_figa + - # theme(legend.text = element_text(size = 8))) - - ## - labor_level_fig_b <- ggplot( - fig2_l_df %>% - filter( - !scenario %in% remove_scen, - demo_cat == "DAC", - oil_price_scenario == "reference case" - ), - aes(x = year, y = sum_demo_emp / 1000, lty = title) - ) + - geom_line(linewidth = 1, alpha = 0.8) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - facet_grid(demo_cat ~ scenario_title) + - scale_color_manual( - name = "", - values = dac_lty - ) + - labs( - x = NULL, - y = NULL - ) + - # ylim(c(0, 45)) + - scale_x_continuous( - breaks = c(2020, 2045), # Specify tick mark positions - labels = c(2020, 2045) - ) + # Specify tick mark labels - theme_line + - theme( - legend.position = "bottom", - legend.title = element_blank(), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - # strip.text.x = element_blank(), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - # axis.text.x = element_blank(), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - # legend_figb <- labor_level_fig_b + theme(legend.position = "right") - # - # legend_b <- get_legend( - # legend_figb + - # theme(legend.text = element_text(size = 8))) - - ## - labor_level_fig_c <- ggplot( - fig2_l_df %>% - filter( - !scenario %in% remove_scen, - demo_cat == "Poverty", - oil_price_scenario == "reference case" - ) %>% - mutate( - title = factor( - title, - levels = c("Below poverty line", "Above poverty line") - ) - ), - aes(x = year, y = sum_demo_emp / 1000, lty = title) - ) + - geom_line(linewidth = 1, alpha = 0.8, color = "black") + - scale_color_manual( - name = "", - values = poverty_lty - ) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - facet_grid(demo_cat ~ scenario) + - labs( - x = NULL, - y = NULL - ) + - # ylim(c(0, 45)) + - scale_x_continuous( - breaks = c(2020, 2045), # Specify tick mark positions - labels = c(2020, 2045) - ) + # Specify tick mark labels - theme_line + - theme( - legend.position = "bottom", - legend.title = element_blank(), - # axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - legend.key.width = unit(10, "mm"), - strip.text.x = element_blank(), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - # legend_figc <- health_level_fig_c + theme(legend.position = "right") - # - # legend_c <- get_legend( - # legend_figc + - # theme(legend.text = element_text(size = 8))) - - ## shared y lab - yaxis_lab <- ggdraw() + - draw_label("Labor: FTE job-years (thousand)", size = 8, angle = 90) - - # ## plot together - # fig2l_a <- plot_grid( - # health_level_fig_a, - # legend_a, - # align = 'h', - # # labels = c("A", "B", "C", "D", "E", "F"), - # # # labels = 'AUTO', - # # label_size = 10, - # hjust = -1, - # nrow = 1, - # ncol = 2, - # rel_widths = c(0.95, 0.5), - # rel_heighs = c(1, 1) - # ) - # - # - - fig2_l_plot_grid <- plot_grid( - labor_level_fig_b, - labor_level_fig_c, - labor_level_fig_a, - align = "v", - # labels = c("A", "B", "C", "D", "E", "F"), - # # labels = 'AUTO', - # label_size = 10, - hjust = -1, - nrow = 3, - ncol = 1, - rel_widths = c(1, 1, 1, 1), - rel_heighs = c(1, 1, 1, 1.05) - ) - - fig2_l_plot_grid2 <- plot_grid( - yaxis_lab, - fig2_l_plot_grid, - align = "v", - # labels = c("A", "B", "C", "D", "E", "F"), - # # labels = 'AUTO', - # label_size = 10, - hjust = -1, - nrow = 1, - ncol = 2, - rel_widths = c(0.05, 1), - rel_heighs = c(1, 1) - ) - - fig2_l_plot_grid2 -} - - -plot_labor_levels_pmil <- function( - main_path, - save_path, - ref_labor_demog_yr, - refining_mortality, - pop_ratios -) { - ## calc 2020 pop by demographic - pop_2020 <- refining_mortality %>% - filter(year == 2020) %>% - select(census_tract, year, pop) %>% - unique() %>% - left_join(pop_ratios) %>% - as.data.table() - - pop_2020[, demo_pop := pop * pct] - - ## summarize by demographic group - pop_2020 <- pop_2020[, - .(pop_2020 = sum(demo_pop)), - by = .(demo_group, demo_cat) - ] - - fig2_l_df <- copy(ref_labor_demog_yr) - - ## change scenario names, factor - fig2_l_df[, - scenario := paste0(demand_scenario, " demand - ", refining_scenario) - ] - # fig2_l_df[, scenario := gsub('BAU', 'Reference', scenario)] - fig2_l_df[, scenario := gsub("LC1.", "Low ", scenario)] - - ## scenarios for filtering - # remove_scen <- c('LC1 historic production', 'BAU low exports', 'LC1 historic exports') - remove_scen <- c("Low demand - historical production") - - ## add scenario title - fig2_l_df[, scenario_title := str_replace(scenario, " - ", "\n")] - - # - # ## sum for state - # fig2_l_df <- fig2_l_df[, .( - # sum_demo_emp = sum(demo_emp), - # sum_demo_comp_pv_h = sum(demo_comp_pv_h), - # sum_demo_comp_pv_l = sum(demo_comp_pv_l) - # ), - # by = .( - # year, demand_scenario, refining_scenario, oil_price_scenario, - # scenario, scenario_title, demo_cat, demo_group, title - # ) - # ] - - ## merge with 2020 pop - fig2_l_df <- merge( - fig2_l_df, - pop_2020, - by = c("demo_cat", "demo_group"), - all.x = T - ) - - ## calculate per capita - fig2_l_df[, demo_emp_pc := sum_demo_emp / pop_2020] - fig2_l_df[, demo_emp_pmil := demo_emp_pc * 1e6] - fig2_l_df[, demo_comp_pc_h := sum_demo_comp_pv_h / pop_2020] - fig2_l_df[, demo_comp_pc_pmil_h := demo_comp_pc_h * 1e6] - fig2_l_df[, demo_comp_pc_l := sum_demo_comp_pv_l / pop_2020] - fig2_l_df[, demo_comp_pc_pmil_l := demo_comp_pc_l * 1e6] - - ## select columns - fig2_l_df <- fig2_l_df[, .( - year, - demand_scenario, - refining_scenario, - oil_price_scenario, - scenario, - scenario_title, - demo_cat, - demo_group, - title, - sum_demo_emp, - demo_emp_pc, - demo_emp_pmil, - sum_demo_comp_pv_h, - sum_demo_comp_pv_l, - demo_comp_pc_pmil_h, - demo_comp_pc_pmil_l, - demo_comp_pc_h, - demo_comp_pc_l - )] - - ## change historic to historical - fig2_l_df[, - refining_scenario := str_replace( - refining_scenario, - "historic", - "historical" - ) - ] - fig2_l_df[, scenario := str_replace(scenario, "historic", "historical")] - fig2_l_df[, - scenario_title := str_replace(scenario_title, "historic", "historical") - ] - - ## refactor - fig2_l_df$scenario_title <- factor( - fig2_l_df$scenario_title, - levels = c( - "BAU demand\nhistorical production", - "BAU demand\nhistorical exports", - "BAU demand\nlow exports", - "Low demand\nhistorical exports", - "Low demand\nlow exports", - "Low demand\nhistorical production" - ) - ) - - ## refactor - fig2_l_df$scenario <- factor( - fig2_l_df$scenario, - levels = c( - "BAU demand - historical production", - "BAU demand - historical exports", - "BAU demand - low exports", - "Low demand - historical exports", - "Low demand - low exports", - "Low demand - historical production" - ) - ) - - # ## test to see if this matches fig 5 outputs - # test_state <- fig2_l_df %>% - # group_by(demand_scenario, refining_scenario, scenario, scenario_title, demo_cat, demo_group) %>% - # summarise(sum_demo_comp_pv = sum(sum_demo_comp_pv), - # demo_comp_pc = sum(demo_comp_pc)) %>% - # ungroup() - # - # bau_state <- test_state %>% - # filter(demand_scenario == "BAU" & refining_scenario == "historical production") %>% - # select(demo_cat, demo_group, sum_demo_comp_pv, demo_comp_pc) %>% - # rename(bau_comp = sum_demo_comp_pv, - # bau_comp_pc = demo_comp_pc) - # - # test_state <- test_state %>% - # left_join(bau_state) %>% - # mutate(diff = sum_demo_comp_pv - bau_comp, nj, - # diff_pc = demo_comp_pc - bau_comp_pc) - # - # ggplot(test_state %>% filter(!scenario_title %in% c("BAU demand\nhistorical production", - # "Low demand\nhistorical production")), aes(y = diff_pc, x = scenario_title, color = demo_group)) + - # geom_point() + - # facet_wrap(~demo_cat, nrow = 3) - # - - ## save figure inputs - fwrite( - fig2_l_df, - file.path( - main_path, - save_path, - "fig-csv-files", - "state_levels_labor_pmil_fig_inputs.csv" - ) - ) - # fwrite(fig2_l_df, file.path(main_path, "outputs/academic-out/refining/figures/2024-08-beta-adj/fig-csv-files/", "state_levels_labor_pmil_fig_inputs.csv")) - - ## labor figure - fig_title_vec <- c("Asian", "Black", "Hispanic", "white") - - ## make the df longer - split in two, rbind - fig2_l_df_h <- fig2_l_df %>% - select(-sum_demo_comp_pv_l, -demo_comp_pc_pmil_l, -demo_comp_pc_l) %>% - pivot_longer( - sum_demo_comp_pv_h:demo_comp_pc_h, - names_to = "comp_metric", - values_to = "no_re_emp" - ) %>% - mutate(comp_metric = substr(comp_metric, 1, nchar(comp_metric) - 2)) %>% - as.data.table() - - fig2_l_df <- fig2_l_df %>% - select(-sum_demo_comp_pv_h, -demo_comp_pc_pmil_h, -demo_comp_pc_h) %>% - pivot_longer( - sum_demo_comp_pv_l:demo_comp_pc_l, - names_to = "comp_metric", - values_to = "with_re_emp" - ) %>% - mutate(comp_metric = substr(comp_metric, 1, nchar(comp_metric) - 2)) %>% - left_join(fig2_l_df_h) %>% - as.data.table() - - ## just employment - fig2_l_df <- fig2_l_df %>% - select(year:demo_emp_pmil) %>% - unique() - - ## labor fig a - labor_level_fig_a <- ggplot( - fig2_l_df %>% - filter( - !scenario %in% remove_scen, - title %in% fig_title_vec, - demo_cat == "Race", - oil_price_scenario == "reference case" - ) %>% - mutate( - title = factor(title, levels = c("Hispanic", "white", "Asian", "Black")) - ), - aes(x = year, y = demo_emp_pmil, color = title, group = title) - ) + - geom_line(linewidth = 1, alpha = 0.8) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - facet_grid(demo_cat ~ scenario_title) + - scale_color_manual( - name = "", - values = race_col_pal - ) + - labs( - x = NULL, - y = NULL - ) + - # ylim(c(0, 3500)) + - scale_x_continuous( - breaks = c(2020, 2045), # Specify tick mark positions - labels = c(2020, 2045) - ) + # Specify tick mark labels - theme_line + - theme( - legend.position = "bottom", - legend.title = element_blank(), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - strip.text.x = element_blank(), - # axis.text.x = element_blank(), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - # legend_figa <- labor_level_fig_a + theme(legend.position = "right") - # - # legend_a <- get_legend( - # legend_figa + - # theme(legend.text = element_text(size = 8))) - - ## - labor_level_fig_b <- ggplot( - fig2_l_df %>% - filter( - !scenario %in% remove_scen, - demo_cat == "DAC", - oil_price_scenario == "reference case" - ), - aes(x = year, y = demo_emp_pmil, lty = title) - ) + - geom_line(linewidth = 1, alpha = 0.8) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - facet_grid(demo_cat ~ scenario_title) + - scale_color_manual( - name = "", - values = dac_lty - ) + - labs( - x = NULL, - y = NULL - ) + - # ylim(c(0, 3500)) + - scale_x_continuous( - breaks = c(2020, 2045), # Specify tick mark positions - labels = c(2020, 2045) - ) + # Specify tick mark labels - theme_line + - theme( - legend.position = "bottom", - legend.title = element_blank(), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - # strip.text.x = element_blank(), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - # axis.text.x = element_blank(), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - # legend_figb <- labor_level_fig_b + theme(legend.position = "right") - # - # legend_b <- get_legend( - # legend_figb + - # theme(legend.text = element_text(size = 8))) - - ## - labor_level_fig_c <- ggplot( - fig2_l_df %>% - filter( - !scenario %in% remove_scen, - demo_cat == "Poverty", - oil_price_scenario == "reference case" - ) %>% - mutate( - title = factor( - title, - levels = c("Below poverty line", "Above poverty line") - ) - ), - aes(x = year, y = demo_emp_pmil, lty = title) - ) + - geom_line(linewidth = 1, alpha = 0.8, color = "black") + - scale_color_manual( - name = "", - values = poverty_lty - ) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - facet_grid(demo_cat ~ scenario) + - labs( - x = NULL, - y = NULL - ) + - # ylim(c(0, 3500)) + - scale_x_continuous( - breaks = c(2020, 2045), # Specify tick mark positions - labels = c(2020, 2045) - ) + # Specify tick mark labels - theme_line + - theme( - legend.position = "bottom", - legend.title = element_blank(), - # axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - legend.key.width = unit(10, "mm"), - strip.text.x = element_blank(), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - # legend_figc <- health_level_fig_c + theme(legend.position = "right") - # - # legend_c <- get_legend( - # legend_figc + - # theme(legend.text = element_text(size = 8))) - - ## shared y lab - yaxis_lab <- ggdraw() + - draw_label("Labor: FTE job-years per million people", size = 8, angle = 90) - - # ## plot together - # fig2l_a <- plot_grid( - # health_level_fig_a, - # legend_a, - # align = 'h', - # # labels = c("A", "B", "C", "D", "E", "F"), - # # # labels = 'AUTO', - # # label_size = 10, - # hjust = -1, - # nrow = 1, - # ncol = 2, - # rel_widths = c(0.95, 0.5), - # rel_heighs = c(1, 1) - # ) - # - # - - fig2_l_plot_grid <- plot_grid( - labor_level_fig_b, - labor_level_fig_c, - labor_level_fig_a, - align = "v", - # labels = c("A", "B", "C", "D", "E", "F"), - # # labels = 'AUTO', - # label_size = 10, - hjust = -1, - nrow = 3, - ncol = 1, - rel_widths = c(1, 1, 1, 1), - rel_heighs = c(1, 1, 1, 1.05) - ) - - fig2_l_plot_grid2 <- plot_grid( - yaxis_lab, - fig2_l_plot_grid, - align = "v", - # labels = c("A", "B", "C", "D", "E", "F"), - # # labels = 'AUTO', - # label_size = 10, - hjust = -1, - nrow = 1, - ncol = 2, - rel_widths = c(0.05, 1), - rel_heighs = c(1, 1) - ) - - fig2_l_plot_grid2 -} - - -plot_labor_levels_gaps <- function( - main_path, - save_path, - ref_labor_demog_yr, - refining_mortality, - pop_ratios -) { - # ## calc 2020 pop by demographic - # pop_2020 <- refining_mortality %>% - # filter(year == 2020) %>% - # select(census_tract, year, pop) %>% - # unique() %>% - # left_join(pop_ratios) %>% - # as.data.table() - # - # pop_2020[, demo_pop := pop * pct] - # - # ## summarize by demographic group - # pop_2020 <- pop_2020[, .(pop_2020 = sum(demo_pop)), - # by = .(demo_group, demo_cat)] - # - ## labor outputs - l_gaps_df <- copy(ref_labor_demog_yr) - l_gaps_df <- l_gaps_df[oil_price_scenario == "reference case", ] - - ## change scenario names, factor - l_gaps_df[, - scenario := paste0(demand_scenario, " demand - ", refining_scenario) - ] - # gaps_df[, scenario := gsub('BAU', 'Reference', scenario)] - l_gaps_df[, scenario := gsub("LC1.", "Low ", scenario)] - - ## add scenario title - l_gaps_df[, scenario_title := scenario] - l_gaps_df[, scenario_title := str_replace(scenario_title, " - ", "\n")] - - ## change historic to historical - l_gaps_df[, - refining_scenario := str_replace( - refining_scenario, - "historic", - "historical" - ) - ] - l_gaps_df[, scenario := str_replace(scenario, "historic", "historical")] - l_gaps_df[, - scenario_title := str_replace(scenario_title, "historic", "historical") - ] - - ## scenarios for filtering - remove_scen <- c( - "Low demand - historical production", - "BAU demand - historical production" - ) - - l_gaps_df$scenario <- factor( - l_gaps_df$scenario, - levels = c( - "BAU demand - historical production", - "BAU demand - historical exports", - "BAU demand - low exports", - "Low demand - historical exports", - "Low demand - low exports", - "Low demand - historical production" - ) - ) - - l_gaps_df$scenario_title <- factor( - l_gaps_df$scenario_title, - levels = c( - "BAU demand\nhistorical production", - "BAU demand\nhistorical exports", - "BAU demand\nlow exports", - "Low demand\nhistorical exports", - "Low demand\nlow exports", - "Low demand\nhistorical production" - ) - ) - - # ## sum for state - # l_gaps_df <- l_gaps_df[, .(sum_demo_emp = sum(demo_emp)), - # by = .( - # year, demand_scenario, refining_scenario, oil_price_scenario, - # scenario, scenario_title, demo_cat, demo_group, title - # ) - # ] - - ## calculate gaps (BAU - scenario) - l_bau_gaps_df <- l_gaps_df[scenario == "BAU demand - historical production"] - l_bau_gaps_df <- l_bau_gaps_df[, c( - "year", - "demo_cat", - "demo_group", - "title", - "sum_demo_emp" - )] - setnames(l_bau_gaps_df, "sum_demo_emp", "bau_sum_demo_emp") - - l_gaps_df <- merge( - l_gaps_df, - l_bau_gaps_df, - by = c("year", "demo_cat", "demo_group", "title"), - all.x = T - ) - - l_gaps_df[, gap_emp := sum_demo_emp - bau_sum_demo_emp] - - # ## normalize if needed - # ## merge with 2020 pop - # l_gaps_df <- merge(l_gaps_df, pop_2020, - # by = c("demo_cat", "demo_group"), - # all.x = T) - # - # ## calculate per capita - # l_gaps_df[, demo_emp_pc := sum_demo_emp / pop_2020] - # - # ## select columns - l_gaps_df <- l_gaps_df[, .( - year, - demand_scenario, - refining_scenario, - oil_price_scenario, - scenario, - scenario_title, - demo_cat, - demo_group, - title, - sum_demo_emp, - gap_emp - )] - - ## save figure inputs - fwrite( - l_gaps_df, - file.path( - main_path, - save_path, - "fig-csv-files", - "state_labor_levels_fig_gaps_inputs.csv" - ) - ) - # fwrite(l_gaps_df, file.path(main_path, "outputs/academic-out/refining/figures/2024-08-beta-adj/fig-csv-files/", "state_labor_levels_fig_gaps_inputs.csv")) - - ## figure a - fig_title_vec <- c("Black", "Asian", "white", "Hispanic") - - labor_gap_fig_a <- ggplot( - l_gaps_df %>% - filter( - !scenario %in% remove_scen, - title %in% fig_title_vec, - demo_cat == "Race", - oil_price_scenario == "reference case" - ) %>% - mutate( - title = factor(title, levels = c("Black", "Asian", "white", "Hispanic")) - ), - aes(x = year, y = gap_emp / 1000, color = title) - ) + - geom_line(linewidth = 1, alpha = 0.8) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - facet_grid(demo_cat ~ scenario_title) + - scale_color_manual( - name = "", - values = race_col_pal - ) + - labs( - x = NULL, - y = NULL - ) + - # ylim(c(-40, 0)) + - scale_x_continuous( - breaks = c(2020, 2045), # Specify tick mark positions - labels = c(2020, 2045) - ) + # Specify tick mark labels - theme_line + - theme( - legend.position = "bottom", - legend.title = element_blank(), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - strip.text.x = element_blank(), - # axis.text.x = element_blank(), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - # legend_figa <- health_gap_fig_a + theme(legend.position = "right") - # - # legend_a <- get_legend( - # legend_figa + - # theme(legend.text = element_text(size = 8))) - - ## - labor_gap_fig_b <- ggplot( - l_gaps_df %>% - filter( - !scenario %in% remove_scen, - demo_cat == "DAC", - oil_price_scenario == "reference case" - ), - aes(x = year, y = gap_emp / 1000, lty = title) - ) + - geom_line(linewidth = 1, alpha = 0.8) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - facet_grid(demo_cat ~ scenario_title) + - scale_linetype_manual(values = dac_lty) + - labs( - x = NULL, - y = NULL - ) + - # ylim(c(-40, 0)) + - scale_x_continuous( - breaks = c(2020, 2045), # Specify tick mark positions - labels = c(2020, 2045) - ) + # Specify tick mark labels - theme_line + - # ylim(c(-3500, 0)) + - theme( - legend.position = "bottom", - legend.title = element_blank(), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - # strip.text.x = element_blank(), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - # axis.text.x = element_blank(), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - # legend_figb <- health_gap_fig_b + theme(legend.position = "right") - # - # legend_b <- get_legend( - # legend_figb + - # theme(legend.text = element_text(size = 8))) - - ## - labor_gap_fig_c <- ggplot( - l_gaps_df %>% - filter( - !scenario %in% remove_scen, - demo_cat == "Poverty", - oil_price_scenario == "reference case" - ) %>% - mutate( - title = factor( - title, - levels = c("Below poverty line", "Above poverty line") - ) - ), - aes(x = year, y = gap_emp / 1000, lty = title) - ) + - geom_line(linewidth = 1, alpha = 0.8, color = "black") + - scale_linetype_manual(values = poverty_lty) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - facet_grid(demo_cat ~ scenario_title) + - labs( - x = NULL, - y = NULL - ) + - # ylim(c(-40, 0)) + - scale_x_continuous( - breaks = c(2020, 2045), # Specify tick mark positions - labels = c(2020, 2045) - ) + # Specify tick mark labels - theme_line + - theme( - legend.position = "bottom", - legend.title = element_blank(), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - legend.key.width = unit(10, "mm"), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - strip.text.x = element_blank(), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - # - # legend_figc <- health_gap_fig_c + theme(legend.position = "right") - # - # legend_c <- get_legend( - # legend_figc + - # theme(legend.text = element_text(size = 8))) - - ## shared y lab - yaxis_lab <- ggdraw() + - draw_label( - "Labor: FTE-jobs, difference from reference (thousand)", - size = 8, - angle = 90 - ) - - # ## plot together - # fig2l_a <- plot_grid( - # health_level_fig_a, - # legend_a, - # align = 'h', - # # labels = c("A", "B", "C", "D", "E", "F"), - # # # labels = 'AUTO', - # # label_size = 10, - # hjust = -1, - # nrow = 1, - # ncol = 2, - # rel_widths = c(0.95, 0.5), - # rel_heighs = c(1, 1) - # ) - # - # - - l_gaps_plot_grid <- plot_grid( - labor_gap_fig_b, - labor_gap_fig_c, - labor_gap_fig_a, - align = "v", - # labels = c("A", "B", "C", "D", "E", "F"), - # # labels = 'AUTO', - # label_size = 10, - hjust = -1, - nrow = 3, - ncol = 1, - rel_widths = c(1, 1, 1, 1), - rel_heighs = c(1, 1, 1, 1.05) - ) - - l_gaps_plot_grid2 <- plot_grid( - yaxis_lab, - l_gaps_plot_grid, - align = "v", - # labels = c("A", "B", "C", "D", "E", "F"), - # # labels = 'AUTO', - # label_size = 10, - hjust = -1, - nrow = 1, - ncol = 2, - rel_widths = c(0.05, 1), - rel_heighs = c(1, 1) - ) - - l_gaps_plot_grid2 -} -# - -plot_labor_levels_gaps_pmil <- function( - main_path, - save_path, - ref_labor_demog_yr, - refining_mortality, - pop_ratios -) { - ## calc 2020 pop by demographic - pop_2020 <- refining_mortality %>% - filter(year == 2020) %>% - select(census_tract, year, pop) %>% - unique() %>% - left_join(pop_ratios) %>% - as.data.table() - - pop_2020[, demo_pop := pop * pct] - - ## summarize by demographic group - pop_2020 <- pop_2020[, - .(pop_2020 = sum(demo_pop)), - by = .(demo_group, demo_cat) - ] - - ## labor outputs - l_gaps_df <- copy(ref_labor_demog_yr) - - ## change scenario names, factor - l_gaps_df[, - scenario := paste0(demand_scenario, " demand - ", refining_scenario) - ] - # gaps_df[, scenario := gsub('BAU', 'Reference', scenario)] - l_gaps_df[, scenario := gsub("LC1.", "Low ", scenario)] - - ## add scenario title - l_gaps_df[, scenario_title := scenario] - l_gaps_df[, scenario_title := str_replace(scenario_title, " - ", "\n")] - - ## change historic to historical - l_gaps_df[, - refining_scenario := str_replace( - refining_scenario, - "historic", - "historical" - ) - ] - l_gaps_df[, scenario := str_replace(scenario, "historic", "historical")] - l_gaps_df[, - scenario_title := str_replace(scenario_title, "historic", "historical") - ] - - ## scenarios for filtering - remove_scen <- c( - "Low demand - historical production", - "BAU demand - historical production" - ) - - l_gaps_df$scenario <- factor( - l_gaps_df$scenario, - levels = c( - "BAU demand - historical production", - "BAU demand - historical exports", - "BAU demand - low exports", - "Low demand - historical exports", - "Low demand - low exports", - "Low demand - historical production" - ) - ) - - l_gaps_df$scenario_title <- factor( - l_gaps_df$scenario_title, - levels = c( - "BAU demand\nhistorical production", - "BAU demand\nhistorical exports", - "BAU demand\nlow exports", - "Low demand\nhistorical exports", - "Low demand\nlow exports", - "Low demand\nhistorical production" - ) - ) - # ## sum for state - # l_gaps_df <- l_gaps_df[, .(sum_demo_emp = sum(demo_emp)), - # by = .( - # year, demand_scenario, refining_scenario, oil_price_scenario, - # scenario, scenario_title, demo_cat, demo_group, title - # ) - # ] - - ## select columns - l_gaps_df <- l_gaps_df[, .( - year, - demand_scenario, - refining_scenario, - oil_price_scenario, - scenario, - scenario_title, - demo_cat, - demo_group, - title, - sum_demo_emp - )] - - l_gaps_df <- l_gaps_df[oil_price_scenario == "reference case", ] - - ## calculate gaps (BAU - scenario) - l_bau_gaps_df <- l_gaps_df[scenario == "BAU demand - historical production"] - l_bau_gaps_df <- l_bau_gaps_df[, c( - "year", - "demo_cat", - "demo_group", - "title", - "sum_demo_emp" - )] - setnames(l_bau_gaps_df, "sum_demo_emp", "bau_sum_demo_emp") - - l_gaps_df <- merge( - l_gaps_df, - l_bau_gaps_df, - by = c("year", "demo_cat", "demo_group", "title"), - all.x = T - ) - - l_gaps_df[, gap_emp := sum_demo_emp - bau_sum_demo_emp] - - ## merge with 2020 pop - l_gaps_df <- merge( - l_gaps_df, - pop_2020, - by = c("demo_cat", "demo_group"), - all.x = T - ) - - ## calculate per capita - l_gaps_df[, gap_emp_pc := gap_emp / pop_2020] - l_gaps_df[, gap_emp_pmil := gap_emp_pc * 1e6] - - ## save figure inputs - fwrite( - l_gaps_df, - file.path( - main_path, - save_path, - "fig-csv-files", - "state_labor_levels_fig_gaps_pmil_inputs.csv" - ) - ) - # fwrite(l_gaps_df, file.path(main_path, "outputs/academic-out/refining/figures/2024-08-beta-adj/fig-csv-files/", "state_labor_levels_fig_gaps_pmil_inputs.csv")) - - ## figure a - fig_title_vec <- c("Black", "Asian", "white", "Hispanic") - - labor_gap_fig_a <- ggplot( - l_gaps_df %>% - filter( - !scenario %in% remove_scen, - title %in% fig_title_vec, - demo_cat == "Race", - oil_price_scenario == "reference case" - ) %>% - mutate( - title = factor(title, levels = c("Black", "Asian", "white", "Hispanic")) - ), - aes(x = year, y = gap_emp_pmil, color = title) - ) + - geom_line(linewidth = 1, alpha = 0.8) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - facet_grid(demo_cat ~ scenario_title) + - scale_color_manual( - name = "", - values = race_col_pal - ) + - labs( - x = NULL, - y = NULL - ) + - # ylim(-2500, 0) + - scale_x_continuous( - breaks = c(2020, 2045), # Specify tick mark positions - labels = c(2020, 2045) - ) + # Specify tick mark labels - theme_line + - theme( - legend.position = "bottom", - legend.title = element_blank(), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - strip.text.x = element_blank(), - # axis.text.x = element_blank(), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - # legend_figa <- health_gap_fig_a + theme(legend.position = "right") - # - # legend_a <- get_legend( - # legend_figa + - # theme(legend.text = element_text(size = 8))) - - ## - labor_gap_fig_b <- ggplot( - l_gaps_df %>% - filter( - !scenario %in% remove_scen, - demo_cat == "DAC", - oil_price_scenario == "reference case" - ), - aes(x = year, y = gap_emp_pmil, lty = title) - ) + - geom_line(linewidth = 1, alpha = 0.8) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - facet_grid(demo_cat ~ scenario_title) + - scale_linetype_manual(values = dac_lty) + - labs( - x = NULL, - y = NULL - ) + - # ylim(-2500, 0) + - scale_x_continuous( - breaks = c(2020, 2045), # Specify tick mark positions - labels = c(2020, 2045) - ) + # Specify tick mark labels - theme_line + - # ylim(c(-3500, 0)) + - theme( - legend.position = "bottom", - legend.title = element_blank(), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - # strip.text.x = element_blank(), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - # axis.text.x = element_blank(), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - # legend_figb <- health_gap_fig_b + theme(legend.position = "right") - # - # legend_b <- get_legend( - # legend_figb + - # theme(legend.text = element_text(size = 8))) - - ## - labor_gap_fig_c <- ggplot( - l_gaps_df %>% - filter( - !scenario %in% remove_scen, - demo_cat == "Poverty", - oil_price_scenario == "reference case" - ) %>% - mutate( - title = factor( - title, - levels = c("Below poverty line", "Above poverty line") - ) - ), - aes(x = year, y = gap_emp_pmil, lty = title) - ) + - geom_line(linewidth = 1, alpha = 0.8, color = "black") + - scale_linetype_manual(values = poverty_lty) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - facet_grid(demo_cat ~ scenario_title) + - labs( - x = NULL, - y = NULL - ) + - scale_x_continuous( - breaks = c(2020, 2045), # Specify tick mark positions - labels = c(2020, 2045) - ) + # Specify tick mark labels - # ylim(-2500, 0) + - theme_line + - theme( - legend.position = "bottom", - legend.title = element_blank(), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - legend.key.width = unit(10, "mm"), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - strip.text.x = element_blank(), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - # - # legend_figc <- health_gap_fig_c + theme(legend.position = "right") - # - # legend_c <- get_legend( - # legend_figc + - # theme(legend.text = element_text(size = 8))) - - ## shared y lab - yaxis_lab <- ggdraw() + - draw_label( - "Labor: FTE-jobs, difference from reference per million people", - size = 8, - angle = 90 - ) - - # ## plot together - # fig2l_a <- plot_grid( - # health_level_fig_a, - # legend_a, - # align = 'h', - # # labels = c("A", "B", "C", "D", "E", "F"), - # # # labels = 'AUTO', - # # label_size = 10, - # hjust = -1, - # nrow = 1, - # ncol = 2, - # rel_widths = c(0.95, 0.5), - # rel_heighs = c(1, 1) - # ) - # - # - - l_gaps_plot_grid <- plot_grid( - labor_gap_fig_b, - labor_gap_fig_c, - labor_gap_fig_a, - align = "v", - # labels = c("A", "B", "C", "D", "E", "F"), - # # labels = 'AUTO', - # label_size = 10, - hjust = -1, - nrow = 3, - ncol = 1, - rel_widths = c(1, 1, 1, 1), - rel_heighs = c(1, 1, 1, 1.05) - ) - - l_gaps_plot_grid2 <- plot_grid( - yaxis_lab, - l_gaps_plot_grid, - align = "v", - # labels = c("A", "B", "C", "D", "E", "F"), - # # labels = 'AUTO', - # label_size = 10, - hjust = -1, - nrow = 1, - ncol = 2, - rel_widths = c(0.05, 1), - rel_heighs = c(1, 1) - ) - - l_gaps_plot_grid2 -} - - -############################################################################ -############################################################################ - -plot_hl_levels_df <- function( - main_path, - save_path, - ref_mortality_demog, - ref_labor_demog, - state_ghg_output, - dt_ghg_2019 -) { - health_df <- copy(ref_mortality_demog) - - ## group by scenario, demo_cat, demo_group, title, and sum - health_df <- health_df[, - .( - sum_cost_2019_pv = sum(demo_cost_2019_PV, na.rm = T), ## constant VSL - sum_cost_pv = sum(demo_cost_PV, na.rm = T) - ), ## changing VSL - by = .( - scen_id, - demand_scenario, - refining_scenario, - demo_cat, - demo_group, - title - ) - ] - - ## multiply by -1 - health_df[, sum_cost_2019_pv := sum_cost_2019_pv * -1] - health_df[, sum_cost_pv := sum_cost_pv * -1] - - ## add ghg emission reduction ---------------------------- - ## 2019 ghg - ghg_2019_val <- dt_ghg_2019$mtco2e[1] - - ## 2045 vs 2019 ghg - ghg_2045 <- state_ghg_output[year == 2045 & source == "total"] - setnames(ghg_2045, "value", "ghg_kg") - ghg_2045[, ghg_2045 := (ghg_kg / 1000) / 1e6] - ghg_2045[, ghg_2019 := ghg_2019_val] - ghg_2045[, perc_diff := (ghg_2045 - ghg_2019) / ghg_2019] - - perc_diff_df <- ghg_2045[, .( - demand_scenario, - refining_scenario, - ghg_2045, - ghg_2019, - perc_diff - )] - - ## summarize by scenario, filter for total - state_ghg_df <- state_ghg_output[ - source == "total", - .(total_ghg = sum(value)), - by = .(demand_scenario, refining_scenario) - ] - - state_ghg_df[, total_ghg_mmt := (total_ghg / 1000) / 1e6] - - ## reference - ref_df <- state_ghg_df[ - demand_scenario == "BAU" & refining_scenario == "historic production", - .(total_ghg_mmt) - ] - setnames(ref_df, "total_ghg_mmt", "ref_ghg_mmt") - ref_value <- ref_df$ref_ghg_mmt[1] - - ## merge with summarized df - state_ghg_df[, ref_ghg := ref_value] - state_ghg_df[, avoided_ghg := (total_ghg_mmt - ref_value) * -1] - - ## merge with health - health_ghg_df <- merge( - health_df, - state_ghg_df[, .( - demand_scenario, - refining_scenario, - total_ghg_mmt, - ref_ghg, - avoided_ghg - )], - by = c("demand_scenario", "refining_scenario"), - all.x = T - ) - - ## labor - labor_df <- copy(ref_labor_demog) - labor_df <- labor_df[oil_price_scenario == "reference case", ] - - ## summarize across years - labor_df <- labor_df[, - .( - sum_demo_emp = sum(sum_demo_emp), - sum_demo_comp_pv_h = sum(sum_demo_comp_pv_h), - sum_demo_comp_pv_l = sum(sum_demo_comp_pv_l) - ), - by = .(demand_scenario, refining_scenario, demo_cat, demo_group, title) - ] - - ## ref labor - ref_labor <- labor_df[ - demand_scenario == "BAU" & refining_scenario == "historic production" - ] - setnames( - ref_labor, - c("sum_demo_emp", "sum_demo_comp_pv_h", "sum_demo_comp_pv_l"), - c("ref_total_emp", "ref_total_comp_pv_h", "ref_total_comp_pv_l") - ) - ref_labor <- ref_labor[, .( - demo_cat, - demo_group, - title, - ref_total_emp, - ref_total_comp_pv_h, - ref_total_comp_pv_l - )] - - ## add values to labor - labor_df <- merge( - labor_df, - ref_labor, - by = c("demo_cat", "demo_group", "title") - ) - - ## calculate difference - labor_df[, forgone_wages_h := (sum_demo_comp_pv_h - ref_total_comp_pv_h)] - labor_df[, forgone_wages_l := (sum_demo_comp_pv_l - ref_total_comp_pv_l)] - - ## merge with health and ghg - health_labor_ghg_df <- merge( - health_ghg_df, - labor_df[, .( - demand_scenario, - refining_scenario, - demo_cat, - demo_group, - title, - sum_demo_comp_pv_h, - sum_demo_comp_pv_l, - ref_total_comp_pv_h, - ref_total_comp_pv_l, - forgone_wages_h, - forgone_wages_l - )], - by = c( - "demand_scenario", - "refining_scenario", - "demo_cat", - "demo_group", - "title" - ), - all.x = T - ) - - ## add ghg perc reduction - health_labor_ghg_df <- merge( - health_labor_ghg_df, - perc_diff_df, - by = c("demand_scenario", "refining_scenario"), - all.x = T - ) - - ## prepare to plot - plot_df <- health_labor_ghg_df[, .( - scen_id, - demand_scenario, - refining_scenario, - demo_cat, - demo_group, - title, - sum_cost_pv, - sum_cost_2019_pv, - forgone_wages_h, - forgone_wages_l, - avoided_ghg, - perc_diff - )] - - setnames(plot_df, "perc_diff", "ghg_perc_diff") - - ## pivot longer - plot_df <- plot_df %>% - select( - scen_id:title, - ghg_perc_diff, - sum_cost_pv, - sum_cost_2019_pv, - forgone_wages_h, - forgone_wages_l - ) %>% - pivot_longer( - sum_cost_pv:forgone_wages_l, - names_to = "metric", - values_to = "value" - ) - - ## add column for vsl - plot_df_health <- plot_df %>% - filter(metric %in% c("sum_cost_pv", "sum_cost_2019_pv")) %>% - mutate( - segment = "health", - unit_desc = ifelse( - metric == "sum_cost_2019_pv", - "USD (2019 VSL)", - "USD (annual VSL)" - ), - metric_desc = "avoided_health_cost" - ) - - plot_df_labor <- plot_df %>% - filter(metric %in% c("forgone_wages_h", "forgone_wages_l")) %>% - mutate( - segment = "labor", - unit_desc = "USD", - metric_desc = "forgone_wages" - ) - - plot_df_long <- rbind(plot_df_health, plot_df_labor) - - plot_df_long <- plot_df_long %>% - mutate( - seg_title = ifelse( - segment == "health", - "Health: avoided mortality", - "Labor: forgone wages" - ) - ) - - ## rename - setDT(plot_df_long) - plot_df_long[, - scenario := paste0(demand_scenario, " demand - ", refining_scenario) - ] - # plot_df_long[, scenario := gsub('BAU', 'Reference', scenario)] - plot_df_long[, scenario := gsub("LC1.", "Low ", scenario)] - # plot_df_long[, short_scen := gsub('BAU', 'Reference', short_scen)] - # plot_df_long[, short_scen := gsub('Low C.', 'Low carbon', short_scen)] - - ## change historic to historical - plot_df_long[, scen_id := str_replace(scen_id, "historic", "historical")] - plot_df_long[, - refining_scenario := str_replace( - refining_scenario, - "historic", - "historical" - ) - ] - plot_df_long[, scenario := str_replace(scenario, "historic", "historical")] - - ## refactor - plot_df_long$scenario <- factor( - plot_df_long$scenario, - levels = c( - "BAU demand - historical production", - "BAU demand - historical exports", - "BAU demand - low exports", - "Low demand - historical exports", - "Low demand - low exports", - "Low demand - historical production" - ) - ) - - ## titles for plotting - plot_df_long[, - demand_title := ifelse(demand_scenario == "BAU", "BAU demand", "Low demand") - ] - plot_df_long[, - scen_title := paste0(demand_title, "\n", str_to_sentence(refining_scenario)) - ] - - plot_df_long$scen_title <- factor( - plot_df_long$scen_title, - levels = c( - "BAU demand\nHistorical production", - "BAU demand\nHistorical exports", - "BAU demand\nLow exports", - "Low demand\nHistorical exports", - "Low demand\nLow exports", - "Low demand\nHistorical production" - ) - ) - - ## save figure inputs - fwrite( - plot_df_long, - file.path( - main_path, - save_path, - "fig-csv-files", - "state_disaggregated_npv_fig_inputs.csv" - ) - ) - # fwrite(plot_df_long, file.path(main_path, "outputs/academic-out/refining/figures/2024-08-beta-adj/fig-csv-files/", "state_disaggregated_npv_fig_inputs.csv")) - - return(plot_df_long) -} - - -plot_hl_levels <- function(demographic_npv_df) { - plot_df_long <- copy(demographic_npv_df) - - ## create the figure --------------------------------------------- - ## --------------------------------------------------------------- - - ## scenarios for filtering - remove_scen <- c("LC1 historical production", "BAU historical production") - bau_scen <- "BAU historical production" - - fig_title_vec <- c("Asian", "Black", "Hispanic", "white") - - ## add column for defining shapes - plot_df_long[, demo_grp_metric := paste0(demo_group, "_", metric)] - - ## health fig - race - health_level_fig_a <- ggplot() + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - geom_point( - data = plot_df_long %>% - filter( - !scen_id %in% remove_scen, - demo_cat == "Race", - unit_desc == "USD (2019 VSL)", - title %in% fig_title_vec - ) %>% - mutate( - title = factor( - title, - levels = c("Black", "Asian", "white", "Hispanic") - ) - ), - aes(x = scen_title, y = value / 1e9, color = title), - size = 3, - alpha = 0.8 - ) + - facet_wrap(~seg_title) + - scale_color_manual( - name = "", - values = race_col_pal - ) + - # ylim(0, 12) + - labs( - y = "NPV (USD billion)", - x = NULL, - color = NULL - ) + - theme_line + - theme( - legend.position = "none", - legend.title = element_blank(), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - ## labor fig - race - labor_level_fig_a <- ggplot() + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - geom_point( - data = plot_df_long %>% - filter( - !scen_id %in% remove_scen, - demo_cat == "Race", - segment == "labor", - title %in% fig_title_vec - ) %>% - mutate( - title = factor( - title, - levels = c("Black", "Asian", "white", "Hispanic") - ) - ), - aes(x = scen_title, y = value / 1e9, color = title, shape = metric), - size = 3, - alpha = 0.8 - ) + - facet_wrap(~seg_title) + - scale_color_manual( - name = "", - values = race_col_pal - ) + - scale_shape_manual( - name = "", - values = race_shape_ptc, - labels = high_low_labs - ) + - # ylim(-25, 0) + - labs( - y = "NPV (USD billion)", - x = NULL, - color = NULL - ) + - theme_line + - theme( - legend.position = "none", - legend.title = element_blank(), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - ## health fig - poverty - health_level_fig_b <- ggplot() + - geom_point( - data = plot_df_long %>% - filter( - !scen_id %in% remove_scen, - demo_cat == "Poverty", - unit_desc == "USD (2019 VSL)" - ) %>% - mutate( - title = factor( - title, - levels = c("Below poverty line", "Above poverty line") - ) - ), - aes(x = scen_title, y = value / 1e9, shape = title), - color = "black", - size = 3, - alpha = 0.8 - ) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - scale_shape_manual(values = poverty_ptc_h) + - facet_wrap(~seg_title) + - labs( - y = "NPV (USD billion)", - x = NULL, - color = NULL - ) + - theme_line + - # ylim(0, 20) + - theme( - legend.position = "none", - legend.title = element_blank(), - # axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - ## labor fig - poverty - labor_level_fig_b <- ggplot() + - geom_point( - data = plot_df_long %>% - filter( - !scen_id %in% remove_scen, - demo_cat == "Poverty", - segment == "labor" - ) %>% - mutate( - title = factor( - title, - levels = c("Below poverty line", "Above poverty line") - ) - ), - aes(x = scen_title, y = value / 1e9, shape = demo_grp_metric), - color = "black", - size = 3, - alpha = 0.8 - ) + - scale_shape_manual( - values = poverty_ptc_l, - labels = poverty_hl_labs - ) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - facet_wrap(~seg_title) + - labs( - y = "NPV (USD billion)", - x = NULL, - color = NULL - ) + - # ylim(-50, 0) + - theme_line + - theme( - legend.position = "none", - legend.title = element_blank(), - # axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - ## health fig - DAC - health_level_fig_c <- ggplot() + - geom_point( - data = plot_df_long %>% - filter( - !scen_id %in% remove_scen, - demo_cat == "DAC", - unit_desc == "USD (2019 VSL)" - ), - aes(x = scen_title, y = value / 1e9, shape = title), - color = "black", - size = 3, - alpha = 0.8 - ) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - scale_shape_manual(values = dac_ptc) + - facet_wrap(~seg_title) + - labs( - y = "NPV (USD billion)", - x = NULL, - color = NULL - ) + - # ylim(0, 15) + - theme_line + - theme( - legend.position = "none", - legend.title = element_blank(), - # axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - ## labor fig - DAC - labor_level_fig_c <- ggplot() + - geom_point( - data = plot_df_long %>% - filter( - !scen_id %in% remove_scen, - demo_cat == "DAC", - segment == "labor" - ), - aes(x = scen_title, y = value / 1e9, shape = title), - color = "black", - size = 3, - alpha = 0.8 - ) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - scale_shape_manual( - values = dac_ptc - ) + - facet_wrap(~seg_title) + - labs( - y = "NPV (USD billion)", - x = NULL, - color = NULL - ) + - # ylim(-40, 0) + - theme_line + - theme( - legend.position = "none", - legend.title = element_blank(), - # axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - ## labor fig - DAC - labor_level_fig_c <- ggplot() + - geom_point( - data = plot_df_long %>% - filter( - !scen_id %in% remove_scen, - demo_cat == "DAC", - segment == "labor" - ), - aes(x = scen_title, y = value / 1e9, shape = demo_grp_metric), - color = "black", - size = 3, - alpha = 0.8 - ) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - scale_shape_manual( - values = dac_hl_ptc, - labels = dac_hl_labs - ) + - facet_wrap(~seg_title) + - labs( - y = "NPV (USD billion)", - x = NULL, - color = NULL - ) + - # ylim(-40, 0) + - theme_line + - theme( - legend.position = "none", - legend.title = element_blank(), - # axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - ## combine figure - ## --------------------------------- - - fig_text_size <- 12 - - ## health - health_column_fig_nl <- plot_grid( - health_level_fig_c + - theme( - axis.text.x = element_blank(), - legend.position = "none", - plot.margin = margin(1, 1, 20, 1), - strip.text.x = element_text(size = fig_text_size), - axis.text.y = element_text(size = fig_text_size), - axis.title.y = element_text(size = fig_text_size) - ), - health_level_fig_b + - theme( - axis.text.x = element_blank(), - strip.text.x = element_blank(), - axis.text.y = element_text(size = fig_text_size), - axis.title.y = element_text(size = fig_text_size), - legend.position = "none", - plot.margin = margin(1, 1, 20, 1) - ), - health_level_fig_a + - theme( - strip.text.x = element_blank(), - axis.text.y = element_text(size = fig_text_size), - axis.title.y = element_text(size = fig_text_size), - axis.text.x = element_text(size = fig_text_size), - legend.position = "none" - ), - align = "vh", - labels = c("A", "B", "C"), - # # labels = 'AUTO', - label_size = 10, - hjust = -1, - nrow = 3 - # rel_widths = c(1, 0.25, 1), - # rel_heights = c(1, 0.1, 1, 0.1, 1) - ) - - ## labor - labor_column_fig_nl <- plot_grid( - labor_level_fig_c + - labs(y = NULL) + - theme( - axis.text.x = element_blank(), - strip.text.x = element_text(size = fig_text_size), - axis.text.y = element_text(size = fig_text_size), - legend.position = "none", - plot.margin = margin(1, 1, 20, 1) - ), - labor_level_fig_b + - labs(y = NULL) + - theme( - axis.text.x = element_blank(), - axis.text.y = element_text(size = fig_text_size), - strip.text.x = element_blank(), - legend.position = "none", - plot.margin = margin(1, 1, 20, 1) - ), - labor_level_fig_a + - labs(y = NULL) + - theme( - strip.text.x = element_blank(), - axis.text.y = element_text(size = fig_text_size), - axis.text.x = element_text(size = fig_text_size), - legend.position = "none" - ), - align = "vh", - labels = c("D", "E", "F"), - # # labels = 'AUTO', - label_size = 10, - hjust = -1, - nrow = 3 - # rel_widths = c(1, 0.25, 1), - # rel_heights = c(1, 0.1, 1, 0.1, 1) - ) - - ## all together now - hl_pc_plot_grid_nl <- plot_grid( - health_column_fig_nl, - labor_column_fig_nl, - align = "h", - # labels = c("(A)", "(B)", "(C)", ""), - # # labels = 'AUTO', - # label_size = 10, - # hjust = -1, - ncol = 2, - rel_widths = c(1, 1) - # rel_widths = c(1, 1, 1) - ) - - return(hl_pc_plot_grid_nl) -} - -plot_hl_levels_pc <- function( - demographic_npv_df, - refining_mortality, - pop_ratios, - main_path, - save_path -) { - ## copy npv results - plot_df_long <- copy(demographic_npv_df) - - ## add column for defining shapes - plot_df_long[, demo_grp_metric := paste0(demo_group, "_", metric)] - - ## calc 2020 pop by demographic - pop_2020 <- refining_mortality %>% - filter(year == 2020) %>% - select(census_tract, year, pop) %>% - unique() %>% - left_join(pop_ratios) %>% - as.data.table() - - pop_2020[, demo_pop := pop * pct] - - ## summarize by demographic group - pop_2020 <- pop_2020[, - .(pop_2020 = sum(demo_pop)), - by = .(demo_group, demo_cat) - ] - - ## merge population back with results - plot_df_long <- merge( - plot_df_long, - pop_2020, - by = c("demo_group", "demo_cat"), - all.x = T - ) - - ## calculate per capita - plot_df_long[, value := value / pop_2020] - - ## save figure inputs - fwrite( - plot_df_long, - file.path( - main_path, - save_path, - "fig-csv-files", - "state_disaggregated_npv_pc_fig_inputs.csv" - ) - ) - # fwrite(plot_df_long, file.path(main_path, "outputs/academic-out/refining/figures/2024-08-beta-adj/fig-csv-files/", "state_disaggregated_npv_pc_fig_inputs.csv")) - - ## create the figure --------------------------------------------- - ## --------------------------------------------------------------- - - ## scenarios for filtering - remove_scen <- c("LC1 historical production", "BAU historical production") - bau_scen <- "BAU historical production" - - fig_title_vec <- c("Asian", "Black", "Hispanic", "white") - - ## health fig - race - health_level_fig_a <- ggplot() + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - geom_point( - data = plot_df_long %>% - filter( - !scen_id %in% remove_scen, - demo_cat == "Race", - unit_desc == "USD (2019 VSL)", - title %in% fig_title_vec - ) %>% - mutate( - title = factor( - title, - levels = c("Black", "Hispanic", "Asian", "white") - ) - ), - aes(x = scen_title, y = value, color = title), - size = 3, - alpha = 0.8 - ) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - facet_wrap(~seg_title) + - scale_color_manual( - name = "", - values = race_col_pal - ) + - labs( - y = "NPV per capita (USD)", - x = NULL, - color = NULL - ) + - scale_y_continuous(label = comma, limits = c(0, 3000)) + - theme_line + - theme( - legend.position = "none", - legend.title = element_blank(), - strip.text = element_blank(), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - ## labor fig - race - labor_level_fig_a <- ggplot() + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - geom_point( - data = plot_df_long %>% - filter( - !scen_id %in% remove_scen, - demo_cat == "Race", - segment == "labor", - title %in% fig_title_vec - ) %>% - mutate( - title = factor( - title, - levels = c("Black", "Hispanic", "Asian", "white") - ) - ), - aes(x = scen_title, y = value, color = title, shape = metric), - size = 3, - alpha = 0.8 - ) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - facet_wrap(~seg_title) + - scale_color_manual( - name = "", - values = race_col_pal - ) + - scale_shape_manual( - name = "", - values = race_shape_ptc, - labels = high_low_labs - ) + - labs( - y = " ", - x = NULL, - color = NULL - ) + - scale_y_continuous(label = comma, limits = c(-3000, 0)) + - theme_line + - theme( - legend.position = "none", - legend.title = element_blank(), - strip.text = element_blank(), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - # ## legend - # legend_figa <- labor_level_fig_a + theme(legend.position = "bottom") - # - # legend_a <- get_legend( - # legend_figa + - # theme(legend.text = element_text(size = 12)) + - # guides(color = guide_legend(order = 1), shape = guide_legend(order = 2))) - - # ## save version for presentation - # hl_plot_grid_a_pres <- plot_grid( - # health_level_fig_a + - # theme(axis.title.y = element_text(size = 12), - # axis.text.x = element_text(size = 9), - # axis.text.y = element_text(size = 12), - # strip.text = element_text(size = 12), - # plot.margin = unit(c(0, 0, 0.25, 0.1), "cm")), - # labor_level_fig_a + labs(y = NULL) + - # theme(axis.title.y = element_text(size = 12), - # axis.text.x = element_text(size = 9), - # axis.text.y = element_text(size = 12), - # strip.text = element_text(size = 12), - # plot.margin = unit(c(0, 0, 0.25, 0.1), "cm")), - # align = 'vh', - # # labels = c("A", "B", "C", "D", "E", "F"), - # # # labels = 'AUTO', - # # label_size = 10, - # hjust = -1, - # nrow = 1, - # rel_widths = c(1, 1) - # ) - # - # ## add legend - # hl_plot_grid_a_pres <- plot_grid( - # hl_plot_grid_a_pres, - # legend_a, - # ncol = 1, - # rel_heights = c(0.95, 0.05) - # ) - # - # - # - # ggsave(plot = hl_plot_grid_a_pres, - # filename = paste0(main_path, "outputs/academic-out/refining/figures/2025-health-revisions/presentation-figs/fig5-race.jpeg"), - # device = "jpeg", - # width = 9, - # height = 4, - # units= "in", - # dpi = 300) - # - - ## health fig - poverty - health_level_fig_b <- ggplot() + - geom_point( - data = plot_df_long %>% - filter( - !scen_id %in% remove_scen, - demo_cat == "Poverty", - unit_desc == "USD (2019 VSL)" - ) %>% - mutate( - title = factor( - title, - levels = c("Below poverty line", "Above poverty line") - ) - ), - aes(x = scen_title, y = value, shape = title), - color = "black", - size = 3, - alpha = 0.8 - ) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - scale_shape_manual(values = poverty_ptc_h) + - facet_wrap(~seg_title) + - labs( - y = "NPV per capita (USD)", - x = NULL, - color = NULL - ) + - theme_line + - scale_y_continuous(label = comma, limits = c(0, 3000)) + - theme( - legend.position = "none", - legend.title = element_blank(), - # axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - ## labor fig - poverty - labor_level_fig_b <- ggplot() + - geom_point( - data = plot_df_long %>% - filter( - !scen_id %in% remove_scen, - demo_cat == "Poverty", - segment == "labor" - ) %>% - mutate( - title = factor( - title, - levels = c("Below poverty line", "Above poverty line") - ) - ), - aes(x = scen_title, y = value, shape = demo_grp_metric), - color = "black", - size = 3, - alpha = 0.8 - ) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - scale_shape_manual( - values = poverty_ptc_l, - labels = poverty_hl_labs - ) + - facet_wrap(~seg_title) + - labs( - y = " ", - x = NULL, - color = NULL - ) + - scale_y_continuous(label = comma, limits = c(-3000, 0)) + - theme_line + - theme( - legend.position = "none", - legend.title = element_blank(), - # axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - # legend_figb <- labor_level_fig_b + - # theme(legend.position = "bottom") - # - # legend_b <- get_legend( - # legend_figb + - # theme(legend.text = element_text(size = 12))) - # - - # ## save version for presentation - # hl_plot_grid_b_pres <- plot_grid( - # health_level_fig_b + - # theme(axis.title.y = element_text(size = 12), - # axis.text.x = element_text(size = 9), - # axis.text.y = element_text(size = 12), - # strip.text = element_text(size = 12), - # plot.margin = unit(c(0, 0, 0.25, 0.1), "cm")), - # labor_level_fig_b + labs(y = NULL) + - # theme(axis.title.y = element_text(size = 12), - # axis.text.x = element_text(size = 9), - # axis.text.y = element_text(size = 12), - # strip.text = element_text(size = 12), - # plot.margin = unit(c(0, 0, 0.25, 0.1), "cm")), - # align = 'vh', - # # labels = c("A", "B", "C", "D", "E", "F"), - # # # labels = 'AUTO', - # # label_size = 10, - # hjust = -1, - # nrow = 1, - # rel_widths = c(1, 1) - # ) - # - # ## add legend - # hl_plot_grid_b_pres <- plot_grid( - # hl_plot_grid_b_pres, - # legend_b, - # ncol = 1, - # rel_heights = c(0.95, 0.05) - # ) - # - # ggsave(plot = hl_plot_grid_b_pres, - # filename = paste0(main_path, "outputs/academic-out/refining/figures/2025-health-revisions/presentation-figs/fig5-income.jpeg"), - # device = "jpeg", - # width = 9, - # height = 4, - # units= "in", - # dpi = 300) - - ## health fig - DAC - health_level_fig_c <- ggplot() + - geom_point( - data = plot_df_long %>% - filter( - !scen_id %in% remove_scen, - demo_cat == "DAC", - unit_desc == "USD (2019 VSL)" - ), - aes(x = scen_title, y = value, shape = title), - color = "black", - size = 3, - alpha = 0.8 - ) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - scale_shape_manual(values = dac_ptc) + - facet_wrap(~seg_title) + - labs( - y = "NPV per capita (USD)", - x = NULL, - color = NULL - ) + - theme_line + - scale_y_continuous(label = comma, limits = c(0, 3000)) + - theme( - legend.position = "none", - legend.title = element_blank(), - # axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - ## labor fig - DAC - labor_level_fig_c <- ggplot() + - geom_point( - data = plot_df_long %>% - filter( - !scen_id %in% remove_scen, - demo_cat == "DAC", - segment == "labor" - ), - aes(x = scen_title, y = value, shape = demo_grp_metric), - color = "black", - size = 3, - alpha = 0.8 - ) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - scale_shape_manual( - values = dac_hl_ptc, - labels = dac_hl_labs - ) + - facet_wrap(~seg_title) + - labs( - y = " ", - x = NULL, - color = NULL - ) + - scale_y_continuous(label = comma, limits = c(-3000, 0)) + - theme_line + - theme( - legend.position = "none", - legend.title = element_blank(), - # axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - # legend_figc <- labor_level_fig_c + theme(legend.position = "bottom") - # - # legend_c <- get_legend( - # legend_figc + - # theme(legend.text = element_text(size = 12))) - - # ## save version for presentation - # hl_plot_grid_c_pres <- plot_grid( - # health_level_fig_c + - # theme(axis.title.y = element_text(size = 12), - # axis.text.x = element_text(size = 9), - # axis.text.y = element_text(size = 12), - # strip.text = element_text(size = 12), - # plot.margin = unit(c(0, 0, 0.25, 0.1), "cm")), - # labor_level_fig_c + labs(y = NULL) + - # theme(axis.title.y = element_text(size = 12), - # axis.text.x = element_text(size = 9), - # axis.text.y = element_text(size = 12), - # strip.text = element_text(size = 12), - # plot.margin = unit(c(0, 0, 0.25, 0.1), "cm")), - # align = 'vh', - # # labels = c("A", "B", "C", "D", "E", "F"), - # # # labels = 'AUTO', - # # label_size = 10, - # hjust = -1, - # nrow = 1, - # rel_widths = c(1, 1) - # ) - # - # ## add legend - # hl_plot_grid_c_pres <- plot_grid( - # hl_plot_grid_c_pres, - # legend_c, - # ncol = 1, - # rel_heights = c(0.95, 0.05) - # ) - # - # - # ggsave(plot = hl_plot_grid_c_pres, - # filename = paste0(main_path, "outputs/academic-out/refining/figures/2025-health-revisions/presentation-figs/fig5-dac.jpeg"), - # device = "jpeg", - # width = 9, - # height = 4, - # units= "in", - # dpi = 300) - - ## combine figure - ## --------------------------------- - - fig_text_size <- 12 - - ## health - health_column_fig_nl <- plot_grid( - health_level_fig_c + - theme( - axis.text.x = element_blank(), - legend.position = "none", - plot.margin = margin(1, 1, 20, 1), - strip.text.x = element_text(size = fig_text_size), - axis.text.y = element_text(size = fig_text_size), - axis.title.y = element_text(size = fig_text_size) - ), - health_level_fig_b + - theme( - axis.text.x = element_blank(), - strip.text.x = element_blank(), - axis.text.y = element_text(size = fig_text_size), - axis.title.y = element_text(size = fig_text_size), - legend.position = "none", - plot.margin = margin(1, 1, 20, 1) - ), - health_level_fig_a + - theme( - strip.text.x = element_blank(), - axis.text.y = element_text(size = fig_text_size), - axis.title.y = element_text(size = fig_text_size), - axis.text.x = element_text(size = fig_text_size), - legend.position = "none" - ), - align = "vh", - labels = c("A", "B", "C"), - # # labels = 'AUTO', - label_size = 10, - hjust = -1, - nrow = 3 - # rel_widths = c(1, 0.25, 1), - # rel_heights = c(1, 0.1, 1, 0.1, 1) - ) - - ## labor - labor_column_fig_nl <- plot_grid( - labor_level_fig_c + - labs(y = NULL) + - theme( - axis.text.x = element_blank(), - strip.text.x = element_text(size = fig_text_size), - axis.text.y = element_text(size = fig_text_size), - legend.position = "none", - plot.margin = margin(1, 1, 20, 1) - ), - labor_level_fig_b + - labs(y = NULL) + - theme( - axis.text.x = element_blank(), - axis.text.y = element_text(size = fig_text_size), - strip.text.x = element_blank(), - legend.position = "none", - plot.margin = margin(1, 1, 20, 1) - ), - labor_level_fig_a + - labs(y = NULL) + - theme( - strip.text.x = element_blank(), - axis.text.y = element_text(size = fig_text_size), - axis.text.x = element_text(size = fig_text_size), - legend.position = "none" - ), - align = "vh", - labels = c("D", "E", "F"), - # # labels = 'AUTO', - label_size = 10, - hjust = -1, - nrow = 3 - # rel_widths = c(1, 0.25, 1), - # rel_heights = c(1, 0.1, 1, 0.1, 1) - ) - - ## all together now - hl_pc_plot_grid_nl <- plot_grid( - health_column_fig_nl, - labor_column_fig_nl, - align = "h", - # labels = c("(A)", "(B)", "(C)", ""), - # # labels = 'AUTO', - # label_size = 10, - # hjust = -1, - ncol = 2, - rel_widths = c(1, 1) - # rel_widths = c(1, 1, 1) - ) - - hl_pc_plot_grid_nl - - # ## race - # hl_plot_grid_a <- plot_grid( - # health_level_fig_a + theme(strip.text.x = element_blank()), - # labor_level_fig_a + labs(y = NULL) + theme(strip.text.x = element_blank()), - # align = 'vh', - # # labels = c("A", "B", "C", "D", "E", "F"), - # # # labels = 'AUTO', - # # label_size = 10, - # hjust = -1, - # nrow = 1, - # rel_widths = c(1, 1) - # ) - # - # ## add race legend - # hl_plot_grid_a <- plot_grid( - # hl_plot_grid_a, - # legend_a, - # ncol = 1, - # rel_heights = c(0.95, 0.05) - # ) - # - # ## poverty - # hl_plot_grid_b <- plot_grid( - # health_level_fig_b + theme(axis.text.x = element_blank(), - # strip.text.x = element_blank()), - # labor_level_fig_b + labs(y = NULL) + theme(axis.text.x = element_blank(), - # strip.text.x = element_blank()), - # align = 'vh', - # # labels = c("A", "B", "C", "D", "E", "F"), - # # # labels = 'AUTO', - # # label_size = 10, - # hjust = -1, - # nrow = 1, - # rel_widths = c(1, 1) - # ) - # - # ## add poverty legend - # hl_plot_grid_b <- plot_grid( - # hl_plot_grid_b, - # legend_b, - # ncol = 1, - # rel_heights = c(0.95, 0.05) - # ) - # - # ## DAC - # hl_plot_grid_c <- plot_grid( - # health_level_fig_c + theme(axis.text.x = element_blank()), - # labor_level_fig_c + labs(y = NULL) + theme(axis.text.x = element_blank()), - # align = 'vh', - # # labels = c("A", "B", "C", "D", "E", "F"), - # # # labels = 'AUTO', - # # label_size = 10, - # hjust = -1, - # nrow = 1, - # rel_widths = c(1, 1) - # ) - # - # ## add DAC legend - # hl_plot_grid_c <- plot_grid( - # hl_plot_grid_c, - # legend_c, - # ncol = 1, - # rel_heights = c(0.95, 0.05) - # ) - # - # - # ## all together now - # hl_plot_grid_pc <- plot_grid( - # hl_plot_grid_c, - # NULL, - # hl_plot_grid_b, - # NULL, - # hl_plot_grid_a, - # align = "v", - # # labels = c("(A)", "(B)", "(C)", ""), - # # # labels = 'AUTO', - # # label_size = 10, - # # hjust = -1, - # ncol = 1, - # rel_heights = c(1, 0.1, 1, 0.1, 1) - # # rel_widths = c(1, 1, 1) - # ) - # - - return(hl_pc_plot_grid_nl) -} - - -## npv shares -## ---------------------------------------------------------------------------- - -plot_hl_shares <- function( - main_path, - save_path, - demographic_npv_df, - state_pop_ratios -) { - plot_df_long <- copy(demographic_npv_df) - - ## calculate shares - plot_df_long[, - total_value := sum(value), - by = .( - scen_id, - demand_scenario, - refining_scenario, - demo_cat, - metric, - segment, - unit_desc, - metric_desc, - seg_title, - scenario, - demand_title, - scen_title - ) - ] - - plot_df_long[, share := value / total_value] - - ## shares - pct_df <- copy(state_pop_ratios) - - pct_df[, scen_title := "population"] - - ## create one df for plotting - share_df <- plot_df_long[, .( - scen_id, - demand_scenario, - refining_scenario, - demo_cat, - title, - metric, - unit_desc, - segment, - metric_desc, - seg_title, - scenario, - demand_title, - scen_title, - share - )] - - pop_share_df <- copy(share_df) - pop_share_df[, `:=`( - scen_id = "Population", - demand_scenario = NA, - refining_scenario = NA, - metric = "general_pop_share", - unit_desc = NA, - segment = "general", - metric_desc = "general", - seg_title = "State", - scenario = "State", - demand_title = NA, - scen_title = "State\npopulation", - share = NULL - )] - - pop_share_df <- unique(pop_share_df) - - ## merge - pop_share_df <- merge( - pop_share_df, - pct_df[, .(demo_cat, title, pct)], - by = c("demo_cat", "title") - ) - - setnames(pop_share_df, "pct", "share") - - ## bind - share_df <- rbind(share_df, pop_share_df) - - ## add column for defining shapes - share_df[, demo_grp_metric := paste0(title, "_", metric)] - - ## save figure inputs - fwrite( - share_df, - file.path( - main_path, - save_path, - "fig-csv-files", - "state_disaggreated_npv_share_fig_inputs.csv" - ) - ) - # fwrite(share_df, file.path(main_path, "outputs/academic-out/refining/figures/2024-08-beta-adj/fig-csv-files/", "state_disaggreated_npv_share_fig_inputs.csv")) - - ## create the figure --------------------------------------------- - ## --------------------------------------------------------------- - - ## scenarios for filtering - remove_scen <- c("LC1 historical production", "BAU historical production") - bau_scen <- "BAU historical production" - - fig_title_vec <- c("Asian", "Black", "Hispanic", "white") - - ## health fig - race - health_share_fig_a <- ggplot() + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - geom_point( - data = share_df %>% - filter( - !scen_id %in% remove_scen, - demo_cat == "Race", - unit_desc == "USD (2019 VSL)", - title %in% fig_title_vec - ) %>% - mutate( - title = factor( - title, - levels = c("Black", "Hispanic", "Asian", "white") - ) - ), - aes(x = scen_title, y = share, color = title), - size = 3, - alpha = 0.8 - ) + - facet_wrap(~seg_title) + - scale_color_manual( - name = "", - values = race_col_pal - ) + - # ylim(0, 0.5) + - labs( - y = "NPV share", - x = NULL, - color = NULL - ) + - theme_line + - theme( - legend.position = "bottom", - legend.title = element_blank(), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - ## labor fig - race - labor_share_fig_a <- ggplot() + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - geom_point( - data = share_df %>% - filter( - !scen_id %in% remove_scen, - demo_cat == "Race", - segment == "labor", - title %in% fig_title_vec - ) %>% - mutate( - title = factor( - title, - levels = c("Black", "Hispanic", "Asian", "white") - ) - ), - aes(x = scen_title, y = share, color = title, shape = metric), - size = 3, - alpha = 0.8 - ) + - facet_wrap(~seg_title) + - scale_color_manual( - name = "", - values = race_col_pal - ) + - scale_shape_manual( - name = "", - values = race_shape_ptc, - labels = high_low_labs - ) + - # ylim(0, 0.6) + - labs( - y = "NPV share", - x = NULL, - color = "with re-emp:" - ) + - theme_line + - theme( - legend.position = "bottom", - legend.title = element_text(size = 8), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) + - guides(shape = "none") - - # legend_fig_labor_h <- ggplot() + - # geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - # geom_point( - # data = share_df %>% filter( - # !scen_id %in% remove_scen, - # demo_cat == "Race", - # segment == "labor", - # title %in% fig_title_vec, - # metric == "forgone_wages_h" - # ) %>% - # mutate(title = factor(title, levels = c("Black", "Hispanic", "Asian", "white"))), - # aes(x = scen_title, y = share, color = title, shape = metric), - # size = 3, alpha = 0.8 - # ) + - # facet_wrap(~seg_title) + - # scale_color_manual( - # name = "no re-emp:", - # values = race_col_pal - # ) + - # scale_shape_manual( - # name = "", - # values = race_shape_ptc, - # labels = high_low_labs - # ) + - # # ylim(0, 0.6) + - # labs( - # y = "NPV share", - # x = NULL, - # color = "no re-emp:" - # ) + - # theme_line + - # theme( - # legend.position = "bottom", - # legend.title = element_text(size = 8), - # axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - # plot.margin = unit(c(0, 0, 0, 0), "cm"), - # axis.ticks.length.y = unit(0.1, "cm"), - # axis.ticks.length.x = unit(0.1, "cm") - # ) + - # guides( - # color = guide_legend(override.aes = list(shape = 21)), - # shape = "none" - # ) - # - # - # legend_a_h <- get_legend( - # legend_fig_labor_h + - # theme(legend.text = element_text(size = 12)) - # ) - - # ## add no-emp legend to labor fig - # labor_share_fig_a2 <- plot_grid( - # labor_share_fig_a, - # legend_a_h, - # ncol = 1, - # rel_heights = c(0.95, 0.05), - # rel_widths = c(1, 1) - # ) - - ## state fig - race - state_share_fig_a <- ggplot() + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - geom_point( - data = share_df %>% - filter( - !scen_id %in% remove_scen, - demo_cat == "Race", - segment == "general", - title %in% fig_title_vec - ) %>% - mutate( - title = factor( - title, - levels = c("Black", "Hispanic", "Asian", "white") - ) - ), - aes(x = scen_title, y = share, color = title), - size = 3, - alpha = 0.8 - ) + - facet_wrap(~seg_title) + - scale_color_manual( - name = "", - values = race_col_pal - ) + - # ylim(0, 0.5) + - labs( - y = " ", - x = NULL, - color = NULL - ) + - theme_line + - theme( - legend.position = "none", - legend.title = element_blank(), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - ## health fig - poverty - health_share_fig_b <- ggplot() + - geom_point( - data = share_df %>% - filter( - !scen_id %in% remove_scen, - demo_cat == "Poverty", - unit_desc == "USD (2019 VSL)" - ) %>% - mutate( - title = factor( - title, - levels = c("Below poverty line", "Above poverty line") - ) - ), - aes(x = scen_title, y = share, shape = title), - color = "black", - size = 3, - alpha = 0.8 - ) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - facet_wrap(~seg_title) + - scale_shape_manual(values = poverty_ptc_h) + - labs( - y = "NPV share", - x = NULL, - color = NULL - ) + - theme_line + - # ylim(0, 0.9) + - theme( - legend.position = "bottom", - legend.title = element_blank(), - # axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) + - guides(shape = guide_legend(nrow = 2, byrow = TRUE)) - - labor_share_fig_b <- ggplot() + - geom_point( - data = share_df %>% - filter( - !scen_id %in% remove_scen, - demo_cat == "Poverty", - segment == "labor" - ) %>% - mutate( - title = factor( - title, - levels = c("Below poverty line", "Above poverty line") - ), - demo_grp_metric = factor( - demo_grp_metric, - levels = c( - "Above poverty line_forgone_wages_l", - "Below poverty line_forgone_wages_l", - "Above poverty line_forgone_wages_h", - "Below poverty line_forgone_wages_h" - ) - ) - ), - aes(x = scen_title, y = share, shape = demo_grp_metric), - color = "black", - size = 3, - alpha = 0.8 - ) + - scale_shape_manual( - values = poverty_pt_share_l, - labels = poverty_hl_share_labs - ) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - facet_wrap(~seg_title) + - # ylim(0, 0.9) + - labs( - y = "NPV share", - x = NULL, - color = NULL - ) + - theme_line + - theme( - legend.position = "bottom", - legend.title = element_blank(), - # axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) + - guides(shape = guide_legend(nrow = 2, byrow = TRUE)) - - ## state - poverty - state_share_fig_b <- ggplot() + - geom_point( - data = share_df %>% - filter( - !scen_id %in% remove_scen, - demo_cat == "Poverty", - segment == "general" - ) %>% - mutate( - title = factor( - title, - levels = c("Below poverty line", "Above poverty line") - ) - ), - aes(x = scen_title, y = share, shape = title), - color = "black", - size = 3, - alpha = 0.8 - ) + - scale_shape_manual(values = poverty_ptc_h) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - facet_wrap(~seg_title) + - # ylim(0, 0.9) + - labs( - y = " ", - x = NULL, - color = NULL - ) + - theme_line + - theme( - legend.position = "none", - legend.title = element_blank(), - # axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - ## health fig - DAC - health_share_fig_c <- ggplot() + - geom_point( - data = share_df %>% - filter( - !scen_id %in% remove_scen, - demo_cat == "DAC", - unit_desc == "USD (2019 VSL)" - ), - aes(x = scen_title, y = share, shape = title), - color = "black", - size = 3, - alpha = 0.8 - ) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - scale_shape_manual(values = dac_ptc) + - facet_wrap(~seg_title) + - # ylim(0, 0.85) + - labs( - y = "NPV share", - x = NULL, - color = NULL - ) + - theme_line + - theme( - legend.position = "bottom", - legend.title = element_blank(), - # axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) + - guides(shape = guide_legend(nrow = 2, byrow = TRUE)) - - ## labor fig - DAC - labor_share_fig_c <- ggplot() + - geom_point( - data = share_df %>% - filter( - !scen_id %in% remove_scen, - demo_cat == "DAC", - segment == "labor" - ) %>% - mutate( - demo_grp_metric = factor( - demo_grp_metric, - levels = c( - "DAC_forgone_wages_l", - "Non-DAC_forgone_wages_l", - "DAC_forgone_wages_h", - "Non-DAC_forgone_wages_h" - ) - ) - ), - aes(x = scen_title, y = share, shape = demo_grp_metric), - color = "black", - size = 3, - alpha = 0.8 - ) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - scale_shape_manual( - values = dac_hl_ptc_share, - labels = dac_hl_labs_share - ) + - facet_wrap(~seg_title) + - # ylim(0, 0.85) + - labs( - y = "NPV share", - x = NULL, - color = NULL - ) + - # ylim(-15, 0) + - theme_line + - theme( - legend.position = "bottom", - legend.title = element_blank(), - # axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) + - guides(shape = guide_legend(nrow = 2, byrow = TRUE)) - - ## general fig - DAC - state_share_fig_c <- ggplot() + - geom_point( - data = share_df %>% - filter( - !scen_id %in% remove_scen, - demo_cat == "DAC", - segment == "general" - ), - aes(x = scen_title, y = share, shape = title), - color = "black", - size = 3, - alpha = 0.8 - ) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - scale_shape_manual(values = dac_ptc) + - facet_wrap(~seg_title) + - # ylim(0, 0.85) + - labs( - y = " ", - x = NULL, - color = NULL - ) + - # ylim(-15, 0) + - theme_line + - theme( - legend.position = "none", - legend.title = element_blank(), - # axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - ## save legends - ## ----------------------------------------------------------------------- - - health_dac_legend <- get_legend( - health_share_fig_c + - theme(legend.text = element_text(size = 12)) - ) - - health_poverty_legend <- get_legend( - health_share_fig_b + - theme(legend.text = element_text(size = 12)) - ) - - health_race_legend <- get_legend( - health_share_fig_a + - theme(legend.text = element_text(size = 12)) - ) - - ## save legends - ggsave( - plot = health_dac_legend, - device = "pdf", - filename = "health_dac_legend.pdf", - path = file.path(main_path, save_path, "legends"), - dpi = 600 - ) - - ## save legends - ggsave( - plot = health_poverty_legend, - device = "pdf", - filename = "health_poverty_legend.pdf", - path = file.path(main_path, save_path, "legends"), - dpi = 600 - ) - - ## save legends - ggsave( - plot = health_race_legend, - device = "pdf", - filename = "health_race_legend.pdf", - path = file.path(main_path, save_path, "legends"), - dpi = 600 - ) - - labor_dac_legend <- get_legend( - labor_share_fig_c + - theme(legend.text = element_text(size = 12)) - ) - - labor_poverty_legend <- get_legend( - labor_share_fig_b + - theme(legend.text = element_text(size = 12)) - ) - - labor_race_legend <- get_legend( - labor_share_fig_a + - theme(legend.text = element_text(size = 12)) - ) - - ## save legends - ggsave( - plot = labor_dac_legend, - device = "pdf", - filename = "labor_dac_legend.pdf", - path = file.path(main_path, save_path, "legends"), - dpi = 600 - ) - - ## save legends - ggsave( - plot = labor_poverty_legend, - device = "pdf", - filename = "labor_poverty_legend.pdf", - path = file.path(main_path, save_path, "legends"), - dpi = 600 - ) - - ## save legends - ggsave( - plot = labor_race_legend, - device = "pdf", - filename = "labor_race_legend.pdf", - path = file.path(main_path, save_path, "legends"), - dpi = 600 - ) - - ## save legends - ggsave( - plot = legend_a_h, - device = "pdf", - filename = "labor_race_legend_no_re-emp.pdf", - path = file.path(main_path, save_path, "legends"), - dpi = 600 - ) - - ## combine figure - ## --------------------------------- - - fig_text_size <- 12 - - ## health - health_column_fig <- plot_grid( - health_share_fig_c + - ylim(0, 1) + - theme( - axis.text.x = element_blank(), - legend.position = "none", - plot.margin = margin(1, 1, 20, 1), - strip.text.x = element_text(size = fig_text_size), - axis.text.y = element_text(size = fig_text_size), - axis.title.y = element_text(size = fig_text_size) - ), - health_share_fig_b + - ylim(0, 1) + - theme( - axis.text.x = element_blank(), - strip.text.x = element_blank(), - axis.text.y = element_text(size = fig_text_size), - axis.title.y = element_text(size = fig_text_size), - legend.position = "none", - plot.margin = margin(1, 1, 20, 1) - ), - health_share_fig_a + - ylim(0, 1) + - theme( - strip.text.x = element_blank(), - axis.text.y = element_text(size = fig_text_size), - axis.title.y = element_text(size = fig_text_size), - axis.text.x = element_text(size = fig_text_size), - legend.position = "none" - ), - align = "vh", - labels = c("A", "B", "C"), - # # labels = 'AUTO', - label_size = 10, - hjust = -1, - nrow = 3 - # rel_widths = c(1, 0.25, 1), - # rel_heights = c(1, 0.1, 1, 0.1, 1) - ) - - ## state - state_column_fig <- plot_grid( - state_share_fig_c + - ylim(0, 1) + - theme( - axis.text.x = element_blank(), - legend.position = "none", - plot.margin = margin(1, 1, 20, 1), - strip.text.x = element_text(size = fig_text_size), - axis.title.y = element_text(size = fig_text_size), - # axis.text.y = element_text(size = fig_text_size), - axis.text.y = element_blank() - ), - state_share_fig_b + - ylim(0, 1) + - theme( - axis.text.x = element_blank(), - strip.text.x = element_blank(), - axis.title.y = element_text(size = fig_text_size), - legend.position = "none", - plot.margin = margin(1, 1, 20, 1), - # axis.text.y = element_text(size = fig_text_size), - axis.text.y = element_blank() - ), - state_share_fig_a + - ylim(0, 1) + - theme( - strip.text.x = element_blank(), - axis.title.y = element_text(size = fig_text_size), - axis.text.x = element_text(size = fig_text_size), - legend.position = "none", - # axis.text.y = element_text(size = fig_text_size), - axis.text.y = element_blank() - ), - align = "vh", - labels = c("D", "E", "F"), - # # labels = 'AUTO', - label_size = 10, - hjust = -1, - nrow = 3 - # rel_widths = c(1, 0.25, 1), - # rel_heights = c(1, 0.1, 1, 0.1, 1) - ) - - ## labor - labor_column_fig <- plot_grid( - labor_share_fig_c + - ylim(0, 1) + - labs(y = " ") + - theme( - axis.text.x = element_blank(), - strip.text.x = element_text(size = fig_text_size), - legend.position = "none", - plot.margin = margin(1, 1, 20, 1), - # axis.text.y = element_text(size = fig_text_size), - axis.text.y = element_blank() - ), - labor_share_fig_b + - ylim(0, 1) + - labs(y = " ") + - theme( - axis.text.x = element_blank(), - strip.text.x = element_blank(), - legend.position = "none", - plot.margin = margin(1, 1, 20, 1), - # axis.text.y = element_text(size = fig_text_size), - axis.text.y = element_blank() - ), - labor_share_fig_a + - ylim(0, 1) + - labs(y = " ") + - theme( - strip.text.x = element_blank(), - axis.text.x = element_text(size = fig_text_size), - legend.position = "none", - # axis.text.y = element_text(size = fig_text_size), - axis.text.y = element_blank() - ), - align = "vh", - labels = c("G", "H", "I"), - # # labels = 'AUTO', - label_size = 10, - hjust = -1, - nrow = 3 - # rel_widths = c(1, 0.25, 1), - # rel_heights = c(1, 0.1, 1, 0.1, 1) - ) - - ## all together now - hl_pc_plot_grid_nl <- plot_grid( - health_column_fig, - state_column_fig, - labor_column_fig, - align = "h", - # labels = c("(A)", "(B)", "(C)", ""), - # # labels = 'AUTO', - # label_size = 10, - # hjust = -1, - ncol = 3, - rel_widths = c(1, 0.3, 0.9) - # rel_widths = c(1, 1, 1) - ) - - return(hl_pc_plot_grid_nl) -} - -create_health_labor_table <- function( - main_path, - save_path, - demographic_npv_df, - ref_labor_demog, - pop_ratios, - refining_mortality -) { - ## create table of total health benefit (NPV), labor loss (NPV), change in job years, - ## and avoided premature mortality (scenario x demographic group, state) - - ## NPV values - npv_out <- demographic_npv_df[, .( - demand_scenario, - refining_scenario, - demo_cat, - demo_group, - title, - segment, - metric, - metric_desc, - unit_desc, - value - )] - - ## fte-job-years - emp_out <- ref_labor_demog %>% - # select(-sum_demo_comp_pv_h, -sum_demo_comp_pv_l) %>% - select(demand_scenario:title, sum_demo_emp) %>% - rename(value = sum_demo_emp) %>% - group_by( - demand_scenario, - refining_scenario, - oil_price_scenario, - demo_cat, - demo_group, - title - ) %>% - summarise(value = sum(value)) %>% - # summarise(value = sum(value), - # value_revised = sum(value_revised)) %>% - ungroup() %>% - mutate( - segment = "labor", - metric = "fte_job_years", - metric_desc = "job_loss", - unit_desc = "fte-jobs" - ) - - ## bau scen - bau_emp_out <- emp_out %>% - filter( - demand_scenario == "BAU", - refining_scenario == "historic production" - ) %>% - select(oil_price_scenario, demo_cat, demo_group, title, value) %>% - rename(bau_value = value) - # , - # bau_revised = value_revised) - - ## difference - emp_out <- merge( - emp_out, - bau_emp_out, - by = c("oil_price_scenario", "demo_cat", "demo_group", "title"), - all.x = T - ) - - emp_out <- emp_out %>% - mutate( - delta_value = value - bau_value - # , - # delta_revised_value = value_revised - bau_revised - ) %>% - select( - oil_price_scenario:refining_scenario, - segment, - metric, - metric_desc, - unit_desc, - delta_value - ) %>% - rename(value = delta_value) %>% - mutate( - refining_scenario = str_replace( - refining_scenario, - "historic", - "historical" - ) - ) %>% - filter(oil_price_scenario == "reference case") %>% - select(-oil_price_scenario) - - ## avoided mortality - avoid_m_out <- copy(refining_mortality) %>% as.data.table() - - ## select columns - avoid_m_out <- avoid_m_out %>% - select( - census_tract, - demand_scenario, - refining_scenario, - year, - mortality_delta - ) - - ## merge with pop ratios - avoid_m_out <- merge( - avoid_m_out, - pop_ratios, - by = "census_tract", - all.x = TRUE, - allow.cartesian = TRUE - ) - - setDT(avoid_m_out) - - ## calc value by demographic group - avoid_m_out[, value := mortality_delta * pct] - - avoid_m_out_total <- avoid_m_out[, - .(value = sum(value)), - by = .( - demand_scenario, - refining_scenario, - demo_cat, - demo_group, - title - ) - ] - - avoid_m_out_total <- avoid_m_out_total %>% - mutate( - refining_scenario = str_replace( - refining_scenario, - "historic", - "historical" - ), - segment = "health", - metric = "avoided_mortality", - metric_desc = "avoided_mortality", - unit_desc = "persons" - ) %>% - select( - demand_scenario, - refining_scenario, - demo_cat, - demo_group, - title, - segment, - metric, - metric_desc, - unit_desc, - value - ) - - ## bind - result_output <- rbind(npv_out, emp_out, avoid_m_out_total) - - ## save figure inputs - fwrite( - result_output, - file.path( - main_path, - save_path, - "fig-csv-files", - "state_health_labor_ouputs.csv" - ) - ) - # fwrite(result_output, file.path(main_path, "outputs/academic-out/refining/figures/2024-08-beta-adj/fig-csv-files/", "state_health_labor_ouputs.csv")) - - return(result_output) -} - - -fig4_hl <- function( - health_grp, - ref_labor_demog_yr, - refining_mortality, - pop_ratios -) { - gaps_df <- copy(health_grp) - - ## change scenario names, factor - gaps_df[, - scenario := paste0(demand_scenario, " demand - ", refining_scenario) - ] - # gaps_df[, scenario := gsub('BAU', 'Reference', scenario)] - gaps_df[, scenario := gsub("LC1.", "Low ", scenario)] - - ## refactor - gaps_df[, scenario_title := scenario] - gaps_df[, scenario_title := str_replace(scenario_title, " - ", "\n")] - - ## calculate gaps (BAU - scenario) - bau_gaps_df <- gaps_df[scen_id == "BAU historic production"] - bau_gaps_df <- bau_gaps_df[, c( - "year", - "demo_cat", - "demo_group", - "title", - "mortality_level_dem" - )] - setnames(bau_gaps_df, "mortality_level_dem", "bau_mortality_level_dem") - - gaps_df <- merge( - gaps_df, - bau_gaps_df, - by = c("year", "demo_cat", "demo_group", "title"), - all.x = T - ) - - gaps_df[, gap := mortality_level_dem - bau_mortality_level_dem] - - ## change historic to historical - gaps_df[, scen_id := str_replace(scen_id, "historic", "historical")] - gaps_df[, - refining_scenario := str_replace( - refining_scenario, - "historic", - "historical" - ) - ] - gaps_df[, scenario := str_replace(scenario, "historic", "historical")] - gaps_df[, - scenario_title := str_replace(scenario_title, "historic", "historical") - ] - - ## refactor - gaps_df$scenario <- factor( - gaps_df$scenario, - levels = c( - "BAU demand - historical production", - "BAU demand - historical exports", - "BAU demand - low exports", - "Low demand - historical exports", - "Low demand - low exports", - "Low demand - historical production" - ) - ) - - gaps_df$scenario_title <- factor( - gaps_df$scenario_title, - levels = c( - "BAU demand\nhistorical production", - "BAU demand\nhistorical exports", - "BAU demand\nlow exports", - "Low demand\nhistorical exports", - "Low demand\nlow exports", - "Low demand\nhistorical production" - ) - ) - - ## make figures - ## --------------------------------------------------------- - - ## scenarios for filtering - remove_scen <- c("LC1 historical production", "BAU historical production") - - ## figure a - fig_title_vec <- c("Asian", "Black", "Hispanic", "white") - - health_gap_fig_a <- ggplot( - gaps_df %>% - filter( - !scen_id %in% remove_scen, - title %in% fig_title_vec, - demo_cat == "Race" - ) %>% - mutate( - title = factor(title, levels = c("Black", "Hispanic", "Asian", "white")) - ), - aes(x = year, y = gap, color = title) - ) + - geom_line(linewidth = 1, alpha = 0.8) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - facet_grid(demo_cat ~ scenario_title) + - scale_color_manual( - name = "", - values = race_col_pal - ) + - labs( - x = NULL, - y = NULL - ) + - scale_x_continuous( - breaks = c(2020, 2045), # Specify tick mark positions - labels = c(2020, 2045) - ) + # Specify tick mark labels - theme_line + - # ylim(c(-0.31, 0)) + - theme( - legend.position = "none", - legend.title = element_blank(), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - strip.text.x = element_blank(), - # axis.text.x = element_blank(), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - ## figure b - health_gap_fig_b <- ggplot( - gaps_df %>% - filter( - !scen_id %in% remove_scen, - demo_cat == "DAC" - ), - aes(x = year, y = gap, lty = title) - ) + - geom_line(linewidth = 1, alpha = 0.8) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - facet_grid(demo_cat ~ scenario_title) + - labs( - x = NULL, - y = NULL - ) + - # ylim(c(-0.31, 0)) + - scale_x_continuous( - breaks = c(2020, 2045), # Specify tick mark positions - labels = c(2020, 2045) - ) + # Specify tick mark labels - scale_linetype_manual(values = dac_lty) + - theme_line + - theme( - legend.position = "none", - legend.title = element_blank(), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - # strip.text.x = element_blank(), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - # axis.text.x = element_blank(), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - ## figure c - health_gap_fig_c <- ggplot( - gaps_df %>% - filter( - !scen_id %in% remove_scen, - demo_cat == "Poverty" - ) %>% - mutate( - title = factor( - title, - levels = c("Below poverty line", "Above poverty line") - ) - ), - aes(x = year, y = gap, lty = title) - ) + - geom_line(linewidth = 1, alpha = 0.8, color = "black") + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - facet_grid(demo_cat ~ scenario_title) + - labs( - x = NULL, - y = NULL - ) + - scale_linetype_manual(values = poverty_lty) + - scale_x_continuous( - breaks = c(2020, 2045), # Specify tick mark positions - labels = c(2020, 2045) - ) + # Specify tick mark labels - # ylim(c(-0.31, 0)) + - theme_line + - theme( - legend.position = "none", - legend.title = element_blank(), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - legend.key.width = unit(10, "mm"), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - strip.text.x = element_blank(), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - ## shared y lab - # yaxis_lab <- ggdraw() + draw_label(expression(paste("PM"[2.5], " (",mu,"g ", m^{-3},")", " per person, difference from reference")), - # size = 8, angle = 90) - - yaxis_lab <- ggdraw() + - draw_label( - "Avoided mortalities, difference from reference", - size = 8, - angle = 90 - ) - - gaps_plot_grid_h <- plot_grid( - health_gap_fig_b, - health_gap_fig_c, - health_gap_fig_a, - align = "v", - # labels = c("A", "B", "C", "D", "E", "F"), - # # labels = 'AUTO', - # label_size = 10, - hjust = -1, - nrow = 3, - ncol = 1, - rel_widths = c(1, 1, 1), - rel_heights = c(1.1, 0.9, 0.9) - ) - - gaps_plot_grid2 <- plot_grid( - yaxis_lab, - gaps_plot_grid_h, - align = "v", - # labels = c("A", "B", "C", "D", "E", "F"), - # # labels = 'AUTO', - # label_size = 10, - hjust = -1, - nrow = 1, - ncol = 2, - rel_widths = c(0.05, 1), - rel_heights = c(1, 1) - ) - - ## labor - ## ------------------------------------------------------- - - # ## calc 2020 pop by demographic - # pop_2020 <- refining_mortality %>% - # filter(year == 2020) %>% - # select(census_tract, year, pop) %>% - # unique() %>% - # left_join(pop_ratios) %>% - # as.data.table() - # - # pop_2020[, demo_pop := pop * pct] - # - # ## summarize by demographic group - # pop_2020 <- pop_2020[, .(pop_2020 = sum(demo_pop)), - # by = .(demo_group, demo_cat)] - # - ## labor outputs - l_gaps_df <- copy(ref_labor_demog_yr) - - ## change scenario names, factor - l_gaps_df[, - scenario := paste0(demand_scenario, " demand - ", refining_scenario) - ] - # gaps_df[, scenario := gsub('BAU', 'Reference', scenario)] - l_gaps_df[, scenario := gsub("LC1.", "Low ", scenario)] - - ## add scenario title - l_gaps_df[, scenario_title := scenario] - l_gaps_df[, scenario_title := str_replace(scenario_title, " - ", "\n")] - - ## change historic to historical - l_gaps_df[, - refining_scenario := str_replace( - refining_scenario, - "historic", - "historical" - ) - ] - l_gaps_df[, scenario := str_replace(scenario, "historic", "historical")] - l_gaps_df[, - scenario_title := str_replace(scenario_title, "historic", "historical") - ] - - ## scenarios for filtering - remove_scen <- c( - "Low demand - historical production", - "BAU demand - historical production" - ) - - l_gaps_df$scenario <- factor( - l_gaps_df$scenario, - levels = c( - "BAU demand - historical production", - "BAU demand - historical exports", - "BAU demand - low exports", - "Low demand - historical exports", - "Low demand - low exports", - "Low demand - historical production" - ) - ) - - l_gaps_df$scenario_title <- factor( - l_gaps_df$scenario_title, - levels = c( - "BAU demand\nhistorical production", - "BAU demand\nhistorical exports", - "BAU demand\nlow exports", - "Low demand\nhistorical exports", - "Low demand\nlow exports", - "Low demand\nhistorical production" - ) - ) - - ## sum for state - l_gaps_df <- l_gaps_df[, - .(sum_demo_emp = sum(sum_demo_emp)), - by = .( - year, - demand_scenario, - refining_scenario, - oil_price_scenario, - scenario, - scenario_title, - demo_cat, - demo_group, - title - ) - ] - - # ## merge with 2020 pop - # l_gaps_df <- merge(l_gaps_df, pop_2020, - # by = c("demo_cat", "demo_group"), - # all.x = T) - # - # ## calculate per capita - # l_gaps_df[, demo_emp_pc := sum_demo_emp / pop_2020] - - ## select columns - l_gaps_df <- l_gaps_df[, .( - year, - demand_scenario, - refining_scenario, - oil_price_scenario, - scenario, - scenario_title, - demo_cat, - demo_group, - title, - sum_demo_emp - )] - - # filter for oil px == reference case - l_gaps_df <- l_gaps_df[oil_price_scenario == "reference case"] - - ## calculate gaps (BAU - scenario) - l_bau_gaps_df <- l_gaps_df[scenario == "BAU demand - historical production"] - l_bau_gaps_df <- l_bau_gaps_df[, c( - "year", - "demo_cat", - "demo_group", - "title", - "sum_demo_emp" - )] - setnames(l_bau_gaps_df, "sum_demo_emp", "bau_sum_demo_emp") - # setnames(l_bau_gaps_df, "demo_emp_pc", "bau_demo_emp_pc") - - l_gaps_df <- merge( - l_gaps_df, - l_bau_gaps_df, - by = c("year", "demo_cat", "demo_group", "title"), - all.x = T - ) - - l_gaps_df[, gap_emp := sum_demo_emp - bau_sum_demo_emp] - # l_gaps_df[, gap_emp_pc := demo_emp_pc - bau_demo_emp_pc] - - ## figure labor a - labor_gap_fig_a <- ggplot( - l_gaps_df %>% - filter( - !scenario %in% remove_scen, - title %in% fig_title_vec, - demo_cat == "Race" - ) %>% - mutate( - title = factor(title, levels = c("Black", "Hispanic", "Asian", "white")) - ), - aes(x = year, y = gap_emp / 1000, color = title) - ) + - geom_line(linewidth = 1, alpha = 0.8) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - facet_grid(demo_cat ~ scenario_title) + - scale_color_manual( - name = "", - values = race_col_pal - ) + - labs( - x = NULL, - y = NULL - ) + - # ylim(-35, 0) + - scale_x_continuous( - breaks = c(2020, 2045), # Specify tick mark positions - labels = c(2020, 2045) - ) + # Specify tick mark labels - theme_line + - theme( - legend.position = "bottom", - legend.title = element_blank(), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - strip.text.x = element_blank(), - # axis.text.x = element_blank(), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - ## legend a - legend_a <- labor_gap_fig_a + - geom_line(linewidth = 1, alpha = 0.9) + - theme(legend.position = "right") - - legend_a <- get_legend( - legend_a + - theme(legend.text = element_text(size = 6)) - ) - - ## labor b - labor_gap_fig_b <- ggplot( - l_gaps_df %>% - filter( - !scenario %in% remove_scen, - demo_cat == "DAC" - ), - aes(x = year, y = gap_emp / 1000, lty = title) - ) + - geom_line(linewidth = 1, alpha = 0.8) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - facet_grid(demo_cat ~ scenario_title) + - scale_linetype_manual(values = dac_lty) + - labs( - x = NULL, - y = NULL - ) + - # ylim(c(-35, 0)) + - scale_x_continuous( - breaks = c(2020, 2045), # Specify tick mark positions - labels = c(2020, 2045) - ) + # Specify tick mark labels - theme_line + - theme( - legend.position = "bottom", - legend.title = element_blank(), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - # strip.text.x = element_blank(), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - # axis.text.x = element_blank(), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - ## legend b - legend_b <- labor_gap_fig_b + - geom_line(linewidth = 1, alpha = 0.9) + - theme(legend.position = "right") - - legend_b <- get_legend( - legend_b + - theme(legend.text = element_text(size = 6)) - ) - - ## labor c - labor_gap_fig_c <- ggplot( - l_gaps_df %>% - filter( - !scenario %in% remove_scen, - demo_cat == "Poverty" - ) %>% - mutate( - title = factor( - title, - levels = c("Below poverty line", "Above poverty line") - ) - ), - aes(x = year, y = gap_emp / 1000, lty = title) - ) + - geom_line(linewidth = 1, alpha = 0.8, color = "black") + - scale_linetype_manual(values = poverty_lty) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - facet_grid(demo_cat ~ scenario_title) + - labs( - x = NULL, - y = NULL - ) + - scale_x_continuous( - breaks = c(2020, 2045), # Specify tick mark positions - labels = c(2020, 2045) - ) + # Specify tick mark labels - theme_line + - # ylim(-35, 0) + - theme( - legend.position = "bottom", - legend.title = element_blank(), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - legend.key.width = unit(10, "mm"), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - strip.text.x = element_blank(), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - ## legend c - legend_c <- labor_gap_fig_c + - geom_line(linewidth = 1, alpha = 0.9) + - theme(legend.position = "right") - - legend_c <- get_legend( - legend_c + - theme(legend.text = element_text(size = 6)) - ) - - ## shared y lab - # yaxis_lab <- ggdraw() + draw_label("Labor: FTE job-years, difference from reference", size = 8, angle = 90) - yaxis_lab <- ggdraw() + - draw_label( - "Labor: FTE job-years, difference from reference (thousand)", - size = 8, - angle = 90 - ) - - l_gaps_plot_grid <- plot_grid( - labor_gap_fig_b + theme(legend.position = "none"), - labor_gap_fig_c + theme(legend.position = "none"), - labor_gap_fig_a + theme(legend.position = "none"), - align = "v", - # labels = c("A", "B", "C", "D", "E", "F"), - # # labels = 'AUTO', - # label_size = 10, - hjust = -1, - nrow = 3, - ncol = 1, - rel_widths = c(1, 1, 1), - rel_heights = c(1.05, 0.9, 0.9) - ) - - l_gaps_plot_grid2 <- plot_grid( - yaxis_lab, - l_gaps_plot_grid, - align = "v", - # labels = c("A", "B", "C", "D", "E", "F"), - # # labels = 'AUTO', - # label_size = 10, - hjust = -1, - nrow = 1, - ncol = 2, - rel_widths = c(0.05, 1), - rel_heights = c(1, 1) - ) - - l_gaps_plot_grid2 - - ## plot legends - legends <- plot_grid( - legend_b, - legend_c, - legend_a, - align = "v", - # labels = c("A", "B", "C", "D", "E", "F"), - # # labels = 'AUTO', - # label_size = 10, - hjust = -1, - nrow = 3, - ncol = 1, - rel_widths = c(1, 1, 1), - rel_heights = c(1, 1, 1) - ) - - ## plot side by side - ## ---------------------------------------------- - - health_labor_plot <- plot_grid( - gaps_plot_grid2, - l_gaps_plot_grid2, - NULL, - legends, - align = "v", - # labels = c("A", "B", "C", "D", "E", "F"), - # # labels = 'AUTO', - # label_size = 10, - hjust = -1, - nrow = 1, - ncol = 4, - rel_widths = c(1, 1, 0.05, 0.2), - rel_heights = c(1, 1, 1, 1) - ) - - return(health_labor_plot) -} - - -fig4_hl_pmil <- function( - health_grp, - ref_labor_demog_yr, - refining_mortality, - pop_ratios -) { - ## calc 2020 pop by demographic - pop_2020 <- refining_mortality %>% - filter(year == 2020) %>% - select(census_tract, year, pop) %>% - unique() %>% - left_join(pop_ratios) %>% - as.data.table() - - pop_2020[, demo_pop := pop * pct] - - ## summarize by demographic group - pop_2020 <- pop_2020[, - .(pop_2020 = sum(demo_pop)), - by = .(demo_group, demo_cat) - ] - - gaps_df <- copy(health_grp) - - ## change scenario names, factor - gaps_df[, - scenario := paste0(demand_scenario, " demand - ", refining_scenario) - ] - # gaps_df[, scenario := gsub('BAU', 'Reference', scenario)] - gaps_df[, scenario := gsub("LC1.", "Low ", scenario)] - - ## refactor - gaps_df[, scenario_title := scenario] - gaps_df[, scenario_title := str_replace(scenario_title, " - ", "\n")] - - ## calculate gaps (BAU - scenario) - bau_gaps_df <- gaps_df[scen_id == "BAU historic production"] - bau_gaps_df <- bau_gaps_df[, c( - "year", - "demo_cat", - "demo_group", - "title", - "mortality_level_dem" - )] - setnames(bau_gaps_df, "mortality_level_dem", "bau_mortality_level_dem") - - gaps_df <- merge( - gaps_df, - bau_gaps_df, - by = c("year", "demo_cat", "demo_group", "title"), - all.x = T - ) - - gaps_df[, gap := mortality_level_dem - bau_mortality_level_dem] - - ## convert to per million - gaps_df <- merge( - gaps_df, - pop_2020, - by = c("demo_group", "demo_cat"), - all.x = T - ) - - ## calculate per capita - gaps_df[, value := gap / pop_2020] - gaps_df[, value_pmil := value * 1e6] - - ## change historic to historical - gaps_df[, scen_id := str_replace(scen_id, "historic", "historical")] - gaps_df[, - refining_scenario := str_replace( - refining_scenario, - "historic", - "historical" - ) - ] - gaps_df[, scenario := str_replace(scenario, "historic", "historical")] - gaps_df[, - scenario_title := str_replace(scenario_title, "historic", "historical") - ] - - ## refactor - gaps_df$scenario <- factor( - gaps_df$scenario, - levels = c( - "BAU demand - historical production", - "BAU demand - historical exports", - "BAU demand - low exports", - "Low demand - historical exports", - "Low demand - low exports", - "Low demand - historical production" - ) - ) - - gaps_df$scenario_title <- factor( - gaps_df$scenario_title, - levels = c( - "BAU demand\nhistorical production", - "BAU demand\nhistorical exports", - "BAU demand\nlow exports", - "Low demand\nhistorical exports", - "Low demand\nlow exports", - "Low demand\nhistorical production" - ) - ) - - ## make figures - ## --------------------------------------------------------- - - ## scenarios for filtering - remove_scen <- c("LC1 historical production", "BAU historical production") - - ## figure a - fig_title_vec <- c("Asian", "Black", "Hispanic", "white") - - fig_text_size <- 12 - - health_gap_fig_a <- ggplot( - gaps_df %>% - filter( - !scen_id %in% remove_scen, - title %in% fig_title_vec, - demo_cat == "Race" - ) %>% - mutate( - title = factor(title, levels = c("Black", "Hispanic", "Asian", "white")) - ), - aes(x = year, y = value_pmil, color = title) - ) + - geom_line(linewidth = 1, alpha = 0.8) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - facet_grid(demo_cat ~ scenario_title) + - scale_color_manual( - name = "", - values = race_col_pal - ) + - labs( - x = NULL, - y = NULL - ) + - scale_x_continuous( - breaks = c(2020, 2045), # Specify tick mark positions - labels = c(2020, 2045) - ) + # Specify tick mark labels - theme_line + - # ylim(c(-30, 0)) + - theme( - legend.position = "none", - legend.title = element_blank(), - axis.text.x = element_text( - vjust = 0.5, - hjust = 0.5, - size = fig_text_size - ), - axis.text.y = element_text(size = fig_text_size), - strip.text.y = element_text(size = fig_text_size), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - strip.text.x = element_blank(), - # axis.text.x = element_blank(), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - ## figure b - health_gap_fig_b <- ggplot( - gaps_df %>% - filter( - !scen_id %in% remove_scen, - demo_cat == "DAC" - ), - aes(x = year, y = value_pmil, lty = title) - ) + - geom_line(linewidth = 1, alpha = 0.8) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - facet_grid(demo_cat ~ scenario_title) + - labs( - x = NULL, - y = NULL - ) + - # ylim(c(-30, 0)) + - scale_x_continuous( - breaks = c(2020, 2045), # Specify tick mark positions - labels = c(2020, 2045) - ) + # Specify tick mark labels - scale_linetype_manual(values = dac_lty) + - theme_line + - theme( - legend.position = "none", - legend.title = element_blank(), - axis.text.x = element_text( - vjust = 0.5, - hjust = 0.5, - size = fig_text_size - ), - axis.text.y = element_text(size = fig_text_size), - strip.text = element_text(size = fig_text_size), - # strip.text.x = element_blank(), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - # axis.text.x = element_blank(), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - ## figure c - health_gap_fig_c <- ggplot( - gaps_df %>% - filter( - !scen_id %in% remove_scen, - demo_cat == "Poverty" - ) %>% - mutate( - title = factor( - title, - levels = c("Below poverty line", "Above poverty line") - ) - ), - aes(x = year, y = value_pmil, lty = title) - ) + - geom_line(linewidth = 1, alpha = 0.8, color = "black") + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - facet_grid(demo_cat ~ scenario_title) + - labs( - x = NULL, - y = NULL - ) + - scale_linetype_manual(values = poverty_lty) + - scale_x_continuous( - breaks = c(2020, 2045), # Specify tick mark positions - labels = c(2020, 2045) - ) + # Specify tick mark labels - # ylim(c(-30, 0)) + - theme_line + - theme( - legend.position = "none", - legend.title = element_blank(), - axis.text.x = element_text( - vjust = 0.5, - hjust = 0.5, - size = fig_text_size - ), - axis.text.y = element_text(size = fig_text_size), - strip.text.y = element_text(size = fig_text_size), - legend.key.width = unit(10, "mm"), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - strip.text.x = element_blank(), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - ## shared y lab - # yaxis_lab <- ggdraw() + draw_label(expression(paste("PM"[2.5], " (",mu,"g ", m^{-3},")", " per person, difference from reference")), - # size = 8, angle = 90) - - yaxis_lab <- ggdraw() + - draw_label( - "Health: Avoided mortalities per million people (difference from reference)", - size = fig_text_size, - angle = 90 - ) - - gaps_plot_grid_h <- plot_grid( - NULL, - health_gap_fig_b, - NULL, - health_gap_fig_c, - NULL, - health_gap_fig_a, - align = "v", - labels = c("", "A", "", "B", "", "C"), - # # labels = 'AUTO', - # label_size = 10, - hjust = -0.5, - vjust = 0.25, - nrow = 6, - ncol = 1, - rel_widths = c(1, 1, 1, 1, 1, 1), - rel_heights = c(0.15, 1.1, 0.15, 0.9, 0.15, 0.9) - ) - gaps_plot_grid_h - - gaps_plot_grid2 <- plot_grid( - yaxis_lab, - gaps_plot_grid_h, - align = "v", - # labels = c("A", "B", "C", "D", "E", "F"), - # # labels = 'AUTO', - # label_size = 10, - hjust = -1, - nrow = 1, - ncol = 2, - rel_widths = c(0.05, 1), - rel_heights = c(1, 1) - ) - - ## labor - ## ------------------------------------------------------- - - # ## calc 2020 pop by demographic - # pop_2020 <- refining_mortality %>% - # filter(year == 2020) %>% - # select(census_tract, year, pop) %>% - # unique() %>% - # left_join(pop_ratios) %>% - # as.data.table() - # - # pop_2020[, demo_pop := pop * pct] - # - # ## summarize by demographic group - # pop_2020 <- pop_2020[, .(pop_2020 = sum(demo_pop)), - # by = .(demo_group, demo_cat) - # ] - - ## labor outputs - l_gaps_df <- copy(ref_labor_demog_yr) - - l_gaps_df <- l_gaps_df[oil_price_scenario == "reference case", ] - - ## change scenario names, factor - l_gaps_df[, - scenario := paste0(demand_scenario, " demand - ", refining_scenario) - ] - # gaps_df[, scenario := gsub('BAU', 'Reference', scenario)] - l_gaps_df[, scenario := gsub("LC1.", "Low ", scenario)] - - ## add scenario title - l_gaps_df[, scenario_title := scenario] - l_gaps_df[, scenario_title := str_replace(scenario_title, " - ", "\n")] - - ## change historic to historical - l_gaps_df[, - refining_scenario := str_replace( - refining_scenario, - "historic", - "historical" - ) - ] - l_gaps_df[, scenario := str_replace(scenario, "historic", "historical")] - l_gaps_df[, - scenario_title := str_replace(scenario_title, "historic", "historical") - ] - - ## scenarios for filtering - remove_scen <- c( - "Low demand - historical production", - "BAU demand - historical production" - ) - - l_gaps_df$scenario <- factor( - l_gaps_df$scenario, - levels = c( - "BAU demand - historical production", - "BAU demand - historical exports", - "BAU demand - low exports", - "Low demand - historical exports", - "Low demand - low exports", - "Low demand - historical production" - ) - ) - - l_gaps_df$scenario_title <- factor( - l_gaps_df$scenario_title, - levels = c( - "BAU demand\nhistorical production", - "BAU demand\nhistorical exports", - "BAU demand\nlow exports", - "Low demand\nhistorical exports", - "Low demand\nlow exports", - "Low demand\nhistorical production" - ) - ) - - # ## sum for state - # l_gaps_df <- l_gaps_df[, .(sum_demo_emp = sum(demo_emp)), - # by = .( - # year, demand_scenario, refining_scenario, - # scenario, scenario_title, demo_cat, demo_group, title - # ) - # ] - - ## select columns - l_gaps_df <- l_gaps_df[, .( - year, - demand_scenario, - refining_scenario, - scenario, - scenario_title, - demo_cat, - demo_group, - title, - sum_demo_emp - )] - - ## calculate gaps (BAU - scenario) - l_bau_gaps_df <- l_gaps_df[scenario == "BAU demand - historical production"] - l_bau_gaps_df <- l_bau_gaps_df[, c( - "year", - "demo_cat", - "demo_group", - "title", - "sum_demo_emp" - )] - setnames(l_bau_gaps_df, "sum_demo_emp", "bau_sum_demo_emp") - - l_gaps_df <- merge( - l_gaps_df, - l_bau_gaps_df, - by = c("year", "demo_cat", "demo_group", "title"), - all.x = T - ) - - l_gaps_df[, gap_emp := sum_demo_emp - bau_sum_demo_emp] - - ## merge with 2020 pop - l_gaps_df <- merge( - l_gaps_df, - pop_2020, - by = c("demo_cat", "demo_group"), - all.x = T - ) - - ## calculate per capita - l_gaps_df[, gap_emp_pc := gap_emp / pop_2020] - l_gaps_df[, gap_emp_pmil := gap_emp_pc * 1e6] - - ## figure labor a - labor_gap_fig_a <- ggplot( - l_gaps_df %>% - filter( - !scenario %in% remove_scen, - title %in% fig_title_vec, - demo_cat == "Race" - ) %>% - mutate( - title = factor(title, levels = c("Black", "Hispanic", "Asian", "white")) - ), - aes(x = year, y = gap_emp_pmil, color = title) - ) + - geom_line(linewidth = 1, alpha = 0.8) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - facet_grid(demo_cat ~ scenario_title) + - scale_color_manual( - name = "", - values = race_col_pal - ) + - labs( - x = NULL, - y = NULL - ) + - scale_x_continuous( - breaks = c(2020, 2045), # Specify tick mark positions - labels = c(2020, 2045) - ) + # Specify tick mark labels - theme_line + - theme( - legend.position = "bottom", - legend.title = element_blank(), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - axis.text.x = element_text( - vjust = 0.5, - hjust = 0.5, - size = fig_text_size - ), - axis.text.y = element_text(size = fig_text_size), - strip.text.y = element_text(size = fig_text_size), - strip.text.x = element_blank(), - # axis.text.x = element_blank(), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - ## legend a - legend_a <- labor_gap_fig_a + - geom_line(linewidth = 1, alpha = 0.9) + - theme(legend.position = "right") - - legend_a <- get_legend( - legend_a + - theme(legend.text = element_text(size = fig_text_size)) - ) - - ## labor b - labor_gap_fig_b <- ggplot( - l_gaps_df %>% - filter( - !scenario %in% remove_scen, - demo_cat == "DAC" - ), - aes(x = year, y = gap_emp_pmil, lty = title) - ) + - geom_line(linewidth = 1, alpha = 0.8) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - facet_grid(demo_cat ~ scenario_title) + - scale_linetype_manual(values = dac_lty) + - labs( - x = NULL, - y = NULL - ) + - # ylim(c(-0.31, 0)) + - scale_x_continuous( - breaks = c(2020, 2045), # Specify tick mark positions - labels = c(2020, 2045) - ) + # Specify tick mark labels - theme_line + - theme( - legend.position = "bottom", - legend.title = element_blank(), - axis.text.x = element_text( - vjust = 0.5, - hjust = 0.5, - size = fig_text_size - ), - axis.text.y = element_text(size = fig_text_size), - strip.text.y = element_text(size = fig_text_size), - strip.text.x = element_text(size = fig_text_size), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - # axis.text.x = element_blank(), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - ## legend b - legend_b <- labor_gap_fig_b + - geom_line(linewidth = 1, alpha = 0.9) + - theme(legend.position = "right") - - legend_b <- get_legend( - legend_b + - theme(legend.text = element_text(size = fig_text_size)) - ) - - ## labor c - labor_gap_fig_c <- ggplot( - l_gaps_df %>% - filter( - !scenario %in% remove_scen, - demo_cat == "Poverty" - ) %>% - mutate( - title = factor( - title, - levels = c("Below poverty line", "Above poverty line") - ) - ), - aes(x = year, y = gap_emp_pmil, lty = title) - ) + - geom_line(linewidth = 1, alpha = 0.8, color = "black") + - scale_linetype_manual(values = poverty_lty) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - facet_grid(demo_cat ~ scenario_title) + - labs( - x = NULL, - y = NULL - ) + - scale_x_continuous( - breaks = c(2020, 2045), # Specify tick mark positions - labels = c(2020, 2045) - ) + # Specify tick mark labels - theme_line + - # ylim(-70, 0) + - theme( - legend.position = "bottom", - legend.title = element_blank(), - axis.text.x = element_text( - vjust = 0.5, - hjust = 0.5, - size = fig_text_size - ), - axis.text.y = element_text(size = fig_text_size), - strip.text.y = element_text(size = fig_text_size), - legend.key.width = unit(10, "mm"), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - strip.text.x = element_blank(), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - - ## legend c - legend_c <- labor_gap_fig_c + - geom_line(linewidth = 1, alpha = 0.9) + - theme(legend.position = "right") - - legend_c <- get_legend( - legend_c + - theme(legend.text = element_text(size = fig_text_size)) - ) - - ## shared y lab - # yaxis_lab <- ggdraw() + draw_label("Labor: FTE job-years, difference from reference", size = 8, angle = 90) - yaxis_lab <- ggdraw() + - draw_label( - "Labor: FTE employment changes per million people (difference from reference)", - size = fig_text_size, - angle = 90 - ) - - l_gaps_plot_grid <- plot_grid( - NULL, - labor_gap_fig_b + theme(legend.position = "none"), - NULL, - labor_gap_fig_c + theme(legend.position = "none"), - NULL, - labor_gap_fig_a + theme(legend.position = "none"), - align = "v", - labels = c("", "D", "", "E", "", "F"), - # # labels = 'AUTO', - # label_size = 10, - hjust = -0.5, - vjust = 0.25, - nrow = 6, - ncol = 1, - rel_widths = c(1, 1, 1, 1, 1, 1), - rel_heights = c(0.15, 1.1, 0.15, 0.9, 0.15, 0.9) - ) - - l_gaps_plot_grid2 <- plot_grid( - yaxis_lab, - l_gaps_plot_grid, - align = "v", - # labels = c("A", "B", "C", "D", "E", "F"), - # # labels = 'AUTO', - # label_size = 10, - hjust = -1, - nrow = 1, - ncol = 2, - rel_widths = c(0.05, 1), - rel_heights = c(1, 1) - ) - - l_gaps_plot_grid2 - - ## plot legends - legends <- plot_grid( - NULL, - legend_b, - NULL, - legend_c, - NULL, - legend_a, - align = "v", - # labels = c("A", "B", "C", "D", "E", "F"), - # # labels = 'AUTO', - # label_size = 10, - hjust = -1, - nrow = 6, - ncol = 1, - rel_widths = c(1, 1, 1, 1, 1, 1), - rel_heights = c(0.15, 1, 0.15, 1, 0.15, 1) - ) - - ## plot side by side - ## ---------------------------------------------- - - health_labor_plot <- plot_grid( - gaps_plot_grid2, - l_gaps_plot_grid2, - NULL, - legends, - align = "v", - # labels = c("A", "B", "C", "D", "E", "F"), - # # labels = 'AUTO', - # label_size = 10, - hjust = -1, - nrow = 1, - ncol = 4, - rel_widths = c(1, 1, 0.05, 0.2), - rel_heights = c(1, 1, 1, 1) - ) - - return(health_labor_plot) -} diff --git a/before-targets/archive/labor_functions.R b/before-targets/archive/labor_functions.R deleted file mode 100644 index 8c759ff..0000000 --- a/before-targets/archive/labor_functions.R +++ /dev/null @@ -1,1088 +0,0 @@ -## labor functions ## - -create_prod_px_spread <- function(proc_oil_px_df) { - crack_spread <- tibble( - product = c("gasoline", "jet_fuel", "diesel"), - spread = c(23, 20, 23) - ) - - crack_spread_ex <- expand.grid( - year = c(2019:2045), - product = unique(crack_spread$product) - ) - - crack_spread <- merge(crack_spread_ex, crack_spread, - all.x = T - ) - - prod_price <- copy(proc_oil_px_df) - # prod_price <- proc_oil_px_df[oil_price_scenario == "reference case"] - - prod_price <- merge(crack_spread, prod_price, - by = c("year"), - all = T, - allow.cartesian = T - ) - - setDT(prod_price) - - prod_price[, product_price := oil_price_usd_per_bbl + spread] - - prod_price -} - - -calc_labor_outputs <- function(main_path, - indiv_prod_output, - dt_refcap, - product_px, - cpi2019, - cpi2020, - discount_rate, - alpha_comp, - alpha_emp, - refin_locs_ct, - dt_direct_multipliers) { - ## add product for calculating price - county_out_refining <- copy(indiv_prod_output) - - county_out_refining[, fuel := as.character(fuel)] - - county_out_refining[, product := fifelse( - fuel %chin% c("gasoline", "drop-in gasoline"), "gasoline", - fifelse(fuel %chin% c("diesel", "renewable diesel"), "diesel", "jet_fuel") - )] - - ## merge with counties - county_df <- dt_refcap[, .(site_id, county)] - county_df[, site_id := as.character(site_id)] - - - county_out_refining <- merge(county_out_refining, county_df, - by = c("site_id"), - all.x = T - ) - - # fill in missing counties - county_out_refining[, county := fifelse( - site_id == "342-2", "Contra Costa", - fifelse( - site_id == "99999", "Kern", - fifelse(site_id == "t-800", "Los Angeles", county) - ) - )] - - ## merge with prices - product_df <- copy(product_px) - product_df <- product_df[, .(year, oil_price_scenario, product, product_price)] - - county_out_refining <- merge(county_out_refining, product_df, - by = c("year", "product"), - all.x = T, - allow.cartesian = T - ) - - ## calculate revenue - county_out_refining[, revenue := value * product_price] - - ## merge with census tracts - ct_out_refining <- merge(county_out_refining, - refin_locs_ct[, .(GEOID, site_id)], - by = c("site_id"), - all.x = T) - - ## step 1: summarize at the census tract level - ct_out_refining_summary <- ct_out_refining[, .( - production_bbl = sum(value), - revenue = sum(revenue) - ), by = .( - demand_scenario, refining_scenario, - oil_price_scenario, year, county, GEOID - )] - - ## fix columns - ct_out_refining_summary[, county := fifelse(county == "Solano County", "Solano", county)] - - ct_out_refining_summary[, w_tract_geocode := substr(GEOID, 1, 11)] - - ## step 2: merge with direct multipliers on the "w_tract_geocode" - ct_out_refining_direct <- merge(ct_out_refining_summary, - dt_direct_multipliers, - by = c("w_tract_geocode"), - all.x = TRUE, - allow.cartesian = TRUE) - - ## step 3: divide revenue by $1 million and multiply it separately - ## by emp.rev (employment multiplier) and ec.rev (compensation multiplier). - ct_out_refining_direct <- ct_out_refining_direct[, `:=` (empl_direct_impact = (revenue / 1e6) * emp.rev, - comp_direct_impact = (revenue / 1e6) * ec.rev)] - - - ## step 4: summarize by “h” census tract. This is what you will use for - ## Figure 1 D, Figure 4 D-F, and Figure 5 D-F. - - ct_out_refining_direct <- ct_out_refining_direct[, .( - total_emp = sum(empl_direct_impact), - total_comp = sum(comp_direct_impact) - ), .(demand_scenario, refining_scenario, oil_price_scenario, year, h_tract_geocode)] - - - ct_out_refining_direct <- ct_out_refining_direct[, .(h_tract_geocode, - demand_scenario, - refining_scenario, - oil_price_scenario, - year, - total_emp, - total_comp)] - - ## convert comp to 2019 dollars - ct_out_refining_direct[, total_comp_usd19 := total_comp * cpi2019 / cpi2020] - - ## calc PV - ct_out_refining_direct[, total_comp_PV := total_comp_usd19 / ((1 + discount_rate)^(year - 2019))] - - ## rename columns - setnames(ct_out_refining_direct, c("total_comp", "total_comp_usd19", "total_comp_PV"), c("total_comp_h", "total_comp_usd19_h", "total_comp_PV_h")) - - - ## revision: unclear if we need to do the following - ## -------------------------------------------------------------------------- - - ## calculate the lower bound value - ct_out_refining_direct <- ct_out_refining_direct %>% - rename(census_tract = h_tract_geocode) %>% - arrange(demand_scenario, refining_scenario, oil_price_scenario, census_tract, year) %>% - group_by(demand_scenario, refining_scenario, oil_price_scenario, census_tract) %>% - mutate( - prev_emp = ifelse(year == 2020, NA, lag(total_emp)), - total_emp_revised = ifelse(year == 2020, total_emp, total_emp - ((1 - alpha_emp) * prev_emp)), - prev_comp_usd19h = ifelse(year == 2020, NA, lag(total_comp_usd19_h)), - total_comp_usd19_l = ifelse(year == 2020, total_comp_usd19_h, total_comp_usd19_h - ((1 - alpha_comp) * prev_comp_usd19h)) - ) %>% - ungroup() %>% - as.data.table() - - review_df <- ct_out_refining_direct %>% - select(demand_scenario, - refining_scenario, - oil_price_scenario, - census_tract, - year, - total_comp_usd19_h, - prev_comp_usd19h, - total_comp_usd19_l, - total_emp, - prev_emp, - total_emp_revised) - - ## save for review - write_csv(review_df, file.path(main_path, "outputs/academic-out/refining/figures/2025-update/fig-csv-files/labor_result_for_review.csv")) - # write_csv(review_df, file.path(main_path, "outputs/academic-out/refining/figures/2024-08-beta-adj/fig-csv-files/labor_result_for_review.csv")) - - ## calc discounted low - ct_out_refining_direct[, total_comp_PV_l := total_comp_usd19_l / ((1 + discount_rate)^(year - 2019))] - - ## select columns - ct_out_refining_direct <- ct_out_refining_direct[, .( - demand_scenario, refining_scenario, oil_price_scenario, census_tract, year, total_emp, total_emp_revised, total_comp_h, - total_comp_usd19_h, total_comp_usd19_l, total_comp_PV_h, total_comp_PV_l - )] - - - return(ct_out_refining_direct) - - # ## summarize at the county level - # county_out_refining_summary <- county_out_refining[, .( - # production_bbl = sum(value), - # revenue = sum(revenue) - # ), by = .( - # demand_scenario, refining_scenario, - # oil_price_scenario, year, county - # )] -# -# ## calculate labor impacts -# county_out_refining_summary[, county := fifelse(county == "Solano County", "Solano", county)] -# -# -# ## merge with labor multipliers, calculate labor vals -# county_out_labor <- merge(county_out_refining_summary, proc_labor_dest_df, -# by = c("county"), -# all.x = T, -# allow.cartesian = T -# ) - - # county_out_labor[, ':=' (c.dire_emp = (revenue / (10 ^ 6)) * dire_emp_mult, - # c.indi_emp = (revenue / (10 ^ 6)) * indi_emp_mult, - # c.indu_emp = (revenue / (10 ^ 6)) * indu_emp_mult, - # c.dire_comp = (revenue / (10 ^ 6)) * dire_comp_mult, - # c.indi_comp = (revenue / (10 ^ 6)) * ip.indi_comp_mult, - # c.indu_comp = (revenue / (10 ^ 6)) * ip.indu_comp_mult)] - # - # county_out_labor[, ':=' (total_emp = c.dire_emp + c.indi_emp + c.indu_emp, - # total_comp = c.dire_comp + c.indi_comp + c.indu_comp)] -# -# county_out_labor[, ":="(c.emp = (revenue / (10^6)) * employment, -# c.comp = (revenue / (10^6)) * emp_comp)] -# -# county_out_labor <- county_out_labor[, .( -# total_production_bbl = sum(production_bbl), -# total_revenue = sum(revenue), -# total_emp = sum(c.emp), -# total_comp = sum(c.comp) -# ), .(demand_scenario, refining_scenario, oil_price_scenario, year, destination)] - - # ## convert to 2019 dollars - # county_out_labor[, total_comp_usd19 := total_comp * cpi2019 / cpi2020] - # - # ## calc PV - # county_out_labor[, total_comp_PV := total_comp_usd19 / ((1 + discount_rate)^(year - 2019))] - # - # ## rename columns - # setnames(county_out_labor, c("total_comp", "total_comp_usd19", "total_comp_PV"), c("total_comp_h", "total_comp_usd19_h", "total_comp_PV_h")) - # - # ## calculate the lower bound value - # county_out_labor <- county_out_labor %>% - # arrange(demand_scenario, refining_scenario, oil_price_scenario, destination, year) %>% - # group_by(demand_scenario, refining_scenario, oil_price_scenario, destination) %>% - # mutate( - # prev_emp = ifelse(year == 2020, NA, lag(total_emp)), - # total_emp_revised = ifelse(year == 2020, total_emp, total_emp - ((1 - alpha_emp) * prev_emp)), - # prev_comp_usd19h = ifelse(year == 2020, NA, lag(total_comp_usd19_h)), - # total_comp_usd19_l = ifelse(year == 2020, total_comp_usd19_h, total_comp_usd19_h - ((1 - alpha_comp) * prev_comp_usd19h)) - # ) %>% - # ungroup() %>% - # as.data.table() - # - # review_df <- county_out_labor %>% - # select(demand_scenario, refining_scenario, oil_price_scenario, destination, year, total_production_bbl, total_revenue, total_comp_usd19_h, prev_comp_usd19h, total_comp_usd19_l, total_emp, total_emp_revised) - # - # ## save for review - # write_csv(review_df, file.path(main_path, "outputs/academic-out/refining/figures/2024-08-update/fig-csv-files/labor_result_for_review.csv")) - # # write_csv(review_df, file.path(main_path, "outputs/academic-out/refining/figures/2025-08-beta-adj/fig-csv-files/labor_result_for_review.csv")) - # - # ## calc discounted low - # county_out_labor[, total_comp_PV_l := total_comp_usd19_l / ((1 + discount_rate)^(year - 2019))] - # - # ## select columns - # county_out_labor <- county_out_labor[, .( - # demand_scenario, refining_scenario, oil_price_scenario, destination, year, total_emp, total_emp_revised, total_comp_h, - # total_comp_usd19_h, total_comp_usd19_l, total_comp_PV_h, total_comp_PV_l - # )] - # - # county_out_labor -} - - -calc_state_direct_impacts <- function(annual_direct_labor) { - - dt <- annual_direct_labor - - dt_state <- dt[, .( - state_emp_h = sum(total_emp), - state_emp_l = sum(total_emp_revised), - state_comp_h = sum(total_comp_h), - state_comp_usd19_h = sum(total_comp_usd19_h), - state_comp_PV_h = sum(total_comp_PV_h), - state_comp_usd19_l = sum(total_comp_usd19_l), - state_comp_PV_l = sum(total_comp_PV_l)), - .(demand_scenario, refining_scenario, oil_price_scenario, year)] - - return(dt_state) - -} - - - - -calc_labor_all_impacts_outputs <- function(main_path, - state_annual_direct_impacts, - indiv_prod_output, - dt_refcap, - product_px, - cpi2019, - cpi2020, - discount_rate, - alpha_comp, - alpha_emp, - dt_indirect_state_multipliers) { - ## add product for calculating price - county_out_refining <- copy(indiv_prod_output) - - county_out_refining[, fuel := as.character(fuel)] - - county_out_refining[, product := fifelse( - fuel %chin% c("gasoline", "drop-in gasoline"), "gasoline", - fifelse(fuel %chin% c("diesel", "renewable diesel"), "diesel", "jet_fuel") - )] - - ## merge with counties - county_df <- dt_refcap[, .(site_id, county)] - county_df[, site_id := as.character(site_id)] - - - county_out_refining <- merge(county_out_refining, county_df, - by = c("site_id"), - all.x = T - ) - - # fill in missing counties - county_out_refining[, county := fifelse( - site_id == "342-2", "Contra Costa", - fifelse( - site_id == "99999", "Kern", - fifelse(site_id == "t-800", "Los Angeles", county) - ) - )] - - ## merge with prices - product_df <- copy(product_px) - product_df <- product_df[, .(year, oil_price_scenario, product, product_price)] - - county_out_refining <- merge(county_out_refining, product_df, - by = c("year", "product"), - all.x = T, - allow.cartesian = T - ) - - ## calculate revenue - county_out_refining[, revenue := value * product_price] - - ## step 1: summarize at the state level - state_out_refining_summary <- county_out_refining[, .( - production_bbl = sum(value), - revenue = sum(revenue) - ), by = .( - demand_scenario, refining_scenario, - oil_price_scenario, year - )] - - - ## step 2: process induced df - ## step 3: skip - total_indir_induc_multipliers <- dt_indirect_state_multipliers[, .(emp.rev = sum(emp.rev), - ec.rev = sum(ec.rev), - emp.li = sum(emp.li, na.rm = T), - ec.li = sum(ec.li, na.rm = T)), by = .(DestinationRegion)] - - ## step 4: divide revenue by $1 million. - ## multiple the resulting number separately by emp.rev and ec.rev. - ## You should now have the combined indirect and induced impacts by year and scenario. - state_out_refining_summary[, `:=` (empl_indir_induc_impact = (revenue / 1e6) * total_indir_induc_multipliers$emp.rev[1], - comp_indir_induc_impact = (revenue / 1e6) * total_indir_induc_multipliers$ec.rev[1])] - - - - ## step 5: merge with state direct impacts - state_out_refining_all_impacts <- merge(state_out_refining_summary, state_annual_direct_impacts, - by = c("year", - "demand_scenario", - "refining_scenario", - "oil_price_scenario"), - all = T) - - ## step 6: sum direct and indirct/induced impacts - state_out_refining_all_impacts <- state_out_refining_all_impacts[, state_comp_all_impacts := comp_indir_induc_impact + state_comp_h] - - state_out_refining_all_impacts <- state_out_refining_all_impacts[, .(demand_scenario, - refining_scenario, - oil_price_scenario, - year, - production_bbl, - revenue, - state_comp_all_impacts)] - - - ## merge with BAU to compute relative impact and induced and indirect impacts - state_out_refining_all_impacts_bau <- filter(state_out_refining_all_impacts, - demand_scenario=="BAU" & refining_scenario=="historic production" & oil_price_scenario=="reference case") - - state_out_refining_all_impacts_bau <- state_out_refining_all_impacts_bau[, .(year, - state_comp_all_impacts)] - - setnames(state_out_refining_all_impacts_bau, c("state_comp_all_impacts"), c("state_comp_all_impacts_bau")) - - ## step 7: lag state_comp_all_impacts by one year. - ## step 8: multiply lagged state_comp_all_impacts by 0.8 and - ## then separately by emp.li and ec.li. This will leave you with - ## induced employment and compensation effects from rehires’ labor - ## income at their new jobs for each year and scenario. - state_out_refining_all_impacts <- merge(state_out_refining_all_impacts, state_out_refining_all_impacts_bau, - by=c("year"), - all.x = T) %>% - arrange(demand_scenario, refining_scenario, oil_price_scenario, year) %>% - group_by(demand_scenario, refining_scenario, oil_price_scenario) %>% - mutate( - prev_comp = ifelse(year == 2020, NA, lag(state_comp_all_impacts)), - prev_comp_bau = ifelse(year == 2020, NA, lag(state_comp_all_impacts_bau)), - state_comp_all_impacts_l = ifelse(year == 2020, state_comp_all_impacts, state_comp_all_impacts - ((1 - alpha_comp) * prev_comp)), - state_comp_all_impacts_l_bau = ifelse(year == 2020, state_comp_all_impacts_bau, state_comp_all_impacts_bau - ((1 - alpha_comp) * prev_comp_bau)), - state_comp_all_impacts_l_relative = state_comp_all_impacts_l - state_comp_all_impacts_l_bau, - state_comp_all_impacts_l_relative_adj = ifelse(state_comp_all_impacts_l_relative > 0, NA, state_comp_all_impacts_l_relative), - prev_comp_l = ifelse(year == 2020, NA, lag(state_comp_all_impacts_l_relative_adj)), - state_comp_emp_li = ifelse(year == 2020, NA, - (prev_comp_l / 1e6) * total_indir_induc_multipliers$emp.li[1]), - state_comp_ec_li = ifelse(year == 2020, NA, - (prev_comp_l / 1e6) * total_indir_induc_multipliers$ec.li[1]) - ) %>% - ungroup() %>% - as.data.table() - - ## save for review - write_csv(state_out_refining_all_impacts, file.path(main_path, "outputs/academic-out/refining/figures/2025-update/fig-csv-files/step_8_output_for_review.csv")) - - ## step 9: calc revised statewide indirect and induced impact that is equal to - ## the indirect and induced impact from step 4 - direct and indirect impact from - ## step 7. - state_out_refining_summary <- merge(state_out_refining_summary, - state_out_refining_all_impacts[, .(demand_scenario, - refining_scenario, - oil_price_scenario, - year, - state_comp_all_impacts, - state_comp_emp_li, - state_comp_ec_li)], - by = c("demand_scenario", - "refining_scenario", - "oil_price_scenario", - "year"), - all = T) - - - ## subtract indirect and induced values computed in step 8 (if year 2020, use original 2020 value) - state_out_refining_summary <- arrange(state_out_refining_summary, - demand_scenario, refining_scenario, oil_price_scenario, year) %>% - group_by(demand_scenario, refining_scenario, oil_price_scenario) %>% - mutate( - prev_comp = ifelse(year == 2020, NA, lag(comp_indir_induc_impact)), - prev_empl = ifelse(year == 2020, NA, lag(empl_indir_induc_impact)), - comp_indir_induc_impact_l = ifelse(year == 2020, comp_indir_induc_impact, comp_indir_induc_impact - ((1 - alpha_comp) * prev_comp)), - empl_indir_induc_impact_l = ifelse(year == 2020, empl_indir_induc_impact, empl_indir_induc_impact - ((1 - alpha_emp) * prev_empl)) - ) %>% - as.data.table() - - state_out_refining_summary[, `:=` (empl_indir_induc_impact_l = - fifelse(year == 2020, empl_indir_induc_impact_l, empl_indir_induc_impact_l - state_comp_emp_li), - comp_indir_induc_impact_l = - fifelse(year == 2020, comp_indir_induc_impact_l, comp_indir_induc_impact_l - state_comp_ec_li))] - - ## create df with low and high induced and indirect impacts, convert to 2019, calc pv - state_out_labor_induc_indir <- state_out_refining_summary[, .(demand_scenario, - refining_scenario, - oil_price_scenario, - year, - empl_indir_induc_impact, - empl_indir_induc_impact_l, - comp_indir_induc_impact, - comp_indir_induc_impact_l)] - - setnames(state_out_labor_induc_indir, - c("empl_indir_induc_impact", "comp_indir_induc_impact"), - c("empl_indir_induc_impact_h", "comp_indir_induc_impact_h")) - - ## convert into to 2019 dollars - state_out_labor_induc_indir[, `:=` (comp_indir_induc_impact_h_usd19 = comp_indir_induc_impact_h * cpi2019 / cpi2020, - comp_indir_induc_impact_l_usd19 = comp_indir_induc_impact_l * cpi2019 / cpi2020)] - - ## calc PV - state_out_labor_induc_indir[, `:=` (comp_indir_induc_impact_h_PV = comp_indir_induc_impact_h_usd19 / ((1 + discount_rate)^(year - 2019)), - comp_indir_induc_impact_l_PV = comp_indir_induc_impact_l_usd19 / ((1 + discount_rate)^(year - 2019)))] - - - ## merge with direct impact - state_out_labor_induc_indir <- merge(state_out_labor_induc_indir, - state_annual_direct_impacts[, .(demand_scenario, - refining_scenario, - oil_price_scenario, - year, - state_emp_h, - state_emp_l, - state_comp_h, - state_comp_usd19_h, - state_comp_PV_h, - state_comp_usd19_l, - state_comp_PV_l)]) - - ## compute total impact, high and low - state_out_labor_induc_indir[, `:=` (empl_all_impacts_h = state_emp_h + empl_indir_induc_impact_h, - emp_all_impacts_l = state_emp_l + empl_indir_induc_impact_l, - comp_all_impacts_h = comp_indir_induc_impact_h + state_comp_h, - comp_all_impacts_usd19_h = comp_indir_induc_impact_h_usd19 + state_comp_usd19_h, - comp_all_impacts_PV_h = comp_indir_induc_impact_h_PV + state_comp_PV_h, - comp_all_impacts_usd19_l = comp_indir_induc_impact_l_usd19 + state_comp_usd19_l, - comp_all_impacts_PV_l = comp_indir_induc_impact_l_PV + state_comp_PV_l)] - - - ## final df - state_out_labor_all_impacts <- state_out_labor_induc_indir[, .(demand_scenario, - refining_scenario, - oil_price_scenario, - year, - state_emp_h, - empl_indir_induc_impact_h, - empl_all_impacts_h, - state_emp_l, - empl_indir_induc_impact_l, - emp_all_impacts_l, - state_comp_h, - comp_indir_induc_impact_h, - comp_all_impacts_h, - state_comp_usd19_h, - comp_indir_induc_impact_h_usd19, - comp_all_impacts_usd19_h, - state_comp_PV_h, - comp_indir_induc_impact_h_PV, - comp_all_impacts_PV_h, - state_comp_usd19_l, - comp_indir_induc_impact_l_usd19, - comp_all_impacts_usd19_l, - state_comp_PV_l, - comp_indir_induc_impact_l_PV, - comp_all_impacts_PV_l)] - - # state_out_labor_all_impacts <- state_out_labor_all_impacts |> - # rename(comp_dir_impact_h = state_comp_h, - # comp_dir_usd19_h = state_comp_usd19_h, - # comp_dir_PV_h = state_comp_PV_h, - # comp_dir_usd19_l = state_comp_usd19_l, - # comp_dir_PV_l = state_comp_PV_l) |> - # as.data.table() - - ## save for review - write_csv(state_out_labor_all_impacts, file.path(main_path, "outputs/academic-out/refining/figures/2025-update/fig-csv-files/state_out_labor_all_impacts.csv")) - - return(state_out_labor_all_impacts) - - # ## summarize at the county level - # county_out_refining_summary <- county_out_refining[, .( - # production_bbl = sum(value), - # revenue = sum(revenue) - # ), by = .( - # demand_scenario, refining_scenario, - # oil_price_scenario, year, county - # )] - # - # - # ## calculate labor impacts - # county_out_refining_summary[, county := fifelse(county == "Solano County", "Solano", county)] - # - # - # ## merge with labor multipliers, calculate labor vals - # county_out_labor <- merge(county_out_refining_summary, proc_labor_dest_df, - # by = c("county"), - # all.x = T, - # allow.cartesian = T - # ) - # - # # county_out_labor[, ':=' (c.dire_emp = (revenue / (10 ^ 6)) * dire_emp_mult, - # # c.indi_emp = (revenue / (10 ^ 6)) * indi_emp_mult, - # # c.indu_emp = (revenue / (10 ^ 6)) * indu_emp_mult, - # # c.dire_comp = (revenue / (10 ^ 6)) * dire_comp_mult, - # # c.indi_comp = (revenue / (10 ^ 6)) * ip.indi_comp_mult, - # # c.indu_comp = (revenue / (10 ^ 6)) * ip.indu_comp_mult)] - # # - # # county_out_labor[, ':=' (total_emp = c.dire_emp + c.indi_emp + c.indu_emp, - # # total_comp = c.dire_comp + c.indi_comp + c.indu_comp)] - # - # - # county_out_labor[, ":="(c.emp = (revenue / (10^6)) * employment, - # c.comp = (revenue / (10^6)) * emp_comp)] - # - # county_out_labor <- county_out_labor[, .( - # total_production_bbl = sum(production_bbl), - # total_revenue = sum(revenue), - # total_emp = sum(c.emp), - # total_comp = sum(c.comp) - # ), .( - # demand_scenario, - # refining_scenario, - # oil_price_scenario, - # impact_type, - # year, - # destination - # )] - # - # ## convert to 2019 dollars - # county_out_labor[, total_comp_usd19 := total_comp * cpi2019 / cpi2020] - # - # ## calc PV - # county_out_labor[, total_comp_PV := total_comp_usd19 / ((1 + discount_rate)^(year - 2019))] - # - # ## rename columns - # setnames(county_out_labor, c("total_comp", "total_comp_usd19", "total_comp_PV"), c("total_comp_h", "total_comp_usd19_h", "total_comp_PV_h")) - # - # ## calculate the lower bound value - # county_out_labor <- county_out_labor %>% - # arrange(demand_scenario, refining_scenario, oil_price_scenario, impact_type, destination, year) %>% - # group_by(demand_scenario, refining_scenario, oil_price_scenario, impact_type, destination) %>% - # mutate( - # prev_emp = ifelse(year == 2020, NA, lag(total_emp)), - # total_emp_revised = ifelse(year == 2020, total_emp, total_emp - ((1 - alpha_emp) * prev_emp)), - # prev_comp_usd19h = ifelse(year == 2020, NA, lag(total_comp_usd19_h)), - # total_comp_usd19_l = ifelse(year == 2020, total_comp_usd19_h, total_comp_usd19_h - ((1 - alpha_comp) * prev_comp_usd19h)) - # ) %>% - # ungroup() %>% - # as.data.table() - # - # review_df <- county_out_labor %>% - # select( - # demand_scenario, refining_scenario, oil_price_scenario, impact_type, - # destination, year, total_production_bbl, total_revenue, total_comp_usd19_h, - # prev_comp_usd19h, total_comp_usd19_l, total_emp, total_emp_revised - # ) - # - # ## save file - # write_csv(review_df, file.path(main_path, "outputs/academic-out/refining/figures/2025-08-update/fig-csv-files/labor_result_x_impact_type_for_review.csv")) - # # write_csv(review_df, file.path(main_path, "outputs/academic-out/refining/figures/2025-08-beta-adj/fig-csv-files/labor_result_x_impact_type_for_review.csv")) - # - # - # ## calc discounted low - # county_out_labor[, total_comp_PV_l := total_comp_usd19_l / ((1 + discount_rate)^(year - 2019))] - # - # ## select columns - # county_out_labor <- county_out_labor[, .( - # demand_scenario, refining_scenario, oil_price_scenario, impact_type, destination, year, total_emp, total_emp_revised, total_comp_h, - # total_comp_usd19_h, total_comp_usd19_l, total_comp_PV_h, total_comp_PV_l - # )] - # - # - # ## save file - # write_csv(county_out_labor, file.path(main_path, "outputs/academic-out/refining/figures/2025-08-update/fig-csv-files/labor_result_x_impact_type.csv")) - # # write_csv(county_out_labor, file.path(main_path, "outputs/academic-out/refining/figures/2025-08-beta-adj/fig-csv-files/labor_result_x_impact_type.csv")) - # - # - # county_out_labor -} - - - - - -## labor results grouped by demographic - -calculate_labor_x_demg_annual <- function(main_path, - annual_direct_labor, - pop_ratios) { - - # ## census pop - # census_pop <- refining_mortality %>% - # ungroup() %>% - # filter(year == 2020) %>% - # select(census_tract, pop) %>% - # unique() %>% - # rename(total_pop = pop) %>% - # as.data.table() - - ## merge with demographic info - ct_out_demo <- merge(annual_direct_labor, - pop_ratios, - by = c("census_tract"), - all.x = T, - allow.cartesian = T) - - ct_out_demo <- ct_out_demo[, .(demand_scenario, - refining_scenario, - oil_price_scenario, - census_tract, - demo_cat, - demo_group, - title, - pct, - year, - total_emp, - total_emp_revised, - total_comp_h, - total_comp_usd19_h, - total_comp_usd19_l, - total_comp_PV_h, - total_comp_PV_l)] - - # - # ## merge with population - # ct_out_demo <- merge(ct_out_demo, census_pop, - # by = c("census_tract"), - # all.x = T) - - ## multiply by pct - ct_out_demo[, `:=` (demo_emp = total_emp * pct, - demo_emp_revised = total_emp_revised * pct, - demo_comp_h = total_comp_h * pct, - demo_comp_usd19_h = total_comp_usd19_h * pct, - demo_comp_usd19_l = total_comp_usd19_l * pct, - demo_comp_PV_h = total_comp_PV_h * pct, - demo_comp_PV_l = total_comp_PV_l * pct)] - - ## summarize at the state level - state_demo_labor_out <- ct_out_demo[, .(sum_demo_emp = sum(demo_emp), - sum_demo_emp_revised = sum(demo_emp_revised), - sum_demo_usd19_h = sum(demo_comp_usd19_h), - sum_demo_usd19_l = sum(demo_comp_usd19_l), - sum_demo_comp_pv_h = sum(demo_comp_PV_h), - sum_demo_comp_pv_l = sum(demo_comp_PV_l) - ), - by = .(demand_scenario, refining_scenario, oil_price_scenario, year, demo_cat, demo_group, title)] - - - state_demo_labor_out <- state_demo_labor_out[, .(demand_scenario, - refining_scenario, - oil_price_scenario, - demo_cat, - demo_group, - title, - year, - sum_demo_emp, - sum_demo_emp_revised, - sum_demo_usd19_h, - sum_demo_usd19_l, - sum_demo_comp_pv_h, - sum_demo_comp_pv_l)] - - ## save for review - write_csv(state_demo_labor_out, file.path(main_path, "outputs/academic-out/refining/figures/2025-update/fig-csv-files/state_labor_direct_impacts_demo_annual.csv")) - - - return(state_demo_labor_out) - - - # - # ## get county and census tracts - # c_ct_df <- raw_pop_income_2021[state == "California"] - # c_ct_df[, census_tract := as.character(substr(geoid, 10, nchar(geoid)))] - # c_ct_df <- c_ct_df[, .(county, census_tract)] - # c_ct_df[, county := str_remove(county, " County")] - # - # c_ct_df <- merge(c_ct_df, ca_regions, - # by = c("county"), - # allow.cartesian = T - # ) -# -# ## get pop from refining_mortality -# pop_df <- refining_mortality %>% -# select(census_tract, year, pop) -# -# # ## get - # ratio_df <- copy(county_grp_pop_ratios) - # setnames(ratio_df, "region", "destination") - # - # ## merge - # labor_pct_df <- merge(annual_labor, ratio_df, - # by = c("destination"), - # allow.cartesian = T - # ) - # - # ## multiply by pct - # # labor_pct_df[, demo_emp := total_emp_revised * pct] - # labor_pct_df[, demo_emp := total_emp * pct] - # labor_pct_df[, demo_comp_pv_h := total_comp_PV_h * pct] - # labor_pct_df[, demo_comp_pv_l := total_comp_PV_l * pct] -# -# ## merge with population -# labor_pct_df <- merge(labor_pct_df, pop_df, -# by = c("destination", "year"), -# all.x = T -# ) -# -# ## rename pop column -# setnames(labor_pct_df, "pop", "region_pop") -# -# return(labor_pct_df) -} - - -## function for summarizing labor data -calculate_labor_x_demg <- function(ref_labor_demog_yr) { - labor_pct <- copy(ref_labor_demog_yr) - - # ## county pop by demographic group - # labor_pct[, demo_pop := region_pop * pct] - - ## summarise over years - labor_pct <- labor_pct[, .( - sum_demo_emp = sum(sum_demo_emp), - sum_demo_comp_pv_h = sum(sum_demo_comp_pv_h), - sum_demo_comp_pv_l = sum(sum_demo_comp_pv_l) - ), - by = .(demand_scenario, refining_scenario, oil_price_scenario, demo_cat, demo_group, title) - ] - - return(labor_pct) -} - -## function for creating labor output df -calculate_annual_labor_x_demg_hl <- function(main_path, - ref_labor_demog_yr, - refining_mortality, - pop_ratios) { - labor_pct <- copy(ref_labor_demog_yr) - - # ## county pop by demographic group - # labor_pct[, demo_emp_revised := total_emp_revised * pct] -# -# ## summarise over years -# labor_pct <- labor_pct[, .( -# sum_demo_emp = sum(demo_emp), -# sum_demo_emp_revised = sum(demo_emp_revised), -# sum_demo_comp_pv_h = sum(demo_comp_pv_h), -# sum_demo_comp_pv_l = sum(demo_comp_pv_l) -# ), -# by = .(demand_scenario, refining_scenario, oil_price_scenario, demo_cat, demo_group, title, year) -# ] - - ## change scenario names, factor - labor_pct[, scenario := paste0(demand_scenario, " demand - ", refining_scenario)] - labor_pct[, scenario := gsub("LC1.", "Low ", scenario)] - - ## change historic to historical - labor_pct[, refining_scenario := str_replace(refining_scenario, "historic", "historical")] - labor_pct[, scenario := str_replace(scenario, "historic", "historical")] - - # ## select columns - # labor_pct <- labor_pct[, .( - # scenario, demand_scenario, refining_scenario, oil_price_scenario, year, demo_cat, demo_group, - # title, sum_demo_emp, sum_demo_emp_revised, sum_demo_comp_pv_h, sum_demo_comp_pv_l - # )] - - ## compute per million stat - ## --------------------------------------------------------- - - ## calc 2020 pop by demographic - pop_2020 <- refining_mortality %>% - filter(year == 2020) %>% - select(census_tract, year, pop) %>% - unique() %>% - left_join(pop_ratios) %>% - as.data.table() - - pop_2020[, demo_pop := pop * pct] - - ## summarize by demographic group - pop_2020 <- pop_2020[, .(pop_2020 = sum(demo_pop)), - by = .(demo_group, demo_cat) - ] - - - ## merge with 2020 pop - labor_pct <- merge(labor_pct, pop_2020, - by = c("demo_cat", "demo_group"), - all.x = T - ) - - ## calculate per capita - labor_pct[, demo_emp_pc_h := sum_demo_emp / pop_2020] - labor_pct[, demo_emp_pmil_h := demo_emp_pc_h * 1e6] - labor_pct[, demo_emp_pc_l := sum_demo_emp_revised / pop_2020] - labor_pct[, demo_emp_pmil_l := demo_emp_pc_l * 1e6] - labor_pct[, demo_comp_pc_h := sum_demo_comp_pv_h / pop_2020] - labor_pct[, demo_comp_pc_pmil_h := demo_comp_pc_h * 1e6] - labor_pct[, demo_comp_pc_l := sum_demo_comp_pv_l / pop_2020] - labor_pct[, demo_comp_pc_pmil_l := demo_comp_pc_l * 1e6] - ## remove pop - labor_pct[, pop_2020 := NULL] - labor_pct[, title := NULL] - - ## for renaming - high_est_vec <- c("sum_demo_emp", "demo_emp_pc_h", "demo_emp_pmil_h", "sum_demo_comp_pv_h", "demo_comp_pc_h", "demo_comp_pc_pmil_h") - low_est_vec <- c("sum_demo_emp_revised", "demo_emp_pc_l", "demo_emp_pmil_l", "sum_demo_comp_pv_l", "demo_comp_pc_l", "demo_comp_pc_pmil_l") - - labor_metric_df <- tibble( - metric = c(high_est_vec, low_est_vec), - metric_name = c( - "employment", - "employment_pc", - "employment_pmil", - "compensation_pv", - "compensation_pv_pc", - "compensation_pv_pmil", - "employment", - "employment_pc", - "employment_pmil", - "compensation_pv", - "compensation_pv_pc", - "compensation_pv_pmil" - ) - ) - - ## pivot longer - labor_pct_long <- melt(labor_pct, id.vars = c("demo_cat", "demo_group", "scenario", "demand_scenario", "refining_scenario", "oil_price_scenario", "year"), variable.name = "metric", value.name = "value") - labor_pct_long[, estimate := fifelse(metric %in% high_est_vec, "high", "low")] - - ## merge - labor_pct_long <- merge(labor_pct_long, labor_metric_df, - by = "metric", - all.x = T - ) - - labor_pct_long <- labor_pct_long[, .( - demo_cat, demo_group, scenario, demand_scenario, - refining_scenario, oil_price_scenario, year, metric_name, estimate, value - )] - - - ## save df - fwrite(labor_pct_long, file.path(main_path, "outputs/academic-out/refining/figures/2025-update/fig-csv-files/", "labor_high_low_annual_outputs.csv")) - # fwrite(labor_pct_long, file.path(main_path, "outputs/academic-out/refining/figures/2025-08-beta-adj/fig-csv-files", "labor_high_low_annual_outputs.csv")) - - - return(labor_pct_long) -} - -calc_county_level_outputs <- function(main_path, - annual_direct_labor, - refining_mortality, - raw_pop_income_2021, - pop_ratios) { - - - - ## merge with demographic info - ct_out_demo <- merge(annual_direct_labor, - pop_ratios, - by = c("census_tract"), - all.x = T, - allow.cartesian = T) - - ct_out_demo <- ct_out_demo[, .(demand_scenario, - refining_scenario, - oil_price_scenario, - census_tract, - demo_cat, - demo_group, - title, - pct, - year, - total_emp, - total_emp_revised, - total_comp_h, - total_comp_usd19_h, - total_comp_usd19_l, - total_comp_PV_h, - total_comp_PV_l)] - - ## multiply by pct - ct_out_demo[, `:=` (demo_emp = total_emp * pct, - demo_emp_revised = total_emp_revised * pct, - demo_comp_h = total_comp_h * pct, - demo_comp_usd19_h = total_comp_usd19_h * pct, - demo_comp_usd19_l = total_comp_usd19_l * pct, - demo_comp_PV_h = total_comp_PV_h * pct, - demo_comp_PV_l = total_comp_PV_l * pct)] - - - - ## compute county populations - pop_2020 <- refining_mortality %>% - filter(year == 2020) %>% - select(census_tract, year, pop) %>% - unique() %>% - as.data.table() - - ## census tract x county - c_ct_df <- raw_pop_income_2021[state == "California"] - c_ct_df[, census_tract := as.character(substr(geoid, 10, nchar(geoid)))] - c_ct_df <- c_ct_df[, .(county, census_tract)] - c_ct_df[, county := str_remove(county, " County")] - - ## merge with counties - pop_2020 <- merge(pop_2020, c_ct_df, - by = c("census_tract"), - all.x = T - ) - - pop_2020 <- pop_2020[, .(census_tract, pop, county)] - - ## merge - labor_county_out <- merge(ct_out_demo, pop_2020, - by = c("census_tract"), - all.x = T - ) - - - ## direct impact - ## make labor outputs long and sum by county region - labor_county_out <- labor_county_out %>% - select( - demand_scenario, - refining_scenario, - oil_price_scenario, - demo_cat, - demo_group, - title, - census_tract, - county, - pop, - year, - demo_emp, - demo_emp_revised, - demo_comp_PV_h, - demo_comp_PV_l) - - - ## summarize by county - labor_county_out <- labor_county_out[, .(county_pop = sum(pop), - demo_emp = sum(demo_emp), - demo_emp_revised = sum(demo_emp_revised), - demo_comp_PV_h = sum(demo_comp_PV_h), - demo_comp_PV_l = sum(demo_comp_PV_l)), - by = .(demand_scenario, - refining_scenario, - oil_price_scenario, - demo_cat, - demo_group, - title, - county)] - - - # ## test - # test_df <- labor_county_region_out %>% - # group_by(county, demand_scenario, refining_scenario, demo_cat, metric) %>% - # summarize(sum_value = sum(value)) %>% - # ungroup() - # - # ## total for county - # total_county_test <- ref_labor_demog_yr %>% - # select(demand_scenario, refining_scenario, destination, year, total_emp, total_emp_revised, - # total_comp_PV_h, total_comp_PV_l) %>% - # unique() %>% - # rename(region = destination) %>% - # full_join(county_region_ratio) %>% - # mutate(total_emp_h_county = total_emp * county_ratio, - # total_emp_l_county = total_emp_revised * county_ratio, - # total_comp_PV_h_county = total_comp_PV_h * county_ratio, - # total_comp_PV_l_county = total_comp_PV_l * county_ratio) %>% - # select(county, demand_scenario, refining_scenario, year, total_emp_h_county, - # total_emp_l_county, total_comp_PV_l_county, total_comp_PV_h_county) %>% - # pivot_longer(total_emp_h_county:total_comp_PV_h_county, names_to = "metric", values_to = "value") %>% - # group_by(county, demand_scenario, refining_scenario, metric) %>% - # summarise(value = sum(value)) %>% - # ungroup() - # - - - ## for renaming - high_est_vec <- c("demo_emp", "demo_comp_PV_h") - low_est_vec <- c("demo_emp_revised","demo_comp_PV_l") - - labor_metric_df <- tibble( - metric = c(high_est_vec, low_est_vec), - metric_name = c( - "employment", - "compensation_pv", - "employment", - "compensation_pv" - ) - ) - - labor_county_out_df <- labor_county_out %>% - pivot_longer(demo_emp:demo_comp_PV_l, names_to = "metric", values_to = "value") %>% - left_join(labor_metric_df) %>% - mutate(estimate = ifelse(metric %in% high_est_vec, "high", "low")) %>% - select(demand_scenario:county_pop, metric_name, estimate, value) %>% - as.data.table() - - - ## save df - fwrite(labor_county_out_df, file.path(main_path, "outputs/academic-out/refining/figures/2025-update/fig-csv-files/", "labor_county_outputs.csv")) - # fwrite(labor_county_out_df, file.path(main_path, "outputs/academic-out/refining/figures/2024-08-beta-adj/fig-csv-files", "labor_county_outputs.csv")) - - - return(labor_county_out_df) -} diff --git a/before-targets/exploratory/labor_revisions_prep.R b/before-targets/data-prep/labor_revisions_prep.R similarity index 100% rename from before-targets/exploratory/labor_revisions_prep.R rename to before-targets/data-prep/labor_revisions_prep.R diff --git a/before-targets/exploratory/compare-health-npv-plot.R b/before-targets/exploratory/compare-health-npv-plot.R deleted file mode 100644 index 7018867..0000000 --- a/before-targets/exploratory/compare-health-npv-plot.R +++ /dev/null @@ -1,16 +0,0 @@ -## compare health npv plot outputs - -library(data.table) -library(tidyverse) - -main_path <- "/Users/tracey/Library/CloudStorage/GoogleDrive-tmangin@ucsb.edu/Shared drives/emlab/projects/current-projects/calepa-cn/outputs/academic-out/refining/figures/" - -submission_df <- "2024-orig-submission/fig-csv-files/state_npv_fig_inputs_health.csv" -newer_df <- "2025-health-revisions/fig-csv-files/state_npv_fig_inputs_health.csv" - -orig_df <- fread(paste0(main_path, submission_df)) - -new_df <- fread(paste0(main_path, newer_df)) |> - rename(new_val = value) |> - left_join(orig_df) |> - mutate(diff = new_val - value) diff --git a/before-targets/exploratory/compare_demo.R b/before-targets/exploratory/compare_demo.R deleted file mode 100644 index 66d89e9..0000000 --- a/before-targets/exploratory/compare_demo.R +++ /dev/null @@ -1,80 +0,0 @@ -library(data.table) -library(ggplot2) -library(tidyr) -library(dplyr) -library(cowplot) - -# $tar_target(name = file_population, command = file.path(main_path, "data/benmap/processed/ct_inc_45.csv"), format = "file"), -# tar_target(name = file_dt_ct_inc_pop, command = file.path(main_path, "data/health/processed/ct_inc_45_2020.csv"), format = "file"), - -path <- c("G://Shared drives/emlab/projects/current-projects/calepa-cn") - -old <- fread(paste0(path, "/data/benmap/processed/ct_inc_45.csv")) -new_old <- fread(paste0(path, "/data/health/processed/ct_inc_45_2020_old.csv")) -new <- fread(paste0(path, "/data/health/processed/ct_inc_45_2020.csv")) - -# First compare total pop in 2020 - -old %>% - filter(year == 2020) %>% - summarize(pop = sum(pop) / 1000000) -new_old %>% - filter(year == 2020) %>% - summarize(pop = sum(pop)) -new %>% - filter(year == 2020) %>% - summarize(pop = sum(pop)) - -# Compare age groups -old %>% - select(lower_age, upper_age) %>% - distinct() -new_old %>% - select(start_age, end_age) %>% - distinct() -new %>% - select(start_age, end_age) %>% - distinct() - -# figs by age group - -old_fig <- old %>% - filter(year > 2019) %>% - mutate(age_group = ifelse(lower_age < 30, "<30", ">=30")) %>% - group_by(age_group, year) %>% - summarise(pop = sum(pop) / 1000000) %>% - ggplot(aes(x = year, y = pop, color = age_group, group = age_group)) + - geom_line() + - # scale_y_continuous(limits = c(0,25), breaks = c(0,5,10,15,20,25))+ - scale_y_continuous(limits = c(10, 30), breaks = c(10, 15, 20, 25, 30)) + - theme_cowplot() + - labs(y = "Population (millions)", x = "Year") + - guides(color = "none") -old_fig - -new_old_fig <- new_old %>% - filter(year > 2019) %>% - mutate(age_group = ifelse(start_age < 30, "<30", ">=30")) %>% - group_by(age_group, year) %>% - summarise(pop = sum(pop) / 1000000) %>% - ggplot(aes(x = year, y = pop, color = age_group, group = age_group)) + - geom_line() + - # scale_y_continuous(limits = c(0,25), breaks = c(0,5,10,15,20,25))+ - scale_y_continuous(limits = c(10, 30), breaks = c(10, 15, 20, 25, 30)) + - theme_cowplot() + - labs(y = NULL, x = "Year") + - guides(color = "none") -new_old_fig - -new_fig <- new %>% - mutate(age_group = ifelse(start_age < 30, "<30", ">=30")) %>% - group_by(age_group, year) %>% - summarise(pop = sum(pop) / 1000000) %>% - ggplot(aes(x = year, y = pop, color = age_group, group = age_group)) + - geom_line() + - theme_cowplot() + - scale_y_continuous(limits = c(10, 30), breaks = c(10, 15, 20, 25, 30)) + - labs(y = "Population (millions)", color = "Age group", x = "Year") -new_fig - -plot_grid(old_fig, new_old_fig, new_fig, labels = c("A. Old population data", "B. New (old) population data", "C. New population data")) diff --git a/before-targets/exploratory/cumlutative_number_health.R b/before-targets/exploratory/cumlutative_number_health.R deleted file mode 100644 index 1c84258..0000000 --- a/before-targets/exploratory/cumlutative_number_health.R +++ /dev/null @@ -1,57 +0,0 @@ -### Cumulative NPV for health - -# setwd("G://Shared drives/emlab/projects/current-projects/calepa-cn/outputs/academic-out/refining/figures/2024-08-update/fig-csv-files") -setwd("H://Shared drives/emlab/projects/current-projects/calepa-cn/outputs/academic-out/refining/figures/2024-08-update/fig-csv-files") - -cum_h <- fread("state_npv_fig_inputs_health.csv", stringsAsFactors = F) - -cum_h %>% - filter(metric %in% "avoided_health_cost") %>% - filter(scen_id %in% c("BAU historical production", "BAU historical exports", "LC1 low exports")) %>% - select(scen_id, value, unit_desc, unit) - -# cumulative mortality and pop - -# cum_m <- fread("cumulative_avoided_mortality.csv" , stringsAsFactors = F) ; cum_m -# -# cum_m%>% -# filter(demo_cat %in% c("Race"))%>% -# group_by(scen_id)%>% -# summarise(cumul_mort_level = sum(cumul_mort_level )) -# filter(scen_id %in% c("BAU historical production","LC1 historical exports","LC1 low exports", "BAU historical exports")) - -# cumulative pop - -# fread("state_disaggregated_npv_pc_fig_inputs.csv" , stringsAsFactors = F) - -### SENSITIVITY ANALYSIS TO DIFF BETA - -# Krewski et al Beta (0.0058) and SE (0.0009628) - -# 90% CI [5 percentile; 95 percentile] - -beta_95 <- 0.0058 + 1.64 * 0.000963 -beta_95 -beta_5 <- 0.0058 - 1.64 * 0.000963 -beta_5 - -## High - -setwd("H://Shared drives/emlab/projects/current-projects/calepa-cn/outputs/academic-out/refining/figures/2024-08-beta-adj-high/fig-csv-files") -cum_h <- fread("state_npv_fig_inputs_health.csv", stringsAsFactors = F) - -cum_h %>% - filter(metric %in% "avoided_health_cost") %>% - filter(scen_id %in% c("BAU historical production", "BAU historical exports", "LC1 low exports")) %>% - select(scen_id, value, unit_desc, unit) - - -## High - -setwd("H://Shared drives/emlab/projects/current-projects/calepa-cn/outputs/academic-out/refining/figures/2024-08-beta-adj-low/fig-csv-files") -cum_h <- fread("state_npv_fig_inputs_health.csv", stringsAsFactors = F) - -cum_h %>% - filter(metric %in% "avoided_health_cost") %>% - filter(scen_id %in% c("BAU historical production", "BAU historical exports", "LC1 low exports")) %>% - select(scen_id, value, unit_desc, unit) diff --git a/before-targets/exploratory/debug_refinery_level_emission_factors.R b/before-targets/exploratory/debug_refinery_level_emission_factors.R deleted file mode 100644 index 842e86c..0000000 --- a/before-targets/exploratory/debug_refinery_level_emission_factors.R +++ /dev/null @@ -1,72 +0,0 @@ - - -renewables_info_altair <- tar_read(renewables_info_altair) -dt_ef_ref <- tar_read(dt_ef_ref) -dt_ef <- tar_read(dt_ef) -dt_refcap <- tar_read(dt_refcap) - -ref_ei <- fread("H:/Shared drives/emlab/projects/current-projects/calepa-cn/data-staged-for-deletion/health/processed/refinery_emission_factor.csv" - , stringsAsFactors = F) - - -#refining <- copy(refining_sites_cons_ghg_2019_2045) -refining <- fread("C:/Users/vince/Desktop/refining_sites_cons_ghg_2019_2045_for_review.csv", stringsAsFactors = F) - -cluster_cw <- dt_refcap %>% - dplyr::select(site_id, region) %>% - mutate(site_id = as.character(site_id)) %>% - bind_rows(renewables_info_altair %>% dplyr::select(site_id, region)) %>% - distinct() - -refining <- merge(refining, cluster_cw, by = "site_id", all.x = T, allow.cartesian = T, no.dups = T) - -refining[, site_id := ifelse(site_id == "t-800", "800", site_id)] -refining[, site_id := ifelse(site_id == "342-2", "34222", site_id)] -refining[, site_id := as.numeric(site_id)] - -# Cluster-level emission factors ----------------------------------------- -dt_ef <- dt_ef %>% - mutate(ton_bbl = kg_bbl / 1000) %>% - dplyr::select(-kg_bbl) %>% - spread(pollutant_code, ton_bbl) - -# refining <- merge(refining, dt_ef, by.x = "region", by.y = "cluster", all.x = T, allow.cartesian = T, no.dups = T) -# -# refining <- refining %>% -# mutate( -# nh3 = bbls_consumed * NH3, -# nox = bbls_consumed * NOX, -# pm25 = bbls_consumed * `PM25-PRI`, -# sox = bbls_consumed * SO2, -# voc = bbls_consumed * VOC -# ) %>% -# dplyr::select(-NH3:-VOC) - -# Refinery-level emission factors ----------------------------------------- - -dt_ef_ref <- dt_ef_ref %>% - mutate(ton_bbl = kg_bbl / 1000) %>% - dplyr::select(-kg_bbl) %>% - spread(pollutant_code, ton_bbl)%>% #0 for NH3 for facility 271 (San Joaquin Refining Company Inc., Bakersfield Refinery) - mutate(NH3 = replace_na(NH3,0)) # didnt report any for NEI 2011, 2014 and 2017 - -refining <- merge(refining, dt_ef_ref, by.x = "site_id", by.y = "id1", all.x = T, allow.cartesian = T, no.dups = T) -refining <- merge(refining, dt_ef, by.x = "region", by.y = "cluster", all.x = T, allow.cartesian = T, no.dups = T) - -#Assign cluster-specific EF for renewable fuel refineries -refining <- refining%>% - mutate(NH3 = coalesce(NH3.x,NH3.y), - NOX = coalesce(NOX.x,NOX.y), - `PM25-PRI` = coalesce(`PM25-PRI.x`,`PM25-PRI.y`), - SO2 = coalesce(SO2.x,SO2.y), - VOC = coalesce(VOC.x,VOC.y)) - -refining <- refining %>% - mutate( - nh3 = bbls_consumed * NH3, - nox = bbls_consumed * NOX, - pm25 = bbls_consumed * `PM25-PRI`, - sox = bbls_consumed * SO2, - voc = bbls_consumed * VOC - ) %>% - dplyr::select(-NH3.x:-VOC) \ No newline at end of file diff --git a/before-targets/exploratory/exploratory-figs/srm_ratio_120pm25_pm25.png b/before-targets/exploratory/exploratory-figs/srm_ratio_120pm25_pm25.png deleted file mode 100644 index ebf90ff..0000000 Binary files a/before-targets/exploratory/exploratory-figs/srm_ratio_120pm25_pm25.png and /dev/null differ diff --git a/before-targets/exploratory/exploratory-figs/srm_ratio_120pm25_pm25_no_outliers.png b/before-targets/exploratory/exploratory-figs/srm_ratio_120pm25_pm25_no_outliers.png deleted file mode 100644 index 77b9876..0000000 Binary files a/before-targets/exploratory/exploratory-figs/srm_ratio_120pm25_pm25_no_outliers.png and /dev/null differ diff --git a/before-targets/exploratory/exploratory-figs/srm_ratio_nh3_pm25.png b/before-targets/exploratory/exploratory-figs/srm_ratio_nh3_pm25.png deleted file mode 100644 index fa98949..0000000 Binary files a/before-targets/exploratory/exploratory-figs/srm_ratio_nh3_pm25.png and /dev/null differ diff --git a/before-targets/exploratory/exploratory-figs/srm_ratio_pollutant_pm25.png b/before-targets/exploratory/exploratory-figs/srm_ratio_pollutant_pm25.png deleted file mode 100644 index 91d027e..0000000 Binary files a/before-targets/exploratory/exploratory-figs/srm_ratio_pollutant_pm25.png and /dev/null differ diff --git a/before-targets/exploratory/exploratory_health_figs.R b/before-targets/exploratory/exploratory_health_figs.R deleted file mode 100644 index f78022e..0000000 --- a/before-targets/exploratory/exploratory_health_figs.R +++ /dev/null @@ -1,50 +0,0 @@ -# setwd("G://Shared drives/emlab/projects/current-projects/calepa-cn/outputs/refining-2024/health") -setwd("H://Shared drives/emlab/projects/current-projects/calepa-cn/outputs/refining-2024/health") - -mort <- fread("refining_mortality_2023.csv", stringsAsFactors = F) - -## pop-weighted pollution exposure - -mort %>% - group_by(disadvantaged, year) %>% - mutate(weight = pop / sum(pop, na.rm = F)) %>% - ungroup() %>% - group_by(scen_id, disadvantaged, year) %>% - summarise(per_cap = sum(total_pm25 * weight, na.rm = T)) %>% - ungroup() %>% - ggplot(aes(x = year, y = per_cap, group = disadvantaged, color = disadvantaged)) + - facet_wrap(~scen_id) + - geom_line() + - geom_point() + - theme_cowplot() + - labs(y = "PM2.5 (pop-weighted)", color = "DAC") - -## pop-weighted health cost per cap - -mort %>% - mutate(per_cap = cost_2019_PV / pop) %>% - group_by(scen_id, disadvantaged, year) %>% - summarise(per_cap = mean(per_cap, na.rm = T)) %>% - ungroup() %>% - ggplot(aes(x = year, y = per_cap, group = disadvantaged, color = disadvantaged)) + - facet_wrap(~scen_id) + - geom_line() + - geom_point() + - theme_cowplot() + - labs(y = "NPV per cap (unweighted)", color = "DAC") - -## pop-unweighted health cost per cap - -mort %>% - group_by(disadvantaged, year, scen_id) %>% - summarize( - cost_2019_PV = sum(cost_2019_PV, na.rm = F), - pop = sum(pop, na.rm = F) - ) %>% - ungroup() %>% - ggplot(aes(x = year, y = cost_2019_PV / pop, group = disadvantaged, color = disadvantaged)) + - facet_wrap(~scen_id) + - geom_line() + - geom_point() + - theme_cowplot() + - labs(y = "NPV per cap (pop-weighted)", color = "DAC") diff --git a/before-targets/exploratory/labor_check.R b/before-targets/exploratory/labor_check.R deleted file mode 100644 index 9fdf384..0000000 --- a/before-targets/exploratory/labor_check.R +++ /dev/null @@ -1,424 +0,0 @@ -###### CHECK LABOR OUTPUTS - -rm(list = ls()) - -list.of.packages <- c("dplyr", "data.table", "lubridate", "tidyr", "readxl", "fixest", "modelsummary", "flextable") -new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[, "Package"])] -if (length(new.packages)) install.packages(new.packages) - -library(lubridate) -library(tidyr) -library(dplyr) -library(data.table) -library(readxl) -library(stringr) -library(readr) -library(fixest) -library(modelsummary) -library(flextable) -library(svglite) -library(sf) -library(ggplot2) -library(tigris) -library(cowplot) -library(janitor) - - -# setwd('C:/Users/mall0065/Dropbox/calepa/refining-labor') -setwd("~/Dropbox/calepa/refining-labor") - -### define function for "not in" -"%!in%" <- function(x, y) !("%in%"(x, y)) - -# Check that string doesn't match any non-letter -letters_only <- function(x) !grepl("[^A-Za-z*-]", x) - - -# READ IN LABOR MULTIPLIERS - -multipliers <- fread("20240605-census_regions-Detail Economic Indicators.csv") - -multipliers <- janitor::clean_names(multipliers) - -multipliers <- multipliers[, .( - employment = sum(employment), - emp_comp = sum(employee_compensation) -), .(origin_region, destination_region, impact_type)] - -multipliers[, impact_type := tolower(impact_type)] - -multipliers[, county := str_remove(origin_region, " County, CA Group")] - -setnames(multipliers, c("origin_region", "destination_region"), c("origin", "destination")) - -multipliers <- multipliers[, .(county, origin, destination, impact_type, employment, emp_comp)] - -str(multipliers) - -multipliers %>% - filter(destination == "Los Angeles County, CA") %>% - summary() - -multipliers %>% - filter(destination != "Los Angeles County, CA") %>% - summary() - -# READ IN LABOR IMPACTS - -df <- fread("labor_result_for_review (2).csv") -str(df) - -## CALCULATE DIFFERENCE FROM BAU AND AGGREGATE - -df.agg <- group_by( - df, - demand_scenario, refining_scenario, year -) %>% - summarize( - total_comp_usd19_h = sum(total_comp_usd19_h, na.rm = TRUE), - total_comp_usd19_l = sum(total_comp_usd19_l, na.rm = TRUE), - total_emp = sum(total_emp, na.rm = TRUE), - total_emp_revised = sum(total_emp_revised, na.rm = TRUE), - total_revenue = sum(total_revenue, na.rm = TRUE), - total_production_bbl = sum(total_production_bbl, na.rm = TRUE) - ) %>% - ungroup() -summary(df.agg$total_comp_usd19_h) -summary(df.agg$total_comp_usd19_l) -summary(df.agg$total_emp) -summary(df.agg$total_emp_revised) - - -df.bau <- filter( - df, - demand_scenario == "BAU" & refining_scenario == "historic production" -) %>% - group_by(demand_scenario, refining_scenario, year) %>% - summarize( - total_comp_usd19_h = sum(total_comp_usd19_h, na.rm = TRUE), - total_comp_usd19_l = sum(total_comp_usd19_l, na.rm = TRUE), - total_emp = sum(total_emp, na.rm = TRUE), - total_emp_revised = sum(total_emp_revised, na.rm = TRUE), - total_revenue = sum(total_revenue, na.rm = TRUE), - total_production_bbl = sum(total_production_bbl, na.rm = TRUE) - ) %>% - rename( - total_comp_usd19_h_bau = total_comp_usd19_h, - total_comp_usd19_l_bau = total_comp_usd19_l, - total_emp_bau = total_emp, - total_emp_revised_bau = total_emp_revised, - total_revenue_bau = total_revenue, - total_production_bbl_bau = total_production_bbl - ) %>% - ungroup() %>% - dplyr::select(-demand_scenario, -refining_scenario) - -df.county.bau <- filter( - df, - demand_scenario == "BAU" & refining_scenario == "historic production" -) %>% - group_by(demand_scenario, refining_scenario, year, destination) %>% - summarize( - total_comp_usd19_h = sum(total_comp_usd19_h, na.rm = TRUE), - total_comp_usd19_l = sum(total_comp_usd19_l, na.rm = TRUE), - total_emp = sum(total_emp, na.rm = TRUE), - total_emp_revised = sum(total_emp_revised, na.rm = TRUE), - total_revenue = sum(total_revenue, na.rm = TRUE), - total_production_bbl = sum(total_production_bbl, na.rm = TRUE) - ) %>% - rename( - total_comp_usd19_h_bau = total_comp_usd19_h, - total_comp_usd19_l_bau = total_comp_usd19_l, - total_emp_bau = total_emp, - total_emp_revised_bau = total_emp_revised, - total_revenue_bau = total_revenue, - total_production_bbl_bau = total_production_bbl - ) %>% - ungroup() %>% - dplyr::select(-demand_scenario, -refining_scenario) - - -df.agg <- left_join(df.agg, df.bau, by = c("year")) %>% - mutate( - total_comp_usd19_h = total_comp_usd19_h - total_comp_usd19_h_bau, - total_comp_usd19_l = total_comp_usd19_l - total_comp_usd19_l_bau, - total_emp = total_emp - total_emp_bau, - total_emp_revised = total_emp_revised - total_emp_revised_bau - ) - -summary(df.agg$total_comp_usd19_h) -summary(df.agg$total_comp_usd19_l) -summary(df.agg$total_emp) -summary(df.agg$total_emp_revised) -summary(df.agg$total_revenue) -summary(df.agg$total_production_bbl) - -df.county <- left_join(df, df.county.bau, by = c("destination", "year")) %>% - mutate( - total_comp_usd19_h = total_comp_usd19_h - total_comp_usd19_h_bau, - total_comp_usd19_l = total_comp_usd19_l - total_comp_usd19_l_bau, - total_emp = total_emp - total_emp_bau, - total_emp_revised = total_emp_revised - total_emp_revised_bau, - total_revenue_gap = total_revenue - total_revenue_bau, - total_production_bbl_gap = total_production_bbl - total_production_bbl_bau - ) - -### PATHWAYS - -#### FIG 1: COMPENSATION LOWER BOUND -fig1 <- filter( - df.agg, - demand_scenario == "LC1" & refining_scenario == "low exports" -) %>% - ggplot(aes(y = total_comp_usd19_l / 1000, x = year)) + - geom_line(size = 1, color = "#841617") + - theme_cowplot(12) + - scale_x_continuous(limits = c(2020, 2045)) + - # scale_y_continuous(limits = c(0,20)) + - labs(y = "Compensation (2019 USD)", x = "Year", color = "", linetype = "") + - theme( - panel.background = element_blank(), - legend.position = "top", legend.justification = "center", - panel.grid.major.y = element_line(color = "gray", size = 0.5), - panel.grid.major.x = element_blank(), - panel.grid.minor.x = element_blank(), - axis.text = element_text(size = 14), - axis.title = element_text(size = 14) - ) -fig1 - -#### FIG 2: COMPENSATION UPPER BOUND -fig2 <- filter( - df.agg, - demand_scenario == "LC1" & refining_scenario == "low exports" -) %>% - ggplot(aes(y = total_comp_usd19_h / 1000, x = year)) + - geom_line(size = 1, color = "#841617") + - theme_cowplot(12) + - scale_x_continuous(limits = c(2020, 2045)) + - # scale_y_continuous(limits = c(0,20)) + - labs(y = "Compensation (2019 USD)", x = "Year", color = "", linetype = "") + - theme( - panel.background = element_blank(), - legend.position = "top", legend.justification = "center", - panel.grid.major.y = element_line(color = "gray", size = 0.5), - panel.grid.major.x = element_blank(), - panel.grid.minor.x = element_blank(), - axis.text = element_text(size = 14), - axis.title = element_text(size = 14) - ) -fig2 - -#### FIG 3: FTE JOBS -fig3 <- filter( - df.agg, - demand_scenario == "LC1" & refining_scenario == "low exports" -) %>% - ggplot(aes(y = total_emp_revised / 1000, x = year)) + - geom_line(size = 1, color = "#841617") + - theme_cowplot(12) + - scale_x_continuous(limits = c(2020, 2045)) + - # scale_y_continuous(limits = c(-60,60)) + - labs(y = "Labor FTE Job-years (Thousands)", x = "Year", color = "", linetype = "") + - theme( - panel.background = element_blank(), - legend.position = "top", legend.justification = "center", - panel.grid.major.y = element_line(color = "gray", size = 0.5), - panel.grid.major.x = element_blank(), - panel.grid.minor.x = element_blank(), - axis.text = element_text(size = 14), - axis.title = element_text(size = 14) - ) -fig3 - - -## FIG 4: FTE JOBS BY COUNTY - -fig4 <- filter( - df.county, - demand_scenario == "LC1" & refining_scenario == "low exports" -) %>% - ggplot(aes(y = total_emp_revised / 1000, x = year, color = destination)) + - geom_line(size = 1, aes(color = destination)) + - theme_cowplot(12) + - scale_x_continuous(limits = c(2020, 2045)) + - # scale_y_continuous(limits = c(-60,60)) + - labs(y = "Labor FTE Job-years (Thousands)", x = "Year", color = "", linetype = "") + - theme( - panel.background = element_blank(), - legend.position = "top", legend.justification = "center", - panel.grid.major.y = element_line(color = "gray", size = 0.5), - panel.grid.major.x = element_blank(), - panel.grid.minor.x = element_blank(), - axis.text = element_text(size = 14), - axis.title = element_text(size = 14) - ) -fig4 - -ggsave("emp_check.png", fig4, width = 10, height = 5, units = "in", dpi = 300, bg = "white") - - -## FIG 5: Compensation BY COUNTY - -fig5 <- filter( - df.county, - demand_scenario == "LC1" & refining_scenario == "low exports" -) %>% - ggplot(aes(y = total_comp_usd19_l / 1000000, x = year, color = destination)) + - geom_line(size = 1, aes(color = destination)) + - theme_cowplot(12) + - scale_x_continuous(limits = c(2020, 2045)) + - # scale_y_continuous(limits = c(-60,60)) + - labs(y = "Compensation (Millions)", x = "Year", color = "", linetype = "") + - theme( - panel.background = element_blank(), - legend.position = "top", legend.justification = "center", - panel.grid.major.y = element_line(color = "gray", size = 0.5), - panel.grid.major.x = element_blank(), - panel.grid.minor.x = element_blank(), - axis.text = element_text(size = 14), - axis.title = element_text(size = 14) - ) -fig5 - -ggsave("comp_check.png", fig5, width = 10, height = 5, units = "in", dpi = 300, bg = "white") - - -#### FIG 6: COMPENSATION LOWER BOUND -fig6 <- filter( - df.county, - demand_scenario == "LC1" & refining_scenario == "low exports" & year > 2022 & destination == "Siskiyou County, CA" -) %>% - ggplot(aes(y = total_comp_usd19_l / 1000000, x = year)) + - geom_line(size = 1, color = "#841617") + - theme_cowplot(12) + - scale_x_continuous(limits = c(2020, 2045)) + - # scale_y_continuous(limits = c(0,20)) + - labs(y = "Compensation (2019 USD)", x = "Year", color = "", linetype = "") + - theme( - panel.background = element_blank(), - legend.position = "top", legend.justification = "center", - panel.grid.major.y = element_line(color = "gray", size = 0.5), - panel.grid.major.x = element_blank(), - panel.grid.minor.x = element_blank(), - axis.text = element_text(size = 14), - axis.title = element_text(size = 14) - ) -fig6 - -#### FIG 7: FTE JOBS -fig7 <- filter( - df.county, - demand_scenario == "LC1" & refining_scenario == "historic exports" -) %>% - ggplot(aes(y = total_emp_revised / 1000, x = year)) + - geom_line(size = 1, color = "#841617") + - theme_cowplot(12) + - scale_x_continuous(limits = c(2020, 2045)) + - # scale_y_continuous(limits = c(-60,60)) + - labs(y = "Labor FTE Job-years (Thousands)", x = "Year", color = "", linetype = "") + - theme( - panel.background = element_blank(), - legend.position = "top", legend.justification = "center", - panel.grid.major.y = element_line(color = "gray", size = 0.5), - panel.grid.major.x = element_blank(), - panel.grid.minor.x = element_blank(), - axis.text = element_text(size = 14), - axis.title = element_text(size = 14) - ) -fig7 - - -#### FIG 8: REVENUE -fig8 <- filter( - df.county, - demand_scenario == "LC1" & refining_scenario == "low exports" -) %>% - ggplot(aes(y = total_revenue_gap / 10^9, x = year, color = destination)) + - geom_line(size = 1, aes(color = destination)) + - theme_cowplot(12) + - scale_x_continuous(limits = c(2020, 2045)) + - # scale_y_continuous(limits = c(-60,60)) + - labs(y = "Revenue (Billions of Dollars, Difference with BAU)", x = "Year", color = "", linetype = "") + - theme( - panel.background = element_blank(), - legend.position = "top", legend.justification = "center", - panel.grid.major.y = element_line(color = "gray", size = 0.5), - panel.grid.major.x = element_blank(), - panel.grid.minor.x = element_blank(), - axis.text = element_text(size = 14), - axis.title = element_text(size = 14) - ) -fig8 - -ggsave("revenue_check.png", fig8, width = 10, height = 5, units = "in", dpi = 300, bg = "white") - -#### FIG 9: PRODUCTION GAP - -fig9 <- filter( - df.county, - demand_scenario == "LC1" & refining_scenario == "low exports" -) %>% - ggplot(aes(y = total_production_bbl_gap / 10^9, x = year, color = destination)) + - geom_line(size = 1, aes(color = destination)) + - theme_cowplot(12) + - scale_x_continuous(limits = c(2020, 2045)) + - # scale_y_continuous(limits = c(-60,60)) + - labs(y = "Total Production (Billions of Barrels, Difference with BAU)", x = "Year", color = "", linetype = "") + - theme( - panel.background = element_blank(), - legend.position = "top", legend.justification = "center", - panel.grid.major.y = element_line(color = "gray", size = 0.5), - panel.grid.major.x = element_blank(), - panel.grid.minor.x = element_blank(), - axis.text = element_text(size = 14), - axis.title = element_text(size = 14) - ) -fig9 - -#### FIG 10: PRODUCTION LEVEL -fig10 <- filter( - df.agg, - demand_scenario == "LC1" & refining_scenario == "historic exports" -) %>% - ggplot(aes(y = total_production_bbl_bau / 10^9, x = year)) + - geom_line(size = 1, color = "#841617") + - geom_line(aes(y = total_production_bbl / 10^9), size = 1, color = "blue") + - theme_cowplot(12) + - scale_x_continuous(limits = c(2020, 2045)) + - # scale_y_continuous(limits = c(0,20)) + - labs(y = "Production (Billions of Barrels)", x = "Year", color = "", linetype = "") + - theme( - panel.background = element_blank(), - legend.position = "top", legend.justification = "center", - panel.grid.major.y = element_line(color = "gray", size = 0.5), - panel.grid.major.x = element_blank(), - panel.grid.minor.x = element_blank(), - axis.text = element_text(size = 14), - axis.title = element_text(size = 14) - ) -fig10 - - -#### FIG 11: REVENUE GAP, JUST LA -fig11 <- filter( - df.county, - demand_scenario == "LC1" & refining_scenario == "low exports" & destination == "Los Angeles" -) %>% - ggplot(aes(y = total_revenue / 10^9, x = year)) + - geom_line(size = 1, color = "#841617") + - geom_line(aes(y = total_revenue_bau / 10^9), size = 1, color = "blue") + - theme_cowplot(12) + - scale_x_continuous(limits = c(2020, 2045)) + - # scale_y_continuous(limits = c(-60,60)) + - labs(y = "Revenue (Billions of Dollars)", x = "Year", color = "", linetype = "") + - theme( - panel.background = element_blank(), - legend.position = "top", legend.justification = "center", - panel.grid.major.y = element_line(color = "gray", size = 0.5), - panel.grid.major.x = element_blank(), - panel.grid.minor.x = element_blank(), - axis.text = element_text(size = 14), - axis.title = element_text(size = 14) - ) -fig11 diff --git a/before-targets/exploratory/labor_text_numbers.R b/before-targets/exploratory/labor_text_numbers.R deleted file mode 100644 index 4de8954..0000000 --- a/before-targets/exploratory/labor_text_numbers.R +++ /dev/null @@ -1,434 +0,0 @@ -###### CHECK LABOR OUTPUTS - -rm(list = ls()) - -list.of.packages <- c("dplyr", "data.table", "lubridate", "tidyr", "readxl", "fixest", "modelsummary", "flextable") -new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[, "Package"])] -if (length(new.packages)) install.packages(new.packages) - -library(lubridate) -library(tidyr) -library(dplyr) -library(data.table) -library(readxl) -library(stringr) -library(readr) -library(fixest) -library(modelsummary) -library(flextable) -library(svglite) -library(sf) -library(ggplot2) -library(tigris) -library(cowplot) -library(janitor) - - -# setwd('G:/Shared drives/emlab/projects/current-projects/calepa-cn/outputs/academic-out/refining/figures') -setwd("~/Library/CloudStorage/GoogleDrive-cmalloy@ucsb.edu/Shared drives/emlab/projects/current-projects/calepa-cn/outputs/academic-out/refining/figures") - -### define function for "not in" -"%!in%" <- function(x, y) !("%in%"(x, y)) - -# Check that string doesn't match any non-letter -letters_only <- function(x) !grepl("[^A-Za-z*-]", x) - -# IMPORT DISAGGREGATED RESULTS - -df <- fread("2024-08-update/fig-csv-files/labor_high_low_annual_outputs.csv") -str(df) -summary(df$sum_demo_emp) -summary(df$sum_demo_emp_revised) - -# IMPORT COUNTY LEVEL RESULTS - -df.county <- fread("2024-08-update/fig-csv-files/labor_county_outputs.csv") %>% - filter(oil_price_scenario == "reference case") -str(df.county) - -# CALCULATE PERCENT REDUCTION RELATIVE TO BAU - -## AGGREGATE IMPACTS ACROSS DEMOGRAPHIC GROUPS AND YEARS, SELECT 1 DEMOGRAPHIC CATEGORY SO IMPACTS ARENT DUPLICATED -df <- pivot_wider(df, - id_cols = c("scenario", "demo_cat", "demo_group", "demand_scenario", "refining_scenario", "oil_price_scenario", "year"), - names_from = c("metric_name", "estimate"), - values_from = value -) %>% - filter(oil_price_scenario == "reference case") -str(df) - -df.agg <- filter( - df, - demo_cat == "DAC" -) %>% - group_by(scenario, year) %>% - summarize( - sum_demo_emp = sum(employment_high), - sum_demo_emp_revised = sum(employment_low), - sum_demo_comp_pv_h = sum(compensation_pv_high), - sum_demo_comp_pv_l = sum(compensation_pv_low) - ) %>% - ungroup() - -## SUBTRACT BAU FROM SCENARIO IMPACT AND DIVIDE BY BAU FOR PERCENT REDUCTION - -df.lc1 <- filter( - df.agg, - scenario == "Low demand - low exports" -) - -df.bau <- filter( - df.agg, - scenario == "BAU demand - historical production" -) %>% - rename( - sum_demo_emp_bau = sum_demo_emp, - sum_demo_emp_revised_bau = sum_demo_emp_revised, - sum_demo_comp_pv_h_bau = sum_demo_comp_pv_h, - sum_demo_comp_pv_l_bau = sum_demo_comp_pv_l - ) %>% - dplyr::select(-scenario) - -df.agg <- filter( - df.agg, - scenario != "BAU demand - historical production" -) %>% - left_join(df.bau, by = "year") %>% - group_by(scenario) %>% - summarize( - sum_demo_emp = sum(sum_demo_emp), - sum_demo_emp_revised = sum(sum_demo_emp_revised), - sum_demo_comp_pv_h = sum(sum_demo_comp_pv_h), - sum_demo_comp_pv_l = sum(sum_demo_comp_pv_l), - sum_demo_emp_bau = sum(sum_demo_emp_bau), - sum_demo_emp_revised_bau = sum(sum_demo_emp_revised_bau), - sum_demo_comp_pv_h_bau = sum(sum_demo_comp_pv_h_bau), - sum_demo_comp_pv_l_bau = sum(sum_demo_comp_pv_l_bau) - ) %>% - ungroup() %>% - mutate( - perc_emp = 1 - sum_demo_emp / sum_demo_emp_bau, - perc_emp_revised = 1 - sum_demo_emp_revised / sum_demo_emp_revised_bau, - perc_comp_h = 1 - sum_demo_comp_pv_h / sum_demo_comp_pv_h_bau, - perc_comp_l = 1 - sum_demo_comp_pv_l / sum_demo_comp_pv_l_bau, - gap_emp = sum_demo_emp - sum_demo_emp_bau, - gap_emp_revised = sum_demo_emp_revised - sum_demo_emp_revised_bau, - gap_comp_h = sum_demo_comp_pv_h - sum_demo_comp_pv_h_bau, - gap_comp_l = sum_demo_comp_pv_l - sum_demo_comp_pv_l_bau - ) - -summary(df.agg$perc_emp) -summary(df.agg$perc_emp_revised) -summary(df.agg$perc_comp_h) -summary(df.agg$perc_comp_l) - - -# CALCULATE SHARE OF TOTAL IMPACT FROM EACH COUNTY - -df.total <- group_by( - df.county, - demand_scenario, refining_scenario, metric_name, estimate -) %>% - summarize(total_value = sum(value)) %>% - ungroup() - - -df.county.agg <- group_by( - df.county, - county, demand_scenario, refining_scenario, metric_name, estimate -) %>% - summarize(value = sum(value)) %>% - ungroup() - -df.county.agg <- left_join(df.county.agg, df.total, by = c("demand_scenario", "refining_scenario", "metric_name", "estimate")) %>% - mutate(share = value / total_value) %>% - arrange(demand_scenario, refining_scenario, metric_name, estimate, -share) -summary(df.county.agg$share) - - -# CALCULATE SHARE OF IMPACT RELATIVE TO BAU FOR EACH COUNTY - -df.total.bau <- filter( - df.county, - demand_scenario == "BAU" & refining_scenario == "historic production" -) %>% - group_by(metric_name, estimate) %>% - summarize(total_value_bau = sum(value)) %>% - ungroup() - -df.total2 <- filter( - df.county, - demand_scenario != "BAU" & refining_scenario != "historic production" -) %>% - group_by(demand_scenario, refining_scenario, metric_name, estimate) %>% - summarize(total_value = sum(value)) %>% - ungroup() %>% - left_join(df.total.bau, by = c("metric_name", "estimate")) %>% - mutate(gap_total_value = total_value - total_value_bau) - - -df.county.agg.bau <- filter( - df.county, - demand_scenario == "BAU" & refining_scenario == "historic production" -) %>% - group_by(county, metric_name, estimate) %>% - summarize(value_bau = sum(value)) %>% - ungroup() - -df.county.agg2 <- filter( - df.county, - demand_scenario != "BAU" & refining_scenario != "historic production" -) %>% - group_by(county, demand_scenario, refining_scenario, metric_name, estimate) %>% - summarize(value = sum(value)) %>% - ungroup() %>% - left_join(df.county.agg.bau, by = c("county", "metric_name", "estimate")) %>% - mutate(gap_value = value - value_bau) %>% - left_join(df.total2, by = c("demand_scenario", "refining_scenario", "metric_name", "estimate")) %>% - mutate(share = gap_value / gap_total_value) %>% - arrange(demand_scenario, refining_scenario, metric_name, estimate, -share) - - - -# AVERAGE ANNUAL JOB LOSSES (DAN AND NON-DAC) SHOULD THIS BE ADJUSTED FOR FIRST FEW YRS? - -df.agg2 <- filter( - df, - demo_cat == "Poverty" -) %>% - group_by(scenario, year, demo_group) %>% - summarize( - sum_demo_emp = sum(employment_pc_high), - sum_demo_emp_revised = sum(employment_pc_low), - sum_demo_comp_pv_h = sum(compensation_pv_pc_high), - sum_demo_comp_pv_l = sum(compensation_pv_pc_low) - ) %>% - ungroup() - -df.bau2 <- filter( - df, - demo_cat == "Poverty" & - scenario == "BAU demand - historical production" -) %>% - group_by(year, demo_group) %>% - summarize( - sum_demo_emp_bau = sum(employment_pc_high), - sum_demo_emp_revised_bau = sum(employment_pc_low), - sum_demo_comp_pv_h_bau = sum(compensation_pv_pc_high), - sum_demo_comp_pv_l_bau = sum(compensation_pv_pc_low) - ) %>% - ungroup() - - - -# TREND IN EMPLOYMENT IMPACT DIFFERENCE BETWEEN DAC AND NON-DAC -df.year <- filter( - df.agg2, - scenario != "BAU demand - historical production" -) %>% - left_join(df.bau2, by = c("year", "demo_group")) %>% - mutate( - gap_emp = sum_demo_emp - sum_demo_emp_bau, - gap_emp_revised = sum_demo_emp_revised - sum_demo_emp_revised_bau, - gap_comp_h = sum_demo_comp_pv_h - sum_demo_comp_pv_h_bau, - gap_comp_l = sum_demo_comp_pv_l - sum_demo_comp_pv_l_bau - ) - - -fig1 <- filter(df.year, scenario == "Low demand - low exports") %>% - ggplot(aes(y = gap_emp, x = year, group = demo_group)) + - geom_line(aes(color = demo_group)) -fig1 - - -## COMPARE TO FIG 4 CSV FILE - -df.f4 <- fread("2022-12-update/fig-csv-files/state_labor_levels_fig_gaps_inputs.csv") - -# AVERAGE ANNUAL JOB LOSSES (DAN AND NON-DAC) SHOULD THIS BE ADJUSTED FOR FIRST FEW YRS? - -df.agg2 <- filter( - df.agg2, - scenario != "BAU demand - historical production" -) %>% - left_join(df.bau2, by = c("year", "demo_group")) %>% - mutate( - gap_emp = sum_demo_emp - sum_demo_emp_bau, - gap_emp_revised = sum_demo_emp_revised - sum_demo_emp_revised_bau, - gap_comp_h = sum_demo_comp_pv_h - sum_demo_comp_pv_h_bau, - gap_comp_l = sum_demo_comp_pv_l - sum_demo_comp_pv_l_bau - ) %>% - group_by(scenario, demo_group) %>% - summarize( - gap_emp = sum(gap_emp), - gap_emp_revised = sum(gap_emp_revised), - gap_comp_h = sum(gap_comp_h), - gap_comp_l = sum(gap_comp_l) - ) %>% - ungroup() - - - - -# IMPORT DISAGGREGATED RESULTS WITH 3 DIFFERENT OIL PRICE PATHWAYS - -df <- fread("2024-08-update/fig-csv-files/labor_high_low_annual_outputs.csv") -str(df) -summary(df$sum_demo_emp) -summary(df$sum_demo_emp_revised) - - -# KEEP JUST 2045 AND COMPUTE GAP BETWEEN SCENARIO AND BAU FOR EACH OIL PRICE PATHWAY - -# df <- filter(df,year==2045) - -## AGGREGATE IMPACTS ACROSS DEMOGRAPHIC GROUPS AND YEARS, SELECT 1 DEMOGRAPHIC CATEGORY SO IMPACTS ARENT DUPLICATED -df <- pivot_wider(df, - id_cols = c("scenario", "demo_cat", "demo_group", "demand_scenario", "refining_scenario", "oil_price_scenario", "year"), - names_from = c("metric_name", "estimate"), - values_from = value -) %>% - filter(demo_cat == "DAC") %>% - group_by(scenario, oil_price_scenario, year) %>% - summarize( - compensation_pv_high = sum(compensation_pv_high), - compensation_pv_low = sum(compensation_pv_low), - employment_high = sum(employment_high), - employment_low = sum(employment_low) - ) %>% - ungroup() - -## BAU - -df.bau <- filter(df, scenario == "BAU demand - historical production") %>% - rename( - compensation_pv_high_bau = compensation_pv_high, - compensation_pv_low_bau = compensation_pv_low, - employment_high_bau = employment_high, - employment_low_bau = employment_low - ) %>% - dplyr::select(-scenario) - - -## JOIN BAU TO DF AND COMPUTE GAPS - -df <- filter(df, scenario != "BAU demand - historical production") %>% - left_join(df.bau, by = c("oil_price_scenario", "year")) %>% - group_by(scenario, oil_price_scenario) %>% - summarize( - compensation_pv_high = sum(compensation_pv_high), - compensation_pv_low = sum(compensation_pv_low), - employment_high = sum(employment_high), - employment_low = sum(employment_low), - compensation_pv_high_bau = sum(compensation_pv_high_bau), - compensation_pv_low_bau = sum(compensation_pv_low_bau), - employment_high_bau = sum(employment_high_bau), - employment_low_bau = sum(employment_low_bau) - ) %>% - ungroup() %>% - mutate( - gap_comp_pv_high = compensation_pv_high - compensation_pv_high_bau, - gap_comp_pv_low = compensation_pv_low - compensation_pv_low_bau, - gap_emp_high = employment_high - employment_high_bau, - gap_emp_low = employment_low - employment_low_bau - ) - - -## NEEDS: COMP AND EMP IN 2019 (FROM IMPLAN), COMP AND EMP IN 2045 HIGH AND LOW ESTIMATES FOR EACH SCENARIO - -## FROM IMPLAN: SPREADSHEETS ARE AT /Dropbox/calepa/refining-labor/2019-baseline-ica -fte.per.emp <- 0.991150442477876 - -direct.emp.2019 <- 11456.6380044314 * fte.per.emp -indirect.emp.2019 <- 70118.2031857474 * fte.per.emp -induced.emp.2019 <- 43395.822784118 * fte.per.emp -total.emp.2019 <- direct.emp.2019 + indirect.emp.2019 + induced.emp.2019 - -direct.comp.2019 <- 3028965833 -indirect.comp.2019 <- 4812737405.89 -induced.comp.2019 <- 2273210360.05 -total.comp.2019 <- direct.comp.2019 + indirect.comp.2019 + induced.comp.2019 - - -## READ IN IMPACTS - -# IMPORT DISAGGREGATED RESULTS ADD ACROSS COUNTIES - -df <- fread("2024-08-update/fig-csv-files/labor_result_for_review.csv") %>% - filter(oil_price_scenario == "reference case") %>% - group_by(demand_scenario, refining_scenario, year) %>% - summarize( - total_emp = sum(total_emp), - total_emp_revised = sum(total_emp_revised), - total_comp_usd19_h = sum(total_comp_usd19_h), - total_comp_usd19_l = sum(total_comp_usd19_l) - ) %>% - ungroup() -str(df) - - -# KEEP JUST 2045 AND COMPUTE 1-SHARE OF 2019 EMPLOMENT OR COMPENSATION FOR W/REEMPLOYMENT (HIGH) - -df.2045 <- filter(df, year == 2045) - -df.high <- mutate(df.2045, - emp_perc_h = 1 - (total_emp / total.emp.2019), - emp_comp_h = 1 - (total_comp_usd19_h / total.comp.2019) -) - - - -# DIRECT,INDIRECT,INDUCED SHARES RELATIVE TO 2019 - -df <- fread("2024-08-update/fig-csv-files/labor_result_x_impact_type.csv") -str(df) -unique(df$oil_price_scenario) - -df.review <- fread("2024-08-update/fig-csv-files/labor_result_x_impact_type_for_review.csv") -str(df.review) -unique(df.review$oil_price_scenario) - -df <- filter( - df, - oil_price_scenario == "reference case" -) %>% - group_by(demand_scenario, refining_scenario, impact_type, year) %>% - summarize( - total_emp = sum(total_emp), - total_emp_revised = sum(total_emp_revised), - total_comp_usd19_h = sum(total_comp_usd19_h), - total_comp_usd19_l = sum(total_comp_usd19_l) - ) %>% - ungroup() - -# KEEP JUST 2045 AND COMPUTE 1-SHARE OF 2019 EMPLOMENT OR COMPENSATION FOR W/REEMPLOYMENT (HIGH) - -df.2045 <- filter(df, year == 2045) - -df.high <- mutate(df.2045, - emp_perc_h = ifelse(impact_type == "direct", 1 - (total_emp / direct.emp.2019), - ifelse(impact_type == "indirect", 1 - (total_emp / indirect.emp.2019), - ifelse(impact_type == "induced", 1 - (total_emp / induced.emp.2019), NA) - ) - ), - comp_perc_h = ifelse(impact_type == "direct", 1 - (total_comp_usd19_h / direct.comp.2019), - ifelse(impact_type == "indirect", 1 - (total_comp_usd19_h / indirect.comp.2019), - ifelse(impact_type == "induced", 1 - (total_comp_usd19_h / induced.comp.2019), NA) - ) - ) -) - - -# TOTAL IMPACT LEVEL BY SCENARIO - -df.cumu <- group_by( - df, - demand_scenario, refining_scenario -) %>% - summarize( - total_emp = sum(total_emp), - total_emp_revised = sum(total_emp_revised), - total_comp_usd19_h = sum(total_comp_usd19_h), - total_comp_usd19_l = sum(total_comp_usd19_l) - ) %>% - ungroup() diff --git a/before-targets/exploratory/ncomms_labor_multipliers.R b/before-targets/exploratory/ncomms_labor_multipliers.R deleted file mode 100644 index 4cede8c..0000000 --- a/before-targets/exploratory/ncomms_labor_multipliers.R +++ /dev/null @@ -1,515 +0,0 @@ -###### NCOMMS REVISIONS - -rm(list = ls()) - -list.of.packages <- c("dplyr", "data.table", "lubridate", "tidyr", "readxl", "fixest", "modelsummary", "flextable") -new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[, "Package"])] -if (length(new.packages)) install.packages(new.packages) - -library(lubridate) -library(tidyr) -library(dplyr) -library(data.table) -library(readxl) -library(stringr) -library(readr) -library(fixest) -library(modelsummary) -library(flextable) -library(svglite) -library(sf) -library(ggplot2) -library(tigris) -library(cowplot) -library(janitor) - - -# setwd('G:/Shared drives/emlab/projects/current-projects/calepa-cn/outputs/academic-out/refining/figures') -setwd("~/Dropbox/ou/ncomms-revisions") - -### define function for "not in" -"%!in%" <- function(x, y) !("%in%"(x, y)) - -# Check that string doesn't match any non-letter -letters_only <- function(x) !grepl("[^A-Za-z*-]", x) - - -# IMPORT LODES DATA--FILTERED USING CENSUS BLOCK IDs - -df <- fread('ca_od_main_JT02_2020.csv') %>% - filter(w_geocode == 60379800021006 | - w_geocode == 60379800301000 | - w_geocode == 60133780001024 | - w_geocode == 60952521022025 | - w_geocode == 60290024011034 | - w_geocode == 60379800141083 | - w_geocode == 60379800051007 | - w_geocode == 60290005072025 | - w_geocode == 60133200011067 | - w_geocode == 60133150001075 | - w_geocode == 60133580005003 | - w_geocode == 60379800151000 | - w_geocode == 60790123061000 | - w_geocode == 60375535021000 | - w_geocode == 60290005072077) %>% - dplyr::select(w_geocode,h_geocode,S000,SI01) %>% - mutate(refinery_name = ifelse(w_geocode == 60379800021006,"Marathon Carson", - ifelse(w_geocode == 60379800301000,"Chevron El Segundo", - ifelse(w_geocode == 60133780001024,"Chevron Richmond", - ifelse(w_geocode == 60952521022025,"Valero Benicia", - ifelse(w_geocode == 60290024011034,"Kern Oil Bakersfield", - ifelse(w_geocode == 60379800141083,"Valero Wilmington", - ifelse(w_geocode == 60379800051007,"PBF Torrance", - ifelse(w_geocode == 60290005072025,"San Joaquin Bakersfield", - ifelse(w_geocode == 60133200011067,"PBF Martinez", - ifelse(w_geocode == 60133150001075,"Marathon Golden Eagle", - ifelse(w_geocode == 60133580005003,"Phillips 66 Rodeo", - ifelse(w_geocode == 60379800151000,"Phillips 66 Wilmington", - ifelse(w_geocode == 60790123061000,"Phillips 66 Santa Maria", - ifelse(w_geocode == 60375535021000,"AltAir Paramount", - ifelse(w_geocode == 60290005072077,"Global Clean Energy",NA)))))))))))))))) -str(df) -summary(df$SI01) -summary(df$S000) - -df.refinery <- group_by(df,refinery_name) %>% - summarize(SI01 = sum(SI01), - S000 = sum(S000)) %>% - ungroup() -summary(df.refinery$SI01) -summary(df.refinery$S000) - - - -# IMPORT LODES -- FILTER USING CENSUS TRACT IDS - -df.tract <- fread('ca_od_main_JT02_2020.csv') %>% - filter(substr(as.character(w_geocode),1,10) == 6037980002 | - substr(as.character(w_geocode),1,10) == 6037980030 | - substr(as.character(w_geocode),1,10) == 6013378000 | - substr(as.character(w_geocode),1,10) == 6095252102 | - substr(as.character(w_geocode),1,10) == 6029002401 | - substr(as.character(w_geocode),1,10) == 6037980014 | - substr(as.character(w_geocode),1,10) == 6037980005 | - substr(as.character(w_geocode),1,10) == 6029000507 | - substr(as.character(w_geocode),1,10) == 6013320001 | - substr(as.character(w_geocode),1,10) == 6013315000 | - substr(as.character(w_geocode),1,10) == 6013358000 | - substr(as.character(w_geocode),1,10) == 6037980015 | - substr(as.character(w_geocode),1,10) == 6079012306 | - substr(as.character(w_geocode),1,10) == 6037553502 | - substr(as.character(w_geocode),1,10) == 6029000507) %>% - mutate(w_tract_geocode = substr(as.character(w_geocode),1,10), - h_tract_geocode = substr(as.character(h_geocode),1,10)) %>% - dplyr::select(w_geocode,h_geocode,w_tract_geocode,h_tract_geocode,S000,SI01) %>% - mutate(refinery_name = ifelse(substr(as.character(w_geocode),1,10) == 6037980002,"Marathon Carson", - ifelse(substr(as.character(w_geocode),1,10) == 6037980030,"Chevron El Segundo", - ifelse(substr(as.character(w_geocode),1,10) == 6013378000,"Chevron Richmond", - ifelse(substr(as.character(w_geocode),1,10) == 6095252102,"Valero Benicia", - ifelse(substr(as.character(w_geocode),1,10) == 6029002401,"Kern Oil Bakersfield", - ifelse(substr(as.character(w_geocode),1,10) == 6037980014,"Valero Wilmington", - ifelse(substr(as.character(w_geocode),1,10) == 6037980005,"PBF Torrance", - ifelse(substr(as.character(w_geocode),1,10) == 6029000507,"San Joaquin Bakersfield", - ifelse(substr(as.character(w_geocode),1,10) == 6013320001,"PBF Martinez", - ifelse(substr(as.character(w_geocode),1,10) == 6013315000,"Marathon Golden Eagle", - ifelse(substr(as.character(w_geocode),1,10) == 6013358000,"Phillips 66 Rodeo", - ifelse(substr(as.character(w_geocode),1,10) == 6037980015,"Phillips 66 Wilmington", - ifelse(substr(as.character(w_geocode),1,10) == 6079012306,"Phillips 66 Santa Maria", - ifelse(substr(as.character(w_geocode),1,10) == 6037553502,"AltAir Paramount", - ifelse(substr(as.character(w_geocode),1,10) == 6029000507,"Global Clean Energy",NA)))))))))))))))) -str(df) -summary(df$SI01) -summary(df$S000) - -df.refinery <- group_by(df.tract,refinery_name) %>% - summarize(SI01 = sum(SI01), - S000 = sum(S000)) %>% - ungroup() -summary(df.refinery$SI01) -summary(df.refinery$S000) - - -# ADD UP JOBS BY WORK-RESIDENCE TRACT PAIRS - -df.tract <- group_by(df.tract, - w_tract_geocode,h_tract_geocode) %>% - summarize(SI01 = sum(SI01), - refinery_name = first(refinery_name)) %>% - filter(SI01 > 0) %>% - ungroup() - -df.tract <- mutate(df.tract, - w_tract_geocode = str_pad(as.character(w_tract_geocode),11,pad="0"), - h_tract_geocode = str_pad(as.character(h_tract_geocode),11,pad="0")) - -# IMPORT CENSUS TRACTS FOR CA - -ca.tracts <- tracts(state="CA",year=2020) - -# JOIN SHAPEFILE TO LODES BY RESIDENTIAL TRACT - -df.tract.r <- left_join(df.tract,ca.tracts,by=c("h_tract_geocode"="GEOID")) -unique(is.na(df.tract.r$STATEFP)) # FALSE FOR ALL OBSERVATIONS - -df.tract.r <- st_as_sf(df.tract.r, - crs=4269, - sf_column_name = "geometry") - - -sf1 <- filter(df.tract.r, refinery_name=="Marathon Carson" | refinery_name=="Chevron El Segundo" | - refinery_name=="Valero Wilmington" | refinery_name=="PBF Torrance" | - refinery_name=="Phillips 66 Wilmington" | refinery_name=="AltAir Paramount") %>% - mutate(refinery_name = as.factor(refinery_name)) %>% - rename(South_Cluster=SI01) -sf2 <- filter(df.tract.r, refinery_name!="Marathon Carson" & refinery_name!="Chevron El Segundo" & - refinery_name!="Valero Wilmington" & refinery_name!="PBF Torrance" & - refinery_name!="Phillips 66 Wilmington" & refinery_name!="AltAir Paramount") %>% - mutate(refinery_name = as.factor(refinery_name)) %>% - rename(North_Cluster=SI01) - -library(ggnewscale) -fig1 <- ggplot(data=sf1) + - geom_sf(aes(color=South_Cluster)) + - scale_color_viridis_c(option = "D") + - new_scale_color() + - geom_sf(data=sf2,aes(color=North_Cluster)) + - scale_color_viridis_c(option = "C") + - scale_fill_gradient() + - geom_sf(data=ca.tracts,fill=NA) + - theme_bw() + - theme(panel.border = element_blank(), - panel.grid = element_blank(), - axis.ticks.x = element_blank(), - axis.text.x = element_blank(), - axis.ticks.y = element_blank(), - axis.text.y = element_blank(), - axis.title = element_blank()) -fig1 - -sf1 <- mutate(sf1, South_Cluster = 1,North_Cluster=0) -sf2 <- mutate(sf2, North_Cluster = 1,South_Cluster=0) - -sf3 <- bind_rows(sf1,sf2) - -fig2 <- ggplot(data=sf3) + - geom_sf(aes(fill=as.factor(South_Cluster))) + - geom_sf(data=ca.tracts,fill=NA) + - theme_bw() + - theme(panel.border = element_blank(), - panel.grid = element_blank(), - axis.ticks.x = element_blank(), - axis.text.x = element_blank(), - axis.ticks.y = element_blank(), - axis.text.y = element_blank(), - axis.title = element_blank()) -fig2 - -sf4 <- filter(df.tract.r, SI01>1) - -sf1 <- filter(sf4, refinery_name=="Marathon Carson" | refinery_name=="Chevron El Segundo" | - refinery_name=="Valero Wilmington" | refinery_name=="PBF Torrance" | - refinery_name=="Phillips 66 Wilmington" | refinery_name=="AltAir Paramount") %>% - mutate(refinery_name = as.factor(refinery_name)) %>% - rename(South_Cluster=SI01) -sf2 <- filter(sf4, refinery_name!="Marathon Carson" & refinery_name!="Chevron El Segundo" & - refinery_name!="Valero Wilmington" & refinery_name!="PBF Torrance" & - refinery_name!="Phillips 66 Wilmington" & refinery_name!="AltAir Paramount") %>% - mutate(refinery_name = as.factor(refinery_name)) %>% - rename(North_Cluster=SI01) - -sf1 <- mutate(sf1, South_Cluster = 1,North_Cluster=0) -sf2 <- mutate(sf2, North_Cluster = 1,South_Cluster=0) - -sf3 <- bind_rows(sf1,sf2) - -fig3 <- ggplot(data=sf3) + - geom_sf(aes(fill=as.factor(South_Cluster))) + - geom_sf(data=ca.tracts,fill=NA) + - theme_bw() + - theme(panel.border = element_blank(), - panel.grid = element_blank(), - axis.ticks.x = element_blank(), - axis.text.x = element_blank(), - axis.ticks.y = element_blank(), - axis.text.y = element_blank(), - axis.title = element_blank()) -fig3 - -ggsave('ns_emp_map.png',fig1) -ggsave('ns_emp_map_v2.png',fig2) -ggsave('ns_emp_map_v3.png',fig3) -rm(fig1,fig2,fig3,sf1,sf2,sf3,sf4,df.tract.r,df.tract,df.refinery,df,ca.tracts) - -# SHARE OF ZIP CODE OUTPUT VALUE FROM REFINING -filenames <- list.files(".", pattern="Industry Detail_9*", full.names=TRUE) -zips <- as.numeric(gsub(".*?([0-9]+).*", "\\1", filenames)) -zips <- rep(zips,times=c(rep(528,12))) -ldf <- lapply(filenames, fread) -df <- rbindlist(ldf) %>% - filter(is.na(V1)==FALSE) -df$zcta <- zips -names(df) <- make.names(names(df)) -str(df) -rm(zips,filenames,ldf) - -df <- mutate(df, - Total.Output = str_remove_all(Total.Output,"[$,]"), - Total.Output = as.numeric(Total.Output)) - -df_total <- group_by(df, - zcta) %>% - summarize(Total.Output = sum(Total.Output)) %>% - ungroup() - -df_refining <- group_by(df, - zcta) %>% - filter(Industry.Code==146) %>% - summarize(refinery.output = sum(Total.Output)) %>% - ungroup() %>% - left_join(df_total,by="zcta") %>% - mutate(share = refinery.output/Total.Output) - -summary(df_refining$share) - -rm(df,df_total) - -# SHARE OF CLUSTER AND STATEWIDE OUTPUT - -df_nc <- fread('Industry Detail_north_cluster.csv') -names(df_nc) <- make.names(names(df_nc)) -df_nc <- filter(df_nc, - Industry.Code==146 & is.na(V1)==FALSE) %>% - mutate(Total.Output = str_remove_all(Total.Output,"[$,]"), - Total.Output = as.numeric(Total.Output)) %>% - summarize(total_north = sum(Total.Output)) -str(df_nc) - -df_sc <- fread('Industry Detail_south_cluster.csv') -names(df_sc) <- make.names(names(df_sc)) -df_sc <- filter(df_sc, - Industry.Code==146 & is.na(V1)==FALSE) %>% - mutate(Total.Output = str_remove_all(Total.Output,"[$,]"), - Total.Output = as.numeric(Total.Output)) %>% - summarize(total_south = sum(Total.Output)) -str(df_sc) - - -df_refining$total_north <- rep(df_nc$total_north,12) -df_refining$total_south <- rep(df_sc$total_south,12) -df_refining <- mutate(df_refining, - share_north = refinery.output/total_north, - share_south = refinery.output/total_south, - share_state = refinery.output/(total_north+total_south)) -summary(df_refining$share_north) -summary(df_refining$share_south) -summary(df_refining$share_state) - - - - -### SHARE OF WORKERS IN ESTABLISHMENT TRACT THAT LIVE IN HOME TRACT - -df.tract <- fread('ca_od_main_JT02_2020.csv') %>% - mutate(w_tract_geocode = substr(as.character(w_geocode),1,10), - h_tract_geocode = substr(as.character(h_geocode),1,10)) %>% - dplyr::select(w_tract_geocode, h_tract_geocode,S000,SI01,SI02,SI03) %>% - group_by(w_tract_geocode,h_tract_geocode) %>% - summarize(S000 = sum(S000,na.rm=TRUE), - SI01 = sum(SI01,na.rm=TRUE), - SI02 = sum(SI02,na.rm=TRUE), - SI03 = sum(SI03,na.rm=TRUE)) %>% - ungroup() - -df.work.total <- group_by(df.tract, - w_tract_geocode) %>% - summarize(S000_w = sum(S000,na.rm=TRUE), - SI01_w = sum(SI01,na.rm=TRUE), - SI02_w = sum(SI02,na.rm=TRUE), - SI03_w = sum(SI03,na.rm=TRUE)) %>% - ungroup() - -df.tract <- left_join(df.tract,df.work.total, by="w_tract_geocode") %>% - mutate(S000_share = S000/S000_w, - SI01_share = SI01/SI01_w, - SI02_share = SI02/SI02_w, - SI03_share = SI03/SI03_w) -summary(df.tract$S000_share) -summary(df.tract$SI01_share) -summary(df.tract$SI02_share) -summary(df.tract$SI03_share) - -df.tract <- mutate(df.tract, - w_tract_geocode = str_pad(as.character(w_tract_geocode),11,pad="0"), - h_tract_geocode = str_pad(as.character(h_tract_geocode),11,pad="0")) - -######################################## -# Census Tract to Zip Code Xwalk - -df <- read_excel('TRACT_ZIP_122024.xlsx') %>% - filter(USPS_ZIP_PREF_STATE=="CA") %>% - dplyr::select(TRACT,ZIP,BUS_RATIO) -str(df) -summary(df$BUS_RATIO) - - -# Crosswalk work census tract to work zip code - -df2 <- left_join(df.tract,df,by=c("w_tract_geocode"="TRACT")) - -df2 <- mutate(df2, - S000 = ifelse(is.na(BUS_RATIO)==FALSE,S000*BUS_RATIO,S000), - SI01 = ifelse(is.na(BUS_RATIO)==FALSE,SI01*BUS_RATIO,SI01), - SI02 = ifelse(is.na(BUS_RATIO)==FALSE,SI02*BUS_RATIO,SI02), - SI03 = ifelse(is.na(BUS_RATIO)==FALSE,SI03*BUS_RATIO,SI03)) %>% - filter(is.na(ZIP)==FALSE) %>% - group_by(ZIP,h_tract_geocode) %>% - summarize(S000 = sum(S000,na.rm=TRUE), - SI01 = sum(SI01,na.rm=TRUE), - SI02 = sum(SI02,na.rm=TRUE), - SI03 = sum(SI03,na.rm=TRUE)) %>% - ungroup() - -total.zip <- group_by(df2, - ZIP) %>% - summarize(S000_w = sum(S000,na.rm=TRUE), - SI01_w = sum(SI01,na.rm=TRUE), - SI02_w = sum(SI02,na.rm=TRUE), - SI03_w = sum(SI03,na.rm=TRUE)) %>% - ungroup() - -df2 <- left_join(df2,total.zip, by="ZIP") %>% - mutate(S000_share = S000/S000_w, - SI01_share = SI01/SI01_w, - SI02_share = SI02/SI02_w, - SI03_share = SI03/SI03_w) -summary(df2$S000_share) -summary(df2$SI01_share) -summary(df2$SI02_share) -summary(df2$SI03_share) - -######################################## -# IMPLAN zip code tables - -load.implan.zip.agg <- function(x){ - df <- fread(paste0("implan-zip/",x)) - names(df) <- make.names(names(df)) - separated.string <- str_split(x,"_") - - df <- dplyr::select(df, - Industry.Code,Description,Total.Output) %>% - filter(Description != "" & Description != "* Employment and payroll of federal govt, military" & Description != "* Employment and payroll of federal govt, non-military") %>% - mutate(zip = str_extract(separated.string[[1]][4],pat <- "(\\d)+"), - county = separated.string[[1]][3], - Total.Output = as.numeric(str_remove_all(Total.Output,"[$,]"))) - - - df <- left_join(df,implan.naics.xwalk, by=c("Industry.Code"="Implan546Index")) %>% - mutate(ind_type = ifelse((substr(as.character(`2017NaicsCode`),1,2)=="11" | - substr(as.character(`2017NaicsCode`),1,2)=="21" | - substr(as.character(`2017NaicsCode`),1,2)=="23" | - substr(as.character(`2017NaicsCode`),1,2)=="31" | - substr(as.character(`2017NaicsCode`),1,2)=="32" | - substr(as.character(`2017NaicsCode`),1,2)=="33" | - Description == "Construction of new commercial structures, including farm structures" | - Description == "Construction of new multifamily residential structures" | - Description == "Construction of other new residential structures" | - Description == "Maintenance and repair construction of nonresidential structures" | - Description == "Maintenance and repair construction of residential structures" | - Description == "Maintenance and repair construction of highways, streets, bridges, and tunnels" | - Description == "Dog and cat food manufacturing" | - Description == "Other animal food manufacturing" | - Description == "Flour milling" | - Description == "Rice milling" | - Description == "Malt manufacturing" | - Description == "Construction of other new nonresidential structures" | - Description == "Construction of new single-family residential structures"), "SI01", - ifelse((substr(as.character(`2017NaicsCode`),1,2)=="42" | - substr(as.character(`2017NaicsCode`),1,2)=="44" | - substr(as.character(`2017NaicsCode`),1,2)=="45" | - substr(as.character(`2017NaicsCode`),1,2)=="48" | - substr(as.character(`2017NaicsCode`),1,2)=="49" | - substr(as.character(`2017NaicsCode`),1,2)=="22"), "SI02","SI03")), - ind_type = ifelse(Description=="Veterinary services","SI03",ind_type), - cluster = ifelse((county == "Ventura County" | - county == "Los Angeles County" | - county == "San Bernardino County" | - county == "Orange County" | - county == "Riverside County" | - county == "Imperial County" | - county == "San Diego County"), "south", "north")) %>% - dplyr::select(-`2017NaicsCode`,-Description,-Industry.Code,-NaicsDescription) %>% - group_by(zip,county,cluster,ind_type) %>% - summarize(Total.Output = sum(Total.Output,na.rm=TRUE)) %>% - ungroup() %>% - pivot_wider(id_cols=c("zip","county","cluster"),values_from = "Total.Output", names_from = "ind_type",names_prefix = "output_") - -} - - -# disaggregated by IMPLAN industry and zip - -load.implan.zip <- function(x){ - df <- fread(paste0("implan-zip/",x)) - names(df) <- make.names(names(df)) - separated.string <- str_split(x,"_") - - df <- dplyr::select(df, - Industry.Code,Description,Total.Output) %>% - filter(Description != "" & Description != "* Employment and payroll of federal govt, military" & Description != "* Employment and payroll of federal govt, non-military") %>% - mutate(zip = str_extract(separated.string[[1]][4],pat <- "(\\d)+"), - county = separated.string[[1]][3], - Total.Output = as.numeric(str_remove_all(Total.Output,"[$,]"))) - - - df <- left_join(df,implan.naics.xwalk, by=c("Industry.Code"="Implan546Index")) %>% - mutate(ind_type = ifelse((substr(as.character(`2017NaicsCode`),1,2)=="11" | - substr(as.character(`2017NaicsCode`),1,2)=="21" | - substr(as.character(`2017NaicsCode`),1,2)=="23" | - substr(as.character(`2017NaicsCode`),1,2)=="31" | - substr(as.character(`2017NaicsCode`),1,2)=="32" | - substr(as.character(`2017NaicsCode`),1,2)=="33" | - Description == "Construction of new commercial structures, including farm structures" | - Description == "Construction of new multifamily residential structures" | - Description == "Construction of other new residential structures" | - Description == "Maintenance and repair construction of nonresidential structures" | - Description == "Maintenance and repair construction of residential structures" | - Description == "Maintenance and repair construction of highways, streets, bridges, and tunnels" | - Description == "Dog and cat food manufacturing" | - Description == "Other animal food manufacturing" | - Description == "Flour milling" | - Description == "Rice milling" | - Description == "Malt manufacturing" | - Description == "Construction of other new nonresidential structures" | - Description == "Construction of new single-family residential structures"), "SI01", - ifelse((substr(as.character(`2017NaicsCode`),1,2)=="42" | - substr(as.character(`2017NaicsCode`),1,2)=="44" | - substr(as.character(`2017NaicsCode`),1,2)=="45" | - substr(as.character(`2017NaicsCode`),1,2)=="48" | - substr(as.character(`2017NaicsCode`),1,2)=="49" | - substr(as.character(`2017NaicsCode`),1,2)=="22"), "SI02","SI03")), - ind_type = ifelse(Description=="Veterinary services","SI03",ind_type), - cluster = ifelse((county == "Ventura County" | - county == "Los Angeles County" | - county == "San Bernardino County" | - county == "Orange County" | - county == "Riverside County" | - county == "Imperial County" | - county == "San Diego County"), "south", "north")) %>% - dplyr::select(-`2017NaicsCode`) -} - -file.names <- list.files(path="implan-zip",pattern="*.csv") -implan.naics.xwalk <- read_xlsx("Bridge_2017NaicsToImplan546.xlsx") %>% - dplyr::select(Implan546Index,`2017NaicsCode`) %>% - mutate(`2017NaicsCode` = substr(`2017NaicsCode`,1,2)) %>% - group_by(Implan546Index,`2017NaicsCode`) %>% - summarize() %>% - ungroup() - - -output <- lapply(file.names,load.implan.zip) %>% - bind_rows() - -fwrite(output,'implan_zip_output.csv') - - diff --git a/before-targets/exploratory/ncomms_labor_revisions.R b/before-targets/exploratory/ncomms_labor_revisions.R deleted file mode 100644 index da62e5b..0000000 --- a/before-targets/exploratory/ncomms_labor_revisions.R +++ /dev/null @@ -1,687 +0,0 @@ -###### NCOMMS REVISIONS - -rm(list = ls()) - -list.of.packages <- c("dplyr", "data.table", "lubridate", "tidyr", "readxl", "fixest", "modelsummary", "flextable") -new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[, "Package"])] -if (length(new.packages)) install.packages(new.packages) - -library(lubridate) -library(tidyr) -library(dplyr) -library(data.table) -library(readxl) -library(stringr) -library(readr) -library(fixest) -library(modelsummary) -library(flextable) -library(svglite) -library(sf) -library(ggplot2) -library(tigris) -library(cowplot) -library(janitor) - - -# setwd('G:/Shared drives/emlab/projects/current-projects/calepa-cn/outputs/academic-out/refining/figures') -setwd("~/Dropbox/ou/ncomms-revisions") - -### define function for "not in" -"%!in%" <- function(x, y) !("%in%"(x, y)) - -# Check that string doesn't match any non-letter -letters_only <- function(x) !grepl("[^A-Za-z*-]", x) - - -# EMPLOYMENT MULTIPLIERS (FROM IMPLAN) -rev.mrio <- fread('multipliers/mrio/output-value/20250402-ns_cluster_mrio-Detail Economic Indicators.csv') %>% - mutate(cluster = ifelse(DestinationRegion=="north_cluster (2023)","north","south")) %>% - dplyr::select(OriginRegion,DestinationRegion,IndustryCode,ImpactType,Employment,EmployeeCompensation,cluster) %>% - rename(emp.rev = Employment, - ec.rev = EmployeeCompensation) -li.mrio <- fread('multipliers/mrio/labor-income/20250402-ns_cluster_mrio-Detail Economic Indicators.csv') %>% - mutate(cluster = ifelse(DestinationRegion=="north_cluster (2023)","north","south")) %>% - dplyr::select(OriginRegion,DestinationRegion,IndustryCode,ImpactType,Employment,EmployeeCompensation,cluster) %>% - rename(emp.li = Employment, - ec.li = EmployeeCompensation) -mrio <- left_join(rev.mrio,li.mrio, by=c("OriginRegion","DestinationRegion","IndustryCode","ImpactType","cluster")) - -rev.not.mrio <- fread('multipliers/non-mrio/output-value/20250402-ns_cluster-Detail Economic Indicators.csv') %>% - mutate(cluster = ifelse(DestinationRegion=="north_cluster (2023)","north","south")) %>% - dplyr::select(OriginRegion,DestinationRegion,IndustryCode,ImpactType,Employment,EmployeeCompensation,cluster) %>% - rename(emp.rev = Employment, - ec.rev = EmployeeCompensation) -li.not.mrio <- fread('multipliers/non-mrio/labor-income/20250402-ns_cluster-Detail Economic Indicators.csv') %>% - mutate(cluster = ifelse(DestinationRegion=="north_cluster (2023)","north","south")) %>% - dplyr::select(OriginRegion,DestinationRegion,IndustryCode,ImpactType,Employment,EmployeeCompensation,cluster) %>% - rename(emp.li = Employment, - ec.li = EmployeeCompensation) -not.mrio <- left_join(rev.not.mrio,li.not.mrio, by=c("OriginRegion","DestinationRegion","IndustryCode","ImpactType","cluster")) - -rev.statewide <- fread('multipliers/statewide/output-value/20250402-statewide_ca_refining-Detail Economic Indicators.csv') %>% - dplyr::select(OriginRegion,DestinationRegion,IndustryCode,ImpactType,Employment,EmployeeCompensation) %>% - rename(emp.rev = Employment, - ec.rev = EmployeeCompensation) -li.statewide <- fread('multipliers/statewide/labor-income/20250402-statewide_ca_refining-Detail Economic Indicators.csv') %>% - dplyr::select(OriginRegion,DestinationRegion,IndustryCode,ImpactType,Employment,EmployeeCompensation) %>% - rename(emp.li = Employment, - ec.li = EmployeeCompensation) -statewide <- left_join(rev.statewide,li.statewide, by=c("OriginRegion","DestinationRegion","IndustryCode","ImpactType")) - -rm(rev.mrio,li.mrio,rev.not.mrio,li.not.mrio,rev.statewide,li.statewide) - - -### ADD UP WORKERS FROM BLOCK TO TRACT LEVEL - -df.tract <- fread('ca_od_main_JT02_2020.csv') %>% - mutate(w_tract_geocode = substr(as.character(w_geocode),1,10), - h_tract_geocode = substr(as.character(h_geocode),1,10)) %>% - dplyr::select(w_tract_geocode, h_tract_geocode,S000,SI01,SI02,SI03) %>% - group_by(w_tract_geocode,h_tract_geocode) %>% - summarize(S000 = sum(S000,na.rm=TRUE), - SI01 = sum(SI01,na.rm=TRUE), - SI02 = sum(SI02,na.rm=TRUE), - SI03 = sum(SI03,na.rm=TRUE)) %>% - ungroup() - -df.tract <- mutate(df.tract, - w_tract_geocode = str_pad(as.character(w_tract_geocode),11,pad="0"), - h_tract_geocode = str_pad(as.character(h_tract_geocode),11,pad="0")) - -######################################## -# Census Tract to Zip Code Xwalk - -df <- read_excel('TRACT_ZIP_122024.xlsx') %>% - filter(USPS_ZIP_PREF_STATE=="CA") %>% - dplyr::select(TRACT,ZIP,BUS_RATIO) -str(df) -summary(df$BUS_RATIO) - - -# Crosswalk work census tract to work zip code - -df.zip <- left_join(df.tract,df,by=c("w_tract_geocode"="TRACT")) - -df.zip <- mutate(df.zip, - S000 = ifelse(is.na(BUS_RATIO)==FALSE,S000*BUS_RATIO,S000), - SI01 = ifelse(is.na(BUS_RATIO)==FALSE,SI01*BUS_RATIO,SI01), - SI02 = ifelse(is.na(BUS_RATIO)==FALSE,SI02*BUS_RATIO,SI02), - SI03 = ifelse(is.na(BUS_RATIO)==FALSE,SI03*BUS_RATIO,SI03)) %>% - filter(is.na(ZIP)==FALSE) %>% - group_by(ZIP,h_tract_geocode) %>% - summarize(S000 = sum(S000,na.rm=TRUE), - SI01 = sum(SI01,na.rm=TRUE), - SI02 = sum(SI02,na.rm=TRUE), - SI03 = sum(SI03,na.rm=TRUE)) %>% - ungroup() - -total.zip <- group_by(df.zip, - ZIP) %>% - summarize(S000_w = sum(S000,na.rm=TRUE), - SI01_w = sum(SI01,na.rm=TRUE), - SI02_w = sum(SI02,na.rm=TRUE), - SI03_w = sum(SI03,na.rm=TRUE)) %>% - ungroup() - -df.zip <- left_join(df.zip,total.zip, by="ZIP") %>% - mutate(S000_share = ifelse(S000_w > 0, S000/S000_w, 0), - SI01_share = ifelse(SI01_w > 0, SI01/SI01_w, 0), - SI02_share = ifelse(SI02_w > 0, SI02/SI02_w, 0), - SI03_share = ifelse(SI03_w > 0, SI03/SI03_w, 0)) -summary(df.zip$S000_share) -summary(df.zip$SI01_share) -summary(df.zip$SI02_share) -summary(df.zip$SI03_share) - -rm(total.zip) - -######################################## -# IMPLAN zip code tables -# -# load.implan.zip.agg <- function(x){ -# df <- fread(paste0("implan-zip/",x)) -# names(df) <- make.names(names(df)) -# separated.string <- str_split(x,"_") -# -# df <- dplyr::select(df, -# Industry.Code,Description,Total.Output) %>% -# filter(Description != "" & Description != "* Employment and payroll of federal govt, military" & Description != "* Employment and payroll of federal govt, non-military") %>% -# mutate(zip = str_extract(separated.string[[1]][4],pat <- "(\\d)+"), -# county = separated.string[[1]][3], -# Total.Output = as.numeric(str_remove_all(Total.Output,"[$,]"))) -# -# -# df <- left_join(df,implan.naics.xwalk, by=c("Industry.Code"="Implan546Index")) %>% -# mutate(ind_type = ifelse((substr(as.character(`2017NaicsCode`),1,2)=="11" | -# substr(as.character(`2017NaicsCode`),1,2)=="21" | -# substr(as.character(`2017NaicsCode`),1,2)=="23" | -# substr(as.character(`2017NaicsCode`),1,2)=="31" | -# substr(as.character(`2017NaicsCode`),1,2)=="32" | -# substr(as.character(`2017NaicsCode`),1,2)=="33" | -# Description == "Construction of new commercial structures, including farm structures" | -# Description == "Construction of new multifamily residential structures" | -# Description == "Construction of other new residential structures" | -# Description == "Maintenance and repair construction of nonresidential structures" | -# Description == "Maintenance and repair construction of residential structures" | -# Description == "Maintenance and repair construction of highways, streets, bridges, and tunnels" | -# Description == "Dog and cat food manufacturing" | -# Description == "Other animal food manufacturing" | -# Description == "Flour milling" | -# Description == "Rice milling" | -# Description == "Malt manufacturing" | -# Description == "Construction of other new nonresidential structures" | -# Description == "Construction of new single-family residential structures"), "SI01", -# ifelse((substr(as.character(`2017NaicsCode`),1,2)=="42" | -# substr(as.character(`2017NaicsCode`),1,2)=="44" | -# substr(as.character(`2017NaicsCode`),1,2)=="45" | -# substr(as.character(`2017NaicsCode`),1,2)=="48" | -# substr(as.character(`2017NaicsCode`),1,2)=="49" | -# substr(as.character(`2017NaicsCode`),1,2)=="22"), "SI02","SI03")), -# ind_type = ifelse(Description=="Veterinary services","SI03",ind_type), -# cluster = ifelse((county == "Ventura County" | -# county == "Los Angeles County" | -# county == "San Bernardino County" | -# county == "Orange County" | -# county == "Riverside County" | -# county == "Imperial County" | -# county == "San Diego County"), "south", "north")) %>% -# dplyr::select(-`2017NaicsCode`,-Description,-Industry.Code,-NaicsDescription) %>% -# group_by(zip,county,cluster,ind_type) %>% -# summarize(Total.Output = sum(Total.Output,na.rm=TRUE)) %>% -# ungroup() %>% -# pivot_wider(id_cols=c("zip","county","cluster"),values_from = "Total.Output", names_from = "ind_type",names_prefix = "output_") -# -# } -# -# -# # disaggregated by IMPLAN industry and zip -# -# load.implan.zip <- function(x){ -# df <- fread(paste0("implan-zip/",x)) -# names(df) <- make.names(names(df)) -# separated.string <- str_split(x,"_") -# -# df <- dplyr::select(df, -# Industry.Code,Description,Total.Output) %>% -# filter(Description != "" & Description != "* Employment and payroll of federal govt, military" & Description != "* Employment and payroll of federal govt, non-military") %>% -# mutate(zip = str_extract(separated.string[[1]][4],pat <- "(\\d)+"), -# county = separated.string[[1]][3], -# Total.Output = as.numeric(str_remove_all(Total.Output,"[$,]"))) -# -# -# df <- left_join(df,implan.naics.xwalk, by=c("Industry.Code"="Implan546Index")) %>% -# mutate(ind_type = ifelse((substr(as.character(`2017NaicsCode`),1,2)=="11" | -# substr(as.character(`2017NaicsCode`),1,2)=="21" | -# substr(as.character(`2017NaicsCode`),1,2)=="23" | -# substr(as.character(`2017NaicsCode`),1,2)=="31" | -# substr(as.character(`2017NaicsCode`),1,2)=="32" | -# substr(as.character(`2017NaicsCode`),1,2)=="33" | -# Description == "Construction of new commercial structures, including farm structures" | -# Description == "Construction of new multifamily residential structures" | -# Description == "Construction of other new residential structures" | -# Description == "Maintenance and repair construction of nonresidential structures" | -# Description == "Maintenance and repair construction of residential structures" | -# Description == "Maintenance and repair construction of highways, streets, bridges, and tunnels" | -# Description == "Dog and cat food manufacturing" | -# Description == "Other animal food manufacturing" | -# Description == "Flour milling" | -# Description == "Rice milling" | -# Description == "Malt manufacturing" | -# Description == "Construction of other new nonresidential structures" | -# Description == "Construction of new single-family residential structures"), "SI01", -# ifelse((substr(as.character(`2017NaicsCode`),1,2)=="42" | -# substr(as.character(`2017NaicsCode`),1,2)=="44" | -# substr(as.character(`2017NaicsCode`),1,2)=="45" | -# substr(as.character(`2017NaicsCode`),1,2)=="48" | -# substr(as.character(`2017NaicsCode`),1,2)=="49" | -# substr(as.character(`2017NaicsCode`),1,2)=="22"), "SI02","SI03")), -# ind_type = ifelse(Description=="Veterinary services","SI03",ind_type), -# cluster = ifelse((county == "Ventura County" | -# county == "Los Angeles County" | -# county == "San Bernardino County" | -# county == "Orange County" | -# county == "Riverside County" | -# county == "Imperial County" | -# county == "San Diego County"), "south", "north")) %>% -# dplyr::select(-`2017NaicsCode`) -# } -# -# file.names <- list.files(path="implan-zip",pattern="*.csv") -# implan.naics.xwalk <- read_xlsx("Bridge_2017NaicsToImplan546.xlsx") %>% -# dplyr::select(Implan546Index,`2017NaicsCode`) %>% -# mutate(`2017NaicsCode` = substr(`2017NaicsCode`,1,2)) %>% -# group_by(Implan546Index,`2017NaicsCode`) %>% -# summarize() %>% -# ungroup() -# -# -# output <- lapply(file.names,load.implan.zip) %>% -# bind_rows() -# -# fwrite(output,'implan_zip_output.csv') - -## IMPORT ZIP CODE TABLES, CALCULATE SHARE OF CLUSTER OR STATEWIDE OUTPUT ATTRIBUTABLE TO EACH ZIP CODE -zip.output <- fread('implan_zip_output.csv') - -cluster.output <- group_by(zip.output, - cluster,Industry.Code) %>% - summarize(cluster_output = sum(Total.Output,na.rm=TRUE)) %>% - ungroup() - -state.output <- group_by(zip.output, - Industry.Code) %>% - summarize(state_output = sum(Total.Output,na.rm=TRUE)) %>% - ungroup() - -zip.output <- left_join(zip.output,cluster.output,by=c("cluster","Industry.Code")) %>% - mutate(share_output = ifelse(cluster_output > 0, Total.Output/cluster_output,0)) %>% - dplyr::select(-cluster_output) -summary(zip.output$share_output) - -zip.output <- left_join(zip.output,state.output,by=c("Industry.Code")) %>% - mutate(share_output_state = ifelse(state_output > 0, Total.Output/state_output,0)) %>% - dplyr::select(-state_output) -summary(zip.output$share_output_state) - -rm(cluster.output,state.output) - -## JOIN ZIP CODE SHARE OF OUTPUT TO IMPLAN MULTIPLIERS, CALCULATE EFFECT FOR EACH ZIP CODE, ADD UP ACROSS INDUSTRIES - -### MRIO -mrio.multipliers <- left_join(mrio,zip.output,by=c("IndustryCode"="Industry.Code","cluster")) %>% - mutate(emp.rev = emp.rev*share_output, - ec.rev = ec.rev*share_output, - emp.li = emp.li*share_output, - ec.li = ec.li*share_output) %>% - group_by(OriginRegion,cluster,zip,ImpactType,county,ind_type) %>% - summarize(emp.rev = sum(emp.rev, na.rm=TRUE), - ec.rev = sum(ec.rev, na.rm=TRUE), - emp.li = sum(emp.li, na.rm=TRUE), - ec.li = sum(ec.li, na.rm=TRUE)) %>% - ungroup() - - -mrio.multipliers <- pivot_wider(mrio.multipliers, - id_cols=c("zip","county","cluster","OriginRegion","ImpactType"),values_from = c("emp.rev","ec.rev","emp.li","ec.li"), names_from = "ind_type") %>% - mutate(emp.rev_SI01 = ifelse(is.na(emp.rev_SI01)==TRUE,0,emp.rev_SI01), - emp.rev_SI02 = ifelse(is.na(emp.rev_SI02)==TRUE,0,emp.rev_SI02), - emp.rev_SI03 = ifelse(is.na(emp.rev_SI03)==TRUE,0,emp.rev_SI03), - ec.rev_SI01 = ifelse(is.na(ec.rev_SI01)==TRUE,0,ec.rev_SI01), - ec.rev_SI02 = ifelse(is.na(ec.rev_SI02)==TRUE,0,ec.rev_SI02), - ec.rev_SI03 = ifelse(is.na(ec.rev_SI03)==TRUE,0,ec.rev_SI03), - emp.li_SI01 = ifelse(is.na(emp.li_SI01)==TRUE,0,emp.li_SI01), - emp.li_SI02 = ifelse(is.na(emp.li_SI02)==TRUE,0,emp.li_SI02), - emp.li_SI03 = ifelse(is.na(emp.li_SI03)==TRUE,0,emp.li_SI03), - ec.li_SI01 = ifelse(is.na(ec.li_SI01)==TRUE,0,ec.li_SI01), - ec.li_SI02 = ifelse(is.na(ec.li_SI02)==TRUE,0,ec.li_SI02), - ec.li_SI03 = ifelse(is.na(ec.li_SI03)==TRUE,0,ec.li_SI03)) - -### NOT MRIO -not.mrio.multipliers <- left_join(not.mrio,zip.output,by=c("IndustryCode"="Industry.Code","cluster")) %>% - mutate(emp.rev = emp.rev*share_output, - ec.rev = ec.rev*share_output, - emp.li = emp.li*share_output, - ec.li = ec.li*share_output) %>% - group_by(OriginRegion,cluster,zip,ImpactType,county,ind_type) %>% - summarize(emp.rev = sum(emp.rev, na.rm=TRUE), - ec.rev = sum(ec.rev, na.rm=TRUE), - emp.li = sum(emp.li, na.rm=TRUE), - ec.li = sum(ec.li, na.rm=TRUE)) %>% - ungroup() - - -not.mrio.multipliers <- pivot_wider(not.mrio.multipliers, - id_cols=c("zip","county","cluster","OriginRegion","ImpactType"),values_from = c("emp.rev","ec.rev","emp.li","ec.li"), names_from = "ind_type") %>% - mutate(emp.rev_SI01 = ifelse(is.na(emp.rev_SI01)==TRUE,0,emp.rev_SI01), - emp.rev_SI02 = ifelse(is.na(emp.rev_SI02)==TRUE,0,emp.rev_SI02), - emp.rev_SI03 = ifelse(is.na(emp.rev_SI03)==TRUE,0,emp.rev_SI03), - ec.rev_SI01 = ifelse(is.na(ec.rev_SI01)==TRUE,0,ec.rev_SI01), - ec.rev_SI02 = ifelse(is.na(ec.rev_SI02)==TRUE,0,ec.rev_SI02), - ec.rev_SI03 = ifelse(is.na(ec.rev_SI03)==TRUE,0,ec.rev_SI03), - emp.li_SI01 = ifelse(is.na(emp.li_SI01)==TRUE,0,emp.li_SI01), - emp.li_SI02 = ifelse(is.na(emp.li_SI02)==TRUE,0,emp.li_SI02), - emp.li_SI03 = ifelse(is.na(emp.li_SI03)==TRUE,0,emp.li_SI03), - ec.li_SI01 = ifelse(is.na(ec.li_SI01)==TRUE,0,ec.li_SI01), - ec.li_SI02 = ifelse(is.na(ec.li_SI02)==TRUE,0,ec.li_SI02), - ec.li_SI03 = ifelse(is.na(ec.li_SI03)==TRUE,0,ec.li_SI03)) - -### STATEWIDE - -state.multipliers <- left_join(statewide,zip.output,by=c("IndustryCode"="Industry.Code")) %>% - mutate(emp.rev = emp.rev*share_output_state, - ec.rev = ec.rev*share_output_state, - emp.li = emp.li*share_output_state, - ec.li = ec.li*share_output_state) %>% - group_by(OriginRegion,zip,ImpactType,county,ind_type) %>% - summarize(emp.rev = sum(emp.rev, na.rm=TRUE), - ec.rev = sum(ec.rev, na.rm=TRUE), - emp.li = sum(emp.li, na.rm=TRUE), - ec.li = sum(ec.li, na.rm=TRUE)) %>% - ungroup() - - -state.multipliers <- pivot_wider(state.multipliers, - id_cols=c("zip","county","OriginRegion","ImpactType"),values_from = c("emp.rev","ec.rev","emp.li","ec.li"), names_from = "ind_type") %>% - mutate(emp.rev_SI01 = ifelse(is.na(emp.rev_SI01)==TRUE,0,emp.rev_SI01), - emp.rev_SI02 = ifelse(is.na(emp.rev_SI02)==TRUE,0,emp.rev_SI02), - emp.rev_SI03 = ifelse(is.na(emp.rev_SI03)==TRUE,0,emp.rev_SI03), - ec.rev_SI01 = ifelse(is.na(ec.rev_SI01)==TRUE,0,ec.rev_SI01), - ec.rev_SI02 = ifelse(is.na(ec.rev_SI02)==TRUE,0,ec.rev_SI02), - ec.rev_SI03 = ifelse(is.na(ec.rev_SI03)==TRUE,0,ec.rev_SI03), - emp.li_SI01 = ifelse(is.na(emp.li_SI01)==TRUE,0,emp.li_SI01), - emp.li_SI02 = ifelse(is.na(emp.li_SI02)==TRUE,0,emp.li_SI02), - emp.li_SI03 = ifelse(is.na(emp.li_SI03)==TRUE,0,emp.li_SI03), - ec.li_SI01 = ifelse(is.na(ec.li_SI01)==TRUE,0,ec.li_SI01), - ec.li_SI02 = ifelse(is.na(ec.li_SI02)==TRUE,0,ec.li_SI02), - ec.li_SI03 = ifelse(is.na(ec.li_SI03)==TRUE,0,ec.li_SI03)) - -rm(zip.output) - -## JOIN ZIP CODE EFFECTS TO LODES DATA AND MULTIPLY BY THE SHARE OF WORKERS IN EACH ZIP CODE WORKING IN DIFFERENT CENSUS TRACTS -df.zip <- mutate(df.zip, - ZIP = as.numeric(ZIP)) - -### MRIO -mrio.multipliers <- left_join(mrio.multipliers,df.zip,by=c("zip"="ZIP")) %>% - mutate(emp.rev_SI01 = emp.rev_SI01*SI01_share, - emp.rev_SI02 = emp.rev_SI02*SI02_share, - emp.rev_SI03 = emp.rev_SI03*SI03_share, - ec.rev_SI01 = ec.rev_SI01*SI01_share, - ec.rev_SI02 = ec.rev_SI02*SI02_share, - ec.rev_SI03 = ec.rev_SI03*SI03_share, - emp.rev = emp.rev_SI01+emp.rev_SI02+emp.rev_SI03, - ec.rev = ec.rev_SI01+ec.rev_SI02+ec.rev_SI03, - emp.li_SI01 = emp.li_SI01*SI01_share, - emp.li_SI02 = emp.li_SI02*SI02_share, - emp.li_SI03 = emp.li_SI03*SI03_share, - ec.li_SI01 = ec.li_SI01*SI01_share, - ec.li_SI02 = ec.li_SI02*SI02_share, - ec.li_SI03 = ec.li_SI03*SI03_share, - emp.li = emp.li_SI01+emp.li_SI02+emp.li_SI03, - ec.li = ec.li_SI01+ec.li_SI02+ec.li_SI03) %>% - group_by(OriginRegion,cluster,h_tract_geocode,county,ImpactType) %>% - summarize(emp.rev = sum(emp.rev,na.rm=TRUE), - ec.rev = sum(ec.rev,na.rm=TRUE), - emp.li = sum(emp.li,na.rm=TRUE), - ec.li = sum(ec.li,na.rm=TRUE)) - -### NOT MRIO -not.mrio.multipliers <- left_join(not.mrio.multipliers,df.zip,by=c("zip"="ZIP")) %>% - mutate(emp.rev_SI01 = emp.rev_SI01*SI01_share, - emp.rev_SI02 = emp.rev_SI02*SI02_share, - emp.rev_SI03 = emp.rev_SI03*SI03_share, - ec.rev_SI01 = ec.rev_SI01*SI01_share, - ec.rev_SI02 = ec.rev_SI02*SI02_share, - ec.rev_SI03 = ec.rev_SI03*SI03_share, - emp.rev = emp.rev_SI01+emp.rev_SI02+emp.rev_SI03, - ec.rev = ec.rev_SI01+ec.rev_SI02+ec.rev_SI03, - emp.li_SI01 = emp.li_SI01*SI01_share, - emp.li_SI02 = emp.li_SI02*SI02_share, - emp.li_SI03 = emp.li_SI03*SI03_share, - ec.li_SI01 = ec.li_SI01*SI01_share, - ec.li_SI02 = ec.li_SI02*SI02_share, - ec.li_SI03 = ec.li_SI03*SI03_share, - emp.li = emp.li_SI01+emp.li_SI02+emp.li_SI03, - ec.li = ec.li_SI01+ec.li_SI02+ec.li_SI03) %>% - group_by(OriginRegion,cluster,h_tract_geocode,county,ImpactType) %>% - summarize(emp.rev = sum(emp.rev,na.rm=TRUE), - ec.rev = sum(ec.rev,na.rm=TRUE), - emp.li = sum(emp.li,na.rm=TRUE), - ec.li = sum(ec.li,na.rm=TRUE)) - -### STATEWIDE -state.multipliers <- left_join(state.multipliers,df.zip,by=c("zip"="ZIP")) %>% - mutate(emp.rev_SI01 = emp.rev_SI01*SI01_share, - emp.rev_SI02 = emp.rev_SI02*SI02_share, - emp.rev_SI03 = emp.rev_SI03*SI03_share, - ec.rev_SI01 = ec.rev_SI01*SI01_share, - ec.rev_SI02 = ec.rev_SI02*SI02_share, - ec.rev_SI03 = ec.rev_SI03*SI03_share, - emp.rev = emp.rev_SI01+emp.rev_SI02+emp.rev_SI03, - ec.rev = ec.rev_SI01+ec.rev_SI02+ec.rev_SI03, - emp.li_SI01 = emp.li_SI01*SI01_share, - emp.li_SI02 = emp.li_SI02*SI02_share, - emp.li_SI03 = emp.li_SI03*SI03_share, - ec.li_SI01 = ec.li_SI01*SI01_share, - ec.li_SI02 = ec.li_SI02*SI02_share, - ec.li_SI03 = ec.li_SI03*SI03_share, - emp.li = emp.li_SI01+emp.li_SI02+emp.li_SI03, - ec.li = ec.li_SI01+ec.li_SI02+ec.li_SI03) %>% - group_by(OriginRegion,h_tract_geocode,county,ImpactType) %>% - summarize(emp.rev = sum(emp.rev,na.rm=TRUE), - ec.rev = sum(ec.rev,na.rm=TRUE), - emp.li = sum(emp.li,na.rm=TRUE), - ec.li = sum(ec.li,na.rm=TRUE)) - - -########################################################################### - -# DIRECT IMPACTS--IMPLAN MULTIPLIER * SHARE OF OUTPUT AT EACH REFINERY CENSUS TRACT * LODES SHARES AT TRACT LEVEL -## IMPLAN MULTIPLIERS -mrio <- filter(mrio, ImpactType=="Direct") -statewide <- filter(statewide,ImpactType=="Direct") - -## SHARE WORKERS IN EACH REFINERY CENSUS TRACT LIVING IN RESIDENTIAL CENSUS TRACT R - -w.tract.total <- group_by(df.tract, - w_tract_geocode) %>% - summarize(S000_w = sum(S000,na.rm=TRUE), - SI01_w = sum(SI01,na.rm=TRUE)) %>% - ungroup() - -wh.tract.share <- left_join(df.tract,w.tract.total,by="w_tract_geocode") %>% - mutate(S000_share = ifelse(S000_w > 0, S000/S000_w, 0), - SI01_share = ifelse(SI01_w > 0, SI01/SI01_w, 0)) %>% - filter(w_tract_geocode == "06037980002" | - w_tract_geocode == "06037980030" | - w_tract_geocode == "06013378000" | - w_tract_geocode == "06095252102" | - w_tract_geocode == "06029002401" | - w_tract_geocode == "06037980014" | - w_tract_geocode == "06037980005" | - w_tract_geocode == "06029000507" | - w_tract_geocode == "06013320001" | - w_tract_geocode == "06013315000" | - w_tract_geocode == "06013358000" | - w_tract_geocode == "06037980015" | - w_tract_geocode == "06079012306" | - w_tract_geocode == "06037553502" | - w_tract_geocode == "06029000507") %>% - dplyr::select(w_tract_geocode,h_tract_geocode,S000_share,SI01_share) %>% - mutate(refinery_name = ifelse(w_tract_geocode == "06037980002","Marathon Carson", - ifelse(w_tract_geocode == "06037980030","Chevron El Segundo", - ifelse(w_tract_geocode == "06013378000","Chevron Richmond", - ifelse(w_tract_geocode == "06095252102","Valero Benicia", - ifelse(w_tract_geocode == "06029002401","Kern Oil Bakersfield", - ifelse(w_tract_geocode == "06037980014","Valero Wilmington", - ifelse(w_tract_geocode == "06037980005","PBF Torrance", - ifelse(w_tract_geocode == "06029000507","San Joaquin Bakersfield", - ifelse(w_tract_geocode == "06013320001","PBF Martinez", - ifelse(w_tract_geocode == "06013315000","Marathon Golden Eagle", - ifelse(w_tract_geocode == "06013358000","Phillips 66 Rodeo", - ifelse(w_tract_geocode == "06037980015","Phillips 66 Wilmington", - ifelse(w_tract_geocode == "06079012306","Phillips 66 Santa Maria", - ifelse(w_tract_geocode == "06037553502","AltAir Paramount", - ifelse(w_tract_geocode == "06029000507","Global Clean Energy",NA))))))))))))))), - cluster = ifelse((refinery_name=="Marathon Carson" | refinery_name=="Chevron El Segundo" | refinery_name=="Valero Wilmington" | - refinery_name=="PBF Torrance" | refinery_name=="Phillips 66 Wilmington" | refinery_name=="AltAir Paramount"),"south","north")) -summary(wh.tract.share$S000_share) -summary(wh.tract.share$SI01_share) - -mrio <- dplyr::select(mrio, - emp.rev,ec.rev,cluster) %>% - rename(emp.rev.mrio = emp.rev, - ec.rev.mrio = ec.rev) -direct.multipliers <- left_join(wh.tract.share,mrio,by="cluster") -direct.multipliers$emp.rev.state <- statewide$emp.rev -direct.multipliers$ec.rev.state <- statewide$ec.rev - -direct.multipliers <- mutate(direct.multipliers, - emp.rev.mrio = emp.rev.mrio*SI01_share, - ec.rev.mrio = ec.rev.mrio*SI01_share, - emp.rev.state = emp.rev.state*SI01_share, - ec.rev.state = ec.rev.state*SI01_share) %>% - dplyr::select(-S000_share, -SI01_share) - -summary(direct.multipliers$emp.rev.mrio) -summary(direct.multipliers$ec.rev.mrio) -summary(direct.multipliers$emp.rev.state) -summary(direct.multipliers$ec.rev.state) - - - -# COMPARE TO OLD MULTIPLIERS - -mrio.multipliers <- filter(mrio.multipliers, - ImpactType != "Direct") %>% - ungroup() %>% - mutate(county = str_remove_all(county," County"), - region = ifelse((county=="Butte" | county=="Colusa" | county=="El Dorado" | county=="Glenn" | county=="Lassen" | - county=="Modoc" | county=="Nevada" | county=="Placer" | county=="Plumas" | county=="Sacramento" | - county=="Shasta" | county=="Sierra" | county=="Siskiyou" | county=="Sutter" | county=="Tehama" | - county=="Yolo" | county=="Yuba"),"1", - ifelse((county=="Del Norte" | county=="Humboldt" | county=="Lake" | county=="Mendocino" | county=="Napa" | - county=="Sonoma" | county=="Trinity"),"2", - ifelse((county=="Alameda" | county=="Marin" | county=="San Francisco" | county=="San Mateo" | county=="Santa Clara"),"3", - ifelse((county=="Alpine" | county=="Amador" | county=="Calaveras" | county=="Madera" | county=="Mariposa" | - county=="Merced" | county=="Mono" | county=="San Joaquin" | county=="Stanislaus" | county=="Tuolumne"),"4", - ifelse((county=="Monterey" | county=="San Benito" | county=="Santa Barbara" | county=="Santa Cruz" | county=="Ventura"),"5", - ifelse((county=="Fresno" | county=="Inyo" | county=="Kings" | county=="Tulare"),"6", - ifelse((county=="Riverside" | county=="San Bernardino"),"7", - ifelse(county=="Orange","9", - ifelse((county=="Imperial" | county=="San Diego"),"10",county)))))))))) %>% - group_by(region,ImpactType) %>% - summarize(emp.rev = sum(emp.rev,na.rm = FALSE), - ec.rev = sum(ec.rev,na.rm = FALSE)) %>% - ungroup() - - -not.mrio.multipliers <- filter(not.mrio.multipliers, - ImpactType != "Direct") %>% - ungroup() %>% - mutate(county = str_remove_all(county," County"), - region = ifelse((county=="Butte" | county=="Colusa" | county=="El Dorado" | county=="Glenn" | county=="Lassen" | - county=="Modoc" | county=="Nevada" | county=="Placer" | county=="Plumas" | county=="Sacramento" | - county=="Shasta" | county=="Sierra" | county=="Siskiyou" | county=="Sutter" | county=="Tehama" | - county=="Yolo" | county=="Yuba"),"1", - ifelse((county=="Del Norte" | county=="Humboldt" | county=="Lake" | county=="Mendocino" | county=="Napa" | - county=="Sonoma" | county=="Trinity"),"2", - ifelse((county=="Alameda" | county=="Marin" | county=="San Francisco" | county=="San Mateo" | county=="Santa Clara"),"3", - ifelse((county=="Alpine" | county=="Amador" | county=="Calaveras" | county=="Madera" | county=="Mariposa" | - county=="Merced" | county=="Mono" | county=="San Joaquin" | county=="Stanislaus" | county=="Tuolumne"),"4", - ifelse((county=="Monterey" | county=="San Benito" | county=="Santa Barbara" | county=="Santa Cruz" | county=="Ventura"),"5", - ifelse((county=="Fresno" | county=="Inyo" | county=="Kings" | county=="Tulare"),"6", - ifelse((county=="Riverside" | county=="San Bernardino"),"7", - ifelse(county=="Orange","9", - ifelse((county=="Imperial" | county=="San Diego"),"10",county)))))))))) %>% - group_by(region,ImpactType) %>% - summarize(emp.rev = sum(emp.rev,na.rm = FALSE), - ec.rev = sum(ec.rev,na.rm = FALSE)) %>% - ungroup() - -state.multipliers <- filter(state.multipliers, - ImpactType != "Direct") %>% - ungroup() %>% - mutate(county = str_remove_all(county," County"), - region = ifelse((county=="Butte" | county=="Colusa" | county=="El Dorado" | county=="Glenn" | county=="Lassen" | - county=="Modoc" | county=="Nevada" | county=="Placer" | county=="Plumas" | county=="Sacramento" | - county=="Shasta" | county=="Sierra" | county=="Siskiyou" | county=="Sutter" | county=="Tehama" | - county=="Yolo" | county=="Yuba"),"1", - ifelse((county=="Del Norte" | county=="Humboldt" | county=="Lake" | county=="Mendocino" | county=="Napa" | - county=="Sonoma" | county=="Trinity"),"2", - ifelse((county=="Alameda" | county=="Marin" | county=="San Francisco" | county=="San Mateo" | county=="Santa Clara"),"3", - ifelse((county=="Alpine" | county=="Amador" | county=="Calaveras" | county=="Madera" | county=="Mariposa" | - county=="Merced" | county=="Mono" | county=="San Joaquin" | county=="Stanislaus" | county=="Tuolumne"),"4", - ifelse((county=="Monterey" | county=="San Benito" | county=="Santa Barbara" | county=="Santa Cruz" | county=="Ventura"),"5", - ifelse((county=="Fresno" | county=="Inyo" | county=="Kings" | county=="Tulare"),"6", - ifelse((county=="Riverside" | county=="San Bernardino"),"7", - ifelse(county=="Orange","9", - ifelse((county=="Imperial" | county=="San Diego"),"10",county)))))))))) %>% - group_by(region,ImpactType) %>% - summarize(emp.rev = sum(emp.rev,na.rm = FALSE), - ec.rev = sum(ec.rev,na.rm = FALSE)) %>% - ungroup() - - -lodes.xwalk <- fread('ca_xwalk.csv') %>% - dplyr::select(trct,ctyname) %>% - mutate(trct = str_pad(as.character(trct),11,pad="0"), - ctyname = str_remove_all(ctyname," County,CA")) -str(lodes.xwalk) - -## where are new impacts actually felt? all 58 counties -direct.v2 <- inner_join(direct.multipliers,lodes.xwalk,by=c("h_tract_geocode"="trct")) %>% - group_by(ctyname) %>% - summarize(emp.rev.mrio = sum(emp.rev.mrio,na.rm = FALSE), - ec.rev.mrio = sum(ec.rev.mrio,na.rm = FALSE), - emp.rev.state = sum(emp.rev.state,na.rm = FALSE), - ec.rev.state = sum(ec.rev.state,na.rm = FALSE)) %>% - ungroup() %>% - mutate(ec.rev.mrio.per.worker = ec.rev.mrio/emp.rev.mrio) -summary(direct.v2$ec.rev.mrio.per.worker) - -fwrite(direct.v2, 'direct_impact_by_home_county.csv') - -direct.multipliers <- mutate(direct.multipliers, - county = ifelse((refinery_name=="Marathon Golden Eagle" | refinery_name=="Chevron Richmond" | - refinery_name=="PBF Martinez" | refinery_name=="Phillips 66 Rodeo"),"Contra Costa", - ifelse((refinery_name=="Marathon Carson" | refinery_name=="Chevron El Segundo" | - refinery_name=="Valero Wilmington" | refinery_name=="PBF Torrance" | - refinery_name=="Phillips 66 Wilmington" | refinery_name=="AltAir Paramount"), "Los Angeles", - ifelse((refinery_name=="Valero Benicia"),"Solano", - ifelse((refinery_name=="Kern Oil Bakersfield" | refinery_name=="San Joaquin Bakersfield"),"Kern", - ifelse(refinery_name=="Phillips 66 Santa Maria","San Luis Obispo",NA)))))) %>% - group_by(county) %>% - summarize(emp.rev.mrio = sum(emp.rev.mrio,na.rm = FALSE), - ec.rev.mrio = sum(ec.rev.mrio,na.rm = FALSE), - emp.rev.state = sum(emp.rev.state,na.rm = FALSE), - ec.rev.state = sum(ec.rev.state,na.rm = FALSE)) %>% - mutate(n_refineries = ifelse(county=="Contra Costa",4, - ifelse(county=="Los Angeles",6, - ifelse(county=="Solano",1, - ifelse(county=="Kern",2, - ifelse(county=="San Luis Obispo",1,NA)))))) %>% - ungroup() - -### COMPARISON IS OLD MULTIPLIERS -old.mult <- fread('old_multipliers.csv') %>% - filter(ImpactType=="Direct") %>% - rename(old_emp = Employment, - old_ec = EmployeeCompensation) %>% - mutate(county = str_remove_all(DestinationRegion," County, CA"), - county = trimws(county)) %>% - dplyr::select(county, old_emp, old_ec) -str(old.mult) - -old.mult.indir.indu <- fread('old_multipliers.csv') %>% - filter(ImpactType=="Indirect" | ImpactType=="Induced") %>% - rename(old_emp = Employment, - old_ec = EmployeeCompensation) %>% - mutate(county = str_remove_all(DestinationRegion," County, CA"), - county = trimws(county)) %>% - dplyr::select(county,ImpactType, old_emp, old_ec) %>% - group_by(county,ImpactType) %>% - summarize(old_emp = sum(old_emp,na.rm=TRUE), - old_ec = sum(old_ec, na.rm=TRUE)) %>% - ungroup() -str(old.mult.indir.indu) - -comparison.df <- left_join(direct.multipliers,old.mult,by="county") -fwrite(comparison.df,'multiplier_comparison.csv') - -comparison.df.indir.indu <- left_join(mrio.multipliers,old.mult.indir.indu,by=c("region"="county","ImpactType")) - - -statewide.indir.indu <- filter(rev.statewide, ImpactType != "Direct") %>% - group_by(DestinationRegion,ImpactType) %>% - summarize(emp.rev = sum(emp.rev, na.rm=TRUE), - ec.rev = sum(ec.rev, na.rm=TRUE)) - -li.statewide <- filter(statewide, ImpactType=="Induced") %>% - group_by(DestinationRegion) %>% - summarize(emp.li = sum(emp.li, na.rm=TRUE), - ec.li = sum(ec.li, na.rm=TRUE)) - - - diff --git a/before-targets/exploratory/srm-120-exploration.R b/before-targets/exploratory/srm-120-exploration.R deleted file mode 100644 index d43dd16..0000000 --- a/before-targets/exploratory/srm-120-exploration.R +++ /dev/null @@ -1,426 +0,0 @@ -## tracey mangin -## july 7, 2025 -## srm exploration for refinery 120 - -## attach libraries -library(data.table) -library(sf) -library(rnaturalearth) -library(rnaturalearthhires) -library(tidyverse) - -## user -user <- "vincent-home" - -## paths -## ------------------------------------------- - -# list paths -list_paths <- c( - "tracey-laptop" = "/Users/traceymangin/Library/CloudStorage/GoogleDrive-tmangin@ucsb.edu/Shared\ drives/emlab/projects/current-projects/calepa-cn/", - "tracey-desktop" = "/Users/tracey/Library/CloudStorage/GoogleDrive-tmangin@ucsb.edu/Shared\ drives/emlab/projects/current-projects/calepa-cn/", - "vincent-work" = "H://Shared drives/emlab/projects/current-projects/calepa-cn", - "vincent-home" = "G://Shared drives/emlab/projects/current-projects/calepa-cn", - "meas" = "data" - ) - -# set main path -main_path <- list_paths[user] - -file_inmap_re <- file.path(main_path, - "data-staged-for-deletion/health/source_receptor_matrix/inmap_processed_srm/refining") - -file_refin_locs <- file.path(main_path, - "/data-staged-for-deletion/stocks-flows/processed/refinery_lat_long_revised.csv") - -file_refin_locs_orig <- file.path(main_path, - "data-staged-for-deletion/GIS/raw/Petroleum_Refineries_US_EIA/Petroleum_Refineries_US_2019_v2.shp" -) - -ca_crs <- 3310 - -## functions -## -------------------------------------------- - -read_inmap_data <- function(inmap_path, bsite) { - - if (bsite != 120) { - - nh3 <- fread( - paste0(inmap_path, "/nh3/srm_nh3_site", bsite, ".csv"), - header = TRUE, - colClasses = c(GEOID = "character") - ) - } else {nh3 <- data.table()} - - nox <- fread( - paste0(inmap_path, "/nox/srm_nox_site", bsite, ".csv"), - header = TRUE, - colClasses = c(GEOID = "character") - ) - pm25 <- fread( - paste0(inmap_path, "/pm25/srm_pm25_site", bsite, ".csv"), - header = TRUE, - colClasses = c(GEOID = "character") - ) - sox <- fread( - paste0(inmap_path, "/sox/srm_sox_site", bsite, ".csv"), - header = TRUE, - colClasses = c(GEOID = "character") - ) - voc <- fread( - paste0(inmap_path, "/voc/srm_voc_site", bsite, ".csv"), - header = TRUE, - colClasses = c(GEOID = "character") - ) - - nh3[, pollutant := "nh3"] - nox[, pollutant := "nox"] - pm25[, pollutant := "pm25"] - sox[, pollutant := "sox"] - voc[, pollutant := "voc"] - - all_pollutants <- rbind(nox, pm25, sox, voc, nh3, fill = TRUE) - all_pollutants[, site := bsite] - -} - -process_weighted_pm25 <- function(dt_inmap_re) { - dt <- copy(dt_inmap_re) - setnames(dt, "totalpm25_aw", "weighted_totalpm25") - - dt_wide <- dcast( - dt, - GEOID + site ~ pollutant, - value.var = "weighted_totalpm25" - ) - setnames( - dt_wide, - c("nh3", "nox", "pm25", "sox", "voc"), - paste0("weighted_totalpm25_", c("nh3", "nox", "pm25", "sox", "voc")) - ) - - dt_wide <- dt_wide[!is.na(GEOID)] - - setnames(dt_wide, "site", "site_id") - - dt_wide <- unique(dt_wide) - - dt_wide -} - -read_refin_locs <- function(file_refin_locs, file_refin_locs_orig, ca_crs) { - refin_crs <- st_crs(st_read(file_refin_locs_orig)) - - ## Refineries plus - refin_new_locations <- fread(file_refin_locs) %>% - mutate(coords = gsub("^c\\(|\\)$", "", geometry)) %>% - separate(coords, c("lon", "lat"), sep = ",") %>% - select(-geometry) %>% - st_as_sf( - coords = c("lon", "lat"), - crs = refin_crs - ) %>% - st_transform(ca_crs) -} - - -## read data -## -------------------------------------------- - -buff_sites <- c( - 97, - 119, - 120, - 164, - 202, - 209, - 226, - 271, - 279, - 332, - 342, - 343, - 800, - 3422, - 34222, - 99999 -) - -# refinery locations -refin_locs <- read_refin_locs(file_refin_locs, - file_refin_locs_orig, - ca_crs -) - -# read inputs -dt_inmap_re <- rbindlist(lapply( - buff_sites, - read_inmap_data, - inmap_path = file_inmap_re -)) - -# process srm -srm_weighted_pm25 <- process_weighted_pm25(dt_inmap_re) - -## find closest refineries -## -------------------------------------------------- - -# Select the reference point (e.g., id == "A") -ref_point <- refin_locs[refin_locs$site_id == 120, ] - -# Compute distances from the reference point to all others -distances <- st_distance(ref_point, refin_locs) - -# Optional: add to the original data -refin_locs$distance_to_120 <- as.numeric(distances) # units in meters by default - -# rank -refin_locs$dist_rank <- rank(refin_locs$distance_to_120, ties.method = "min") - -# disance in km -refin_locs$dist_km <- refin_locs$distance_to_120 / 1000 - -## modify so that 120 doesn't count -refin_locs <- refin_locs |> - mutate(dist_rank = ifelse(site_id == 120, 0, dist_rank - 1), - site_name = paste0(site_id, " dist = ", round(dist_km, 1), " km")) - -refin_locs <- refin_locs %>% - mutate(site_name = fct_reorder(site_name, dist_rank)) - -# usa <- ne_states(country = "united states of america", returnclass = "sf") -# california <- usa[usa$name == "California", ] -# -# california <- st_transform(california, ca_crs) -# -# refinery_fig <- ggplot() + -# geom_sf(data = california, fill = NA, color = "black") + -# geom_sf(data = refin_locs, aes(color = site_id), alpha = 0.7, size = 1) + # point locations -# # geom_sf_text(data = refin_locs, aes(label = site_id), nudge_y = 0.05) + # labels -# theme_minimal() -# -# plotly::ggplotly(refinery_fig) - -## compute ratio, NH3 to PM2.5 -## -------------------------------------------- - -srm_ratio_df_nh3 <- srm_weighted_pm25 |> - select(GEOID, site_id, weighted_totalpm25_nh3) - -srm_ratio_df <- srm_weighted_pm25 |> - pivot_longer(weighted_totalpm25_nh3:weighted_totalpm25_voc, - names_to = "pollutant", - values_to = "concentration") |> - filter(pollutant != "weighted_totalpm25_nh3") |> - left_join(srm_ratio_df_nh3) |> - mutate(site_id = as.character(site_id), - ratio_nh3_pol = weighted_totalpm25_nh3 / concentration) |> - left_join(refin_locs |> - select(site_id, cluster, distance_to_120, dist_rank, site_name, geometry)) |> - mutate(pollutant_name = str_remove(pattern = "weighted_totalpm25_", pollutant)) - -allboxplot <- ggplot(srm_ratio_df |> filter(site_id != 120, - cluster == "Bay Area"), aes(x = pollutant_name, y = ratio_nh3_pol)) + - geom_boxplot() + - facet_wrap(~ site_name, scales = "fixed") + # or facet_grid(pollutant ~ site_id) - scale_fill_brewer(palette = "Set2") + # or manual colors - labs(y = "concentration ratio: NH3 to pollutant", - x = NULL) + - theme_minimal() + - theme( - axis.text.x = element_text(angle = 45, hjust = 1), - strip.background = element_rect(fill = "white", color = NA), - strip.text = element_text(face = "bold") - ) - -ggsave(filename = "srm_ratio_pollutant_pm25.png", - plot = allboxplot, - path = here::here("outputs", "exploratory", "exploratory-figs"), - width = 8, - height = 6, - dpi = 300) - - -boxplot_summary_df <- srm_ratio_df |> - filter(!site_id %in% c(120, 3422), - cluster == "Bay Area", - pollutant_name == "pm25") |> - group_by(site_id, site_name, pollutant) |> - summarise( - Q1 = quantile(ratio_nh3_pol, 0.25, na.rm = TRUE), - Median = quantile(ratio_nh3_pol, 0.5, na.rm = TRUE), - Q3 = quantile(ratio_nh3_pol, 0.75, na.rm = TRUE), - IQR = IQR(ratio_nh3_pol, na.rm = TRUE), - Lower_Whisker = max(min(ratio_nh3_pol), Q1 - 1.5 * IQR), - Upper_Whisker = min(max(ratio_nh3_pol), Q3 + 1.5 * IQR) - ) - -ratio_fig <- ggplot(srm_ratio_df |> filter(!site_id %in% c(120, 3422), - cluster == "Bay Area", - pollutant_name == "pm25"), - aes(x = site_name, y = ratio_nh3_pol)) + - geom_boxplot() + - geom_text(data = boxplot_summary_df, aes(x = site_name, y = Median, label = round(Median, 2)), - vjust = -0.5, size = 3, color = "black") + - labs(y = "concentration ratio: NH3 to PM2.5", - x = NULL) + - theme_minimal() + - theme(axis.text.x = element_text(angle = 45, hjust = 1), - strip.background = element_rect(fill = "white", color = NA), - strip.text = element_text(face = "bold") - ) - -ggsave(filename = "srm_ratio_nh3_pm25.png", - plot = ratio_fig, - path = here::here("exploratory", "exploratory", "exploratory-figs"), - width = 8, - height = 6, - dpi = 300) - -## repeat, 120 PM2.5 to other site PM2.5 -## -------------------------------------------- - -srm_ratio_df_120 <- srm_weighted_pm25 |> - filter(site_id == 120) |> - select(GEOID, pm25_120 = weighted_totalpm25_pm25) - -srm_ratio_pm25_df <- srm_weighted_pm25 |> - filter(site_id != 120) |> - select(GEOID, site_id, pm25 = weighted_totalpm25_pm25) |> - left_join(srm_ratio_df_120) |> - mutate(site_id = as.character(site_id), - ratio_pm25 = pm25_120 / pm25) |> - left_join(refin_locs |> - select(site_id, cluster, distance_to_120, dist_rank, site_name, geometry)) - -pm25_boxplot_summary_df <- srm_ratio_pm25_df |> - filter(!site_id %in% c(120, 3422), - cluster == "Bay Area") |> - group_by(site_id, site_name) |> - summarise( - Q1 = quantile(ratio_pm25, 0.25, na.rm = TRUE), - Median = quantile(ratio_pm25, 0.5, na.rm = TRUE), - Q3 = quantile(ratio_pm25, 0.75, na.rm = TRUE), - IQR = IQR(ratio_pm25, na.rm = TRUE), - Lower_Whisker = max(min(ratio_pm25), Q1 - 1.5 * IQR), - Upper_Whisker = min(max(ratio_pm25), Q3 + 1.5 * IQR) - ) - -pm25_ratio_fig <- ggplot(srm_ratio_pm25_df |> filter(!site_id %in% c(120, 3422), - cluster == "Bay Area"), - aes(x = site_name, y = ratio_pm25)) + - geom_boxplot() + - geom_text(data = pm25_boxplot_summary_df, aes(x = site_name, y = Median, label = round(Median, 2)), - vjust = -0.5, size = 3, color = "black") + - labs(y = "Concentration ratio: site 120 PM2.5 to PM2.5", - x = NULL) + - theme_minimal() + - theme(axis.text.x = element_text(angle = 45, hjust = 1), - strip.background = element_rect(fill = "white", color = NA), - strip.text = element_text(face = "bold") - ) - -ggsave(filename = "srm_ratio_120pm25_pm25.png", - plot = pm25_ratio_fig, - path = here::here("exploratory", "exploratory", "exploratory-figs"), - width = 8, - height = 6, - dpi = 300) - -pm25_ratio_fig_no <- ggplot(srm_ratio_pm25_df |> filter(!site_id %in% c(120, 3422), - cluster == "Bay Area"), - aes(x = site_name, y = ratio_pm25)) + - geom_boxplot(outlier.shape = NA) + - geom_text(data = pm25_boxplot_summary_df, aes(x = site_name, y = Median, label = round(Median, 2)), - vjust = -0.2, size = 3, color = "black") + - labs(y = "Concentration ratio: site 120 PM2.5 to PM2.5", - x = NULL) + - ylim(0.75, 1.3) + - theme_minimal() + - theme(axis.text.x = element_text(angle = 45, hjust = 1), - strip.background = element_rect(fill = "white", color = NA), - strip.text = element_text(face = "bold") - ) - -ggsave(filename = "srm_ratio_120pm25_pm25_no_outliers.png", - plot = pm25_ratio_fig_no, - path = here::here("exploratory", "exploratory", "exploratory-figs"), - width = 8, - height = 6, - dpi = 300) - -#### importance of NH3 - -#across sites -total <- srm_weighted_pm25 %>% - #filter(!(site_id %in% "120"))%>% - group_by(site_id)%>% - mutate(across(starts_with("weighted"), sum))%>% - select(-GEOID)%>% - ungroup()%>% - unique()%>% - gather("poll", "total_pm25", -site_id)%>% - mutate(poll = toupper(gsub("^.*_", "", poll))) - -#refinery specific EF -ef <- fread(paste0(main_path,"/data-staged-for-deletion/health/processed/refinery_emission_factor.csv"), - stringsAsFactors = F)%>% - mutate(ton_bbl = 0.001*kg_bbl)%>% - select(-kg_bbl) - -poll_importance <- ef %>% - mutate(pollutant_code = ifelse(pollutant_code == "SO2", "SOX", pollutant_code), - pollutant_code = ifelse(pollutant_code == "PM25-PRI", "PM25", pollutant_code))%>% - full_join(total, by = c("id1"="site_id", "pollutant_code"= "poll"))%>% - mutate(total_pm25_bbl = ton_bbl*total_pm25) - -#cluster level EF -ef_cluster <- fread(paste0(main_path,"/data-staged-for-deletion/health/processed/cluster_emission_factor_v2.csv"), - stringsAsFactors = F)%>% - mutate(ton_bbl = 0.001*kg_bbl)%>% - select(-kg_bbl) - -site_cluster <- fread(paste0(main_path,"/data-staged-for-deletion/health/raw/ref_match/ref_to_match_eia.csv"), - stringsAsFactors = F)%>% - select(site_id, cluster) - - -poll_importance <- ef_cluster %>% - mutate(pollutant_code = ifelse(pollutant_code == "SO2", "SOX", pollutant_code), - pollutant_code = ifelse(pollutant_code == "PM25-PRI", "PM25", pollutant_code))%>% - right_join(total %>% left_join(site_cluster, by = c("site_id")) %>% drop_na(cluster), - by = c("cluster", "pollutant_code"="poll"))%>% - mutate(total_pm25_bbl = ton_bbl*total_pm25) - -#tot pm25 dispersed per barrel -poll_importance %>% - group_by(site_id)%>% - mutate(total_site_pm25 = sum(total_pm25_bbl,na.rm = T), - share = total_pm25_bbl/total_site_pm25)%>% - ungroup()%>% - ggplot(aes(x= as.factor(site_id), y = share))+ - geom_point()+ - facet_wrap(~pollutant_code)+ - labs(x = "Site ID", y = "Share of total dispersed secondary PM2.5 dispersed per site per barrel")+ - theme_gray(16) - -#tot pm25 dispersed per ton of precursor -poll_importance %>% - group_by(site_id)%>% - mutate(total_site_pm25 = sum(total_pm25,na.rm = T), - share = total_pm25/total_site_pm25)%>% - ungroup()%>% - ggplot(aes(x= as.factor(id1), y = share))+ - geom_point()+ - facet_wrap(~pollutant_code)+ - labs(x = "Site ID", y = "Share of total secondary PM2.5 dispersed per site per ton emitted")+ - theme_gray(16) - -#what is site 800? its a renewable fuel - -ref_analysis <- fread(paste0(main_path,"/data-staged-for-deletion/stocks-flows/processed/refinery_loc_cap_manual.csv"), - stringsAsFactors = F) - -ref <- fread(paste0(main_path,"/data-staged-for-deletion/stocks-flows/processed/refinery_lat_long.csv"), - stringsAsFactors = F) diff --git a/extras/health_results.R b/extras/health_results.R deleted file mode 100644 index 9fd66fc..0000000 --- a/extras/health_results.R +++ /dev/null @@ -1,157 +0,0 @@ -# CalEPA: Summary stats from health results -# vincent.thivierge@uottawa.ca -# created: 07/08/2025 -# updated: 07/08/2025 - -# set up environment - -rm(list = ls()) -`%notin%` <- Negate(`%in%`) -gc() - -options(scipen = 999) - -## Packages - -packages <- c( - "data.table", "dplyr", "janitor", "stringr", "ggplot2", "cowplot", - "forcats", "readxl", "forcats", "tidyr" -) - -for (i in packages) { - if (require(i, character.only = TRUE) == FALSE) { - install.packages(i, repos = "http://cran.us.r-project.org") - } else { - require(i, character.only = TRUE) - } -} - -## Directory - -wd <- c("C:\\git\\ca-refining\\outputs\\rev-submission\\cuf=0.6_beta-scenario=main\\tables\\health") #Vincent's WD -setwd(wd) -getwd() - -## State-wide mortality results - -fread("./cumulative_avoided_mortality.csv", stringsAsFactors = F)%>% - filter(demo_cat%in%"DAC")%>% - group_by(scen_id)%>% - summarise(cumul_mort_level = sum(cumul_mort_level))%>% - ungroup()%>% - mutate(diff = cumul_mort_level[scen_id == "BAU historic production"]-cumul_mort_level, - percent = (cumul_mort_level/cumul_mort_level[scen_id == "BAU historic production"])-1) - -## State-wide monetized mortality results - -fread("./cumulative_health_x_county.csv", stringsAsFactors = F)%>% - filter(demo_cat%in%"DAC")%>% - group_by(scen_id)%>% - summarise(mortality_pv = sum(mortality_pv_dem)/1000000000)%>% - ungroup()%>% - mutate(diff_pv = mortality_pv[scen_id == "BAU historic production"]-mortality_pv) - - -## State-wide monetized mortality by county (new results) - -fread("./cumulative_health_x_county.csv", stringsAsFactors = F)%>% - filter(demo_cat%in%"DAC")%>% - filter(scen_id %in% c("BAU historic production","LC1 low exports"))%>% - group_by(scen_id,NAME)%>% - summarize(mortality_pv_dem = sum(mortality_pv_dem))%>% - ungroup()%>% - group_by(scen_id)%>% - mutate(mortality_pv = sum(mortality_pv_dem))%>% - ungroup()%>% - mutate(diff_pv_state = mortality_pv[scen_id == "BAU historic production"]-mortality_pv, - diff_pv = mortality_pv_dem[scen_id == "BAU historic production"]-mortality_pv_dem)%>% - mutate(share_pv = (diff_pv/diff_pv_state))%>% - #filter(scen_id %in% c("LC1 low exports"))%>% - arrange(-share_pv) - -## By demographic - -fig5 <- fread("C:\\git\\ca-refining\\outputs\\rev-submission\\cuf=0.6_beta-scenario=main\\results\\figures\\figure-5\\state_disaggregated_npv_pc_fig_inputs.csv", - stringsAsFactors = F) - -fread("C:\\git\\ca-refining\\outputs\\rev-submission\\cuf=0.6_beta-scenario=main\\results\\figures\\figure-5\\state_disaggregated_npv_pc_fig_inputs.csv", - stringsAsFactors = F)%>% - filter(scen_id %in% c("BAU historical production","LC1 low exports") & demo_cat %in% "DAC" & segment %in% "health") - -## Sensitivity analyses - -#Refinery-level -fread("./state_npv_fig_inputs_health_ref.csv", stringsAsFactors = F)%>% - filter(unit_desc %in% "USD billion (2019 VSL)")%>% - #filter(scen_id %in% c("BAU historical production","LC1 low exports") & unit_desc %in% "USD billion (2019 VSL)")%>% - select(scen_id,value)%>% - distinct() - -#Growing age-based VSL -fread("C:\\git\\ca-refining\\outputs\\rev-submission\\cuf=0.6_beta-scenario=main\\results\\figures\\figure-3\\state_npv_fig_inputs_health.csv", - stringsAsFactors = F)%>% - filter(scen_id %in% c("BAU historical production","LC1 low exports") & unit_desc %in% "USD billion (annual VSL)") - -#Constant VSL -fread("./state_npv_fig_inputs_health_non_age_vsl.csv", stringsAsFactors = F)%>% - filter(scen_id %in% c("BAU historical production","LC1 low exports") & unit_desc %in% "USD billion (2019 VSL)") - -######################################################################################################################## -############# DEBUGGGING ######################################################################################################################## -######################################################################################################################## - - -fread("./cumulative_health_x_county.csv", stringsAsFactors = F)%>% - filter(demo_cat%in%"DAC")%>% - filter(scen_id %in% c("BAU historic production","LC1 low exports"))%>% - group_by(scen_id,NAME)%>% - summarize(mortality_pv_dem = sum(mortality_pv_dem), - mortality_level_dem = sum(mortality_level_dem))%>% - ungroup()%>% - group_by(scen_id)%>% - mutate(mortality_pv = sum(mortality_pv_dem))%>% - ungroup()%>% - mutate(diff_pv_state = mortality_pv[scen_id == "BAU historic production"]-mortality_pv, - diff_pv = mortality_pv_dem[scen_id == "BAU historic production"]-mortality_pv_dem)%>% - mutate(share_pv = (diff_pv/diff_pv_state))%>% - #filter(scen_id %in% c("LC1 low exports"))%>% - arrange(-share_pv)%>% - filter(NAME %in% c("Solano","San Bernardino")) - - -## State-wide monetized mortality by county (old results) - -fread("C:/Users/vince/Desktop/cumulative_health_x_county.csv", stringsAsFactors = F)%>% - filter(demo_cat%in%"DAC")%>% - filter(scen_id %in% c("BAU historic production","LC1 low exports"))%>% - group_by(scen_id,NAME)%>% - summarize(mortality_pv_dem = sum(mortality_pv_dem), - mortality_level_dem = sum(mortality_level_dem))%>% - ungroup()%>% - group_by(scen_id)%>% - mutate(mortality_pv = sum(mortality_pv_dem))%>% - ungroup()%>% - mutate(diff_pv_state = mortality_pv[scen_id == "BAU historic production"]-mortality_pv, - diff_pv = mortality_pv_dem[scen_id == "BAU historic production"]-mortality_pv_dem)%>% - mutate(share_pv = (diff_pv/diff_pv_state))%>% - #filter(scen_id %in% c("LC1 low exports"))%>% - arrange(-share_pv) - -fread("C:/Users/vince/Desktop/cumulative_health_x_county.csv", stringsAsFactors = F)%>% - filter(demo_cat%in%"DAC")%>% - filter(scen_id %in% c("BAU historic production","LC1 low exports"))%>% - group_by(scen_id,NAME)%>% - summarize(mortality_pv_dem = sum(mortality_pv_dem), - mortality_level_dem = sum(mortality_level_dem))%>% - ungroup()%>% - group_by(scen_id)%>% - mutate(mortality_pv = sum(mortality_pv_dem))%>% - ungroup()%>% - mutate(diff_pv_state = mortality_pv[scen_id == "BAU historic production"]-mortality_pv, - diff_pv = mortality_pv_dem[scen_id == "BAU historic production"]-mortality_pv_dem)%>% - mutate(share_pv = (diff_pv/diff_pv_state))%>% - #filter(scen_id %in% c("LC1 low exports"))%>% - arrange(-share_pv)%>% - filter(NAME %in% c("Solano","San Bernardino")) - - diff --git a/extras/labor_plots.R b/extras/labor_plots.R deleted file mode 100644 index f11c33e..0000000 --- a/extras/labor_plots.R +++ /dev/null @@ -1,184 +0,0 @@ -main_path <- "/Users/tracey/Library/CloudStorage/GoogleDrive-tmangin@ucsb.edu/Shared\ drives/emlab/projects/current-projects/calepa-cn/" - -tar_load(annual_labor) -tar_load(county_pop_ratios) - -labor_tmp <- annual_labor %>% - mutate(scenario = paste0(demand_scenario, "-", refining_scenario)) - - -ggplot(labor_tmp, aes(x = year, y = revenue / 1e9, color = county)) + - geom_line() + - facet_wrap(~scenario) + - labs( - x = NULL, - y = "Revenue (USD billion)", - color = NULL - ) + - theme_line - -## dac -## -------------------------------------------------------------- - -tar_load(ref_labor_demog_yr) - -dac_df <- copy(ref_labor_demog_yr) - -## change scenario names, factor -dac_df[, scenario := paste0(demand_scenario, " demand - ", refining_scenario)] -dac_df[, scenario := gsub("LC1.", "Low ", scenario)] - -## scenarios for filtering -remove_scen <- c("Low demand - historic production") - -## add scenario title -dac_df[, scenario_title := str_replace(scenario, " - ", "\n")] - -## refactor -dac_df$scenario_title <- factor(dac_df$scenario_title, levels = c( - "BAU demand\nhistoric production", - "BAU demand\nhistoric exports", - "BAU demand\nlow exports", - "Low demand\nhistoric exports", - "Low demand\nlow exports", - "Low demand\nhistoric production" -)) - - -## refactor -dac_df$scenario <- factor(dac_df$scenario, levels = c( - "BAU demand - historic production", - "BAU demand - historic exports", - "BAU demand - low exports", - "Low demand - historic exports", - "Low demand - low exports", - "Low demand - historic production" -)) - -dac_fig <- ggplot(dac_df %>% filter( - !scenario %in% remove_scen, - demo_cat == "DAC" -), aes(x = year, y = demo_emp / 1000, lty = title, color = county)) + - geom_line(linewidth = 0.8, alpha = 0.7) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - facet_grid(demo_cat ~ scenario_title) + - labs( - x = NULL, - y = "Labor: FTE jobs (thousand)" - ) + - # ylim(c(0, 35)) + - scale_x_continuous( - breaks = c(2020, 2045), # Specify tick mark positions - labels = c(2020, 2045) - ) + # Specify tick mark labels - theme_line + - theme( - legend.position = "bottom", - legend.title = element_blank(), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - # strip.text.x = element_blank(), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - # axis.text.x = element_blank(), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - -ggsave( - plot = dac_fig, - device = "png", - filename = "dac_labor_levels.png", - path = paste0(main_path, "outputs/academic-out/refining/figures/2022-12-update/labor/"), - width = 12, - height = 4, - dpi = 600 -) - - - -## gaps -## ------------------------------------------------------------- - -dac_gaps_df <- copy(ref_labor_demog_yr) - -## change scenario names, factor -dac_gaps_df[, scenario := paste0(demand_scenario, " demand - ", refining_scenario)] -dac_gaps_df[, scenario := gsub("LC1.", "Low ", scenario)] - -## scenarios for filtering -remove_scen <- c("Low demand - historic production", "BAU demand - historic production") - -## refactor -dac_gaps_df[, scenario_title := scenario] -dac_gaps_df[, scenario_title := str_replace(scenario_title, " - ", "\n")] - -dac_gaps_df$scenario <- factor(dac_gaps_df$scenario, levels = c( - "BAU demand - historic production", - "BAU demand - historic exports", - "BAU demand - low exports", - "Low demand - historic exports", - "Low demand - low exports", - "Low demand - historic production" -)) - -dac_gaps_df$scenario_title <- factor(dac_gaps_df$scenario_title, levels = c( - "BAU demand\nhistoric production", - "BAU demand\nhistoric exports", - "BAU demand\nlow exports", - "Low demand\nhistoric exports", - "Low demand\nlow exports", - "Low demand\nhistoric production" -)) -## filter for dac -dac_gaps_df <- dac_gaps_df[demo_cat == "DAC"] - - -## calculate gaps (BAU - scenario) -dac_bau_gaps_df <- dac_gaps_df[scenario == "BAU demand - historic production"] -dac_bau_gaps_df <- dac_bau_gaps_df[, c("county", "year", "demo_cat", "demo_group", "title", "demo_emp")] -setnames(dac_bau_gaps_df, "demo_emp", "bau_demo_emp") - -dac_gaps_df <- merge(dac_gaps_df, dac_bau_gaps_df, - by = c("county", "year", "demo_cat", "demo_group", "title"), - all.x = T -) - -dac_gaps_df[, gap := demo_emp - bau_demo_emp] - -## fig -dac_gap_fig_b <- ggplot(dac_gaps_df %>% filter( - !scenario %in% remove_scen, - demo_cat == "DAC" -), aes(x = year, y = gap / 1000, lty = title, color = county)) + - geom_line(linewidth = 0.8, alpha = 0.7) + - geom_hline(yintercept = 0, color = "darkgray", linewidth = 0.5) + - facet_grid(demo_cat ~ scenario_title) + - labs( - x = NULL, - y = "Labor: FTE jobs difference from reference (thousand)" - ) + - # ylim(c(-0.31, 0)) + - scale_x_continuous( - breaks = c(2020, 2045), # Specify tick mark positions - labels = c(2020, 2045) - ) + # Specify tick mark labels - theme_line + - theme( - legend.position = "bottom", - legend.title = element_blank(), - axis.text.x = element_text(vjust = 0.5, hjust = 0.5), - # strip.text.x = element_blank(), - plot.margin = unit(c(0, 0, 0, 0), "cm"), - # axis.text.x = element_blank(), - axis.ticks.length.y = unit(0.1, "cm"), - axis.ticks.length.x = unit(0.1, "cm") - ) - -ggsave( - plot = dac_gap_fig_b, - device = "png", - filename = "dac_labor_gaps.png", - path = paste0(main_path, "outputs/academic-out/refining/figures/2022-12-update/labor/"), - width = 12, - height = 4, - dpi = 600 -) diff --git a/stata/0_data_prep.do b/stata/0_data_prep.do deleted file mode 100644 index 07bfe65..0000000 --- a/stata/0_data_prep.do +++ /dev/null @@ -1,198 +0,0 @@ - -******************************************************************************** -******************************************************************************** -global startDir "/Users/paigeweber/Dropbox/Refining" -cd $startDir - - - - - - - -******************************************************************************** -******************************************************************************** - -cd data/ - -******************************************************************************** -* refinery bbl processed -import delimited "reg_refin_crude_receipts.csv", clear - -gen ym = ym(year,month) -format ym %tm - -destring bbls, replace force - -keep ym bbls region - -sort ym region -bysort ym region: egen bbl_r = sum(bbls) -bysort ym region: gen count = _n - -replace bbl_r = bbl_r/10^6 -label variable bbl_r "bbl millions" -drop bbls -keep if count ==1 -drop count -rename bbl_r bbl -reshape wide bbl, i(ym) j(region) string - - - - -reshape long bbl, i(ym) j(product) string - -sort ym product - -gen tho_bbl = bbl*10^3 -label variable tho_bbl "thousands of barrels" - -drop bbl - -save "reg_refin_ym.dta", replace - -isid ym product -******************************************************************************** - - -clear -import delimited "Cushing_OK_WTI_Spot_Price_FOB.csv", clear - -rename cushing* WTI - -split(month), parse("-") -drop month - -rename month2 year -destring year, replace - -gen year2 = 2000 + year - -replace year2 = year2-100 if year2>2023 -tab year2 - - -drop year -rename year2 year - -rename month1 month - -gen month2 = month(date(month,"M")) -drop month -rename month2 month -gen ym = ym(year,month) -format ym %tm - -save "WTI_monthly_1986_2023.dta", replace -******************************************************************************** - -import excel "Finished_Products_Movements.xlsx", sheet("Gasoline Chart Data") cellrange(A5:FB24) clear -** UNITS IN THIS SHEET ARE THOUSANDS OF BARRELS ** -duplicates drop - -rename A product - -drop if product =="" - - -foreach v of varlist C-FB{ - - rename `v' date_`v' -} - -drop B - -reshape long date, i(product) j(ym) string - -rename date quantity - -egen seq month = seq(), f(1) t(12) -egen seq year = seq(), f(2007) t(2019) b(12) - -drop ym - -gen date = mdy(month, 1, year) -format date %td - -gen ym = ym(year, month) -format ym %tm - -drop date - -*reshape wide to construct net variables -drop month year -br - -isid ym product - -rename quantity tho_bbl - -* clean up product names -replace product = trim(product) - -replace product = "NC_FEx" if product =="NC Foreign Export" -replace product = "NC_FIm" if product =="NC Foreign Import" -replace product = "NC_NetIm" if product =="NC Net Imports" -replace product = "NC_InterEx" if product =="NC Interstate Export" -replace product = "NC_InterIm" if product =="NC Interstate Import" -replace product = "NC_Reno" if product =="NC Reno (North)" -replace product = "N_to_S" if product =="North to South" -replace product = "S_to_N" if product =="South to North" - -replace product = "Gas_BlndStck" if product =="Product: Gasoline & Blendstocks" -replace product = "SC_CalEvn_W_Adj" if product =="SC CalNEv P/L (West) Adjusted" -replace product = "SC_FEx" if product =="SC Foreign Export" -replace product = "SC_FIm" if product =="SC Foreign Import" -replace product = "SC_InterEx" if product =="SC Interstate Export" -replace product = "SC_InterIm" if product =="SC Interstate Import" - -replace product = "SC_W_Adj" if product =="SC Phoenix (West) Adjusted" -replace product = "Pipeline_KM" if product =="Source: Pipeline (KM Export Spreadsheet)" - -replace product = "SC_W_Adj" if product =="SC Phoenix (West) Adjusted" -replace product = "Pipeline_SC" if product =="Total SC Pipeline" - - -replace product = "SC_NetIm" if product =="SC Net Imports" - - -isid ym product - - - - -merge m:1 ym using "WTI_monthly_1986_2023" - -keep if _merge ==3 - -tab year - -drop _merge - -append using "reg_refin_ym" - -drop year -gen date = dofm(ym) -format date %d -gen year = year(date) - -keep if year >2006 - -sort ym product - -order ym product - -drop month - -gen month = month(date) - -bysort ym: carryforward WTI, replace - -tab year - -isid ym product - -drop if tho_bbl ==. -save "temp.dta", replace - diff --git a/stata/1_regressions_figures_stats.do b/stata/1_regressions_figures_stats.do deleted file mode 100644 index d547366..0000000 --- a/stata/1_regressions_figures_stats.do +++ /dev/null @@ -1,304 +0,0 @@ - - - - - -******************************************************************************** -******************************************************************************** -global startDir "/Users/paigeweber/Dropbox/Refining" -cd $startDir - - - - - - - -******************************************************************************** -******************************************************************************** - - -*run code/0_data_prep.do // this creates temp.dta for analysis - - -use "data/temp.dta", clear - -drop if tho_bbl ==. - -isid ym product - -rename tho_bbl bbl - -reshape wide bbl, i(ym) j(product) string - -rename bbl* * - -******************************************************************************** -******************************************************************************** - - - - -gen NC_net = NC_FEx + NC_FIm -gen SC_net = SC_FEx + SC_FIm - - -replace NC_net = -1*NC_net -replace SC_net = -1*SC_net - -su NC_net, detail -scalar NCmean = r(mean) - -su SC_net, detail // mean is negative -scalar SCmean = r(mean) - -* rescaling so both are above zero - -scalar pct = 100*(SCmean - NCmean)/SCmean - -di "percent larger exports in NC is:" -di pct - -gen pct = 100*(SC_net - NC_net)/SC_net - -su pct, detail - - -bysort year: egen sum_pct = mean(pct) - -gen pct_mean = . -gen pct_high = . -gen pct_low = . - -su pct, detail -gen pct_all = r(mean) - -foreach y of numlist 2007(1)2020{ - - su pct if year ==`y', detail - replace pct_mean = r(mean) if year ==`y' - replace pct_high = r(mean) + r(sd) if year == `y' - replace pct_low = r(mean) - r(sd) if year == `y' - - } - -set scheme lean1 - -twoway (scatter pct_mean year, msymbol(o) mcolor(blue%60)) /// - (rcap pct_high pct_low year) /// - (line pct_all year, lpattern(dash)), /// - xtitle("") ytitle("Percent", size(medsmall)) /// - xscale(r(2007 2020)) xlabel(2010(2)2020) /// - legend(pos(6) col(3) label(1 "Annual mean") label( 2 "95 Pct. CI") label(3 "Average all years")) /// - title("NC exports compared to SC exports", size(medsmall)) - - - -* sum to year since model does not include monthly - - -bysort year: egen yr_NC_net = sum(NC_net) -bysort year: egen yr_SC_net = sum(SC_net) - -bysort year: egen yr_north = sum(north) -bysort year: egen yr_south = sum(south) - -bysort year: egen yr_WTI = mean(WTI) - -bysort year: gen count = _n -count if count ==1 - -gen northXWTI = north*WTI -gen southXWTI = south*WTI - -gen WTIsq = WTI*WTI -gen northsq = north*north -gen southsq = south*south - -gen yearsq = year*year - - -**************************************************** - - -reghdfe NC_net north -estimates store m0 - -reghdfe NC_net north south WTI -estimates store m1 - -reghdfe NC_net north south WTI northXWTI southXWTI -estimates store m2 - -reghdfe NC_net northsq southsq WTI WTIsq northXWTI southXWTI north south -estimates store m3 - -reghdfe NC_net northsq southsq WTI WTIsq northXWTI southXWTI north south year yearsq -estimates store m3b - -reghdfe NC_net northsq southsq WTI WTIsq northXWTI southXWTI north south year yearsq N_to_S S_to_N -estimates store m3c - - - -reghdfe SC_net south -estimates store m4a - -reghdfe SC_net north south WTI -estimates store m4 - -reghdfe SC_net north south WTI northXWTI southXWTI -estimates store m5 - -reghdfe SC_net north south northsq southsq WTI WTIsq northXWTI southXWTI -estimates store m6 - -reghdfe SC_net north south northsq southsq WTI WTIsq northXWTI southXWTI year yearsq -estimates store m6b - - -reghdfe NC_net northsq southsq WTI WTIsq northXWTI southXWTI north south year yearsq N_to_S S_to_N -estimates store m6c - - -esttab m6b m6c m3b m3c, stats(N r2 stars) p - - - - - - -******************************************************************************** -******************************************************************************** - -gen CA_net = NC_net + SC_net -gen CA = north + south -gen CAXWTI = CA*WTI - -gen CAsq = CA*CA - -reghdfe CA_net CA WTI, absorb(i.year i.month) -estimates store m7 - -reghdfe CA_net CA WTI CAXWTI, absorb(i.year i.month) -estimates store m8 - -reghdfe CA_net CA CAsq WTI WTIsq CAXWTI, absorb(i.year i.month) -estimates store m9 - -esttab m7 m8 m9, stats(N r2 stars) p - - -reg CA_net WTI WTIsq i.year i.month -predict hat - -gen resid = CA_net - hat - -binscatter resid CA - -binscatter CA CA_net - -binscatter north NC_net - -binscatter south SC_net - - -twoway (scatter CA_net CA) /// - (lfit CA_net CA, lpattern(solid) lwidth(medthick) lcolor(midblue)), /// - legend(off) /// - xtitle("In state consumption (tho BBL)", size(small)) ytitle("Net exports (tho BBL)", size(small)) /// - title("California", size(small)) xlabel(,labsize(small)) ylabel(,labsize(small)) - -graph save figures/CA_net.gph, replace - -twoway (scatter NC_net north) /// - (lfit NC_net north, lpattern(solid) lwidth(medthick) lcolor(emerald)), /// - legend(off) /// - xtitle("In state consumption (tho BBL)", size(small)) ytitle("Net exports (tho BBL)", size(small)) /// - title("Northern CA", size(small)) xlabel(,labsize(small)) ylabel(,labsize(small)) yline(0, lpattern(dash)) - -graph save figures/NC_net.gph, replace - -twoway (scatter SC_net north) /// - (lfit SC_net north, lpattern(solid) lwidth(medthick) lcolor(purple)), /// - legend(off) /// - xtitle("In state consumption (tho BBL)", size(small)) ytitle("Net exports (tho BBL)", size(small)) /// - title("Southern CA", size(small)) xlabel(,labsize(small)) ylabel(,labsize(small)) yline(0, lpattern(dash)) - -graph save figures/SC_net.gph, replace - - - -graph combine figures/NC_net.gph figures/SC_net.gph, rows(1) ycommon - - -******************************************************************************** -******************************************************************************** - -* norht to south export share - -gen NoverS = NC_net/SC_net -su NoverS, detail - -* monthly -twoway (line NC_net ym, lcolor(midblue)) /// - (line SC_net ym, lcolor(gs8)), /// - legend(pos(6) col(2) label(1 "North CA") label(2 "South CA")) /// - yline(0, lcolor(black)) xtitle("") title("Net exports", size(medsmall)) /// - ytitle("BBL (tho)") - -* annual average - - -twoway (line yr_NC_net year, lcolor(midblue)) /// - (line yr_north year, lcolor(gs8) yaxis(2) ytitle(BBL (tho))), /// - legend(pos(6) col(2) label(1 "North CA net exports") label(2 "North comsumption")) /// - xtitle("") title("North: Net exports and consumption", size(medsmall)) /// - ytitle("BBL (tho)") - -save figures/north_net_consumption.gph, replace - - -twoway (line yr_SC_net year, lcolor(green%60)) /// - (line yr_south year, lcolor(gs8) yaxis(2) ytitle(BBL (tho))), /// - legend(pos(6) col(2) label(1 "South CA net exports") label(2 "South comsumption")) /// - xtitle("") title("South: Net exports and consumption", size(medsmall)) /// - ytitle("BBL (tho)") - -save figures/south_net_consumption.gph, replace - -cd figures/ -graph combine south_net_consumption.gph north_net_consumption.gph - - - -twoway (line yr_NC_net year, lcolor(midblue)) /// - (line yr_SC_net year, lcolor(gs8) ), /// - legend(pos(6) col(2) label(1 "North CA") label(2 "South CA")) /// - yline(0, lcolor(black)) xtitle("") title("Net exports", size(medsmall)) /// - ytitle("BBL (tho)") - -twoway (line yr_SC_net year, lcolor(green%50)) /// - (line yr_south year, lcolor(gs8) yaxis(2)), /// - legend(pos(6) col(2) label(1 "North CA") label(2 "South CA")) /// - yline(0, lcolor(black)) xtitle("") title("Net exports", size(medsmall)) /// - ytitle("BBL (tho)") - - - -twoway (line NC_net ym, lcolor(midblue) lpattern(dash)) /// - (line NC_hat_3 ym, lcolor(gs6) lpattern(solid)), /// - legend(pos(6) col(2) label(1 "Actuals") label(2 "Predicted")) /// - yline(0, lcolor(black)) xtitle("") title("North, net exports", size(medsmall)) /// - ytitle("BBL (tho)") - -twoway (line SC_net ym, lcolor(green%60) lpattern(dash)) /// - (line SC_hat_6 ym, lcolor(gs6) lpattern(solid)), /// - legend(pos(6) col(2) label(1 "Actuals") label(2 "Predicted")) /// - yline(0, lcolor(black)) xtitle("") title("South, net exports", size(medsmall)) /// - ytitle("BBL (tho)") - - - - - diff --git a/stata/clean_acs5_race_refining.do b/stata/clean_acs5_race_refining.do deleted file mode 100644 index a5bc124..0000000 --- a/stata/clean_acs5_race_refining.do +++ /dev/null @@ -1,60 +0,0 @@ -/*************************** HEADER **********************************/ - clear all - - //set local directory - if regexm("`c(pwd)'","")==1 { //Paige's machine - global startDir "" - global repoDir "" - } - if regexm("`c(pwd)'","/Users/kylemeng")==1 { //Kyle's machine - global startDir "" - global repoDir "" - //sysdir set PLUS $startDir/../../toolbox/STATA_toolbox/plus - sysdir set PLUS $repoDir/scripts/toolbox/STATA/plus - } - else if regexm(c(os),"Windows")==1 { //Danae's machine - global startDir "C:\Users\dhern125\Dropbox\UCSB-PhD\emLab\CALEPA\data_refining_paper" - global repoDir "C:\Users\Danae\Documents\GitHub\us_ej_disparities" - } - if "$startDir"=="" exit - disp "local path is $startDir" - - //set subfolders - global rawDir "$startDir/raw" - global processedDir "$startDir/processed" - global tempDir "$startDir/temp" - global tablesDir "$repoDir/tables" - global figuresDir "$repoDir/figures" - - - ***************2020 - import delimited using $rawDir/nhgis0024_csv/nhgis0024_ds249_20205_tract, clear - - gen total_pop=amp3e001 - gen hispanic=amp3e012 - gen white=amp3e003 - gen black=amp3e004 - gen aialnative=amp3e005 - gen asian=amp3e006 - - gen median_income=amr8e001 - - keep gisjoin total_pop hispanic white black aialnative asian median_income geoid year - - saveold $processedDir/pop_income_2020, replace - - ***************2021 - import delimited using $rawDir/nhgis0024_csv/nhgis0024_ds254_20215_tract, clear - - gen total_pop=aooce001 - gen hispanic=aooce012 - gen white=aooce003 - gen black=aooce004 - gen aialnative=aooce005 - gen asian=aooce006 - - gen median_income=aoqie001 - rename geo_id geoid - keep gisjoin total_pop hispanic white black aialnative asian median_income geoid year - - saveold $processedDir/pop_income_2021, replace \ No newline at end of file diff --git a/stata/merge_acs_refining.do b/stata/merge_acs_refining.do deleted file mode 100644 index 48c45ea..0000000 --- a/stata/merge_acs_refining.do +++ /dev/null @@ -1,273 +0,0 @@ -global dataDir "C:\Users\dhern125\Dropbox\UCSB-PhD\emLab\CALEPA\data_refining_paper" -global figuresDir "C:\Users\dhern125\Dropbox\UCSB-PhD\emLab\CALEPA\data_refining_paper\figures" - -******************************************************************************** -import delimited $dataDir/raw/refining-2023/refining_health_income_2023.csv, clear - - saveold $dataDir/processed/refining_health_income_2023, replace - - - -use $dataDir/processed/pop_income_2020, clear -*2020: 14000US01001020100 - gen census_tract=substr(geoid,8,.) - destring census_tract, replace - drop year - duplicates tag census_tract, gen(dup) - sort geoid - browse if dup==1 - - saveold $dataDir/processed/pop_income_2020_mod, replace - -use $dataDir/processed/refining_health_income_2023, clear - merge m:1 census_tract using $dataDir/processed/pop_income_2020_mod - /* - *issue here - Result # of obs. - ----------------------------------------- - not matched 269,189 - from master 190,674 (_merge==1) - from using 78,515 (_merge==2) - - matched 1,114,560 (_merge==3) - ----------------------------------------- - */ - drop if _merge==2 - - -/* - rename et1001 total_pop - gen minority_pct=(et2002+et2003+et2004+et2005+et2006+et2007+et2008+et2009+et2010)/total_pop - gen black_pct=et2002/total_pop - gen hispanic_pct=(et2006+et2007+et2008+et2009+et2010)/total_pop - sum minority_pct, det -*/ - - gen minority_pct=(hispanic+black+aialnative+asian)/total_pop - gen black_pct=black/total_pop - gen hispanic_pct=hispanic/total_pop - gen asian_pct=asian/total_pop - gen aialnative_pct=aialnative/total_pop - - rename total_pm25 totalpm25 - - gen white_num=totalpm25*(1-minority_pct)*total_pop - gen white_den=(1-minority_pct)*total_pop - -local group "minority black hispanic asian aialnative" - -foreach g in `group'{ - gen `g'_num=totalpm25*`g'_pct*total_pop - gen `g'_den=`g'_pct*total_pop -} -collapse (sum) white_num white_den minority_num minority_den black_num black_den hispanic_num hispanic_den asian_num asian_den aialnative_num aialnative_den, by(scen_id demand_scenario refining_scenario year) - -gen W=white_num/white_den -gen B=black_num/black_den -gen H=hispanic_num/hispanic_den -gen M=minority_num/minority_den -gen A=asian_num/asian_den -gen AIAL=aialnative_num/aialnative_den - -gen stat_BW=B-W -gen stat_MW=M-W -gen stat_HW=H-W -gen stat_AW=A-W -gen stat_AIALW=AIAL-W - -*FIGURE 1 -preserve -keep if scen_id=="BAU historic exports" - twoway (line B year, lwidth(medthick) lcolor(gs10) lpattern(solid) xlabel(2020(5)2045)) (line H year, lwidth(medthick) lcolor(sky) lpattern(solid) xlabel(2020(5)2045, labsize(medium)) ) (line W year, lwidth(medthick) lcolor(black) lpattern(solid) xlabel(2020(5)2045)) (line A year, lwidth(medthick) lcolor(orange) lpattern(solid) xlabel(2020(5)2045)) (line AIAL year, lwidth(medthick) lcolor(green) lpattern(solid) xlabel(2020(5)2045)), xtitle("") ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Black" 2 "Hispanic" 3 "white" 4 "Asian" 5 "American Indian" ) size(medsmall)) title("BAU historic exports") - graph export $figuresDir/bau_his_exports.png, as(png) -restore - -*FIGURE 2 -preserve -keep if scen_id=="BAU historic production" - twoway (line B year, lwidth(medthick) lcolor(gs10) lpattern(solid) xlabel(2020(5)2045)) (line H year, lwidth(medthick) lcolor(sky) lpattern(solid) xlabel(2020(5)2045, labsize(medium)) ) (line W year, lwidth(medthick) lcolor(black) lpattern(solid) xlabel(2020(5)2045)) (line A year, lwidth(medthick) lcolor(orange) lpattern(solid) xlabel(2020(5)2045)) (line AIAL year, lwidth(medthick) lcolor(green) lpattern(solid) xlabel(2020(5)2045)), xtitle("") ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Black" 2 "Hispanic" 3 "white" 4 "Asian" 5 "American Indian" ) size(medsmall)) title("BAU historic production") - graph export $figuresDir/bau_his_prod.png, as(png) -restore - -*FIGURE 3 -preserve -keep if scen_id=="BAU low exports" - twoway (line B year, lwidth(medthick) lcolor(gs10) lpattern(solid) xlabel(2020(5)2045)) (line H year, lwidth(medthick) lcolor(sky) lpattern(solid) xlabel(2020(5)2045, labsize(medium)) ) (line W year, lwidth(medthick) lcolor(black) lpattern(solid) xlabel(2020(5)2045)) (line A year, lwidth(medthick) lcolor(orange) lpattern(solid) xlabel(2020(5)2045)) (line AIAL year, lwidth(medthick) lcolor(green) lpattern(solid) xlabel(2020(5)2045)), xtitle("") ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Black" 2 "Hispanic" 3 "white" 4 "Asian" 5 "American Indian" ) size(medsmall)) title("BAU low exports") - graph export $figuresDir/bau_low_exports.png, as(png) -restore - -*FIGURE 4 -preserve -keep if scen_id=="LC1 historic exports" - twoway (line B year, lwidth(medthick) lcolor(gs10) lpattern(solid) xlabel(2020(5)2045)) (line H year, lwidth(medthick) lcolor(sky) lpattern(solid) xlabel(2020(5)2045, labsize(medium)) ) (line W year, lwidth(medthick) lcolor(black) lpattern(solid) xlabel(2020(5)2045)) (line A year, lwidth(medthick) lcolor(orange) lpattern(solid) xlabel(2020(5)2045)) (line AIAL year, lwidth(medthick) lcolor(green) lpattern(solid) xlabel(2020(5)2045)), xtitle("") ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Black" 2 "Hispanic" 3 "white" 4 "Asian" 5 "American Indian" ) size(medsmall)) title("LC1 historic exports") - graph export $figuresDir/lc1_his_exports.png, as(png) -restore - -*FIGURE 5 -preserve -keep if scen_id=="LC1 historic production" - twoway (line B year, lwidth(medthick) lcolor(gs10) lpattern(solid) xlabel(2020(5)2045)) (line H year, lwidth(medthick) lcolor(sky) lpattern(solid) xlabel(2020(5)2045, labsize(medium)) ) (line W year, lwidth(medthick) lcolor(black) lpattern(solid) xlabel(2020(5)2045)) (line A year, lwidth(medthick) lcolor(orange) lpattern(solid) xlabel(2020(5)2045)) (line AIAL year, lwidth(medthick) lcolor(green) lpattern(solid) xlabel(2020(5)2045)), xtitle("") ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Black" 2 "Hispanic" 3 "white" 4 "Asian" 5 "American Indian" ) size(medsmall)) title("LC1 historic production") - graph export $figuresDir/lc1_his_prod.png, as(png) -restore - -*FIGURE 6 -preserve -keep if scen_id=="LC1 low exports" - twoway (line B year, lwidth(medthick) lcolor(gs10) lpattern(solid) xlabel(2020(5)2045)) (line H year, lwidth(medthick) lcolor(sky) lpattern(solid) xlabel(2020(5)2045, labsize(medium)) ) (line W year, lwidth(medthick) lcolor(black) lpattern(solid) xlabel(2020(5)2045)) (line A year, lwidth(medthick) lcolor(orange) lpattern(solid) xlabel(2020(5)2045)) (line AIAL year, lwidth(medthick) lcolor(green) lpattern(solid) xlabel(2020(5)2045)), xtitle("") ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Black" 2 "Hispanic" 3 "white" 4 "Asian" 5 "American Indian" ) size(medsmall)) title("LC1 low exports") - graph export $figuresDir/lc1_low_exports.png, as(png) -restore - - -***GAPS -*FIGURE 1 -preserve -keep if scen_id=="BAU historic exports" - twoway (line stat_BW year, lwidth(medthick) lcolor(gs10) lpattern(dash) xlabel(2020(5)2045)) (line stat_HW year, lwidth(medthick) lcolor(sky) lpattern(dash) xlabel(2020(5)2045, labsize(medium)) ) (line stat_AW year, lwidth(medthick) lcolor(orange) lpattern(dash) xlabel(2020(5)2045)) (line stat_AIALW year, lwidth(medthick) lcolor(green) lpattern(dash) xlabel(2020(5)2045)), xtitle("") ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Bw gap" 2 "Hw gap" 3 "Aw gap" 5 "AIALw gap" ) size(medsmall)) title("BAU historic exports") - graph export $figuresDir/bau_his_exports_gap.png, as(png) -restore - -*FIGURE 2 -preserve -keep if scen_id=="BAU historic production" - twoway (line stat_BW year, lwidth(medthick) lcolor(gs10) lpattern(dash) xlabel(2020(5)2045)) (line stat_HW year, lwidth(medthick) lcolor(sky) lpattern(dash) xlabel(2020(5)2045, labsize(medium)) ) (line stat_AW year, lwidth(medthick) lcolor(orange) lpattern(dash) xlabel(2020(5)2045)) (line stat_AIALW year, lwidth(medthick) lcolor(green) lpattern(dash) xlabel(2020(5)2045)), xtitle("") ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Bw gap" 2 "Hw gap" 3 "Aw gap" 5 "AIALw gap" ) size(medsmall)) title("BAU historic production") - graph export $figuresDir/bau_his_prod_gap.png, as(png) -restore - -*FIGURE 3 -preserve -keep if scen_id=="BAU low exports" - twoway (line stat_BW year, lwidth(medthick) lcolor(gs10) lpattern(dash) xlabel(2020(5)2045)) (line stat_HW year, lwidth(medthick) lcolor(sky) lpattern(dash) xlabel(2020(5)2045, labsize(medium)) ) (line stat_AW year, lwidth(medthick) lcolor(orange) lpattern(dash) xlabel(2020(5)2045)) (line stat_AIALW year, lwidth(medthick) lcolor(green) lpattern(dash) xlabel(2020(5)2045)), xtitle("") ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Bw gap" 2 "Hw gap" 3 "Aw gap" 5 "AIALw gap" ) size(medsmall)) title("BAU low exports") - graph export $figuresDir/bau_low_exports_gap.png, as(png) -restore - -*FIGURE 4 -preserve -keep if scen_id=="LC1 historic exports" - twoway (line stat_BW year, lwidth(medthick) lcolor(gs10) lpattern(dash) xlabel(2020(5)2045)) (line stat_HW year, lwidth(medthick) lcolor(sky) lpattern(dash) xlabel(2020(5)2045, labsize(medium)) ) (line stat_AW year, lwidth(medthick) lcolor(orange) lpattern(dash) xlabel(2020(5)2045)) (line stat_AIALW year, lwidth(medthick) lcolor(green) lpattern(dash) xlabel(2020(5)2045)), xtitle("") ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Bw gap" 2 "Hw gap" 3 "Aw gap" 5 "AIALw gap" ) size(medsmall)) title("LC1 historic exports") - graph export $figuresDir/lc1_his_exports_gap.png, as(png) -restore - -*FIGURE 5 -preserve -keep if scen_id=="LC1 historic production" - twoway (line stat_BW year, lwidth(medthick) lcolor(gs10) lpattern(dash) xlabel(2020(5)2045)) (line stat_HW year, lwidth(medthick) lcolor(sky) lpattern(dash) xlabel(2020(5)2045, labsize(medium)) ) (line stat_AW year, lwidth(medthick) lcolor(orange) lpattern(dash) xlabel(2020(5)2045)) (line stat_AIALW year, lwidth(medthick) lcolor(green) lpattern(dash) xlabel(2020(5)2045)), xtitle("") ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Bw gap" 2 "Hw gap" 3 "Aw gap" 5 "AIALw gap" ) size(medsmall)) title("LC1 historic production") - graph export $figuresDir/lc1_his_prod_gap.png, as(png) -restore - -*FIGURE 6 -preserve -keep if scen_id=="LC1 low exports" - twoway (line stat_BW year, lwidth(medthick) lcolor(gs10) lpattern(dash) xlabel(2020(5)2045)) (line stat_HW year, lwidth(medthick) lcolor(sky) lpattern(dash) xlabel(2020(5)2045, labsize(medium)) ) (line stat_AW year, lwidth(medthick) lcolor(orange) lpattern(dash) xlabel(2020(5)2045)) (line stat_AIALW year, lwidth(medthick) lcolor(green) lpattern(dash) xlabel(2020(5)2045)), xtitle("") ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Bw gap" 2 "Hw gap" 3 "Aw gap" 5 "AIALw gap" ) size(medsmall)) title("LC1 low exports") - graph export $figuresDir/lc1_low_exports_gap.png, as(png) -restore - - - - -******************************************************************************* - ***INCOME -use $dataDir/processed/pop_income_2021, clear -*2021: 1400000US01001020100 - gen census_tract=substr(geoid,10,.) - destring census_tract, replace - drop year - duplicates tag census_tract, gen(dup) - sort geoid - browse if dup==1 - - saveold $dataDir/processed/pop_income_2021_mod, replace - -use $dataDir/processed/refining_health_income_2023, clear - merge m:1 census_tract using $dataDir/processed/pop_income_2021_mod - -/* -*issue here - - ----------------------------------------- - not matched 269,189 - from master 190,674 (_merge==1) - from using 78,515 (_merge==2) - - matched 1,114,560 (_merge==3) - ----------------------------------------- -*/ - - - -egen deciles_inc=xtile(median_hh_income), nq(10) by(year) - -forvalues d=1/10{ - gen dec_`d'=(deciles_inc==`d') - gen deciles_num_`d'=total_pm25*dec_`d'*total_pop - gen deciles_den_`d'=dec_`d'*total_pop -} - -egen quantiles_inc=xtile(median_income), nq(4) by(year) - -forvalues d=1/4{ - gen quan_`d'=(deciles_inc==`d') - gen quantiles_num_`d'=total_pm25*quan_`d'*total_pop - gen quantiles_den_`d'=quan_`d'*total_pop -} - -collapse (sum) deciles_num_* deciles_den_* quantiles_den_* quantiles_num_*, by(scen_id demand_scenario refining_scenario year) - -gen D1=deciles_num_1/deciles_den_1 -gen D10=deciles_num_10/deciles_den_10 -gen Q1=quantiles_num_1/quantiles_den_1 -gen Q2=quantiles_num_2/quantiles_den_2 -gen Q3=quantiles_num_3/quantiles_den_3 -gen Q4=quantiles_num_4/quantiles_den_4 - - -gen stat_deciles=D1-D10 -gen stat_quantiles=Q1-Q4 - -la var stat_deciles "Decile 1-Decile 10 Gap" -la var stat_deciles "Q 1-Q 10 Gap" - - -*FIGURE 1 -preserve -keep if scen_id=="BAU historic exports" - twoway (line Q1 year, lwidth(medthick) lcolor(gs10) lpattern(solid) xlabel(2020(5)2045)) (line Q2 year, lwidth(medthick) lcolor(sky) lpattern(solid) xlabel(2020(5)2045, labsize(medium)) ) (line Q3 year, lwidth(medthick) lcolor(black) lpattern(solid) xlabel(2020(5)2045)) (line Q4 year, lwidth(medthick) lcolor(orange) lpattern(solid) xlabel(2020(5)2045)), xtitle("") ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Q1" 2 "Q2" 3 "Q3" 4 "Q4" ) size(medsmall)) title("BAU historic exports") - graph export $figuresDir/bau_his_exports_inc.png, as(png) replace -restore - -*FIGURE 2 -preserve -keep if scen_id=="BAU historic production" - twoway (line Q1 year, lwidth(medthick) lcolor(gs10) lpattern(solid) xlabel(2020(5)2045)) (line Q2 year, lwidth(medthick) lcolor(sky) lpattern(solid) xlabel(2020(5)2045, labsize(medium)) ) (line Q3 year, lwidth(medthick) lcolor(black) lpattern(solid) xlabel(2020(5)2045)) (line Q4 year, lwidth(medthick) lcolor(orange) lpattern(solid) xlabel(2020(5)2045)), xtitle("") ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Q1" 2 "Q2" 3 "Q3" 4 "Q4" ) size(medsmall)) title("BAU historic production") - graph export $figuresDir/bau_his_prod_inc.png, as(png) replace -restore - -*FIGURE 3 -preserve -keep if scen_id=="BAU low exports" - twoway (line Q1 year, lwidth(medthick) lcolor(gs10) lpattern(solid) xlabel(2020(5)2045)) (line Q2 year, lwidth(medthick) lcolor(sky) lpattern(solid) xlabel(2020(5)2045, labsize(medium)) ) (line Q3 year, lwidth(medthick) lcolor(black) lpattern(solid) xlabel(2020(5)2045)) (line Q4 year, lwidth(medthick) lcolor(orange) lpattern(solid) xlabel(2020(5)2045)), xtitle("") ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Q1" 2 "Q2" 3 "Q3" 4 "Q4" ) size(medsmall)) title("BAU low exports") - graph export $figuresDir/bau_low_exports_inc.png, as(png) replace -restore - -*FIGURE 4 -preserve -keep if scen_id=="LC1 historic exports" - twoway (line Q1 year, lwidth(medthick) lcolor(gs10) lpattern(solid) xlabel(2020(5)2045)) (line Q2 year, lwidth(medthick) lcolor(sky) lpattern(solid) xlabel(2020(5)2045, labsize(medium)) ) (line Q3 year, lwidth(medthick) lcolor(black) lpattern(solid) xlabel(2020(5)2045)) (line Q4 year, lwidth(medthick) lcolor(orange) lpattern(solid) xlabel(2020(5)2045)), xtitle("") ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Q1" 2 "Q2" 3 "Q3" 4 "Q4" ) size(medsmall)) title("LC1 historic exports") - graph export $figuresDir/lc1_his_exports_inc.png, as(png) replace -restore - -*FIGURE 5 -preserve -keep if scen_id=="LC1 historic production" - twoway (line Q1 year, lwidth(medthick) lcolor(gs10) lpattern(solid) xlabel(2020(5)2045)) (line Q2 year, lwidth(medthick) lcolor(sky) lpattern(solid) xlabel(2020(5)2045, labsize(medium)) ) (line Q3 year, lwidth(medthick) lcolor(black) lpattern(solid) xlabel(2020(5)2045)) (line Q4 year, lwidth(medthick) lcolor(orange) lpattern(solid) xlabel(2020(5)2045)), xtitle("") ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Q1" 2 "Q2" 3 "Q3" 4 "Q4" ) size(medsmall)) title("LC1 historic production") - graph export $figuresDir/lc1_his_prod_inc.png, as(png) replace -restore - -*FIGURE 6 -preserve -keep if scen_id=="LC1 low exports" - twoway (line Q1 year, lwidth(medthick) lcolor(gs10) lpattern(solid) xlabel(2020(5)2045)) (line Q2 year, lwidth(medthick) lcolor(sky) lpattern(solid) xlabel(2020(5)2045, labsize(medium)) ) (line Q3 year, lwidth(medthick) lcolor(black) lpattern(solid) xlabel(2020(5)2045)) (line Q4 year, lwidth(medthick) lcolor(orange) lpattern(solid) xlabel(2020(5)2045)), xtitle("") ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Q1" 2 "Q2" 3 "Q3" 4 "Q4" ) size(medsmall)) title("LC1 low exports") - graph export $figuresDir/lc1_low_exports_inc.png, as(png) replace -restore - - - diff --git a/stata/repo_clean_acs5_race_refining.do b/stata/repo_clean_acs5_race_refining.do deleted file mode 100644 index f368c2c..0000000 --- a/stata/repo_clean_acs5_race_refining.do +++ /dev/null @@ -1,67 +0,0 @@ -/*************************** HEADER **********************************/ - clear all - - //set local directory - if regexm("`c(pwd)'","")==1 { //Paige's machine - global startDir "" - global repoDir "" - } - else if regexm(c(os),"Windows")==1 { //Danae's machine - global startDir "drives/emlab/projects/current-projects/calepa-cn" - global repoDir "C:\Users\Danae\Documents\GitHub\ca-refining" - } - if "$startDir"=="" exit - disp "local path is $startDir" - - //set subfolders - global rawDir "$startDir/data/Census/nhgis_2020" - global processedDir "$startDir/data/Census" - - ***************2020 - import delimited using $rawDir/nhgis0024_csv/nhgis0024_ds249_20205_tract, clear - - gen total_pop=amp3e001 - gen hispanic=amp3e012 - gen white=amp3e003 - gen black=amp3e004 - gen aialnative=amp3e005 - gen asian=amp3e006 - - gen median_income=amr8e001 - - keep gisjoin total_pop hispanic white black aialnative asian median_income geoid year state - - saveold $processedDir/pop_income_2020, replace - keep if state=="California" - export delimited $processedDir/pop_CA_geoid.csv, replace - - ***************2021 - import delimited using $rawDir/nhgis0024_csv/nhgis0024_ds254_20215_tract, clear - - gen total_pop=aooce001 - gen hispanic=aooce012 - gen white=aooce003 - gen black=aooce004 - gen aialnative=aooce005 - gen asian=aooce006 - - gen median_income=aoqie001 - rename geo_id geoid - keep gisjoin total_pop hispanic white black aialnative asian median_income geoid year - - saveold $processedDir/pop_income_2021, replace - - - ****************2017-2021 poverty - import delimited using $rawDir/nhgis0029_csv/nhgis0029_csv/nhgis0029_ds254_20215_tract.csv - - - rename aoqge001 total_pop - rename aoqge002 total_below_poverty - rename aoqge003 total_above_poverty - - rename geo_id geoid - keep gisjoin geoid year total_above_poverty total_below_poverty total_pop - - saveold $processedDir/pop_poverty_2020, replace - \ No newline at end of file diff --git a/stata/repo_merge_acs_refining_poverty_updated_ct.do b/stata/repo_merge_acs_refining_poverty_updated_ct.do deleted file mode 100644 index cc937ef..0000000 --- a/stata/repo_merge_acs_refining_poverty_updated_ct.do +++ /dev/null @@ -1,177 +0,0 @@ -/*************************** HEADER **********************************/ - clear all - - //set local directory - if regexm("`c(pwd)'","")==1 { //Paige's machine - global startDir "" - global repoDir "" - } - - else if regexm(c(os),"Windows")==1 { //Danae's machine - global startDir "drives/emlab/projects/current-projects/calepa-cn" - global repoDir "C:\Users\Danae\Documents\GitHub\ca-refining" - } - if "$startDir"=="" exit - disp "local path is $startDir" - - //set subfolders - global rawDir "$startDir/data/Census/nhgis_2020" - global processedDir "$startDir/data/Census" - global concentrationsDir "$startDir/outputs/refining-2023/health" - global figuresDir "$repoDir/stata/figures" - global graphDir "$repoDir/stata/gph" -******************************************************************************* - ***INCOME -use $processedDir/pop_poverty_2020, clear - gen census_tract=substr(geoid,10,.) - destring census_tract, replace - drop year - duplicates tag census_tract, gen(dup) - sort geoid - browse if dup==1 - - saveold $processedDir/pop_poverty_2021_mod, replace - -use $concentrationsDir/refining_health_census_tract, clear - merge m:1 census_tract using $processedDir/pop_poverty_2021_mod - -/* - Result # of obs. - ----------------------------------------- - not matched 76,286 - from master 0 (_merge==1) - from using 76,286 (_merge==2) - - matched 1,475,658 (_merge==3) - ----------------------------------------- - -*/ - -gen total_below_poverty_pct=total_below_poverty/total_pop -gen total_above_poverty_pct=total_above_poverty/total_pop -rename total_pm25 totalpm25 - -local group "total_above_poverty total_below_poverty" - -foreach g in `group'{ - gen `g'_num=totalpm25*`g'_pct*total_pop - gen `g'_den=`g'_pct*total_pop -} -collapse (sum) total_above_poverty_num total_above_poverty_den total_below_poverty_num total_below_poverty_den, by(scen_id demand_scenario refining_scenario year) - -gen NP=total_above_poverty_num/total_above_poverty_den -gen P=total_below_poverty_num/total_below_poverty_den - -gen stat_PNP=P-NP - - -*FIGURE 1 -preserve -keep if scen_id=="BAU historic exports" - twoway (line P year, lwidth(medthick) lcolor(gs10) lpattern(solid) xlabel(2020(5)2045)) (line NP year, lwidth(medthick) lcolor(sky) lpattern(solid) xlabel(2020(5)2045, labsize(medium)) ), xtitle("") ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Below poverty" 2 "Above poverty" ) size(medsmall)) title("BAU historic exports") - graph export $figuresDir/bau_his_exports_pov.png, as(png) replace -restore - -*FIGURE 2 -preserve -keep if scen_id=="BAU historic production" - twoway (line P year, lwidth(medthick) lcolor(gs10) lpattern(solid) xlabel(2020(5)2045)) (line NP year, lwidth(medthick) lcolor(sky) lpattern(solid) xlabel(2020(5)2045, labsize(medium)) ), xtitle("") ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Below poverty" 2 "Above poverty" ) size(medsmall)) title("BAU historic production") - graph export $figuresDir/bau_his_prod_pov.png, as(png) replace -restore - -*FIGURE 3 -preserve -keep if scen_id=="BAU low exports" - twoway (line P year, lwidth(medthick) lcolor(gs10) lpattern(solid) xlabel(2020(5)2045)) (line NP year, lwidth(medthick) lcolor(sky) lpattern(solid) xlabel(2020(5)2045, labsize(medium)) ), xtitle("") ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Below poverty" 2 "Above poverty" ) size(medsmall)) title("BAU low exports") - graph export $figuresDir/bau_low_exports_pov.png, as(png) replace -restore - -*FIGURE 4 -preserve -keep if scen_id=="LC1 historic exports" - twoway (line P year, lwidth(medthick) lcolor(gs10) lpattern(solid) xlabel(2020(5)2045)) (line NP year, lwidth(medthick) lcolor(sky) lpattern(solid) xlabel(2020(5)2045, labsize(medium)) ), xtitle("") ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Below poverty" 2 "Above poverty" ) size(medsmall)) title("LC1 historic exports") - graph export $figuresDir/lc1_his_exports_pov.png, as(png) replace -restore - -*FIGURE 5 -preserve -keep if scen_id=="LC1 historic production" - twoway (line P year, lwidth(medthick) lcolor(gs10) lpattern(solid) xlabel(2020(5)2045)) (line NP year, lwidth(medthick) lcolor(sky) lpattern(solid) xlabel(2020(5)2045, labsize(medium)) ), xtitle("") ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Below poverty" 2 "Above poverty" ) size(medsmall)) title("LC1 historic production") - graph export $figuresDir/lc1_his_prod_pov.png, as(png) replace -restore - -*FIGURE 6 -preserve -keep if scen_id=="LC1 low exports" - twoway (line P year, lwidth(medthick) lcolor(gs10) lpattern(solid) xlabel(2020(5)2045)) (line NP year, lwidth(medthick) lcolor(sky) lpattern(solid) xlabel(2020(5)2045, labsize(medium)) ), xtitle("") ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Below poverty" 2 "Above poverty" ) size(medsmall)) title("LC1 low exports") - graph export $figuresDir/lc1_low_exports_pov.png, as(png) replace -restore - - - -*GAP -*FIGURE 1 -preserve -keep if scen_id=="BAU historic exports" - twoway (line stat_PNP year, lwidth(medthick) lcolor(turquoise) lpattern(dash) xlabel(2020(5)2045)), xtitle("") ytitle("Gap in PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Gap Below poverty and Above poverty" ) size(medsmall)) ylabel(0(0.01)0.04) title("BAU historic exports") saving($graphDir/bau_his_exp_pov_gap.gph, replace) - graph export $figuresDir/bau_his_exports_pov_gap.png, as(png) replace -restore - -*FIGURE 2 -preserve -keep if scen_id=="BAU historic production" - twoway (line stat_PNP year, lwidth(medthick) lcolor(turquoise) lpattern(dash) xlabel(2020(5)2045)), xtitle("") ytitle("Gap in PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Gap Below poverty and Above poverty" ) size(medsmall)) ylabel(0(0.01)0.04) title("BAU historic production") saving($graphDir/bau_his_prod_pov_gap.gph, replace) - graph export $figuresDir/bau_his_prod_pov_gap.png, as(png) replace -restore - -*FIGURE 3 -preserve -keep if scen_id=="BAU low exports" - twoway (line stat_PNP year, lwidth(medthick) lcolor(turquoise) lpattern(dash) xlabel(2020(5)2045)), xtitle("") ytitle("Gap in PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Gap Below poverty and Above poverty" ) size(medsmall)) ylabel(0(0.01)0.04) title("BAU low exports") saving($graphDir/bau_low_exp_pov_gap.gph, replace) - graph export $figuresDir/bau_low_exports_pov_gap.png, as(png) replace -restore - -*FIGURE 4 -preserve -keep if scen_id=="LC1 historic exports" - twoway (line stat_PNP year, lwidth(medthick) lcolor(turquoise) lpattern(dash) xlabel(2020(5)2045)), xtitle("") ytitle("Gap in PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Gap Below poverty and Above poverty" ) size(medsmall)) ylabel(0(0.01)0.04) title("LC1 historic exports") saving($graphDir/lc1_his_exp_pov_gap.gph, replace) - graph export $figuresDir/lc1_his_exports_pov_gap.png, as(png) replace -restore - -*FIGURE 5 -preserve -keep if scen_id=="LC1 historic production" - twoway (line stat_PNP year, lwidth(medthick) lcolor(turquoise) lpattern(dash) xlabel(2020(5)2045)), xtitle("") ytitle("Gap in PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Gap Below poverty and Above poverty" ) size(medsmall)) ylabel(0(0.01)0.04) title("LC1 historic production") saving($graphDir/lc1_his_prod_pov_gap.gph, replace) - graph export $figuresDir/lc1_his_prod_pov_gap.png, as(png) replace -restore - -*FIGURE 6 -preserve -keep if scen_id=="LC1 low exports" - twoway (line stat_PNP year, lwidth(medthick) lcolor(turquoise) lpattern(dash) xlabel(2020(5)2045)), xtitle("") ytitle("Gap in PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Gap Below poverty and Above poverty" ) size(medsmall)) ylabel(0(0.01)0.04) title("LC1 low exports") saving($graphDir/lc1_low_exp_pov_gap.gph, replace) - graph export $figuresDir/lc1_low_exports_pov_gap.png, as(png) replace -restore - -*FINAL FIGURE WITH RELEVANT SCENARIOS - -grc1leg $graphDir/bau_his_prod_pov_gap.gph $graphDir/bau_his_exp_pov_gap.gph $graphDir/lc1_low_exp_pov_gap.gph, col(2) - - graph export $figuresDir/all_gap_poverty.png, as(png) replace - -************************************************************ -keep if year==2045 - -keep year scen_id stat_* - -sdf -reshape long stat_, i(scen_id year) j(group, string) - -rename stat_ gap - - - -la var gap "Gap value in end point (2045)" -gen groupt=" Poor-no poor gap" if group=="PNP" - -save $dataDir/processed/gaps_2045_poor, replace -graph hbar (mean) gap, over(scen_id) asyvars over(groupt) diff --git a/stata/repo_merge_acs_refining_updated_ct.do b/stata/repo_merge_acs_refining_updated_ct.do deleted file mode 100644 index f1e088b..0000000 --- a/stata/repo_merge_acs_refining_updated_ct.do +++ /dev/null @@ -1,283 +0,0 @@ -/*************************** HEADER **********************************/ - clear all - - //set local directory - if regexm("`c(pwd)'","")==1 { //Paige's machine - global startDir "" - global repoDir "" - } - - else if regexm(c(os),"Windows")==1 { //Danae's machine - global startDir "drives/emlab/projects/current-projects/calepa-cn" - global repoDir "C:\Users\Danae\Documents\GitHub\ca-refining" - } - if "$startDir"=="" exit - disp "local path is $startDir" - - //set subfolders - global rawDir "$startDir/data/Census/nhgis_2020" - global processedDir "$startDir/data/Census" - global concentrationsDir "$startDir/outputs/refining-2023/health" - global figuresDir "$repoDir/stata/figures" - global graphDir "$repoDir/stata/gph" -******************************************************************************** -import delimited $concentrationsDir/refining_health_census_tract.csv, clear - - saveold $concentrationsDir/refining_health_census_tract, replace - - - -use $dataDir/processed/pop_income_2020, clear -*2020: 14000US01001020100 - gen census_tract=substr(geoid,8,.) - destring census_tract, replace - drop year - duplicates tag census_tract, gen(dup) - sort geoid - browse if dup==1 - - saveold $processedDir/pop_income_2020_mod, replace - -use $concentrationsDir/refining_health_census_tract, clear - merge m:1 census_tract using $processedDir/pop_income_2020_mod - /* - - Result # of obs. - ----------------------------------------- - not matched 76,286 - from master 0 (_merge==1) - from using 76,286 (_merge==2) - - matched 1,475,658 (_merge==3) - ----------------------------------------- - - */ - drop if _merge==2 - - gen minority_pct=(hispanic+black+aialnative+asian)/total_pop - gen black_pct=black/total_pop - gen hispanic_pct=hispanic/total_pop - gen asian_pct=asian/total_pop - gen aialnative_pct=aialnative/total_pop - - rename total_pm25 totalpm25 - - gen white_num=totalpm25*(1-minority_pct)*total_pop - gen white_den=(1-minority_pct)*total_pop - -local group "minority black hispanic asian aialnative" - -foreach g in `group'{ - gen `g'_num=totalpm25*`g'_pct*total_pop - gen `g'_den=`g'_pct*total_pop -} - -*creating dac index -gen dac_population=total_pop if disadvantaged=="Yes" - replace dac_population=0 if disadvantaged=="No" -gen dac_num=totalpm25*dac_population - gen dac_den=dac_population -gen nodac_population=total_pop if disadvantaged=="No" - replace nodac_population=0 if disadvantaged=="Yes" -gen nodac_num=totalpm25*nodac_population - gen nodac_den=nodac_population - - -collapse (sum) white_num white_den minority_num minority_den black_num black_den hispanic_num hispanic_den asian_num asian_den aialnative_num aialnative_den dac_num dac_den nodac_num nodac_den, by(scen_id demand_scenario refining_scenario year) - -gen W=white_num/white_den -gen B=black_num/black_den -gen H=hispanic_num/hispanic_den -gen M=minority_num/minority_den -gen A=asian_num/asian_den -gen AIAL=aialnative_num/aialnative_den - -gen DAC=dac_num/dac_den -gen nDAC=nodac_num/nodac_den - -gen stat_BW=B-W -gen stat_MW=M-W -gen stat_HW=H-W -gen stat_AW=A-W -gen stat_AIALW=AIAL-W -gen stat_DAC=DAC-nDAC - - -*LEVELS -*FIGURE 1 -preserve -keep if scen_id=="BAU historic exports" - twoway (line B year, lwidth(medthick) lcolor(gs10) lpattern(solid) xlabel(2020(5)2045)) (line H year, lwidth(medthick) lcolor(sky) lpattern(solid) xlabel(2025(10)2045, labsize(medium)) ) (line W year, lwidth(medthick) lcolor(black) lpattern(solid) xlabel(2020(5)2045)) (line A year, lwidth(medthick) lcolor(orange) lpattern(solid) xlabel(2020(5)2045)) (line AIAL year, lwidth(medthick) lcolor(green) lpattern(solid) xlabel(2020(10)2045)), xtitle("") ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Black" 2 "Hispanic" 3 "white" 4 "Asian" 5 "American Indian" ) size(medsmall)) title("BAU historic exports") saving($graphDir/bau_his_exports.gph, replace) - graph export $figuresDir/bau_his_exports.png, as(png) replace -restore - -*FIGURE 2 -preserve -keep if scen_id=="BAU historic production" - twoway (line B year, lwidth(medthick) lcolor(gs10) lpattern(solid) xlabel(2020(5)2045)) (line H year, lwidth(medthick) lcolor(sky) lpattern(solid) xlabel(2020(10)2045, labsize(medium)) ) (line W year, lwidth(medthick) lcolor(black) lpattern(solid) xlabel(2020(5)2045)) (line A year, lwidth(medthick) lcolor(orange) lpattern(solid) xlabel(2020(5)2045)) (line AIAL year, lwidth(medthick) lcolor(green) lpattern(solid) xlabel(2020(10)2045)), xtitle("") ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Black" 2 "Hispanic" 3 "white" 4 "Asian" 5 "American Indian" ) size(medsmall)) title("BAU historic production") saving($graphDir/bau_his_prod.gph, replace) - graph export $figuresDir/bau_his_prod.png, as(png) replace -restore - -*FIGURE 3 -preserve -keep if scen_id=="BAU low exports" - twoway (line B year, lwidth(medthick) lcolor(gs10) lpattern(solid) xlabel(2020(5)2045)) (line H year, lwidth(medthick) lcolor(sky) lpattern(solid) xlabel(2020(10)2045, labsize(medium)) ) (line W year, lwidth(medthick) lcolor(black) lpattern(solid) xlabel(2020(5)2045)) (line A year, lwidth(medthick) lcolor(orange) lpattern(solid) xlabel(2020(5)2045)) (line AIAL year, lwidth(medthick) lcolor(green) lpattern(solid) xlabel(2020(10)2045)), xtitle("") ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Black" 2 "Hispanic" 3 "white" 4 "Asian" 5 "American Indian" ) size(medsmall)) title("BAU low exports") saving($graphDir/bau_low_exports.gph, replace) - graph export $figuresDir/bau_low_exports.png, as(png) replace -restore - -*FIGURE 4 -preserve -keep if scen_id=="LC1 historic exports" - twoway (line B year, lwidth(medthick) lcolor(gs10) lpattern(solid) xlabel(2020(5)2045)) (line H year, lwidth(medthick) lcolor(sky) lpattern(solid) xlabel(2020(5)2045, labsize(medium)) ) (line W year, lwidth(medthick) lcolor(black) lpattern(solid) xlabel(2020(5)2045)) (line A year, lwidth(medthick) lcolor(orange) lpattern(solid) xlabel(2020(5)2045)) (line AIAL year, lwidth(medthick) lcolor(green) lpattern(solid) xlabel(2020(5)2045)), xtitle("") ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Black" 2 "Hispanic" 3 "white" 4 "Asian" 5 "American Indian" ) size(medsmall)) title("LC1 historic exports") - graph export $figuresDir/lc1_his_exports.png, as(png) replace -restore - -*FIGURE 5 -preserve -keep if scen_id=="LC1 historic production" - twoway (line B year, lwidth(medthick) lcolor(gs10) lpattern(solid) xlabel(2020(5)2045)) (line H year, lwidth(medthick) lcolor(sky) lpattern(solid) xlabel(2020(5)2045, labsize(medium)) ) (line W year, lwidth(medthick) lcolor(black) lpattern(solid) xlabel(2020(5)2045)) (line A year, lwidth(medthick) lcolor(orange) lpattern(solid) xlabel(2020(5)2045)) (line AIAL year, lwidth(medthick) lcolor(green) lpattern(solid) xlabel(2020(5)2045)), xtitle("") ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Black" 2 "Hispanic" 3 "white" 4 "Asian" 5 "American Indian" ) size(medsmall)) title("LC1 historic production") - graph export $figuresDir/lc1_his_prod.png, as(png) replace -restore - -*FIGURE 6 -preserve -keep if scen_id=="LC1 low exports" - twoway (line B year, lwidth(medthick) lcolor(gs10) lpattern(solid) xlabel(2020(5)2045)) (line H year, lwidth(medthick) lcolor(sky) lpattern(solid) xlabel(2020(5)2045, labsize(medium)) ) (line W year, lwidth(medthick) lcolor(black) lpattern(solid) xlabel(2020(5)2045)) (line A year, lwidth(medthick) lcolor(orange) lpattern(solid) xlabel(2020(5)2045)) (line AIAL year, lwidth(medthick) lcolor(green) lpattern(solid) xlabel(2020(5)2045)), xtitle("") ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Black" 2 "Hispanic" 3 "white" 4 "Asian" 5 "American Indian" ) size(medsmall)) title("LC1 low exports") saving($graphDir/lc1_low_exports.gph, replace) - graph export $figuresDir/lc1_low_exports.png, as(png) replace -restore - -*FINAL FIGURE WITH RELEVANT SCENARIOS -grc1leg $graphDir/bau_his_prod.gph $graphDir/bau_his_exports.gph $graphDir/lc1_low_exports.gph, col(2) - graph export $figuresDir/all_race.png, as(png) replace - - - - -***GAPS -*FIGURE 1 -preserve -keep if scen_id=="BAU historic exports" - twoway (line stat_BW year, lwidth(medthick) lcolor(gs10) lpattern(dash) xlabel(2020(5)2045)) (line stat_HW year, lwidth(medthick) lcolor(sky) lpattern(dash) xlabel(2020(5)2045, labsize(medium)) ) (line stat_AW year, lwidth(medthick) lcolor(orange) lpattern(dash) xlabel(2020(15)2045)) (line stat_AIALW year, lwidth(medthick) lcolor(green) lpattern(dash) xlabel(2020(10)2045)), xtitle("") ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Bw gap" 2 "Hw gap" 3 "Aw gap" 4 "AIALw gap" ) size(medsmall)) title("BAU historic exports") saving($graphDir/bau_his_exp_gap.gph, replace) - graph export $figuresDir/bau_his_exports_gap.png, as(png) replace -restore - -*FIGURE 2 -preserve -keep if scen_id=="BAU historic production" - twoway (line stat_BW year, lwidth(medthick) lcolor(gs10) lpattern(dash) xlabel(2020(5)2045)) (line stat_HW year, lwidth(medthick) lcolor(sky) lpattern(dash) xlabel(2020(5)2045, labsize(medium)) ) (line stat_AW year, lwidth(medthick) lcolor(orange) lpattern(dash) xlabel(2020(5)2045)) (line stat_AIALW year, lwidth(medthick) lcolor(green) lpattern(dash) xlabel(2020(10)2045)), xtitle("") ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Bw gap" 2 "Hw gap" 3 "Aw gap" 4 "AIALw gap" ) size(medsmall)) title("BAU historic production") saving($graphDir/bau_his_prod_gap.gph, replace) - graph export $figuresDir/bau_his_prod_gap.png, as(png) replace -restore - -*FIGURE 3 -preserve -keep if scen_id=="BAU low exports" - twoway (line stat_BW year, lwidth(medthick) lcolor(gs10) lpattern(dash) xlabel(2020(5)2045)) (line stat_HW year, lwidth(medthick) lcolor(sky) lpattern(dash) xlabel(2020(5)2045, labsize(medium)) ) (line stat_AW year, lwidth(medthick) lcolor(orange) lpattern(dash) xlabel(2020(5)2045)) (line stat_AIALW year, lwidth(medthick) lcolor(green) lpattern(dash) xlabel(2020(10)2045)), xtitle("") ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Bw gap" 2 "Hw gap" 3 "Aw gap" 4 "AIALw gap" ) size(medsmall)) title("BAU low exports") saving($graphDir/bau_low_exp_gap.gph, replace) - graph export $figuresDir/bau_low_exports_gap.png, as(png) replace -restore - -*grc1leg $graphDir/bau_his_exp_gap.gph $graphDir/bau_his_prod_gap.gph $graphDir/bau_low_exp_gap.gph, col(3) - -*FIGURE 4 -preserve -keep if scen_id=="LC1 historic exports" - twoway (line stat_BW year, lwidth(medthick) lcolor(gs10) lpattern(dash) xlabel(2020(5)2045)) (line stat_HW year, lwidth(medthick) lcolor(sky) lpattern(dash) xlabel(2020(5)2045, labsize(medium)) ) (line stat_AW year, lwidth(medthick) lcolor(orange) lpattern(dash) xlabel(2020(5)2045)) (line stat_AIALW year, lwidth(medthick) lcolor(green) lpattern(dash) xlabel(2020(10)2045)), xtitle("") ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Bw gap" 2 "Hw gap" 3 "Aw gap" 4 "AIALw gap" ) size(medsmall)) title("LC1 historic exports") saving($graphDir/lc1_his_exp_gap.gph, replace) - graph export $figuresDir/lc1_his_exp_gap.png, as(png) replace -restore - -*FIGURE 5 -preserve -keep if scen_id=="LC1 historic production" - twoway (line stat_BW year, lwidth(medthick) lcolor(gs10) lpattern(dash) xlabel(2020(5)2045)) (line stat_HW year, lwidth(medthick) lcolor(sky) lpattern(dash) xlabel(2020(5)2045, labsize(medium)) ) (line stat_AW year, lwidth(medthick) lcolor(orange) lpattern(dash) xlabel(2020(5)2045)) (line stat_AIALW year, lwidth(medthick) lcolor(green) lpattern(dash) xlabel(2020(10)2045)), xtitle("") ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Bw gap" 2 "Hw gap" 3 "Aw gap" 4 "AIALw gap" ) size(medsmall)) title("LC1 historic production") saving($graphDir/lc1_his_prod_gap.gph, replace) - graph export $figuresDir/lc1_his_prod_gap.png, as(png) replace -restore - -*FIGURE 6 -preserve -keep if scen_id=="LC1 low exports" - twoway (line stat_BW year, lwidth(medthick) lcolor(gs10) lpattern(dash) xlabel(2020(5)2045)) (line stat_HW year, lwidth(medthick) lcolor(sky) lpattern(dash) xlabel(2020(5)2045, labsize(medium)) ) (line stat_AW year, lwidth(medthick) lcolor(orange) lpattern(dash) xlabel(2020(5)2045)) (line stat_AIALW year, lwidth(medthick) lcolor(green) lpattern(dash) xlabel(2020(10)2045)), xtitle("") ytitle("PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Bw gap" 2 "Hw gap" 3 "Aw gap" 4 "AIALw gap" ) size(medsmall)) title("LC1 low exports") saving($graphDir/lc1_low_exp_gap.gph, replace) - graph export $figuresDir/lc1_low_exp_gap.png, as(png) replace -restore - -*FINAL FIGURE WITH RELEVANT SCENARIOS - -grc1leg $graphDir/bau_his_prod_gap.gph $graphDir/bau_his_exp_gap.gph $graphDir/lc1_low_exp_gap.gph, col(2) - graph export $figuresDir/all_gap_race.png, as(png) replace - - - - - - -***GAPS DAC AND NON DAC -*FIGURE 1 -preserve -keep if scen_id=="BAU historic exports" - twoway (line stat_DAC year, lwidth(medthick) lcolor(sea) lpattern(dash) xlabel(2020(5)2045)), xtitle("") ytitle("Gap in PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Gap DAC and non-DAC" ) size(medsmall)) ylabel(0(.1).4) title("BAU historic exports") saving($graphDir/bau_his_exp_dac_gap.gph, replace) - graph export $figuresDir/bau_his_exports_dac_gap.png, as(png) replace -restore - -*FIGURE 2 -preserve -keep if scen_id=="BAU historic production" - twoway (line stat_DAC year, lwidth(medthick) lcolor(sea) lpattern(dash) xlabel(2020(5)2045)), xtitle("") ytitle("Gap in PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Gap DAC and non-DAC" ) size(medsmall)) ylabel(0(.1).4) title("BAU historic production") saving($graphDir/bau_his_prod_dac_gap.gph, replace) - graph export $figuresDir/bau_his_prod_dac_gap.png, as(png) replace -restore - -*FIGURE 3 -preserve -keep if scen_id=="BAU low exports" - twoway (line stat_DAC year, lwidth(medthick) lcolor(sea) lpattern(dash) xlabel(2020(5)2045)), xtitle("") ytitle("Gap in PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Gap DAC and non-DAC" ) size(medsmall)) ylabel(0(.1).4) title("BAU low exports") saving($graphDir/bau_low_exp_dac_gap.gph, replace) - graph export $figuresDir/bau_low_exports_dac_gap.png, as(png) replace -restore - -*FIGURE 4 -preserve -keep if scen_id=="LC1 historic exports" - twoway (line stat_DAC year, lwidth(medthick) lcolor(sea) lpattern(dash) xlabel(2020(5)2045)), xtitle("") ytitle("Gap in PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Gap DAC and non-DAC" ) size(medsmall)) ylabel(0(.1).4) title("LC1 historic exports") saving($graphDir/lc1_his_exp_dac_gap.gph, replace) - graph export $figuresDir/lc1_his_exports_dac_gap.png, as(png) replace -restore - -*FIGURE 5 -preserve -keep if scen_id=="LC1 historic production" - twoway (line stat_DAC year, lwidth(medthick) lcolor(sea) lpattern(dash) xlabel(2020(5)2045)), xtitle("") ytitle("Gap in PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Gap DAC and non-DAC") size(medsmall)) ylabel(0(.1).4) title("LC1 historic production") saving($graphDir/lc1_his_prod_dac_gap.gph, replace) - graph export $figuresDir/lc1_his_prod_dac_gap.png, as(png) replace -restore - -*FIGURE 6 -preserve -keep if scen_id=="LC1 low exports" - twoway (line stat_DAC year, lwidth(medthick) lcolor(sea) lpattern(dash) xlabel(2020(5)2045)), xtitle("") ytitle("Gap in PM{sub:2.5} ({&mu}g/m{sup:3}/person)", size(medium)) legend(pos(6) col(5) order(1 "Gap DAC and non-DAC") size(medsmall)) ylabel(0(.1).4) title("LC1 low exports") saving($graphDir/lc1_low_exp_dac_gap.gph, replace) - graph export $figuresDir/lc1_low_exports_dac_gap.png, as(png) replace -restore - -*FINAL FIGURE WITH RELEVANT SCENARIOS - -grc1leg $graphDir/bau_his_prod_dac_gap.gph $graphDir/bau_his_exp_dac_gap.gph $graphDir/lc1_low_exp_dac_gap.gph, col(2) - - graph export $figuresDir/all_gap_dac.png, as(png) replace - -*/ - - -******************************* - -keep if year==2045 - -keep year scen_id stat_* - -reshape long stat_, i(scen_id year) j(group, string) - -rename stat_ gap - -la var gap "Gap value in end point (2045)" -gen groupt=" Black-white gap" if group=="BW" - replace groupt="Asian-white gap" if group=="AW" - replace groupt="AIAN-white gap" if group=="AIALW" - replace groupt="Hispanic-white gap" if group=="HW" - replace groupt="Minority-white gap" if group=="MW" - replace groupt="DAC-nonDAC gap" if group=="DAC" - -append using $processedDir/gaps_2045_poor -graph hbar (mean) gap, over(scen_id) asyvars over(groupt) -sdf \ No newline at end of file