# ==============================================================================
# Exercise 4.2 Solution: Your First Join
# Module: Data Reshaping and Joins
# ==============================================================================

library(tidyverse)
library(here)
library(data.table)

# Load data ----

# Directory holding the intermediate exercise data. Built once so that both
# inputs and the output written at the end share a single path definition.
data_dir <- here("r_training_datax", "Exercises", "data", "Intermediate")

panel_cit <- fread(file.path(data_dir, "panel_cit.csv"))
dt_firms <- fread(file.path(data_dir, "dt_firms.csv"))

# Inspect structure and first rows of both tables before joining.
str(panel_cit)
str(dt_firms)
head(panel_cit)
head(dt_firms)

# Join ----

# Snapshot the row count before the join so the result can be compared
# against it below.
original_rows <- nrow(panel_cit)

# Left join: every row of panel_cit is kept and columns of dt_firms are
# attached wherever both firm_id and year match.
cit_with_firms <- left_join(
  panel_cit,
  dt_firms,
  by = c("firm_id", "year")
)

# Check the join ----

# A left join never drops rows from the left table; the count can only grow
# when a (firm_id, year) key matches more than one row of dt_firms. The last
# line therefore prints TRUE when no rows were duplicated.
nrow(cit_with_firms)
original_rows
nrow(cit_with_firms) == original_rows

# Count missing values in industry and size (presumably the columns brought
# in from dt_firms -- confirm against the str() output above). An NA here
# means either no matching row in dt_firms or a missing value in the data.
cit_with_firms %>%
  summarize(
    na_industry = sum(is.na(industry)),
    na_size = sum(is.na(size))
  )

# Spot-check a single firm to eyeball the joined columns.
cit_with_firms %>%
  filter(firm_id == "FIRM_001") %>%
  select(firm_id, year, taxable_income, tax_paid, industry, size)

# Save ----

fwrite(cit_with_firms, file.path(data_dir, "cit_with_firms.csv"))
