# Exercise 2: Date Operations - SOLUTION
# Module 3: Data Wrangling with dplyr
# ============================================================================

# Load required packages
library(dplyr)
library(lubridate)
library(data.table)
library(here)

# Load the VAT panel from the project's Data/Intermediate folder.
# The path goes through `file =` so fread() always treats it as a file on disk
# and never as a shell command. Do not also pass `cmd =`: fread() accepts only
# one of input/file/text/cmd and stops if more than one is supplied.
panel_vat <- fread(file = here("Data", "Intermediate", "panel_vat.csv"))

# ============================================================================
# TASK 1: PARSE DATES
# ============================================================================

# Coerce the declaration date column to base R's Date class; the later tasks
# do date arithmetic on this column.
panel_vat <- panel_vat %>%
  mutate(declaration_date = as.Date(declaration_date))

# Confirm the class of the converted column
class(panel_vat$declaration_date)

# ============================================================================
# TASK 2: EXTRACT DATE COMPONENTS
# ============================================================================

# Add calendar components of each declaration date as new columns.
# NOTE(review): year(), quarter() and month() are exported by both lubridate
# and data.table; with data.table attached last, the data.table versions are
# presumably the ones called here. The calls are left unqualified on purpose.
panel_vat_dates <- mutate(
  panel_vat,
  filing_year = year(declaration_date),
  filing_quarter = quarter(declaration_date),
  filing_month = month(declaration_date)
)

# Preview the first rows, including the new columns
head(panel_vat_dates, n = 6)

# ============================================================================
# TASK 3: CALCULATE DIFFERENCES
# ============================================================================

# For every firm, compute the number of days elapsed since its previous
# declaration. The first declaration of each firm has no predecessor, so its
# gap is NA.
panel_vat_gaps <- panel_vat_dates %>%
  # Order chronologically within firm so lag() picks the previous filing
  arrange(firm_id, declaration_date) %>%
  group_by(firm_id) %>%
  mutate(
    days_since_last = as.numeric(
      difftime(declaration_date, lag(declaration_date), units = "days")
    )
  ) %>%
  ungroup()

# Inspect the first ten firm/date/gap rows
panel_vat_gaps %>%
  select(firm_id, declaration_date, days_since_last) %>%
  head(n = 10)

# ============================================================================
# TASK 4: FILING ANALYSIS
# ============================================================================

# Flag late filings against a deadline of 45 days after quarter end.
#
# NOTE(review): the reporting quarter is assumed to be the last quarter that
# closed before the declaration was filed, because no tax-period column is
# referenced here. Taking the end of the quarter that *contains* the filing
# date (ceiling_date) puts every deadline at least 45 days after the filing,
# so days_late could never be positive and pct_late was always 0.
# If panel_vat has an explicit period column, derive quarter_end from it.
panel_vat_deadlines <- panel_vat_dates %>%
  mutate(
    # Last day of the quarter preceding the filing date
    quarter_end = floor_date(declaration_date, "quarter") - 1,
    # Deadline: 45 days after that quarter closes
    filing_deadline = quarter_end + 45,
    # Positive = filed after the deadline; negative = filed early
    days_late = as.numeric(declaration_date - filing_deadline)
  )

# Share of declarations filed after their deadline
panel_vat_deadlines %>%
  summarize(
    total_declarations = n(),
    # NA days_late (missing dates) count toward the total but not as late
    late_filers = sum(days_late > 0, na.rm = TRUE),
    pct_late = round(100 * late_filers / total_declarations, 2)
  )

# ============================================================================
# TASK 5: RECENT FILERS
# ============================================================================

# Firms with at least one declaration dated no more than 180 days before the
# current date. today() is evaluated at run time, so the result depends on
# when the script is executed.
recent_filers <- panel_vat_dates %>%
  filter(today() - declaration_date <= 180) %>%
  distinct(firm_id)

# Number of distinct recent filers
nrow(recent_filers)

# Preview the first firm ids
head(recent_filers, n = 6)

# ============================================================================
# END OF EXERCISE 2 SOLUTION
# ============================================================================
