print(hat_tricks) # Connect to SQLite database (if you want raw SQL) conn <- db_connect() Query using SQL dbGetQuery(conn, " SELECT tournament_year, COUNT(*) as matches FROM matches GROUP BY tournament_year ORDER BY tournament_year ") Close connection db_disconnect(conn) 9. Useful Helper Functions # Get player information get_player(player_name = "Lionel Messi") Get match details get_match(match_id = 3960) Get tournament info get_tournament(tournament_id = "WC-2018") 10. Complete Workflow Example # Comprehensive analysis: Best teams by goal difference library(dplyr) best_teams <- matches %>% Calculate goal difference per match mutate( home_gd = home_goals - away_goals, away_gd = away_goals - home_goals ) %>% Pivot to team-level data tidyr::pivot_longer( cols = c(home_team, away_team), names_to = "location", values_to = "team" ) %>% mutate( goal_diff = case_when( location == "home_team" ~ home_gd, location == "away_team" ~ away_gd ) ) %>% Summarise by team group_by(team) %>% summarise( matches = n(), total_goal_diff = sum(goal_diff, na.rm = TRUE), avg_goal_diff = mean(goal_diff, na.rm = TRUE) ) %>% arrange(desc(total_goal_diff))
citation("fjelstulworldcup") Fjelstul, J. C. (2023). The fjelstulworldcup package: A comprehensive database of the FIFA World Cup. fjelstul world cup database r package
# Install from CRAN install.packages("fjelstulworldcup") Load the package library(fjelstulworldcup) library(dplyr) library(ggplot2) 2. Explore Available Datasets # List all datasets in the package ls("package:fjelstulworldcup") Key datasets: - matches : Match results and metadata - goals : All goals scored - cards : Yellow and red cards - substitutions : Substitutions made - players : Player information - teams : Team information - tournaments : World Cup tournaments - appearances : Player appearances in matches 3. Basic Data Exploration # View tournaments head(tournaments) View matches (first 6 rows) head(matches) Check structure of matches str(matches) Summary statistics summary(matches$home_goals) 4. Key Analysis Examples A. World Cup Winners by Year winners <- matches %>% filter(tournament_id == "WC", !is.na(home_goals), !is.na(away_goals), stage_name == "Final") %>% mutate(winner = ifelse(home_goals > away_goals, home_team, away_team)) %>% select(tournament_year, winner) print(winners) B. Top Goal Scorers of All Time top_scorers <- goals %>% filter(own_goal == 0, penalty == 0) %>% # exclude own goals & penalties group_by(player_name) %>% summarise(total_goals = n()) %>% arrange(desc(total_goals)) %>% head(10) print(top_scorers) C. Most Cards per Tournament cards_per_tournament <- cards %>% group_by(tournament_year, card_type) %>% summarise(total_cards = n()) %>% arrange(desc(total_cards)) print(cards_per_tournament) D. Home vs Away Goals Analysis goal_summary <- matches %>% summarise( avg_home_goals = mean(home_goals, na.rm = TRUE), avg_away_goals = mean(away_goals, na.rm = TRUE), total_home_goals = sum(home_goals, na.rm = TRUE), total_away_goals = sum(away_goals, na.rm = TRUE) ) print(goal_summary) 5. Visualization Examples Goals per Tournament Over Time goals_per_tournament <- matches %>% group_by(tournament_year) %>% summarise(total_goals = sum(home_goals + away_goals, na.rm = TRUE)) ggplot(goals_per_tournament, aes(x = tournament_year, y = total_goals)) + geom_line(color = "blue", size = 1) + geom_point(color = "red", size = 2) + labs(title = "Total Goals per FIFA World Cup Tournament", x = "Year", y = "Total Goals") + theme_minimal() Yellow vs Red Cards Over Time cards_summary <- cards %>% group_by(tournament_year, card_type) %>% summarise(count = n()) ggplot(cards_summary, aes(x = tournament_year, y = count, fill = card_type)) + geom_bar(stat = "identity", position = "dodge") + labs(title = "Cards per World Cup Tournament", x = "Year", y = "Number of Cards") + theme_minimal() 6. Advanced Analysis Team Performance Metrics team_performance <- matches %>% mutate( home_win = ifelse(home_goals > away_goals, 1, 0), away_win = ifelse(away_goals > home_goals, 1, 0), draw = ifelse(home_goals == away_goals, 1, 0) ) %>% group_by(home_team) %>% summarise( matches_played = n(), wins = sum(home_win), draws = sum(draw), losses = sum(away_win), goals_for = sum(home_goals, na.rm = TRUE), goals_against = sum(away_goals, na.rm = TRUE) ) %>% arrange(desc(wins)) head(team_performance, 10) Match Attendance Trends attendance_data <- matches %>% filter(!is.na(attendance)) %>% group_by(tournament_year) %>% summarise(avg_attendance = mean(attendance), total_attendance = sum(attendance)) ggplot(attendance_data, aes(x = tournament_year, y = avg_attendance)) + geom_line() + geom_point() + labs(title = "Average Match Attendance by Tournament Year", x = "Year", y = "Average Attendance") + theme_minimal() 7. Working with Player Data # Find players with most appearances top_appearances <- appearances %>% group_by(player_name) %>% summarise(appearances = n()) %>% arrange(desc(appearances)) %>% head(10) print(top_appearances) Players who scored hat-tricks hat_tricks <- goals %>% group_by(match_id, player_name) %>% summarise(goals_in_match = n()) %>% filter(goals_in_match >= 3) %>% left_join(matches %>% select(match_id, tournament_year, home_team, away_team), by = "match_id") print(hat_tricks) # Connect to SQLite database (if you