Read the file:
# Load the per-game CSV, standardise the column names, then drop empty
# rows/columns and constant columns, reporting what (if anything) was removed.
df_per_game <- './data/2022-12-27-per-player-per-game.csv' %>%
  read_csv() %>%
  clean_names() %>%
  remove_empty(quiet = FALSE) %>%
  remove_constant(quiet = FALSE)
## value for "which" not specified, defaulting to c("rows", "cols")
## Rows: 812 Columns: 31
## ── Column specification ─────────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Player, Pos, Tm, Player-additional
## dbl (27): Rk, Age, G, GS, MP, FG, FGA, FG%, 3P, 3PA, 3P%, 2P, 2PA, 2P%, eFG%, FT,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## No empty rows to remove.
##
## No empty columns to remove.
##
## No constant columns to remove.
First look at the data:
## Rows: 812
## Columns: 31
## $ rk <dbl> 1, 2, 3, 4, 5, 6, 6, 6, 7, 8, 9, 10, 10, 10, 11, 12, 13, …
## $ player <chr> "Precious Achiuwa", "Steven Adams", "Bam Adebayo", "Santi…
## $ pos <chr> "C", "C", "C", "PF", "C", "SG", "SG", "SG", "SG", "C", "P…
## $ age <dbl> 22, 28, 24, 21, 36, 23, 23, 23, 26, 23, 23, 28, 28, 28, 2…
## $ tm <chr> "TOR", "MEM", "MIA", "MEM", "BRK", "TOT", "NOP", "UTA", "…
## $ g <dbl> 73, 76, 56, 32, 47, 65, 50, 15, 66, 56, 54, 16, 3, 13, 69…
## $ gs <dbl> 28, 75, 56, 0, 12, 21, 19, 2, 61, 56, 1, 6, 0, 6, 11, 67,…
## $ mp <dbl> 23,6, 26,3, 32,6, 11,3, 22,3, 22,6, 26,3, 9,9, 27,3, 32,3…
## $ fg <dbl> 3,6, 2,8, 7,3, 1,7, 5,4, 3,9, 4,7, 1,1, 3,9, 6,6, 2,4, 2,…
## $ fga <dbl> 8,3, 5,1, 13,0, 4,1, 9,7, 10,5, 12,6, 3,2, 8,6, 9,7, 5,4,…
## $ fg_percent <dbl> 0,439, 0,547, 0,557, 0,402, 0,550, 0,372, 0,375, 0,333, 0…
## $ x3p <dbl> 0,8, 0,0, 0,0, 0,2, 0,3, 1,6, 1,9, 0,7, 2,4, 0,0, 0,6, 0,…
## $ x3pa <dbl> 2,1, 0,0, 0,1, 1,5, 1,0, 5,2, 6,1, 2,2, 5,9, 0,2, 2,0, 3,…
## $ x3p_percent <dbl> 0,359, 0,000, 0,000, 0,125, 0,304, 0,311, 0,311, 0,303, 0…
## $ x2p <dbl> 2,9, 2,8, 7,3, 1,5, 5,1, 2,3, 2,8, 0,4, 1,5, 6,6, 1,8, 1,…
## $ x2pa <dbl> 6,1, 5,0, 12,9, 2,6, 8,8, 5,3, 6,5, 1,0, 2,7, 9,6, 3,4, 2…
## $ x2p_percent <dbl> 0,468, 0,548, 0,562, 0,560, 0,578, 0,433, 0,434, 0,400, 0…
## $ e_fg_percent <dbl> 0,486, 0,547, 0,557, 0,424, 0,566, 0,449, 0,450, 0,438, 0…
## $ ft <dbl> 1,1, 1,4, 4,6, 0,6, 1,9, 1,2, 1,4, 0,7, 1,0, 2,9, 0,7, 0,…
## $ fta <dbl> 1,8, 2,6, 6,1, 1,0, 2,2, 1,7, 1,9, 0,8, 1,1, 4,2, 1,0, 1,…
## $ ft_percent <dbl> 0,595, 0,543, 0,753, 0,625, 0,873, 0,743, 0,722, 0,917, 0…
## $ orb <dbl> 2,0, 4,6, 2,4, 1,0, 1,6, 0,6, 0,7, 0,1, 0,5, 3,4, 0,5, 0,…
## $ drb <dbl> 4,5, 5,4, 7,6, 1,7, 3,9, 2,3, 2,6, 1,5, 2,9, 7,3, 1,4, 2,…
## $ trb <dbl> 6,5, 10,0, 10,1, 2,7, 5,5, 2,9, 3,3, 1,5, 3,4, 10,8, 1,9,…
## $ ast <dbl> 1,1, 3,4, 3,4, 0,7, 0,9, 2,4, 2,8, 1,1, 1,5, 1,6, 2,8, 2,…
## $ stl <dbl> 0,5, 0,9, 1,4, 0,2, 0,3, 0,7, 0,8, 0,3, 0,7, 0,8, 1,3, 0,…
## $ blk <dbl> 0,6, 0,8, 0,8, 0,3, 1,0, 0,4, 0,4, 0,3, 0,3, 1,3, 0,1, 0,…
## $ tov <dbl> 1,2, 1,5, 2,6, 0,5, 0,9, 1,4, 1,7, 0,5, 0,7, 1,7, 0,7, 0,…
## $ pf <dbl> 2,1, 2,0, 3,1, 1,1, 1,7, 1,6, 1,8, 1,0, 1,5, 1,7, 1,4, 1,…
## $ pts <dbl> 9,1, 6,9, 19,1, 4,1, 12,9, 10,6, 12,8, 3,5, 11,1, 16,1, 6…
## $ player_additional <chr> "achiupr01", "adamsst01", "adebaba01", "aldamsa01", "aldr…
Read the file:
# Load the advanced-statistics CSV, standardise the column names, then drop
# empty rows/columns and constant columns, reporting what was removed.
df_advanced <- './data/2022-12-27-per-player-advanced.csv' %>%
  read_csv() %>%
  clean_names() %>%
  remove_empty(quiet = FALSE) %>%
  remove_constant(quiet = FALSE)
## value for "which" not specified, defaulting to c("rows", "cols")
## New names:
## Rows: 812 Columns: 30
## ── Column specification
## ───────────────────────────────────────────────────────────── Delimiter: "," chr
## (4): Player, Pos, Tm, Player-additional dbl (24): Rk, Age, G, MP, PER, TS%, 3PAr,
## FTr, ORB%, DRB%, TRB%, AST%, STL%, BLK%... lgl (2): ...20, ...25
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ Specify
## the column types or set `show_col_types = FALSE` to quiet this message.
## No empty rows to remove.
## Removing 2 empty columns of 30 columns total (Removed: x20, x25).
## No constant columns to remove.
## • `` -> `...20`
## • `` -> `...25`
First look at the data:
## Rows: 812
## Columns: 28
## $ rk <dbl> 1, 2, 3, 4, 5, 6, 6, 6, 7, 8, 9, 10, 10, 10, 11, 12, 13, …
## $ player <chr> "Precious Achiuwa", "Steven Adams", "Bam Adebayo", "Santi…
## $ pos <chr> "C", "C", "C", "PF", "C", "SG", "SG", "SG", "SG", "C", "P…
## $ age <dbl> 22, 28, 24, 21, 36, 23, 23, 23, 26, 23, 23, 28, 28, 28, 2…
## $ tm <chr> "TOR", "MEM", "MIA", "MEM", "BRK", "TOT", "NOP", "UTA", "…
## $ g <dbl> 73, 76, 56, 32, 47, 65, 50, 15, 66, 56, 54, 16, 3, 13, 69…
## $ mp <dbl> 1725, 1999, 1825, 360, 1050, 1466, 1317, 149, 1805, 1809,…
## $ per <dbl> 12,7, 17,6, 21,8, 10,2, 19,6, 10,5, 10,5, 10,2, 12,7, 23,…
## $ ts_percent <dbl> 0,503, 0,560, 0,608, 0,452, 0,604, 0,475, 0,474, 0,497, 0…
## $ x3p_ar <dbl> 0,259, 0,003, 0,008, 0,364, 0,100, 0,497, 0,483, 0,688, 0…
## $ f_tr <dbl> 0,217, 0,518, 0,466, 0,242, 0,223, 0,160, 0,153, 0,250, 0…
## $ orb_percent <dbl> 8,7, 17,9, 8,7, 9,4, 7,8, 2,7, 3,0, 0,8, 1,9, 12,0, 3,2, …
## $ drb_percent <dbl> 21,7, 22,0, 26,1, 16,1, 18,7, 11,5, 11,0, 15,6, 10,9, 24,…
## $ trb_percent <dbl> 14,9, 19,9, 17,5, 12,6, 13,4, 7,1, 6,9, 8,5, 6,5, 18,4, 6…
## $ ast_percent <dbl> 6,9, 16,1, 17,5, 7,7, 6,3, 16,1, 16,1, 15,5, 7,6, 8,2, 26…
## $ stl_percent <dbl> 1,1, 1,6, 2,2, 0,8, 0,6, 1,5, 1,5, 1,7, 1,2, 1,2, 4,2, 1,…
## $ blk_percent <dbl> 2,3, 2,7, 2,6, 2,5, 4,0, 1,5, 1,4, 2,4, 1,0, 3,7, 0,8, 1,…
## $ tov_percent <dbl> 11,3, 19,6, 14,4, 9,9, 8,0, 11,3, 11,2, 13,1, 6,7, 12,7, …
## $ usg_percent <dbl> 18,5, 12,0, 25,0, 18,4, 22,4, 24,1, 24,8, 17,9, 15,2, 18,…
## $ ows <dbl> 0,4, 3,8, 3,6, -0,1, 2,1, -1,1, -1,1, 0,0, 2,8, 5,4, 1,0,…
## $ dws <dbl> 2,1, 3,0, 3,5, 0,4, 1,0, 1,1, 0,9, 0,2, 1,4, 3,0, 1,1, 0,…
## $ ws <dbl> 2,5, 6,8, 7,2, 0,3, 3,1, 0,1, -0,1, 0,2, 4,2, 8,5, 2,1, 0…
## $ ws_48 <dbl> 0,070, 0,163, 0,188, 0,044, 0,141, 0,003, -0,005, 0,070, …
## $ obpm <dbl> -2,0, 1,0, 1,7, -4,2, 1,3, -1,8, -1,7, -2,9, 0,6, 2,7, -0…
## $ dbpm <dbl> -0,6, 1,0, 2,1, -1,5, -0,6, -1,1, -1,3, 1,2, -0,2, 1,2, 2…
## $ bpm <dbl> -2,6, 2,0, 3,8, -5,7, 0,7, -2,9, -3,0, -1,7, 0,4, 3,9, 1,…
## $ vorp <dbl> -0,2, 2,0, 2,7, -0,3, 0,7, -0,3, -0,3, 0,0, 1,1, 2,7, 0,8…
## $ player_additional <chr> "achiupr01", "adamsst01", "adebaba01", "aldamsa01", "aldr…
Rk: Rank.
Player: Player name.
Pos: Position.
Age: Player’s age on February 1 of the season.
Tm: Team.
G: Games.
GS: Games Started.
MP: Minutes Played Per Game.
FG: Field Goals Per Game.
FGA: Field Goal Attempts Per Game.
FG%: Field Goal Percentage.
3P: 3-Point Field Goals Per Game.
3PA: 3-Point Field Goal Attempts Per Game.
3P%: 3-Point Field Goal Percentage.
2P: 2-Point Field Goals Per Game.
2PA: 2-Point Field Goal Attempts Per Game.
2P%: 2-Point Field Goal Percentage.
eFG%: Effective Field Goal Percentage. Adjusts for a 3-point field goal being worth one more point than a 2-point field goal.
FT: Free Throws Per Game.
FTA: Free Throw Attempts Per Game.
FT%: Free Throw Percentage.
ORB: Offensive Rebounds Per Game.
DRB: Defensive Rebounds Per Game.
TRB: Total Rebounds Per Game.
AST: Assists Per Game.
STL: Steals Per Game.
BLK: Blocks Per Game.
TOV: Turnovers Per Game.
PF: Personal Fouls Per Game.
PTS: Points Per Game.
Player-additional: Unique identifier.
Rk: Rank.
Player: Player name.
Pos: Position.
Age: Player’s age on February 1 of the season.
Tm: Team.
G: Games.
MP: Minutes Played.
PER: Player Efficiency Rating. A measure of per-minute production standardized such that the league average is 15.
TS%: True Shooting Percentage. A measure of shooting efficiency that takes into account 2-point field goals, 3-point field goals, and free throws.
3PAr: 3-Point Attempt Rate. Percentage of FG Attempts from 3-Point Range.
FTr: Free Throw Attempt Rate. Number of FT Attempts Per FG Attempt.
ORB%: Offensive Rebound Percentage. An estimate of the percentage of available offensive rebounds a player grabbed while they were on the floor.
DRB%: Defensive Rebound Percentage. An estimate of the percentage of available defensive rebounds a player grabbed while they were on the floor.
TRB%: Total Rebound Percentage. An estimate of the percentage of available rebounds a player grabbed while they were on the floor.
AST%: Assist Percentage. An estimate of the percentage of teammate field goals a player assisted while they were on the floor.
STL%: Steal Percentage. An estimate of the percentage of opponent possessions that end with a steal by the player while they were on the floor.
BLK%: Block Percentage. An estimate of the percentage of opponent two-point field goal attempts blocked by the player while they were on the floor.
TOV%: Turnover Percentage. An estimate of turnovers committed per 100 plays.
USG%: Usage Percentage. An estimate of the percentage of team plays used by a player while they were on the floor.
OWS: Offensive Win Shares. An estimate of the number of wins contributed by a player due to offense.
DWS: Defensive Win Shares. An estimate of the number of wins contributed by a player due to defense.
WS: Win Shares. An estimate of the number of wins contributed by a player.
WS/48: Win Shares Per 48 Minutes. An estimate of the number of wins contributed by a player per 48 minutes (league average is approximately .100).
OBPM: Offensive Box Plus/Minus. A box score estimate of the offensive points per 100 possessions a player contributed above a league-average player, translated to an average team.
DBPM: Defensive Box Plus/Minus. A box score estimate of the defensive points per 100 possessions a player contributed above a league-average player, translated to an average team.
BPM: Box Plus/Minus. A box score estimate of the points per 100 possessions a player contributed above a league-average player, translated to an average team.
VORP: Value over Replacement Player. A box score estimate of the points per 100 TEAM possessions that a player contributed above a replacement-level (-2.0) player, translated to an average team and prorated to an 82-game season. Multiply by 2.70 to convert to wins over replacement.
Player-additional: Unique identifier.
Delete the `rk` column:
Rename all columns:
# Build the old -> new column-name lookup. The new names are matched to the
# current columns purely by position, so their order must follow the order of
# the columns in df_per_game.
original_names <- names(df_per_game)
new_names <- setNames(
  c(
    'player',
    'position',
    'age',
    'team',
    'games',
    'games_started',
    'minutes_played_average',
    'goals_scored',
    'goal_attempts',
    'goal_pct',
    'goals_scored_3p',
    'goal_attempts_3p',
    'goal_pct_3p',
    'goals_scored_2p',
    'goal_attempts_2p',
    'goal_pct_2p',
    'goals_effective_pct',
    'free_throws_scored',
    'free_throw_attempts',
    'free_throw_pct',
    'rebounds_offense',
    'rebounds_defense',
    'rebounds_total',
    'assists',
    'steals',
    'blocks',
    'turnovers',
    'fouls',
    'points_scored',
    'player_id'
  ),
  original_names
)

# Print the mapping, one "old -> new" pair per line, for visual inspection.
cat(
  paste(
    names(new_names),
    new_names,
    sep = ' -> ',
    collapse = '\n'
  )
)
## player -> player
## pos -> position
## age -> age
## tm -> team
## g -> games
## gs -> games_started
## mp -> minutes_played_average
## fg -> goals_scored
## fga -> goal_attempts
## fg_percent -> goal_pct
## x3p -> goals_scored_3p
## x3pa -> goal_attempts_3p
## x3p_percent -> goal_pct_3p
## x2p -> goals_scored_2p
## x2pa -> goal_attempts_2p
## x2p_percent -> goal_pct_2p
## e_fg_percent -> goals_effective_pct
## ft -> free_throws_scored
## fta -> free_throw_attempts
## ft_percent -> free_throw_pct
## orb -> rebounds_offense
## drb -> rebounds_defense
## trb -> rebounds_total
## ast -> assists
## stl -> steals
## blk -> blocks
## tov -> turnovers
## pf -> fouls
## pts -> points_scored
## player_additional -> player_id
# Rename every column by looking its current name up in new_names.
df_per_game <- rename_with(
  df_per_game,
  function(old_name) new_names[old_name]
)
Find players that appear more than once and keep only the row that has the totals:
# Rows whose player_id occurs more than once in the per-game data.
dupes <- get_dupes(df_per_game, player_id)
For these players, we keep only the row for the totals (`TOT`):
# Apply keep_only_totals() (defined elsewhere in this report) to the per-game
# data, passing the duplicated rows found above.
df_per_game <- keep_only_totals(df_per_game, dupes)
Variable | Stats / Values | Freqs (% of Valid) | Graph | Missing | |||||||||||||||||||||||||||||||||||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
player [character] |
|
|
0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
position [character] |
|
|
0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
age [numeric] |
|
22 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
team [character] |
|
|
0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
games [numeric] |
|
82 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
games_started [numeric] |
|
78 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
minutes_played_average [numeric] |
|
280 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
goals_scored [numeric] |
|
95 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
goal_attempts [numeric] |
|
170 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
goal_pct [numeric] |
|
259 distinct values | 9 (1,5%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
goals_scored_3p [numeric] |
|
38 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
goal_attempts_3p [numeric] |
|
91 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
goal_pct_3p [numeric] |
|
192 distinct values | 44 (7,3%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
goals_scored_2p [numeric] |
|
74 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
goal_attempts_2p [numeric] |
|
132 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
goal_pct_2p [numeric] |
|
260 distinct values | 16 (2,6%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
goals_effective_pct [numeric] |
|
248 distinct values | 9 (1,5%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
free_throws_scored [numeric] |
|
59 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
free_throw_attempts [numeric] |
|
69 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
free_throw_pct [numeric] |
|
247 distinct values | 59 (9,8%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
rebounds_offense [numeric] |
|
39 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
rebounds_defense [numeric] |
|
80 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
rebounds_total [numeric] |
|
106 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
assists [numeric] |
|
77 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
steals [numeric] |
|
23 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
blocks [numeric] |
|
23 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
turnovers [numeric] |
|
43 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
fouls [numeric] |
|
39 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
points_scored [numeric] |
|
202 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
player_id [character] |
|
|
0 (0,0%) |
Generated by summarytools 1.0.1 (R version 4.2.2)
2022-12-30
Approximately \(16\%\) of players played for two or more teams during the season.
On average, a player plays only \(19\) minutes per game.
Columns that should contain percentages actually contain proportions.
All columns that contain proportions have some missing values. Fortunately, all of them will be discarded before we build the model, as they are derived from other columns and therefore redundant.
What are the “other” positions?
# Count players per position, most frequent first, and show the three
# least frequent positions (the tail of the sorted counts).
slice_tail(
  count(df_per_game, position, sort = TRUE),
  n = 3
)
Turn proportions into percentages:
Why are some percentages `NA`?
Because they are \(0\%\) of \(0\) attempts: the ratio \(0 / 0\) is undefined.
I will replace the `NA`s with zeroes:
Variable | Stats / Values | Freqs (% of Valid) | Graph | Missing | |||||||||||||||||||||||||||||||||||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
player [character] |
|
|
0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
position [character] |
|
|
0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
age [numeric] |
|
22 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
team [character] |
|
|
0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
games [numeric] |
|
82 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
games_started [numeric] |
|
78 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
minutes_played_average [numeric] |
|
280 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
goals_scored [numeric] |
|
95 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
goal_attempts [numeric] |
|
170 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
goal_pct [numeric] |
|
259 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
goals_scored_3p [numeric] |
|
38 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
goal_attempts_3p [numeric] |
|
91 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
goal_pct_3p [numeric] |
|
192 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
goals_scored_2p [numeric] |
|
74 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
goal_attempts_2p [numeric] |
|
132 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
goal_pct_2p [numeric] |
|
260 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
goals_effective_pct [numeric] |
|
248 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
free_throws_scored [numeric] |
|
59 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
free_throw_attempts [numeric] |
|
69 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
free_throw_pct [numeric] |
|
247 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
rebounds_offense [numeric] |
|
39 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
rebounds_defense [numeric] |
|
80 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
rebounds_total [numeric] |
|
106 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
assists [numeric] |
|
77 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
steals [numeric] |
|
23 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
blocks [numeric] |
|
23 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
turnovers [numeric] |
|
43 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
fouls [numeric] |
|
39 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
points_scored [numeric] |
|
202 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
player_id [character] |
|
|
0 (0,0%) |
Generated by summarytools 1.0.1 (R version 4.2.2)
2022-12-30
Delete the `rk` column:
Rename all columns:
# Build the old -> new column-name lookup. The new names are matched to the
# current columns purely by position, so their order must follow the order of
# the columns in df_advanced.
original_names <- names(df_advanced)
new_names <- setNames(
  c(
    'player',
    'position',
    'age',
    'team',
    'games',
    'minutes_played_total',
    'efficiency',
    'true_shooting_pct',
    'attempt_rate_3p',
    'attempt_rate_free_throw',
    'rebound_offense_pct',
    'rebound_defense_pct',
    'rebound_total_pct',
    'assist_pct',
    'steal_pct',
    'block_pct',
    'turnover_pct',
    'usage_pct',
    'win_shares_offense',
    'win_shares_defense',
    'win_shares',
    'win_shares_48',
    'plus_minus_offense',
    'plus_minus_defense',
    'plus_minus',
    'value_over_replacement',
    'player_id'
  ),
  original_names
)

# Print the mapping, one "old -> new" pair per line, for visual inspection.
cat(
  paste(
    names(new_names),
    new_names,
    sep = ' -> ',
    collapse = '\n'
  )
)
## player -> player
## pos -> position
## age -> age
## tm -> team
## g -> games
## mp -> minutes_played_total
## per -> efficiency
## ts_percent -> true_shooting_pct
## x3p_ar -> attempt_rate_3p
## f_tr -> attempt_rate_free_throw
## orb_percent -> rebound_offense_pct
## drb_percent -> rebound_defense_pct
## trb_percent -> rebound_total_pct
## ast_percent -> assist_pct
## stl_percent -> steal_pct
## blk_percent -> block_pct
## tov_percent -> turnover_pct
## usg_percent -> usage_pct
## ows -> win_shares_offense
## dws -> win_shares_defense
## ws -> win_shares
## ws_48 -> win_shares_48
## obpm -> plus_minus_offense
## dbpm -> plus_minus_defense
## bpm -> plus_minus
## vorp -> value_over_replacement
## player_additional -> player_id
# Rename every column by looking its current name up in new_names.
df_advanced <- rename_with(
  df_advanced,
  function(old_name) new_names[old_name]
)
Find players that appear more than once and keep only the row that has the totals:
# Rows whose player_id occurs more than once in the advanced data.
dupes <- get_dupes(df_advanced, player_id)
For these players, we keep only the row for the totals (`TOT`):
# Apply keep_only_totals() (defined elsewhere in this report) to the advanced
# data, passing the duplicated rows found above.
df_advanced <- keep_only_totals(df_advanced, dupes)
Variable | Stats / Values | Freqs (% of Valid) | Graph | Missing | |||||||||||||||||||||||||||||||||||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
player [character] |
|
|
0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
position [character] |
|
|
0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
age [numeric] |
|
22 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
team [character] |
|
|
0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
games [numeric] |
|
82 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
minutes_played_total [numeric] |
|
501 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
efficiency [numeric] |
|
228 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
true_shooting_pct [numeric] |
|
261 distinct values | 8 (1,3%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
attempt_rate_3p [numeric] |
|
366 distinct values | 9 (1,5%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
attempt_rate_free_throw [numeric] |
|
312 distinct values | 9 (1,5%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
rebound_offense_pct [numeric] |
|
152 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
rebound_defense_pct [numeric] |
|
213 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
rebound_total_pct [numeric] |
|
177 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
assist_pct [numeric] |
|
239 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
steal_pct [numeric] |
|
55 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
block_pct [numeric] |
|
72 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
turnover_pct [numeric] |
|
176 distinct values | 8 (1,3%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
usage_pct [numeric] |
|
213 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
win_shares_offense [numeric] |
|
78 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
win_shares_defense [numeric] |
|
45 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
win_shares [numeric] |
|
98 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
win_shares_48 [numeric] |
|
263 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
plus_minus_offense [numeric] |
|
160 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
plus_minus_defense [numeric] |
|
97 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
plus_minus [numeric] |
|
180 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
value_over_replacement [numeric] |
|
57 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
player_id [character] |
|
|
0 (0,0%) |
Generated by summarytools 1.0.1 (R version 4.2.2)
2022-12-30
Here, percentages are really percentages (between \(0\) and \(100\)), except for `true_shooting_pct`, which is a proportion.
Offensive rebounds are harder to get than defensive rebounds.
Most statistics here have right-skewed distributions.
What are the “other” positions?
# Count players per position, most frequent first, and show the three
# least frequent positions (the tail of the sorted counts).
slice_tail(
  count(df_advanced, position, sort = TRUE),
  n = 3
)
Are the players in the `advanced` data frame the same as in the `per game` data frame?
## [1] TRUE
Variable | Stats / Values | Freqs (% of Valid) | Graph | Missing | |||||||||||||||||||||||||||||||||||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
player [character] |
|
|
0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
position [character] |
|
|
0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
age [numeric] |
|
22 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
team [character] |
|
|
0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
games [numeric] |
|
82 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
minutes_played_total [numeric] |
|
501 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
efficiency [numeric] |
|
228 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
true_shooting_pct [numeric] |
|
261 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
attempt_rate_3p [numeric] |
|
366 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
attempt_rate_free_throw [numeric] |
|
312 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
rebound_offense_pct [numeric] |
|
152 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
rebound_defense_pct [numeric] |
|
213 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
rebound_total_pct [numeric] |
|
177 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
assist_pct [numeric] |
|
239 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
steal_pct [numeric] |
|
55 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
block_pct [numeric] |
|
72 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
turnover_pct [numeric] |
|
176 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
usage_pct [numeric] |
|
213 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
win_shares_offense [numeric] |
|
78 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
win_shares_defense [numeric] |
|
45 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
win_shares [numeric] |
|
98 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
win_shares_48 [numeric] |
|
263 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
plus_minus_offense [numeric] |
|
160 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
plus_minus_defense [numeric] |
|
97 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
plus_minus [numeric] |
|
180 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
value_over_replacement [numeric] |
|
57 distinct values | 0 (0,0%) | ||||||||||||||||||||||||||||||||||||||||||||||||||||||||
player_id [character] |
|
|
0 (0,0%) |
Generated by summarytools 1.0.1 (R version 4.2.2)
2022-12-30