import requests
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup, Comment
import re
import datetime


def get_4th_downs(url_gen):
    """Download a boxscore page and return only its 4th-down plays.

    The play-by-play table is not part of the regular DOM: it sits inside an
    HTML comment that follows the ``.placeholder`` element of the ``#all_pbp``
    container, so the comment text is re-parsed as HTML before pandas reads it.

    Args:
        url_gen: URL of a pro-football-reference.com boxscore page.

    Returns:
        DataFrame of the play-by-play rows whose ``Down`` equals the string
        ``"4"``, without the ``EPB``/``EPA`` columns and with a fresh 0..n-1
        index.

    Raises:
        requests.exceptions.RequestException: on connection problems, a
            timeout, or a non-2xx HTTP status. Failing here lets callers that
            catch request errors skip the page instead of crashing later on a
            missing table.
    """
    #imported locally so the function stays self-contained
    from io import StringIO

    response = requests.get(url_gen, timeout = 30)
    response.raise_for_status()
    root = BeautifulSoup(response.content, "html.parser")

    #we need to bypass the "comments" of the html table.
    placeholder_gen = root.select_one('#all_pbp .placeholder')
    comment = next(elem for elem in placeholder_gen.next_siblings if isinstance(elem, Comment))
    table = BeautifulSoup(comment, 'html.parser')

    #wrap the markup in a file-like object: passing literal HTML to read_html is deprecated.
    playbyplay_this = pd.read_html(StringIO(str(table)))[0]
    playbyplay_this = playbyplay_this.drop(['EPB', 'EPA'], axis = 1)

    #once we have the full play-by-play, we only care about 4th downs.
    #NOTE(review): the comparison is against the string "4", so it assumes
    #pandas parsed the Down column as text -- confirm if the page layout changes.
    playbyplay_this = playbyplay_this[playbyplay_this['Down'] == "4"]
    return playbyplay_this.reset_index(drop=True)

#substitute the url for any game you like on pro-football-reference.com
#this example game is reused by the demonstration cells below.
playbyplay_4ths = get_4th_downs("https://www.pro-football-reference.com/boxscores/202112050nyj.htm")
#limit how many rows pandas prints for a DataFrame
pd.options.display.max_rows = 20
#NOTE(review): display is not defined in this file; presumably the IPython/Jupyter built-in.
display(playbyplay_4ths)


def get_drives(url_gen, play_sheet):
    """Download a boxscore page and return both teams' drives in one table.

    Each drive table is hidden inside an HTML comment that follows the
    ``.placeholder`` element of ``#all_vis_drives`` / ``#all_home_drives``.

    Args:
        url_gen: URL of a pro-football-reference.com boxscore page.
        play_sheet: Play-by-play DataFrame for the same game. Only its column
            names are used: the columns at positions 6 and 7 are taken as the
            visiting and home team names respectively.

    Returns:
        DataFrame with the visitor drives followed by the home drives, a
        ``Team`` column naming the team on each drive, a fresh 0..n-1 index,
        and the first scraped column removed.

    Raises:
        requests.exceptions.RequestException: on connection problems, a
            timeout, or a non-2xx HTTP status.
    """
    #imported locally so the function stays self-contained
    from io import StringIO

    #similar to above, first extract the table from this difficult HTML code.
    response = requests.get(url_gen, timeout = 30)
    response.raise_for_status()
    root = BeautifulSoup(response.content, "html.parser")

    #the names of the two teams are stored in these columns, we'll use them later.
    column_names = play_sheet.columns.values
    visitor_name = column_names[6]
    home_name = column_names[7]

    #make a table for each drives table (one per team), tagging every row
    #with a "Team" column so we know which team has the ball.
    team_tables = []
    for container, team_name in (('#all_vis_drives', visitor_name),
                                 ('#all_home_drives', home_name)):
        placeholder = root.select_one(container + ' .placeholder')
        comment = next(elem for elem in placeholder.next_siblings if isinstance(elem, Comment))
        table = BeautifulSoup(comment, 'html.parser')

        #file-like wrapper: passing literal HTML to read_html is deprecated.
        team_drives = pd.read_html(StringIO(str(table)))[0]
        team_drives['Team'] = team_name
        team_tables.append(team_drives)

    #lastly, combine the tables into one, to be sorted later.
    #pd.concat replaces DataFrame.append, which no longer exists in pandas 2.x.
    all_drives = pd.concat(team_tables, ignore_index = True)
    return all_drives.drop(all_drives.columns[0], axis = 1)

#scrape the drive tables for the same example game; the play sheet supplies the team names.
all_drives = get_drives("https://www.pro-football-reference.com/boxscores/202112050nyj.htm", playbyplay_4ths)
#bare expression: shown as the cell output when run in a notebook
all_drives


def get_total_time(quarters, times):
    """Convert per-quarter clock readings into time remaining in the game.

    Values are paired by position, so lists and pandas Series with any index
    are handled the same way.

    Args:
        quarters: Iterable of quarter labels: numbers or numeric strings, with
            overtime given as ``"OT"``, ``"5"`` or ``5``.
        times: Iterable of ``"M:SS"`` clock strings, one per quarter entry.

    Returns:
        A two-element list ``[new_times, new_quarts]``. ``new_times`` holds
        ``datetime.time`` objects with the time left in regulation (clamped
        to the range 00:00:00-00:59:59); overtime entries keep their own
        clock. ``new_quarts`` repeats the quarter labels unchanged, except
        that overtime becomes the integer ``5`` so the column can be made
        numeric.

    Raises:
        ValueError: if a clock string has no ``:`` or non-numeric parts.
    """
    new_times = []
    new_quarts = []

    #pair the values positionally instead of looking them up by label, so a
    #Series whose index is not 0..n-1 (e.g. a filtered frame) works too.
    for quart, clock in zip(quarters, times):
        #split time fields into minutes and seconds (on the first colon)
        minute_text, _, second_text = clock.partition(':')
        minute = int(minute_text)
        second = int(second_text)

        #Overtime is a special case that we won't dive too deep into,
        #just set the total time to the quarter time.
        if quart == "OT" or quart == "5" or quart == 5:
            quart = 5
        else:
            #converts to full game clock time: add 15 minutes per quarter still to play
            minute = minute + ((4 - int(quart)) * 15)
            if minute > 59:
                #datetime.time cannot hold 60 minutes, so the opening 15:00 becomes 59:59
                minute = 59
                second = 59
            elif minute < 0:
                minute = 0
                second = 0

        #notice we return both the quarter and our new Total Time field.
        #This is so we can set "OT" for overtime to "5" to make that column numeric.
        new_quarts.append(quart)
        new_times.append(datetime.time(minute = minute, second = second))

    return [new_times, new_quarts]


pd.set_option('display.max_rows', 200)
#add total time column and sort by it
def fix1(any_table):
    """Add a 'Total Time' column and return the rows in chronological order.

    The 'Total Time' and numeric 'Quarter' columns are written onto
    ``any_table`` itself; the sorted, re-indexed frame that is returned is a
    new object.

    Args:
        any_table: DataFrame with 'Quarter' and 'Time' columns.

    Returns:
        DataFrame sorted by 'Quarter' ascending and 'Total Time' descending,
        with a fresh 0..n-1 index.
    """
    total_times, quarter_numbers = get_total_time(any_table['Quarter'], any_table['Time'])

    #write both columns onto the caller's frame, then make Quarter numeric
    any_table['Total Time'] = total_times
    any_table['Quarter'] = quarter_numbers
    any_table['Quarter'] = pd.to_numeric(any_table['Quarter'])

    #earliest quarter first; within a quarter, most time remaining first
    ordered = any_table.sort_values(['Quarter', 'Total Time'], ascending = [True, False])
    return ordered.reset_index(drop = True)

#put the example game's drives in chronological order
all_drives = fix1(all_drives)
all_drives


#give the 4th-down plays the same "Total Time" clock as the drives
#get_total_time returns [times, quarters]; quarters has overtime replaced by 5.
new_times = get_total_time(playbyplay_4ths['Quarter'], playbyplay_4ths['Time'])
playbyplay_4ths['Total Time'] = new_times[0]
playbyplay_4ths['Quarter'] = new_times[1]
playbyplay_4ths['Quarter'] = pd.to_numeric(playbyplay_4ths['Quarter'])
playbyplay_4ths


pd.set_option('display.max_rows', 200)

def _advance_drive_pointer(pointer, rows, drive_starts, time):
    #Move pointer past every drive in rows that started before the play at `time`.
    while pointer < len(rows) and drive_starts[rows[pointer]] > time:
        pointer = pointer + 1
    return pointer


def match_drives_with_plays(drive_table, play_sheet):
    """Attach to every play the drive it happened on.

    Both tables are walked once in chronological order: a play belongs to the
    last drive whose start ('Total Time', i.e. time remaining) is strictly
    greater than the play's own 'Total Time'. Overtime (Quarter == 5) has its
    own clock, so overtime plays are matched against overtime drives only.

    Args:
        drive_table: Chronologically sorted drives with 'Quarter',
            'Total Time', 'Team', 'Result' and 'Plays' columns.
        play_sheet: Chronologically ordered plays with 'Quarter' and
            'Total Time' columns.

    Returns:
        ``play_sheet`` itself (modified in place) with four new columns:
        'Drive #' (row position of the drive in ``drive_table``), 'Team',
        'Drive Result' and '# of Plays'.
    """
    #plain lists give positional access that does not depend on the frame's index
    drive_starts = list(drive_table['Total Time'])
    drive_teams = list(drive_table['Team'])
    drive_results = list(drive_table['Result'])
    drive_plays = list(drive_table['Plays'])

    #row positions of regulation and overtime drives, each in table order
    regulation_rows = []
    overtime_rows = []
    for position, quarter in enumerate(drive_table['Quarter']):
        if quarter == 5:
            overtime_rows.append(position)
        else:
            regulation_rows.append(position)

    #both pointers start at 0 and never reset since we go thru the plays once.
    drive = 0
    ot_drive = 0

    drive_num = []
    teams = []
    results = []
    total_plays_in_drive = []

    #for every 4th down play...
    for quarter, time in zip(play_sheet['Quarter'], play_sheet['Total Time']):
        if quarter == 5 and overtime_rows:
            ot_drive = _advance_drive_pointer(ot_drive, overtime_rows, drive_starts, time)
            #index straight into the overtime rows. Offsetting from the
            #regulation pointer is wrong whenever the last regulation 4th down
            #came before the last regulation drive.
            index_of = overtime_rows[max(ot_drive - 1, 0)]
        else:
            if quarter != 5:
                #for every drive...try to match the play with the drive
                drive = _advance_drive_pointer(drive, regulation_rows, drive_starts, time)
            #an overtime play with no overtime drives listed keeps the most
            #recently matched regulation drive. max() keeps a play that is not
            #later than the first drive start on that first drive instead of
            #producing index -1.
            index_of = regulation_rows[max(drive - 1, 0)]

        #extract data
        drive_num.append(index_of)
        teams.append(drive_teams[index_of])
        results.append(drive_results[index_of])
        total_plays_in_drive.append(drive_plays[index_of])

    #we are ready to add new columns to the table (written onto the caller's frame).
    play_sheet['Drive #'] = drive_num
    play_sheet['Team'] = teams
    play_sheet['Drive Result'] = results
    play_sheet['# of Plays'] = total_plays_in_drive
    return play_sheet

#the return value is not assigned: the new columns are added to playbyplay_4ths in place.
match_drives_with_plays(all_drives, playbyplay_4ths)


def update_locations(play_sheet):
    """Replace the 'Location' column with the distance to the end zone.

    Rows are processed by position, so the frame's index does not matter.

    Args:
        play_sheet: DataFrame with 'Team' (team that has the ball) and
            'Location' columns. A location is text such as ``"PHI 37"``:
            the side of the field, a space, then the yard line.

    Returns:
        A new DataFrame without 'Location' and with a 'To EndZone' column of
        integer yards. As a side effect 'To EndZone' is also added to
        ``play_sheet`` itself.

    Raises:
        ValueError: if the yard-line part of a location is not an integer.
    """
    new_locs = []
    for team, location in zip(play_sheet['Team'], play_sheet['Location']):
        #split on whitespace. first part is which half of the field we're on.
        #the second part is how many yards to go to the closest endzone.
        parts = location.split()

        if len(parts) == 1:
            #NOTE(review): a bare yard line with no team prefix (presumably
            #midfield) is taken as that many yards from the end zone -- confirm
            #against the site's format.
            half = None
            yard_line = int(parts[0])
        else:
            half = parts[0]
            yard_line = int(parts[1])

        #inside "your own half", you have to get to midfield plus another 50 yards.
        if team == half:
            new_locs.append((50 - yard_line) + 50)
        #on the other half, you just have to get to the endzone.
        else:
            new_locs.append(yard_line)

    play_sheet['To EndZone'] = new_locs
    return play_sheet.drop(['Location'], axis = 1)
    
#swap the textual field position for a numeric "To EndZone" distance
playbyplay_4ths = update_locations(playbyplay_4ths)
playbyplay_4ths


def update_scores(play_sheet):
    """Replace the two per-team score columns with one score margin.

    Args:
        play_sheet: DataFrame with a 'Team' column (team that has the ball)
            whose columns at positions 5 and 6 hold the visiting and home
            team scores and are named after those teams.

    Returns:
        A new DataFrame without the two score columns and with a 'Score'
        column: points of the team with the ball minus points of the other
        team. ``play_sheet`` is left unchanged.
    """
    #get names of the teams like before. Done once, up front, so an empty
    #sheet no longer fails on undefined names.
    column_names = play_sheet.columns.values
    vis_name = column_names[5]
    hom_name = column_names[6]

    new_scores = []
    #walk the three columns in lockstep (positional, independent of the index)
    for team, vis_raw, hom_raw in zip(play_sheet['Team'], play_sheet[vis_name], play_sheet[hom_name]):
        #score calculation
        vis_points = int(vis_raw)
        hom_points = int(hom_raw)
        if team == vis_name:
            new_scores.append(vis_points - hom_points)
        else:
            new_scores.append(hom_points - vis_points)

    new_sheet = play_sheet.drop([vis_name, hom_name], axis = 1)
    new_sheet['Score'] = new_scores
    return new_sheet
        
#collapse the two score columns into a single margin for the team with the ball
playbyplay_4ths_final = update_scores(playbyplay_4ths)
playbyplay_4ths_final


#(full team name, boxscore url code) for every team, kept side by side so a
#name and its code cannot drift out of step.
_TEAM_TABLE = [
    ('Philadelphia Eagles', 'phi'),
    ('Dallas Cowboys', 'dal'),
    ('Washington Football Team', 'was'),
    ('New York Giants', 'nyg'),
    ('Detroit Lions', 'det'),
    ('Green Bay Packers', 'gnb'),
    ('Minnesota Vikings', 'min'),
    ('Chicago Bears', 'chi'),
    ('Carolina Panthers', 'car'),
    ('Tampa Bay Buccaneers', 'tam'),
    ('New Orleans Saints', 'nor'),
    ('Atlanta Falcons', 'atl'),
    ('Seattle Seahawks', 'sea'),
    ('Arizona Cardinals', 'crd'),
    ('Los Angeles Rams', 'ram'),
    ('San Francisco 49ers', 'sfo'),
    ('New England Patriots', 'nwe'),
    ('Miami Dolphins', 'mia'),
    ('Buffalo Bills', 'buf'),
    ('New York Jets', 'nyj'),
    ('Pittsburgh Steelers', 'pit'),
    ('Baltimore Ravens', 'rav'),
    ('Cleveland Browns', 'cle'),
    ('Cincinnati Bengals', 'cin'),
    ('Jacksonville Jaguars', 'jax'),
    ('Indianapolis Colts', 'clt'),
    ('Tennessee Titans', 'oti'),
    ('Houston Texans', 'htx'),
    ('Kansas City Chiefs', 'kan'),
    ('Denver Broncos', 'den'),
    ('Las Vegas Raiders', 'rai'),
    ('Los Angeles Chargers', 'sdg'),
]

#parallel lists: team_codes[k] is the url code of team_names[k]
team_names = [name for name, _ in _TEAM_TABLE]
team_codes = [code for _, code in _TEAM_TABLE]


from io import StringIO

#download the 2021 schedule. Stop on an HTTP error or timeout rather than
#parsing an error page and failing later on a missing table.
r = requests.get("https://www.pro-football-reference.com/years/2021/games.htm", timeout = 30)
r.raise_for_status()
root = BeautifulSoup(r.content, "html.parser")

#the schedule table lives in the element with id "div_games"
item = root.find(id="div_games")
#file-like wrapper: passing literal HTML to read_html is deprecated.
tab = pd.read_html(StringIO(str(item)))
schedule_raw = tab[0]

#drop the header rows that are repeated inside the table body
schedule = schedule_raw[schedule_raw['Week'] != "Week"]
display(schedule)


#boxscore page ids, one per game: date digits + '0' + home team code
abbrs = []

#change to this to get all the data available through Week 14 of the NFL season:
#schedule14 = schedule[:208]

# --->  CHANGE ME!  <----
#...if you want to
schedule14 = schedule[:80]

for _, game in schedule14.iterrows():
    #winner is always listed first, so '@' would indicate the home team lost;
    #likewise, the home team won if they are listed first.
    if game['Unnamed: 5'] == "@":
        home_team = game['Loser/tie']
    else:
        home_team = game['Winner/tie']

    #match the team's name to their code in the table defined above
    home_code = team_codes[team_names.index(home_team)]

    abbrs.append(game['Date'].replace('-','') + '0' + home_code)

#abbrs


def get_full_table(url):
    """Build the finished 4th-down table for one game.

    Runs the whole pipeline: scrape the 4th-down plays and the drives, put
    both on the same 'Total Time' clock, match each play to its drive, then
    convert field position and score into numeric columns.

    Args:
        url: URL of a pro-football-reference.com boxscore page.

    Returns:
        DataFrame with one row per 4th-down play of that game.
    """
    plays = get_4th_downs(url)
    drives = fix1(get_drives(url, plays))

    #same clock conversion for the plays; overtime quarters come back as 5
    total_times, quarter_numbers = get_total_time(plays['Quarter'], plays['Time'])
    plays['Total Time'] = total_times
    plays['Quarter'] = quarter_numbers
    plays['Quarter'] = pd.to_numeric(plays['Quarter'])

    #DRIVES, then LOCATIONS, then SCORES
    matched = match_drives_with_plays(drives, plays)
    located = update_locations(matched)
    return update_scores(located)


pd.options.display.max_rows = 15

start = "https://www.pro-football-reference.com/boxscores/"
end = ".htm"

#one finished table per game; they are stitched together once at the end.
#(DataFrame.append no longer exists in pandas 2.x, and appending inside the
#loop re-copies every earlier row on each pass.)
game_tables = []
i = 0

for link in abbrs:
    url = start + link + end

    #just to be safe, but all these links should work.
    try:
        #call all the methods we've built to get our fully usable table
        game_table = get_full_table(url)
    except requests.exceptions.RequestException:
        print("passed on a link: " + str(link))
        continue

    #keep track of what game this happened on as well
    game_table['Game ID'] = i
    game_tables.append(game_table)

    print(url)
    i = i + 1

#pd.concat rejects an empty list, so fall back to an empty frame when every link failed
if game_tables:
    all_4th_downs = pd.concat(game_tables, ignore_index = True)
else:
    all_4th_downs = pd.DataFrame()

display(all_4th_downs)

https://www.pro-football-reference.com/boxscores/202109090tam.htm
https://www.pro-football-reference.com/boxscores/202109120atl.htm
https://www.pro-football-reference.com/boxscores/202109120det.htm
https://www.pro-football-reference.com/boxscores/202109120car.htm
https://www.pro-football-reference.com/boxscores/202109120htx.htm
https://www.pro-football-reference.com/boxscores/202109120cin.htm
https://www.pro-football-reference.com/boxscores/202109120oti.htm
https://www.pro-football-reference.com/boxscores/202109120was.htm
https://www.pro-football-reference.com/boxscores/202109120buf.htm
https://www.pro-football-reference.com/boxscores/202109120clt.htm
https://www.pro-football-reference.com/boxscores/202109120nor.htm
https://www.pro-football-reference.com/boxscores/202109120nwe.htm
https://www.pro-football-reference.com/boxscores/202109120kan.htm
https://www.pro-football-reference.com/boxscores/202109120nyg.htm
https://www.pro-football-reference.com/boxscores/202109120ram.htm
https://www.pro-football-reference.com/boxscores/202109130rai.htm
https://www.pro-football-reference.com/boxscores/202109160was.htm
https://www.pro-football-reference.com/boxscores/202109190jax.htm
https://www.pro-football-reference.com/boxscores/202109190mia.htm
https://www.pro-football-reference.com/boxscores/202109190chi.htm
https://www.pro-football-reference.com/boxscores/202109190clt.htm
https://www.pro-football-reference.com/boxscores/202109190phi.htm
https://www.pro-football-reference.com/boxscores/202109190pit.htm
https://www.pro-football-reference.com/boxscores/202109190car.htm
https://www.pro-football-reference.com/boxscores/202109190cle.htm
https://www.pro-football-reference.com/boxscores/202109190nyj.htm
https://www.pro-football-reference.com/boxscores/202109190tam.htm
https://www.pro-football-reference.com/boxscores/202109190crd.htm
https://www.pro-football-reference.com/boxscores/202109190sea.htm
https://www.pro-football-reference.com/boxscores/202109190sdg.htm
https://www.pro-football-reference.com/boxscores/202109190rav.htm
https://www.pro-football-reference.com/boxscores/202109200gnb.htm
https://www.pro-football-reference.com/boxscores/202109230htx.htm
https://www.pro-football-reference.com/boxscores/202109260nyg.htm
https://www.pro-football-reference.com/boxscores/202109260det.htm
https://www.pro-football-reference.com/boxscores/202109260kan.htm
https://www.pro-football-reference.com/boxscores/202109260jax.htm
https://www.pro-football-reference.com/boxscores/202109260buf.htm
https://www.pro-football-reference.com/boxscores/202109260cle.htm
https://www.pro-football-reference.com/boxscores/202109260pit.htm
https://www.pro-football-reference.com/boxscores/202109260nwe.htm
https://www.pro-football-reference.com/boxscores/202109260oti.htm
https://www.pro-football-reference.com/boxscores/202109260den.htm
https://www.pro-football-reference.com/boxscores/202109260rai.htm
https://www.pro-football-reference.com/boxscores/202109260min.htm
https://www.pro-football-reference.com/boxscores/202109260ram.htm
https://www.pro-football-reference.com/boxscores/202109260sfo.htm
https://www.pro-football-reference.com/boxscores/202109270dal.htm
https://www.pro-football-reference.com/boxscores/202109300cin.htm
https://www.pro-football-reference.com/boxscores/202110030buf.htm
https://www.pro-football-reference.com/boxscores/202110030chi.htm
https://www.pro-football-reference.com/boxscores/202110030min.htm
https://www.pro-football-reference.com/boxscores/202110030nor.htm
https://www.pro-football-reference.com/boxscores/202110030mia.htm
https://www.pro-football-reference.com/boxscores/202110030nyj.htm
https://www.pro-football-reference.com/boxscores/202110030atl.htm
https://www.pro-football-reference.com/boxscores/202110030dal.htm
https://www.pro-football-reference.com/boxscores/202110030phi.htm
https://www.pro-football-reference.com/boxscores/202110030sfo.htm
https://www.pro-football-reference.com/boxscores/202110030ram.htm
https://www.pro-football-reference.com/boxscores/202110030den.htm
https://www.pro-football-reference.com/boxscores/202110030gnb.htm
https://www.pro-football-reference.com/boxscores/202110030nwe.htm
https://www.pro-football-reference.com/boxscores/202110040sdg.htm
https://www.pro-football-reference.com/boxscores/202110070sea.htm
https://www.pro-football-reference.com/boxscores/202110100atl.htm
https://www.pro-football-reference.com/boxscores/202110100car.htm
https://www.pro-football-reference.com/boxscores/202110100tam.htm
https://www.pro-football-reference.com/boxscores/202110100pit.htm
https://www.pro-football-reference.com/boxscores/202110100min.htm
https://www.pro-football-reference.com/boxscores/202110100htx.htm
https://www.pro-football-reference.com/boxscores/202110100jax.htm
https://www.pro-football-reference.com/boxscores/202110100cin.htm
https://www.pro-football-reference.com/boxscores/202110100was.htm
https://www.pro-football-reference.com/boxscores/202110100rai.htm
https://www.pro-football-reference.com/boxscores/202110100sdg.htm
https://www.pro-football-reference.com/boxscores/202110100crd.htm
https://www.pro-football-reference.com/boxscores/202110100dal.htm
https://www.pro-football-reference.com/boxscores/202110100kan.htm
https://www.pro-football-reference.com/boxscores/202110110rav.htm


import matplotlib.pyplot as plt

#set size to a more readable region
import seaborn as sns
sns.set(rc = {'figure.figsize':(15,8)})

#bar chart: number of 4th-down plays recorded in each quarter
fig, ax = plt.subplots()
ax.set_ylabel("# of 4th downs played")
ax.set_xlabel("Quarter")
#value_counts tallies the rows per distinct Quarter value; the tallies are drawn on the axes made above
all_4th_downs['Quarter'].value_counts().plot(ax = ax, kind='bar')

<AxesSubplot:xlabel='Quarter', ylabel='# of 4th downs played'>


import seaborn as sns

#countplots count the number of occurrences of each value.
#here: how many 4th downs fall on drives of each length (in plays)
ax = sns.countplot(x="# of Plays",data=all_4th_downs)


#increase figure size
sns.set(rc = {'figure.figsize':(15,8)})
#how many 4th downs each team faced
ax = sns.countplot(x="Team",data=all_4th_downs)


pd.set_option('display.max_rows', 10)

def _classify_play(detail):
    #Label one play description; the checks run in priority order.
    words = detail.lower()

    #"punt" also covers "punts"
    if "punt" in words:
        return "Punt"
    if "field goal" in words:
        return "Field Goal"
    if "penalty" in words and "accept" in words:
        return "Penalty Play"
    #anything else is treated as an attempt to convert
    return "Conversion"

#attempt to classify each play based on its description
classification = [_classify_play(detail) for detail in all_4th_downs['Detail']]

#add new column for the classifications we came up with
all_4th_downs['Decision'] = classification
all_4th_downs


import seaborn as sns

#how often each decision was made overall
ax = sns.countplot(x="Decision",data=all_4th_downs)


#count plays per (distance to end zone, decision) pair, one column per decision
yards_togo = all_4th_downs.groupby(["To EndZone", "Decision"]).size().unstack(level=1)
#penalties are not coaching decisions, so leave them out of the chart.
#NOTE(review): this drop presumably fails if no play was labelled "Penalty Play" -- confirm for small samples.
yards_togo = yards_togo.drop(["Penalty Play"], axis=1)


#tall figure: there is one bar per distinct distance
sns.set(rc = {'figure.figsize':(10,20)})
yards_togo.plot(
    kind = 'barh',
    stacked = True,
    title = '4th Down Decisions by Location on Field',
    mark_right = True)

<AxesSubplot:title={'center':'4th Down Decisions by Location on Field'}, ylabel='To EndZone'>


#count plays per (score margin, decision) pair, one column per decision
score_table = all_4th_downs.groupby(["Score", "Decision"]).size().unstack(level=1)
#penalties are not coaching decisions, so leave them out of the chart
score_table = score_table.drop(["Penalty Play"], axis=1)

#same idea as before, stacked bar graph is a good representation
sns.set(rc = {'figure.figsize':(10,20)})
score_table.plot(
    kind = 'barh',
    stacked = True,
    title = '4th Down Decisions by Score',
    mark_right = True)

<AxesSubplot:title={'center':'4th Down Decisions by Score'}, ylabel='Score'>


#count plays per (quarter, decision) pair.
#note: the name score_table is reused here although the rows are now quarters.
score_table = all_4th_downs.groupby(["Quarter", "Decision"]).size().unstack(level=1)
#penalties are not coaching decisions, so leave them out of the chart
score_table = score_table.drop(["Penalty Play"], axis=1)

sns.set(rc = {'figure.figsize':(10,12)})
score_table.plot(
    kind = 'barh',
    stacked = True,
    title = '4th Down Decisions by Quarter',
    mark_right = True)

<AxesSubplot:title={'center':'4th Down Decisions by Quarter'}, ylabel='Quarter'>


#discard everything the model should not see; what remains of the columns
#named here is Quarter, ToGo, To EndZone, Score and (for now) Decision.
columns_to_discard = ["Down", "Time", "Detail", "Total Time", "Drive #", "Team",
                      "Drive Result", "# of Plays", "Game ID"]
brief_table = all_4th_downs.drop(columns = columns_to_discard)

#we only care about coaching decisions- ignore penalties
is_coaching_decision = brief_table['Decision'] != 'Penalty Play'
brief_table = brief_table[is_coaching_decision].reset_index(drop = True)

#the decisions column will be our "y" column, and this table, our "X".
decisions = brief_table['Decision']
brief_table = brief_table.drop(columns = ["Decision"])

#convert all feature columns to numeric fields
for feature in ('Quarter', 'ToGo', 'To EndZone', 'Score'):
    brief_table[feature] = pd.to_numeric(brief_table[feature])

brief_table


#!pip install scikit-learn
from sklearn.model_selection import train_test_split
from sklearn import tree

decider = tree.DecisionTreeClassifier()
#partition the data into training and testing components (a quarter is held out for testing)
#NOTE(review): no random_state is passed, so presumably the split -- and the accuracy below -- differs from run to run.
X_train, X_test, y_train, y_test = train_test_split(brief_table, decisions, test_size = 0.25)
decider.fit(X_train, y_train)

X_test


#predict a decision for every held-out play
predictions = decider.predict(X_test)
#renumber the rows 0..n-1 and attach the predictions, which are in the same row order
X_test = X_test.reset_index(drop=True)
X_test['D.T. Reccomendation'] = predictions
X_test


#this plots the decision tree so we can see the logic.
#tree.plot_tree(decider)


#put the model's recommendation next to what the coach actually did
evaluation_table = X_test.reset_index(drop=True)
evaluation_table['Actual Decision'] = y_test.reset_index(drop=True)

def _score_label(points):
    #Describe a score margin from the point of view of the team with the ball.
    if points > 0:
        return "Up " + str(points)
    if points < 0:
        return "Down " + str(points*-1)
    return "Tied"

#human-readable versions of the distance-to-go and score columns
new_sticks = ["4th & " + str(togo) for togo in evaluation_table['ToGo']]
new_scores = [_score_label(points) for points in evaluation_table['Score']]
evaluation_table['ToGo'] = new_sticks
evaluation_table['Score'] = new_scores
evaluation_table


#accuracy = share of test plays where the recommendation equals the actual decision
agreement = evaluation_table['D.T. Reccomendation'] == evaluation_table['Actual Decision']
count = int(agreement.sum())

print("Accuracy: " + str((count / len(evaluation_table) * 100)) + "%")

Accuracy: 80.0%

	Quarter	Time	Down	ToGo	Location	Detail	PHI	NYJ
0	2	9:26	4	1	PHI 1	Zach Wilson pass complete to Ryan Griffin for ...	14	18
1	2	2:24	4	13	NYJ 22	Braden Mann punts 45 yards, recovered by Jalen...	21	18
2	2	0:22	4	11	NYJ 13	Jake Elliott 31 yard field goal good	24	18
3	3	7:05	4	14	NYJ 14	Jake Elliott 32 yard field goal good	27	18
4	3	5:39	4	8	NYJ 33	Braden Mann punts 33 yards downed by Daniel Brown	27	18
5	3	2:52	4	4	NYJ 48	Penalty on C.J. Mosley: Encroachment, 5 yards ...	27	18
6	3	1:24	4	2	NYJ 34	Gardner Minshew up the middle for no gain (tac...	27	18
7	4	14:18	4	4	NYJ 25	Jake Elliott 43 yard field goal good	30	18
8	4	9:48	4	5	NYJ 28	Jake Elliott 46 yard field goal good	33	18
9	4	5:29	4	10	PHI 37	Zach Wilson pass incomplete intended for Ty Jo...	33	18
10	4	1:54	4	26	PHI 43	Arryn Siposs punts 38 yards, fair catch by Bra...	33	18
11	4	0:31	4	3	NYJ 44	Zach Wilson pass complete short left to Ty Joh...	33	18

	Quarter	Time	LOS	Plays	Length	Net Yds	Result	Team
0	1	12:42	PHI 25	6	3:17	75	Touchdown	PHI
1	1	3:18	PHI 31	8	4:06	69	Touchdown	PHI
2	2	9:22	PHI 6	8	4:52	94	Touchdown	PHI
3	2	2:10	NYJ 47	10	1:51	34	Field Goal	PHI
4	3	15:00	PHI 16	14	8:22	70	Field Goal	PHI
5	3	5:28	PHI 34	13	6:13	41	Field Goal	PHI
6	4	13:02	PHI 24	6	3:20	48	Field Goal	PHI
7	4	5:23	PHI 37	7	3:35	6	Punt	PHI
8	1	15:00	PHI 21	5	2:18	21	Touchdown	NYJ
9	1	9:25	NYJ 32	11	6:07	68	Touchdown	NYJ
10	2	14:12	NYJ 25	9	4:50	75	Touchdown	NYJ
11	2	4:30	NYJ 25	3	2:20	-3	Punt	NYJ
12	2	0:19	NYJ 25	1	0:19	-1	End of Half	NYJ
13	3	6:38	NYJ 30	3	1:10	3	Punt	NYJ
14	4	14:15	NYJ 25	6	1:13	32	Interception	NYJ
15	4	9:42	NYJ 25	11	4:19	38	Downs	NYJ
16	4	1:48	NYJ 19	8	1:48	43	End of Game	NYJ

	Quarter	Time	LOS	Plays	Length	Net Yds	Result	Team	Total Time
0	1	15:00	PHI 21	5	2:18	21	Touchdown	NYJ	00:59:59
1	1	12:42	PHI 25	6	3:17	75	Touchdown	PHI	00:57:42
2	1	9:25	NYJ 32	11	6:07	68	Touchdown	NYJ	00:54:25
3	1	3:18	PHI 31	8	4:06	69	Touchdown	PHI	00:48:18
4	2	14:12	NYJ 25	9	4:50	75	Touchdown	NYJ	00:44:12
5	2	9:22	PHI 6	8	4:52	94	Touchdown	PHI	00:39:22
6	2	4:30	NYJ 25	3	2:20	-3	Punt	NYJ	00:34:30
7	2	2:10	NYJ 47	10	1:51	34	Field Goal	PHI	00:32:10
8	2	0:19	NYJ 25	1	0:19	-1	End of Half	NYJ	00:30:19
9	3	15:00	PHI 16	14	8:22	70	Field Goal	PHI	00:30:00
10	3	6:38	NYJ 30	3	1:10	3	Punt	NYJ	00:21:38
11	3	5:28	PHI 34	13	6:13	41	Field Goal	PHI	00:20:28
12	4	14:15	NYJ 25	6	1:13	32	Interception	NYJ	00:14:15
13	4	13:02	PHI 24	6	3:20	48	Field Goal	PHI	00:13:02
14	4	9:42	NYJ 25	11	4:19	38	Downs	NYJ	00:09:42
15	4	5:23	PHI 37	7	3:35	6	Punt	PHI	00:05:23
16	4	1:48	NYJ 19	8	1:48	43	End of Game	NYJ	00:01:48

	Quarter	Time	Down	ToGo	Location	Detail	PHI	NYJ	Total Time
0	2	9:26	4	1	PHI 1	Zach Wilson pass complete to Ryan Griffin for ...	14	18	00:39:26
1	2	2:24	4	13	NYJ 22	Braden Mann punts 45 yards, recovered by Jalen...	21	18	00:32:24
2	2	0:22	4	11	NYJ 13	Jake Elliott 31 yard field goal good	24	18	00:30:22
3	3	7:05	4	14	NYJ 14	Jake Elliott 32 yard field goal good	27	18	00:22:05
4	3	5:39	4	8	NYJ 33	Braden Mann punts 33 yards downed by Daniel Brown	27	18	00:20:39
5	3	2:52	4	4	NYJ 48	Penalty on C.J. Mosley: Encroachment, 5 yards ...	27	18	00:17:52
6	3	1:24	4	2	NYJ 34	Gardner Minshew up the middle for no gain (tac...	27	18	00:16:24
7	4	14:18	4	4	NYJ 25	Jake Elliott 43 yard field goal good	30	18	00:14:18
8	4	9:48	4	5	NYJ 28	Jake Elliott 46 yard field goal good	33	18	00:09:48
9	4	5:29	4	10	PHI 37	Zach Wilson pass incomplete intended for Ty Jo...	33	18	00:05:29
10	4	1:54	4	26	PHI 43	Arryn Siposs punts 38 yards, fair catch by Bra...	33	18	00:01:54
11	4	0:31	4	3	NYJ 44	Zach Wilson pass complete short left to Ty Joh...	33	18	00:00:31

	Quarter	Time	Down	ToGo	Location	Detail	PHI	NYJ	Total Time	Drive #	Team	Drive Result	# of Plays
0	2	9:26	4	1	PHI 1	Zach Wilson pass complete to Ryan Griffin for ...	14	18	00:39:26	4	NYJ	Touchdown	9
1	2	2:24	4	13	NYJ 22	Braden Mann punts 45 yards, recovered by Jalen...	21	18	00:32:24	6	NYJ	Punt	3
2	2	0:22	4	11	NYJ 13	Jake Elliott 31 yard field goal good	24	18	00:30:22	7	PHI	Field Goal	10
3	3	7:05	4	14	NYJ 14	Jake Elliott 32 yard field goal good	27	18	00:22:05	9	PHI	Field Goal	14
4	3	5:39	4	8	NYJ 33	Braden Mann punts 33 yards downed by Daniel Brown	27	18	00:20:39	10	NYJ	Punt	3
5	3	2:52	4	4	NYJ 48	Penalty on C.J. Mosley: Encroachment, 5 yards ...	27	18	00:17:52	11	PHI	Field Goal	13
6	3	1:24	4	2	NYJ 34	Gardner Minshew up the middle for no gain (tac...	27	18	00:16:24	11	PHI	Field Goal	13
7	4	14:18	4	4	NYJ 25	Jake Elliott 43 yard field goal good	30	18	00:14:18	11	PHI	Field Goal	13
8	4	9:48	4	5	NYJ 28	Jake Elliott 46 yard field goal good	33	18	00:09:48	13	PHI	Field Goal	6
9	4	5:29	4	10	PHI 37	Zach Wilson pass incomplete intended for Ty Jo...	33	18	00:05:29	14	NYJ	Downs	11
10	4	1:54	4	26	PHI 43	Arryn Siposs punts 38 yards, fair catch by Bra...	33	18	00:01:54	15	PHI	Punt	7
11	4	0:31	4	3	NYJ 44	Zach Wilson pass complete short left to Ty Joh...	33	18	00:00:31	16	NYJ	End of Game	8

4th and Onwards!

A full analysis of 4th downs in today's NFL -- By Alex Ackler

BACKGROUND INFO: Rules and Terms

BACKGROUND INFO: Choices on 4th down

Data Scraping

Data Tidying

Exploratory Data Analysis

Advanced Analysis

Advanced Analysis, Part 2

Predictions

Conclusion

	Week	Day	Date	Time	Winner/tie	Unnamed: 5	Loser/tie	Unnamed: 7	PtsW	PtsL	YdsW	TOW	YdsL	TOL
0	1	Thu	2021-09-09	8:20PM	Tampa Bay Buccaneers	NaN	Dallas Cowboys	boxscore	31	29	431	4	451	1
1	1	Sun	2021-09-12	1:00PM	Philadelphia Eagles	@	Atlanta Falcons	boxscore	32	6	434	0	260	0
2	1	Sun	2021-09-12	1:00PM	San Francisco 49ers	@	Detroit Lions	boxscore	41	33	442	2	430	1
3	1	Sun	2021-09-12	1:00PM	Carolina Panthers	NaN	New York Jets	boxscore	19	14	381	1	252	1
4	1	Sun	2021-09-12	1:00PM	Houston Texans	NaN	Jacksonville Jaguars	boxscore	37	21	449	0	395	3
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
284	18	Sun	2022-01-09	1:00PM	New England Patriots	@	Miami Dolphins	preview	NaN	NaN	NaN	NaN	NaN	NaN
285	18	Sun	2022-01-09	4:25PM	San Francisco 49ers	@	Los Angeles Rams	preview	NaN	NaN	NaN	NaN	NaN	NaN
286	18	Sun	2022-01-09	4:25PM	Seattle Seahawks	@	Arizona Cardinals	preview	NaN	NaN	NaN	NaN	NaN	NaN
287	18	Sun	2022-01-09	4:25PM	Kansas City Chiefs	@	Denver Broncos	preview	NaN	NaN	NaN	NaN	NaN	NaN
288	18	Sun	2022-01-09	4:25PM	Los Angeles Chargers	@	Las Vegas Raiders	preview	NaN	NaN	NaN	NaN	NaN	NaN

	Quarter	Time	Down	ToGo	Detail	Total Time	Drive #	Team	Drive Result	# of Plays	To EndZone	Score	Game ID
0	1	13:26	4	2	Bradley Pinion punts 65 yards out of bounds	00:58:26	0	TAM	Punt	3	67	0	0
1	1	9:42	4	15	Bryan Anger punts 38 yards out of bounds	00:54:42	1	DAL	Punt	9	44	0	0
2	2	15:00	4	5	Bradley Pinion punts downed by Jaydon Mickens....	00:45:00	4	TAM	Punt	5	40	0	0
3	2	14:53	4	15	Bradley Pinion punts 42 yards, returned by Ced...	00:44:53	4	TAM	Punt	5	50	0	0
4	2	12:59	4	3	Penalty on DAL: Delay of Game, 5 yards (accept...	00:42:59	5	DAL	Punt	3	88	0	0
...	...	...	...	...	...	...	...	...	...	...	...	...	...
1161	2	4:34	4	5	Rigoberto Sanchez punts 52 yards, returned by ...	00:34:34	7	IND	Punt	3	77	7	79
1162	2	1:39	4	4	Justin Tucker 23 yard field goal good	00:31:39	8	BAL	Field Goal	9	5	-4	79
1163	2	0:10	4	11	Carson Wentz pass complete short left to Micha...	00:30:10	9	IND	Field Goal	8	37	4	79
1164	4	12:04	4	6	Rodrigo Blankenship 43 yard field goal good	00:12:04	14	IND	Field Goal	9	25	16	79
1165	4	4:37	4	12	Rodrigo Blankenship 37 yard field goal no good...	00:04:37	16	IND	Blocked FG	9	19	8	79

	Quarter	ToGo	To EndZone	Score
0	1	2	67	0
1	1	15	44	0
2	2	5	40	0
3	2	15	50	0
4	2	8	93	0
...	...	...	...	...
1092	2	5	77	7
1093	2	4	5	-4
1094	2	11	37	4
1095	4	6	25	16
1096	4	12	19	8

	Quarter	ToGo	To EndZone	Score
376	4	7	52	-23
459	1	18	60	0
160	1	9	62	-3
438	3	9	44	-1
393	1	4	13	3
...	...	...	...	...
978	2	10	16	11
364	1	6	54	-7
817	1	2	52	0
794	3	15	77	0
205	1	4	38	-10

	Quarter	ToGo	To EndZone	Score	D.T. Reccomendation
0	4	7	52	-23	Conversion
1	1	18	60	0	Punt
2	1	9	62	-3	Punt
3	3	9	44	-1	Punt
4	1	4	13	3	Field Goal
...	...	...	...	...	...
270	2	10	16	11	Field Goal
271	1	6	54	-7	Punt
272	1	2	52	0	Conversion
273	3	15	77	0	Punt
274	1	4	38	-10	Conversion

	Quarter	ToGo	To EndZone	Score	D.T. Reccomendation	Actual Decision
0	4	4th & 7	52	Down 23	Conversion	Conversion
1	1	4th & 18	60	Tied	Punt	Punt
2	1	4th & 9	62	Down 3	Punt	Punt
3	3	4th & 9	44	Down 1	Punt	Punt
4	1	4th & 4	13	Up 3	Field Goal	Field Goal
...	...	...	...	...	...	...
270	2	4th & 10	16	Up 11	Field Goal	Field Goal
271	1	4th & 6	54	Down 7	Punt	Punt
272	1	4th & 2	52	Tied	Conversion	Punt
273	3	4th & 15	77	Tied	Punt	Punt
274	1	4th & 4	38	Down 10	Conversion	Conversion

	Quarter	ToGo	To EndZone	Score
376	4	7	52	-23
459	1	18	60	0
160	1	9	62	-3
438	3	9	44	-1
393	1	4	13	3
...	...	...	...	...
978	2	10	16	11
364	1	6	54	-7
817	1	2	52	0
794	3	15	77	0
205	1	4	38	-10

	Quarter	ToGo	To EndZone	Score
376	4	7	52	-23
459	1	18	60	0
160	1	9	62	-3
438	3	9	44	-1
393	1	4	13	3
...	...	...	...	...
978	2	10	16	11
364	1	6	54	-7
817	1	2	52	0
794	3	15	77	0
205	1	4	38	-10