# Income Taxes
- https://en.wikipedia.org/wiki/Income_tax
- https://en.wikipedia.org/wiki/Income_tax_in_the_United_States
- https://en.wikipedia.org/wiki/Adjusted_gross_income
- https://en.wikipedia.org/wiki/Tax_bracket#Tax_brackets_in_the_United_States
- https://en.wikipedia.org/wiki/Rate_schedule_(federal_income_tax)
  - https://www.nerdwallet.com/blog/taxes/federal-income-tax-brackets/
- https://en.wikipedia.org/wiki/Tax_deduction
  - https://en.wikipedia.org/wiki/Itemized_deduction
  - https://www.nerdwallet.com/blog/taxes/tax-deductions-tax-breaks/
  - https://en.wikipedia.org/wiki/Tax_credit#United_States
- https://github.com/PSLmodels/Tax-Calculator/blob/master/taxcalc/calcfunctions.py

## Self-employment Tax
- https://www.irs.gov/taxtopics/tc554
- https://www.irs.gov/help/ita/do-i-have-income-subject-to-self-employment-tax
- https://www.nerdwallet.com/blog/taxes/self-employment-tax/
- https://www.irs.gov/businesses/small-businesses-self-employed/questions-and-answers-for-the-additional-medicare-tax

## Small-business Taxes
- "Publication 334 (2018), Tax Guide for Small Business  
  (For Individuals Who Use Schedule C or C-EZ)"  
  https://www.irs.gov/publications/p334

In [1]:
# RATE_SCHEDULE = {'single': [
#    (max_amount, rate)
#]}

In [2]:
import bs4, requests, pandas as pd
import json
import pprint
import typing
from typing import Dict, Union
from numbers import Real

# Create a simple ~structlog
import logging
log = logging.getLogger('tax.rateschedule')
log.setLevel(logging.DEBUG)

def logg(*args, **kwargs):
    output = None
    if args and kwargs:
        output = (args, kwargs)
    elif args:
        output = args if len(args) > 1 else args[0]
    elif kwargs:
        output = kwargs
    #log.debug(json.dumps(output, indent=2))  # int64 is unserializable
    # log.debug(pprint.pformat(output, sort_dicts=False))  # < py3.8 sorts dicts
    log.debug(repr(output))

# Cache HTTP requests
import requests_cache
requests_cache.install_cache('income_tax')

import pytest
try:
    get_ipython()
    import ipytest
    ipytest.config(rewrite_asserts=True, magics=True)
    __file__ = "income_taxes.ipynb"
except:
    pass

In [3]:
def get_tax_rate_table_from_nerdwallet(year=2019):
    """Parse the table from 
    """
    url = 'https://www.nerdwallet.com/blog/taxes/federal-income-tax-brackets/'
    if year != 2019:
        raise ValueError(f"Year requested not supported. Check: {url}")
    resp = requests.get(url)
    #bs = bs4.BeautifulSoup(resp.text)
    #tables = bs.findAll('table', {'class': "tablepress"})
    #tbl = tables[0]
    df = pd.read_html(resp.text)[0]
    return df
tax_rate_html_df = get_tax_rate_table_from_nerdwallet(year=2019)
tax_rate_html_df

Unnamed: 0,Tax rate,Single,"Married, filing jointly","Married, filing separately",Head of household
0,10%,"$0 to $9,700","$0 to $19,400","$0 to $9,700","$0 to $13,850"
1,12%,"$9,701 to $39,475","$19,401 to $78,950","$9,701 to $39,475","$13,851 to $52,850"
2,22%,"$39,476 to $84,200","$78,951 to $168,400","$39,476 to $84,200","$52,851 to $84,200"
3,24%,"$84,201 to $160,725","$168,401 to $321,450","$84,201 to $160,725","$84,201 to $160,700"
4,32%,"$160,726 to $204,100","$321,451 to $408,200","$160,726 to $204,100","$160,701 to $204,100"
5,35%,"$204,101 to $510,300","$408,201 to $612,350","$204,101 to $306,175","$204,101 to $510,300"
6,37%,"$510,301 or more","$612,351 or more","$306,176 or more","$510,301 or more"


In [4]:
def clean_and_split_column(row):
    x = str(row)
    values = None
    if " to " in x:
        values = x.split(" to ", 1)
    elif " or " in x:
        values = [x.split(" or ", 1)[0], "-1"]
    else:
        raise ValueError((row, row.name, values))
    if len(values) == 1:
        raise ValueError((x, values))
    return pd.Series(int(x) for x in values)

def reshape_tax_rate_table_from_nerdwallet(df):
    df.columns = [col.lower() for col in df.columns]

    colnames = ['single', 'married, filing jointly',
                'married, filing separately', 'head of household']
    for col in [c for c in colnames if c in df.columns]:
        df[col] = df[col].apply(lambda x: x.replace("$", "").replace(",",""))
        cols = [f"{col} min", f"{col} max"]
        output = df[col].apply(clean_and_split_column)
        df[cols] = output
        #print(df[cols])
    df['tax rate'] = pd.to_numeric(df['tax rate'].str.rstrip('%')) / 100.0
    return df

tax_rate_df = reshape_tax_rate_table_from_nerdwallet(tax_rate_html_df)
tax_rate_df

Unnamed: 0,tax rate,single,"married, filing jointly","married, filing separately",head of household,single min,single max,"married, filing jointly min","married, filing jointly max","married, filing separately min","married, filing separately max",head of household min,head of household max
0,0.1,0 to 9700,0 to 19400,0 to 9700,0 to 13850,0,9700,0,19400,0,9700,0,13850
1,0.12,9701 to 39475,19401 to 78950,9701 to 39475,13851 to 52850,9701,39475,19401,78950,9701,39475,13851,52850
2,0.22,39476 to 84200,78951 to 168400,39476 to 84200,52851 to 84200,39476,84200,78951,168400,39476,84200,52851,84200
3,0.24,84201 to 160725,168401 to 321450,84201 to 160725,84201 to 160700,84201,160725,168401,321450,84201,160725,84201,160700
4,0.32,160726 to 204100,321451 to 408200,160726 to 204100,160701 to 204100,160726,204100,321451,408200,160726,204100,160701,204100
5,0.35,204101 to 510300,408201 to 612350,204101 to 306175,204101 to 510300,204101,510300,408201,612350,204101,306175,204101,510300
6,0.37,510301 or more,612351 or more,306176 or more,510301 or more,510301,-1,612351,-1,306176,-1,510301,-1


In [5]:
log.setLevel(logging.INFO)
class RateSchedule_USA:
    COUNTRY = 'USA'
    FILING_STATUSES = dict.fromkeys((
        'single', 'married, filing jointly',
        'married, filing separately', 'head of household'
    ))
    def __init__(self, df: pd.DataFrame):
        self.df = df
        
    def get_rate(self, taxable_income: Real, filingstatus: str) -> dict:
        """
        Args:
            taxable_income (Number): Taxable Income
            filing_status (str): Filing Status
        Returns:
            dict: ``{taxes_due, tax_bracket, after_tax_income, effective_tax_rate}``
        """
        _filingstatus = filingstatus.lower()
        if _filingstatus not in self.FILING_STATUSES:
            raise ValueError(f"{filingstatus} is not in {self.FILING_STATUSES}")
        
        ctx = dict()
        ctx['taxable_income']: Number = taxable_income
        ctx['taxes_due']: Number = None
        ctx['tax_bracket']: Number = None
        ctx['after_tax_income']: Number = None
        ctx['effective_tax_rate']: Number = None
            
        ctx['levels']: List[amount: Number, tax_rate: Number] = []
        remaining: Number = ctx['taxable_income']
        thislevel: Union[Number,None] = None
            
        cols = self.df[
            ["tax rate", f"{filingstatus} min", f"{filingstatus} max"]]      
        for idx, (rate, min_, max_) in cols.iterrows():
            logg(remaining=remaining, thislevel=thislevel, thisrate=rate, ctx=ctx)
            # TODO: is this correct?
            if remaining >= 0:
                if remaining <= max_:
                    thislevel = remaining
                elif remaining > max_:
                    thislevel = max_
                remaining = remaining - thislevel
                ctx['levels'].append((thislevel, rate))
                ctx['tax_bracket'] = rate
                ctx['taxes_due'] = (
                    (ctx['taxes_due'] if ctx['taxes_due'] is not None else 0) +
                    thislevel * rate)
            if remaining == 0:
                break
        logg(remaining=remaining, thislevel=thislevel, thisrate=rate, ctx=ctx)
        ctx['remaining'] = remaining
        if remaining:
            raise Exception("Remaining is > 0", remaining)
        return ctx

def calculate_etr_aft(data: Dict) -> Dict:
    ctx = {}
    ctx['effective_tax_rate']: Number = (
        data['taxes_due'] / data['income'] if data['income'] else 0)
    ctx['after_tax_income']: Number = data['income'] - data['taxes_due']
    return ctx


rate_schedules = dict()
rate_schedules['USA'] = {}
rate_schedules['USA'][2019] = RateSchedule_USA(tax_rate_df)  # ()

RATE_SCHEDULE = rate_schedules['USA'][2019]
RATE_SCHEDULE.get_rate(1_000_000, 'single')

{'taxable_income': 1000000,
 'taxes_due': 303747.0,
 'tax_bracket': 0.35,
 'after_tax_income': None,
 'effective_tax_rate': None,
 'levels': [(9700.0, 0.1),
  (39475.0, 0.12),
  (84200.0, 0.22),
  (160725.0, 0.24),
  (204100.0, 0.32),
  (501800.0, 0.35)],
 'remaining': 0.0}

In [6]:
url = 'https://www.nerdwallet.com/blog/taxes/tax-deductions-tax-breaks/'
resp = requests.get(url)
std_deduction_html_df = pd.read_html(resp.text)[1]
std_deduction_html_df

Unnamed: 0,Filing status,2019 tax year,2020 tax year
0,Single,"$12,200","$12,400"
1,"Married, filing jointly","$24,400","$24,800"
2,"Married, filing separately","$12,200","$12,400"
3,Head of household,"$18,350","$18,650"


In [7]:
def reshape_std_deduction_tbl(df):
    df.columns = [col.split()[0] for col in df.columns]
    df = df.T
    df.columns = [col.lower() for col in df.iloc[0]]
    df = df.iloc[1:]
    for col in df.columns:
        df[col] = pd.to_numeric(df[col].apply(lambda x: x.replace(",","").lstrip("$")))
    df.index = pd.to_numeric(df.index)
    return df
    
std_deduction_df = reshape_std_deduction_tbl(std_deduction_html_df)
std_deduction_df

Unnamed: 0,single,"married, filing jointly","married, filing separately",head of household
2019,12200,24400,12200,18350
2020,12400,24800,12400,18650


In [8]:
def calculate_agi(income: Real, filingstatus: str, year=2019,
                  std_deduction_df: pd.DataFrame=std_deduction_df) -> Real:
    ctx = {}
    ctx['income']: Real = income
    ctx['year']: int = year
    ctx['filingstatus']: str = filingstatus

    try:
        ctx['standard_deduction']: Real = std_deduction_df[filingstatus][year]
    except ValueError as e:
        raise
    ctx['agi']: Real = income
    if income <= ctx['standard_deduction']:
        ctx['agi'] = 0
    else:
        ctx['agi'] = income - ctx['standard_deduction']
    return ctx

calculate_agi(100_000, filingstatus='single', std_deduction_df=std_deduction_df)
calculate_agi(0, filingstatus='single', std_deduction_df=std_deduction_df)

{'income': 0,
 'year': 2019,
 'filingstatus': 'single',
 'standard_deduction': 12200,
 'agi': 0}

In [9]:
def calculate_taxes(income: Real, filingstatus: str=None, rate_schedule=RATE_SCHEDULE):
    ctx = dict()
    ctx['income'] = income
    ctx.update(calculate_agi(income, filingstatus=filingstatus))
    ctx['deductions'] = 0
    ctx['taxable_income'] = ctx['agi'] - ctx['deductions']
    ctx.update(rate_schedule.get_rate(ctx['taxable_income'], filingstatus=filingstatus))
    ctx.update(calculate_etr_aft(ctx))
    assert ctx['income'] == income
    return ctx

In [17]:
%%run_pytest[clean]
log.setLevel(logging.DEBUG)
stdded_single = std_deduction_df['single'][2019]
@pytest.mark.parametrize('args,expected_output',[
    ((0, 'single'), dict(income=0, agi=0, taxes_due=0, tax_bracket=0.1, after_tax_income=0, effective_tax_rate=0)),
    ((1, 'single'), dict(income=1, agi=0, taxes_due=0, tax_bracket=0.1, after_tax_income=1, effective_tax_rate=0)),
    ((100_000, 'single'), dict(income=100_000, agi=100_000-stdded_single, taxes_due=15_246, tax_bracket=0.1, after_tax_income=1, effective_tax_rate=0)),

])
def test_calculate_taxes(args, expected_output):
    output = calculate_taxes(*args)
    assert 'income' in output
    assert output['income'] == args[0]
    assert 'filingstatus' in output
    assert output['filingstatus'] == args[1]
    assert 'agi' in output
    assert 'deductions' in output
    assert 'taxable_income' in output
    assert 'tax_bracket' in output
    assert 'taxes_due' in output
    assert 'after_tax_income' in output
    assert 'effective_tax_rate' in output
    print(output['standard_deduction'])
    assert sum(l[0] for l in output['levels']) == output['taxable_income']
    assert output['levels'][-1][1] == output['tax_bracket']
    assert {key:output[key] for key in expected_output} == expected_output

platform linux -- Python 3.7.3, pytest-5.2.2, py-1.8.0, pluggy-0.13.0
rootdir: /home/wturner/-wrk/-ce36/math/src/notebooks/personalfinance
plugins: cov-2.8.1
collected 3 items

income_taxes.py 

DEBUG:tax.rateschedule:{'remaining': 0, 'thislevel': None, 'thisrate': 0.1, 'ctx': {'taxable_income': 0, 'taxes_due': None, 'tax_bracket': None, 'after_tax_income': None, 'effective_tax_rate': None, 'levels': []}}
DEBUG:tax.rateschedule:{'remaining': 0, 'thislevel': 0, 'thisrate': 0.1, 'ctx': {'taxable_income': 0, 'taxes_due': 0.0, 'tax_bracket': 0.1, 'after_tax_income': None, 'effective_tax_rate': None, 'levels': [(0, 0.1)]}}


.

DEBUG:tax.rateschedule:{'remaining': 0, 'thislevel': None, 'thisrate': 0.1, 'ctx': {'taxable_income': 0, 'taxes_due': None, 'tax_bracket': None, 'after_tax_income': None, 'effective_tax_rate': None, 'levels': []}}
DEBUG:tax.rateschedule:{'remaining': 0, 'thislevel': 0, 'thisrate': 0.1, 'ctx': {'taxable_income': 0, 'taxes_due': 0.0, 'tax_bracket': 0.1, 'after_tax_income': None, 'effective_tax_rate': None, 'levels': [(0, 0.1)]}}


.

DEBUG:tax.rateschedule:{'remaining': 87800, 'thislevel': None, 'thisrate': 0.1, 'ctx': {'taxable_income': 87800, 'taxes_due': None, 'tax_bracket': None, 'after_tax_income': None, 'effective_tax_rate': None, 'levels': []}}
DEBUG:tax.rateschedule:{'remaining': 78100.0, 'thislevel': 9700.0, 'thisrate': 0.12, 'ctx': {'taxable_income': 87800, 'taxes_due': 970.0, 'tax_bracket': 0.1, 'after_tax_income': None, 'effective_tax_rate': None, 'levels': [(9700.0, 0.1)]}}
DEBUG:tax.rateschedule:{'remaining': 38625.0, 'thislevel': 39475.0, 'thisrate': 0.22, 'ctx': {'taxable_income': 87800, 'taxes_due': 5707.0, 'tax_bracket': 0.12, 'after_tax_income': None, 'effective_tax_rate': None, 'levels': [(9700.0, 0.1), (39475.0, 0.12)]}}
DEBUG:tax.rateschedule:{'remaining': 0.0, 'thislevel': 38625.0, 'thisrate': 0.22, 'ctx': {'taxable_income': 87800, 'taxes_due': 14204.5, 'tax_bracket': 0.22, 'after_tax_income': None, 'effective_tax_rate': None, 'levels': [(9700.0, 0.1), (39475.0, 0.12), (38625.0, 0.22)]}}


F                                                    [100%]

________________ test_calculate_taxes[args2-expected_output2] ________________

args = (100000, 'single')
expected_output = {'after_tax_income': 1, 'agi': 87800, 'effective_tax_rate': 0, 'income': 100000, ...}

    @pytest.mark.parametrize('args,expected_output',[
        ((0, 'single'), dict(income=0, agi=0, taxes_due=0, tax_bracket=0.1, after_tax_income=0, effective_tax_rate=0)),
        ((1, 'single'), dict(income=1, agi=0, taxes_due=0, tax_bracket=0.1, after_tax_income=1, effective_tax_rate=0)),
        ((100_000, 'single'), dict(income=100_000, agi=100_000-stdded_single, taxes_due=15_246, tax_bracket=0.1, after_tax_income=1, effective_tax_rate=0)),
    
    ])
    def test_calculate_taxes(args, expected_output):
        output = calculate_taxes(*args)
        assert 'income' in output
        assert output['income'] == args[0]
        assert 'filingstatus' in output
        assert output['filingstatus'] == args[1]
      

In [14]:
15_378 / 87_800

0.1751480637813212