%pylab inline
import os     
from ipypublish import nb_setup
%load_ext rpy2.ipython

Populating the interactive namespace from numpy and matplotlib


#Run if on Windows machine
%load_ext RWinOut

The rpy2.ipython extension is already loaded. To reload it, use:
  %reload_ext rpy2.ipython


%%R
data = read.csv("DSTMAA_data/goog.csv", header=TRUE)
print(head(data))
m = length(data)
n = length(data[,1])
print(c('Number of columns:',m,' Number of rows:',n))

        Date     Open     High      Low    Close Adj.Close   Volume
1 2004-08-19 49.67690 51.69378 47.66995 49.84580  49.84580 44994500
2 2004-08-20 50.17863 54.18756 49.92529 53.80505  53.80505 23005800
3 2004-08-23 55.01717 56.37334 54.17266 54.34653  54.34653 18393200
4 2004-08-24 55.26058 55.43942 51.45036 52.09616  52.09616 15361800
5 2004-08-25 52.14087 53.65105 51.60436 52.65751  52.65751  9257400
6 2004-08-26 52.13591 53.62621 51.99184 53.60634  53.60634  7148200
[1] "Number of columns:" "7"                  " Number of rows:"  
[4] "3607"


%%R
stkp = as.matrix(data[,6])
plot(stkp,type='l',col="blue")
grid(lwd=2)


%%R
library(magrittr)
data[,6] %>% plot(type='l',col='blue') %>% grid(lwd=2)


%%R
library(quantmod)
tickers = c("AAPL","MSFT","IBM","CSCO","C")
getSymbols(tickers)
getSymbols("^GSPC")
tickers = c(tickers,"GSPC")


%%R
print(head(AAPL))
length(tickers)

           AAPL.Open AAPL.High AAPL.Low AAPL.Close AAPL.Volume AAPL.Adjusted
2007-01-03  12.32714  12.36857 11.70000   11.97143   309579900      10.41635
2007-01-04  12.00714  12.27857 11.97429   12.23714   211815100      10.64755
2007-01-05  12.25286  12.31428 12.05714   12.15000   208685400      10.57173
2007-01-08  12.28000  12.36143 12.18286   12.21000   199276700      10.62393
2007-01-09  12.35000  13.28286 12.16429   13.22429   837324600      11.50647
2007-01-10  13.53571  13.97143 13.35000   13.85714   738220000      12.05711
[1] 6


%%R
for (t in tickers) {
  a = get(noquote(t))[,1]
  print(c(t,length(a)))
}

[1] "AAPL" "3268"
[1] "MSFT" "3268"
[1] "IBM"  "3268"
[1] "CSCO" "3268"
[1] "C"    "3268"
[1] "GSPC" "3268"


%%R
df = list()
j = 0
for (t in tickers) {
  j = j + 1
  a = noquote(t)
  b = data.frame(get(a)[,6])
  b$dt = row.names(b)
  df[[j]] = b
}


%%R
stock_table = df[[1]]
for (j in 2:length(df)) {
  stock_table = merge(stock_table,df[[j]],by="dt")
}
print(dim(stock_table))
class(stock_table)

[1] 3268    7
[1] "data.frame"


%%R
par(mfrow=c(3,2))   #Set the plot area to six plots
for (j in 1:length(tickers)) {
  plot(as.Date(stock_table[,1]),stock_table[,j+1], type="l",
       ylab=tickers[j],xlab="date")
}
par(mfrow=c(1,1))  #Set the plot figure back to a single plot


%%R
n = length(stock_table[,1])
rets = stock_table[,2:(length(tickers)+1)]
for (j in 1:length(tickers)) {
  rets[2:n,j] = diff(log(rets[,j]))
}
rets$dt = stock_table$dt
rets = rets[2:n,]   #lose the first row when converting to returns
print(head(rets))
class(rets)

  AAPL.Adjusted MSFT.Adjusted IBM.Adjusted CSCO.Adjusted    C.Adjusted
2   0.021952752  -0.001675780  0.010635618  0.0259846938 -0.0034450807
3  -0.007146552  -0.005719328 -0.009094258  0.0003512162 -0.0052807781
4   0.004926396   0.009736840  0.015077467  0.0056044941  0.0050991365
5   0.079799664   0.001001738  0.011760737 -0.0056044941 -0.0087575193
6   0.046745721  -0.010063596 -0.011861730  0.0073491998 -0.0080957555
7  -0.012448531   0.034463050 -0.002429803  0.0003488354  0.0007391458
  GSPC.Adjusted         dt
2  0.0012275323 2007-01-04
3 -0.0061031679 2007-01-05
4  0.0022178572 2007-01-08
5 -0.0005168099 2007-01-09
6  0.0019384723 2007-01-10
7  0.0063198611 2007-01-11
[1] "data.frame"


%%R
summary(rets)

 AAPL.Adjusted       MSFT.Adjusted         IBM.Adjusted       
 Min.   :-0.197470   Min.   :-0.1245786   Min.   :-0.0864189  
 1st Qu.:-0.007762   1st Qu.:-0.0073024   1st Qu.:-0.0062800  
 Median : 0.001008   Median : 0.0004589   Median : 0.0004686  
 Mean   : 0.001012   Mean   : 0.0005989   Mean   : 0.0002113  
 3rd Qu.: 0.011061   3rd Qu.: 0.0086850   3rd Qu.: 0.0073029  
 Max.   : 0.130194   Max.   : 0.1706256   Max.   : 0.1089891  
 CSCO.Adjusted          C.Adjusted         GSPC.Adjusted       
 Min.   :-0.1768644   Min.   :-0.4946963   Min.   :-0.0946951  
 1st Qu.:-0.0071493   1st Qu.:-0.0106260   1st Qu.:-0.0038841  
 Median : 0.0005723   Median : 0.0000000   Median : 0.0006632  
 Mean   : 0.0002427   Mean   :-0.0005693   Mean   : 0.0002517  
 3rd Qu.: 0.0086450   3rd Qu.: 0.0104800   3rd Qu.: 0.0055557  
 Max.   : 0.1479931   Max.   : 0.4563160   Max.   : 0.1095720  
      dt           
 Length:3267       
 Class :character  
 Mode  :character


%%R
cor(rets[,1:length(tickers)])

              AAPL.Adjusted MSFT.Adjusted IBM.Adjusted CSCO.Adjusted C.Adjusted
AAPL.Adjusted     1.0000000     0.4890636    0.4598145     0.4922452  0.3734263
MSFT.Adjusted     0.4890636     1.0000000    0.5344394     0.5937782  0.4110750
IBM.Adjusted      0.4598145     0.5344394    1.0000000     0.5554172  0.4202308
CSCO.Adjusted     0.4922452     0.5937782    0.5554172     1.0000000  0.4563835
C.Adjusted        0.3734263     0.4110750    0.4202308     0.4563835  1.0000000
GSPC.Adjusted     0.6196366     0.7141322    0.6854599     0.7186963  0.6660740
              GSPC.Adjusted
AAPL.Adjusted     0.6196366
MSFT.Adjusted     0.7141322
IBM.Adjusted      0.6854599
CSCO.Adjusted     0.7186963
C.Adjusted        0.6660740
GSPC.Adjusted     1.0000000


%%R
library(corrgram)
corrgram(rets[,1:length(tickers)], order=TRUE, lower.panel=panel.ellipse,
 upper.panel=panel.pts, text.panel=panel.txt)


%%R
betas = NULL
for (j in 1:(length(tickers)-1)) {
  res = lm(rets[,j]~rets[,6])
  betas[j] = res$coefficients[2]
}
print(betas)

[1] 0.9996354 0.9899433 0.7809676 1.0658362 1.8969453


%%R
betas = matrix(betas)
avgrets = colMeans(rets[,1:(length(tickers)-1)])
res = lm(avgrets~betas)
print(summary(res))
plot(betas,avgrets)
abline(res,col="red")

Call:
lm(formula = avgrets ~ betas)

Residuals:
AAPL.Adjusted MSFT.Adjusted  IBM.Adjusted CSCO.Adjusted    C.Adjusted 
    0.0005626     0.0001394    -0.0004620    -0.0001392    -0.0001008 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)  
(Intercept)  0.0014723  0.0006141   2.397   0.0961 .
betas       -0.0010231  0.0005074  -2.016   0.1372  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.0004393 on 3 degrees of freedom
Multiple R-squared:  0.5754,	Adjusted R-squared:  0.4338 
F-statistic: 4.065 on 1 and 3 DF,  p-value: 0.1372


%%R
tickers = read.table("DSTMAA_data/tickers.csv",header=FALSE,sep=":")


%%R
head(tickers)

        V1   V2
1 NasdaqGS ACOR
2 NasdaqGS AKAM
3     NYSE  ARE
4 NasdaqGS AMZN
5 NasdaqGS AAPL
6 NasdaqGS AREX


%%R
n = dim(tickers)[1]
print(n)

names(tickers) = c("Exchange","Symbol")
head(tickers)

[1] 98
  Exchange Symbol
1 NasdaqGS   ACOR
2 NasdaqGS   AKAM
3     NYSE    ARE
4 NasdaqGS   AMZN
5 NasdaqGS   AAPL
6 NasdaqGS   AREX


%%R 
nasdaq_names = stockSymbols(exchange="NASDAQ")
nyse_names = stockSymbols(exchange="NYSE")
amex_names = stockSymbols(exchange="AMEX")

#If it does not work use the file below
#load("DSTMAA_data/stock_exchange.Rdata")

/Users/sanjivda/anaconda3/lib/python3.7/site-packages/rpy2/rinterface/__init__.py:146: RRuntimeWarning: Fetching NASDAQ symbols...

  warnings.warn(x, RRuntimeWarning)

Error in names(x) <- value : 
  'names' attribute [8] must be the same length as the vector [1]

/Users/sanjivda/anaconda3/lib/python3.7/site-packages/rpy2/rinterface/__init__.py:146: RRuntimeWarning: Error in names(x) <- value : 
  'names' attribute [8] must be the same length as the vector [1]

  warnings.warn(x, RRuntimeWarning)


%%R
nyse_names = read.csv("DSTMAA_data/nyse_names.csv", header=TRUE)
amex_names = read.csv("DSTMAA_data/amex_names.csv", header=TRUE)
nasdaq_names = read.csv("DSTMAA_data/nasdaq_names.csv", header=TRUE)


%%R
head(nasdaq_names)

  Symbol                                   Name LastSale MarketCap IPOyear
1    TXG                     10x Genomics, Inc.    79.44    $7.64B    2019
2     YI                              111, Inc.     5.16  $425.43M    2018
3    PIH 1347 Property Insurance Holdings, Inc.     5.47   $33.37M    2014
4  PIHPP 1347 Property Insurance Holdings, Inc.    26.51   $18.56M     n/a
5   TURN               180 Degree Capital Corp.   2.1399    $66.6M     n/a
6   FLWS                1-800 FLOWERS.COM, Inc.    14.22  $918.48M    1999
             Sector                                         industry
1     Capital Goods Biotechnology: Laboratory Analytical Instruments
2       Health Care                         Medical/Nursing Services
3           Finance                       Property-Casualty Insurers
4           Finance                       Property-Casualty Insurers
5           Finance                       Finance/Investors Services
6 Consumer Services                           Other Specialty Stores
                        Summary.Quote  X
1   https://old.nasdaq.com/symbol/txg NA
2    https://old.nasdaq.com/symbol/yi NA
3   https://old.nasdaq.com/symbol/pih NA
4 https://old.nasdaq.com/symbol/pihpp NA
5  https://old.nasdaq.com/symbol/turn NA
6  https://old.nasdaq.com/symbol/flws NA


%%R
co_names = rbind(nyse_names,nasdaq_names,amex_names)


%%R
dim(co_names)

[1] 6996    9


%%R
result = merge(tickers,co_names,by="Symbol")
head(result)

  Symbol Exchange                                  Name LastSale MarketCap
1   AAPL NasdaqGS                            Apple Inc.   284.27 $1263.09B
2   ACOR NasdaqGS             Acorda Therapeutics, Inc.     1.92   $92.22M
3   AKAM NasdaqGS             Akamai Technologies, Inc.    85.73   $13.85B
4   AMZN NasdaqGS                      Amazon.com, Inc.  1789.21  $887.09B
5    ARE     NYSE Alexandria Real Estate Equities, Inc.    160.1   $18.44B
6    BLK     NYSE                       BlackRock, Inc.   499.64   $77.54B
  IPOyear            Sector
1    1980        Technology
2    2006       Health Care
3    1999     Miscellaneous
4    1997 Consumer Services
5     n/a Consumer Services
6    1999           Finance
                                                       industry
1                                        Computer Manufacturing
2 Biotechnology: Biological Products (No Diagnostic Substances)
3                                             Business Services
4                                Catalog/Specialty Distribution
5                                 Real Estate Investment Trusts
6                            Investment Bankers/Brokers/Service
                       Summary.Quote  X
1 https://old.nasdaq.com/symbol/aapl NA
2 https://old.nasdaq.com/symbol/acor NA
3 https://old.nasdaq.com/symbol/akam NA
4 https://old.nasdaq.com/symbol/amzn NA
5  https://old.nasdaq.com/symbol/are NA
6  https://old.nasdaq.com/symbol/blk NA


%%R
library(DT)
datatable(co_names, options = list(pageLength = 25),filter="top")


%%R
library(stringr)

#READ IN THE LIST OF TICKERS
tickers = read.table("DSTMAA_data/tickers.csv",header=FALSE,sep=":")
n = dim(tickers)[1]
names(tickers) = c("Exchange","Symbol")
tickers$ceo = NA

#PULL CEO NAMES FROM YAHOO/GOOGLE FINANCE (take random 10 firms)
library(rvest)
library(magrittr)
for (j in 1:5) {
  url = paste("https://finance.yahoo.com/quote/",tickers[j,2],"/profile?p=",tickers[j,2],sep="")
  #url = paste("https://finance.google.com/finance?q=",tickers[j,2],sep="")
  #text = readLines(url)
  #idx = grep("Chief Executive",text)
  #if (length(idx)>0) {
  #  tickers[j,3] = str_split(text[idx-2],">")[[1]][2]
  #}
  #else {
  #  tickers[j,3] = NA
  #}
  doc = read_html(url)
  tabel = doc %>% html_nodes("table") %>% html_table()
  if (length(tabel)>0) {
    tickers[j,3] = tabel[[1]]$Name[1]
  }
  else {
    tickers[j,3] = NA
  }
  print(tickers[j,])
}

#WRITE CEO_NAMES TO CSV
write.table(tickers,file="DSTMAA_data/ceo_names.csv",
            row.names=FALSE,sep=",")

/Users/sanjivda/anaconda3/lib/python3.7/site-packages/rpy2/rinterface/__init__.py:146: RRuntimeWarning: Loading required package: xml2

  warnings.warn(x, RRuntimeWarning)
/Users/sanjivda/anaconda3/lib/python3.7/site-packages/rpy2/rinterface/__init__.py:146: RRuntimeWarning: Registered S3 method overwritten by 'rvest':
  method            from
  read_xml.response xml2

  warnings.warn(x, RRuntimeWarning)

  Exchange Symbol            ceo
1 NasdaqGS   ACOR Dr. Ron  Cohen
  Exchange Symbol                     ceo
2 NasdaqGS   AKAM Dr. F. Thomson Leighton
  Exchange Symbol                          ceo
3     NYSE    ARE Mr. Joel S. Marcus CPA, J.D.
  Exchange Symbol                  ceo
4 NasdaqGS   AMZN Mr. Jeffrey P. Bezos
  Exchange Symbol                 ceo
5 NasdaqGS   AAPL Mr. Timothy D. Cook


nb_setup.images_hconcat(["DSTMAA_images/yahoofinance_AAPL.png"], width=1000)


%%R
head(tickers)

  Exchange Symbol                          ceo
1 NasdaqGS   ACOR               Dr. Ron  Cohen
2 NasdaqGS   AKAM      Dr. F. Thomson Leighton
3     NYSE    ARE Mr. Joel S. Marcus CPA, J.D.
4 NasdaqGS   AMZN         Mr. Jeffrey P. Bezos
5 NasdaqGS   AAPL          Mr. Timothy D. Cook
6 NasdaqGS   AREX                         <NA>


%%R
tickers = c("AAPL","YHOO","IBM","CSCO","C","GSPC")
apply(rets[,1:(length(tickers)-1)],2,mean)

AAPL.Adjusted MSFT.Adjusted  IBM.Adjusted CSCO.Adjusted    C.Adjusted 
 0.0010121052  0.0005988692  0.0002112783  0.0002426701 -0.0005693044


%%R
colMeans(rets[,1:(length(tickers)-1)])

AAPL.Adjusted MSFT.Adjusted  IBM.Adjusted CSCO.Adjusted    C.Adjusted 
 0.0010121052  0.0005988692  0.0002112783  0.0002426701 -0.0005693044


%%R
#FUNCTION TO READ IN CSV FILES FROM FRED
#Enter SeriesID as a text string
readFRED = function(SeriesID) {
  url = paste("https://research.stlouisfed.org/fred2/series/",
          SeriesID, "/downloaddata/",SeriesID,".csv",sep="")
  data = readLines(url)
  n = length(data)
  data = data[2:n]
  n = length(data)
  df = matrix(0,n,2)   #top line is header
  for (j in 1:n) {
    tmp = strsplit(data[j],",")
    df[j,1] = tmp[[1]][1]
    df[j,2] = tmp[[1]][2]
  }
  rate = as.numeric(df[,2])
  idx = which(rate>0)
  idx = setdiff(seq(1,n),idx)
  rate[idx] = -99
  date = df[,1]
  df = data.frame(date,rate)
  names(df)[2] = SeriesID
  result = df
}


%%R
#EXTRACT TERM STRUCTURE DATA FOR ALL RATES FROM 1 MO to 30 YRS FROM FRED
id_list = c("DGS1MO","DGS3MO","DGS6MO","DGS1","DGS2","DGS3",
            "DGS5","DGS7","DGS10","DGS20","DGS30")
k = 0
for (id in id_list) {
  out = readFRED(id)
  if (k>0) { rates = merge(rates,out,"date",all=TRUE) }
  else { rates = out }
  k = k + 1
}

head(rates)

        date DGS1MO DGS3MO DGS6MO DGS1 DGS2 DGS3 DGS5 DGS7 DGS10 DGS20 DGS30
1 2001-07-31   3.67   3.54   3.47 3.53 3.79 4.06 4.57 4.86  5.07  5.61  5.51
2 2001-08-01   3.65   3.53   3.47 3.56 3.83 4.09 4.62 4.90  5.11  5.63  5.53
3 2001-08-02   3.65   3.53   3.46 3.57 3.89 4.17 4.69 4.97  5.17  5.68  5.57
4 2001-08-03   3.63   3.52   3.47 3.57 3.91 4.22 4.72 4.99  5.20  5.70  5.59
5 2001-08-06   3.62   3.52   3.47 3.56 3.88 4.17 4.71 4.99  5.19  5.70  5.59
6 2001-08-07   3.63   3.52   3.47 3.56 3.90 4.19 4.72 5.00  5.20  5.71  5.60


%%R
load("DSTMAA_data/rates.Rdata")


%%R
#CONVERT ALL DATES TO NUMERIC AND SORT BY DATE
dates = rates[,1]
library(stringr)
dates = as.numeric(str_replace_all(dates,"-",""))
res = sort(dates,index.return=TRUE)
for (j in 1:dim(rates)[2]) {
  rates[,j] = rates[res$ix,j]
}

head(rates)

        date DGS1MO DGS3MO DGS6MO DGS1 DGS2 DGS3 DGS5 DGS7 DGS10 DGS20 DGS30
1 1962-01-02     NA     NA     NA 3.22   NA 3.70 3.88   NA  4.06    NA    NA
2 1962-01-03     NA     NA     NA 3.24   NA 3.70 3.87   NA  4.03    NA    NA
3 1962-01-04     NA     NA     NA 3.24   NA 3.69 3.86   NA  3.99    NA    NA
4 1962-01-05     NA     NA     NA 3.26   NA 3.71 3.89   NA  4.02    NA    NA
5 1962-01-08     NA     NA     NA 3.31   NA 3.71 3.91   NA  4.03    NA    NA
6 1962-01-09     NA     NA     NA 3.32   NA 3.74 3.93   NA  4.05    NA    NA


%%R
#REMOVE THE NA ROWS
idx = which(rowSums(is.na(rates))==0)
rates2 = rates[idx,]
print(head(rates2))

            date DGS1MO DGS3MO DGS6MO DGS1 DGS2 DGS3 DGS5 DGS7 DGS10 DGS20
10326 2001-07-31   3.67   3.54   3.47 3.53 3.79 4.06 4.57 4.86  5.07  5.61
10327 2001-08-01   3.65   3.53   3.47 3.56 3.83 4.09 4.62 4.90  5.11  5.63
10328 2001-08-02   3.65   3.53   3.46 3.57 3.89 4.17 4.69 4.97  5.17  5.68
10329 2001-08-03   3.63   3.52   3.47 3.57 3.91 4.22 4.72 4.99  5.20  5.70
10330 2001-08-06   3.62   3.52   3.47 3.56 3.88 4.17 4.71 4.99  5.19  5.70
10331 2001-08-07   3.63   3.52   3.47 3.56 3.90 4.19 4.72 5.00  5.20  5.71
      DGS30
10326  5.51
10327  5.53
10328  5.57
10329  5.59
10330  5.59
10331  5.60


#download.file(url="http://www.fdic.gov/bank/individual/
#failed/banklist.csv",destfile="failed_banks.csv")


%%R
url = "https://www.fdic.gov/bank/individual/failed/banklist.csv"
#url = "DSTMAA_data/banklist.csv"
data = readLines(url)
head(data)

[1] "Bank Name,City,ST,CERT,Acquiring Institution,Closing Date,Updated Date"                     
[2] "City National Bank of New Jersey,Newark,NJ,21111,Industrial Bank,1-Nov-19,19-Dec-19"        
[3] "Resolute Bank,Maumee,OH,58317,Buckeye State Bank,25-Oct-19,19-Dec-19"                       
[4] "Louisa Community Bank,Louisa,KY,58112,Kentucky Farmers Bank Corporation,25-Oct-19,19-Dec-19"
[5] "The Enloe State Bank,Cooper,TX,10716,\"Legend Bank, N. A.\",31-May-19,19-Dec-19"            
[6] "Washington Federal Bank for Savings,Chicago,IL,30570,Royal Savings Bank,15-Dec-17,24-Jul-19"


%%R
data = read.csv(url)
print(names(data))

[1] "Bank.Name"             "City"                  "ST"                   
[4] "CERT"                  "Acquiring.Institution" "Closing.Date"         
[7] "Updated.Date"


%%R
print(head(data))
data$count = 1
print(head(data))

                                        Bank.Name    City ST  CERT
1                City National Bank of New Jersey  Newark NJ 21111
2                                   Resolute Bank  Maumee OH 58317
3                           Louisa Community Bank  Louisa KY 58112
4                            The Enloe State Bank  Cooper TX 10716
5             Washington Federal Bank for Savings Chicago IL 30570
6 The Farmers and Merchants State Bank of Argonia Argonia KS 17719
              Acquiring.Institution Closing.Date Updated.Date
1                   Industrial Bank     1-Nov-19    19-Dec-19
2                Buckeye State Bank    25-Oct-19    19-Dec-19
3 Kentucky Farmers Bank Corporation    25-Oct-19    19-Dec-19
4                Legend Bank, N. A.    31-May-19    19-Dec-19
5                Royal Savings Bank    15-Dec-17    24-Jul-19
6                       Conway Bank    13-Oct-17    12-Aug-19
                                        Bank.Name    City ST  CERT
1                City National Bank of New Jersey  Newark NJ 21111
2                                   Resolute Bank  Maumee OH 58317
3                           Louisa Community Bank  Louisa KY 58112
4                            The Enloe State Bank  Cooper TX 10716
5             Washington Federal Bank for Savings Chicago IL 30570
6 The Farmers and Merchants State Bank of Argonia Argonia KS 17719
              Acquiring.Institution Closing.Date Updated.Date count
1                   Industrial Bank     1-Nov-19    19-Dec-19     1
2                Buckeye State Bank    25-Oct-19    19-Dec-19     1
3 Kentucky Farmers Bank Corporation    25-Oct-19    19-Dec-19     1
4                Legend Bank, N. A.    31-May-19    19-Dec-19     1
5                Royal Savings Bank    15-Dec-17    24-Jul-19     1
6                       Conway Bank    13-Oct-17    12-Aug-19     1


%%R
any(is.na(data))

[1] FALSE


%%R
res = sort(as.matrix(data$ST),index.return=TRUE)
print(head(data[res$ix,]))
print(head(sort(unique(data$ST))))
print(length(unique(data$ST)))

                                   Bank.Name         City ST  CERT
95  Alabama Trust Bank, National Association    Sylacauga AL 35224
179                            Superior Bank   Birmingham AL 17750
180                              Nexity Bank   Birmingham AL 19794
332                       First Lowndes Bank Fort Deposit AL 24957
371           New South Federal Savings Bank     Irondale AL 32276
428                        CapitalSouth Bank   Birmingham AL 22130
                  Acquiring.Institution Closing.Date Updated.Date count
95                 Southern States Bank    18-May-12    21-Sep-15     1
179 Superior Bank, National Association    15-Apr-11    29-Jan-19     1
180            AloStar Bank of Commerce    15-Apr-11     9-Dec-19     1
332                 First Citizens Bank    19-Mar-10    21-Mar-14     1
371                           Beal Bank    18-Dec-09     1-Feb-19     1
428                          IBERIABANK    21-Aug-09    20-Feb-18     1
[1] AL AR AZ CA CO CT
44 Levels: AL AR AZ CA CO CT FL GA HI IA ID IL IN KS KY LA MA MD MI MN ... WY
[1] 44


%%R
head(aggregate(count ~ ST,data,sum),10)

   ST count
1  AL     7
2  AR     4
3  AZ    16
4  CA    41
5  CO    10
6  CT     2
7  FL    75
8  GA    93
9  HI     1
10 IA     2


%%R
acq = aggregate(count~Acquiring.Institution,data,sum)
idx = sort(as.matrix(acq$count),decreasing=TRUE,index.return=TRUE)$ix
head(acq[idx,],15)

                  Acquiring.Institution count
175                         No Acquirer    31
230        State Bank and Trust Company    12
114 First-Citizens Bank & Trust Company    11
10                          Ameris Bank    10
270                      U.S. Bank N.A.     9
68            Community & Southern Bank     8
28                   Bank of the Ozarks     7
48                      Centennial Bank     7
234                  Stearns Bank, N.A.     7
50    CenterState Bank of Florida, N.A.     6
51                         Central Bank     6
159             MB Financial Bank, N.A.     6
210            Republic Bank of Chicago     6
55     CertusBank, National Association     5
65                  Columbia State Bank     5


%%R
head(data)

library(lubridate)
data$Cdate = dmy(data$Closing.Date)
data$Cyear = year(data$Cdate)
fd = aggregate(count~Cyear,data,sum)
print(fd)

/Users/sanjivda/anaconda3/lib/python3.7/site-packages/rpy2/rinterface/__init__.py:146: RRuntimeWarning: 
Attaching package: ‘lubridate’


  warnings.warn(x, RRuntimeWarning)
/Users/sanjivda/anaconda3/lib/python3.7/site-packages/rpy2/rinterface/__init__.py:146: RRuntimeWarning: The following object is masked from ‘package:base’:

    date


  warnings.warn(x, RRuntimeWarning)

   Cyear count
1   2000     2
2   2001     4
3   2002    11
4   2003     3
5   2004     4
6   2007     3
7   2008    25
8   2009   140
9   2010   157
10  2011    92
11  2012    51
12  2013    24
13  2014    18
14  2015     8
15  2016     5
16  2017     8
17  2019     4


%%R
plot(count~Cyear,data=fd,type="l",lwd=3,col="red",xlab="Year")
grid(lwd=3)


%%R
data$Cmonth = month(data$Cdate)
fd = aggregate(count~Cmonth,data,sum)
print(fd)

   Cmonth count
1       1    51
2       2    44
3       3    40
4       4    59
5       5    44
6       6    36
7       7    74
8       8    41
9       9    38
10     10    61
11     11    36
12     12    35


%%R
plot(count~Cmonth,data=fd,type="l",lwd=3,col="green"); grid(lwd=3)


%%R
data$Cday = day(data$Cdate)
fd = aggregate(count~Cday,data,sum)
print(fd)

   Cday count
1     1     9
2     2    20
3     3     4
4     4    21
5     5    16
6     6    14
7     7    20
8     8    14
9     9    10
10   10    14
11   11    18
12   12    10
13   13    16
14   14    20
15   15    21
16   16    22
17   17    23
18   18    21
19   19    30
20   20    27
21   21    17
22   22    18
23   23    31
24   24    19
25   25    15
26   26    16
27   27    19
28   28    19
29   29    16
30   30    30
31   31     9


%%R
plot(count~Cday,data=fd,type="l",lwd=3,col="blue"); grid(lwd=3)


%%R
data = read.csv("DSTMAA_data/CA_Crimes_Data_2004-2013.csv",header=TRUE)


%%R
library(data.table)
D_T = as.data.table(data)
print(class(D_T))

[1] "data.table" "data.frame"


%%R
print(dim(D_T))
print(names(D_T))
head(D_T)

[1] 7301   69
 [1] "Year"                "County"              "NCICCode"           
 [4] "Violent_sum"         "Homicide_sum"        "ForRape_sum"        
 [7] "Robbery_sum"         "AggAssault_sum"      "Property_sum"       
[10] "Burglary_sum"        "VehicleTheft_sum"    "LTtotal_sum"        
[13] "ViolentClr_sum"      "HomicideClr_sum"     "ForRapeClr_sum"     
[16] "RobberyClr_sum"      "AggAssaultClr_sum"   "PropertyClr_sum"    
[19] "BurglaryClr_sum"     "VehicleTheftClr_sum" "LTtotalClr_sum"     
[22] "TotalStructural_sum" "TotalMobile_sum"     "TotalOther_sum"     
[25] "GrandTotal_sum"      "GrandTotClr_sum"     "RAPact_sum"         
[28] "ARAPact_sum"         "FROBact_sum"         "KROBact_sum"        
[31] "OROBact_sum"         "SROBact_sum"         "HROBnao_sum"        
[34] "CHROBnao_sum"        "GROBnao_sum"         "CROBnao_sum"        
[37] "RROBnao_sum"         "BROBnao_sum"         "MROBnao_sum"        
[40] "FASSact_sum"         "KASSact_sum"         "OASSact_sum"        
[43] "HASSact_sum"         "FEBURact_Sum"        "UBURact_sum"        
[46] "RESDBUR_sum"         "RNBURnao_sum"        "RDBURnao_sum"       
[49] "RUBURnao_sum"        "NRESBUR_sum"         "NNBURnao_sum"       
[52] "NDBURnao_sum"        "NUBURnao_sum"        "MVTact_sum"         
[55] "TMVTact_sum"         "OMVTact_sum"         "PPLARnao_sum"       
[58] "PSLARnao_sum"        "SLLARnao_sum"        "MVLARnao_sum"       
[61] "MVPLARnao_sum"       "BILARnao_sum"        "FBLARnao_sum"       
[64] "COMLARnao_sum"       "AOLARnao_sum"        "LT400nao_sum"       
[67] "LT200400nao_sum"     "LT50200nao_sum"      "LT50nao_sum"        
   Year         County                         NCICCode Violent_sum
1: 2004 Alameda County Alameda Co. Sheriff's Department         461
2: 2004 Alameda County                          Alameda         342
3: 2004 Alameda County                           Albany          42
4: 2004 Alameda County                         Berkeley         557
5: 2004 Alameda County                       Emeryville          83
6: 2004 Alameda County                          Fremont         454
   Homicide_sum ForRape_sum Robbery_sum AggAssault_sum Property_sum
1:            5          29         174            253         3351
2:            1          12          89            240         2231
3:            1           3          29              9          718
4:            4          17         355            181         8611
5:            2           4          53             24         1066
6:            5          24         165            260         5723
   Burglary_sum VehicleTheft_sum LTtotal_sum ViolentClr_sum HomicideClr_sum
1:          731              947        1673            170               5
2:          376              333        1522            244               1
3:          130              142         446             10               0
4:         1382             1128        6101            169               1
5:           94              228         744             15               1
6:          939              881        3903            232               2
   ForRapeClr_sum RobberyClr_sum AggAssaultClr_sum PropertyClr_sum
1:              4             43               118             275
2:              8             45               190             330
3:              1              3                 6              53
4:              6             72                90             484
5:              0              8                 6             169
6:             18             51               161             697
   BurglaryClr_sum VehicleTheftClr_sum LTtotalClr_sum TotalStructural_sum
1:              58                 129             88                   7
2:              65                  57            208                   5
3:              24                   2             27                   3
4:              58                  27            399                  21
5:              14                   4            151                   0
6:              84                 135            478                   8
   TotalMobile_sum TotalOther_sum GrandTotal_sum GrandTotClr_sum RAPact_sum
1:              23              3             33               4         27
2:               1              9             15               5         12
3:               0              5              8               0          3
4:              21             17             59              15         12
5:               1              0              1               0          4
6:              10              3             21               5         23
   ARAPact_sum FROBact_sum KROBact_sum OROBact_sum SROBact_sum HROBnao_sum
1:           2          53          17           9          95          81
2:           0          18           4          11          56          49
3:           0           9           1           1          18          21
4:           5         126          20          71         138         201
5:           0          13           6           1          33          33
6:           1          64          22           6          73          89
   CHROBnao_sum GROBnao_sum CROBnao_sum RROBnao_sum BROBnao_sum MROBnao_sum
1:           19           6          13          17          13          25
2:           14           0           3           9           4          10
3:            1           0           1           2           3           1
4:           58           6           2          24          22          42
5:           11           2           1           1           0           5
6:           19           3          28           2          12          12
   FASSact_sum KASSact_sum OASSact_sum HASSact_sum FEBURact_Sum UBURact_sum
1:          17          35         132          69          436         295
2:           8          23          86         123          183         193
3:           0           3           4           2           61          69
4:          15          16          73          77          748         634
5:           4           0           9          11           61          33
6:          19          56         120          65          698         241
   RESDBUR_sum RNBURnao_sum RDBURnao_sum RUBURnao_sum NRESBUR_sum NNBURnao_sum
1:         538          131          252          155         193           76
2:         213           40           67          106         163           31
3:          73           11           60            2          57           25
4:         962          225          418          319         420          171
5:          36            8           25            3          58           40
6:         593          106          313          174         346           76
   NDBURnao_sum NUBURnao_sum MVTact_sum TMVTact_sum OMVTact_sum PPLARnao_sum
1:           33           84        879           2          66           14
2:           18          114        250          59          24            0
3:           31            1        116          21           5            1
4:          112          137        849         169         110           22
5:           14            4        182          33          13           17
6:           34          236        719          95          67            3
   PSLARnao_sum SLLARnao_sum MVLARnao_sum MVPLARnao_sum BILARnao_sum
1:           14           76         1048            56           54
2:            1          176          652            14          176
3:            2           27          229            31           47
4:           34          376         2373          1097          374
5:            2          194          219           122           35
6:           26          391         2269           325           79
   FBLARnao_sum COMLARnao_sum AOLARnao_sum LT400nao_sum LT200400nao_sum
1:          192             5          214          681             301
2:          172             8          323          371             274
3:           60             1           48           76             101
4:          539             7         1279         1257            1124
5:           44             0          111          254             110
6:          266            13          531         1298             663
   LT50200nao_sum LT50nao_sum
1:            308         383
2:            336         541
3:            120         149
4:           1178        2542
5:            141         239
6:            738        1204


%%R
setkey(D_T,Year)

crime = 6
res = D_T[,sum(ForRape_sum),by=Year]
print(res)
class(res)

    Year   V1
 1: 2004 9598
 2: 2005 9345
 3: 2006 9213
 4: 2007 9047
 5: 2008 8906
 6: 2009 8698
 7: 2010 8325
 8: 2011 7678
 9: 2012 7828
10: 2013 7459
[1] "data.table" "data.frame"


%%R
plot(res$Year,res$V1,type="b",lwd=3,col="blue",
	xlab="Year",ylab="Forced Rape")


%%R
setkey(D_T,County)
res = D_T[,sum(ForRape_sum),by=County]
print(res)
setnames(res,"V1","Rapes")

County_Rapes = as.data.table(res)  #This is not really needed
setkey(County_Rapes,Rapes)
print(County_Rapes)

                    County    V1
 1:         Alameda County  4979
 2:          Alpine County    15
 3:          Amador County   153
 4:           Butte County   930
 5:       Calaveras County   148
 6:          Colusa County    60
 7:    Contra Costa County  1848
 8:       Del Norte County   236
 9:       El Dorado County   351
10:          Fresno County  1960
11:           Glenn County    56
12:        Humboldt County   495
13:        Imperial County   263
14:            Inyo County    52
15:            Kern County  1935
16:           Kings County   356
17:            Lake County   262
18:          Lassen County    96
19:     Los Angeles County 21483
20:          Madera County   408
21:           Marin County   452
22:        Mariposa County    46
23:       Mendocino County   328
24:          Merced County   738
25:           Modoc County    64
26:            Mono County    61
27:        Monterey County  1062
28:            Napa County   354
29:          Nevada County   214
30:          Orange County  4509
31:          Placer County   611
32:          Plumas County   115
33:       Riverside County  4321
34:      Sacramento County  4084
35:      San Benito County   151
36:  San Bernardino County  4900
37:       San Diego County  7378
38:   San Francisco County  1498
39:     San Joaquin County  1612
40: San Luis Obispo County   900
41:       San Mateo County  1381
42:   Santa Barbara County  1352
43:     Santa Clara County  3832
44:      Santa Cruz County   865
45:          Shasta County  1089
46:          Sierra County     2
47:        Siskiyou County   143
48:          Solano County  1150
49:          Sonoma County  1558
50:      Stanislaus County  1348
51:          Sutter County   274
52:          Tehama County   165
53:         Trinity County    28
54:          Tulare County  1114
55:        Tuolumne County   160
56:         Ventura County  1146
57:            Yolo County   729
58:            Yuba County   277
                    County    V1
                    County Rapes
 1:          Sierra County     2
 2:          Alpine County    15
 3:         Trinity County    28
 4:        Mariposa County    46
 5:            Inyo County    52
 6:           Glenn County    56
 7:          Colusa County    60
 8:            Mono County    61
 9:           Modoc County    64
10:          Lassen County    96
11:          Plumas County   115
12:        Siskiyou County   143
13:       Calaveras County   148
14:      San Benito County   151
15:          Amador County   153
16:        Tuolumne County   160
17:          Tehama County   165
18:          Nevada County   214
19:       Del Norte County   236
20:            Lake County   262
21:        Imperial County   263
22:          Sutter County   274
23:            Yuba County   277
24:       Mendocino County   328
25:       El Dorado County   351
26:            Napa County   354
27:           Kings County   356
28:          Madera County   408
29:           Marin County   452
30:        Humboldt County   495
31:          Placer County   611
32:            Yolo County   729
33:          Merced County   738
34:      Santa Cruz County   865
35: San Luis Obispo County   900
36:           Butte County   930
37:        Monterey County  1062
38:          Shasta County  1089
39:          Tulare County  1114
40:         Ventura County  1146
41:          Solano County  1150
42:      Stanislaus County  1348
43:   Santa Barbara County  1352
44:       San Mateo County  1381
45:   San Francisco County  1498
46:          Sonoma County  1558
47:     San Joaquin County  1612
48:    Contra Costa County  1848
49:            Kern County  1935
50:          Fresno County  1960
51:     Santa Clara County  3832
52:      Sacramento County  4084
53:       Riverside County  4321
54:          Orange County  4509
55:  San Bernardino County  4900
56:         Alameda County  4979
57:       San Diego County  7378
58:     Los Angeles County 21483
                    County Rapes


%%R
par(las=2)  #makes label horizontal
#par(mar=c(3,4,2,1))  #increase y-axis margins
barplot(County_Rapes$Rapes, names.arg=County_Rapes$County,
horiz=TRUE, cex.names=0.4, col=8)


%%R
trips = read.csv("DSTMAA_data/201408_trip_data.csv",header=TRUE)
print(names(trips))

 [1] "Trip.ID"         "Duration"        "Start.Date"      "Start.Station"  
 [5] "Start.Terminal"  "End.Date"        "End.Station"     "End.Terminal"   
 [9] "Bike.."          "Subscriber.Type" "Zip.Code"


%%R
print(length(trips$Trip.ID))
print(summary(trips$Duration/60))
print(mean(trips$Duration/60,trim=0.01))

[1] 171792
     Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
    1.000     5.750     8.617    18.875    12.683 11941.333 
[1] 13.10277


%%R
start_stn = unique(trips$Start.Terminal)
print(sort(start_stn))
print(length(start_stn))

 [1]  2  3  4  5  6  7  8  9 10 11 12 13 14 16 21 22 23 24 25 26 27 28 29 30 31
[26] 32 33 34 35 36 37 38 39 41 42 45 46 47 48 49 50 51 54 55 56 57 58 59 60 61
[51] 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 80 82 83 84
[1] 70


%%R
end_stn = unique(trips$End.Terminal)
print(sort(end_stn))
print(length(end_stn))

 [1]  2  3  4  5  6  7  8  9 10 11 12 13 14 16 21 22 23 24 25 26 27 28 29 30 31
[26] 32 33 34 35 36 37 38 39 41 42 45 46 47 48 49 50 51 54 55 56 57 58 59 60 61
[51] 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 80 82 83 84
[1] 70


%%R
require(plyr)
library(dplyr)


%%R
res = filter(trips,Start.Terminal==50,End.Terminal==51)
head(res)

  Trip.ID Duration      Start.Date                        Start.Station
1  432024     3954 8/30/2014 14:46 Harry Bridges Plaza (Ferry Building)
2  432022     4120 8/30/2014 14:44 Harry Bridges Plaza (Ferry Building)
3  431895     1196 8/30/2014 12:04 Harry Bridges Plaza (Ferry Building)
4  431891     1249 8/30/2014 12:03 Harry Bridges Plaza (Ferry Building)
5  430408      145  8/29/2014 9:08 Harry Bridges Plaza (Ferry Building)
6  429148      862 8/28/2014 13:47 Harry Bridges Plaza (Ferry Building)
  Start.Terminal        End.Date           End.Station End.Terminal Bike..
1             50 8/30/2014 15:52 Embarcadero at Folsom           51    306
2             50 8/30/2014 15:52 Embarcadero at Folsom           51    659
3             50 8/30/2014 12:24 Embarcadero at Folsom           51    556
4             50 8/30/2014 12:23 Embarcadero at Folsom           51    621
5             50  8/29/2014 9:11 Embarcadero at Folsom           51    400
6             50 8/28/2014 14:02 Embarcadero at Folsom           51    589
  Subscriber.Type Zip.Code
1        Customer    94952
2        Customer    94952
3        Customer    11238
4        Customer    11238
5      Subscriber    94070
6      Subscriber    94107


%%R
trips_sorted = arrange(trips,Start.Station,End.Station)
head(trips_sorted)

  Trip.ID Duration      Start.Date Start.Station Start.Terminal        End.Date
1  426408      120  8/27/2014 7:40 2nd at Folsom             62  8/27/2014 7:42
2  411496    21183 8/16/2014 13:36 2nd at Folsom             62 8/16/2014 19:29
3  396676     3707  8/6/2014 11:38 2nd at Folsom             62  8/6/2014 12:40
4  385761      123 7/29/2014 19:52 2nd at Folsom             62 7/29/2014 19:55
5  364633     6395 7/15/2014 13:39 2nd at Folsom             62 7/15/2014 15:26
6  362776     9433 7/14/2014 13:36 2nd at Folsom             62 7/14/2014 16:13
    End.Station End.Terminal Bike.. Subscriber.Type Zip.Code
1 2nd at Folsom           62    527      Subscriber    94107
2 2nd at Folsom           62    508        Customer    94105
3 2nd at Folsom           62    109        Customer    31200
4 2nd at Folsom           62    421      Subscriber    94107
5 2nd at Folsom           62    448        Customer     2184
6 2nd at Folsom           62    454        Customer     2184


%%R
trips_sorted = arrange(trips,desc(Start.Station),End.Station)
head(trips_sorted)

  Trip.ID Duration      Start.Date
1  416755      285 8/20/2014 11:37
2  411270      257  8/16/2014 7:03
3  410269      286 8/15/2014 10:34
4  405273      382 8/12/2014 14:27
5  398372      401  8/7/2014 10:10
6  393012      317  8/4/2014 10:59
                                  Start.Station Start.Terminal        End.Date
1 Yerba Buena Center of the Arts (3rd @ Howard)             68 8/20/2014 11:42
2 Yerba Buena Center of the Arts (3rd @ Howard)             68  8/16/2014 7:07
3 Yerba Buena Center of the Arts (3rd @ Howard)             68 8/15/2014 10:38
4 Yerba Buena Center of the Arts (3rd @ Howard)             68 8/12/2014 14:34
5 Yerba Buena Center of the Arts (3rd @ Howard)             68  8/7/2014 10:16
6 Yerba Buena Center of the Arts (3rd @ Howard)             68  8/4/2014 11:04
    End.Station End.Terminal Bike.. Subscriber.Type Zip.Code
1 2nd at Folsom           62    383        Customer    95060
2 2nd at Folsom           62    614      Subscriber    94107
3 2nd at Folsom           62    545      Subscriber    94127
4 2nd at Folsom           62    344        Customer    94110
5 2nd at Folsom           62    597      Subscriber    94127
6 2nd at Folsom           62    367      Subscriber    94127


%%R
byStartStation = group_by(trips,Start.Station)
res = summarise(byStartStation, count=n(), time=mean(Duration)/60)
print(res)

# A tibble: 70 x 3
   Start.Station                   count  time
   <fct>                           <int> <dbl>
 1 2nd at Folsom                    4165  9.32
 2 2nd at South Park                4569 11.6 
 3 2nd at Townsend                  6824 15.1 
 4 5th at Howard                    3183 14.2 
 5 Adobe on Almaden                  360 10.1 
 6 Arena Green / SAP Center          510 43.8 
 7 Beale at Market                  4293 15.7 
 8 Broadway at Main                   22 54.8 
 9 Broadway St at Battery St        2433 15.3 
10 California Ave Caltrain Station   329 51.3 
# … with 60 more rows


%%R
library(quantmod)
#nasdaq_names = stockSymbols(exchange = "NASDAQ")
#nyse_names = stockSymbols(exchange = "NYSE")
#amex_names = stockSymbols(exchange = "AMEX")
load("DSTMAA_data/stock_exchange.Rdata")

[1] 6860    9


%%R
tickers = rbind(nasdaq_names,nyse_names,amex_names)
tickers$Count = 1
print(dim(tickers))

[1] 6996   10


%%R
idx = complete.cases(tickers)
df = tickers[idx,]
print(nrow(df))

[1] 0


%%R
library(dplyr)
library(magrittr)

idx = which(!is.na(tickers$IPOyear))
df = tickers[idx,]
res = df %>% group_by(IPOyear) %>% summarise(numIPO = sum(Count))
print(res)

# A tibble: 43 x 2
   IPOyear numIPO
   <fct>    <dbl>
 1 1972         2
 2 1973         1
 3 1980         2
 4 1981         5
 5 1982         1
 6 1983        10
 7 1984         2
 8 1985         4
 9 1986        32
10 1987        25
# … with 33 more rows


%%R
n = length(res$IPOyear)
res = res[1:(n-1),]   #removes the n/a year
barplot(res$numIPO,names.arg = res$IPOyear)


%%R
library(rbokeh)
p = figure(width=500,height=300) %>% ly_points(IPOyear,numIPO,data=res,hover=c(IPOyear,numIPO)) %>% ly_lines(IPOyear,numIPO,data=res)
p

MoRe: Data Handling and Other Useful Things {#MoreDataHandling}¶

Getting started¶

Data Extraction of stocks using the quantmod package¶

Print the length of each stock series.¶

Convert closing adjusted prices of all stocks into individual data.frames.¶

Using the merge function¶

Make a single data frame¶

Plot the stock series¶

Convert the data into returns¶

Descriptive statistics¶

Correlation matrix¶

Correlogram¶

Market regression¶

Return versus systematic risk¶

Extracting online corporate data¶

Get all stock symbols from exchanges¶

Using the DT package¶

Web scraping¶

Using the apply class of functions¶

Getting interest rate data from FRED¶

Using the custom function¶

Organize the data by date¶

Handling missing values¶

Cross-Sectional Data (an example)¶

Access file from the web using the readLines function¶

Or, read the file from CSV¶

Failed banks by State¶

Check for missing data¶

Sort by State¶

Use the aggregate function (for subtotals)¶

Data by acquiring bank¶

Handling dates with lubridate¶

By Month¶

By Day¶

Using the data.table package¶

California Crime Statistics¶

Examine the data.table¶

Indexing the data.table¶

Plotting from the data.table¶

By County¶

Barplot of crime¶

Bay Area Bike Share data¶

Summarize Trips Data¶

Start and End Bike Stations¶

The plyr package family¶

Filter the data¶

Sorting using the arrange function¶

Reverse order sort¶

Descriptive statistics¶

Other functions in dplyr¶

Application to IPO Data¶

Bokeh plots¶