%pylab inline
import os
from ipypublish import nb_setup

Populating the interactive namespace from numpy and matplotlib


%load_ext rpy2.ipython


#%load_ext RWinOut


%%R
#PLOT HISTOGRAM FROM STANDARD NORMAL RANDOM NUMBERS
x = rnorm(1000000)
hist(x,50)
grid(col="blue",lwd=2)


%%R
#SYSTEM COMMANDS
#The following command will show the files in the directory which are notebooks.
print(system("ls -lt"))  #This command will not work in the notebook.

[1] 0


%%R
#READ IN DATA FROM CSV FILE
data = read.csv("DSTMAA_data/goog.csv",header=TRUE)
print(head(data))
m = length(data)
n = length(data[,1])
print(c("Number of columns = ",m))
print(c("Length of data series = ",n))

        Date     Open     High      Low    Close Adj.Close   Volume
1 2004-08-19 49.67690 51.69378 47.66995 49.84580  49.84580 44994500
2 2004-08-20 50.17863 54.18756 49.92529 53.80505  53.80505 23005800
3 2004-08-23 55.01717 56.37334 54.17266 54.34653  54.34653 18393200
4 2004-08-24 55.26058 55.43942 51.45036 52.09616  52.09616 15361800
5 2004-08-25 52.14087 53.65105 51.60436 52.65751  52.65751  9257400
6 2004-08-26 52.13591 53.62621 51.99184 53.60634  53.60634  7148200
[1] "Number of columns = " "7"                   
[1] "Length of data series = " "3607"


%%R
#REVERSE ORDER THE DATA (Also get some practice with a for loop)
for (j in 1:m) {
    data[,j] = rev(data[,j])
}
print(head(data))
stkp = as.matrix(data[,7])
plot(stkp,type="l",col="blue")
grid(lwd=2)

        Date    Open    High     Low   Close Adj.Close  Volume
1 2018-12-14 1049.98 1062.60 1040.79 1042.10   1042.10 1685900
2 2018-12-13 1068.07 1079.76 1053.93 1061.90   1061.90 1329800
3 2018-12-12 1068.00 1081.65 1062.79 1063.68   1063.68 1523800
4 2018-12-11 1056.49 1060.60 1039.84 1051.75   1051.75 1394700
5 2018-12-10 1035.05 1048.45 1023.29 1039.55   1039.55 1807700
6 2018-12-07 1060.01 1075.26 1028.50 1036.58   1036.58 2101200


%%R
#install.packages("quantmod")

NULL


%%R
#USE THE QUANTMOD PACKAGE TO GET STOCK DATA
library(quantmod)
getSymbols("IBM")

/Users/srdas/anaconda3/lib/python3.7/site-packages/rpy2/rinterface/__init__.py:146: RRuntimeWarning: Loading required package: xts

  warnings.warn(x, RRuntimeWarning)
/Users/srdas/anaconda3/lib/python3.7/site-packages/rpy2/rinterface/__init__.py:146: RRuntimeWarning: Loading required package: zoo

  warnings.warn(x, RRuntimeWarning)
/Users/srdas/anaconda3/lib/python3.7/site-packages/rpy2/rinterface/__init__.py:146: RRuntimeWarning: 
Attaching package: ‘zoo’


  warnings.warn(x, RRuntimeWarning)
/Users/srdas/anaconda3/lib/python3.7/site-packages/rpy2/rinterface/__init__.py:146: RRuntimeWarning: The following objects are masked from ‘package:base’:

    as.Date, as.Date.numeric


  warnings.warn(x, RRuntimeWarning)
/Users/srdas/anaconda3/lib/python3.7/site-packages/rpy2/rinterface/__init__.py:146: RRuntimeWarning: Registered S3 method overwritten by 'xts':
  method     from
  as.zoo.xts zoo 

  warnings.warn(x, RRuntimeWarning)
/Users/srdas/anaconda3/lib/python3.7/site-packages/rpy2/rinterface/__init__.py:146: RRuntimeWarning: Loading required package: TTR

  warnings.warn(x, RRuntimeWarning)
/Users/srdas/anaconda3/lib/python3.7/site-packages/rpy2/rinterface/__init__.py:146: RRuntimeWarning: Registered S3 method overwritten by 'quantmod':
  method            from
  as.zoo.data.frame zoo 

  warnings.warn(x, RRuntimeWarning)
/Users/srdas/anaconda3/lib/python3.7/site-packages/rpy2/rinterface/__init__.py:146: RRuntimeWarning: Version 0.4-0 included new data defaults. See ?getSymbols.
Learn from a quantmod author: https://www.datacamp.com/courses/importing-and-managing-financial-data-in-r

  warnings.warn(x, RRuntimeWarning)
/Users/srdas/anaconda3/lib/python3.7/site-packages/rpy2/rinterface/__init__.py:146: RRuntimeWarning: ‘getSymbols’ currently uses auto.assign=TRUE by default, but will
use auto.assign=FALSE in 0.5-0. You will still be able to use
‘loadSymbols’ to automatically load data. getOption("getSymbols.env")
and getOption("getSymbols.auto.assign") will still be checked for
alternate defaults.

This message is shown once per session and may be disabled by setting 
options("getSymbols.warning4.0"=FALSE). See ?getSymbols for details.


  warnings.warn(x, RRuntimeWarning)

[1] "IBM"


%%R
chartSeries(IBM)


%%R
head(IBM)

           IBM.Open IBM.High IBM.Low IBM.Close IBM.Volume IBM.Adjusted
2007-01-03    97.18    98.40   96.26     97.27    9196800     68.48550
2007-01-04    97.25    98.79   96.88     98.31   10524500     69.21778
2007-01-05    97.60    97.95   96.91     97.42    7221300     68.59115
2007-01-08    98.50    99.50   98.35     98.90   10340000     69.63318
2007-01-09    99.08   100.33   99.07    100.07   11108200     70.45693
2007-01-10    98.50    99.05   97.93     98.89    8744800     69.62614


%%R
library(magrittr)
dts = IBM %>% as.data.frame %>% row.names
dts %>% head %>% print
dts %>% length %>% print

[1] "2007-01-03" "2007-01-04" "2007-01-05" "2007-01-08" "2007-01-09"
[6] "2007-01-10"
[1] 3205


%%R
stkp = as.matrix(IBM$IBM.Adjusted)
rets = diff(log(stkp))
dts = as.Date(dts)
plot(dts,stkp,type="l",col="blue",xlab="Years",ylab="Stock Price of IBM")
grid(lwd=2)


%%R
#DESCRIPTIVE STATS
summary(IBM)

     Index               IBM.Open         IBM.High         IBM.Low     
 Min.   :2007-01-03   Min.   : 72.74   Min.   : 76.98   Min.   : 69.5  
 1st Qu.:2010-03-10   1st Qu.:125.92   1st Qu.:126.99   1st Qu.:124.7  
 Median :2013-05-15   Median :149.60   Median :150.54   Median :148.3  
 Mean   :2013-05-14   Mean   :149.81   Mean   :151.00   Mean   :148.7  
 3rd Qu.:2016-07-20   3rd Qu.:175.65   3rd Qu.:177.27   3rd Qu.:174.8  
 Max.   :2019-09-25   Max.   :215.38   Max.   :215.90   Max.   :214.3  
   IBM.Close        IBM.Volume        IBM.Adjusted   
 Min.   : 71.74   Min.   : 1027500   Min.   : 52.12  
 1st Qu.:126.08   1st Qu.: 3489400   1st Qu.: 94.93  
 Median :149.25   Median : 4699100   Median :132.22  
 Mean   :149.89   Mean   : 5631818   Mean   :122.00  
 3rd Qu.:175.77   3rd Qu.: 6799200   3rd Qu.:144.85  
 Max.   :215.80   Max.   :30770700   Max.   :169.22


%%R
#STOCK VOLATILITY
sigma_daily = sd(rets)
sigma_annual = sigma_daily*sqrt(252)
print(sigma_annual)
print(c("Sharpe ratio = ",mean(rets)*252/sigma_annual))

[1] 0.2217987
[1] "Sharpe ratio = "  "0.26146725711648"


%%R
library(quantmod)
getSymbols(c("GOOG","AAPL","CSCO","IBM"))

[1] "GOOG" "AAPL" "CSCO" "IBM"


%%R
goog = as.numeric(GOOG[,6])
aapl = as.numeric(AAPL[,6])
csco = as.numeric(CSCO[,6])
ibm = as.numeric(IBM[,6])
stkdata = cbind(goog,aapl,csco,ibm)
dim(stkdata)

[1] 3205    4


%%R
n = dim(stkdata)[1]
rets = log(stkdata[2:n,]/stkdata[1:(n-1),])
colMeans(rets)

        goog         aapl         csco          ibm 
0.0005235354 0.0009525354 0.0002568174 0.0002301313


%%R
cv = cov(rets)
print(cv,2)
cr = cor(rets)
print(cr,4)

        goog    aapl    csco     ibm
goog 0.00032 0.00019 0.00017 0.00011
aapl 0.00019 0.00039 0.00018 0.00013
csco 0.00017 0.00018 0.00033 0.00014
ibm  0.00011 0.00013 0.00014 0.00020
       goog   aapl   csco    ibm
goog 1.0000 0.5462 0.5063 0.4557
aapl 0.5462 1.0000 0.4945 0.4616
csco 0.5063 0.4945 1.0000 0.5589
ibm  0.4557 0.4616 0.5589 1.0000


%%R
#Only works in RStudio
#View(data)

NULL


%%R
#THIS IS A DATA FRAME AND CAN BE REFERENCED BY COLUMN NAMES
print(names(data))
print(head(data$Close))

[1] "Date"      "Open"      "High"      "Low"       "Close"     "Adj.Close"
[7] "Volume"   
[1] 1042.10 1061.90 1063.68 1051.75 1039.55 1036.58


%%R
class(data)

[1] "data.frame"


%%R
#DESCRIPTIVE STATISTICS
summary(data)

         Date           Open              High              Low         
 2004-08-19:   1   Min.   :  49.27   Min.   :  50.54   Min.   :  47.67  
 2004-08-20:   1   1st Qu.: 232.20   1st Qu.: 234.77   1st Qu.: 229.58  
 2004-08-23:   1   Median : 303.78   Median : 306.46   Median : 301.22  
 2004-08-24:   1   Mean   : 438.93   Mean   : 442.87   Mean   : 434.65  
 2004-08-25:   1   3rd Qu.: 584.82   3rd Qu.: 587.96   3rd Qu.: 579.83  
 2004-08-26:   1   Max.   :1271.00   Max.   :1273.89   Max.   :1249.02  
 (Other)   :3601                                                        
     Close           Adj.Close           Volume        
 Min.   :  49.68   Min.   :  49.68   Min.   :    7900  
 1st Qu.: 232.10   1st Qu.: 232.10   1st Qu.: 2050450  
 Median : 303.94   Median : 303.94   Median : 4791200  
 Mean   : 438.82   Mean   : 438.82   Mean   : 7518149  
 3rd Qu.: 583.90   3rd Qu.: 583.90   3rd Qu.: 9869450  
 Max.   :1268.33   Max.   :1268.33   Max.   :82768100


%%R
#USING A PARTICULAR COLUMN
stkp = data$Adj.Close
dt = data$Date
print(c("Length of stock series = ",length(stkp)))
#Ln of differenced stk prices gives continuous returns
rets = diff(log(stkp))  #diff() takes first differences
print(c("Length of return series = ",length(rets)))
print(head(rets))
plot(rets,type="l",col="blue")

[1] "Length of stock series = " "3607"                     
[1] "Length of return series = " "3606"                      
[1]  0.018821894  0.001674866 -0.011279201 -0.011667469 -0.002861184
[6]  0.030544219


%%R
print(summary(rets))

      Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
-0.1822511 -0.0098321 -0.0005632 -0.0008431  0.0075836  0.1234015


%%R
r_sd = sd(rets)
r_sd_annual = r_sd*sqrt(252)
print(c(r_sd,r_sd_annual))

#What if we take the stdev of annualized returns?
print(sd(rets*252))
#Huh?
print(sd(rets*252))/252
print(sd(rets*252))/sqrt(252)

[1] 0.01896794 0.30110676
[1] 4.779922
[1] 4.779922
[1] 4.779922
[1] 0.3011068


%%R
#Variance
r_var = var(rets)
r_var_annual = var(rets)*252
print(c(r_var,r_var_annual))

[1] 0.0003597829 0.0906652783


%%R
#HIGHER-ORDER MOMENTS
library(moments)
hist(rets,50)
print(c("Skewness=",skewness(rets)))
print(c("Kurtosis=",kurtosis(rets)))

[1] "Skewness="         "-0.57512743445236"
[1] "Kurtosis="        "12.6866762557976"


%%R
print(skewness(rnorm(1000000)))
print(kurtosis(rnorm(1000000)))

[1] 0.00078868
[1] 2.994231


%%R
#READ IN MORE DATA USING SPACE DELIMITED FILE
data = read.table("DSTMAA_data/markowitzdata.txt",header=TRUE)
print(head(data))
print(c("Length of data series = ",length(data$X.DATE)))

    X.DATE         SUNW         MSFT          IBM         CSCO        AMZN
1 20010102 -0.087443948  0.000000000 -0.002205882 -0.129084975 -0.10843374
2 20010103  0.297297299  0.105187319  0.115696386  0.240150094  0.26576576
3 20010104 -0.060606062  0.010430248 -0.015191546  0.013615734 -0.11743772
4 20010105 -0.096774191  0.014193549  0.008718981 -0.125373140 -0.06048387
5 20010108  0.006696429 -0.003816794 -0.004654255 -0.002133106  0.02575107
6 20010109  0.044345897  0.058748405 -0.010688043  0.015818726  0.09623431
    mktrf     smb     hml      rf
1 -0.0345 -0.0037  0.0209 0.00026
2  0.0527  0.0097 -0.0493 0.00026
3 -0.0121  0.0083 -0.0015 0.00026
4 -0.0291  0.0027  0.0242 0.00026
5 -0.0037 -0.0053  0.0129 0.00026
6  0.0046  0.0044 -0.0026 0.00026
[1] "Length of data series = " "1507"


%%R
#COMPUTE COVARIANCE AND CORRELATION
rets = as.data.frame(cbind(data$SUNW,data$MSFT,data$IBM,data$CSCO,data$AMZN))
names(rets) = c("SUNW","MSFT","IBM","CSCO","AMZN")
print(cov(rets))
print(cor(rets))

             SUNW         MSFT          IBM         CSCO         AMZN
SUNW 0.0014380649 0.0003241903 0.0003104236 0.0007174466 0.0004594254
MSFT 0.0003241903 0.0003646160 0.0001968077 0.0003301491 0.0002678712
IBM  0.0003104236 0.0001968077 0.0002991120 0.0002827622 0.0002056656
CSCO 0.0007174466 0.0003301491 0.0002827622 0.0009502685 0.0005041975
AMZN 0.0004594254 0.0002678712 0.0002056656 0.0005041975 0.0016479809
          SUNW      MSFT       IBM      CSCO      AMZN
SUNW 1.0000000 0.4477060 0.4733132 0.6137298 0.2984349
MSFT 0.4477060 1.0000000 0.5959466 0.5608788 0.3455669
IBM  0.4733132 0.5959466 1.0000000 0.5303729 0.2929333
CSCO 0.6137298 0.5608788 0.5303729 1.0000000 0.4029038
AMZN 0.2984349 0.3455669 0.2929333 0.4029038 1.0000000


%%R
#Repeat the same process using pipes
library(magrittr)
rets %>% cor %>% print

          SUNW      MSFT       IBM      CSCO      AMZN
SUNW 1.0000000 0.4477060 0.4733132 0.6137298 0.2984349
MSFT 0.4477060 1.0000000 0.5959466 0.5608788 0.3455669
IBM  0.4733132 0.5959466 1.0000000 0.5303729 0.2929333
CSCO 0.6137298 0.5608788 0.5303729 1.0000000 0.4029038
AMZN 0.2984349 0.3455669 0.2929333 0.4029038 1.0000000


%%R
x = matrix(rnorm(12),4,3)
print(x)

           [,1]       [,2]        [,3]
[1,]  0.0518732 -0.6717193 -0.92050937
[2,] -0.2082150  0.4202647 -0.47640145
[3,] -0.1571675 -0.2385928 -0.88844408
[4,] -0.5663517  0.1489406  0.09808534


%%R
print(t(x),3)

        [,1]   [,2]   [,3]    [,4]
[1,]  0.0519 -0.208 -0.157 -0.5664
[2,] -0.6717  0.420 -0.239  0.1489
[3,] -0.9205 -0.476 -0.888  0.0981


%%R
print(t(x) %*% x,3)
print(x %*% t(x),3)

       [,1]   [,2]  [,3]
[1,]  0.392 -0.169 0.136
[2,] -0.169  0.707 0.645
[3,]  0.136  0.645 1.873
       [,1]  [,2]    [,3]    [,4]
[1,]  1.301 0.145  0.9699 -0.2197
[2,]  0.145 0.447  0.3557  0.1338
[3,]  0.970 0.356  0.8710 -0.0337
[4,] -0.220 0.134 -0.0337  0.3526


%%R
#CREATE A RANDOM MATRIX
x = matrix(runif(12),4,3)
print(x)
print(x*2)
print(x+x)
print(t(x) %*% x)   #THIS SHOULD BE 3x3
#print(x %*% x)  #SHOULD GIVE AN ERROR

           [,1]      [,2]        [,3]
[1,] 0.11745159 0.7604409 0.616825059
[2,] 0.57022200 0.1588325 0.660023327
[3,] 0.05157777 0.3268232 0.314505190
[4,] 0.40146040 0.8728381 0.005924898
          [,1]      [,2]      [,3]
[1,] 0.2349032 1.5208819 1.2336501
[2,] 1.1404440 0.3176650 1.3200467
[3,] 0.1031555 0.6536465 0.6290104
[4,] 0.8029208 1.7456762 0.0118498
          [,1]      [,2]      [,3]
[1,] 0.2349032 1.5208819 1.2336501
[2,] 1.1404440 0.3176650 1.3200467
[3,] 0.1031555 0.6536465 0.6290104
[4,] 0.8029208 1.7456762 0.0118498
          [,1]      [,2]      [,3]
[1,] 0.5027787 0.5471515 0.4674070
[2,] 0.5471515 1.4721579 0.6818513
[3,] 0.4674070 0.6818513 0.9150526


%%R
cv_inv = solve(cv)
print(cv_inv,3)

      goog  aapl  csco   ibm
goog  5052 -1619 -1222 -1014
aapl -1619  4121  -961 -1053
csco -1222  -961  5207 -2439
ibm  -1014 -1053 -2439  8180


%%R
print(cv_inv %*% cv,3)

          goog      aapl     csco       ibm
goog  1.00e+00 -6.97e-17 2.02e-16  4.33e-17
aapl  2.07e-16  1.00e+00 1.97e-16  9.96e-17
csco -1.16e-16 -1.70e-16 1.00e+00 -1.48e-16
ibm   2.95e-17  6.03e-18 1.39e-16  1.00e+00


%%R
library(corpcor)
is.positive.definite(cv)

[1] TRUE


%%R
cv = cov(rets)
print(round(cv,6))
cv_inv = solve(cv)   #TAKE THE INVERSE
print(round(cv_inv %*% cv,2)) #CHECK THAT WE GET IDENTITY MATRIX

         SUNW     MSFT      IBM     CSCO     AMZN
SUNW 0.001438 0.000324 0.000310 0.000717 0.000459
MSFT 0.000324 0.000365 0.000197 0.000330 0.000268
IBM  0.000310 0.000197 0.000299 0.000283 0.000206
CSCO 0.000717 0.000330 0.000283 0.000950 0.000504
AMZN 0.000459 0.000268 0.000206 0.000504 0.001648
     SUNW MSFT IBM CSCO AMZN
SUNW    1    0   0    0    0
MSFT    0    1   0    0    0
IBM     0    0   1    0    0
CSCO    0    0   0    1    0
AMZN    0    0   0    0    1


%%R
#CHECK IF MATRIX IS POSITIVE DEFINITE (why do we check this?)
library(corpcor)
is.positive.definite(cv)

[1] TRUE


%%R
#ROOT SOLVING IN R
library(rootSolve)
fn = function(x,y) {
    result = (x^2+y^2-1)^3 - x^2*y^3
}
yy = 1
sol = multiroot(f=fn,start=1,maxiter=10000,rtol=0.000001,atol=0.000001,ctol=0.00001,y=yy)
print(c("solution=",sol$root))
check = fn(sol$root,yy)
print(check)

[1] "solution=" "1"        
[1] 0


%%R
fn = function(x) {
    result = 0.065*(x*(1-x))^0.5- 0.05 +0.05*x
}
sol = uniroot.all(f=fn,c(0,1))
print(sol)
check = fn(sol)
print(check)

[1] 1.0000000 0.3717627
[1] 0.000000e+00 1.041576e-06


%%R
data = read.table("DSTMAA_data/markowitzdata.txt",header=TRUE)  #THESE DATA ARE RETURNS
print(names(data))  #THIS IS A DATA FRAME (important construct in R)
head(data)

 [1] "X.DATE" "SUNW"   "MSFT"   "IBM"    "CSCO"   "AMZN"   "mktrf"  "smb"   
 [9] "hml"    "rf"    
    X.DATE         SUNW         MSFT          IBM         CSCO        AMZN
1 20010102 -0.087443948  0.000000000 -0.002205882 -0.129084975 -0.10843374
2 20010103  0.297297299  0.105187319  0.115696386  0.240150094  0.26576576
3 20010104 -0.060606062  0.010430248 -0.015191546  0.013615734 -0.11743772
4 20010105 -0.096774191  0.014193549  0.008718981 -0.125373140 -0.06048387
5 20010108  0.006696429 -0.003816794 -0.004654255 -0.002133106  0.02575107
6 20010109  0.044345897  0.058748405 -0.010688043  0.015818726  0.09623431
    mktrf     smb     hml      rf
1 -0.0345 -0.0037  0.0209 0.00026
2  0.0527  0.0097 -0.0493 0.00026
3 -0.0121  0.0083 -0.0015 0.00026
4 -0.0291  0.0027  0.0242 0.00026
5 -0.0037 -0.0053  0.0129 0.00026
6  0.0046  0.0044 -0.0026 0.00026


%%R
#RUN A MULTIVARIATE REGRESSION ON STOCK DATA
Y = as.matrix(data$SUNW)
X = as.matrix(data[,3:6])
res = lm(Y~X)
summary(res)

Call:
lm(formula = Y ~ X)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.233758 -0.014921 -0.000711  0.014214  0.178859 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) -0.0007256  0.0007512  -0.966  0.33422    
XMSFT        0.1382312  0.0529045   2.613  0.00907 ** 
XIBM         0.3791500  0.0566232   6.696 3.02e-11 ***
XCSCO        0.5769097  0.0317799  18.153  < 2e-16 ***
XAMZN        0.0324899  0.0204802   1.586  0.11286    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.02914 on 1502 degrees of freedom
Multiple R-squared:  0.4112,	Adjusted R-squared:  0.4096 
F-statistic: 262.2 on 4 and 1502 DF,  p-value: < 2.2e-16


%%R
#CHECK THE REGRESSION
n = length(Y)
X = cbind(matrix(1,n,1),X)
b = solve(t(X) %*% X) %*% (t(X) %*% Y)
print(b)

              [,1]
     -0.0007256342
MSFT  0.1382312148
IBM   0.3791500328
CSCO  0.5769097262
AMZN  0.0324898716


%%R
#REGRESSION ON NCAA BASKETBALL PLAYOFF DATA
ncaa = read.table("DSTMAA_data/ncaa.txt",header=TRUE)
print(head(ncaa))
y = ncaa[3]
y = as.matrix(y)
x = ncaa[4:14]
x = as.matrix(x)

  No          NAME GMS  PTS  REB  AST   TO  A.T STL BLK   PF    FG    FT   X3P
1  1 NorthCarolina   6 84.2 41.5 17.8 12.8 1.39 6.7 3.8 16.7 0.514 0.664 0.417
2  2      Illinois   6 74.5 34.0 19.0 10.2 1.87 8.0 1.7 16.5 0.457 0.753 0.361
3  3    Louisville   5 77.4 35.4 13.6 11.0 1.24 5.4 4.2 16.6 0.479 0.702 0.376
4  4 MichiganState   5 80.8 37.8 13.0 12.6 1.03 8.4 2.4 19.8 0.445 0.783 0.329
5  5       Arizona   4 79.8 35.0 15.8 14.5 1.09 6.0 6.5 13.3 0.542 0.759 0.397
6  6      Kentucky   4 72.8 32.3 12.8 13.5 0.94 7.3 3.5 19.5 0.510 0.663 0.400


%%R
fm = lm(y~x)
res = summary(fm)
res

Call:
lm(formula = y ~ x)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.5074 -0.5527 -0.2454  0.6705  2.2344 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) -10.194804   2.892203  -3.525 0.000893 ***
xPTS         -0.010442   0.025276  -0.413 0.681218    
xREB          0.105048   0.036951   2.843 0.006375 ** 
xAST         -0.060798   0.091102  -0.667 0.507492    
xTO          -0.034545   0.071393  -0.484 0.630513    
xA.T          1.325402   1.110184   1.194 0.237951    
xSTL          0.181015   0.068999   2.623 0.011397 *  
xBLK          0.007185   0.075054   0.096 0.924106    
xPF          -0.031705   0.044469  -0.713 0.479050    
xFG          13.823190   3.981191   3.472 0.001048 ** 
xFT           2.694716   1.118595   2.409 0.019573 *  
xX3P          2.526831   1.754038   1.441 0.155698    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.9619 on 52 degrees of freedom
Multiple R-squared:  0.5418,	Adjusted R-squared:  0.4448 
F-statistic: 5.589 on 11 and 52 DF,  p-value: 7.889e-06


%%R
#CREATING DATA FRAMES
ncaa_data_frame = data.frame(y=as.matrix(ncaa[3]),x=as.matrix(ncaa[4:14]))
fm = lm(y~x,data=ncaa_data_frame)
summary(fm)

Call:
lm(formula = y ~ x, data = ncaa_data_frame)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.5074 -0.5527 -0.2454  0.6705  2.2344 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) -10.194804   2.892203  -3.525 0.000893 ***
xPTS         -0.010442   0.025276  -0.413 0.681218    
xREB          0.105048   0.036951   2.843 0.006375 ** 
xAST         -0.060798   0.091102  -0.667 0.507492    
xTO          -0.034545   0.071393  -0.484 0.630513    
xA.T          1.325402   1.110184   1.194 0.237951    
xSTL          0.181015   0.068999   2.623 0.011397 *  
xBLK          0.007185   0.075054   0.096 0.924106    
xPF          -0.031705   0.044469  -0.713 0.479050    
xFG          13.823190   3.981191   3.472 0.001048 ** 
xFT           2.694716   1.118595   2.409 0.019573 *  
xX3P          2.526831   1.754038   1.441 0.155698    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.9619 on 52 degrees of freedom
Multiple R-squared:  0.5418,	Adjusted R-squared:  0.4448 
F-statistic: 5.589 on 11 and 52 DF,  p-value: 7.889e-06


%%R
x = matrix(runif(300),100,3)
y = 5 + 4*x[,1] + 3*x[,2] + 2*x[,3] + rnorm(100)
y = as.matrix(y)
res = lm(y~x)
print(summary(res))

Call:
lm(formula = y ~ x)

Residuals:
    Min      1Q  Median      3Q     Max 
-2.3499 -0.7816  0.1548  0.8725  2.4822 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)   5.4788     0.3996  13.711  < 2e-16 ***
x1            4.2712     0.3748  11.396  < 2e-16 ***
x2            2.3911     0.4029   5.935 4.66e-08 ***
x3            1.5251     0.3949   3.862 0.000204 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1.093 on 96 degrees of freedom
Multiple R-squared:  0.6193,	Adjusted R-squared:  0.6074 
F-statistic: 52.05 on 3 and 96 DF,  p-value: < 2.2e-16


%%R
e = res$residuals
SSE = sum(e^2)
SST = sum((y-mean(y))^2)
SSM = SST - SSE
print(c(SSE,SSM,SST))
R2 = 1 - SSE/SST
print(R2)
n = dim(x)[1]
p = dim(x)[2]+1
MSE = SSE/(n-p)
MSM = SSM/(p-1)
MST = SST/(n-1)
print(c(n,p,MSE,MSM,MST))
Fstat = MSM/MSE
print(Fstat)

[1] 114.7558 186.6638 301.4196
[1] 0.6192822
[1] 100.000000   4.000000   1.195373  62.221270   3.044643
[1] 52.05176


%%R
ncaa = read.table("DSTMAA_data/ncaa.txt",header=TRUE)
y = as.matrix(ncaa[3])
x = as.matrix(ncaa[4:14])
result = lm(y~x)
library(lmtest)
bptest(result)

	studentized Breusch-Pagan test

data:  result
BP = 15.538, df = 11, p-value = 0.1592


%%R
wuns = matrix(1,64,1)
z = cbind(wuns,x)
b = solve(t(z) %*% z) %*% (t(z) %*% y)
result = lm(y~x)

library(car)
vb = hccm(result)
stdb = sqrt(diag(vb))
tstats = b/stdb
print(tstats)

/Users/srdas/anaconda3/lib/python3.7/site-packages/rpy2/rinterface/__init__.py:146: RRuntimeWarning: Loading required package: carData

  warnings.warn(x, RRuntimeWarning)

            GMS
    -2.68006069
PTS -0.38212818
REB  2.38342637
AST -0.40848721
TO  -0.28709450
A.T  0.65632053
STL  2.13627108
BLK  0.09548606
PF  -0.68036944
FG   3.52193532
FT   2.35677255
X3P  1.23897636


%%R
summary(result)

Call:
lm(formula = y ~ x)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.5074 -0.5527 -0.2454  0.6705  2.2344 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) -10.194804   2.892203  -3.525 0.000893 ***
xPTS         -0.010442   0.025276  -0.413 0.681218    
xREB          0.105048   0.036951   2.843 0.006375 ** 
xAST         -0.060798   0.091102  -0.667 0.507492    
xTO          -0.034545   0.071393  -0.484 0.630513    
xA.T          1.325402   1.110184   1.194 0.237951    
xSTL          0.181015   0.068999   2.623 0.011397 *  
xBLK          0.007185   0.075054   0.096 0.924106    
xPF          -0.031705   0.044469  -0.713 0.479050    
xFG          13.823190   3.981191   3.472 0.001048 ** 
xFT           2.694716   1.118595   2.409 0.019573 *  
xX3P          2.526831   1.754038   1.441 0.155698    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.9619 on 52 degrees of freedom
Multiple R-squared:  0.5418,	Adjusted R-squared:  0.4448 
F-statistic: 5.589 on 11 and 52 DF,  p-value: 7.889e-06


%%R
data = read.csv("DSTMAA_data/goog.csv",header=TRUE)
head(data)

        Date     Open     High      Low    Close Adj.Close   Volume
1 2004-08-19 49.67690 51.69378 47.66995 49.84580  49.84580 44994500
2 2004-08-20 50.17863 54.18756 49.92529 53.80505  53.80505 23005800
3 2004-08-23 55.01717 56.37334 54.17266 54.34653  54.34653 18393200
4 2004-08-24 55.26058 55.43942 51.45036 52.09616  52.09616 15361800
5 2004-08-25 52.14087 53.65105 51.60436 52.65751  52.65751  9257400
6 2004-08-26 52.13591 53.62621 51.99184 53.60634  53.60634  7148200


%%R
n = length(data$Close)
stkp = rev(data$Adj.Close)
rets = as.matrix(log(stkp[2:n]/stkp[1:(n-1)]))
n = length(rets)
plot(rets,type="l",col="blue")
print(n)

[1] 3606


%%R
cor(rets[1:(n-1)],rets[2:n])

[1] 0.009747685


%%R
library(car)
res = lm(rets[2:n]~rets[1:(n-1)])
durbinWatsonTest(res,max.lag=10)

 lag Autocorrelation D-W Statistic p-value
   1    5.491099e-05      1.995490   0.906
   2   -9.073958e-03      2.013593   0.636
   3   -1.063362e-03      1.996041   0.888
   4    1.528849e-02      1.963262   0.254
   5   -3.708613e-03      2.000267   0.970
   6   -3.852869e-02      2.069481   0.030
   7   -2.878048e-04      1.989592   0.864
   8    2.693173e-02      1.935061   0.084
   9   -2.954766e-02      2.047633   0.144
  10    1.458095e-02      1.959268   0.262
 Alternative hypothesis: rho[lag] != 0


%%R
md = read.table("DSTMAA_data/markowitzdata.txt",header=TRUE)
names(md)

 [1] "X.DATE" "SUNW"   "MSFT"   "IBM"    "CSCO"   "AMZN"   "mktrf"  "smb"   
 [9] "hml"    "rf"


%%R
y = as.matrix(md[2])
x = as.matrix(md[7:9])
rf = as.matrix(md[10])
y = y-rf
library(car)
results = lm(y ~ x)
print(summary(results))

Call:
lm(formula = y ~ x)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.213676 -0.014356 -0.000733  0.014462  0.191089 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) -0.000197   0.000785  -0.251   0.8019    
xmktrf       1.657968   0.085816  19.320   <2e-16 ***
xsmb         0.299735   0.146973   2.039   0.0416 *  
xhml        -1.544633   0.176049  -8.774   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.03028 on 1503 degrees of freedom
Multiple R-squared:  0.3636,	Adjusted R-squared:  0.3623 
F-statistic: 286.3 on 3 and 1503 DF,  p-value: < 2.2e-16


%%R
durbinWatsonTest(results,max.lag=6)

 lag Autocorrelation D-W Statistic p-value
   1     -0.07231926      2.144549   0.000
   2     -0.04595240      2.079356   0.130
   3      0.02958136      1.926791   0.182
   4     -0.01608143      2.017980   0.670
   5     -0.02360625      2.032176   0.484
   6     -0.01874952      2.021745   0.616
 Alternative hypothesis: rho[lag] != 0


%%R
#CORRECT FOR AUTOCORRELATION
library(sandwich)
b = results$coefficients
print(b)
vb = NeweyWest(results,lag=1)
stdb = sqrt(diag(vb))
tstats = b/stdb
print(tstats)

  (Intercept)        xmktrf          xsmb          xhml 
-0.0001970164  1.6579682191  0.2997353765 -1.5446330690 
(Intercept)      xmktrf        xsmb        xhml 
 -0.2633665  15.5779184   1.8300340  -6.1036120


nb_setup.images_hconcat(["DSTMAA_images/ARequityindexes.png"], width=500)


%%R
#LOG-LIKELIHOOD FUNCTION
LL = function(params,rets) {
    alpha = params[1]; sigsq = params[2]
    logf = -log(sqrt(2*pi*sigsq)) - (rets-alpha)^2/(2*sigsq)
    LL = -sum(logf)
}


%%R
#READ DATA
data = read.csv("DSTMAA_data/goog.csv",header=TRUE)
stkp = data$Adj.Close
#Ln of differenced stk prices gives continuous returns
rets = diff(log(stkp))  #diff() takes first differences
print(c("mean return = ",mean(rets),mean(rets)*252))
print(c("stdev returns = ",sd(rets),sd(rets)*sqrt(252)))

#Create starting guess for parameters
params = c(0.001,0.001)
res = nlm(LL,params,rets)
print(res)

[1] "mean return = "       "0.000843055708193043" "0.212450038464647"   
[1] "stdev returns = "   "0.0189679427010029" "0.301106755616259" 
$minimum
[1] -9181.616

$estimate
[1] 0.0008439354 0.0003597036

$gradient
[1]  8.818863e+00 -2.840669e+05

$code
[1] 3

$iterations
[1] 9


%%R
h = 1/252
alpha = res$estimate[1]
sigsq = res$estimate[2]
print(c("alpha=",alpha))
print(c("sigsq=",sigsq))

sigma = sqrt(sigsq/h)
mu = alpha/h + 0.5*sigma^2
print(c("mu=",mu))
print(c("sigma=",sigma))

print(mean(rets*252))
print(sd(rets)*sqrt(252))

[1] "alpha="               "0.000843935402288685"
[1] "sigsq="               "0.000359703605306595"
[1] "mu="              "0.25799437564538"
[1] "sigma="           "0.30107359322475"
[1] 0.21245
[1] 0.3011068


%%R
y = c(rep(1,32),rep(0,32))
print(y)
x = as.matrix(ncaa[,4:14])
y = as.matrix(y)

 [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0
[39] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


%%R
h = glm(y~x, family=binomial(link="logit"))
print(logLik(h))
print(summary(h))

'log Lik.' -21.44779 (df=12)

Call:
glm(formula = y ~ x, family = binomial(link = "logit"))

Deviance Residuals: 
     Min        1Q    Median        3Q       Max  
-1.80174  -0.40502  -0.00238   0.37584   2.31767  

Coefficients:
             Estimate Std. Error z value Pr(>|z|)   
(Intercept) -45.83315   14.97564  -3.061  0.00221 **
xPTS         -0.06127    0.09549  -0.642  0.52108   
xREB          0.49037    0.18089   2.711  0.00671 **
xAST          0.16422    0.26804   0.613  0.54010   
xTO          -0.38405    0.23434  -1.639  0.10124   
xA.T          1.56351    3.17091   0.493  0.62196   
xSTL          0.78360    0.32605   2.403  0.01625 * 
xBLK          0.07867    0.23482   0.335  0.73761   
xPF           0.02602    0.13644   0.191  0.84874   
xFG          46.21374   17.33685   2.666  0.00768 **
xFT          10.72992    4.47729   2.397  0.01655 * 
xX3P          5.41985    5.77966   0.938  0.34838   
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 88.723  on 63  degrees of freedom
Residual deviance: 42.896  on 52  degrees of freedom
AIC: 66.896

Number of Fisher Scoring iterations: 6


%%R
h = lm(y~x)
summary(h)

Call:
lm(formula = y ~ x)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.65982 -0.26830  0.03183  0.24712  0.83049 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) -4.114185   1.174308  -3.503 0.000953 ***
xPTS        -0.005569   0.010263  -0.543 0.589709    
xREB         0.046922   0.015003   3.128 0.002886 ** 
xAST         0.015391   0.036990   0.416 0.679055    
xTO         -0.046479   0.028988  -1.603 0.114905    
xA.T         0.103216   0.450763   0.229 0.819782    
xSTL         0.063309   0.028015   2.260 0.028050 *  
xBLK         0.023088   0.030474   0.758 0.452082    
xPF          0.011492   0.018056   0.636 0.527253    
xFG          4.842722   1.616465   2.996 0.004186 ** 
xFT          1.162177   0.454178   2.559 0.013452 *  
xX3P         0.476283   0.712184   0.669 0.506604    
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.3905 on 52 degrees of freedom
Multiple R-squared:  0.5043,	Adjusted R-squared:  0.3995 
F-statistic:  4.81 on 11 and 52 DF,  p-value: 4.514e-05


%%R
h = glm(y~x, family=binomial(link="probit"))
print(logLik(h))
print(summary(h))

'log Lik.' -21.27924 (df=12)

Call:
glm(formula = y ~ x, family = binomial(link = "probit"))

Deviance Residuals: 
     Min        1Q    Median        3Q       Max  
-1.76353  -0.41212  -0.00031   0.34996   2.24568  

Coefficients:
             Estimate Std. Error z value Pr(>|z|)   
(Intercept) -26.28219    8.09608  -3.246  0.00117 **
xPTS         -0.03463    0.05385  -0.643  0.52020   
xREB          0.28493    0.09939   2.867  0.00415 **
xAST          0.10894    0.15735   0.692  0.48874   
xTO          -0.23742    0.13642  -1.740  0.08180 . 
xA.T          0.71485    1.86701   0.383  0.70181   
xSTL          0.45963    0.18414   2.496  0.01256 * 
xBLK          0.03029    0.13631   0.222  0.82415   
xPF           0.01041    0.07907   0.132  0.89529   
xFG          26.58461    9.38711   2.832  0.00463 **
xFT           6.28278    2.51452   2.499  0.01247 * 
xX3P          3.15824    3.37841   0.935  0.34988   
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 88.723  on 63  degrees of freedom
Residual deviance: 42.558  on 52  degrees of freedom
AIC: 66.558

Number of Fisher Scoring iterations: 8


%%R
library(tseries)
res = garch(rets,order=c(1,1))
summary(res)

/Users/srdas/anaconda3/lib/python3.7/site-packages/rpy2/rinterface/__init__.py:146: RRuntimeWarning: 
    ‘tseries’ version: 0.10-46

    ‘tseries’ is a package for time series analysis and computational
    finance.

    See ‘library(help="tseries")’ for details.


  warnings.warn(x, RRuntimeWarning)

 ***** ESTIMATION WITH ANALYTICAL GRADIENT ***** 


     I     INITIAL X(I)        D(I)

     1     3.238046e-04     1.000e+00
     2     5.000000e-02     1.000e+00
     3     5.000000e-02     1.000e+00

    IT   NF      F         RELDF    PRELDF    RELDX   STPPAR   D*STEP   NPRELDF
     0    1 -1.256e+04
     1    7 -1.256e+04  1.53e-04  2.28e-04  1.1e-04  2.2e+10  1.1e-05  2.54e+06
     2    8 -1.256e+04  1.75e-05  1.93e-05  9.8e-05  2.0e+00  1.1e-05  2.23e+01
     3   16 -1.260e+04  3.10e-03  5.12e-03  4.6e-01  2.0e+00  8.6e-02  2.22e+01
     4   19 -1.268e+04  6.96e-03  4.74e-03  7.5e-01  2.0e+00  3.5e-01  1.17e+00
     5   21 -1.271e+04  1.96e-03  1.87e-03  7.9e-02  2.0e+00  6.9e-02  4.77e+02
     6   23 -1.276e+04  4.13e-03  4.09e-03  1.3e-01  2.0e+00  1.4e-01  4.53e+04
     7   25 -1.277e+04  9.35e-04  1.08e-03  2.2e-02  2.0e+00  2.8e-02  1.19e+00
     8   27 -1.280e+04  1.83e-03  3.28e-03  8.0e-02  2.0e+00  1.1e-01  1.12e+00
     9   36 -1.280e+04  3.90e-04  7.87e-04  3.1e-06  4.9e+00  4.6e-06  1.12e-02
    10   37 -1.280e+04  2.19e-06  1.88e-06  3.0e-06  2.0e+00  4.6e-06  8.03e-04
    11   38 -1.280e+04  1.03e-07  1.06e-07  3.0e-06  2.0e+00  4.6e-06  8.54e-04
    12   45 -1.280e+04  1.05e-04  1.55e-04  1.2e-02  1.5e+00  1.9e-02  8.52e-04
    13   48 -1.282e+04  1.33e-03  1.14e-03  3.5e-02  0.0e+00  7.5e-02  1.91e-03
    14   50 -1.283e+04  9.18e-04  9.78e-04  2.6e-02  1.6e+00  5.4e-02  6.43e-03
    15   51 -1.284e+04  4.95e-04  6.20e-04  2.4e-02  8.8e-01  5.4e-02  1.18e-03
    16   60 -1.284e+04  2.64e-05  6.14e-05  4.0e-07  4.4e+00  7.0e-07  2.18e-04
    17   61 -1.284e+04  1.78e-06  1.69e-06  3.1e-07  2.0e+00  7.0e-07  6.27e-05
    18   62 -1.284e+04  6.64e-09  7.01e-09  3.1e-07  2.0e+00  7.0e-07  6.40e-05
    19   70 -1.284e+04  3.10e-05  3.74e-05  5.1e-03  8.0e-01  1.1e-02  6.39e-05
    20   72 -1.284e+04  4.16e-06  6.33e-06  1.6e-03  7.7e-01  3.5e-03  1.02e-05
    21   73 -1.284e+04  1.20e-06  2.33e-06  2.2e-03  0.0e+00  4.6e-03  2.33e-06
    22   74 -1.284e+04  3.80e-07  3.87e-07  7.1e-04  0.0e+00  1.5e-03  3.87e-07
    23   75 -1.284e+04  3.79e-10  6.53e-10  1.3e-05  0.0e+00  2.4e-05  6.53e-10
    24   76 -1.284e+04  2.34e-11  1.76e-11  8.6e-07  0.0e+00  1.9e-06  1.76e-11

 ***** RELATIVE FUNCTION CONVERGENCE *****

 FUNCTION    -1.284035e+04   RELDX        8.555e-07
 FUNC. EVALS      76         GRAD. EVALS      25
 PRELDF       1.763e-11      NPRELDF      1.763e-11

     I      FINAL X(I)        D(I)          G(I)

     1    1.030513e-05     1.000e+00     6.155e+01
     2    8.206572e-02     1.000e+00     1.801e-02
     3    8.926080e-01     1.000e+00     2.204e-02


Call:
garch(x = rets, order = c(1, 1))

Model:
GARCH(1,1)

Residuals:
     Min       1Q   Median       3Q      Max 
-6.41740 -0.45176  0.03289  0.56373  9.81315 

Coefficient(s):
    Estimate  Std. Error  t value Pr(>|t|)    
a0 1.031e-05   5.597e-07    18.41   <2e-16 ***
a1 8.207e-02   3.994e-03    20.55   <2e-16 ***
b1 8.926e-01   4.104e-03   217.51   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Diagnostic Tests:
	Jarque Bera Test

data:  Residuals
X-squared = 14703, df = 2, p-value < 2.2e-16


	Box-Ljung test

data:  Squared.Residuals
X-squared = 0.0083245, df = 1, p-value = 0.9273


%%R
#PLOT VOLATILITY TIMES SERIES
print(names(res))
plot(res$fitted.values[,1],type="l",col="red")
grid(lwd=2)

 [1] "order"         "coef"          "n.likeli"      "n.used"       
 [5] "residuals"     "fitted.values" "series"        "frequency"    
 [9] "call"          "vcov"


%%R
par(mfrow=c(2,1))
plot(res$fitted.values[,1],col="blue",type="l")
plot(stkp,type="l",col="red")


%%R
md = read.table("DSTMAA_data/markowitzdata.txt",header=TRUE)
y = as.matrix(md[2:4])
library(stats)
var6 = ar(y,aic=TRUE,order=6)
print(var6$order)
print(var6$ar)

[1] 1
, , SUNW

         SUNW       MSFT         IBM
1 -0.00985635 0.02224093 0.002072782

, , MSFT

         SUNW       MSFT       IBM
1 0.008658304 -0.1369503 0.0306552

, , IBM

         SUNW      MSFT         IBM
1 -0.04517035 0.0975497 -0.01283037


%%R
print(var6$aic)

        0         1         2         3         4         5         6 
23.950676  0.000000  2.762663  5.284709  5.164238 10.065300  8.924513


%%R
print(var6$partialacf)

, , SUNW

         SUNW         MSFT          IBM
1 -0.00985635  0.022240931  0.002072782
2 -0.07857841 -0.019721982 -0.006210487
3  0.03382375  0.003658121  0.032990758
4  0.02259522  0.030023132  0.020925226
5 -0.03944162 -0.030654949 -0.012384084
6 -0.03109748 -0.021612632 -0.003164879

, , MSFT

          SUNW        MSFT          IBM
1  0.008658304 -0.13695027  0.030655201
2 -0.053224374 -0.02396291 -0.047058278
3  0.080632420  0.03720952 -0.004353203
4 -0.038171317 -0.07573402 -0.004913021
5  0.002727220  0.05886752  0.050568308
6  0.242148823  0.03534206  0.062799122

, , IBM

         SUNW         MSFT         IBM
1 -0.04517035  0.097549700 -0.01283037
2  0.05436993  0.021189756  0.05430338
3 -0.08990973 -0.077140955 -0.03979962
4  0.06651063  0.056250866  0.05200459
5  0.03117548 -0.056192843 -0.06080490
6 -0.13131366 -0.003776726 -0.01502191


%%R
ar(y,method="ols",order=3)

Call:
ar(x = y, order.max = 3, method = "ols")

$ar
, , 1

        SUNW       MSFT       IBM
SUNW 0.01407 -0.0006952 -0.036839
MSFT 0.02693 -0.1440645  0.100557
IBM  0.01330  0.0211160 -0.009662

, , 2

          SUNW     MSFT     IBM
SUNW -0.082017 -0.04079 0.04812
MSFT -0.020668 -0.01722 0.01761
IBM  -0.006717 -0.04790 0.05537

, , 3

         SUNW      MSFT      IBM
SUNW 0.035412  0.081961 -0.09139
MSFT 0.003999  0.037252 -0.07719
IBM  0.033571 -0.003906 -0.04031


$x.intercept
      SUNW       MSFT        IBM 
-9.623e-05 -7.366e-05 -6.257e-05 

$var.pred
          SUNW      MSFT       IBM
SUNW 0.0013593 0.0003007 0.0002842
MSFT 0.0003007 0.0003511 0.0001888
IBM  0.0002842 0.0001888 0.0002881


nb_setup.images_hconcat(["DSTMAA_images/ARcross.png"], width=500)


%%R
#Black-Merton-Scholes 1973
#sig: volatility
#S: stock price
#K: strike price
#T: maturity
#r: risk free rate
#q: dividend rate
#cp = 1 for calls and -1 for puts
#optprice: observed option price
bms73 = function(sig,S,K,T,r,q,cp=1,optprice) {
    d1 = (log(S/K)+(r-q+0.5*sig^2)*T)/(sig*sqrt(T))
    d2 = d1 - sig*sqrt(T)
    if (cp==1) {
        optval = S*exp(-q*T)*pnorm(d1)-K*exp(-r*T)*pnorm(d2)
        }
    else {
        optval = -S*exp(-q*T)*pnorm(-d1)+K*exp(-r*T)*pnorm(-d2)
    }
    #If option price is supplied we want the implied vol, else optprice
    bs = optval - optprice
}

#Function to return Imp Vol with starting guess sig0
impvol = function(sig0,S,K,T,r,q,cp,optprice) {
    sol = nls.lm(par=sig0,fn=bms73,S=S,K=K,T=T,r=r,q=q,
          cp=cp,optprice=optprice)
}


%%R
library(minpack.lm)
optprice = 4
res = impvol(0.2,40,40,1,0.03,0,-1,optprice)
print(names(res))
print(c("Implied vol = ",res$par))

Error: package or namespace load failed for ‘minpack.lm’ in dyn.load(file, DLLpath = DLLpath, ...):
 unable to load shared object '/Users/srdas/anaconda3/lib/R/library/minpack.lm/libs/minpack.lm.dylib':
  dlopen(/Users/srdas/anaconda3/lib/R/library/minpack.lm/libs/minpack.lm.dylib, 6): Library not loaded: @rpath/libopenblasp-r0.3.7.dylib
  Referenced from: /Users/srdas/anaconda3/lib/R/library/minpack.lm/libs/minpack.lm.dylib
  Reason: image not found


%%R
#CHECK
optp = bms73(res$par,40,40,1,0.03,0,0,4) + optprice
print(c("Check option price = ",optp))

Error in withVisible({ : object 'optprice' not found

/Users/srdas/anaconda3/lib/python3.7/site-packages/rpy2/rinterface/__init__.py:146: RRuntimeWarning: Error in withVisible({ : object 'optprice' not found

  warnings.warn(x, RRuntimeWarning)


#rcgi1
nb_setup.images_hconcat(['DSTMAA_images/rcgi1.png'], width=500)


nb_setup.images_hconcat(['DSTMAA_images/rcgi2.png'], width=500)


%%R
#! /usr/bin/R
#library(CGIwithR,warn.conflicts=FALSE)

Open Source: R Programming and Statistical Analysis {#IntroductoryRprogamming}¶

Got R?¶

System Commands¶

Loading Data¶

Getting External Stock Data¶

Data Frames¶

Higher-Order Moments¶

Reading space delimited files¶

Pipes with magrittr¶

Matrices¶

Root Finding¶

Regression¶

P-Values, t-statistics¶

Parts of a regression¶

Bias in regression coefficients¶

Heteroskedasticity¶

Auto-Regressive Models¶

Maximum Likelihood¶

Logit¶

Probit¶

ARCH and GARCH¶

Vector Autoregression¶

Solving Non-Linear Equations¶

Web-Enabling R Functions¶

Causal Inference¶

Top 10 Coding Mistakes made by Data Scientists¶