r - for loop depends on all previous iterations -
i'm pretty new r , programming in general. wondering if can me following: have loop in r each iteration depending on previous. have data set 200k rows, loop takes forever execute. there faster way this?
here's code:
library(data.table) (i in 2:nrow(dt)) { if (dt$price2[i]%in%dt$price2[1:i-1]) { #if price occurred before dt$dummyid[i] = dt$id[max(which(dt$price2[i] == dt$price2[1:i-1]))] #record latest id of price's order if (sum(dt$size[(which(dt$price2[i] == dt$price2[1:i-1]))]) == 0) #if cumulative sum == 0 (excl. current order) {dt$id[i] = i} #means order got cancelled, assign new id else {dt$id[i] = dt$dummyid[i]} #assign latest id } else {dt$id[i] = i} #price never occurred, new id if (dt$id[i]%in%dt$id[1:i-1]) { #if id occurred before if (sum(dt$size[(which(dt$id[i] == dt$id[1:i]))]) == 0) #and cumulative sum == 0 (incl. current order) {dt$arc[i] = "c"} #cancel order else { dt$arc[i] = "r" #replace order dt$price3[i] = sum(dt$size[(which(dt$id[i] == dt$id[1:i]))]) #with order of size } } else { dt$arc[i] = "a" #add new order } } }
here i'm trying reformat raw data taq nyse openbook orderbook package can read it. if knows other way of doing this, i'd appreciate it.
basically, raw data (4 leftmost columns) tick changes in bid/ask orders particular stock intraday.
i edited top row manually here. type here 0 = bid, 1 = ask, price2 = price + type*0.000001 distinguish same prices between bids/asks. size not positive.
what i'm trying code:
i'm trying treat orders same price 1 order until it's cumulative size == 0, ok purposes. arc column should = add (if price has never occured before, or order price got cancelled), c = cancel (if cumulative size == 0), r = replace (if price has occured before , cumulative size <> 0).
i need id indicate order got cancelled, added or replaced.
edit:
sorry not including sample dataset. here is:
> head(dt, n = 20) time type price size price2 dummyid id arc price3 1: 0 0 20.00 200 20.00 0 1 na 2: 0 0 24.41 200 24.41 na na na na 3: 0 0 32.50 200 32.50 na na na na 4: 0 0 38.40 1000 38.40 na na na na 5: 0 0 40.50 1700 40.50 na na na na 6: 0 0 41.50 100 41.50 na na na na 7: 0 0 41.69 100 41.69 na na na na 8: 0 0 42.28 100 42.28 na na na na 9: 0 0 43.00 100 43.00 na na na na 10: 0 0 45.00 1700 45.00 na na na na 11: 0 0 45.12 300 45.12 na na na na 12: 0 0 45.76 200 45.76 na na na na 13: 0 0 46.00 100 46.00 na na na na 14: 0 0 46.76 200 46.76 na na na na 15: 0 0 47.00 200 47.00 na na na na 16: 0 0 48.00 500 48.00 na na na na 17: 0 0 48.10 100 48.10 na na na na 18: 0 0 48.25 400 48.25 na na na na 19: 0 0 48.71 300 48.71 na na na na 20: 0 0 49.05 500 49.05 na na na na > dput(head(dt,20)) structure(list(time = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), type = c(0l, 0l, 0l, 0l, 0l, 0l, 0l, 0l, 0l, 0l, 0l, 0l, 0l, 0l, 0l, 0l, 0l, 0l, 0l, 0l), price = c(20, 24.41, 32.5, 38.4, 40.5, 41.5, 41.69, 42.28, 43, 45, 45.12, 45.76, 46, 46.76, 47, 48, 48.1, 48.25, 48.71, 49.05), size = c(200l, 200l, 200l, 1000l, 1700l, 100l, 100l, 100l, 100l, 1700l, 300l, 200l, 100l, 200l, 200l, 500l, 100l, 400l, 300l, 500l), price2 = c(20, 24.41, 32.5, 38.4, 40.5, 41.5, 41.69, 42.28, 43, 45, 45.12, 45.76, 46, 46.76, 47, 48, 48.1, 48.25, 48.71, 49.05), dummyid = c("0", na, na, na, na, na, na, na, na, na, na, na, na, na, na, na, na, na, na, na), id = c("1", na, na, na, na, na, na, na, na, na, na, na, na, na, na, na, na, na, na, na), arc = c("a", na, na, na, na, na, na, na, na, na, na, na, na, na, na, na, na, na, na, na), price3 = c(na, na, na, na, na, na, na, na, na, na, na, na, na, na, na, na, na, na, na, na)), .names = c("time", "type", "price", "size", "price2", "dummyid", "id", "arc", "price3"), class = c("data.table", "data.frame"), row.names = c(na, -20l))
thanks!
Comments
Post a Comment