登录
  • #数学|统计

R语言读取csv文件学习笔记(求米想下载单词list)

jimyokl
169
0
正在学R,需要对csv文件数据进行处理,同时也求好心人打赏米,我想在GRE板下载Magoosh1000单词附件。

1. 首先是从列表(list)循环写每个 向量/数组 到csv文件,使用lapply可以重复写:

# Append every element of the list `data` to one CSV file, one write.table()
# call per element, comma-separated, with no header and no row names.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned; invisible() hides the list of NULLs that lapply() returns.
invisible(lapply(data, function(x) {
  write.table(
    data.frame(x),
    "coef_list_AR_1400.csv",
    append = TRUE,
    sep = ",",
    row.names = FALSE,
    col.names = FALSE
  )
}))

2. 读取csv文件的话,可以直接安装tidyverse包,里面有很多现成的例子,参考网址如下

readr.tidyverse.org

网页上有相应例子可以参考,怎么使用read_csv

3. 比2稍微进阶一点的是如下指南,其中更详细地讲解了read_csv的用法

r4ds.had.co.nz

最后附上代码,谢谢各位看官,谢谢打赏

代码比较乱,但注释掉一部分后可以运行

# ---------------------data.frame() begins------------------

# Build a small example data frame with three people.
temp_frame <- data.frame(
  name = c("liming", "zhangcong", "wangjian"), # not friendly to Chinese char
  age = c(30, 35, 28),
  height = c(180, 162, 175),
  stringsAsFactors = FALSE
)

# Show the frame and its column names. The original referred to an undefined
# object `d` here; the frame built above is `temp_frame`.
print(temp_frame)
names(temp_frame)

library(tidyverse)

# Read the example CSV file that ships with readr.
challenge <- read_csv(readr_example("challenge.csv"))

# Text wrapped in I() is treated as literal CSV data rather than a file path.
# Compact column specification: "d" = double, "c" = character. The argument is
# spelled out as `col_types` instead of relying on partial matching.
read_csv(I("x,y\n1,2\n3,4"), col_types = "dc")

# Same data, letting readr guess the column types.
read_csv(I("x,y\n1,2\n3,4"))

# Same data, with the column types given as a list of collector objects.
read_csv(I("x,y\n1,2\n3,4"), col_types = list(col_double(), col_character()))

# Read a local file that has no header row; readr generates the column names.
t.class <- read_csv("Oct23.csv", col_names = FALSE)

rownames(t.class)

nrow(t.class)

# Print up to 214 rows instead of the default truncated tibble preview.
print(t.class, n = 214)

# ---------------------data.frame() ends------------------

# ---------------------ggplot() begins------------------

# Install the socviz package from GitHub (needs devtools and network access).
devtools::install_github("kjhealy/socviz")

library(gapminder)
library(ggplot2)

# Peek at the first 20 rows of the gapminder data.
head(gapminder, 20)

# Base plot: life expectancy against GDP per capita, coloured by continent.
ggplot_obj <- ggplot(
  data = gapminder,
  mapping = aes(x = gdpPercap, y = lifeExp, color = continent)
)

# Add points and a loess smoother; x axis on a log10 scale with dollar labels.
ggplot_obj +
  geom_point() +
  geom_smooth(method = "loess") +
  scale_x_log10(labels = scales::dollar)

# Bar chart of `n` per `date`, filled by `species`, with a manual colour for
# each species.
# NOTE(review): `df` is not defined anywhere in this script; it presumably is
# a data frame with columns date, n and species supplied by the reader.
ggplot() +
  geom_col(
    data = df,
    mapping = aes(x = date, y = n, fill = species),
    color = "black"
  ) +
  scale_fill_manual(
    values = c(
      "fin" = "grey",
      "humpback" = "blue",
      "right" = "red",
      "sei" = "darkslategrey"
    )
  ) +
  scale_y_continuous(
    limits = c(0, 50),
    breaks = seq(0, 50, 5),
    expand = c(0, 0)
  ) +
  scale_x_date(date_breaks = "3 day", date_labels = "%b %d") +
  labs(x = NULL, y = "Detections per day", fill = "Species") +
  theme_classic()

# Scatter plot of highway mileage against engine displacement, coloured by
# vehicle class (mpg data set from ggplot2).
ggplot(mpg, aes(displ, hwy, colour = class)) +
  geom_point()

# ---------------------ggplot() ends------------------

# main program
#
# Rolling-window ARIMA study on MSFT daily log returns: for every window of
# `window_size` consecutive observations, fit auto.arima(), collect the AR and
# MA coefficients of the selected model, and finally write one CSV row per
# window to coef_list_AR_<window_size>.csv and coef_list_MA_<window_size>.csv.

library(quantmod)
library(forecast)
library(tseries)
library(timeSeries)
library(xts)
library(ggplot2)

# log and diff
getSymbols("MSFT", src = "yahoo", from = "1986-03-13", to = "2021-10-08")

# Column 4 of the downloaded series is used as the price
# (presumably the closing price -- confirm against the Yahoo column layout).
cls_p <- na.omit(MSFT[, 4])
lg_diff_p <- diff(log(cls_p), lag = 1)
lg_diff_p <- lg_diff_p[!is.na(lg_diff_p)]

# calculate how many times the regression will be done
row_num <- nrow(lg_diff_p)
window_size <- 1400
if (row_num < window_size) {
  stop(
    "Series has ", row_num, " observations, fewer than window_size = ",
    window_size, call. = FALSE
  )
}
# Each window holds exactly `window_size` observations, so there are
# row_num - window_size + 1 windows. (The original used i:(window_size + i),
# i.e. window_size + 1 observations per window, and one window fewer.)
rolling_time <- row_num - window_size + 1

# Preallocate the result lists so they are lists from the first assignment on,
# whatever the length of the first stored value.
coef_list_AR <- vector("list", rolling_time)
coef_list_MA <- vector("list", rolling_time)

# Counters for windows whose selected model has no AR / no MA terms.
num_0_AR <- 0
num_0_MA <- 0

# for loop to regress and get the coefficients
for (i in seq_len(rolling_time)) {
  start_ <- i
  end_ <- i + window_size - 1
  print(c(i, end_))

  # regress with auto.arima()
  fit_ <- auto.arima(coredata(lg_diff_p[start_:end_, ]))
  print(fit_)

  # Read the model order from the fit instead of slicing characters 7-11 out
  # of the printed model label, which silently breaks if the label changes.
  fit_order <- arimaorder(fit_)
  p_order <- fit_order[["p"]]
  q_order <- fit_order[["q"]]
  fit_coefs <- unname(fit_$coef)

  # AR coefficients come first in the coefficient vector.
  # If the AR part has no coefficient, store a single 0 for this window.
  if (p_order > 0) {
    temp_AR <- matrix(fit_coefs[seq_len(p_order)], nrow = 1)
  } else {
    temp_AR <- c(0)
    num_0_AR <- num_0_AR + 1
  }

  # MA coefficients follow the AR ones, so they start at position p + 1.
  # (The original read positions 1..q, which returned AR values when p > 0.)
  # If the MA part has no coefficient, store a single 0 for this window.
  if (q_order > 0) {
    temp_MA <- matrix(fit_coefs[p_order + seq_len(q_order)], nrow = 1)
  } else {
    temp_MA <- c(0)
    num_0_MA <- num_0_MA + 1
  }

  # assign coefficient array to corresponding list
  coef_list_AR[[i]] <- temp_AR
  coef_list_MA[[i]] <- temp_MA
}

# Write one comma-separated row per list element to `path`, without header or
# row names. The file is opened in write mode, so each run replaces the file
# instead of appending to the output of a previous run.
write_coef_rows <- function(coef_list, path) {
  con <- file(path, open = "w")
  on.exit(close(con), add = TRUE)
  for (x in coef_list) {
    write.table(
      data.frame(x),
      con,
      sep = ",",
      row.names = FALSE,
      col.names = FALSE
    )
  }
  invisible(path)
}

# save AR coefficient list to csv file
write_coef_rows(coef_list_AR, paste0("coef_list_AR_", window_size, ".csv"))

# save MA coefficient list to csv file
write_coef_rows(coef_list_MA, paste0("coef_list_MA_", window_size, ".csv"))



补充内容 (2021-10-24 12:25 +8:00):

首先谢谢版主的大米,

今天在别的帖子里也看到了对 r4ds 的引用,

这本书讲 tidyverse 非常详细,强烈推荐:

r4ds.had.co.nz

有实体书,大厂 O'Reilly 出品:

If you’d like a physical copy of the book, you can order it from amazon; it was published by O’Reilly in January 2017.

讲tibble:

r4ds.had.co.nz
0条回复
热度排序

发表回复