* %>%
* %T>% (call a function for its side effect)
* %$% (exposition of variables)
* %<>% reassigns the result to the original data used inside the function
library(tidyverse)
library(magrittr)
library(ggmap)
library(plotly)
# This one won't work for the summary()
rnorm(50) %>%
matrix(ncol = 2) %>%
plot() %>%
summary()
## Length Class Mode
## 0 NULL NULL
# This one does
rnorm(50) %>%
matrix(ncol = 2) %T>%
plot() %>%
summary()
## V1 V2
## Min. :-1.40045 Min. :-2.21410
## 1st Qu.: 0.09584 1st Qu.:-0.20650
## Median : 0.31137 Median : 0.35701
## Mean : 0.26360 Mean : 0.08358
## 3rd Qu.: 0.62430 3rd Qu.: 0.79852
## Max. : 1.99764 Max. : 1.47612
as.tibble()
read_delim(col_type = xyz)
(generic)guess_max = n
will guess column types for all values within n rowsparse_number()
reads a string and returns only numbersparse_factors()
finds unknown levels in factorsparse_strings()
p <- problems(tricky_dataset)
Identify problems in the data framep %$% table(col)
create a table of resultsdf %>% filter(near(0.23, carat))
will filter for all entries with carat near 0.23arrange(x, y, desc(z))
arranges data frame by x, y, and z descendingselect(x:z, everything())
will bring x,y,z to the front, and the rest aftertransmute()
retains only the new column(s) created, and removes the ones used to make themsafely()
deals with errors (from purrr), similar to possibly()
# Original
my_cars <- mtcars[, c(1:4, 10)]
my_cars <- my_cars[my_cars$disp > mean(my_cars$disp), ]
my_cars <- colMeans(my_cars)
# In magrittr
my_cars <- mtcars[, c(1:4, 10)] %>%
filter(disp > mean(disp)) %>%
colMeans
my_cars
## mpg cyl disp hp gear
## 15.520000 7.866667 346.760000 202.600000 3.266667
# Original
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
colSums(cars)
## speed dist
## 770 2149
# In magrittr
cars %T>%
{print(summary(.))} %>%
colSums()
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
## speed dist
## 770 2149
# Original
cor(mtcars$gear, mtcars$mpg)
## [1] 0.4802848
cor(mtcars)
## mpg cyl disp hp drat wt
## mpg 1.0000000 -0.8521620 -0.8475514 -0.7761684 0.68117191 -0.8676594
## cyl -0.8521620 1.0000000 0.9020329 0.8324475 -0.69993811 0.7824958
## disp -0.8475514 0.9020329 1.0000000 0.7909486 -0.71021393 0.8879799
## hp -0.7761684 0.8324475 0.7909486 1.0000000 -0.44875912 0.6587479
## drat 0.6811719 -0.6999381 -0.7102139 -0.4487591 1.00000000 -0.7124406
## wt -0.8676594 0.7824958 0.8879799 0.6587479 -0.71244065 1.0000000
## qsec 0.4186840 -0.5912421 -0.4336979 -0.7082234 0.09120476 -0.1747159
## vs 0.6640389 -0.8108118 -0.7104159 -0.7230967 0.44027846 -0.5549157
## am 0.5998324 -0.5226070 -0.5912270 -0.2432043 0.71271113 -0.6924953
## gear 0.4802848 -0.4926866 -0.5555692 -0.1257043 0.69961013 -0.5832870
## carb -0.5509251 0.5269883 0.3949769 0.7498125 -0.09078980 0.4276059
## qsec vs am gear carb
## mpg 0.41868403 0.6640389 0.59983243 0.4802848 -0.55092507
## cyl -0.59124207 -0.8108118 -0.52260705 -0.4926866 0.52698829
## disp -0.43369788 -0.7104159 -0.59122704 -0.5555692 0.39497686
## hp -0.70822339 -0.7230967 -0.24320426 -0.1257043 0.74981247
## drat 0.09120476 0.4402785 0.71271113 0.6996101 -0.09078980
## wt -0.17471588 -0.5549157 -0.69249526 -0.5832870 0.42760594
## qsec 1.00000000 0.7445354 -0.22986086 -0.2126822 -0.65624923
## vs 0.74453544 1.0000000 0.16834512 0.2060233 -0.56960714
## am -0.22986086 0.1683451 1.00000000 0.7940588 0.05753435
## gear -0.21268223 0.2060233 0.79405876 1.0000000 0.27407284
## carb -0.65624923 -0.5696071 0.05753435 0.2740728 1.00000000
# In magrittr
mtcars %$% cor(gear, mpg)
## [1] 0.4802848
mtcars %>% cor()
## mpg cyl disp hp drat wt
## mpg 1.0000000 -0.8521620 -0.8475514 -0.7761684 0.68117191 -0.8676594
## cyl -0.8521620 1.0000000 0.9020329 0.8324475 -0.69993811 0.7824958
## disp -0.8475514 0.9020329 1.0000000 0.7909486 -0.71021393 0.8879799
## hp -0.7761684 0.8324475 0.7909486 1.0000000 -0.44875912 0.6587479
## drat 0.6811719 -0.6999381 -0.7102139 -0.4487591 1.00000000 -0.7124406
## wt -0.8676594 0.7824958 0.8879799 0.6587479 -0.71244065 1.0000000
## qsec 0.4186840 -0.5912421 -0.4336979 -0.7082234 0.09120476 -0.1747159
## vs 0.6640389 -0.8108118 -0.7104159 -0.7230967 0.44027846 -0.5549157
## am 0.5998324 -0.5226070 -0.5912270 -0.2432043 0.71271113 -0.6924953
## gear 0.4802848 -0.4926866 -0.5555692 -0.1257043 0.69961013 -0.5832870
## carb -0.5509251 0.5269883 0.3949769 0.7498125 -0.09078980 0.4276059
## qsec vs am gear carb
## mpg 0.41868403 0.6640389 0.59983243 0.4802848 -0.55092507
## cyl -0.59124207 -0.8108118 -0.52260705 -0.4926866 0.52698829
## disp -0.43369788 -0.7104159 -0.59122704 -0.5555692 0.39497686
## hp -0.70822339 -0.7230967 -0.24320426 -0.1257043 0.74981247
## drat 0.09120476 0.4402785 0.71271113 0.6996101 -0.09078980
## wt -0.17471588 -0.5549157 -0.69249526 -0.5832870 0.42760594
## qsec 1.00000000 0.7445354 -0.22986086 -0.2126822 -0.65624923
## vs 0.74453544 1.0000000 0.16834512 0.2060233 -0.56960714
## am -0.22986086 0.1683451 1.00000000 0.7940588 0.05753435
## gear -0.21268223 0.2060233 0.79405876 1.0000000 0.27407284
## carb -0.65624923 -0.5696071 0.05753435 0.2740728 1.00000000
# Original
dim_summary <- function(nrows, ncols) {
print(
paste0('Matrix M has: ', nrows, ' rows and ', ncols, ' columns.')
)
}
# distr1 <- rnorm(16)
# MO <- matrix(distr1, ncol = 4)
# plot(MO)
# MO <- MO + sample(MO)
# dim_summary(nrows = nrow(MO), ncols = ncol(MO))
# distr2 <- rnorm(16)
#
# NO <- matrix(distr2, ncol = 4)
# colnames(NO) <- letters[1:4]
# summary(NO)
#
# PO <- MO %x% t(NO)
# heatmap(PO)
# names(PO) <- letters[1:dim(PO)[2]]
# cor(PO[ ,'a'], PO[ ,'i'])
# In magrittr
M <- matrix(rnorm(16), ncol = 4)
M <- M %T>%
{plot(.)} %>%
rbind(matrix(sample(M), ncol = 4)) %T>%
{dim_summary(nrows = nrow(.), ncols = ncol(.))}
## [1] "Matrix M has: 8 rows and 4 columns."
N <- matrix(rnorm(16), ncol = 4) %>%
set_colnames(letters[1:4]) %T>%
{print(summary(.))}
## a b c
## Min. :-1.003118 Min. :-1.39734 Min. :-0.7569
## 1st Qu.:-0.869178 1st Qu.:-1.24790 1st Qu.: 0.2249
## Median :-0.568565 Median :-0.53217 Median : 0.8082
## Mean :-0.302292 Mean :-0.09709 Mean : 0.4975
## 3rd Qu.:-0.001679 3rd Qu.: 0.61864 3rd Qu.: 1.0808
## Max. : 0.931080 Max. : 2.07334 Max. : 1.1302
## d
## Min. :-1.4884
## 1st Qu.:-0.2705
## Median : 0.5755
## Mean : 0.2106
## 3rd Qu.: 1.0566
## Max. : 1.1800
P <- M %x% t(N) %T>%
{heatmap(.)} %>%
set_colnames(letters[1:dim(.)[2]]) %>%
as.data.frame() %$%
print(cor(a,i))
## [1] 0.4408845
ggplot(data, mapping = aes(x, y))+
geom_?+facet_function()+scale_function()+theme_function()
df <- read.csv("data_wsj.csv",header=T,stringsAsFactors=F,skip=2)
fun1 <- function(x) ifelse(all(is.na(x)),NA,sum(x,na.rm=TRUE))
cols <- c("#e7f0fa","#c9e2f6","#95cbee","#0099dc","#4ab04a", "#ffd73e","#eec73a","#e29421","#f05336","#ce472e")
df_clean <- df %>%
gather(state, value, -c(YEAR, WEEK)) %>%
mutate(value=str_replace(value,"^-$",NA_character_),
value=as.numeric(value)) %>%
group_by(YEAR,state) %>%
summarise(total=fun1(value)) %>%
mutate(state=str_replace_all(state,"[.]"," "),
state=str_to_title(state))
colnames(df_clean) <- tolower(colnames(df_clean))
ggplot(df_clean, aes(year, reorder(state, desc(state)), fill = total))+
geom_tile(color = "white",
size = 0.25) +
scale_y_discrete(expand=c(0,0))+
scale_x_continuous(expand=c(0,0),breaks=seq(1930,2010,by=10))+
scale_fill_gradientn(colors=cols,na.value="grey95",
limits=c(0,4000),
values=c(0,0.01,0.02,0.03,0.09,0.1,0.15,0.25,0.4,0.5,1),
labels=c("0k","1k","2k","3k","4k"),
guide=guide_colourbar(ticks=T,nbin=50,
barheight=.5,label=T,
barwidth=10))+
labs(x="",y="",fill="",title="Measles")+
coord_fixed()+
geom_segment(x=1963,xend=1963,y=0,yend=51.5,size=.6,alpha=0.7) +
annotate("text",label="Vaccine introduced",x=1963,y=53,
vjust=1,hjust=0,size=I(3))+
theme_minimal()+
theme(legend.position="bottom",
legend.justification="center",
legend.direction="horizontal",
legend.text=element_text(color="grey20"),
axis.text.y=element_text(size=6,hjust=1,vjust=0.5),
axis.text.x=element_text(size=8),
axis.ticks.y=element_blank(),
title=element_text(hjust=-.07,vjust=1),
panel.grid=element_blank())
get_map()
input: location, output: ggmap object
ggmap()
input: ggmap object, output: ggplot objectgeocode()
input: location, output: lat/lon positiongeom_trek()
for to/from lines# Map Oslo
map <- get_map("oslo, norway", maptype = "watercolor", zoom = 15)
ggmap(map)
# Map Norway
norway <- get_map(location = "bronnoysund, norway", zoom = 4)
ggmap(norway)
uppsala_t <- read.table("uppsala_tm_1722-2017/uppsala_tm_1722-2017.dat")
head(uppsala_t)
colnames(uppsala_t) <- c("year","month","day","temp","tempcorr","place")
head(uppsala_t)
str(uppsala_t)
## 'data.frame': 108101 obs. of 6 variables:
## $ year : int 1722 1722 1722 1722 1722 1722 1722 1722 1722 1722 ...
## $ month : int 1 1 1 1 1 1 1 1 1 1 ...
## $ day : int 12 13 14 15 16 17 18 19 20 21 ...
## $ temp : num 1.9 2.3 1.8 0.9 -1.8 0.5 0.1 -1.8 0.5 1.8 ...
## $ tempcorr: num 1.8 2.2 1.7 0.8 -1.9 0.4 0 -1.9 0.4 1.6 ...
## $ place : int 1 1 1 1 1 1 1 1 1 1 ...
table(uppsala_t$place)
##
## 1 2 3 4 5 6
## 102942 2325 90 470 2122 152
table(uppsala_t$month)
##
## 1 2 3 4 5 6 7 8 9 10 11 12
## 9165 8360 9176 8880 9176 8880 9176 9176 8880 9176 8880 9176
htemp <- plot_ly(uppsala_t, x = ~temp, type = "histogram")
hcorr <- plot_ly(uppsala_t, x = ~tempcorr, type = "histogram")
subplot(htemp, hcorr)
htemp <-
plot_ly(
uppsala_t[uppsala_t$place == 1, ],
x = ~ temp,
type = "histogram",
color = I("darkblue"),
name = "Temperature"
)
hcorr <-
plot_ly(
uppsala_t[uppsala_t$place == 1, ],
x = ~ tempcorr,
type = "histogram",
color = I("tomato3"),
name = "Corrected Temperature"
) %>%
layout(yaxis = list(title = "", zeroline = FALSE, showline = FALSE, showticklabels = FALSE, showgrid = TRUE))
subplot(htemp, hcorr)
plot_ly(uppsala_t) %>%
add_boxplot(~temp, name = "Measured Temperature") %>%
add_boxplot(~tempcorr, name = "Corrected Temperature") %>%
layout(title = "Daily temperature in Uppsala from 1722 - 2017", margin = list(l = 150))
xax <- list(type = "category",
title = "Month")
yax <- list(title = "Temperature")
plot_ly(uppsala_t, y = ~temp, x = ~factor(month), type = "box")%>%
layout(xaxis = xax, yaxis = yax)
# Prepare data frame
ut <- uppsala_t%>%
group_by(year,month) %>%
summarise_at(vars(temp), funs(mean(., na.rm=TRUE)))
# Create plot
p <- plot_ly(ut, x = ~month, y = ~temp)
add_lines(
add_lines(p, alpha = 0.2, name = "1722 - 2017", hoverinfo = "none", color = I("grey")),
name = "1722", data = filter(ut, year == 1722)) %>%
add_lines(name = "2017", data = filter(ut, year == 2017), color = I("tomato3"))
# To animate, add frame
anim <- plot_ly(ut, x = ~month, y = ~temp, frame = ~year, width = 500, height = 500)
anim
## No trace type specified:
## Based on info supplied, a 'scatter' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#scatter
## No scatter mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plot.ly/r/reference/#scatter-mode