##################################################################################################
### R code for the paper "Verbs of horizontal and vertical motion: a corpus study in Estonian" ###
### Piia Taremaa (2021), Finnish Journal of Linguistics
##################################################################################################

# R code for the paper "Verbs of horizontal and vertical motion: a corpus study in Estonian"
# published in the Finnish Journal of Linguistics in 2021
# Author: PIIA TAREMAA

################
### PACKAGES ###
################

# R Core Team (2020). R: A language and environment for statistical computing.
# R Foundation for Statistical Computing, Vienna, Austria.
# URL https://www.R-project.org/.

# Hadley Wickham, Romain François, Lionel Henry and Kirill Müller (2021).
# dplyr: A Grammar of Data Manipulation. R package version 1.0.6.
# https://CRAN.R-project.org/package=dplyr
library(dplyr)

# Lüdecke D (2021). _sjPlot: Data Visualization for Statistics in Social
# Science_. R package version 2.8.8,
# https://CRAN.R-project.org/package=sjPlot. (Accessed 24.08.2021)
library(sjPlot)

# Hadley Wickham (2007). Reshaping Data with the reshape Package. Journal of
# Statistical Software, 21(12), 1-20. URL http://www.jstatsoft.org/v21/i12/.
# (Accessed 24.08.2021)
library(reshape2)

# Hothorn, Torsten, Kurt Hornik, Carolin Strobl & Achim Zeileis. 2015.
# Package party: A Laboratory for Recursive Partytioning. R package version
# 1.3.5. http://CRAN.R-project.org/package=party (accessed 24 August 2021).
library(party)

# Sarkar, Deepayan (2008). Lattice: Multivariate Data Visualization with R.
# Springer, New York.
# Provides dotplot(), panel.dotplot() and panel.abline() used for Figure 3.
# Loaded explicitly (before Hmisc, mirroring the order in which Hmisc 4.x
# attached it as a dependency) so the script does not rely on another package
# attaching lattice as a side effect.
library(lattice)

# Harrell Jr, Frank E. 2021. Hmisc: Harrell Miscellaneous. R package version
# 4.5-0. https://CRAN.R-project.org/package=Hmisc. (Accessed 24 August 2021).
library(Hmisc)
################
### DATASETS ###
################

# Main dataset: supplies Verb, HorVert and all predictor columns used in the
# figures and tables of this script.
hv <- read.delim(
  "hv_2021_Taremaa.txt",
  header = TRUE, sep = "\t", encoding = "UTF-8"
)
# Turn every character column into a factor.
hv <- hv %>% mutate(across(where(is.character), as.factor))
head(hv)
nrow(hv)
str(hv)

# Second dataset: one yes/no column per grammatical category value
# (Polarity_Aff, Mood_Indicative, ...); used for Figure 2 only.
hv_gr <- read.delim("hvgr_2021_Taremaa.txt", header = TRUE, encoding = "UTF-8")
colnames(hv_gr)
head(hv_gr)

###############
### FIGURES ###
###############

### Hierarchical agglomerative clustering: Figures 1 & 2

# Count, for every verb, how many rows of `data` have the value "yes" in each
# of the given columns.
#
# data:    data frame with a `Verb` column and the columns named in `columns`.
# columns: character vector of column names in `data`; if it is named, the
#          names are used as column labels of the result, otherwise the
#          column names themselves are used.
# Returns a data frame with one row per verb (row names = verbs) and one
# column of "yes" counts per entry of `columns`.
count_yes_by_verb <- function(data, columns) {
  absent <- setdiff(c("Verb", unname(columns)), names(data))
  if (length(absent) > 0) {
    stop(
      "Columns not found in data: ", paste(absent, collapse = ", "),
      call. = FALSE
    )
  }
  labels <- if (is.null(names(columns))) columns else names(columns)

  counts <- lapply(unname(columns), function(column) {
    verb_by_value <- table(data[["Verb"]], data[[column]])
    if (!"yes" %in% colnames(verb_by_value)) {
      stop(
        sprintf("Column '%s' contains no \"yes\" values", column),
        call. = FALSE
      )
    }
    verb_by_value[, "yes"]
  })

  # cbind() keeps the verb names of the count vectors as row names.
  freq <- do.call(cbind, counts)
  colnames(freq) <- labels
  as.data.frame(freq)
}

# Cluster the rows of a verb-by-feature frequency table and draw the
# dendrogram with `k` clusters outlined.
#
# freq: data frame of counts, one row per verb.
# k:    number of clusters to mark with rectangles.
# Returns the hclust object invisibly.
#
# NOTE(review): per ?hclust, "ward.D" on unsquared distances does not
# implement Ward's clustering criterion ("ward.D2" does). The method is left
# unchanged so that the published figures are reproduced.
plot_verb_dendrogram <- function(freq, k) {
  # `upper` and `diag` only affect how the distance object is printed.
  verb_dist <- dist(freq, method = "euclidean", upper = TRUE, diag = TRUE)
  tree <- hclust(verb_dist, method = "ward.D")
  plot(tree, main = "")
  rect.hclust(tree, k)
  invisible(tree)
}

# Figure 1
# Names are the labels used in the exported table, values the columns of `hv`.
fig1_columns <- c(
  Source = "Source",
  FromDirection = "FromDirection",
  Location = "Location",
  Trajectory = "Trajectory",
  Direction = "Direction",
  Goal = "Goal",
  Distance = "Distance",
  Result = "Result",
  Cause = "Cause",
  Purp.Goal = "Purpose",
  Time = "Time",
  Manner = "MannerInstr",
  "Co-mover" = "CoMover"
)
x <- count_yes_by_verb(hv, fig1_columns)
# Export the frequency table. It is clustered directly from memory; reading
# the file back in is unnecessary and would only rewrite the column names.
write.table(x, file = "x.txt", row.names = TRUE, sep = "\t")
hc <- plot_verb_dendrogram(x, k = 2)

# Figure 2
fig2_columns <- c(
  "Polarity_Aff", "Polarity_Neg",
  "Mood_Indicative", "Mood_Conditional", "Mood_Imperative",
  "Mood_Jussive", "Mood_Quotative",
  "Voice_Pers", "Voice_Impers",
  "Aspect_Unspecified", "Aspect_Perfective", "Aspect_Progressive",
  "Tense_Present", "Tense_Past",
  "Person_1", "Person_2", "Person_3", "Person_Unclear",
  "Num_SG", "Num_Pl", "Num_Unclear"
)
x <- count_yes_by_verb(hv_gr, fig2_columns)
hc <- plot_verb_dendrogram(x, k = 3)

### Conditional random forests: Figure 3

# Model formula shared by the random forest (Figure 3) and the single
# conditional inference tree (Figure 4).
hv_formula <- HorVert ~ MannerInstr + Result + Source + FromDirection +
  Location + Trajectory + Direction + Goal + Distance + Time + CoMover +
  Purpose + Cause + Mood + Aspect + Polarity + Voice + Tense + Person +
  Number + Genre

set.seed(67)
mycontrols <- cforest_unbiased(ntree = 500, mtry = 4)
hv.cforest.k <- cforest(hv_formula, data = hv, controls = mycontrols)
hv.cforest.k.varimp <- varimp(hv.cforest.k, conditional = TRUE)
hv.cforest.k.varimp

# dotplot(), panel.dotplot() and panel.abline() are lattice functions.
dotplot(
  sort(hv.cforest.k.varimp),
  xlab = "",
  panel = function(x, y) {
    panel.dotplot(x, y, pch = 16, cex = 1.1, col = "black")
    # Vertical reference line at the absolute value of the smallest
    # importance score.
    panel.abline(v = abs(min(hv.cforest.k.varimp)))
  }
)

# Keep every second value of the flattened tree responses.
# NOTE(review): this presumably picks the predicted probability of the second
# level of HorVert and assumes HorVert has exactly two levels - confirm.
hv.cforest.k_pred <- unlist(treeresponse(hv.cforest.k))[c(FALSE, TRUE)]
# Rank-discrimination indexes for the forest; the response is recoded from
# factor codes 1/2 to 0/1.
somers2(hv.cforest.k_pred, as.numeric(hv$HorVert) - 1)

### Conditional inference tree: Figure 4
hv.ctree <- ctree(
  hv_formula,
  data = hv,
  controls = ctree_control(maxdepth = 4, minbucket = 20)
)
plot(hv.ctree)

# Share of rows for which the tree's prediction equals the observed HorVert
# (computed on the same data the tree was fitted to).
preds.tree <- predict(hv.ctree)
tab <- table(preds.tree, hv$HorVert)
sum(diag(tab)) / sum(tab)

##############
### TABLES ###
##############

# Columns of `hv` are referenced through with(hv, ...) and `data = hv`
# rather than attach(hv), so nothing is left on the search path and a stale
# attached copy of the data cannot be picked up by accident.
# For every table: sjt.xtab() gives the cross-tabulation with row
# percentages, chisq.test() the test, and `$residuals` its Pearson residuals.

### The presence and absence of spatial expressions: Table 3
with(hv, sjt.xtab(HorVert, SpatExprPresent, show.row.prc = TRUE))
# for residuals
(SpExp.xtabs <- xtabs(~ HorVert + SpatExprPresent, data = hv))
(chi.SpExp <- chisq.test(SpExp.xtabs))
chi.SpExp$residuals

### The type of spatial expressions in the motion clauses: Table 4
hvh <- dplyr::select(
  hv,
  HorVert, Source, Location, Trajectory, Direction, Goal
)
nrow(hvh)
# Long format: one row per (original row, spatial category) pair.
hvhlong <- melt(hvh, id.vars = "HorVert")
head(hvhlong)
nrow(hvhlong)
# Keep only the categories that are actually expressed.
hvhlong2 <- dplyr::filter(hvhlong, value == "yes")
nrow(hvhlong2)
sjt.xtab(hvhlong2$HorVert, hvhlong2$variable, show.row.prc = TRUE)
(hvhlong2.xtabs <- xtabs(~ HorVert + variable, data = hvhlong2))
(chi.res <- chisq.test(hvhlong2.xtabs))
chi.res$residuals

# Result: Table 5
with(hv, sjt.xtab(HorVert, Result, show.row.prc = TRUE))
(result.xtabs <- xtabs(~ HorVert + Result, data = hv))
(chi.res <- chisq.test(result.xtabs))
chi.res$residuals

# Manner: Table 6
with(hv, sjt.xtab(HorVert, MannerInstr, show.row.prc = TRUE))
(mi.xtabs <- xtabs(~ HorVert + MannerInstr, data = hv))
(chi.mi <- chisq.test(mi.xtabs))
chi.mi$residuals

# Person: Table 7
with(hv, sjt.xtab(HorVert, Person, show.row.prc = TRUE))
(p.xtabs <- xtabs(~ HorVert + Person, data = hv))
(chi.p <- chisq.test(p.xtabs))
chi.p$residuals

# Number: Table 8
with(hv, sjt.xtab(HorVert, Number, show.row.prc = TRUE))
(n.xtabs <- xtabs(~ HorVert + Number, data = hv))
(chi.n <- chisq.test(n.xtabs))
chi.n$residuals