#######################################################################################################
### R code for the paper "Speed and space: semantic asymmetries in motion descriptions in Estonian" ###
#######################################################################################################

# Authors: Piia Taremaa and Anetta Kopecka
# Journal: Cognitive Linguistics
# Status of the paper: accepted

################
### PACKAGES ###
################

# Run this script in a fresh R session. The former rm(list = ls(all = TRUE))
# call was dropped: it deletes whatever the caller has in the workspace and
# does not reset attached packages or options.
#
# NOTE(review): later sections call recode() with a quoted "'old'='new'" spec
# string, which is not dplyr::recode()'s named-argument interface. Presumably
# a car-style recode() attached after dplyr (via Rcmdr) is the one that gets
# used -- confirm before reordering these library() calls.
library(dplyr)
library(Rcmdr)
library(sjPlot)
library(psych)
library(lsr)
library(reshape2)
library(ggplot2)
library(party)
library(factoextra)
library(FactoMineR)

############
### DATA ###
############

# Read the tab-delimited corpus table (first row holds the column names).
data <- read.delim(
  "Taremaa&Kopecka_Speed and space_CL10.11.2022.txt",
  header = TRUE,
  encoding = "UTF-8"
)

# Convert every character column to a factor.
data <- data %>% mutate_if(is.character, as.factor)

# Bin the numeric VerbSpeed column into three labelled categories.
data$VerbSpeedCat <- with(
  data,
  binVariable(
    VerbSpeed,
    bins = 3,
    method = "natural",
    labels = c("slowVerb", "mediumVerb", "fastVerb")
  )
)

# Fix the level order of the categorical verb variables.
data$VerbType <- factor(
  data$VerbType,
  levels = c("MannerVerb", "SourceVerb", "GoalVerb")
)
data$VerbSpeedCat <- factor(
  data$VerbSpeedCat,
  levels = c("slowVerb", "mediumVerb", "fastVerb")
)
data$VerbCat <- factor(
  data$VerbCat,
  levels = c("mediumSV", "mediumGV", "slowMV", "mediumMV", "fastMV")
)

# Quick inspection of the loaded data.
head(data)
colnames(data)
nrow(data)
str(data)

##############
### TABLES ###
##############

# Table 1: Coding schema
str(data)
colnames(data)

# Table 2: Motion verbs of the study categorised based on VerbType and VerbSpeed.
# tab_xtab() is used here (instead of the older sjt.xtab() name) to match the
# cross-tabulation calls for Tables 5 and 6 further down in this script.
tab_xtab(data$Verb, data$VerbSpeedCat)

# Table 3: The distribution and forms of the semantic units in the corpus data of 12,300 clauses.
# For N of occurrences and main morphosyntactic devices
summary(data)

# Unique morphosyntactic devices (not presented in the paper)
unique(data$SourceForm)
unique(data$LocationForm)
unique(data$TrajectoryForm)
unique(data$DirectionForm)
unique(data$GoalForm)
unique(data$DistanceForm)
unique(data$TimeForm)
unique(data$PurposeForm)
unique(data$ResultForm)
unique(data$MannerFormLongOrdered)
unique(data$SpeedFormLong)

# For length in words (library(psych); not presented in the paper)
describe(data$SourceLength)
describe(data$LocationLength)
describe(data$TrajectoryLength)
describe(data$DirectionLength)
describe(data$GoalLength)
describe(data$DistanceLength)
describe(data$PurposeLength)
describe(data$ResultLength)
describe(data$TimeLength)
describe(data$MannerLength)

# Table 4: The distribution of different types of movers in the data.
summary(data$MoverAnimacy)

# Table 5: The presence or absence of spatial expressions in clauses that
# contain Speed modifiers (Speed+) and in those that do not (Speed-)

# Work on a copy so the original SlowOrFast coding in `data` stays intact.
dataSP <- data

# Collapse the three modifier values into "speedplus" and rename the
# no-modifier value to "speedminus".
# NOTE(review): the quoted "'old'='new'" spec string is the car-style recode()
# interface, not dplyr::recode()'s; presumably the car version is the one on
# the search path here -- confirm.
dataSP$SlowOrFast <- recode(dataSP$SlowOrFast, "'slowModif'='speedplus'")
dataSP$SlowOrFast <- recode(dataSP$SlowOrFast, "'fastModif'='speedplus'")
dataSP$SlowOrFast <- recode(dataSP$SlowOrFast, "'variableModif'='speedplus'")
dataSP$SlowOrFast <- recode(dataSP$SlowOrFast, "'noSpeedModif'='speedminus'")
dataSP$SlowOrFast <- factor(dataSP$SlowOrFast)
table(dataSP$SlowOrFast)

tab_xtab(
  dataSP$SlowOrFast,
  dataSP$SpatialExprPresence,
  show.row.prc = TRUE,
  show.summary = TRUE
)

# Chi-squared test without continuity correction.
chisq.test(dataSP$SlowOrFast, dataSP$SpatialExprPresence, correct = FALSE)

# NOTE(review): chisq.test() above is run with correct = FALSE, but cramersV()
# is called without that argument. If cramersV() forwards its arguments to
# chisq.test() and this table is 2x2, the effect size would be based on the
# continuity-corrected statistic -- confirm which value is intended.
cramersV(dataSP$SlowOrFast, dataSP$SpatialExprPresence)

# Table 6: The distribution of speed modifiers across spatial expressions
summary(data$SlowOrFast)

# Keep only clauses whose speed modifier is neither absent nor variable,
# then drop the now-unused factor levels.
dataS <- filter(
  data,
  SlowOrFast != "noSpeedModif",
  SlowOrFast != "variableModif"
)
dataS$SlowOrFast <- factor(dataS$SlowOrFast)
summary(dataS$SlowOrFast)

# Speed-modifier column plus the six spatial-expression columns.
pro2 <- select(
  dataS,
  SlowOrFast, Source, Location, Trajectory, Direction, Goal, Distance
)
nrow(pro2)

# Reshape to long format: one row per (clause, spatial-expression column).
propikk <- melt(pro2, id.vars = "SlowOrFast")
head(propikk)
nrow(propikk)

# Keep only the rows whose value is "yes".
propik <- filter(propikk, value == "yes")
nrow(propik)
colnames(propik)

tab_xtab(
  propik$SlowOrFast,
  propik$variable,
  show.row.prc = TRUE,
  show.summary = TRUE
)
chisq.test(propik$SlowOrFast, propik$variable)
cramersV(propik$SlowOrFast, propik$variable) # 0.2539014

###############
### FIGURES ###
###############

# Figure 1: Multiple correspondence analysis. Colours indicate VerbType.

# Variables entering the MCA.
data2 <- select(
  data,
  Source, Location, Trajectory, Direction, Goal, Time, Purpose,
  MoverAnimacy, Verb, Manner, Result, Distance, SlowOrFast
)

# Number of distinct categories per variable. apply() coerces the data frame
# to a matrix first; it is kept as-is so the counts (and therefore the
# rep() lengths below) stay exactly what the original analysis produced.
cats <- apply(data2, 2, function(x) nlevels(as.factor(x)))

mca1 <- MCA(data2, graph = FALSE)

# Category coordinates labelled with their source variable, and the
# individual (row) coordinates.
mca1_vars_df <- data.frame(
  mca1$var$coord,
  Variable = rep(names(cats), cats)
)
mca1_obs_df <- data.frame(mca1$ind$coord)

# Contributions of the variable categories to axis 1, axis 2, and both.
fviz_contrib(mca1, choice = "var", axes = 1, top = 10)
fviz_contrib(mca1, choice = "var", axes = 2, top = 10)
fviz_contrib(mca1, choice = "var", axes = 1:2, top = 20)

ind <- get_mca_ind(mca1)

# VerbType
fviz_mca_biplot(
  mca1,
  geom.var = "text",
  geom.ind = "point",
  repel = TRUE,
  col.var = "black",
  label = "all",
  invisible = "none",
  alpha.ind = 0.3,
  select.ind = list(contrib = 12300),
  select.var = list(contrib = 50),
  habillage = data$VerbType,
  palette = c("gold", "blue", "red"),
  ggtheme = theme_minimal(),
  title = ""
)

# Figure 2: Multiple correspondence analysis. Colours indicate VerbSpeed.
# SpeedCat
fviz_mca_biplot(
  mca1,
  geom.var = "text",
  geom.ind = "point",
  repel = TRUE,
  col.var = "black",
  label = "all",
  invisible = "none",
  alpha.ind = 0.3,
  select.ind = list(contrib = 12300),
  select.var = list(contrib = 50),
  habillage = data$VerbSpeedCat,
  palette = c("orange", "grey", "darkgreen"),
  ggtheme = theme_minimal(),
  title = ""
)

# Figure 3: Conditional inference tree for VerbSpeed

# Shorten the category labels before fitting and plotting the tree.
# NOTE(review): the quoted "'old'='new'" spec string is the car-style recode()
# interface, not dplyr::recode()'s; presumably the car version is the one on
# the search path here -- confirm.
data$MoverAnimacy <- recode(data$MoverAnimacy, "'animate'='an'")
data$MoverAnimacy <- recode(data$MoverAnimacy, "'inanimate'='inan'")
data$MoverAnimacy <- recode(data$MoverAnimacy, "'vehicle'='veh'")
data$MoverAnimacy <- recode(data$MoverAnimacy, "'unclear'='un'")

data$SlowOrFast <- recode(data$SlowOrFast, "'slowModif'='slowM'")
data$SlowOrFast <- recode(data$SlowOrFast, "'fastModif'='fastM'")
data$SlowOrFast <- recode(data$SlowOrFast, "'noSpeedModif'='noM'")
data$SlowOrFast <- recode(data$SlowOrFast, "'variableModif'='varM'")

# Tree with VerbSpeed as the response.
ctree1 <- ctree(
  VerbSpeed ~ Source + Location + Trajectory + Direction + Goal + Distance +
    Result + Time + Purpose + MoverAnimacy + Manner + SlowOrFast,
  data = data,
  controls = ctree_control(maxdepth = 5, minbucket = 50)
)
plot(ctree1)

# Accuracy: correlation between the tree's predictions and the observed
# VerbSpeed values.
predicted1 <- predict(ctree1)
head(predicted1)
actual1 <- data$VerbSpeed
head(actual1)
cor(predicted1, actual1)

# Figure 4: Conditional inference tree for VerbCat

# Shorten the VerbCat labels, then restore the intended level order.
data$VerbCat <- recode(data$VerbCat, "'slowMV'='sMV'")
data$VerbCat <- recode(data$VerbCat, "'fastMV'='fMV'")
data$VerbCat <- recode(data$VerbCat, "'mediumMV'='mMV'")
data$VerbCat <- recode(data$VerbCat, "'mediumGV'='mGV'")
data$VerbCat <- recode(data$VerbCat, "'mediumSV'='mSV'")
data$VerbCat <- factor(
  data$VerbCat,
  levels = c("mSV", "mGV", "sMV", "mMV", "fMV")
)

# Tree with VerbCat as the response.
ctree2 <- ctree(
  VerbCat ~ Source + Location + Trajectory + Direction + Goal + Purpose +
    Distance + Result + Time + Manner + SlowOrFast + MoverAnimacy,
  data = data,
  controls = ctree_control(maxdepth = 3, minbucket = 50)
)
plot(ctree2)

# Accuracy: share of clauses on the diagonal of the observed-by-predicted
# table.
predicted2 <- predict(ctree2)
head(predicted2)
actual2 <- data$VerbCat
head(actual2)

# The table is stored under the name `t`, as in the original script; calls to
# the base function t() still resolve to the function.
(t <- table(actual2, predicted2))
(accuracy <- sum(diag(t)) / sum(t))

# Observed class proportions of VerbCat, printed for comparison with the
# accuracy above.
prop.table(table(data$VerbCat))