Skip to contents

as_data_frame() is an alternative to as.data.frame(). A number of objects in mclm can be turned into dataframes with one of these functions.


as_data_frame(x, row.names = NULL, optional = FALSE, ...)

# S3 method for default
as_data_frame(x, row.names = NULL, optional = FALSE, ...)

# S3 method for assoc_scores
as.data.frame(x, ...)

# S3 method for conc
as.data.frame(x, ...)

# S3 method for fnames
as.data.frame(x, ...)

# S3 method for freqlist
as.data.frame(x, row.names = NULL, optional = FALSE, ...)

# S3 method for details.slma
as.data.frame(x, ...)

# S3 method for slma
as.data.frame(x, ...)

# S3 method for tokens
as.data.frame(x, ...)

# S3 method for types
as.data.frame(x, ...)



x: Object to coerce to data.frame.


row.names: NULL or a character vector giving the rownames for the dataframe.


optional: Logical. If TRUE, setting rownames and converting column names is optional (see as.data.frame()).


...: Additional arguments.


Object of class data.frame


# for an assoc_scores object ---------------------
a <- c(10,    30,    15,    1)
b <- c(200, 1000,  5000,  300)
c <- c(100,   14,    16,    4)
d <- c(300, 5000, 10000, 6000)
types <- c("four", "fictitious", "toy", "examples")
(scores <- assoc_abcd(a, b, c, d, types = types))
#> Association scores (types in list: 4)
#>         type  a    PMI G_signed|   b   c     d dir  exp_a DP_rows
#> 1       four 10 -1.921  -45.432| 200 100   300  -1 37.869  -0.202
#> 2 fictitious 30  2.000   56.959|1000  14  5000   1  7.498   0.026
#> 3        toy 15  0.536    2.984|5000  16 10000   1 10.343   0.001
#> 4   examples  1  2.067    1.473| 300   4  6000   1  0.239   0.003
#> <number of extra columns to the right: 7>
as.data.frame(scores)
#>         type  a    b   c     d dir      exp_a      DP_rows    RR_rows       OR
#> 1       four 10  200 100   300  -1 37.8688525 -0.202380952  0.1904762  0.15000
#> 2 fictitious 30 1000  14  5000   1  7.4983455  0.026334032 10.4313454 10.71429
#> 3        toy 15 5000  16 10000   1 10.3429579  0.001393583  1.8723829  1.87500
#> 4   examples  1  300   4  6000   1  0.2386994  0.002656037  4.9867110  5.00000
#>            MS        Dice        PMI chi2_signed   G_signed          t
#> 1 0.047619048 0.062500000 -1.9210117  -38.158009 -45.431519 -8.8860423
#> 2 0.029126214 0.055865922  2.0003183   81.993003  56.958917  4.1184552
#> 3 0.002991027 0.005945303  0.5363137    3.153303   2.983872  1.2030435
#> 4 0.003322259 0.006535948  2.0667329    2.551819   1.473313  0.7613609
#>     p_fisher_1
#> 1 1.000000e+00
#> 2 6.106227e-14
#> 3 5.916695e-02
#> 4 2.170331e-01
as_data_frame(scores)
#>         type  a    b   c     d dir      exp_a      DP_rows    RR_rows       OR
#> 1       four 10  200 100   300  -1 37.8688525 -0.202380952  0.1904762  0.15000
#> 2 fictitious 30 1000  14  5000   1  7.4983455  0.026334032 10.4313454 10.71429
#> 3        toy 15 5000  16 10000   1 10.3429579  0.001393583  1.8723829  1.87500
#> 4   examples  1  300   4  6000   1  0.2386994  0.002656037  4.9867110  5.00000
#>            MS        Dice        PMI chi2_signed   G_signed          t
#> 1 0.047619048 0.062500000 -1.9210117  -38.158009 -45.431519 -8.8860423
#> 2 0.029126214 0.055865922  2.0003183   81.993003  56.958917  4.1184552
#> 3 0.002991027 0.005945303  0.5363137    3.153303   2.983872  1.2030435
#> 4 0.003322259 0.006535948  2.0667329    2.551819   1.473313  0.7613609
#>     p_fisher_1
#> 1 1.000000e+00
#> 2 6.106227e-14
#> 3 5.916695e-02
#> 4 2.170331e-01

# for a conc object ------------------------------
(conc_data <- conc('A very small corpus.', '\\w+', as_text = TRUE))
#> Concordance-based data frame (number of observations: 4)
#> idx                                           left|match |right             
#>   1                                               |  A   |very small corpus.
#>   2                                              A| very |small corpus.     
#>   3                                         A very|small |corpus.           
#>   4                                   A very small|corpus|.                 
#> This data frame has 6 columns:
#>    column
#> 1 glob_id
#> 2      id
#> 3  source
#> 4    left
#> 5   match
#> 6   right
as.data.frame(conc_data)
#>   glob_id id source          left  match               right
#> 1       1  1      -                    A  very small corpus.
#> 2       2  2      -            A    very       small corpus.
#> 3       3  3      -       A very   small             corpus.
#> 4       4  4      - A very small  corpus                   .

# for an fnames object ---------------------------
cwd_fnames <- as_fnames(c('file1', 'file2'))
as.data.frame(cwd_fnames)
#>   filename
#> 1    file1
#> 2    file2

# for a freqlist, types or tokens object ---------
toy_corpus <- "Once upon a time there was a tiny toy corpus.
  It consisted of three sentences. And it lived happily ever after."
(flist <- freqlist(toy_corpus, as_text = TRUE))
#> Frequency list (types in list: 19, tokens in list: 21)
#> rank      type abs_freq nrm_freq
#> ---- --------- -------- --------
#>    1         a        2  952.381
#>    2        it        2  952.381
#>    3     after        1  476.190
#>    4       and        1  476.190
#>    5 consisted        1  476.190
#>    6    corpus        1  476.190
#>    7      ever        1  476.190
#>    8   happily        1  476.190
#>    9     lived        1  476.190
#>   10        of        1  476.190
#>   11      once        1  476.190
#>   12 sentences        1  476.190
#>   13     there        1  476.190
#>   14     three        1  476.190
#>   15      time        1  476.190
#>   16      tiny        1  476.190
#>   17       toy        1  476.190
#>   18      upon        1  476.190
#>   19       was        1  476.190
as.data.frame(flist)
#>    rank      type abs_freq nrm_freq
#> 1     1         a        2 952.3810
#> 2     2        it        2 952.3810
#> 3     3     after        1 476.1905
#> 4     4       and        1 476.1905
#> 5     5 consisted        1 476.1905
#> 6     6    corpus        1 476.1905
#> 7     7      ever        1 476.1905
#> 8     8   happily        1 476.1905
#> 9     9     lived        1 476.1905
#> 10   10        of        1 476.1905
#> 11   11      once        1 476.1905
#> 12   12 sentences        1 476.1905
#> 13   13     there        1 476.1905
#> 14   14     three        1 476.1905
#> 15   15      time        1 476.1905
#> 16   16      tiny        1 476.1905
#> 17   17       toy        1 476.1905
#> 18   18      upon        1 476.1905
#> 19   19       was        1 476.1905

(flist2 <- keep_re(flist, "^..?$"))
#> Frequency list (types in list: 3, tokens in list: 5)
#> <total number of tokens: 21>
#> rank orig_rank type abs_freq nrm_freq
#> ---- --------- ---- -------- --------
#>    1         1    a        2  952.381
#>    2         2   it        2  952.381
#>    3        10   of        1  476.190
as.data.frame
#> function (x, row.names = NULL, optional = FALSE, ...) 
#> {
#>     if (is.null(x)) 
#>         return(as.data.frame(list()))
#>     UseMethod("as.data.frame")
#> }
#> <bytecode: 0x561fc4f60440>
#> <environment: namespace:base>

(toks <- tokenize(toy_corpus))
#> Token sequence of length 21
#> idx     token
#> --- ---------
#>   1      once
#>   2      upon
#>   3         a
#>   4      time
#>   5     there
#>   6       was
#>   7         a
#>   8      tiny
#>   9       toy
#>  10    corpus
#>  11        it
#>  12 consisted
#>  13        of
#>  14     three
#>  15 sentences
#>  16       and
#>  17        it
#>  18     lived
#>  19   happily
#>  20      ever
#> ...
as.data.frame(toks)
#>        token
#> 1       once
#> 2       upon
#> 3          a
#> 4       time
#> 5      there
#> 6        was
#> 7          a
#> 8       tiny
#> 9        toy
#> 10    corpus
#> 11        it
#> 12 consisted
#> 13        of
#> 14     three
#> 15 sentences
#> 16       and
#> 17        it
#> 18     lived
#> 19   happily
#> 20      ever
#> 21     after

(toks <- tokenize(toy_corpus))
#> Token sequence of length 21
#> idx     token
#> --- ---------
#>   1      once
#>   2      upon
#>   3         a
#>   4      time
#>   5     there
#>   6       was
#>   7         a
#>   8      tiny
#>   9       toy
#>  10    corpus
#>  11        it
#>  12 consisted
#>  13        of
#>  14     three
#>  15 sentences
#>  16       and
#>  17        it
#>  18     lived
#>  19   happily
#>  20      ever
#> ...
as.data.frame(toks)
#>        token
#> 1       once
#> 2       upon
#> 3          a
#> 4       time
#> 5      there
#> 6        was
#> 7          a
#> 8       tiny
#> 9        toy
#> 10    corpus
#> 11        it
#> 12 consisted
#> 13        of
#> 14     three
#> 15 sentences
#> 16       and
#> 17        it
#> 18     lived
#> 19   happily
#> 20      ever
#> 21     after