Skip to contents

as_data_frame() is an alternative to as.data.frame(). A number of objects in mclm can be turned into data frames with either of these functions.

Usage

as_data_frame(x, row.names = NULL, optional = FALSE, ...)

# S3 method for default
as_data_frame(x, row.names = NULL, optional = FALSE, ...)

# S3 method for assoc_scores
as.data.frame(x, ...)

# S3 method for conc
as.data.frame(x, ...)

# S3 method for fnames
as.data.frame(x, ...)

# S3 method for freqlist
as.data.frame(x, row.names = NULL, optional = FALSE, ...)

# S3 method for details.slma
as.data.frame(x, ...)

# S3 method for slma
as.data.frame(x, ...)

# S3 method for tokens
as.data.frame(x, ...)

# S3 method for types
as.data.frame(x, ...)

Arguments

x

Object to coerce to data.frame.

row.names

NULL or a character vector giving the row names for the data frame.

optional

Logical. If TRUE, setting rownames and converting column names is optional (see as.data.frame()).

...

Additional arguments passed on to the method for the class of x.

Value

Object of class data.frame

Examples

# for an assoc_scores object ---------------------
a <- c(10,    30,    15,    1)
b <- c(200, 1000,  5000,  300)
c <- c(100,   14,    16,    4)
d <- c(300, 5000, 10000, 6000)
types <- c("four", "fictitious", "toy", "examples")
(scores <- assoc_abcd(a, b, c, d, types = types))
#> Association scores (types in list: 4)
#>         type  a    PMI G_signed|   b   c     d dir  exp_a DP_rows
#> 1       four 10 -1.921  -45.432| 200 100   300  -1 37.869  -0.202
#> 2 fictitious 30  2.000   56.959|1000  14  5000   1  7.498   0.026
#> 3        toy 15  0.536    2.984|5000  16 10000   1 10.343   0.001
#> 4   examples  1  2.067    1.473| 300   4  6000   1  0.239   0.003
#> <number of extra columns to the right: 7>
#> 

as.data.frame(scores)
#>         type  a    b   c     d dir      exp_a      DP_rows    RR_rows       OR
#> 1       four 10  200 100   300  -1 37.8688525 -0.202380952  0.1904762  0.15000
#> 2 fictitious 30 1000  14  5000   1  7.4983455  0.026334032 10.4313454 10.71429
#> 3        toy 15 5000  16 10000   1 10.3429579  0.001393583  1.8723829  1.87500
#> 4   examples  1  300   4  6000   1  0.2386994  0.002656037  4.9867110  5.00000
#>            MS        Dice        PMI chi2_signed   G_signed          t
#> 1 0.047619048 0.062500000 -1.9210117  -38.158009 -45.431519 -8.8860423
#> 2 0.029126214 0.055865922  2.0003183   81.993003  56.958917  4.1184552
#> 3 0.002991027 0.005945303  0.5363137    3.153303   2.983872  1.2030435
#> 4 0.003322259 0.006535948  2.0667329    2.551819   1.473313  0.7613609
#>     p_fisher_1
#> 1 1.000000e+00
#> 2 6.106227e-14
#> 3 5.916695e-02
#> 4 2.170331e-01
as_data_frame(scores)
#>         type  a    b   c     d dir      exp_a      DP_rows    RR_rows       OR
#> 1       four 10  200 100   300  -1 37.8688525 -0.202380952  0.1904762  0.15000
#> 2 fictitious 30 1000  14  5000   1  7.4983455  0.026334032 10.4313454 10.71429
#> 3        toy 15 5000  16 10000   1 10.3429579  0.001393583  1.8723829  1.87500
#> 4   examples  1  300   4  6000   1  0.2386994  0.002656037  4.9867110  5.00000
#>            MS        Dice        PMI chi2_signed   G_signed          t
#> 1 0.047619048 0.062500000 -1.9210117  -38.158009 -45.431519 -8.8860423
#> 2 0.029126214 0.055865922  2.0003183   81.993003  56.958917  4.1184552
#> 3 0.002991027 0.005945303  0.5363137    3.153303   2.983872  1.2030435
#> 4 0.003322259 0.006535948  2.0667329    2.551819   1.473313  0.7613609
#>     p_fisher_1
#> 1 1.000000e+00
#> 2 6.106227e-14
#> 3 5.916695e-02
#> 4 2.170331e-01

# for a conc object ------------------------------
(conc_data <- conc('A very small corpus.', '\\w+', as_text = TRUE))
#> Concordance-based data frame (number of observations: 4)
#> idx                                           left|match |right             
#>   1                                               |  A   |very small corpus.
#>   2                                              A| very |small corpus.     
#>   3                                         A very|small |corpus.           
#>   4                                   A very small|corpus|.                 
#> 
#> This data frame has 6 columns:
#>    column
#> 1 glob_id
#> 2      id
#> 3  source
#> 4    left
#> 5   match
#> 6   right
as.data.frame(conc_data)
#>   glob_id id source          left  match               right
#> 1       1  1      -                    A  very small corpus.
#> 2       2  2      -            A    very       small corpus.
#> 3       3  3      -       A very   small             corpus.
#> 4       4  4      - A very small  corpus                   .

# for an fnames object ---------------------------
cwd_fnames <- as_fnames(c('file1', 'file2'))
as.data.frame(cwd_fnames)
#>   filename
#> 1    file1
#> 2    file2

# for a freqlist, types or tokens object ---------
toy_corpus <- "Once upon a time there was a tiny toy corpus.
  It consisted of three sentences. And it lived happily ever after."
(flist <- freqlist(toy_corpus, as_text = TRUE))
#> Frequency list (types in list: 19, tokens in list: 21)
#> rank      type abs_freq nrm_freq
#> ---- --------- -------- --------
#>    1         a        2  952.381
#>    2        it        2  952.381
#>    3     after        1  476.190
#>    4       and        1  476.190
#>    5 consisted        1  476.190
#>    6    corpus        1  476.190
#>    7      ever        1  476.190
#>    8   happily        1  476.190
#>    9     lived        1  476.190
#>   10        of        1  476.190
#>   11      once        1  476.190
#>   12 sentences        1  476.190
#>   13     there        1  476.190
#>   14     three        1  476.190
#>   15      time        1  476.190
#>   16      tiny        1  476.190
#>   17       toy        1  476.190
#>   18      upon        1  476.190
#>   19       was        1  476.190
as.data.frame(flist)
#>    rank      type abs_freq nrm_freq
#> 1     1         a        2 952.3810
#> 2     2        it        2 952.3810
#> 3     3     after        1 476.1905
#> 4     4       and        1 476.1905
#> 5     5 consisted        1 476.1905
#> 6     6    corpus        1 476.1905
#> 7     7      ever        1 476.1905
#> 8     8   happily        1 476.1905
#> 9     9     lived        1 476.1905
#> 10   10        of        1 476.1905
#> 11   11      once        1 476.1905
#> 12   12 sentences        1 476.1905
#> 13   13     there        1 476.1905
#> 14   14     three        1 476.1905
#> 15   15      time        1 476.1905
#> 16   16      tiny        1 476.1905
#> 17   17       toy        1 476.1905
#> 18   18      upon        1 476.1905
#> 19   19       was        1 476.1905

(flist2 <- keep_re(flist, "^..?$"))
#> Frequency list (types in list: 3, tokens in list: 5)
#> <total number of tokens: 21>
#> rank orig_rank type abs_freq nrm_freq
#> ---- --------- ---- -------- --------
#>    1         1    a        2  952.381
#>    2         2   it        2  952.381
#>    3        10   of        1  476.190
as.data.frame(flist2)
#>   rank orig_rank type abs_freq nrm_freq
#> 1    1         1    a        2 952.3810
#> 2    2         2   it        2 952.3810
#> 3    3        10   of        1 476.1905

(toks <- tokenize(toy_corpus))
#> Token sequence of length 21
#> idx     token
#> --- ---------
#>   1      once
#>   2      upon
#>   3         a
#>   4      time
#>   5     there
#>   6       was
#>   7         a
#>   8      tiny
#>   9       toy
#>  10    corpus
#>  11        it
#>  12 consisted
#>  13        of
#>  14     three
#>  15 sentences
#>  16       and
#>  17        it
#>  18     lived
#>  19   happily
#>  20      ever
#> ...
#> 
as.data.frame(toks)
#>        token
#> 1       once
#> 2       upon
#> 3          a
#> 4       time
#> 5      there
#> 6        was
#> 7          a
#> 8       tiny
#> 9        toy
#> 10    corpus
#> 11        it
#> 12 consisted
#> 13        of
#> 14     three
#> 15 sentences
#> 16       and
#> 17        it
#> 18     lived
#> 19   happily
#> 20      ever
#> 21     after

(toks <- tokenize(toy_corpus))
#> Token sequence of length 21
#> idx     token
#> --- ---------
#>   1      once
#>   2      upon
#>   3         a
#>   4      time
#>   5     there
#>   6       was
#>   7         a
#>   8      tiny
#>   9       toy
#>  10    corpus
#>  11        it
#>  12 consisted
#>  13        of
#>  14     three
#>  15 sentences
#>  16       and
#>  17        it
#>  18     lived
#>  19   happily
#>  20      ever
#> ...
#> 
as.data.frame(toks)
#>        token
#> 1       once
#> 2       upon
#> 3          a
#> 4       time
#> 5      there
#> 6        was
#> 7          a
#> 8       tiny
#> 9        toy
#> 10    corpus
#> 11        it
#> 12 consisted
#> 13        of
#> 14     three
#> 15 sentences
#> 16       and
#> 17        it
#> 18     lived
#> 19   happily
#> 20      ever
#> 21     after