source("https://bioconductor.org/biocLite.R")
biocLite("package")
library(Biostrings)
library(dplyr)
library(ggplot2)
library(GenomicRanges)
library(airway)
library(SingleCellExperiment)
dna <- c("ACTAGACCA", "GGGTCGACA")
dna_set <- DNAStringSet(dna)
reverseComplement(dna_set)
## A DNAStringSet instance of length 2
## width seq
## [1] 9 TGGTCTAGT
## [2] 9 TGTCGACCC
# Get file from Biostrings package library files
filepath <- system.file(package="Biostrings", "extdata", "dm3_upstream2000.fa.gz")
readLines(filepath, 6)
## [1] ">NM_078863_up_2000_chr2L_16764737_f chr2L:16764737-16766736"
## [2] "gttggtggcccaccagtgccaaaatacacaagaagaagaaacagcatctt"
## [3] "gacactaaaatgcaaaaattgctttgcgtcaatgactcaaaacgaaaatg"
## [4] "tttttgtgcttttcgaacaaaaaattgggagcagaatattggattcgctt"
## [5] "ttttcgcatcgaatatcttaagggagcaagggaagaaccatctagaataa"
## [6] "taaagaagaccaaaatgtatcgtaactaaaggttttttttattaattatt"
# Read DNA seq from file to DNAstringset object
dna2 <- Biostrings::readDNAStringSet(filepath)
length(dna2)
## [1] 26454
reverseComplement(dna2)
## A DNAStringSet instance of length 26454
## width seq names
## [1] 2000 ACCGTGCAACCGGTAAACT...CACTGGTGGGCCACCAAC NM_078863_up_2000...
## [2] 2000 ATCGAGGATGACTTTCCGT...GGGCGCCTACATAAATAA NM_001201794_up_2...
## [3] 2000 ATCGAGGATGACTTTCCGT...GGGCGCCTACATAAATAA NM_001201795_up_2...
## [4] 2000 ATCGAGGATGACTTTCCGT...GGGCGCCTACATAAATAA NM_001201796_up_2...
## [5] 2000 ATCGAGGATGACTTTCCGT...GGGCGCCTACATAAATAA NM_001201797_up_2...
## ... ... ...
## [26450] 2000 GTTTTATTGAAATTTAATT...TTTATTAGTCTTGTAAAT NM_001111010_up_2...
## [26451] 2000 AATATATAACAACTCAAAC...GGTCGTCCTTCGTATATC NM_001015258_up_2...
## [26452] 2000 AATATATAACAACTCAAAC...GGTCGTCCTTCGTATATC NM_001110997_up_2...
## [26453] 2000 AATATATAACAACTCAAAC...GGTCGTCCTTCGTATATC NM_001276245_up_2...
## [26454] 2000 CAATTTGTTCTTACACTTT...AGTTAACTAATACATACG NM_001015497_up_2...
# Calculates GC-content in each sequence
gc_cont <- letterFrequency(dna2, "GC", as.prob = TRUE)
class(gc_cont)
## [1] "matrix"
dim(gc_cont)
## [1] 26454 1
head(gc_cont)
## G|C
## [1,] 0.3785
## [2,] 0.4300
## [3,] 0.4300
## [4,] 0.4300
## [5,] 0.4300
## [6,] 0.4300
hist(gc_cont)
# In ggplot2
gc_df <- as_tibble(gc_cont)
ggplot(gc_df, aes(`G|C`))+
geom_histogram(color = "white")+
theme_classic()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Getting the class of the DNAstringset class
getClass(Class = class(dna2))
## Class "DNAStringSet" [package "Biostrings"]
##
## Slots:
##
## Name: pool ranges elementType
## Class: SharedRaw_Pool GroupedIRanges character
##
## Name: elementMetadata metadata
## Class: DataTable_OR_NULL list
##
## Extends:
## Class "XStringSet", directly
## Class "XRawList", by class "XStringSet", distance 2
## Class "XVectorList", by class "XStringSet", distance 3
## Class "List", by class "XStringSet", distance 4
## Class "Vector", by class "XStringSet", distance 5
## Class "list_OR_List", by class "XStringSet", distance 5
## Class "Annotated", by class "XStringSet", distance 6
##
## Known Subclasses: "QualityScaledDNAStringSet"
# S4 packages are printed in this way
class(dna2)
## [1] "DNAStringSet"
## attr(,"package")
## [1] "Biostrings"
isS4(dna2)
## [1] TRUE
# List available methods in DNAStringSet
methods(class="DNAStringSet")
## [1] ! !=
## [3] $ $<-
## [5] %in% [
## [7] [[ [[<-
## [9] [<- <
## [11] <= ==
## [13] > >=
## [15] aggregate alphabetFrequency
## [17] anyNA append
## [19] as.character as.complex
## [21] as.data.frame as.env
## [23] as.factor as.integer
## [25] as.list as.logical
## [27] as.matrix as.numeric
## [29] as.raw as.vector
## [31] by c
## [33] cbind chartr
## [35] coerce compact
## [37] compareStrings complement
## [39] concatenateObjects consensusMatrix
## [41] consensusString countOverlaps
## [43] countPattern countPDict
## [45] dinucleotideFrequencyTest do.call
## [47] droplevels duplicated
## [49] elementMetadata elementMetadata<-
## [51] elementNROWS elementType
## [53] eval expand
## [55] expand.grid extractAt
## [57] extractROWS Filter
## [59] findOverlaps getListElement
## [61] hasOnlyBaseLetters head
## [63] ifelse2 intersect
## [65] is.na is.unsorted
## [67] isEmpty isMatchingEndingAt
## [69] isMatchingStartingAt lapply
## [71] length lengths
## [73] letterFrequency match
## [75] matchPattern matchPDict
## [77] mcols mcols<-
## [79] merge metadata
## [81] metadata<- mstack
## [83] names names<-
## [85] nchar neditEndingAt
## [87] neditStartingAt NROW
## [89] nucleotideFrequencyAt oligonucleotideFrequency
## [91] order overlapsAny
## [93] PairwiseAlignments PairwiseAlignmentsSingleSubject
## [95] parallelSlotNames parallelVectorNames
## [97] pcompare pcompareRecursively
## [99] PDict PWM
## [101] rank rbind
## [103] Reduce relist
## [105] relistToClass rename
## [107] rep rep.int
## [109] replaceAt replaceLetterAt
## [111] replaceROWS rev
## [113] revElements reverse
## [115] reverseComplement ROWNAMES
## [117] sapply seqinfo
## [119] seqinfo<- seqlevelsInUse
## [121] seqtype seqtype<-
## [123] setdiff setequal
## [125] setListElement shiftApply
## [127] show showAsCell
## [129] sort split
## [131] split<- stack
## [133] stringDist strsplit
## [135] subseq subseq<-
## [137] subset subsetByOverlaps
## [139] table tail
## [141] tapply threebands
## [143] toString transform
## [145] translate trimLRPatterns
## [147] twoWayAlphabetFrequency union
## [149] unique uniqueLetters
## [151] unlist unsplit
## [153] unstrsplit updateObject
## [155] values values<-
## [157] vcountPattern vcountPDict
## [159] vmatchPattern vwhichPDict
## [161] which.isMatchingEndingAt which.isMatchingStartingAt
## [163] whichPDict width
## [165] window window<-
## [167] windows with
## [169] within xtabs
## [171] xvcopy zipdown
## see '?methods' for accessing help and source code
gr <- GRanges(c("chr1:10-19","chr1:30-39"))
gr
## GRanges object with 2 ranges and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
## [1] chr1 10-19 *
## [2] chr1 30-39 *
## -------
## seqinfo: 1 sequence from an unspecified genome; no seqlengths
length(gr)
## [1] 2
# Add a column with sequence data
mcols(gr) <- data.frame(x = c(1,2))
gr
## GRanges object with 2 ranges and 1 metadata column:
## seqnames ranges strand | x
## <Rle> <IRanges> <Rle> | <numeric>
## [1] chr1 10-19 * | 1
## [2] chr1 30-39 * | 2
## -------
## seqinfo: 1 sequence from an unspecified genome; no seqlengths
width(gr)
## [1] 10 10
# Shifts the position of the ranges by 1
shift(gr, 1)
## GRanges object with 2 ranges and 1 metadata column:
## seqnames ranges strand | x
## <Rle> <IRanges> <Rle> | <numeric>
## [1] chr1 11-20 * | 1
## [2] chr1 31-40 * | 2
## -------
## seqinfo: 1 sequence from an unspecified genome; no seqlengths
# Introduce snps
snp <- GRanges(c("chr1:15","chr1:22"))
snp
## GRanges object with 2 ranges and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
## [1] chr1 15 *
## [2] chr1 22 *
## -------
## seqinfo: 1 sequence from an unspecified genome; no seqlengths
# Identify overlaps between snps and the genes
countOverlaps(snp, gr)
## [1] 1 0
snp[countOverlaps(snp, gr) > 0]
## GRanges object with 1 range and 0 metadata columns:
## seqnames ranges strand
## <Rle> <IRanges> <Rle>
## [1] chr1 15 *
## -------
## seqinfo: 1 sequence from an unspecified genome; no seqlengths
findOverlaps(snp, gr)
## Hits object with 1 hit and 0 metadata columns:
## queryHits subjectHits
## <integer> <integer>
## [1] 1 1
## -------
## queryLength: 2 / subjectLength: 2
# Overlapping genes
gene <- GRanges(c("chr1:10-19","chr1:12-20","chr1:30-39"))
mcols(gene) <- data.frame(symbol=c("foo","bar","baz"))
gene
## GRanges object with 3 ranges and 1 metadata column:
## seqnames ranges strand | symbol
## <Rle> <IRanges> <Rle> | <factor>
## [1] chr1 10-19 * | foo
## [2] chr1 12-20 * | bar
## [3] chr1 30-39 * | baz
## -------
## seqinfo: 1 sequence from an unspecified genome; no seqlengths
countOverlaps(gene)
## [1] 2 2 1
data(airway)
airway
## class: RangedSummarizedExperiment
## dim: 64102 8
## metadata(1): ''
## assays(1): counts
## rownames(64102): ENSG00000000003 ENSG00000000005 ... LRG_98 LRG_99
## rowData names(0):
## colnames(8): SRR1039508 SRR1039509 ... SRR1039520 SRR1039521
## colData names(9): SampleName cell ... Sample BioSample
head(assay(airway))
## SRR1039508 SRR1039509 SRR1039512 SRR1039513 SRR1039516
## ENSG00000000003 679 448 873 408 1138
## ENSG00000000005 0 0 0 0 0
## ENSG00000000419 467 515 621 365 587
## ENSG00000000457 260 211 263 164 245
## ENSG00000000460 60 55 40 35 78
## ENSG00000000938 0 0 2 0 1
## SRR1039517 SRR1039520 SRR1039521
## ENSG00000000003 1047 770 572
## ENSG00000000005 0 0 0
## ENSG00000000419 799 417 508
## ENSG00000000457 331 233 229
## ENSG00000000460 63 76 60
## ENSG00000000938 0 0 0
colData(airway)
## DataFrame with 8 rows and 9 columns
## SampleName cell dex albut Run avgLength
## <factor> <factor> <factor> <factor> <factor> <integer>
## SRR1039508 GSM1275862 N61311 untrt untrt SRR1039508 126
## SRR1039509 GSM1275863 N61311 trt untrt SRR1039509 126
## SRR1039512 GSM1275866 N052611 untrt untrt SRR1039512 126
## SRR1039513 GSM1275867 N052611 trt untrt SRR1039513 87
## SRR1039516 GSM1275870 N080611 untrt untrt SRR1039516 120
## SRR1039517 GSM1275871 N080611 trt untrt SRR1039517 126
## SRR1039520 GSM1275874 N061011 untrt untrt SRR1039520 101
## SRR1039521 GSM1275875 N061011 trt untrt SRR1039521 98
## Experiment Sample BioSample
## <factor> <factor> <factor>
## SRR1039508 SRX384345 SRS508568 SAMN02422669
## SRR1039509 SRX384346 SRS508567 SAMN02422675
## SRR1039512 SRX384349 SRS508571 SAMN02422678
## SRR1039513 SRX384350 SRS508572 SAMN02422670
## SRR1039516 SRX384353 SRS508575 SAMN02422682
## SRR1039517 SRX384354 SRS508576 SAMN02422673
## SRR1039520 SRX384357 SRS508579 SAMN02422683
## SRR1039521 SRX384358 SRS508580 SAMN02422677
Single-cell RNASeq
url <- "https://scrnaseq-public-datasets.s3.amazonaws.com/scater-objects/manno_mouse.rds"
fl <- BiocFileCache::bfcrpath(rnames = url)
sce <- readRDS(fl)
library(GenABEL)
# Import data as gwaa class (S4)
genodata <- load.gwaa.data(
phenofile = "phenotype.dat",
genofile = "genotype.raw",
makemap = F,
sort = F)
## ids loaded...
## marker names loaded...
## chromosome data loaded...
## map data loaded...
## allele coding data loaded...
## strand data loaded...
## genotype data loaded...
## snp.data object created...
## assignment of gwaa.data object FORCED; X-errors were not checked!
# Class: gwaa.data
class(genodata)
## [1] "gwaa.data"
## attr(,"package")
## [1] "GenABEL"
# Class: snp.data
class(genodata@gtdata)
## [1] "snp.data"
## attr(,"package")
## [1] "GenABEL"
# Extract genotype data
gtdata <- genodata@gtdata
# How many snps?
nsnps(gtdata)
## [1] 729048
# How many individuals?
nids(gtdata)
## [1] 200
# Individual 27, marker 5, 6 and 7
# Genotype is represented in binary format in @gtps (40, 40, 80)
gtdata[27, 5:7]
## @nids = 1
## @nsnps = 3
## @nbytes = 1
## @idnames = id2_26
## @snpnames = rs12124819 rs4040617 rs4970383
## @chromosome = 1 1 1
## @coding = 0b 04 04
## @strand = 00 00 00
## @map = 766409 769185 828418
## @male = 0
## @gtps =
## 40 40 80
# Access the genotype non-binary
as.character(gtdata[27, 5:7])
## rs12124819 rs4040617 rs4970383
## id2_26 "G/G" "A/A" "A/G"
# Summary of all individuals on 5 markers
# NoMeasured = how many individuals with the genotype on that row
sum_data <- summary(gtdata[,1:5])
head(sum_data)
plot(sum_data$Q.2)
# Phenotypic data
phenodata <- genodata@phdata
summary(phenodata)
## id sex pheno
## Length:200 Min. :0.000 Min. :0.0
## Class :character 1st Qu.:0.000 1st Qu.:0.0
## Mode :character Median :1.000 Median :0.5
## Mean :0.535 Mean :0.5
## 3rd Qu.:1.000 3rd Qu.:1.0
## Max. :1.000 Max. :1.0
# Access phenotype data
# Which sexes for which phenotype?
table(phenodata[,2:3])
## pheno
## sex 0 1
## 0 43 50
## 1 57 50
# Filter out people/markers with extremely low call rate
qc1 <- check.marker(genodata,
callrate = 0.95,
perid.call = 0.99,
maf = 0.05,
hweidsubset = genodata@phdata$pheno == 0)
## Excluding people/markers with extremely low call rate...
## 729048 markers and 200 people in total
## 0 people excluded because of call rate < 0.1
## 0 markers excluded because of call rate < 0.1
## Passed: 729048 markers and 200 people
##
## RUN 1
## 729048 markers and 200 people in total
## 105778 (14.50906%) markers excluded as having low (<5%) minor allele frequency
## 934 (0.1281123%) markers excluded because of low (<95%) call rate
## 0 (0%) markers excluded because they are out of HWE (FDR <0.2)
## 107 (53.5%) people excluded because of low (<99%) call rate
## Mean autosomal HET is 0.343121 (s.e. 0.006399162)
## 0 people excluded because too high autosomal heterozygosity (FDR <1%)
## Mean IBS is 0.7155259 (s.e. 0.007647288), as based on 2000 autosomal markers
## 0 (0%) people excluded because of too high IBS (>=0.95)
## In total, 622431 (85.37586%) markers passed all criteria
## In total, 93 (46.5%) people passed all criteria
##
## RUN 2
## 622431 markers and 93 people in total
## 11695 (1.878923%) markers excluded as having low (<5%) minor allele frequency
## 3 (0.0004819811%) markers excluded because of low (<95%) call rate
## 0 (0%) markers excluded because they are out of HWE (FDR <0.2)
## 0 (0%) people excluded because of low (<99%) call rate
## Mean autosomal HET is 0.3499146 (s.e. 0.006736866)
## 0 people excluded because too high autosomal heterozygosity (FDR <1%)
## Mean IBS is 0.7118663 (s.e. 0.008025326), as based on 2000 autosomal markers
## 0 (0%) people excluded because of too high IBS (>=0.95)
## In total, 610733 (98.12059%) markers passed all criteria
## In total, 93 (100%) people passed all criteria
##
## RUN 3
## 610733 markers and 93 people in total
## 0 (0%) markers excluded as having low (<5%) minor allele frequency
## 0 (0%) markers excluded because of low (<95%) call rate
## 0 (0%) markers excluded because they are out of HWE (FDR <0.2)
## 0 (0%) people excluded because of low (<99%) call rate
## Mean autosomal HET is 0.3499146 (s.e. 0.006736866)
## 0 people excluded because too high autosomal heterozygosity (FDR <1%)
## Mean IBS is 0.7163355 (s.e. 0.008108263), as based on 2000 autosomal markers
## 0 (0%) people excluded because of too high IBS (>=0.95)
## In total, 610733 (100%) markers passed all criteria
## In total, 93 (100%) people passed all criteria
class(qc1)
## [1] "check.marker"
str(qc1)
## List of 7
## $ nofreq : chr [1:117473] "rs2340587" "rs3748592" "rs2272756" "rs3748594" ...
## $ nocall : chr [1:937] "rs4648499" "rs4648360" "rs6424089" "rs4495664" ...
## $ nohwe : chr(0)
## $ idnocall: chr [1:107] "id2_0" "id2_1" "id2_3" "id2_4" ...
## $ snpok : chr [1:610733] "rs3131969" "rs3131967" "rs1048488" "rs12562034" ...
## $ idok : chr [1:93] "id2_2" "id2_5" "id2_6" "id2_12" ...
## $ call :List of 5
## ..$ name : chr [1:729048] "rs3131969" "rs3131967" "rs1048488" "rs12562034" ...
## ..$ ids : chr [1:200] "id2_0" "id2_1" "id2_2" "id2_3" ...
## ..$ map : Named num [1:729048] 744045 744197 750775 758311 766409 ...
## .. ..- attr(*, "names")= chr [1:729048] "rs3131969" "rs3131967" "rs1048488" "rs12562034" ...
## ..$ chromosome: Named chr [1:729048] "1" "1" "1" "1" ...
## .. ..- attr(*, "names")= chr [1:729048] "rs3131969" "rs3131967" "rs1048488" "rs12562034" ...
## ..$ call : language check.marker(data = genodata, callrate = 0.95, perid.call = 0.99, maf = 0.05, hweidsubset = genodata@phdata$pheno == 0)
## - attr(*, "class")= chr "check.marker"
summary(qc1)
## $`Per-SNP fails statistics`
## NoCall NoMAF NoHWE Redundant Xsnpfail
## NoCall 842 95 0 0 0
## NoMAF NA 117378 0 0 0
## NoHWE NA NA 0 0 0
## Redundant NA NA NA 0 0
## Xsnpfail NA NA NA NA 0
##
## $`Per-person fails statistics`
## IDnoCall HetFail IBSFail isfemale ismale isXXY otherSexErr
## IDnoCall 107 0 0 0 0 0 0
## HetFail NA 0 0 0 0 0 0
## IBSFail NA NA 0 0 0 0 0
## isfemale NA NA NA 0 0 0 0
## ismale NA NA NA NA 0 0 0
## isXXY NA NA NA NA NA 0 0
## otherSexErr NA NA NA NA NA NA 0
# Filter for the passed data only
data.clean <- genodata[qc1$idok, qc1$snpok]
# 107 persons had many snps that were uncalled
nids(genodata)
## [1] 200
nids(data.clean)
## [1] 93
# Change perid.call to 95 %
qc2 <- check.marker(genodata,
callrate = 0.95,
perid.call = 0.95,
maf = 0.05,
hweidsubset = genodata@phdata$pheno == 0)
## Excluding people/markers with extremely low call rate...
## 729048 markers and 200 people in total
## 0 people excluded because of call rate < 0.1
## 0 markers excluded because of call rate < 0.1
## Passed: 729048 markers and 200 people
##
## RUN 1
## 729048 markers and 200 people in total
## 105778 (14.50906%) markers excluded as having low (<5%) minor allele frequency
## 934 (0.1281123%) markers excluded because of low (<95%) call rate
## 0 (0%) markers excluded because they are out of HWE (FDR <0.2)
## 3 (1.5%) people excluded because of low (<95%) call rate
## Mean autosomal HET is 0.343121 (s.e. 0.006399162)
## 0 people excluded because too high autosomal heterozygosity (FDR <1%)
## Mean IBS is 0.7150325 (s.e. 0.007782171), as based on 2000 autosomal markers
## 0 (0%) people excluded because of too high IBS (>=0.95)
## In total, 622431 (85.37586%) markers passed all criteria
## In total, 197 (98.5%) people passed all criteria
##
## RUN 2
## 622431 markers and 197 people in total
## 1231 (0.1977729%) markers excluded as having low (<5%) minor allele frequency
## 84 (0.01349547%) markers excluded because of low (<95%) call rate
## 0 (0%) markers excluded because they are out of HWE (FDR <0.2)
## 0 (0%) people excluded because of low (<95%) call rate
## Mean autosomal HET is 0.3435735 (s.e. 0.006437736)
## 0 people excluded because too high autosomal heterozygosity (FDR <1%)
## Mean IBS is 0.7143686 (s.e. 0.007953909), as based on 2000 autosomal markers
## 0 (0%) people excluded because of too high IBS (>=0.95)
## In total, 621116 (99.78873%) markers passed all criteria
## In total, 197 (100%) people passed all criteria
##
## RUN 3
## 621116 markers and 197 people in total
## 0 (0%) markers excluded as having low (<5%) minor allele frequency
## 0 (0%) markers excluded because of low (<95%) call rate
## 0 (0%) markers excluded because they are out of HWE (FDR <0.2)
## 0 (0%) people excluded because of low (<95%) call rate
## Mean autosomal HET is 0.3435735 (s.e. 0.006437736)
## 0 people excluded because too high autosomal heterozygosity (FDR <1%)
## Mean IBS is 0.7118725 (s.e. 0.008213621), as based on 2000 autosomal markers
## 0 (0%) people excluded because of too high IBS (>=0.95)
## In total, 621116 (100%) markers passed all criteria
## In total, 197 (100%) people passed all criteria
summary(qc2)
## $`Per-SNP fails statistics`
## NoCall NoMAF NoHWE Redundant Xsnpfail
## NoCall 923 95 0 0 0
## NoMAF NA 106914 0 0 0
## NoHWE NA NA 0 0 0
## Redundant NA NA NA 0 0
## Xsnpfail NA NA NA NA 0
##
## $`Per-person fails statistics`
## IDnoCall HetFail IBSFail isfemale ismale isXXY otherSexErr
## IDnoCall 3 0 0 0 0 0 0
## HetFail NA 0 0 0 0 0 0
## IBSFail NA NA 0 0 0 0 0
## isfemale NA NA NA 0 0 0 0
## ismale NA NA NA NA 0 0 0
## isXXY NA NA NA NA NA 0 0
## otherSexErr NA NA NA NA NA NA 0
data.clean2 <- genodata[qc2$idok, qc2$snpok]
# Only 3 individuals lost
nids(genodata)
## [1] 200
nids(data.clean2)
## [1] 197
# Identify autosomal markers
autosomalMarkers <- autosomal(data.clean2)
gkin <- ibs(data.clean2[,autosomalMarkers], weight = "freq")
str(gkin)
## num [1:197, 1:197] 0.50649 0.00246 -0.00155 -0.003 -0.00152 ...
## - attr(*, "dimnames")=List of 2
## ..$ : chr [1:197] "id2_0" "id2_1" "id2_2" "id2_3" ...
## ..$ : chr [1:197] "id2_0" "id2_1" "id2_2" "id2_3" ...
## - attr(*, "Var")= num [1:197] 0.503 0.521 0.51 0.497 0.504 ...
# Genomic distance matrix
gdm <- as.dist(0.5 - gkin)
mds <- cmdscale(gdm)
plot(mds)
# Fit model
an <- qtscore(formula = pheno ~ sex, data.clean2, trait = "binomial")
sum(is.na(data.clean2@phdata$pheno))
## [1] 0
plot(an)
summary(an)
## Summary for top 10 results, sorted by P1df
estlambda(an[,"P1df"], plot = T)
## $estimate
## [1] 1.014491
##
## $se
## [1] 2.867801e-05
invisible(capture.output(h2h <- polygenic(pheno~sex, kinship.matrix = gkin, data = data.clean2, llfun="polylik")))
## Warning in sqrt(eigValInvSig): NaNs produced
## Warning in log(es): NaNs produced
mm <- mmscore(h2object = h2h, data = data.clean2, clambda = T)
plot(mm)