# NestLink NGS workflow script (code chunks extracted from a knitr document).
# Steps, in order: look up the NestLink records on ExperimentHub, read the
# table of known nanobodies and translate their DNA sequences, assemble the
# parameter list, run `runNGSAnalysis()` on the FASTQ input, and print the
# head of the result, including its FASTA rendering.

## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(echo = TRUE)

## ----workflow_chunk0, message=FALSE, warning=FALSE----------------------------
library(NestLink)

## -----------------------------------------------------------------------------
library(ExperimentHub)
eh <- ExperimentHub()
query(eh, "NestLink")

## ----define.input.output------------------------------------------------------
# dataFolder <- file.path(path.package(package = 'NestLink'), 'extdata')
# expFile <- list.files(dataFolder, pattern='*.fastq.gz', full.names = TRUE)

# First hub record matching both search terms.
expFile <- query(eh, c("NestLink", "NL42_100K.fastq.gz"))[[1]]
scratchFolder <- tempdir()
# NOTE(review): setwd() changes the working directory for everything that
# follows in this session and is never restored. Presumably this is so that
# files written by the analysis end up in the temp dir -- confirm before
# removing.
setwd(scratchFolder)

## ----load.knownNB-------------------------------------------------------------
# knownNB_File <- list.files(dataFolder,
#                            pattern='knownNB.txt', full.names = TRUE)
knownNB_File <- query(eh, c("NestLink", "knownNB.txt"))[[1]]

# Tab-separated table with a header row; the first column becomes the row
# names and the `Sequence` column holds the DNA sequences.
knownNB_data <- read.table(
  knownNB_File,
  sep = '\t',
  header = TRUE,
  row.names = 1,
  stringsAsFactors = FALSE
)

# Translate the DNA sequences and label each one with its row name.
knownNB <- Biostrings::translate(
  Biostrings::DNAStringSet(knownNB_data$Sequence)
)
names(knownNB) <- rownames(knownNB_data)

# Convert to a named character vector. vapply() guarantees a character
# result (one string per sequence) even when the input is empty, whereas
# sapply() would silently return an empty list in that case.
knownNB <- vapply(knownNB, toString, character(1))

## ----setupParameter-----------------------------------------------------------
param <- list(
  # Number of reads from the start of the fastq file to process
  nReads = 100,
  # Number of accepted mismatches for all pattern search steps
  maxMismatch = 1,
  # Linker sequence left to nanobody
  NB_Linker1 = "GGCCggcggGGCC",
  # Linker sequence right to nanobody
  NB_Linker2 = "GCAGGAGGA",
  # Sequence next to flycode
  ProteaseSite = "TTAGTCCCAAGA",
  # Linker sequence next to flycode
  FC_Linker = "GGCCaaggaggcCGG",
  # Named character vector of known nanobody sequences (built above)
  knownNB = knownNB,
  # Minimal fraction of the dominant nanobody for a specific flycode
  minRelBestHitFreq = 0.8,
  # Minimal fraction per sequence position in nanobody consensus sequence
  # calculation
  minConsensusScore = 0.9,
  # Minimal nanobody length in [nt]
  minNanobodyLength = 348,
  # Minimal flycode length in [nt]
  minFlycodeLength = 33,
  # Minimal number of subreads for a specific flycode to keep it in the
  # analysis
  FCminFreq = 1
)

## ----filterExtractTranslateSequences, message=FALSE---------------------------
# Run the analysis and report how long it took.
system.time(NB2FC <- runNGSAnalysis(file = expFile[1], param))

## ----sanityCheck.NB.FC.linkage------------------------------------------------
head(NB2FC, 2)

## ----write.AA.FASTA-----------------------------------------------------------
head(nanobodyFlycodeLinking.as.fasta(NB2FC))

## ----sessionInfo, echo=FALSE--------------------------------------------------
sessionInfo()

# References