site: {"static_files":[{"name":".nojekyll","modified_time":"2021-05-31 04:45:05 -0500","basename":".nojekyll","extname":"","collection":null,"path":"/.nojekyll"},{"name":"CNAME","modified_time":"2021-05-31 04:45:05 -0500","basename":"CNAME","extname":"","collection":null,"path":"/CNAME"},{"name":"CODEOWNERS","modified_time":"2021-05-31 04:45:05 -0500","basename":"CODEOWNERS","extname":"","collection":null,"path":"/CODEOWNERS"},{"name":"CODE_OF_CONDUCT.md","modified_time":"2021-05-31 04:45:05 -0500","basename":"CODE_OF_CONDUCT","extname":".md","collection":null,"path":"/CODE_OF_CONDUCT.md"},{"name":"Dockerfile.clean","modified_time":"2021-05-31 04:45:05 -0500","basename":"Dockerfile","extname":".clean","collection":null,"path":"/Dockerfile.clean"},{"name":"Dockerfile.devel","modified_time":"2021-05-31 04:45:05 -0500","basename":"Dockerfile","extname":".devel","collection":null,"path":"/Dockerfile.devel"},{"name":"Procfile","modified_time":"2021-05-31 04:45:05 -0500","basename":"Procfile","extname":"","collection":null,"path":"/Procfile"},{"name":"app.json","modified_time":"2021-05-31 04:45:05 -0500","basename":"app","extname":".json","collection":null,"path":"/app.json"},{"name":"VIB.css","modified_time":"2021-05-31 04:45:05 -0500","basename":"VIB","extname":".css","collection":null,"path":"/assets/css/VIB.css"},{"name":"academicons.css","modified_time":"2021-05-31 04:45:05 -0500","basename":"academicons","extname":".css","collection":null,"path":"/assets/css/academicons.css"},{"name":"bootstrap-theme.css","modified_time":"2021-05-31 04:45:05 -0500","basename":"bootstrap-theme","extname":".css","collection":null,"path":"/assets/css/bootstrap-theme.css"},{"name":"bootstrap-theme.css.map","modified_time":"2021-05-31 04:45:05 -0500","basename":"bootstrap-theme.css","extname":".map","collection":null,"path":"/assets/css/bootstrap-theme.css.map"},{"name":"bootstrap-theme.min.css","modified_time":"2021-05-31 04:45:05 
-0500","basename":"bootstrap-theme.min","extname":".css","collection":null,"path":"/assets/css/bootstrap-theme.min.css"},{"name":"bootstrap-theme.min.css.map","modified_time":"2021-05-31 04:45:05 -0500","basename":"bootstrap-theme.min.css","extname":".map","collection":null,"path":"/assets/css/bootstrap-theme.min.css.map"},{"name":"bootstrap-toc.min.css","modified_time":"2021-05-31 04:45:05 -0500","basename":"bootstrap-toc.min","extname":".css","collection":null,"path":"/assets/css/bootstrap-toc.min.css"},{"name":"bootstrap.css","modified_time":"2021-05-31 04:45:05 -0500","basename":"bootstrap","extname":".css","collection":null,"path":"/assets/css/bootstrap.css"},{"name":"bootstrap.css.map","modified_time":"2021-05-31 04:45:05 -0500","basename":"bootstrap.css","extname":".map","collection":null,"path":"/assets/css/bootstrap.css.map"},{"name":"bootstrap.min.css","modified_time":"2021-05-31 04:45:05 -0500","basename":"bootstrap.min","extname":".css","collection":null,"path":"/assets/css/bootstrap.min.css"},{"name":"bootstrap.min.css.map","modified_time":"2021-05-31 04:45:05 -0500","basename":"bootstrap.min.css","extname":".map","collection":null,"path":"/assets/css/bootstrap.min.css.map"},{"name":"font-awesome.css","modified_time":"2021-05-31 04:45:05 -0500","basename":"font-awesome","extname":".css","collection":null,"path":"/assets/css/font-awesome.css"},{"name":"main.scss","modified_time":"2021-05-31 04:45:05 -0500","basename":"main","extname":".scss","collection":null,"path":"/assets/css/main.scss"},{"name":"slides.css","modified_time":"2021-05-31 04:45:05 -0500","basename":"slides","extname":".css","collection":null,"path":"/assets/css/slides.css"},{"name":"syntax_highlighting.css","modified_time":"2021-05-31 04:45:05 -0500","basename":"syntax_highlighting","extname":".css","collection":null,"path":"/assets/css/syntax_highlighting.css"},{"name":"plot1.R","modified_time":"2021-05-31 04:45:05 
-0500","basename":"plot1","extname":".R","collection":null,"path":"/assets/files/git-introduction/plot1.R"},{"name":"plot2.R","modified_time":"2021-05-31 04:45:05 -0500","basename":"plot2","extname":".R","collection":null,"path":"/assets/files/git-introduction/plot2.R"},{"name":"Dense-Regular.otf","modified_time":"2021-05-31 04:45:05 -0500","basename":"Dense-Regular","extname":".otf","collection":null,"path":"/assets/fonts/Dense-Regular.otf"},{"name":"Dense-Regular.ttf","modified_time":"2021-05-31 04:45:05 -0500","basename":"Dense-Regular","extname":".ttf","collection":null,"path":"/assets/fonts/Dense-Regular.ttf"},{"name":"FontAwesome.otf","modified_time":"2021-05-31 04:45:05 -0500","basename":"FontAwesome","extname":".otf","collection":null,"path":"/assets/fonts/FontAwesome.otf"},{"name":"MaterialIcons.woff","modified_time":"2021-05-31 04:45:05 -0500","basename":"MaterialIcons","extname":".woff","collection":null,"path":"/assets/fonts/MaterialIcons.woff"},{"name":"MaterialIcons.woff2","modified_time":"2021-05-31 04:45:05 -0500","basename":"MaterialIcons","extname":".woff2","collection":null,"path":"/assets/fonts/MaterialIcons.woff2"},{"name":"OpenSans.otf","modified_time":"2021-05-31 04:45:05 -0500","basename":"OpenSans","extname":".otf","collection":null,"path":"/assets/fonts/OpenSans.otf"},{"name":"OpenSans.woff","modified_time":"2021-05-31 04:45:05 -0500","basename":"OpenSans","extname":".woff","collection":null,"path":"/assets/fonts/OpenSans.woff"},{"name":"OpenSans.woff2","modified_time":"2021-05-31 04:45:05 -0500","basename":"OpenSans","extname":".woff2","collection":null,"path":"/assets/fonts/OpenSans.woff2"},{"name":"academicons.eot","modified_time":"2021-05-31 04:45:05 -0500","basename":"academicons","extname":".eot","collection":null,"path":"/assets/fonts/academicons.eot"},{"name":"academicons.svg","modified_time":"2021-05-31 04:45:05 
-0500","basename":"academicons","extname":".svg","collection":null,"path":"/assets/fonts/academicons.svg"},{"name":"academicons.ttf","modified_time":"2021-05-31 04:45:05 -0500","basename":"academicons","extname":".ttf","collection":null,"path":"/assets/fonts/academicons.ttf"},{"name":"academicons.woff","modified_time":"2021-05-31 04:45:05 -0500","basename":"academicons","extname":".woff","collection":null,"path":"/assets/fonts/academicons.woff"},{"name":"fontawesome-webfont.eot","modified_time":"2021-05-31 04:45:05 -0500","basename":"fontawesome-webfont","extname":".eot","collection":null,"path":"/assets/fonts/fontawesome-webfont.eot"},{"name":"fontawesome-webfont.svg","modified_time":"2021-05-31 04:45:05 -0500","basename":"fontawesome-webfont","extname":".svg","collection":null,"path":"/assets/fonts/fontawesome-webfont.svg"},{"name":"fontawesome-webfont.ttf","modified_time":"2021-05-31 04:45:05 -0500","basename":"fontawesome-webfont","extname":".ttf","collection":null,"path":"/assets/fonts/fontawesome-webfont.ttf"},{"name":"fontawesome-webfont.woff","modified_time":"2021-05-31 04:45:05 -0500","basename":"fontawesome-webfont","extname":".woff","collection":null,"path":"/assets/fonts/fontawesome-webfont.woff"},{"name":"fontawesome-webfont.woff2","modified_time":"2021-05-31 04:45:05 -0500","basename":"fontawesome-webfont","extname":".woff2","collection":null,"path":"/assets/fonts/fontawesome-webfont.woff2"},{"name":"glyphicons-halflings-regular.eot","modified_time":"2021-05-31 04:45:05 -0500","basename":"glyphicons-halflings-regular","extname":".eot","collection":null,"path":"/assets/fonts/glyphicons-halflings-regular.eot"},{"name":"glyphicons-halflings-regular.svg","modified_time":"2021-05-31 04:45:05 -0500","basename":"glyphicons-halflings-regular","extname":".svg","collection":null,"path":"/assets/fonts/glyphicons-halflings-regular.svg"},{"name":"glyphicons-halflings-regular.ttf","modified_time":"2021-05-31 04:45:05 
-0500","basename":"glyphicons-halflings-regular","extname":".ttf","collection":null,"path":"/assets/fonts/glyphicons-halflings-regular.ttf"},{"name":"glyphicons-halflings-regular.woff","modified_time":"2021-05-31 04:45:05 -0500","basename":"glyphicons-halflings-regular","extname":".woff","collection":null,"path":"/assets/fonts/glyphicons-halflings-regular.woff"},{"name":"glyphicons-halflings-regular.woff2","modified_time":"2021-05-31 04:45:05 -0500","basename":"glyphicons-halflings-regular","extname":".woff2","collection":null,"path":"/assets/fonts/glyphicons-halflings-regular.woff2"},{"name":"GTN-60px.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"GTN-60px","extname":".png","collection":null,"path":"/assets/images/GTN-60px.png"},{"name":"GTN.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"GTN","extname":".png","collection":null,"path":"/assets/images/GTN.png"},{"name":"GTNLogo1000.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"GTNLogo1000","extname":".png","collection":null,"path":"/assets/images/GTNLogo1000.png"},{"name":"bioinformatics_core_rgb_neg.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"bioinformatics_core_rgb_neg","extname":".png","collection":null,"path":"/assets/images/bioinformatics_core_rgb_neg.png"},{"name":"introblockheader.jpg","modified_time":"2021-05-31 04:45:05 -0500","basename":"introblockheader","extname":".jpg","collection":null,"path":"/assets/images/introblockheader.jpg"},{"name":"logo.svg","modified_time":"2021-05-31 04:45:05 -0500","basename":"logo","extname":".svg","collection":null,"path":"/assets/images/logo.svg"},{"name":"qr-gtn-black-logo.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"qr-gtn-black-logo","extname":".png","collection":null,"path":"/assets/images/qr-gtn-black-logo.png"},{"name":"qr-gtn-black.png","modified_time":"2021-05-31 04:45:05 
-0500","basename":"qr-gtn-black","extname":".png","collection":null,"path":"/assets/images/qr-gtn-black.png"},{"name":"qr-gtn-darkblue-logo.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"qr-gtn-darkblue-logo","extname":".png","collection":null,"path":"/assets/images/qr-gtn-darkblue-logo.png"},{"name":"qr-gtn-darkblue.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"qr-gtn-darkblue","extname":".png","collection":null,"path":"/assets/images/qr-gtn-darkblue.png"},{"name":"qr-gtn-darkgrey-logo.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"qr-gtn-darkgrey-logo","extname":".png","collection":null,"path":"/assets/images/qr-gtn-darkgrey-logo.png"},{"name":"qr-gtn-darkgrey.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"qr-gtn-darkgrey","extname":".png","collection":null,"path":"/assets/images/qr-gtn-darkgrey.png"},{"name":"qr-gtn-logo.xcf","modified_time":"2021-05-31 04:45:05 -0500","basename":"qr-gtn-logo","extname":".xcf","collection":null,"path":"/assets/images/qr-gtn-logo.xcf"},{"name":"qr-gtn-white-logo.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"qr-gtn-white-logo","extname":".png","collection":null,"path":"/assets/images/qr-gtn-white-logo.png"},{"name":"qr-gtn-white.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"qr-gtn-white","extname":".png","collection":null,"path":"/assets/images/qr-gtn-white.png"},{"name":"qr-gtn.xcf","modified_time":"2021-05-31 04:45:05 -0500","basename":"qr-gtn","extname":".xcf","collection":null,"path":"/assets/images/qr-gtn.xcf"},{"name":"search.svg","modified_time":"2021-05-31 04:45:05 -0500","basename":"search","extname":".svg","collection":null,"path":"/assets/images/search.svg"},{"name":"vib_notag_neg_rgb.jpg","modified_time":"2021-05-31 04:45:05 -0500","basename":"vib_notag_neg_rgb","extname":".jpg","collection":null,"path":"/assets/images/vib_notag_neg_rgb.jpg"},{"name":"bootstrap-toc.min.js","modified_time":"2021-05-31 04:45:05 
-0500","basename":"bootstrap-toc.min","extname":".js","collection":null,"path":"/assets/js/bootstrap-toc.min.js"},{"name":"bootstrap.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"bootstrap","extname":".js","collection":null,"path":"/assets/js/bootstrap.js"},{"name":"bootstrap.min.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"bootstrap.min","extname":".js","collection":null,"path":"/assets/js/bootstrap.min.js"},{"name":"clipboard.min.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"clipboard.min","extname":".js","collection":null,"path":"/assets/js/clipboard.min.js"},{"name":"details-element-polyfill.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"details-element-polyfill","extname":".js","collection":null,"path":"/assets/js/details-element-polyfill.js"},{"name":"jquery.slim.min.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"jquery.slim.min","extname":".js","collection":null,"path":"/assets/js/jquery.slim.min.js"},{"name":"main.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"main","extname":".js","collection":null,"path":"/assets/js/main.js"},{"name":"popper.min.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"popper.min","extname":".js","collection":null,"path":"/assets/js/popper.min.js"},{"name":"remark-latest.min.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"remark-latest.min","extname":".js","collection":null,"path":"/assets/js/remark-latest.min.js"},{"name":"index.html","modified_time":"2021-05-31 04:45:05 -0500","basename":"index","extname":".html","collection":null,"path":"/badges/index.html"},{"name":"Dense-Regular.2c017720.otf","modified_time":"2021-05-31 04:45:05 -0500","basename":"Dense-Regular.2c017720","extname":".otf","collection":null,"path":"/courses/Dense-Regular.2c017720.otf"},{"name":"Dense-Regular.6886b0f1.ttf","modified_time":"2021-05-31 04:45:05 
-0500","basename":"Dense-Regular.6886b0f1","extname":".ttf","collection":null,"path":"/courses/Dense-Regular.6886b0f1.ttf"},{"name":"MaterialIcons-Regular.309c1598.woff","modified_time":"2021-05-31 04:45:05 -0500","basename":"MaterialIcons-Regular.309c1598","extname":".woff","collection":null,"path":"/courses/MaterialIcons-Regular.309c1598.woff"},{"name":"MaterialIcons-Regular.5744d005.ttf","modified_time":"2021-05-31 04:45:05 -0500","basename":"MaterialIcons-Regular.5744d005","extname":".ttf","collection":null,"path":"/courses/MaterialIcons-Regular.5744d005.ttf"},{"name":"MaterialIcons-Regular.827a59b6.eot","modified_time":"2021-05-31 04:45:05 -0500","basename":"MaterialIcons-Regular.827a59b6","extname":".eot","collection":null,"path":"/courses/MaterialIcons-Regular.827a59b6.eot"},{"name":"MaterialIcons-Regular.d5b7635d.woff2","modified_time":"2021-05-31 04:45:05 -0500","basename":"MaterialIcons-Regular.d5b7635d","extname":".woff2","collection":null,"path":"/courses/MaterialIcons-Regular.d5b7635d.woff2"},{"name":"OpenSans.078346f1.woff2","modified_time":"2021-05-31 04:45:05 -0500","basename":"OpenSans.078346f1","extname":".woff2","collection":null,"path":"/courses/OpenSans.078346f1.woff2"},{"name":"OpenSans.09201306.otf","modified_time":"2021-05-31 04:45:05 -0500","basename":"OpenSans.09201306","extname":".otf","collection":null,"path":"/courses/OpenSans.09201306.otf"},{"name":"OpenSans.ebb5bc12.woff","modified_time":"2021-05-31 04:45:05 -0500","basename":"OpenSans.ebb5bc12","extname":".woff","collection":null,"path":"/courses/OpenSans.ebb5bc12.woff"},{"name":"ace.d6e2fa52.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"ace.d6e2fa52","extname":".js","collection":null,"path":"/courses/ace.d6e2fa52.js"},{"name":"dense.a16f76d5.css","modified_time":"2021-05-31 04:45:05 -0500","basename":"dense.a16f76d5","extname":".css","collection":null,"path":"/courses/dense.a16f76d5.css"},{"name":"ace.js","modified_time":"2021-05-31 04:45:05 
-0500","basename":"ace","extname":".js","collection":null,"path":"/courses/editor/ace.js"},{"name":"ext-beautify.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"ext-beautify","extname":".js","collection":null,"path":"/courses/editor/ext-beautify.js"},{"name":"ext-elastic_tabstops_lite.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"ext-elastic_tabstops_lite","extname":".js","collection":null,"path":"/courses/editor/ext-elastic_tabstops_lite.js"},{"name":"ext-emmet.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"ext-emmet","extname":".js","collection":null,"path":"/courses/editor/ext-emmet.js"},{"name":"ext-error_marker.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"ext-error_marker","extname":".js","collection":null,"path":"/courses/editor/ext-error_marker.js"},{"name":"ext-keybinding_menu.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"ext-keybinding_menu","extname":".js","collection":null,"path":"/courses/editor/ext-keybinding_menu.js"},{"name":"ext-language_tools.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"ext-language_tools","extname":".js","collection":null,"path":"/courses/editor/ext-language_tools.js"},{"name":"ext-linking.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"ext-linking","extname":".js","collection":null,"path":"/courses/editor/ext-linking.js"},{"name":"ext-modelist.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"ext-modelist","extname":".js","collection":null,"path":"/courses/editor/ext-modelist.js"},{"name":"ext-options.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"ext-options","extname":".js","collection":null,"path":"/courses/editor/ext-options.js"},{"name":"ext-prompt.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"ext-prompt","extname":".js","collection":null,"path":"/courses/editor/ext-prompt.js"},{"name":"ext-rtl.js","modified_time":"2021-05-31 04:45:05 
-0500","basename":"ext-rtl","extname":".js","collection":null,"path":"/courses/editor/ext-rtl.js"},{"name":"ext-searchbox.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"ext-searchbox","extname":".js","collection":null,"path":"/courses/editor/ext-searchbox.js"},{"name":"ext-settings_menu.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"ext-settings_menu","extname":".js","collection":null,"path":"/courses/editor/ext-settings_menu.js"},{"name":"ext-spellcheck.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"ext-spellcheck","extname":".js","collection":null,"path":"/courses/editor/ext-spellcheck.js"},{"name":"ext-split.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"ext-split","extname":".js","collection":null,"path":"/courses/editor/ext-split.js"},{"name":"ext-static_highlight.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"ext-static_highlight","extname":".js","collection":null,"path":"/courses/editor/ext-static_highlight.js"},{"name":"ext-statusbar.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"ext-statusbar","extname":".js","collection":null,"path":"/courses/editor/ext-statusbar.js"},{"name":"ext-textarea.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"ext-textarea","extname":".js","collection":null,"path":"/courses/editor/ext-textarea.js"},{"name":"ext-themelist.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"ext-themelist","extname":".js","collection":null,"path":"/courses/editor/ext-themelist.js"},{"name":"ext-whitespace.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"ext-whitespace","extname":".js","collection":null,"path":"/courses/editor/ext-whitespace.js"},{"name":"keybinding-emacs.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"keybinding-emacs","extname":".js","collection":null,"path":"/courses/editor/keybinding-emacs.js"},{"name":"keybinding-sublime.js","modified_time":"2021-05-31 04:45:05 
-0500","basename":"keybinding-sublime","extname":".js","collection":null,"path":"/courses/editor/keybinding-sublime.js"},{"name":"keybinding-vim.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"keybinding-vim","extname":".js","collection":null,"path":"/courses/editor/keybinding-vim.js"},{"name":"mode-abap.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-abap","extname":".js","collection":null,"path":"/courses/editor/mode-abap.js"},{"name":"mode-abc.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-abc","extname":".js","collection":null,"path":"/courses/editor/mode-abc.js"},{"name":"mode-actionscript.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-actionscript","extname":".js","collection":null,"path":"/courses/editor/mode-actionscript.js"},{"name":"mode-ada.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-ada","extname":".js","collection":null,"path":"/courses/editor/mode-ada.js"},{"name":"mode-apache_conf.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-apache_conf","extname":".js","collection":null,"path":"/courses/editor/mode-apache_conf.js"},{"name":"mode-apex.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-apex","extname":".js","collection":null,"path":"/courses/editor/mode-apex.js"},{"name":"mode-applescript.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-applescript","extname":".js","collection":null,"path":"/courses/editor/mode-applescript.js"},{"name":"mode-aql.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-aql","extname":".js","collection":null,"path":"/courses/editor/mode-aql.js"},{"name":"mode-asciidoc.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-asciidoc","extname":".js","collection":null,"path":"/courses/editor/mode-asciidoc.js"},{"name":"mode-asl.js","modified_time":"2021-05-31 04:45:05 
-0500","basename":"mode-asl","extname":".js","collection":null,"path":"/courses/editor/mode-asl.js"},{"name":"mode-assembly_x86.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-assembly_x86","extname":".js","collection":null,"path":"/courses/editor/mode-assembly_x86.js"},{"name":"mode-autohotkey.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-autohotkey","extname":".js","collection":null,"path":"/courses/editor/mode-autohotkey.js"},{"name":"mode-batchfile.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-batchfile","extname":".js","collection":null,"path":"/courses/editor/mode-batchfile.js"},{"name":"mode-bro.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-bro","extname":".js","collection":null,"path":"/courses/editor/mode-bro.js"},{"name":"mode-c9search.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-c9search","extname":".js","collection":null,"path":"/courses/editor/mode-c9search.js"},{"name":"mode-c_cpp.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-c_cpp","extname":".js","collection":null,"path":"/courses/editor/mode-c_cpp.js"},{"name":"mode-cirru.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-cirru","extname":".js","collection":null,"path":"/courses/editor/mode-cirru.js"},{"name":"mode-clojure.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-clojure","extname":".js","collection":null,"path":"/courses/editor/mode-clojure.js"},{"name":"mode-cobol.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-cobol","extname":".js","collection":null,"path":"/courses/editor/mode-cobol.js"},{"name":"mode-coffee.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-coffee","extname":".js","collection":null,"path":"/courses/editor/mode-coffee.js"},{"name":"mode-coldfusion.js","modified_time":"2021-05-31 04:45:05 
-0500","basename":"mode-coldfusion","extname":".js","collection":null,"path":"/courses/editor/mode-coldfusion.js"},{"name":"mode-crystal.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-crystal","extname":".js","collection":null,"path":"/courses/editor/mode-crystal.js"},{"name":"mode-csharp.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-csharp","extname":".js","collection":null,"path":"/courses/editor/mode-csharp.js"},{"name":"mode-csound_document.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-csound_document","extname":".js","collection":null,"path":"/courses/editor/mode-csound_document.js"},{"name":"mode-csound_orchestra.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-csound_orchestra","extname":".js","collection":null,"path":"/courses/editor/mode-csound_orchestra.js"},{"name":"mode-csound_score.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-csound_score","extname":".js","collection":null,"path":"/courses/editor/mode-csound_score.js"},{"name":"mode-csp.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-csp","extname":".js","collection":null,"path":"/courses/editor/mode-csp.js"},{"name":"mode-css.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-css","extname":".js","collection":null,"path":"/courses/editor/mode-css.js"},{"name":"mode-curly.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-curly","extname":".js","collection":null,"path":"/courses/editor/mode-curly.js"},{"name":"mode-d.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-d","extname":".js","collection":null,"path":"/courses/editor/mode-d.js"},{"name":"mode-dart.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-dart","extname":".js","collection":null,"path":"/courses/editor/mode-dart.js"},{"name":"mode-diff.js","modified_time":"2021-05-31 04:45:05 
-0500","basename":"mode-diff","extname":".js","collection":null,"path":"/courses/editor/mode-diff.js"},{"name":"mode-django.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-django","extname":".js","collection":null,"path":"/courses/editor/mode-django.js"},{"name":"mode-dockerfile.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-dockerfile","extname":".js","collection":null,"path":"/courses/editor/mode-dockerfile.js"},{"name":"mode-dot.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-dot","extname":".js","collection":null,"path":"/courses/editor/mode-dot.js"},{"name":"mode-drools.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-drools","extname":".js","collection":null,"path":"/courses/editor/mode-drools.js"},{"name":"mode-edifact.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-edifact","extname":".js","collection":null,"path":"/courses/editor/mode-edifact.js"},{"name":"mode-eiffel.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-eiffel","extname":".js","collection":null,"path":"/courses/editor/mode-eiffel.js"},{"name":"mode-ejs.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-ejs","extname":".js","collection":null,"path":"/courses/editor/mode-ejs.js"},{"name":"mode-elixir.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-elixir","extname":".js","collection":null,"path":"/courses/editor/mode-elixir.js"},{"name":"mode-elm.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-elm","extname":".js","collection":null,"path":"/courses/editor/mode-elm.js"},{"name":"mode-erlang.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-erlang","extname":".js","collection":null,"path":"/courses/editor/mode-erlang.js"},{"name":"mode-forth.js","modified_time":"2021-05-31 04:45:05 
-0500","basename":"mode-forth","extname":".js","collection":null,"path":"/courses/editor/mode-forth.js"},{"name":"mode-fortran.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-fortran","extname":".js","collection":null,"path":"/courses/editor/mode-fortran.js"},{"name":"mode-fsharp.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-fsharp","extname":".js","collection":null,"path":"/courses/editor/mode-fsharp.js"},{"name":"mode-fsl.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-fsl","extname":".js","collection":null,"path":"/courses/editor/mode-fsl.js"},{"name":"mode-ftl.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-ftl","extname":".js","collection":null,"path":"/courses/editor/mode-ftl.js"},{"name":"mode-gcode.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-gcode","extname":".js","collection":null,"path":"/courses/editor/mode-gcode.js"},{"name":"mode-gherkin.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-gherkin","extname":".js","collection":null,"path":"/courses/editor/mode-gherkin.js"},{"name":"mode-gitignore.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-gitignore","extname":".js","collection":null,"path":"/courses/editor/mode-gitignore.js"},{"name":"mode-glsl.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-glsl","extname":".js","collection":null,"path":"/courses/editor/mode-glsl.js"},{"name":"mode-gobstones.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-gobstones","extname":".js","collection":null,"path":"/courses/editor/mode-gobstones.js"},{"name":"mode-golang.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-golang","extname":".js","collection":null,"path":"/courses/editor/mode-golang.js"},{"name":"mode-graphqlschema.js","modified_time":"2021-05-31 04:45:05 
-0500","basename":"mode-graphqlschema","extname":".js","collection":null,"path":"/courses/editor/mode-graphqlschema.js"},{"name":"mode-groovy.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-groovy","extname":".js","collection":null,"path":"/courses/editor/mode-groovy.js"},{"name":"mode-haml.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-haml","extname":".js","collection":null,"path":"/courses/editor/mode-haml.js"},{"name":"mode-handlebars.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-handlebars","extname":".js","collection":null,"path":"/courses/editor/mode-handlebars.js"},{"name":"mode-haskell.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-haskell","extname":".js","collection":null,"path":"/courses/editor/mode-haskell.js"},{"name":"mode-haskell_cabal.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-haskell_cabal","extname":".js","collection":null,"path":"/courses/editor/mode-haskell_cabal.js"},{"name":"mode-haxe.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-haxe","extname":".js","collection":null,"path":"/courses/editor/mode-haxe.js"},{"name":"mode-hjson.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-hjson","extname":".js","collection":null,"path":"/courses/editor/mode-hjson.js"},{"name":"mode-html.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-html","extname":".js","collection":null,"path":"/courses/editor/mode-html.js"},{"name":"mode-html_elixir.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-html_elixir","extname":".js","collection":null,"path":"/courses/editor/mode-html_elixir.js"},{"name":"mode-html_ruby.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-html_ruby","extname":".js","collection":null,"path":"/courses/editor/mode-html_ruby.js"},{"name":"mode-ini.js","modified_time":"2021-05-31 04:45:05 
-0500","basename":"mode-ini","extname":".js","collection":null,"path":"/courses/editor/mode-ini.js"},{"name":"mode-io.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-io","extname":".js","collection":null,"path":"/courses/editor/mode-io.js"},{"name":"mode-jack.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-jack","extname":".js","collection":null,"path":"/courses/editor/mode-jack.js"},{"name":"mode-jade.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-jade","extname":".js","collection":null,"path":"/courses/editor/mode-jade.js"},{"name":"mode-java.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-java","extname":".js","collection":null,"path":"/courses/editor/mode-java.js"},{"name":"mode-javascript.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-javascript","extname":".js","collection":null,"path":"/courses/editor/mode-javascript.js"},{"name":"mode-json.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-json","extname":".js","collection":null,"path":"/courses/editor/mode-json.js"},{"name":"mode-jsoniq.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-jsoniq","extname":".js","collection":null,"path":"/courses/editor/mode-jsoniq.js"},{"name":"mode-jsp.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-jsp","extname":".js","collection":null,"path":"/courses/editor/mode-jsp.js"},{"name":"mode-jssm.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-jssm","extname":".js","collection":null,"path":"/courses/editor/mode-jssm.js"},{"name":"mode-jsx.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-jsx","extname":".js","collection":null,"path":"/courses/editor/mode-jsx.js"},{"name":"mode-julia.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-julia","extname":".js","collection":null,"path":"/courses/editor/mode-julia.js"},{"name":"mode-kotlin.js","modified_time":"2021-05-31 04:45:05 
-0500","basename":"mode-kotlin","extname":".js","collection":null,"path":"/courses/editor/mode-kotlin.js"},{"name":"mode-latex.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-latex","extname":".js","collection":null,"path":"/courses/editor/mode-latex.js"},{"name":"mode-less.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-less","extname":".js","collection":null,"path":"/courses/editor/mode-less.js"},{"name":"mode-liquid.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-liquid","extname":".js","collection":null,"path":"/courses/editor/mode-liquid.js"},{"name":"mode-lisp.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-lisp","extname":".js","collection":null,"path":"/courses/editor/mode-lisp.js"},{"name":"mode-livescript.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-livescript","extname":".js","collection":null,"path":"/courses/editor/mode-livescript.js"},{"name":"mode-logiql.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-logiql","extname":".js","collection":null,"path":"/courses/editor/mode-logiql.js"},{"name":"mode-logtalk.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-logtalk","extname":".js","collection":null,"path":"/courses/editor/mode-logtalk.js"},{"name":"mode-lsl.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-lsl","extname":".js","collection":null,"path":"/courses/editor/mode-lsl.js"},{"name":"mode-lua.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-lua","extname":".js","collection":null,"path":"/courses/editor/mode-lua.js"},{"name":"mode-luapage.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-luapage","extname":".js","collection":null,"path":"/courses/editor/mode-luapage.js"},{"name":"mode-lucene.js","modified_time":"2021-05-31 04:45:05 
-0500","basename":"mode-lucene","extname":".js","collection":null,"path":"/courses/editor/mode-lucene.js"},{"name":"mode-makefile.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-makefile","extname":".js","collection":null,"path":"/courses/editor/mode-makefile.js"},{"name":"mode-markdown.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-markdown","extname":".js","collection":null,"path":"/courses/editor/mode-markdown.js"},{"name":"mode-mask.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-mask","extname":".js","collection":null,"path":"/courses/editor/mode-mask.js"},{"name":"mode-matlab.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-matlab","extname":".js","collection":null,"path":"/courses/editor/mode-matlab.js"},{"name":"mode-maze.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-maze","extname":".js","collection":null,"path":"/courses/editor/mode-maze.js"},{"name":"mode-mel.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-mel","extname":".js","collection":null,"path":"/courses/editor/mode-mel.js"},{"name":"mode-mixal.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-mixal","extname":".js","collection":null,"path":"/courses/editor/mode-mixal.js"},{"name":"mode-mushcode.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-mushcode","extname":".js","collection":null,"path":"/courses/editor/mode-mushcode.js"},{"name":"mode-mysql.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-mysql","extname":".js","collection":null,"path":"/courses/editor/mode-mysql.js"},{"name":"mode-nginx.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-nginx","extname":".js","collection":null,"path":"/courses/editor/mode-nginx.js"},{"name":"mode-nim.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-nim","extname":".js","collection":null,"path":"/courses/editor/mode-nim.js"},{"name":"mode-nix.js","modified_time":"2021-05-31 
04:45:05 -0500","basename":"mode-nix","extname":".js","collection":null,"path":"/courses/editor/mode-nix.js"},{"name":"mode-nsis.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-nsis","extname":".js","collection":null,"path":"/courses/editor/mode-nsis.js"},{"name":"mode-objectivec.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-objectivec","extname":".js","collection":null,"path":"/courses/editor/mode-objectivec.js"},{"name":"mode-ocaml.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-ocaml","extname":".js","collection":null,"path":"/courses/editor/mode-ocaml.js"},{"name":"mode-pascal.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-pascal","extname":".js","collection":null,"path":"/courses/editor/mode-pascal.js"},{"name":"mode-perl.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-perl","extname":".js","collection":null,"path":"/courses/editor/mode-perl.js"},{"name":"mode-perl6.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-perl6","extname":".js","collection":null,"path":"/courses/editor/mode-perl6.js"},{"name":"mode-pgsql.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-pgsql","extname":".js","collection":null,"path":"/courses/editor/mode-pgsql.js"},{"name":"mode-php.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-php","extname":".js","collection":null,"path":"/courses/editor/mode-php.js"},{"name":"mode-php_laravel_blade.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-php_laravel_blade","extname":".js","collection":null,"path":"/courses/editor/mode-php_laravel_blade.js"},{"name":"mode-pig.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-pig","extname":".js","collection":null,"path":"/courses/editor/mode-pig.js"},{"name":"mode-plain_text.js","modified_time":"2021-05-31 04:45:05 
-0500","basename":"mode-plain_text","extname":".js","collection":null,"path":"/courses/editor/mode-plain_text.js"},{"name":"mode-powershell.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-powershell","extname":".js","collection":null,"path":"/courses/editor/mode-powershell.js"},{"name":"mode-praat.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-praat","extname":".js","collection":null,"path":"/courses/editor/mode-praat.js"},{"name":"mode-prolog.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-prolog","extname":".js","collection":null,"path":"/courses/editor/mode-prolog.js"},{"name":"mode-properties.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-properties","extname":".js","collection":null,"path":"/courses/editor/mode-properties.js"},{"name":"mode-protobuf.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-protobuf","extname":".js","collection":null,"path":"/courses/editor/mode-protobuf.js"},{"name":"mode-puppet.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-puppet","extname":".js","collection":null,"path":"/courses/editor/mode-puppet.js"},{"name":"mode-python.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-python","extname":".js","collection":null,"path":"/courses/editor/mode-python.js"},{"name":"mode-r.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-r","extname":".js","collection":null,"path":"/courses/editor/mode-r.js"},{"name":"mode-razor.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-razor","extname":".js","collection":null,"path":"/courses/editor/mode-razor.js"},{"name":"mode-rdoc.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-rdoc","extname":".js","collection":null,"path":"/courses/editor/mode-rdoc.js"},{"name":"mode-red.js","modified_time":"2021-05-31 04:45:05 
-0500","basename":"mode-red","extname":".js","collection":null,"path":"/courses/editor/mode-red.js"},{"name":"mode-redshift.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-redshift","extname":".js","collection":null,"path":"/courses/editor/mode-redshift.js"},{"name":"mode-rhtml.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-rhtml","extname":".js","collection":null,"path":"/courses/editor/mode-rhtml.js"},{"name":"mode-rst.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-rst","extname":".js","collection":null,"path":"/courses/editor/mode-rst.js"},{"name":"mode-ruby.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-ruby","extname":".js","collection":null,"path":"/courses/editor/mode-ruby.js"},{"name":"mode-rust.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-rust","extname":".js","collection":null,"path":"/courses/editor/mode-rust.js"},{"name":"mode-sass.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-sass","extname":".js","collection":null,"path":"/courses/editor/mode-sass.js"},{"name":"mode-scad.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-scad","extname":".js","collection":null,"path":"/courses/editor/mode-scad.js"},{"name":"mode-scala.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-scala","extname":".js","collection":null,"path":"/courses/editor/mode-scala.js"},{"name":"mode-scheme.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-scheme","extname":".js","collection":null,"path":"/courses/editor/mode-scheme.js"},{"name":"mode-scss.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-scss","extname":".js","collection":null,"path":"/courses/editor/mode-scss.js"},{"name":"mode-sh.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-sh","extname":".js","collection":null,"path":"/courses/editor/mode-sh.js"},{"name":"mode-sjs.js","modified_time":"2021-05-31 04:45:05 
-0500","basename":"mode-sjs","extname":".js","collection":null,"path":"/courses/editor/mode-sjs.js"},{"name":"mode-slim.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-slim","extname":".js","collection":null,"path":"/courses/editor/mode-slim.js"},{"name":"mode-smarty.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-smarty","extname":".js","collection":null,"path":"/courses/editor/mode-smarty.js"},{"name":"mode-snippets.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-snippets","extname":".js","collection":null,"path":"/courses/editor/mode-snippets.js"},{"name":"mode-soy_template.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-soy_template","extname":".js","collection":null,"path":"/courses/editor/mode-soy_template.js"},{"name":"mode-space.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-space","extname":".js","collection":null,"path":"/courses/editor/mode-space.js"},{"name":"mode-sparql.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-sparql","extname":".js","collection":null,"path":"/courses/editor/mode-sparql.js"},{"name":"mode-sql.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-sql","extname":".js","collection":null,"path":"/courses/editor/mode-sql.js"},{"name":"mode-sqlserver.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-sqlserver","extname":".js","collection":null,"path":"/courses/editor/mode-sqlserver.js"},{"name":"mode-stylus.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-stylus","extname":".js","collection":null,"path":"/courses/editor/mode-stylus.js"},{"name":"mode-svg.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-svg","extname":".js","collection":null,"path":"/courses/editor/mode-svg.js"},{"name":"mode-swift.js","modified_time":"2021-05-31 04:45:05 
-0500","basename":"mode-swift","extname":".js","collection":null,"path":"/courses/editor/mode-swift.js"},{"name":"mode-tcl.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-tcl","extname":".js","collection":null,"path":"/courses/editor/mode-tcl.js"},{"name":"mode-terraform.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-terraform","extname":".js","collection":null,"path":"/courses/editor/mode-terraform.js"},{"name":"mode-tex.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-tex","extname":".js","collection":null,"path":"/courses/editor/mode-tex.js"},{"name":"mode-text.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-text","extname":".js","collection":null,"path":"/courses/editor/mode-text.js"},{"name":"mode-textile.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-textile","extname":".js","collection":null,"path":"/courses/editor/mode-textile.js"},{"name":"mode-toml.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-toml","extname":".js","collection":null,"path":"/courses/editor/mode-toml.js"},{"name":"mode-tsx.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-tsx","extname":".js","collection":null,"path":"/courses/editor/mode-tsx.js"},{"name":"mode-turtle.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-turtle","extname":".js","collection":null,"path":"/courses/editor/mode-turtle.js"},{"name":"mode-twig.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-twig","extname":".js","collection":null,"path":"/courses/editor/mode-twig.js"},{"name":"mode-typescript.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-typescript","extname":".js","collection":null,"path":"/courses/editor/mode-typescript.js"},{"name":"mode-vala.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-vala","extname":".js","collection":null,"path":"/courses/editor/mode-vala.js"},{"name":"mode-vbscript.js","modified_time":"2021-05-31 
04:45:05 -0500","basename":"mode-vbscript","extname":".js","collection":null,"path":"/courses/editor/mode-vbscript.js"},{"name":"mode-velocity.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-velocity","extname":".js","collection":null,"path":"/courses/editor/mode-velocity.js"},{"name":"mode-verilog.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-verilog","extname":".js","collection":null,"path":"/courses/editor/mode-verilog.js"},{"name":"mode-vhdl.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-vhdl","extname":".js","collection":null,"path":"/courses/editor/mode-vhdl.js"},{"name":"mode-visualforce.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-visualforce","extname":".js","collection":null,"path":"/courses/editor/mode-visualforce.js"},{"name":"mode-wollok.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-wollok","extname":".js","collection":null,"path":"/courses/editor/mode-wollok.js"},{"name":"mode-xml.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-xml","extname":".js","collection":null,"path":"/courses/editor/mode-xml.js"},{"name":"mode-xquery.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-xquery","extname":".js","collection":null,"path":"/courses/editor/mode-xquery.js"},{"name":"mode-yaml.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-yaml","extname":".js","collection":null,"path":"/courses/editor/mode-yaml.js"},{"name":"mode-zeek.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mode-zeek","extname":".js","collection":null,"path":"/courses/editor/mode-zeek.js"},{"name":"abap.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"abap","extname":".js","collection":null,"path":"/courses/editor/snippets/abap.js"},{"name":"abc.js","modified_time":"2021-05-31 04:45:05 
-0500","basename":"abc","extname":".js","collection":null,"path":"/courses/editor/snippets/abc.js"},{"name":"actionscript.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"actionscript","extname":".js","collection":null,"path":"/courses/editor/snippets/actionscript.js"},{"name":"ada.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"ada","extname":".js","collection":null,"path":"/courses/editor/snippets/ada.js"},{"name":"apache_conf.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"apache_conf","extname":".js","collection":null,"path":"/courses/editor/snippets/apache_conf.js"},{"name":"apex.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"apex","extname":".js","collection":null,"path":"/courses/editor/snippets/apex.js"},{"name":"applescript.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"applescript","extname":".js","collection":null,"path":"/courses/editor/snippets/applescript.js"},{"name":"aql.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"aql","extname":".js","collection":null,"path":"/courses/editor/snippets/aql.js"},{"name":"asciidoc.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"asciidoc","extname":".js","collection":null,"path":"/courses/editor/snippets/asciidoc.js"},{"name":"asl.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"asl","extname":".js","collection":null,"path":"/courses/editor/snippets/asl.js"},{"name":"assembly_x86.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"assembly_x86","extname":".js","collection":null,"path":"/courses/editor/snippets/assembly_x86.js"},{"name":"autohotkey.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"autohotkey","extname":".js","collection":null,"path":"/courses/editor/snippets/autohotkey.js"},{"name":"batchfile.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"batchfile","extname":".js","collection":null,"path":"/courses/editor/snippets/batchfile.js"},{"name":"bro.js","modified_time":"2021-05-31 
04:45:05 -0500","basename":"bro","extname":".js","collection":null,"path":"/courses/editor/snippets/bro.js"},{"name":"c9search.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"c9search","extname":".js","collection":null,"path":"/courses/editor/snippets/c9search.js"},{"name":"c_cpp.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"c_cpp","extname":".js","collection":null,"path":"/courses/editor/snippets/c_cpp.js"},{"name":"cirru.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"cirru","extname":".js","collection":null,"path":"/courses/editor/snippets/cirru.js"},{"name":"clojure.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"clojure","extname":".js","collection":null,"path":"/courses/editor/snippets/clojure.js"},{"name":"cobol.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"cobol","extname":".js","collection":null,"path":"/courses/editor/snippets/cobol.js"},{"name":"coffee.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"coffee","extname":".js","collection":null,"path":"/courses/editor/snippets/coffee.js"},{"name":"coldfusion.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"coldfusion","extname":".js","collection":null,"path":"/courses/editor/snippets/coldfusion.js"},{"name":"crystal.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"crystal","extname":".js","collection":null,"path":"/courses/editor/snippets/crystal.js"},{"name":"csharp.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"csharp","extname":".js","collection":null,"path":"/courses/editor/snippets/csharp.js"},{"name":"csound_document.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"csound_document","extname":".js","collection":null,"path":"/courses/editor/snippets/csound_document.js"},{"name":"csound_orchestra.js","modified_time":"2021-05-31 04:45:05 
-0500","basename":"csound_orchestra","extname":".js","collection":null,"path":"/courses/editor/snippets/csound_orchestra.js"},{"name":"csound_score.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"csound_score","extname":".js","collection":null,"path":"/courses/editor/snippets/csound_score.js"},{"name":"csp.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"csp","extname":".js","collection":null,"path":"/courses/editor/snippets/csp.js"},{"name":"css.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"css","extname":".js","collection":null,"path":"/courses/editor/snippets/css.js"},{"name":"curly.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"curly","extname":".js","collection":null,"path":"/courses/editor/snippets/curly.js"},{"name":"d.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"d","extname":".js","collection":null,"path":"/courses/editor/snippets/d.js"},{"name":"dart.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"dart","extname":".js","collection":null,"path":"/courses/editor/snippets/dart.js"},{"name":"diff.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"diff","extname":".js","collection":null,"path":"/courses/editor/snippets/diff.js"},{"name":"django.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"django","extname":".js","collection":null,"path":"/courses/editor/snippets/django.js"},{"name":"dockerfile.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"dockerfile","extname":".js","collection":null,"path":"/courses/editor/snippets/dockerfile.js"},{"name":"dot.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"dot","extname":".js","collection":null,"path":"/courses/editor/snippets/dot.js"},{"name":"drools.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"drools","extname":".js","collection":null,"path":"/courses/editor/snippets/drools.js"},{"name":"edifact.js","modified_time":"2021-05-31 04:45:05 
-0500","basename":"edifact","extname":".js","collection":null,"path":"/courses/editor/snippets/edifact.js"},{"name":"eiffel.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"eiffel","extname":".js","collection":null,"path":"/courses/editor/snippets/eiffel.js"},{"name":"ejs.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"ejs","extname":".js","collection":null,"path":"/courses/editor/snippets/ejs.js"},{"name":"elixir.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"elixir","extname":".js","collection":null,"path":"/courses/editor/snippets/elixir.js"},{"name":"elm.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"elm","extname":".js","collection":null,"path":"/courses/editor/snippets/elm.js"},{"name":"erlang.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"erlang","extname":".js","collection":null,"path":"/courses/editor/snippets/erlang.js"},{"name":"forth.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"forth","extname":".js","collection":null,"path":"/courses/editor/snippets/forth.js"},{"name":"fortran.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"fortran","extname":".js","collection":null,"path":"/courses/editor/snippets/fortran.js"},{"name":"fsharp.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"fsharp","extname":".js","collection":null,"path":"/courses/editor/snippets/fsharp.js"},{"name":"fsl.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"fsl","extname":".js","collection":null,"path":"/courses/editor/snippets/fsl.js"},{"name":"ftl.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"ftl","extname":".js","collection":null,"path":"/courses/editor/snippets/ftl.js"},{"name":"gcode.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"gcode","extname":".js","collection":null,"path":"/courses/editor/snippets/gcode.js"},{"name":"gherkin.js","modified_time":"2021-05-31 04:45:05 
-0500","basename":"gherkin","extname":".js","collection":null,"path":"/courses/editor/snippets/gherkin.js"},{"name":"gitignore.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"gitignore","extname":".js","collection":null,"path":"/courses/editor/snippets/gitignore.js"},{"name":"glsl.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"glsl","extname":".js","collection":null,"path":"/courses/editor/snippets/glsl.js"},{"name":"gobstones.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"gobstones","extname":".js","collection":null,"path":"/courses/editor/snippets/gobstones.js"},{"name":"golang.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"golang","extname":".js","collection":null,"path":"/courses/editor/snippets/golang.js"},{"name":"graphqlschema.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"graphqlschema","extname":".js","collection":null,"path":"/courses/editor/snippets/graphqlschema.js"},{"name":"groovy.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"groovy","extname":".js","collection":null,"path":"/courses/editor/snippets/groovy.js"},{"name":"haml.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"haml","extname":".js","collection":null,"path":"/courses/editor/snippets/haml.js"},{"name":"handlebars.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"handlebars","extname":".js","collection":null,"path":"/courses/editor/snippets/handlebars.js"},{"name":"haskell.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"haskell","extname":".js","collection":null,"path":"/courses/editor/snippets/haskell.js"},{"name":"haskell_cabal.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"haskell_cabal","extname":".js","collection":null,"path":"/courses/editor/snippets/haskell_cabal.js"},{"name":"haxe.js","modified_time":"2021-05-31 04:45:05 
-0500","basename":"haxe","extname":".js","collection":null,"path":"/courses/editor/snippets/haxe.js"},{"name":"hjson.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"hjson","extname":".js","collection":null,"path":"/courses/editor/snippets/hjson.js"},{"name":"html.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"html","extname":".js","collection":null,"path":"/courses/editor/snippets/html.js"},{"name":"html_elixir.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"html_elixir","extname":".js","collection":null,"path":"/courses/editor/snippets/html_elixir.js"},{"name":"html_ruby.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"html_ruby","extname":".js","collection":null,"path":"/courses/editor/snippets/html_ruby.js"},{"name":"ini.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"ini","extname":".js","collection":null,"path":"/courses/editor/snippets/ini.js"},{"name":"io.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"io","extname":".js","collection":null,"path":"/courses/editor/snippets/io.js"},{"name":"jack.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"jack","extname":".js","collection":null,"path":"/courses/editor/snippets/jack.js"},{"name":"jade.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"jade","extname":".js","collection":null,"path":"/courses/editor/snippets/jade.js"},{"name":"java.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"java","extname":".js","collection":null,"path":"/courses/editor/snippets/java.js"},{"name":"javascript.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"javascript","extname":".js","collection":null,"path":"/courses/editor/snippets/javascript.js"},{"name":"json.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"json","extname":".js","collection":null,"path":"/courses/editor/snippets/json.js"},{"name":"jsoniq.js","modified_time":"2021-05-31 04:45:05 
-0500","basename":"jsoniq","extname":".js","collection":null,"path":"/courses/editor/snippets/jsoniq.js"},{"name":"jsp.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"jsp","extname":".js","collection":null,"path":"/courses/editor/snippets/jsp.js"},{"name":"jssm.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"jssm","extname":".js","collection":null,"path":"/courses/editor/snippets/jssm.js"},{"name":"jsx.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"jsx","extname":".js","collection":null,"path":"/courses/editor/snippets/jsx.js"},{"name":"julia.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"julia","extname":".js","collection":null,"path":"/courses/editor/snippets/julia.js"},{"name":"kotlin.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"kotlin","extname":".js","collection":null,"path":"/courses/editor/snippets/kotlin.js"},{"name":"latex.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"latex","extname":".js","collection":null,"path":"/courses/editor/snippets/latex.js"},{"name":"less.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"less","extname":".js","collection":null,"path":"/courses/editor/snippets/less.js"},{"name":"liquid.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"liquid","extname":".js","collection":null,"path":"/courses/editor/snippets/liquid.js"},{"name":"lisp.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"lisp","extname":".js","collection":null,"path":"/courses/editor/snippets/lisp.js"},{"name":"livescript.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"livescript","extname":".js","collection":null,"path":"/courses/editor/snippets/livescript.js"},{"name":"logiql.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"logiql","extname":".js","collection":null,"path":"/courses/editor/snippets/logiql.js"},{"name":"logtalk.js","modified_time":"2021-05-31 04:45:05 
-0500","basename":"logtalk","extname":".js","collection":null,"path":"/courses/editor/snippets/logtalk.js"},{"name":"lsl.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"lsl","extname":".js","collection":null,"path":"/courses/editor/snippets/lsl.js"},{"name":"lua.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"lua","extname":".js","collection":null,"path":"/courses/editor/snippets/lua.js"},{"name":"luapage.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"luapage","extname":".js","collection":null,"path":"/courses/editor/snippets/luapage.js"},{"name":"lucene.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"lucene","extname":".js","collection":null,"path":"/courses/editor/snippets/lucene.js"},{"name":"makefile.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"makefile","extname":".js","collection":null,"path":"/courses/editor/snippets/makefile.js"},{"name":"markdown.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"markdown","extname":".js","collection":null,"path":"/courses/editor/snippets/markdown.js"},{"name":"mask.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mask","extname":".js","collection":null,"path":"/courses/editor/snippets/mask.js"},{"name":"matlab.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"matlab","extname":".js","collection":null,"path":"/courses/editor/snippets/matlab.js"},{"name":"maze.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"maze","extname":".js","collection":null,"path":"/courses/editor/snippets/maze.js"},{"name":"mel.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mel","extname":".js","collection":null,"path":"/courses/editor/snippets/mel.js"},{"name":"mixal.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mixal","extname":".js","collection":null,"path":"/courses/editor/snippets/mixal.js"},{"name":"mushcode.js","modified_time":"2021-05-31 04:45:05 
-0500","basename":"mushcode","extname":".js","collection":null,"path":"/courses/editor/snippets/mushcode.js"},{"name":"mysql.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"mysql","extname":".js","collection":null,"path":"/courses/editor/snippets/mysql.js"},{"name":"nginx.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"nginx","extname":".js","collection":null,"path":"/courses/editor/snippets/nginx.js"},{"name":"nim.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"nim","extname":".js","collection":null,"path":"/courses/editor/snippets/nim.js"},{"name":"nix.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"nix","extname":".js","collection":null,"path":"/courses/editor/snippets/nix.js"},{"name":"nsis.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"nsis","extname":".js","collection":null,"path":"/courses/editor/snippets/nsis.js"},{"name":"objectivec.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"objectivec","extname":".js","collection":null,"path":"/courses/editor/snippets/objectivec.js"},{"name":"ocaml.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"ocaml","extname":".js","collection":null,"path":"/courses/editor/snippets/ocaml.js"},{"name":"pascal.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"pascal","extname":".js","collection":null,"path":"/courses/editor/snippets/pascal.js"},{"name":"perl.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"perl","extname":".js","collection":null,"path":"/courses/editor/snippets/perl.js"},{"name":"perl6.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"perl6","extname":".js","collection":null,"path":"/courses/editor/snippets/perl6.js"},{"name":"pgsql.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"pgsql","extname":".js","collection":null,"path":"/courses/editor/snippets/pgsql.js"},{"name":"php.js","modified_time":"2021-05-31 04:45:05 
-0500","basename":"php","extname":".js","collection":null,"path":"/courses/editor/snippets/php.js"},{"name":"php_laravel_blade.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"php_laravel_blade","extname":".js","collection":null,"path":"/courses/editor/snippets/php_laravel_blade.js"},{"name":"pig.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"pig","extname":".js","collection":null,"path":"/courses/editor/snippets/pig.js"},{"name":"plain_text.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"plain_text","extname":".js","collection":null,"path":"/courses/editor/snippets/plain_text.js"},{"name":"powershell.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"powershell","extname":".js","collection":null,"path":"/courses/editor/snippets/powershell.js"},{"name":"praat.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"praat","extname":".js","collection":null,"path":"/courses/editor/snippets/praat.js"},{"name":"prolog.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"prolog","extname":".js","collection":null,"path":"/courses/editor/snippets/prolog.js"},{"name":"properties.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"properties","extname":".js","collection":null,"path":"/courses/editor/snippets/properties.js"},{"name":"protobuf.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"protobuf","extname":".js","collection":null,"path":"/courses/editor/snippets/protobuf.js"},{"name":"puppet.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"puppet","extname":".js","collection":null,"path":"/courses/editor/snippets/puppet.js"},{"name":"python.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"python","extname":".js","collection":null,"path":"/courses/editor/snippets/python.js"},{"name":"r.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"r","extname":".js","collection":null,"path":"/courses/editor/snippets/r.js"},{"name":"razor.js","modified_time":"2021-05-31 04:45:05 
-0500","basename":"razor","extname":".js","collection":null,"path":"/courses/editor/snippets/razor.js"},{"name":"rdoc.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"rdoc","extname":".js","collection":null,"path":"/courses/editor/snippets/rdoc.js"},{"name":"red.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"red","extname":".js","collection":null,"path":"/courses/editor/snippets/red.js"},{"name":"redshift.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"redshift","extname":".js","collection":null,"path":"/courses/editor/snippets/redshift.js"},{"name":"rhtml.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"rhtml","extname":".js","collection":null,"path":"/courses/editor/snippets/rhtml.js"},{"name":"rst.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"rst","extname":".js","collection":null,"path":"/courses/editor/snippets/rst.js"},{"name":"ruby.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"ruby","extname":".js","collection":null,"path":"/courses/editor/snippets/ruby.js"},{"name":"rust.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"rust","extname":".js","collection":null,"path":"/courses/editor/snippets/rust.js"},{"name":"sass.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"sass","extname":".js","collection":null,"path":"/courses/editor/snippets/sass.js"},{"name":"scad.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"scad","extname":".js","collection":null,"path":"/courses/editor/snippets/scad.js"},{"name":"scala.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"scala","extname":".js","collection":null,"path":"/courses/editor/snippets/scala.js"},{"name":"scheme.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"scheme","extname":".js","collection":null,"path":"/courses/editor/snippets/scheme.js"},{"name":"scss.js","modified_time":"2021-05-31 04:45:05 
-0500","basename":"scss","extname":".js","collection":null,"path":"/courses/editor/snippets/scss.js"},{"name":"sh.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"sh","extname":".js","collection":null,"path":"/courses/editor/snippets/sh.js"},{"name":"sjs.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"sjs","extname":".js","collection":null,"path":"/courses/editor/snippets/sjs.js"},{"name":"slim.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"slim","extname":".js","collection":null,"path":"/courses/editor/snippets/slim.js"},{"name":"smarty.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"smarty","extname":".js","collection":null,"path":"/courses/editor/snippets/smarty.js"},{"name":"snippets.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"snippets","extname":".js","collection":null,"path":"/courses/editor/snippets/snippets.js"},{"name":"soy_template.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"soy_template","extname":".js","collection":null,"path":"/courses/editor/snippets/soy_template.js"},{"name":"space.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"space","extname":".js","collection":null,"path":"/courses/editor/snippets/space.js"},{"name":"sparql.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"sparql","extname":".js","collection":null,"path":"/courses/editor/snippets/sparql.js"},{"name":"sql.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"sql","extname":".js","collection":null,"path":"/courses/editor/snippets/sql.js"},{"name":"sqlserver.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"sqlserver","extname":".js","collection":null,"path":"/courses/editor/snippets/sqlserver.js"},{"name":"stylus.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"stylus","extname":".js","collection":null,"path":"/courses/editor/snippets/stylus.js"},{"name":"svg.js","modified_time":"2021-05-31 04:45:05 
-0500","basename":"svg","extname":".js","collection":null,"path":"/courses/editor/snippets/svg.js"},{"name":"swift.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"swift","extname":".js","collection":null,"path":"/courses/editor/snippets/swift.js"},{"name":"tcl.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"tcl","extname":".js","collection":null,"path":"/courses/editor/snippets/tcl.js"},{"name":"terraform.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"terraform","extname":".js","collection":null,"path":"/courses/editor/snippets/terraform.js"},{"name":"tex.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"tex","extname":".js","collection":null,"path":"/courses/editor/snippets/tex.js"},{"name":"text.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"text","extname":".js","collection":null,"path":"/courses/editor/snippets/text.js"},{"name":"textile.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"textile","extname":".js","collection":null,"path":"/courses/editor/snippets/textile.js"},{"name":"toml.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"toml","extname":".js","collection":null,"path":"/courses/editor/snippets/toml.js"},{"name":"tsx.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"tsx","extname":".js","collection":null,"path":"/courses/editor/snippets/tsx.js"},{"name":"turtle.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"turtle","extname":".js","collection":null,"path":"/courses/editor/snippets/turtle.js"},{"name":"twig.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"twig","extname":".js","collection":null,"path":"/courses/editor/snippets/twig.js"},{"name":"typescript.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"typescript","extname":".js","collection":null,"path":"/courses/editor/snippets/typescript.js"},{"name":"vala.js","modified_time":"2021-05-31 04:45:05 
-0500","basename":"vala","extname":".js","collection":null,"path":"/courses/editor/snippets/vala.js"},{"name":"vbscript.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"vbscript","extname":".js","collection":null,"path":"/courses/editor/snippets/vbscript.js"},{"name":"velocity.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"velocity","extname":".js","collection":null,"path":"/courses/editor/snippets/velocity.js"},{"name":"verilog.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"verilog","extname":".js","collection":null,"path":"/courses/editor/snippets/verilog.js"},{"name":"vhdl.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"vhdl","extname":".js","collection":null,"path":"/courses/editor/snippets/vhdl.js"},{"name":"visualforce.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"visualforce","extname":".js","collection":null,"path":"/courses/editor/snippets/visualforce.js"},{"name":"wollok.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"wollok","extname":".js","collection":null,"path":"/courses/editor/snippets/wollok.js"},{"name":"xml.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"xml","extname":".js","collection":null,"path":"/courses/editor/snippets/xml.js"},{"name":"xquery.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"xquery","extname":".js","collection":null,"path":"/courses/editor/snippets/xquery.js"},{"name":"yaml.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"yaml","extname":".js","collection":null,"path":"/courses/editor/snippets/yaml.js"},{"name":"zeek.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"zeek","extname":".js","collection":null,"path":"/courses/editor/snippets/zeek.js"},{"name":"theme-ambiance.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"theme-ambiance","extname":".js","collection":null,"path":"/courses/editor/theme-ambiance.js"},{"name":"theme-chaos.js","modified_time":"2021-05-31 04:45:05 
-0500","basename":"theme-chaos","extname":".js","collection":null,"path":"/courses/editor/theme-chaos.js"},{"name":"theme-chrome.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"theme-chrome","extname":".js","collection":null,"path":"/courses/editor/theme-chrome.js"},{"name":"theme-clouds.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"theme-clouds","extname":".js","collection":null,"path":"/courses/editor/theme-clouds.js"},{"name":"theme-clouds_midnight.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"theme-clouds_midnight","extname":".js","collection":null,"path":"/courses/editor/theme-clouds_midnight.js"},{"name":"theme-cobalt.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"theme-cobalt","extname":".js","collection":null,"path":"/courses/editor/theme-cobalt.js"},{"name":"theme-crimson_editor.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"theme-crimson_editor","extname":".js","collection":null,"path":"/courses/editor/theme-crimson_editor.js"},{"name":"theme-dawn.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"theme-dawn","extname":".js","collection":null,"path":"/courses/editor/theme-dawn.js"},{"name":"theme-dracula.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"theme-dracula","extname":".js","collection":null,"path":"/courses/editor/theme-dracula.js"},{"name":"theme-dreamweaver.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"theme-dreamweaver","extname":".js","collection":null,"path":"/courses/editor/theme-dreamweaver.js"},{"name":"theme-eclipse.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"theme-eclipse","extname":".js","collection":null,"path":"/courses/editor/theme-eclipse.js"},{"name":"theme-github.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"theme-github","extname":".js","collection":null,"path":"/courses/editor/theme-github.js"},{"name":"theme-gob.js","modified_time":"2021-05-31 04:45:05 
-0500","basename":"theme-gob","extname":".js","collection":null,"path":"/courses/editor/theme-gob.js"},{"name":"theme-gruvbox.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"theme-gruvbox","extname":".js","collection":null,"path":"/courses/editor/theme-gruvbox.js"},{"name":"theme-idle_fingers.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"theme-idle_fingers","extname":".js","collection":null,"path":"/courses/editor/theme-idle_fingers.js"},{"name":"theme-iplastic.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"theme-iplastic","extname":".js","collection":null,"path":"/courses/editor/theme-iplastic.js"},{"name":"theme-katzenmilch.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"theme-katzenmilch","extname":".js","collection":null,"path":"/courses/editor/theme-katzenmilch.js"},{"name":"theme-kr_theme.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"theme-kr_theme","extname":".js","collection":null,"path":"/courses/editor/theme-kr_theme.js"},{"name":"theme-kuroir.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"theme-kuroir","extname":".js","collection":null,"path":"/courses/editor/theme-kuroir.js"},{"name":"theme-merbivore.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"theme-merbivore","extname":".js","collection":null,"path":"/courses/editor/theme-merbivore.js"},{"name":"theme-merbivore_soft.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"theme-merbivore_soft","extname":".js","collection":null,"path":"/courses/editor/theme-merbivore_soft.js"},{"name":"theme-mono_industrial.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"theme-mono_industrial","extname":".js","collection":null,"path":"/courses/editor/theme-mono_industrial.js"},{"name":"theme-monokai.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"theme-monokai","extname":".js","collection":null,"path":"/courses/editor/theme-monokai.js"},{"name":"theme-pastel_on_dark.js","modified_time":"2021-05-31 
04:45:05 -0500","basename":"theme-pastel_on_dark","extname":".js","collection":null,"path":"/courses/editor/theme-pastel_on_dark.js"},{"name":"theme-solarized_dark.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"theme-solarized_dark","extname":".js","collection":null,"path":"/courses/editor/theme-solarized_dark.js"},{"name":"theme-solarized_light.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"theme-solarized_light","extname":".js","collection":null,"path":"/courses/editor/theme-solarized_light.js"},{"name":"theme-sqlserver.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"theme-sqlserver","extname":".js","collection":null,"path":"/courses/editor/theme-sqlserver.js"},{"name":"theme-terminal.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"theme-terminal","extname":".js","collection":null,"path":"/courses/editor/theme-terminal.js"},{"name":"theme-textmate.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"theme-textmate","extname":".js","collection":null,"path":"/courses/editor/theme-textmate.js"},{"name":"theme-tomorrow.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"theme-tomorrow","extname":".js","collection":null,"path":"/courses/editor/theme-tomorrow.js"},{"name":"theme-tomorrow_night.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"theme-tomorrow_night","extname":".js","collection":null,"path":"/courses/editor/theme-tomorrow_night.js"},{"name":"theme-tomorrow_night_blue.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"theme-tomorrow_night_blue","extname":".js","collection":null,"path":"/courses/editor/theme-tomorrow_night_blue.js"},{"name":"theme-tomorrow_night_bright.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"theme-tomorrow_night_bright","extname":".js","collection":null,"path":"/courses/editor/theme-tomorrow_night_bright.js"},{"name":"theme-tomorrow_night_eighties.js","modified_time":"2021-05-31 04:45:05 
-0500","basename":"theme-tomorrow_night_eighties","extname":".js","collection":null,"path":"/courses/editor/theme-tomorrow_night_eighties.js"},{"name":"theme-twilight.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"theme-twilight","extname":".js","collection":null,"path":"/courses/editor/theme-twilight.js"},{"name":"theme-vibrant_ink.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"theme-vibrant_ink","extname":".js","collection":null,"path":"/courses/editor/theme-vibrant_ink.js"},{"name":"theme-xcode.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"theme-xcode","extname":".js","collection":null,"path":"/courses/editor/theme-xcode.js"},{"name":"worker-coffee.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"worker-coffee","extname":".js","collection":null,"path":"/courses/editor/worker-coffee.js"},{"name":"worker-css.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"worker-css","extname":".js","collection":null,"path":"/courses/editor/worker-css.js"},{"name":"worker-html.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"worker-html","extname":".js","collection":null,"path":"/courses/editor/worker-html.js"},{"name":"worker-javascript.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"worker-javascript","extname":".js","collection":null,"path":"/courses/editor/worker-javascript.js"},{"name":"worker-json.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"worker-json","extname":".js","collection":null,"path":"/courses/editor/worker-json.js"},{"name":"worker-lua.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"worker-lua","extname":".js","collection":null,"path":"/courses/editor/worker-lua.js"},{"name":"worker-php.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"worker-php","extname":".js","collection":null,"path":"/courses/editor/worker-php.js"},{"name":"worker-xml.js","modified_time":"2021-05-31 04:45:05 
-0500","basename":"worker-xml","extname":".js","collection":null,"path":"/courses/editor/worker-xml.js"},{"name":"worker-xquery.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"worker-xquery","extname":".js","collection":null,"path":"/courses/editor/worker-xquery.js"},{"name":"index.html","modified_time":"2021-05-31 04:45:05 -0500","basename":"index","extname":".html","collection":null,"path":"/courses/index.html"},{"name":"katex.0cc7c58c.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"katex.0cc7c58c","extname":".js","collection":null,"path":"/courses/katex.0cc7c58c.js"},{"name":"katex.min.css","modified_time":"2021-05-31 04:45:05 -0500","basename":"katex.min","extname":".css","collection":null,"path":"/courses/katex.min.css"},{"name":"logo.1e973952.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"logo.1e973952","extname":".png","collection":null,"path":"/courses/logo.1e973952.png"},{"name":"logo_192.c4a21617.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"logo_192.c4a21617","extname":".png","collection":null,"path":"/courses/logo_192.c4a21617.png"},{"name":"logo_512.7373a196.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"logo_512.7373a196","extname":".png","collection":null,"path":"/courses/logo_512.7373a196.png"},{"name":"main.0efb6fa5.css","modified_time":"2021-05-31 04:45:05 -0500","basename":"main.0efb6fa5","extname":".css","collection":null,"path":"/courses/main.0efb6fa5.css"},{"name":"manifest.webmanifest","modified_time":"2021-05-31 04:45:05 -0500","basename":"manifest","extname":".webmanifest","collection":null,"path":"/courses/manifest.webmanifest"},{"name":"material-icons.4e30841b.css","modified_time":"2021-05-31 04:45:05 -0500","basename":"material-icons.4e30841b","extname":".css","collection":null,"path":"/courses/material-icons.4e30841b.css"},{"name":"opensans.59ef1c65.css","modified_time":"2021-05-31 04:45:05 
-0500","basename":"opensans.59ef1c65","extname":".css","collection":null,"path":"/courses/opensans.59ef1c65.css"},{"name":"roboto-mono-v5-cyrillic-ext-regular.096de2e9.svg","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-mono-v5-cyrillic-ext-regular.096de2e9","extname":".svg","collection":null,"path":"/courses/roboto-mono-v5-cyrillic-ext-regular.096de2e9.svg"},{"name":"roboto-mono-v5-cyrillic-ext-regular.09a18dfd.woff","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-mono-v5-cyrillic-ext-regular.09a18dfd","extname":".woff","collection":null,"path":"/courses/roboto-mono-v5-cyrillic-ext-regular.09a18dfd.woff"},{"name":"roboto-mono-v5-cyrillic-ext-regular.1640582f.woff2","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-mono-v5-cyrillic-ext-regular.1640582f","extname":".woff2","collection":null,"path":"/courses/roboto-mono-v5-cyrillic-ext-regular.1640582f.woff2"},{"name":"roboto-mono-v5-cyrillic-ext-regular.a1fe7885.ttf","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-mono-v5-cyrillic-ext-regular.a1fe7885","extname":".ttf","collection":null,"path":"/courses/roboto-mono-v5-cyrillic-ext-regular.a1fe7885.ttf"},{"name":"roboto-mono-v5-cyrillic-ext-regular.fb7dd8d2.eot","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-mono-v5-cyrillic-ext-regular.fb7dd8d2","extname":".eot","collection":null,"path":"/courses/roboto-mono-v5-cyrillic-ext-regular.fb7dd8d2.eot"},{"name":"roboto-mono-v5-cyrillic-regular.096de2e9.svg","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-mono-v5-cyrillic-regular.096de2e9","extname":".svg","collection":null,"path":"/courses/roboto-mono-v5-cyrillic-regular.096de2e9.svg"},{"name":"roboto-mono-v5-cyrillic-regular.4d0b070c.woff","modified_time":"2021-05-31 04:45:05 
-0500","basename":"roboto-mono-v5-cyrillic-regular.4d0b070c","extname":".woff","collection":null,"path":"/courses/roboto-mono-v5-cyrillic-regular.4d0b070c.woff"},{"name":"roboto-mono-v5-cyrillic-regular.51170967.eot","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-mono-v5-cyrillic-regular.51170967","extname":".eot","collection":null,"path":"/courses/roboto-mono-v5-cyrillic-regular.51170967.eot"},{"name":"roboto-mono-v5-cyrillic-regular.9b9afdef.woff2","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-mono-v5-cyrillic-regular.9b9afdef","extname":".woff2","collection":null,"path":"/courses/roboto-mono-v5-cyrillic-regular.9b9afdef.woff2"},{"name":"roboto-mono-v5-cyrillic-regular.b466b041.ttf","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-mono-v5-cyrillic-regular.b466b041","extname":".ttf","collection":null,"path":"/courses/roboto-mono-v5-cyrillic-regular.b466b041.ttf"},{"name":"roboto-mono-v5-greek-ext-regular.096de2e9.svg","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-mono-v5-greek-ext-regular.096de2e9","extname":".svg","collection":null,"path":"/courses/roboto-mono-v5-greek-ext-regular.096de2e9.svg"},{"name":"roboto-mono-v5-greek-ext-regular.44582380.woff","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-mono-v5-greek-ext-regular.44582380","extname":".woff","collection":null,"path":"/courses/roboto-mono-v5-greek-ext-regular.44582380.woff"},{"name":"roboto-mono-v5-greek-ext-regular.7854200b.ttf","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-mono-v5-greek-ext-regular.7854200b","extname":".ttf","collection":null,"path":"/courses/roboto-mono-v5-greek-ext-regular.7854200b.ttf"},{"name":"roboto-mono-v5-greek-ext-regular.d22b152c.eot","modified_time":"2021-05-31 04:45:05 
-0500","basename":"roboto-mono-v5-greek-ext-regular.d22b152c","extname":".eot","collection":null,"path":"/courses/roboto-mono-v5-greek-ext-regular.d22b152c.eot"},{"name":"roboto-mono-v5-greek-ext-regular.dac45c2c.woff2","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-mono-v5-greek-ext-regular.dac45c2c","extname":".woff2","collection":null,"path":"/courses/roboto-mono-v5-greek-ext-regular.dac45c2c.woff2"},{"name":"roboto-mono-v5-greek-regular.096de2e9.svg","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-mono-v5-greek-regular.096de2e9","extname":".svg","collection":null,"path":"/courses/roboto-mono-v5-greek-regular.096de2e9.svg"},{"name":"roboto-mono-v5-greek-regular.11fef9aa.woff2","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-mono-v5-greek-regular.11fef9aa","extname":".woff2","collection":null,"path":"/courses/roboto-mono-v5-greek-regular.11fef9aa.woff2"},{"name":"roboto-mono-v5-greek-regular.c34a24c7.woff","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-mono-v5-greek-regular.c34a24c7","extname":".woff","collection":null,"path":"/courses/roboto-mono-v5-greek-regular.c34a24c7.woff"},{"name":"roboto-mono-v5-greek-regular.d2a16e53.ttf","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-mono-v5-greek-regular.d2a16e53","extname":".ttf","collection":null,"path":"/courses/roboto-mono-v5-greek-regular.d2a16e53.ttf"},{"name":"roboto-mono-v5-greek-regular.eea19dbc.eot","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-mono-v5-greek-regular.eea19dbc","extname":".eot","collection":null,"path":"/courses/roboto-mono-v5-greek-regular.eea19dbc.eot"},{"name":"roboto-mono-v5-latin-ext-regular.096de2e9.svg","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-mono-v5-latin-ext-regular.096de2e9","extname":".svg","collection":null,"path":"/courses/roboto-mono-v5-latin-ext-regular.096de2e9.svg"},{"name":"roboto-mono-v5-latin-ext-regular.1ffad1d1.woff2","modified_time":"2021-05-31 04:45:05 
-0500","basename":"roboto-mono-v5-latin-ext-regular.1ffad1d1","extname":".woff2","collection":null,"path":"/courses/roboto-mono-v5-latin-ext-regular.1ffad1d1.woff2"},{"name":"roboto-mono-v5-latin-ext-regular.6111d5c7.woff","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-mono-v5-latin-ext-regular.6111d5c7","extname":".woff","collection":null,"path":"/courses/roboto-mono-v5-latin-ext-regular.6111d5c7.woff"},{"name":"roboto-mono-v5-latin-ext-regular.7b263b94.eot","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-mono-v5-latin-ext-regular.7b263b94","extname":".eot","collection":null,"path":"/courses/roboto-mono-v5-latin-ext-regular.7b263b94.eot"},{"name":"roboto-mono-v5-latin-ext-regular.b18e4423.ttf","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-mono-v5-latin-ext-regular.b18e4423","extname":".ttf","collection":null,"path":"/courses/roboto-mono-v5-latin-ext-regular.b18e4423.ttf"},{"name":"roboto-mono-v5-latin-regular.096de2e9.svg","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-mono-v5-latin-regular.096de2e9","extname":".svg","collection":null,"path":"/courses/roboto-mono-v5-latin-regular.096de2e9.svg"},{"name":"roboto-mono-v5-latin-regular.136656c3.woff","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-mono-v5-latin-regular.136656c3","extname":".woff","collection":null,"path":"/courses/roboto-mono-v5-latin-regular.136656c3.woff"},{"name":"roboto-mono-v5-latin-regular.1770c27b.ttf","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-mono-v5-latin-regular.1770c27b","extname":".ttf","collection":null,"path":"/courses/roboto-mono-v5-latin-regular.1770c27b.ttf"},{"name":"roboto-mono-v5-latin-regular.24977812.woff2","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-mono-v5-latin-regular.24977812","extname":".woff2","collection":null,"path":"/courses/roboto-mono-v5-latin-regular.24977812.woff2"},{"name":"roboto-mono-v5-latin-regular.fe2c4126.eot","modified_time":"2021-05-31 
04:45:05 -0500","basename":"roboto-mono-v5-latin-regular.fe2c4126","extname":".eot","collection":null,"path":"/courses/roboto-mono-v5-latin-regular.fe2c4126.eot"},{"name":"roboto-mono-v5-vietnamese-regular.096de2e9.svg","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-mono-v5-vietnamese-regular.096de2e9","extname":".svg","collection":null,"path":"/courses/roboto-mono-v5-vietnamese-regular.096de2e9.svg"},{"name":"roboto-mono-v5-vietnamese-regular.1efa8359.ttf","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-mono-v5-vietnamese-regular.1efa8359","extname":".ttf","collection":null,"path":"/courses/roboto-mono-v5-vietnamese-regular.1efa8359.ttf"},{"name":"roboto-mono-v5-vietnamese-regular.52f8f0cd.woff","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-mono-v5-vietnamese-regular.52f8f0cd","extname":".woff","collection":null,"path":"/courses/roboto-mono-v5-vietnamese-regular.52f8f0cd.woff"},{"name":"roboto-mono-v5-vietnamese-regular.92c77f61.eot","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-mono-v5-vietnamese-regular.92c77f61","extname":".eot","collection":null,"path":"/courses/roboto-mono-v5-vietnamese-regular.92c77f61.eot"},{"name":"roboto-mono-v5-vietnamese-regular.d27a9dd7.woff2","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-mono-v5-vietnamese-regular.d27a9dd7","extname":".woff2","collection":null,"path":"/courses/roboto-mono-v5-vietnamese-regular.d27a9dd7.woff2"},{"name":"roboto-v18-cyrillic-ext-regular.1628be44.woff2","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-v18-cyrillic-ext-regular.1628be44","extname":".woff2","collection":null,"path":"/courses/roboto-v18-cyrillic-ext-regular.1628be44.woff2"},{"name":"roboto-v18-cyrillic-ext-regular.7a632d06.ttf","modified_time":"2021-05-31 04:45:05 
-0500","basename":"roboto-v18-cyrillic-ext-regular.7a632d06","extname":".ttf","collection":null,"path":"/courses/roboto-v18-cyrillic-ext-regular.7a632d06.ttf"},{"name":"roboto-v18-cyrillic-ext-regular.86e517b8.woff","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-v18-cyrillic-ext-regular.86e517b8","extname":".woff","collection":null,"path":"/courses/roboto-v18-cyrillic-ext-regular.86e517b8.woff"},{"name":"roboto-v18-cyrillic-ext-regular.ab9033c4.svg","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-v18-cyrillic-ext-regular.ab9033c4","extname":".svg","collection":null,"path":"/courses/roboto-v18-cyrillic-ext-regular.ab9033c4.svg"},{"name":"roboto-v18-cyrillic-ext-regular.e059c632.eot","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-v18-cyrillic-ext-regular.e059c632","extname":".eot","collection":null,"path":"/courses/roboto-v18-cyrillic-ext-regular.e059c632.eot"},{"name":"roboto-v18-cyrillic-regular.35fa3101.ttf","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-v18-cyrillic-regular.35fa3101","extname":".ttf","collection":null,"path":"/courses/roboto-v18-cyrillic-regular.35fa3101.ttf"},{"name":"roboto-v18-cyrillic-regular.38a49692.eot","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-v18-cyrillic-regular.38a49692","extname":".eot","collection":null,"path":"/courses/roboto-v18-cyrillic-regular.38a49692.eot"},{"name":"roboto-v18-cyrillic-regular.87e0bb7f.woff2","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-v18-cyrillic-regular.87e0bb7f","extname":".woff2","collection":null,"path":"/courses/roboto-v18-cyrillic-regular.87e0bb7f.woff2"},{"name":"roboto-v18-cyrillic-regular.ab9033c4.svg","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-v18-cyrillic-regular.ab9033c4","extname":".svg","collection":null,"path":"/courses/roboto-v18-cyrillic-regular.ab9033c4.svg"},{"name":"roboto-v18-cyrillic-regular.d76ee912.woff","modified_time":"2021-05-31 04:45:05 
-0500","basename":"roboto-v18-cyrillic-regular.d76ee912","extname":".woff","collection":null,"path":"/courses/roboto-v18-cyrillic-regular.d76ee912.woff"},{"name":"roboto-v18-greek-ext-regular.2910e05b.woff","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-v18-greek-ext-regular.2910e05b","extname":".woff","collection":null,"path":"/courses/roboto-v18-greek-ext-regular.2910e05b.woff"},{"name":"roboto-v18-greek-ext-regular.62b2d1cc.woff2","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-v18-greek-ext-regular.62b2d1cc","extname":".woff2","collection":null,"path":"/courses/roboto-v18-greek-ext-regular.62b2d1cc.woff2"},{"name":"roboto-v18-greek-ext-regular.8f023c78.ttf","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-v18-greek-ext-regular.8f023c78","extname":".ttf","collection":null,"path":"/courses/roboto-v18-greek-ext-regular.8f023c78.ttf"},{"name":"roboto-v18-greek-ext-regular.ab9033c4.svg","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-v18-greek-ext-regular.ab9033c4","extname":".svg","collection":null,"path":"/courses/roboto-v18-greek-ext-regular.ab9033c4.svg"},{"name":"roboto-v18-greek-ext-regular.c71099b2.eot","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-v18-greek-ext-regular.c71099b2","extname":".eot","collection":null,"path":"/courses/roboto-v18-greek-ext-regular.c71099b2.eot"},{"name":"roboto-v18-greek-regular.41173422.woff2","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-v18-greek-regular.41173422","extname":".woff2","collection":null,"path":"/courses/roboto-v18-greek-regular.41173422.woff2"},{"name":"roboto-v18-greek-regular.43aed030.eot","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-v18-greek-regular.43aed030","extname":".eot","collection":null,"path":"/courses/roboto-v18-greek-regular.43aed030.eot"},{"name":"roboto-v18-greek-regular.91d0e251.woff","modified_time":"2021-05-31 04:45:05 
-0500","basename":"roboto-v18-greek-regular.91d0e251","extname":".woff","collection":null,"path":"/courses/roboto-v18-greek-regular.91d0e251.woff"},{"name":"roboto-v18-greek-regular.ab9033c4.svg","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-v18-greek-regular.ab9033c4","extname":".svg","collection":null,"path":"/courses/roboto-v18-greek-regular.ab9033c4.svg"},{"name":"roboto-v18-greek-regular.d23af664.ttf","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-v18-greek-regular.d23af664","extname":".ttf","collection":null,"path":"/courses/roboto-v18-greek-regular.d23af664.ttf"},{"name":"roboto-v18-latin-ext-regular.300b57d0.woff2","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-v18-latin-ext-regular.300b57d0","extname":".woff2","collection":null,"path":"/courses/roboto-v18-latin-ext-regular.300b57d0.woff2"},{"name":"roboto-v18-latin-ext-regular.8f3d4404.eot","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-v18-latin-ext-regular.8f3d4404","extname":".eot","collection":null,"path":"/courses/roboto-v18-latin-ext-regular.8f3d4404.eot"},{"name":"roboto-v18-latin-ext-regular.ab9033c4.svg","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-v18-latin-ext-regular.ab9033c4","extname":".svg","collection":null,"path":"/courses/roboto-v18-latin-ext-regular.ab9033c4.svg"},{"name":"roboto-v18-latin-ext-regular.cc24dc3c.woff","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-v18-latin-ext-regular.cc24dc3c","extname":".woff","collection":null,"path":"/courses/roboto-v18-latin-ext-regular.cc24dc3c.woff"},{"name":"roboto-v18-latin-ext-regular.e8097339.ttf","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-v18-latin-ext-regular.e8097339","extname":".ttf","collection":null,"path":"/courses/roboto-v18-latin-ext-regular.e8097339.ttf"},{"name":"roboto-v18-latin-regular.12a7207b.woff2","modified_time":"2021-05-31 04:45:05 
-0500","basename":"roboto-v18-latin-regular.12a7207b","extname":".woff2","collection":null,"path":"/courses/roboto-v18-latin-regular.12a7207b.woff2"},{"name":"roboto-v18-latin-regular.61ac9e9d.eot","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-v18-latin-regular.61ac9e9d","extname":".eot","collection":null,"path":"/courses/roboto-v18-latin-regular.61ac9e9d.eot"},{"name":"roboto-v18-latin-regular.8023399d.woff","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-v18-latin-regular.8023399d","extname":".woff","collection":null,"path":"/courses/roboto-v18-latin-regular.8023399d.woff"},{"name":"roboto-v18-latin-regular.94826897.ttf","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-v18-latin-regular.94826897","extname":".ttf","collection":null,"path":"/courses/roboto-v18-latin-regular.94826897.ttf"},{"name":"roboto-v18-latin-regular.ab9033c4.svg","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-v18-latin-regular.ab9033c4","extname":".svg","collection":null,"path":"/courses/roboto-v18-latin-regular.ab9033c4.svg"},{"name":"roboto-v18-vietnamese-regular.0ef3cb8d.eot","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-v18-vietnamese-regular.0ef3cb8d","extname":".eot","collection":null,"path":"/courses/roboto-v18-vietnamese-regular.0ef3cb8d.eot"},{"name":"roboto-v18-vietnamese-regular.98146e63.woff","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-v18-vietnamese-regular.98146e63","extname":".woff","collection":null,"path":"/courses/roboto-v18-vietnamese-regular.98146e63.woff"},{"name":"roboto-v18-vietnamese-regular.9b8060ff.ttf","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-v18-vietnamese-regular.9b8060ff","extname":".ttf","collection":null,"path":"/courses/roboto-v18-vietnamese-regular.9b8060ff.ttf"},{"name":"roboto-v18-vietnamese-regular.ab9033c4.svg","modified_time":"2021-05-31 04:45:05 
-0500","basename":"roboto-v18-vietnamese-regular.ab9033c4","extname":".svg","collection":null,"path":"/courses/roboto-v18-vietnamese-regular.ab9033c4.svg"},{"name":"roboto-v18-vietnamese-regular.f9b20dd9.woff2","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto-v18-vietnamese-regular.f9b20dd9","extname":".woff2","collection":null,"path":"/courses/roboto-v18-vietnamese-regular.f9b20dd9.woff2"},{"name":"roboto.a4c0fcd8.css","modified_time":"2021-05-31 04:45:05 -0500","basename":"roboto.a4c0fcd8","extname":".css","collection":null,"path":"/courses/roboto.a4c0fcd8.css"},{"name":"src.2431f9ea.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"src.2431f9ea","extname":".js","collection":null,"path":"/courses/src.2431f9ea.js"},{"name":"sw.js","modified_time":"2021-05-31 04:45:05 -0500","basename":"sw","extname":".js","collection":null,"path":"/courses/sw.js"},{"name":"docker-compose.yml","modified_time":"2021-05-31 04:45:05 -0500","basename":"docker-compose","extname":".yml","collection":null,"path":"/docker-compose.yml"},{"name":"favicon.ico","modified_time":"2021-05-31 04:45:05 -0500","basename":"favicon","extname":".ico","collection":null,"path":"/favicon.ico"},{"name":"feed.xml","modified_time":"2021-05-31 04:45:05 -0500","basename":"feed","extname":".xml","collection":null,"path":"/feed.xml"},{"name":"galaxy_instances.yaml","modified_time":"2021-05-31 04:45:05 -0500","basename":"galaxy_instances","extname":".yaml","collection":null,"path":"/galaxy_instances.yaml"},{"name":"LICENSE.txt","modified_time":"2021-05-31 04:45:05 -0500","basename":"LICENSE","extname":".txt","collection":null,"path":"/shared/fonts/NotoSans/LICENSE.txt"},{"name":"NotoSans-Bold.ttf","modified_time":"2021-05-31 04:45:05 -0500","basename":"NotoSans-Bold","extname":".ttf","collection":null,"path":"/shared/fonts/NotoSans/NotoSans-Bold.ttf"},{"name":"NotoSans-BoldItalic.ttf","modified_time":"2021-05-31 04:45:05 
-0500","basename":"NotoSans-BoldItalic","extname":".ttf","collection":null,"path":"/shared/fonts/NotoSans/NotoSans-BoldItalic.ttf"},{"name":"NotoSans-Italic.ttf","modified_time":"2021-05-31 04:45:05 -0500","basename":"NotoSans-Italic","extname":".ttf","collection":null,"path":"/shared/fonts/NotoSans/NotoSans-Italic.ttf"},{"name":"NotoSans-Regular.ttf","modified_time":"2021-05-31 04:45:05 -0500","basename":"NotoSans-Regular","extname":".ttf","collection":null,"path":"/shared/fonts/NotoSans/NotoSans-Regular.ttf"},{"name":"Excelerate_whitebackground.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Excelerate_whitebackground","extname":".png","collection":null,"path":"/shared/images/Excelerate_whitebackground.png"},{"name":"FreeBayes_settings.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"FreeBayes_settings","extname":".png","collection":null,"path":"/shared/images/FreeBayes_settings.png"},{"name":"GTNLogo1000.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"GTNLogo1000","extname":".png","collection":null,"path":"/shared/images/GTNLogo1000.png"},{"name":"MA_plot.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"MA_plot","extname":".png","collection":null,"path":"/shared/images/MA_plot.png"},{"name":"RNAseq_histiry_image.key","modified_time":"2021-05-31 04:45:05 -0500","basename":"RNAseq_histiry_image","extname":".key","collection":null,"path":"/shared/images/RNAseq_histiry_image.key"},{"name":"Rarithmetic_operators.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rarithmetic_operators","extname":".png","collection":null,"path":"/shared/images/Rarithmetic_operators.png"},{"name":"Rautocompletion.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rautocompletion","extname":".png","collection":null,"path":"/shared/images/Rautocompletion.png"},{"name":"Rcomment_uncomment.png","modified_time":"2021-05-31 04:45:05 
-0500","basename":"Rcomment_uncomment","extname":".png","collection":null,"path":"/shared/images/Rcomment_uncomment.png"},{"name":"Rexport_plot.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rexport_plot","extname":".png","collection":null,"path":"/shared/images/Rexport_plot.png"},{"name":"Rfile_imported.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rfile_imported","extname":".png","collection":null,"path":"/shared/images/Rfile_imported.png"},{"name":"Rgeneral_functions.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rgeneral_functions","extname":".png","collection":null,"path":"/shared/images/Rgeneral_functions.png"},{"name":"Rinstall_zip.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rinstall_zip","extname":".png","collection":null,"path":"/shared/images/Rinstall_zip.png"},{"name":"Rlogic_operators.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rlogic_operators","extname":".png","collection":null,"path":"/shared/images/Rlogic_operators.png"},{"name":"Rnew_script.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rnew_script","extname":".png","collection":null,"path":"/shared/images/Rnew_script.png"},{"name":"Rrefresh_button.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rrefresh_button","extname":".png","collection":null,"path":"/shared/images/Rrefresh_button.png"},{"name":"Rsave_script.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rsave_script","extname":".png","collection":null,"path":"/shared/images/Rsave_script.png"},{"name":"Rsection_headings.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rsection_headings","extname":".png","collection":null,"path":"/shared/images/Rsection_headings.png"},{"name":"Rtable_function.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rtable_function","extname":".png","collection":null,"path":"/shared/images/Rtable_function.png"},{"name":"Rview_file.png","modified_time":"2021-05-31 04:45:05 
-0500","basename":"Rview_file","extname":".png","collection":null,"path":"/shared/images/Rview_file.png"},{"name":"Screen Shot 2016-04-21 at 3.55.19 PM.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Screen Shot 2016-04-21 at 3.55.19 PM","extname":".png","collection":null,"path":"/shared/images/Screen Shot 2016-04-21 at 3.55.19 PM.png"},{"name":"Screen Shot 2016-12-06 at 18.14.25.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Screen Shot 2016-12-06 at 18.14.25","extname":".png","collection":null,"path":"/shared/images/Screen Shot 2016-12-06 at 18.14.25.png"},{"name":"accepted_hits_1.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"accepted_hits_1","extname":".png","collection":null,"path":"/shared/images/accepted_hits_1.png"},{"name":"accepted_hits_2.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"accepted_hits_2","extname":".png","collection":null,"path":"/shared/images/accepted_hits_2.png"},{"name":"ansible_logo.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"ansible_logo","extname":".png","collection":null,"path":"/shared/images/ansible_logo.png"},{"name":"arrow_white_left.svg","modified_time":"2021-05-31 04:45:05 -0500","basename":"arrow_white_left","extname":".svg","collection":null,"path":"/shared/images/arrow_white_left.svg"},{"name":"bioconda_logo.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"bioconda_logo","extname":".png","collection":null,"path":"/shared/images/bioconda_logo.png"},{"name":"bw.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"bw","extname":".png","collection":null,"path":"/shared/images/bw.png"},{"name":"bw_b_rank.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"bw_b_rank","extname":".png","collection":null,"path":"/shared/images/bw_b_rank.png"},{"name":"bwt_q1.png","modified_time":"2021-05-31 04:45:05 
-0500","basename":"bwt_q1","extname":".png","collection":null,"path":"/shared/images/bwt_q1.png"},{"name":"bwt_q2.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"bwt_q2","extname":".png","collection":null,"path":"/shared/images/bwt_q2.png"},{"name":"bwt_q3.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"bwt_q3","extname":".png","collection":null,"path":"/shared/images/bwt_q3.png"},{"name":"bwt_q4.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"bwt_q4","extname":".png","collection":null,"path":"/shared/images/bwt_q4.png"},{"name":"bwt_q5.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"bwt_q5","extname":".png","collection":null,"path":"/shared/images/bwt_q5.png"},{"name":"bwt_rev.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"bwt_rev","extname":".png","collection":null,"path":"/shared/images/bwt_rev.png"},{"name":"bwt_rev2.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"bwt_rev2","extname":".png","collection":null,"path":"/shared/images/bwt_rev2.png"},{"name":"cloudman_based_server_landing_page.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"cloudman_based_server_landing_page","extname":".png","collection":null,"path":"/shared/images/cloudman_based_server_landing_page.png"},{"name":"cloudman_management_console_showing_autoscaled_w_1_worker.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"cloudman_management_console_showing_autoscaled_w_1_worker","extname":".png","collection":null,"path":"/shared/images/cloudman_management_console_showing_autoscaled_w_1_worker.png"},{"name":"collections_video.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"collections_video","extname":".png","collection":null,"path":"/shared/images/collections_video.png"},{"name":"conda_logo.png","modified_time":"2021-05-31 04:45:05 
-0500","basename":"conda_logo","extname":".png","collection":null,"path":"/shared/images/conda_logo.png"},{"name":"create_index.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"create_index","extname":".png","collection":null,"path":"/shared/images/create_index.png"},{"name":"dT_random.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"dT_random","extname":".png","collection":null,"path":"/shared/images/dT_random.png"},{"name":"deNBI.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"deNBI","extname":".png","collection":null,"path":"/shared/images/deNBI.png"},{"name":"deseq2_interface.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"deseq2_interface","extname":".png","collection":null,"path":"/shared/images/deseq2_interface.png"},{"name":"deseq2_output.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"deseq2_output","extname":".png","collection":null,"path":"/shared/images/deseq2_output.png"},{"name":"development_process.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"development_process","extname":".png","collection":null,"path":"/shared/images/development_process.png"},{"name":"diff.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"diff","extname":".png","collection":null,"path":"/shared/images/diff.png"},{"name":"dispersion.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"dispersion","extname":".png","collection":null,"path":"/shared/images/dispersion.png"},{"name":"dna_rna.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"dna_rna","extname":".png","collection":null,"path":"/shared/images/dna_rna.png"},{"name":"docker_logo.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"docker_logo","extname":".png","collection":null,"path":"/shared/images/docker_logo.png"},{"name":"dotplot.gif","modified_time":"2021-05-31 04:45:05 
-0500","basename":"dotplot","extname":".gif","collection":null,"path":"/shared/images/dotplot.gif"},{"name":"elixir.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"elixir","extname":".png","collection":null,"path":"/shared/images/elixir.png"},{"name":"em.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"em","extname":".png","collection":null,"path":"/shared/images/em.png"},{"name":"euc_dist.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"euc_dist","extname":".png","collection":null,"path":"/shared/images/euc_dist.png"},{"name":"everything_connected.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"everything_connected","extname":".png","collection":null,"path":"/shared/images/everything_connected.png"},{"name":"f_from_l.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"f_from_l","extname":".png","collection":null,"path":"/shared/images/f_from_l.png"},{"name":"filter_gtf.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"filter_gtf","extname":".png","collection":null,"path":"/shared/images/filter_gtf.png"},{"name":"forward_index.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"forward_index","extname":".png","collection":null,"path":"/shared/images/forward_index.png"},{"name":"freebayes.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"freebayes","extname":".png","collection":null,"path":"/shared/images/freebayes.png"},{"name":"freebayes_gq.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"freebayes_gq","extname":".png","collection":null,"path":"/shared/images/freebayes_gq.png"},{"name":"galaxy_command.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"galaxy_command","extname":".png","collection":null,"path":"/shared/images/galaxy_command.png"},{"name":"galaxy_logo.png","modified_time":"2021-05-31 04:45:05 
-0500","basename":"galaxy_logo","extname":".png","collection":null,"path":"/shared/images/galaxy_logo.png"},{"name":"galaxy_logo_25percent_transparent.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"galaxy_logo_25percent_transparent","extname":".png","collection":null,"path":"/shared/images/galaxy_logo_25percent_transparent.png"},{"name":"gemini_command.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"gemini_command","extname":".png","collection":null,"path":"/shared/images/gemini_command.png"},{"name":"gemini_db_info.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"gemini_db_info","extname":".png","collection":null,"path":"/shared/images/gemini_db_info.png"},{"name":"gemini_load.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"gemini_load","extname":".png","collection":null,"path":"/shared/images/gemini_load.png"},{"name":"gemini_query1.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"gemini_query1","extname":".png","collection":null,"path":"/shared/images/gemini_query1.png"},{"name":"gemini_query2.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"gemini_query2","extname":".png","collection":null,"path":"/shared/images/gemini_query2.png"},{"name":"goblet.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"goblet","extname":".png","collection":null,"path":"/shared/images/goblet.png"},{"name":"hash.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"hash","extname":".png","collection":null,"path":"/shared/images/hash.png"},{"name":"hisat.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"hisat","extname":".png","collection":null,"path":"/shared/images/hisat.png"},{"name":"history_menu_buttons2.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"history_menu_buttons2","extname":".png","collection":null,"path":"/shared/images/history_menu_buttons2.png"},{"name":"history_menu_extract_workflow.png","modified_time":"2021-05-31 04:45:05 
-0500","basename":"history_menu_extract_workflow","extname":".png","collection":null,"path":"/shared/images/history_menu_extract_workflow.png"},{"name":"history_options_menu.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"history_options_menu","extname":".png","collection":null,"path":"/shared/images/history_options_menu.png"},{"name":"htseq_count.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"htseq_count","extname":".png","collection":null,"path":"/shared/images/htseq_count.png"},{"name":"htseq_count_interface.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"htseq_count_interface","extname":".png","collection":null,"path":"/shared/images/htseq_count_interface.png"},{"name":"igv_tophat.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"igv_tophat","extname":".png","collection":null,"path":"/shared/images/igv_tophat.png"},{"name":"import_history.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"import_history","extname":".png","collection":null,"path":"/shared/images/import_history.png"},{"name":"interactive_training.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"interactive_training","extname":".png","collection":null,"path":"/shared/images/interactive_training.png"},{"name":"introblockheader.jpg","modified_time":"2021-05-31 04:45:05 -0500","basename":"introblockheader","extname":".jpg","collection":null,"path":"/shared/images/introblockheader.jpg"},{"name":"inverted_index.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"inverted_index","extname":".png","collection":null,"path":"/shared/images/inverted_index.png"},{"name":"kallisto.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"kallisto","extname":".png","collection":null,"path":"/shared/images/kallisto.png"},{"name":"landing_page.png","modified_time":"2021-05-31 04:45:05 
-0500","basename":"landing_page","extname":".png","collection":null,"path":"/shared/images/landing_page.png"},{"name":"lcs.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"lcs","extname":".png","collection":null,"path":"/shared/images/lcs.png"},{"name":"lf_a.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"lf_a","extname":".png","collection":null,"path":"/shared/images/lf_a.png"},{"name":"lf_b.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"lf_b","extname":".png","collection":null,"path":"/shared/images/lf_b.png"},{"name":"lib_type.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"lib_type","extname":".png","collection":null,"path":"/shared/images/lib_type.png"},{"name":"library_import.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"library_import","extname":".png","collection":null,"path":"/shared/images/library_import.png"},{"name":"library_import_complete.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"library_import_complete","extname":".png","collection":null,"path":"/shared/images/library_import_complete.png"},{"name":"logo.svg","modified_time":"2021-05-31 04:45:05 -0500","basename":"logo","extname":".svg","collection":null,"path":"/shared/images/logo.svg"},{"name":"map.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"map","extname":".png","collection":null,"path":"/shared/images/map.png"},{"name":"mum.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"mum","extname":".png","collection":null,"path":"/shared/images/mum.png"},{"name":"nih.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"nih","extname":".png","collection":null,"path":"/shared/images/nih.png"},{"name":"nsf.gif","modified_time":"2021-05-31 04:45:05 -0500","basename":"nsf","extname":".gif","collection":null,"path":"/shared/images/nsf.gif"},{"name":"pA.png","modified_time":"2021-05-31 04:45:05 
-0500","basename":"pA","extname":".png","collection":null,"path":"/shared/images/pA.png"},{"name":"pAB.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"pAB","extname":".png","collection":null,"path":"/shared/images/pAB.png"},{"name":"pB.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"pB","extname":".png","collection":null,"path":"/shared/images/pB.png"},{"name":"p_val_hist.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"p_val_hist","extname":".png","collection":null,"path":"/shared/images/p_val_hist.png"},{"name":"pca.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"pca","extname":".png","collection":null,"path":"/shared/images/pca.png"},{"name":"pcr-duplicates.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"pcr-duplicates","extname":".png","collection":null,"path":"/shared/images/pcr-duplicates.png"},{"name":"phinch_overviewpage.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"phinch_overviewpage","extname":".png","collection":null,"path":"/shared/images/phinch_overviewpage.png"},{"name":"psu.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"psu","extname":".png","collection":null,"path":"/shared/images/psu.png"},{"name":"quasi_aln.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"quasi_aln","extname":".png","collection":null,"path":"/shared/images/quasi_aln.png"},{"name":"rename_history.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"rename_history","extname":".png","collection":null,"path":"/shared/images/rename_history.png"},{"name":"repo_organization.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"repo_organization","extname":".png","collection":null,"path":"/shared/images/repo_organization.png"},{"name":"repo_organization.svg","modified_time":"2021-05-31 04:45:05 
-0500","basename":"repo_organization","extname":".svg","collection":null,"path":"/shared/images/repo_organization.svg"},{"name":"rnaseq_comparison.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"rnaseq_comparison","extname":".png","collection":null,"path":"/shared/images/rnaseq_comparison.png"},{"name":"rnaseq_data_in_history.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"rnaseq_data_in_history","extname":".png","collection":null,"path":"/shared/images/rnaseq_data_in_history.png"},{"name":"rnaseq_library.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"rnaseq_library","extname":".png","collection":null,"path":"/shared/images/rnaseq_library.png"},{"name":"sa.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"sa","extname":".png","collection":null,"path":"/shared/images/sa.png"},{"name":"sailfish.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"sailfish","extname":".png","collection":null,"path":"/shared/images/sailfish.png"},{"name":"salmon.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"salmon","extname":".png","collection":null,"path":"/shared/images/salmon.png"},{"name":"sampling-bias.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"sampling-bias","extname":".png","collection":null,"path":"/shared/images/sampling-bias.png"},{"name":"sashimi.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"sashimi","extname":".png","collection":null,"path":"/shared/images/sashimi.png"},{"name":"search_index.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"search_index","extname":".png","collection":null,"path":"/shared/images/search_index.png"},{"name":"side-by-side.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"side-by-side","extname":".png","collection":null,"path":"/shared/images/side-by-side.png"},{"name":"snpeff.png","modified_time":"2021-05-31 04:45:05 
-0500","basename":"snpeff","extname":".png","collection":null,"path":"/shared/images/snpeff.png"},{"name":"snpeff_chart.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"snpeff_chart","extname":".png","collection":null,"path":"/shared/images/snpeff_chart.png"},{"name":"snpeff_codons.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"snpeff_codons","extname":".png","collection":null,"path":"/shared/images/snpeff_codons.png"},{"name":"sorted_list.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"sorted_list","extname":".png","collection":null,"path":"/shared/images/sorted_list.png"},{"name":"specificity.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"specificity","extname":".png","collection":null,"path":"/shared/images/specificity.png"},{"name":"star.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"star","extname":".png","collection":null,"path":"/shared/images/star.png"},{"name":"stranded_protocols.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"stranded_protocols","extname":".png","collection":null,"path":"/shared/images/stranded_protocols.png"},{"name":"stranded_result.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"stranded_result","extname":".png","collection":null,"path":"/shared/images/stranded_result.png"},{"name":"stringtie1.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"stringtie1","extname":".png","collection":null,"path":"/shared/images/stringtie1.png"},{"name":"stringtie2.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"stringtie2","extname":".png","collection":null,"path":"/shared/images/stringtie2.png"},{"name":"substring_trie.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"substring_trie","extname":".png","collection":null,"path":"/shared/images/substring_trie.png"},{"name":"suffix_tree_1.png","modified_time":"2021-05-31 04:45:05 
-0500","basename":"suffix_tree_1","extname":".png","collection":null,"path":"/shared/images/suffix_tree_1.png"},{"name":"suffix_tree_2.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"suffix_tree_2","extname":".png","collection":null,"path":"/shared/images/suffix_tree_2.png"},{"name":"suffix_tree_3.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"suffix_tree_3","extname":".png","collection":null,"path":"/shared/images/suffix_tree_3.png"},{"name":"suffix_tree_4.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"suffix_tree_4","extname":".png","collection":null,"path":"/shared/images/suffix_tree_4.png"},{"name":"suffix_tree_5.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"suffix_tree_5","extname":".png","collection":null,"path":"/shared/images/suffix_tree_5.png"},{"name":"suffix_trie_1.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"suffix_trie_1","extname":".png","collection":null,"path":"/shared/images/suffix_trie_1.png"},{"name":"suffix_trie_2.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"suffix_trie_2","extname":".png","collection":null,"path":"/shared/images/suffix_trie_2.png"},{"name":"suffix_trie_3.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"suffix_trie_3","extname":".png","collection":null,"path":"/shared/images/suffix_trie_3.png"},{"name":"suffix_trie_4.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"suffix_trie_4","extname":".png","collection":null,"path":"/shared/images/suffix_trie_4.png"},{"name":"suffix_trie_5.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"suffix_trie_5","extname":".png","collection":null,"path":"/shared/images/suffix_trie_5.png"},{"name":"suffix_trie_6.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"suffix_trie_6","extname":".png","collection":null,"path":"/shared/images/suffix_trie_6.png"},{"name":"suffix_trie_7.png","modified_time":"2021-05-31 04:45:05 
-0500","basename":"suffix_trie_7","extname":".png","collection":null,"path":"/shared/images/suffix_trie_7.png"},{"name":"suffix_trie_8.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"suffix_trie_8","extname":".png","collection":null,"path":"/shared/images/suffix_trie_8.png"},{"name":"suffix_trie_9.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"suffix_trie_9","extname":".png","collection":null,"path":"/shared/images/suffix_trie_9.png"},{"name":"tools_collection_input.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"tools_collection_input","extname":".png","collection":null,"path":"/shared/images/tools_collection_input.png"},{"name":"tophat.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"tophat","extname":".png","collection":null,"path":"/shared/images/tophat.png"},{"name":"tophat2.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"tophat2","extname":".png","collection":null,"path":"/shared/images/tophat2.png"},{"name":"tophat_interface.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"tophat_interface","extname":".png","collection":null,"path":"/shared/images/tophat_interface.png"},{"name":"tophat_output.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"tophat_output","extname":".png","collection":null,"path":"/shared/images/tophat_output.png"},{"name":"training_infra.svg","modified_time":"2021-05-31 04:45:05 -0500","basename":"training_infra","extname":".svg","collection":null,"path":"/shared/images/training_infra.svg"},{"name":"trie.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"trie","extname":".png","collection":null,"path":"/shared/images/trie.png"},{"name":"trie_no_end.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"trie_no_end","extname":".png","collection":null,"path":"/shared/images/trie_no_end.png"},{"name":"tutorial_footer.png","modified_time":"2021-05-31 04:45:05 
-0500","basename":"tutorial_footer","extname":".png","collection":null,"path":"/shared/images/tutorial_footer.png"},{"name":"tutorial_footer.svg","modified_time":"2021-05-31 04:45:05 -0500","basename":"tutorial_footer","extname":".svg","collection":null,"path":"/shared/images/tutorial_footer.svg"},{"name":"tutorial_header.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"tutorial_header","extname":".png","collection":null,"path":"/shared/images/tutorial_header.png"},{"name":"tutorial_introduction.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"tutorial_introduction","extname":".png","collection":null,"path":"/shared/images/tutorial_introduction.png"},{"name":"tutorial_introduction.svg","modified_time":"2021-05-31 04:45:05 -0500","basename":"tutorial_introduction","extname":".svg","collection":null,"path":"/shared/images/tutorial_introduction.svg"},{"name":"tutorial_part.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"tutorial_part","extname":".png","collection":null,"path":"/shared/images/tutorial_part.png"},{"name":"tutorial_part.svg","modified_time":"2021-05-31 04:45:05 -0500","basename":"tutorial_part","extname":".svg","collection":null,"path":"/shared/images/tutorial_part.svg"},{"name":"ucsc_dm3.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"ucsc_dm3","extname":".png","collection":null,"path":"/shared/images/ucsc_dm3.png"},{"name":"unification_scheme.graphml","modified_time":"2021-05-31 04:45:05 -0500","basename":"unification_scheme","extname":".graphml","collection":null,"path":"/shared/images/unification_scheme.graphml"},{"name":"unification_scheme.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"unification_scheme","extname":".png","collection":null,"path":"/shared/images/unification_scheme.png"},{"name":"vcfallelicprimitives.png","modified_time":"2021-05-31 04:45:05 
-0500","basename":"vcfallelicprimitives","extname":".png","collection":null,"path":"/shared/images/vcfallelicprimitives.png"},{"name":"vib_logo_white.svg","modified_time":"2021-05-31 04:45:05 -0500","basename":"vib_logo_white","extname":".svg","collection":null,"path":"/shared/images/vib_logo_white.svg"},{"name":"viewatphinch.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"viewatphinch","extname":".png","collection":null,"path":"/shared/images/viewatphinch.png"},{"name":"we_need_you.jpg","modified_time":"2021-05-31 04:45:05 -0500","basename":"we_need_you","extname":".jpg","collection":null,"path":"/shared/images/we_need_you.jpg"},{"name":"within_norm.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"within_norm","extname":".png","collection":null,"path":"/shared/images/within_norm.png"},{"name":"workflow_editor_mark_output.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"workflow_editor_mark_output","extname":".png","collection":null,"path":"/shared/images/workflow_editor_mark_output.png"},{"name":"workflow_editor_save.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"workflow_editor_save","extname":".png","collection":null,"path":"/shared/images/workflow_editor_save.png"},{"name":"literature.md","modified_time":"2021-05-31 04:45:05 -0500","basename":"literature","extname":".md","collection":null,"path":"/shared/literature.md"},{"name":"add_custom_build.md","modified_time":"2021-05-31 04:45:05 -0500","basename":"add_custom_build","extname":".md","collection":null,"path":"/snippets/add_custom_build.md"},{"name":"add_tag.md","modified_time":"2021-05-31 04:45:05 -0500","basename":"add_tag","extname":".md","collection":null,"path":"/snippets/add_tag.md"},{"name":"ansible_local.md","modified_time":"2021-05-31 04:45:05 -0500","basename":"ansible_local","extname":".md","collection":null,"path":"/snippets/ansible_local.md"},{"name":"build_dataset_list.md","modified_time":"2021-05-31 04:45:05 
-0500","basename":"build_dataset_list","extname":".md","collection":null,"path":"/snippets/build_dataset_list.md"},{"name":"build_list_collection.md","modified_time":"2021-05-31 04:45:05 -0500","basename":"build_list_collection","extname":".md","collection":null,"path":"/snippets/build_list_collection.md"},{"name":"change_datatype.md","modified_time":"2021-05-31 04:45:05 -0500","basename":"change_datatype","extname":".md","collection":null,"path":"/snippets/change_datatype.md"},{"name":"change_dbkey.md","modified_time":"2021-05-31 04:45:05 -0500","basename":"change_dbkey","extname":".md","collection":null,"path":"/snippets/change_dbkey.md"},{"name":"create_dataset_collection.md","modified_time":"2021-05-31 04:45:05 -0500","basename":"create_dataset_collection","extname":".md","collection":null,"path":"/snippets/create_dataset_collection.md"},{"name":"create_new_file.md","modified_time":"2021-05-31 04:45:05 -0500","basename":"create_new_file","extname":".md","collection":null,"path":"/snippets/create_new_file.md"},{"name":"create_new_history.md","modified_time":"2021-05-31 04:45:05 -0500","basename":"create_new_history","extname":".md","collection":null,"path":"/snippets/create_new_history.md"},{"name":"display_extra_training.md","modified_time":"2021-05-31 04:45:05 -0500","basename":"display_extra_training","extname":".md","collection":null,"path":"/snippets/display_extra_training.md"},{"name":"display_extra_training_slides.md","modified_time":"2021-05-31 04:45:05 -0500","basename":"display_extra_training_slides","extname":".md","collection":null,"path":"/snippets/display_extra_training_slides.md"},{"name":"extra_protein.md","modified_time":"2021-05-31 04:45:05 -0500","basename":"extra_protein","extname":".md","collection":null,"path":"/snippets/extra_protein.md"},{"name":"extract_workflow.md","modified_time":"2021-05-31 04:45:05 
-0500","basename":"extract_workflow","extname":".md","collection":null,"path":"/snippets/extract_workflow.md"},{"name":"history_create_new.md","modified_time":"2021-05-31 04:45:05 -0500","basename":"history_create_new","extname":".md","collection":null,"path":"/snippets/history_create_new.md"},{"name":"import_from_data_library.md","modified_time":"2021-05-31 04:45:05 -0500","basename":"import_from_data_library","extname":".md","collection":null,"path":"/snippets/import_from_data_library.md"},{"name":"import_via_link.md","modified_time":"2021-05-31 04:45:05 -0500","basename":"import_via_link","extname":".md","collection":null,"path":"/snippets/import_via_link.md"},{"name":"import_workflow.md","modified_time":"2021-05-31 04:45:05 -0500","basename":"import_workflow","extname":".md","collection":null,"path":"/snippets/import_workflow.md"},{"name":"rename_dataset.md","modified_time":"2021-05-31 04:45:05 -0500","basename":"rename_dataset","extname":".md","collection":null,"path":"/snippets/rename_dataset.md"},{"name":"rename_history.md","modified_time":"2021-05-31 04:45:05 -0500","basename":"rename_history","extname":".md","collection":null,"path":"/snippets/rename_history.md"},{"name":"run_workflow.md","modified_time":"2021-05-31 04:45:05 -0500","basename":"run_workflow","extname":".md","collection":null,"path":"/snippets/run_workflow.md"},{"name":"select_collection.md","modified_time":"2021-05-31 04:45:05 -0500","basename":"select_collection","extname":".md","collection":null,"path":"/snippets/select_collection.md"},{"name":"select_multiple_datasets.md","modified_time":"2021-05-31 04:45:05 -0500","basename":"select_multiple_datasets","extname":".md","collection":null,"path":"/snippets/select_multiple_datasets.md"},{"name":"use_scratchbook.md","modified_time":"2021-05-31 04:45:05 -0500","basename":"use_scratchbook","extname":".md","collection":null,"path":"/snippets/use_scratchbook.md"},{"name":"warning_results_may_vary.md","modified_time":"2021-05-31 04:45:05 
-0500","basename":"warning_results_may_vary","extname":".md","collection":null,"path":"/snippets/warning_results_may_vary.md"},{"name":"RNAseq_histiry_image.key","modified_time":"2021-05-31 04:45:05 -0500","basename":"RNAseq_histiry_image","extname":".key","collection":null,"path":"/topics/R/images/RNAseq_histiry_image.key"},{"name":"Rarithmetic_operators.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rarithmetic_operators","extname":".png","collection":null,"path":"/topics/R/images/Rarithmetic_operators.png"},{"name":"Rautocompletion.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rautocompletion","extname":".png","collection":null,"path":"/topics/R/images/Rautocompletion.png"},{"name":"Rcomment_uncomment.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rcomment_uncomment","extname":".png","collection":null,"path":"/topics/R/images/Rcomment_uncomment.png"},{"name":"Rexport_plot.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rexport_plot","extname":".png","collection":null,"path":"/topics/R/images/Rexport_plot.png"},{"name":"Rfile_imported.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rfile_imported","extname":".png","collection":null,"path":"/topics/R/images/Rfile_imported.png"},{"name":"Rgeneral_functions.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rgeneral_functions","extname":".png","collection":null,"path":"/topics/R/images/Rgeneral_functions.png"},{"name":"Rinstall_zip.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rinstall_zip","extname":".png","collection":null,"path":"/topics/R/images/Rinstall_zip.png"},{"name":"Rlogic_operators.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rlogic_operators","extname":".png","collection":null,"path":"/topics/R/images/Rlogic_operators.png"},{"name":"Rnew_script.png","modified_time":"2021-05-31 04:45:05 
-0500","basename":"Rnew_script","extname":".png","collection":null,"path":"/topics/R/images/Rnew_script.png"},{"name":"Rrefresh_button.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rrefresh_button","extname":".png","collection":null,"path":"/topics/R/images/Rrefresh_button.png"},{"name":"Rsave_script.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rsave_script","extname":".png","collection":null,"path":"/topics/R/images/Rsave_script.png"},{"name":"Rsection_headings.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rsection_headings","extname":".png","collection":null,"path":"/topics/R/images/Rsection_headings.png"},{"name":"Rtable_function.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rtable_function","extname":".png","collection":null,"path":"/topics/R/images/Rtable_function.png"},{"name":"Rview_file.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rview_file","extname":".png","collection":null,"path":"/topics/R/images/Rview_file.png"},{"name":"assembly-general-introduction.ga","modified_time":"2021-05-31 04:45:05 -0500","basename":"assembly-general-introduction","extname":".ga","collection":null,"path":"/topics/basic-bioinformatics/tutorials/general-introduction/workflows/assembly-general-introduction.ga"},{"name":"RNAseq_histiry_image.key","modified_time":"2021-05-31 04:45:05 -0500","basename":"RNAseq_histiry_image","extname":".key","collection":null,"path":"/topics/basic-statistics/images/RNAseq_histiry_image.key"},{"name":"Rarithmetic_operators.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rarithmetic_operators","extname":".png","collection":null,"path":"/topics/basic-statistics/images/Rarithmetic_operators.png"},{"name":"Rautocompletion.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rautocompletion","extname":".png","collection":null,"path":"/topics/basic-statistics/images/Rautocompletion.png"},{"name":"Rcomment_uncomment.png","modified_time":"2021-05-31 04:45:05 
-0500","basename":"Rcomment_uncomment","extname":".png","collection":null,"path":"/topics/basic-statistics/images/Rcomment_uncomment.png"},{"name":"Rexport_plot.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rexport_plot","extname":".png","collection":null,"path":"/topics/basic-statistics/images/Rexport_plot.png"},{"name":"Rfile_imported.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rfile_imported","extname":".png","collection":null,"path":"/topics/basic-statistics/images/Rfile_imported.png"},{"name":"Rgeneral_functions.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rgeneral_functions","extname":".png","collection":null,"path":"/topics/basic-statistics/images/Rgeneral_functions.png"},{"name":"Rinstall_zip.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rinstall_zip","extname":".png","collection":null,"path":"/topics/basic-statistics/images/Rinstall_zip.png"},{"name":"Rlogic_operators.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rlogic_operators","extname":".png","collection":null,"path":"/topics/basic-statistics/images/Rlogic_operators.png"},{"name":"Rnew_script.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rnew_script","extname":".png","collection":null,"path":"/topics/basic-statistics/images/Rnew_script.png"},{"name":"Rrefresh_button.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rrefresh_button","extname":".png","collection":null,"path":"/topics/basic-statistics/images/Rrefresh_button.png"},{"name":"Rsave_script.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rsave_script","extname":".png","collection":null,"path":"/topics/basic-statistics/images/Rsave_script.png"},{"name":"Rsection_headings.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rsection_headings","extname":".png","collection":null,"path":"/topics/basic-statistics/images/Rsection_headings.png"},{"name":"Rtable_function.png","modified_time":"2021-05-31 04:45:05 
-0500","basename":"Rtable_function","extname":".png","collection":null,"path":"/topics/basic-statistics/images/Rtable_function.png"},{"name":"Rview_file.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rview_file","extname":".png","collection":null,"path":"/topics/basic-statistics/images/Rview_file.png"},{"name":"DTLorenz.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"DTLorenz","extname":".png","collection":null,"path":"/topics/chip-seq/images/DTLorenz.png"},{"name":"ENA2.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"ENA2","extname":".png","collection":null,"path":"/topics/chip-seq/images/ENA2.png"},{"name":"ENA3.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"ENA3","extname":".png","collection":null,"path":"/topics/chip-seq/images/ENA3.png"},{"name":"FASTQC11b.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"FASTQC11b","extname":".png","collection":null,"path":"/topics/chip-seq/images/FASTQC11b.png"},{"name":"FASTQC12b.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"FASTQC12b","extname":".png","collection":null,"path":"/topics/chip-seq/images/FASTQC12b.png"},{"name":"FASTQC13b.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"FASTQC13b","extname":".png","collection":null,"path":"/topics/chip-seq/images/FASTQC13b.png"},{"name":"FASTQC14b.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"FASTQC14b","extname":".png","collection":null,"path":"/topics/chip-seq/images/FASTQC14b.png"},{"name":"FASTQC14c.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"FASTQC14c","extname":".png","collection":null,"path":"/topics/chip-seq/images/FASTQC14c.png"},{"name":"FASTQC15b.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"FASTQC15b","extname":".png","collection":null,"path":"/topics/chip-seq/images/FASTQC15b.png"},{"name":"FASTQC15c.png","modified_time":"2021-05-31 04:45:05 
-0500","basename":"FASTQC15c","extname":".png","collection":null,"path":"/topics/chip-seq/images/FASTQC15c.png"},{"name":"FASTQC15d.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"FASTQC15d","extname":".png","collection":null,"path":"/topics/chip-seq/images/FASTQC15d.png"},{"name":"FASTQC17b.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"FASTQC17b","extname":".png","collection":null,"path":"/topics/chip-seq/images/FASTQC17b.png"},{"name":"FASTQC9b.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"FASTQC9b","extname":".png","collection":null,"path":"/topics/chip-seq/images/FASTQC9b.png"},{"name":"GEO1.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"GEO1","extname":".png","collection":null,"path":"/topics/chip-seq/images/GEO1.png"},{"name":"GEO2.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"GEO2","extname":".png","collection":null,"path":"/topics/chip-seq/images/GEO2.png"},{"name":"GEO3.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"GEO3","extname":".png","collection":null,"path":"/topics/chip-seq/images/GEO3.png"},{"name":"GEO4.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"GEO4","extname":".png","collection":null,"path":"/topics/chip-seq/images/GEO4.png"},{"name":"GEO5.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"GEO5","extname":".png","collection":null,"path":"/topics/chip-seq/images/GEO5.png"},{"name":"GP11.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"GP11","extname":".png","collection":null,"path":"/topics/chip-seq/images/GP11.png"},{"name":"GP12.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"GP12","extname":".png","collection":null,"path":"/topics/chip-seq/images/GP12.png"},{"name":"GP13.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"GP13","extname":".png","collection":null,"path":"/topics/chip-seq/images/GP13.png"},{"name":"GP14.png","modified_time":"2021-05-31 04:45:05 
-0500","basename":"GP14","extname":".png","collection":null,"path":"/topics/chip-seq/images/GP14.png"},{"name":"GP16.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"GP16","extname":".png","collection":null,"path":"/topics/chip-seq/images/GP16.png"},{"name":"GP18.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"GP18","extname":".png","collection":null,"path":"/topics/chip-seq/images/GP18.png"},{"name":"GP2b.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"GP2b","extname":".png","collection":null,"path":"/topics/chip-seq/images/GP2b.png"},{"name":"GP3b.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"GP3b","extname":".png","collection":null,"path":"/topics/chip-seq/images/GP3b.png"},{"name":"GP4.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"GP4","extname":".png","collection":null,"path":"/topics/chip-seq/images/GP4.png"},{"name":"GP4a.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"GP4a","extname":".png","collection":null,"path":"/topics/chip-seq/images/GP4a.png"},{"name":"GP5.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"GP5","extname":".png","collection":null,"path":"/topics/chip-seq/images/GP5.png"},{"name":"GPAddPaths.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"GPAddPaths","extname":".png","collection":null,"path":"/topics/chip-seq/images/GPAddPaths.png"},{"name":"GPBowtie1.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"GPBowtie1","extname":".png","collection":null,"path":"/topics/chip-seq/images/GPBowtie1.png"},{"name":"GPBowtie2.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"GPBowtie2","extname":".png","collection":null,"path":"/topics/chip-seq/images/GPBowtie2.png"},{"name":"GPBowtie3.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"GPBowtie3","extname":".png","collection":null,"path":"/topics/chip-seq/images/GPBowtie3.png"},{"name":"GPBowtie4.png","modified_time":"2021-05-31 04:45:05 
-0500","basename":"GPBowtie4","extname":".png","collection":null,"path":"/topics/chip-seq/images/GPBowtie4.png"},{"name":"GPBowtie5.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"GPBowtie5","extname":".png","collection":null,"path":"/topics/chip-seq/images/GPBowtie5.png"},{"name":"GPDownloadBdg.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"GPDownloadBdg","extname":".png","collection":null,"path":"/topics/chip-seq/images/GPDownloadBdg.png"},{"name":"GPPL.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"GPPL","extname":".png","collection":null,"path":"/topics/chip-seq/images/GPPL.png"},{"name":"GPPL2.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"GPPL2","extname":".png","collection":null,"path":"/topics/chip-seq/images/GPPL2.png"},{"name":"GPPL3.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"GPPL3","extname":".png","collection":null,"path":"/topics/chip-seq/images/GPPL3.png"},{"name":"GPSharedData.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"GPSharedData","extname":".png","collection":null,"path":"/topics/chip-seq/images/GPSharedData.png"},{"name":"GPUpload.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"GPUpload","extname":".png","collection":null,"path":"/topics/chip-seq/images/GPUpload.png"},{"name":"GPUpload2.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"GPUpload2","extname":".png","collection":null,"path":"/topics/chip-seq/images/GPUpload2.png"},{"name":"Genome2.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Genome2","extname":".png","collection":null,"path":"/topics/chip-seq/images/Genome2.png"},{"name":"IGVLoadFile (1).png","modified_time":"2021-05-31 04:45:05 -0500","basename":"IGVLoadFile (1)","extname":".png","collection":null,"path":"/topics/chip-seq/images/IGVLoadFile (1).png"},{"name":"IGVLoadFile.png","modified_time":"2021-05-31 04:45:05 
-0500","basename":"IGVLoadFile","extname":".png","collection":null,"path":"/topics/chip-seq/images/IGVLoadFile.png"},{"name":"IGVLoadFile2.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"IGVLoadFile2","extname":".png","collection":null,"path":"/topics/chip-seq/images/IGVLoadFile2.png"},{"name":"IGVLoadFile3.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"IGVLoadFile3","extname":".png","collection":null,"path":"/topics/chip-seq/images/IGVLoadFile3.png"},{"name":"IGVLoadFile4.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"IGVLoadFile4","extname":".png","collection":null,"path":"/topics/chip-seq/images/IGVLoadFile4.png"},{"name":"IGVLoadFile5.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"IGVLoadFile5","extname":".png","collection":null,"path":"/topics/chip-seq/images/IGVLoadFile5.png"},{"name":"IGVLoadGenome.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"IGVLoadGenome","extname":".png","collection":null,"path":"/topics/chip-seq/images/IGVLoadGenome.png"},{"name":"IGVLoadGenome2.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"IGVLoadGenome2","extname":".png","collection":null,"path":"/topics/chip-seq/images/IGVLoadGenome2.png"},{"name":"IGVbams.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"IGVbams","extname":".png","collection":null,"path":"/topics/chip-seq/images/IGVbams.png"},{"name":"IGVpepT.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"IGVpepT","extname":".png","collection":null,"path":"/topics/chip-seq/images/IGVpepT.png"},{"name":"NCBIGFF3.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"NCBIGFF3","extname":".png","collection":null,"path":"/topics/chip-seq/images/NCBIGFF3.png"},{"name":"mapping1.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"mapping1","extname":".png","collection":null,"path":"/topics/chip-seq/images/mapping1.png"},{"name":"mapping2.png","modified_time":"2021-05-31 04:45:05 
-0500","basename":"mapping2","extname":".png","collection":null,"path":"/topics/chip-seq/images/mapping2.png"},{"name":"mapping4.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"mapping4","extname":".png","collection":null,"path":"/topics/chip-seq/images/mapping4.png"},{"name":"mapping6.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"mapping6","extname":".png","collection":null,"path":"/topics/chip-seq/images/mapping6.png"},{"name":"00_Folder-structure.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"00_Folder-structure","extname":".png","collection":null,"path":"/topics/data-management-plans/images/00_Folder-structure.png"},{"name":"00_Metadata.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"00_Metadata","extname":".png","collection":null,"path":"/topics/data-management-plans/images/00_Metadata.png"},{"name":"00_Preferred-formats.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"00_Preferred-formats","extname":".png","collection":null,"path":"/topics/data-management-plans/images/00_Preferred-formats.png"},{"name":"00_privacy-sensitive-data.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"00_privacy-sensitive-data","extname":".png","collection":null,"path":"/topics/data-management-plans/images/00_privacy-sensitive-data.png"},{"name":"01_Folder-structure-Learning-Objective.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"01_Folder-structure-Learning-Objective","extname":".png","collection":null,"path":"/topics/data-management-plans/images/01_Folder-structure-Learning-Objective.png"},{"name":"01_Metadata_Learning_Objective.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"01_Metadata_Learning_Objective","extname":".png","collection":null,"path":"/topics/data-management-plans/images/01_Metadata_Learning_Objective.png"},{"name":"01_Preferred-formats_Learning_Objective.png","modified_time":"2021-05-31 04:45:05 
-0500","basename":"01_Preferred-formats_Learning_Objective","extname":".png","collection":null,"path":"/topics/data-management-plans/images/01_Preferred-formats_Learning_Objective.png"},{"name":"01_privacy-sensitive-data-learning-objectives.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"01_privacy-sensitive-data-learning-objectives","extname":".png","collection":null,"path":"/topics/data-management-plans/images/01_privacy-sensitive-data-learning-objectives.png"},{"name":"02_Folder-structrue-introduction-file-management.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"02_Folder-structrue-introduction-file-management","extname":".png","collection":null,"path":"/topics/data-management-plans/images/02_Folder-structrue-introduction-file-management.png"},{"name":"02_Metadata_Lab-Notebook.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"02_Metadata_Lab-Notebook","extname":".png","collection":null,"path":"/topics/data-management-plans/images/02_Metadata_Lab-Notebook.png"},{"name":"02_Preferred-formats_proprietary-formats-01.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"02_Preferred-formats_proprietary-formats-01","extname":".png","collection":null,"path":"/topics/data-management-plans/images/02_Preferred-formats_proprietary-formats-01.png"},{"name":"02_Preferred-formats_proprietary-formats-02.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"02_Preferred-formats_proprietary-formats-02","extname":".png","collection":null,"path":"/topics/data-management-plans/images/02_Preferred-formats_proprietary-formats-02.png"},{"name":"02_privacy-sensitive-data-personal-data-01.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"02_privacy-sensitive-data-personal-data-01","extname":".png","collection":null,"path":"/topics/data-management-plans/images/02_privacy-sensitive-data-personal-data-01.png"},{"name":"03_Folder-structure-batch-renaming.png","modified_time":"2021-05-31 04:45:05 
-0500","basename":"03_Folder-structure-batch-renaming","extname":".png","collection":null,"path":"/topics/data-management-plans/images/03_Folder-structure-batch-renaming.png"},{"name":"03_Metadata-controlled-vocabulary.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"03_Metadata-controlled-vocabulary","extname":".png","collection":null,"path":"/topics/data-management-plans/images/03_Metadata-controlled-vocabulary.png"},{"name":"03_Preferred-formats-file-conversion.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"03_Preferred-formats-file-conversion","extname":".png","collection":null,"path":"/topics/data-management-plans/images/03_Preferred-formats-file-conversion.png"},{"name":"04_Folder-structure-version-control.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"04_Folder-structure-version-control","extname":".png","collection":null,"path":"/topics/data-management-plans/images/04_Folder-structure-version-control.png"},{"name":"04_Preferred-formats-data-compression.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"04_Preferred-formats-data-compression","extname":".png","collection":null,"path":"/topics/data-management-plans/images/04_Preferred-formats-data-compression.png"},{"name":"AgreementsPicture.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"AgreementsPicture","extname":".png","collection":null,"path":"/topics/data-management-plans/images/AgreementsPicture.png"},{"name":"CC.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"CC","extname":".png","collection":null,"path":"/topics/data-management-plans/images/CC.png"},{"name":"Cont_5_Share_SelectPreserve_Chart10years.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Cont_5_Share_SelectPreserve_Chart10years","extname":".png","collection":null,"path":"/topics/data-management-plans/images/Cont_5_Share_SelectPreserve_Chart10years.png"},{"name":"Cost overview.docx","modified_time":"2021-05-31 04:45:05 -0500","basename":"Cost 
overview","extname":".docx","collection":null,"path":"/topics/data-management-plans/images/Cost overview.docx"},{"name":"LCRDM-privacy-reference-card -why-Version-02.pdf","modified_time":"2021-05-31 04:45:05 -0500","basename":"LCRDM-privacy-reference-card -why-Version-02","extname":".pdf","collection":null,"path":"/topics/data-management-plans/images/LCRDM-privacy-reference-card -why-Version-02.pdf"},{"name":"Logo.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Logo","extname":".png","collection":null,"path":"/topics/data-management-plans/images/Logo.png"},{"name":"O_Funders_Screenshot_H2020guidelines.JPG","modified_time":"2021-05-31 04:45:05 -0500","basename":"O_Funders_Screenshot_H2020guidelines","extname":".JPG","collection":null,"path":"/topics/data-management-plans/images/O_Funders_Screenshot_H2020guidelines.JPG"},{"name":"PreferredFormatsExcersizePenguinDOC.docx","modified_time":"2021-05-31 04:45:05 -0500","basename":"PreferredFormatsExcersizePenguinDOC","extname":".docx","collection":null,"path":"/topics/data-management-plans/images/PreferredFormatsExcersizePenguinDOC.docx"},{"name":"Workflow-for-experimental-research.pdf","modified_time":"2021-05-31 04:45:05 -0500","basename":"Workflow-for-experimental-research","extname":".pdf","collection":null,"path":"/topics/data-management-plans/images/Workflow-for-experimental-research.pdf"},{"name":"data-breaches.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"data-breaches","extname":".png","collection":null,"path":"/topics/data-management-plans/images/data-breaches.png"},{"name":"FunGSEA_FF.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"FunGSEA_FF","extname":".png","collection":null,"path":"/topics/functional_analysis/images/FunGSEA_FF.png"},{"name":"FunGSEA_FF2.png","modified_time":"2021-05-31 04:45:05 
-0500","basename":"FunGSEA_FF2","extname":".png","collection":null,"path":"/topics/functional_analysis/images/FunGSEA_FF2.png"},{"name":"FunGSEA_Interface.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"FunGSEA_Interface","extname":".png","collection":null,"path":"/topics/functional_analysis/images/FunGSEA_Interface.png"},{"name":"FunGSEA_Status.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"FunGSEA_Status","extname":".png","collection":null,"path":"/topics/functional_analysis/images/FunGSEA_Status.png"},{"name":"FunTopp_NF.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"FunTopp_NF","extname":".png","collection":null,"path":"/topics/functional_analysis/images/FunTopp_NF.png"},{"name":"FunTopp_Results.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"FunTopp_Results","extname":".png","collection":null,"path":"/topics/functional_analysis/images/FunTopp_Results.png"},{"name":"FunWebG_Results1.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"FunWebG_Results1","extname":".png","collection":null,"path":"/topics/functional_analysis/images/FunWebG_Results1.png"},{"name":"FunWebG_Results2.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"FunWebG_Results2","extname":".png","collection":null,"path":"/topics/functional_analysis/images/FunWebG_Results2.png"},{"name":"FungP_Interface.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"FungP_Interface","extname":".png","collection":null,"path":"/topics/functional_analysis/images/FungP_Interface.png"},{"name":"FungP_Ranked.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"FungP_Ranked","extname":".png","collection":null,"path":"/topics/functional_analysis/images/FungP_Ranked.png"},{"name":"FungP_Results.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"FungP_Results","extname":".png","collection":null,"path":"/topics/functional_analysis/images/FungP_Results.png"},{"name":"Dockerfile","modified_time":"2021-05-31 04:45:05 
-0500","basename":"Dockerfile","extname":"","collection":null,"path":"/topics/git-introduction/docker/Dockerfile"},{"name":"02-1-create-repository.PNG","modified_time":"2021-05-31 04:45:05 -0500","basename":"02-1-create-repository","extname":".PNG","collection":null,"path":"/topics/git-introduction/images/02-1-create-repository.PNG"},{"name":"02-2-create-repository.PNG","modified_time":"2021-05-31 04:45:05 -0500","basename":"02-2-create-repository","extname":".PNG","collection":null,"path":"/topics/git-introduction/images/02-2-create-repository.PNG"},{"name":"02-3-create-empty-repository.PNG","modified_time":"2021-05-31 04:45:05 -0500","basename":"02-3-create-empty-repository","extname":".PNG","collection":null,"path":"/topics/git-introduction/images/02-3-create-empty-repository.PNG"},{"name":"02-3-create-readme-repository.PNG","modified_time":"2021-05-31 04:45:05 -0500","basename":"02-3-create-readme-repository","extname":".PNG","collection":null,"path":"/topics/git-introduction/images/02-3-create-readme-repository.PNG"},{"name":"Exercise-fork-1.PNG","modified_time":"2021-05-31 04:45:05 -0500","basename":"Exercise-fork-1","extname":".PNG","collection":null,"path":"/topics/git-introduction/images/Exercise-fork-1.PNG"},{"name":"Exercise-fork-2.PNG","modified_time":"2021-05-31 04:45:05 -0500","basename":"Exercise-fork-2","extname":".PNG","collection":null,"path":"/topics/git-introduction/images/Exercise-fork-2.PNG"},{"name":"branching.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"branching","extname":".png","collection":null,"path":"/topics/git-introduction/images/branching.png"},{"name":"commitReadme.PNG","modified_time":"2021-05-31 04:45:05 -0500","basename":"commitReadme","extname":".PNG","collection":null,"path":"/topics/git-introduction/images/commitReadme.PNG"},{"name":"conceptual_areas.png","modified_time":"2021-05-31 04:45:05 
-0500","basename":"conceptual_areas","extname":".png","collection":null,"path":"/topics/git-introduction/images/conceptual_areas.png"},{"name":"conceptual_areas_branching.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"conceptual_areas_branching","extname":".png","collection":null,"path":"/topics/git-introduction/images/conceptual_areas_branching.png"},{"name":"conceptual_areas_push.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"conceptual_areas_push","extname":".png","collection":null,"path":"/topics/git-introduction/images/conceptual_areas_push.png"},{"name":"conceptual_areas_push_pull.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"conceptual_areas_push_pull","extname":".png","collection":null,"path":"/topics/git-introduction/images/conceptual_areas_push_pull.png"},{"name":"deleting-branch-1-update.PNG","modified_time":"2021-05-31 04:45:05 -0500","basename":"deleting-branch-1-update","extname":".PNG","collection":null,"path":"/topics/git-introduction/images/deleting-branch-1-update.PNG"},{"name":"deleting-branch-1.PNG","modified_time":"2021-05-31 04:45:05 -0500","basename":"deleting-branch-1","extname":".PNG","collection":null,"path":"/topics/git-introduction/images/deleting-branch-1.PNG"},{"name":"deleting-branch-2.PNG","modified_time":"2021-05-31 04:45:05 -0500","basename":"deleting-branch-2","extname":".PNG","collection":null,"path":"/topics/git-introduction/images/deleting-branch-2.PNG"},{"name":"edited-forked-repository.PNG","modified_time":"2021-05-31 04:45:05 -0500","basename":"edited-forked-repository","extname":".PNG","collection":null,"path":"/topics/git-introduction/images/edited-forked-repository.PNG"},{"name":"folder1.PNG","modified_time":"2021-05-31 04:45:05 -0500","basename":"folder1","extname":".PNG","collection":null,"path":"/topics/git-introduction/images/folder1.PNG"},{"name":"fork-button.PNG","modified_time":"2021-05-31 04:45:05 
-0500","basename":"fork-button","extname":".PNG","collection":null,"path":"/topics/git-introduction/images/fork-button.PNG"},{"name":"fork-clone-local.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"fork-clone-local","extname":".png","collection":null,"path":"/topics/git-introduction/images/fork-clone-local.png"},{"name":"fork-clone-representation.PNG","modified_time":"2021-05-31 04:45:05 -0500","basename":"fork-clone-representation","extname":".PNG","collection":null,"path":"/topics/git-introduction/images/fork-clone-representation.PNG"},{"name":"fork-clone-representation.png_revision=1","modified_time":"2021-05-31 04:45:05 -0500","basename":"fork-clone-representation","extname":".png_revision=1","collection":null,"path":"/topics/git-introduction/images/fork-clone-representation.png_revision=1"},{"name":"fork_pull.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"fork_pull","extname":".png","collection":null,"path":"/topics/git-introduction/images/fork_pull.png"},{"name":"forked-pull-request.PNG","modified_time":"2021-05-31 04:45:05 -0500","basename":"forked-pull-request","extname":".PNG","collection":null,"path":"/topics/git-introduction/images/forked-pull-request.PNG"},{"name":"forked-repository-ahead.PNG","modified_time":"2021-05-31 04:45:05 -0500","basename":"forked-repository-ahead","extname":".PNG","collection":null,"path":"/topics/git-introduction/images/forked-repository-ahead.PNG"},{"name":"forked-repository-final-pull-request.PNG","modified_time":"2021-05-31 04:45:05 -0500","basename":"forked-repository-final-pull-request","extname":".PNG","collection":null,"path":"/topics/git-introduction/images/forked-repository-final-pull-request.PNG"},{"name":"forked-repository.PNG","modified_time":"2021-05-31 04:45:05 -0500","basename":"forked-repository","extname":".PNG","collection":null,"path":"/topics/git-introduction/images/forked-repository.PNG"},{"name":"gitignore.PNG","modified_time":"2021-05-31 04:45:05 
-0500","basename":"gitignore","extname":".PNG","collection":null,"path":"/topics/git-introduction/images/gitignore.PNG"},{"name":"nav-bar.PNG","modified_time":"2021-05-31 04:45:05 -0500","basename":"nav-bar","extname":".PNG","collection":null,"path":"/topics/git-introduction/images/nav-bar.PNG"},{"name":"newbranch-github.PNG","modified_time":"2021-05-31 04:45:05 -0500","basename":"newbranch-github","extname":".PNG","collection":null,"path":"/topics/git-introduction/images/newbranch-github.PNG"},{"name":"pull-request-1.PNG","modified_time":"2021-05-31 04:45:05 -0500","basename":"pull-request-1","extname":".PNG","collection":null,"path":"/topics/git-introduction/images/pull-request-1.PNG"},{"name":"pull-request-2.PNG","modified_time":"2021-05-31 04:45:05 -0500","basename":"pull-request-2","extname":".PNG","collection":null,"path":"/topics/git-introduction/images/pull-request-2.PNG"},{"name":"pull-request-3.PNG","modified_time":"2021-05-31 04:45:05 -0500","basename":"pull-request-3","extname":".PNG","collection":null,"path":"/topics/git-introduction/images/pull-request-3.PNG"},{"name":"pull-request-pre.PNG","modified_time":"2021-05-31 04:45:05 -0500","basename":"pull-request-pre","extname":".PNG","collection":null,"path":"/topics/git-introduction/images/pull-request-pre.PNG"},{"name":"rstudio-1.PNG","modified_time":"2021-05-31 04:45:05 -0500","basename":"rstudio-1","extname":".PNG","collection":null,"path":"/topics/git-introduction/images/rstudio-1.PNG"},{"name":"rstudio-2.PNG","modified_time":"2021-05-31 04:45:05 -0500","basename":"rstudio-2","extname":".PNG","collection":null,"path":"/topics/git-introduction/images/rstudio-2.PNG"},{"name":"rstudio-3.PNG","modified_time":"2021-05-31 04:45:05 -0500","basename":"rstudio-3","extname":".PNG","collection":null,"path":"/topics/git-introduction/images/rstudio-3.PNG"},{"name":"rstudio-4.PNG","modified_time":"2021-05-31 04:45:05 
-0500","basename":"rstudio-4","extname":".PNG","collection":null,"path":"/topics/git-introduction/images/rstudio-4.PNG"},{"name":"rstudio-5.PNG","modified_time":"2021-05-31 04:45:05 -0500","basename":"rstudio-5","extname":".PNG","collection":null,"path":"/topics/git-introduction/images/rstudio-5.PNG"},{"name":"rstudio-6.PNG","modified_time":"2021-05-31 04:45:05 -0500","basename":"rstudio-6","extname":".PNG","collection":null,"path":"/topics/git-introduction/images/rstudio-6.PNG"},{"name":"rstudio-7.PNG","modified_time":"2021-05-31 04:45:05 -0500","basename":"rstudio-7","extname":".PNG","collection":null,"path":"/topics/git-introduction/images/rstudio-7.PNG"},{"name":"rstudio-8-1.PNG","modified_time":"2021-05-31 04:45:05 -0500","basename":"rstudio-8-1","extname":".PNG","collection":null,"path":"/topics/git-introduction/images/rstudio-8-1.PNG"},{"name":"rstudio-8.PNG","modified_time":"2021-05-31 04:45:05 -0500","basename":"rstudio-8","extname":".PNG","collection":null,"path":"/topics/git-introduction/images/rstudio-8.PNG"},{"name":"rstudio-9.PNG","modified_time":"2021-05-31 04:45:05 -0500","basename":"rstudio-9","extname":".PNG","collection":null,"path":"/topics/git-introduction/images/rstudio-9.PNG"},{"name":"solution1.PNG","modified_time":"2021-05-31 04:45:05 -0500","basename":"solution1","extname":".PNG","collection":null,"path":"/topics/git-introduction/images/solution1.PNG"},{"name":"solution5.PNG","modified_time":"2021-05-31 04:45:05 -0500","basename":"solution5","extname":".PNG","collection":null,"path":"/topics/git-introduction/images/solution5.PNG"},{"name":"staging_area.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"staging_area","extname":".png","collection":null,"path":"/topics/git-introduction/images/staging_area.png"},{"name":"version-control-meme.png","modified_time":"2021-05-31 04:45:05 
-0500","basename":"version-control-meme","extname":".png","collection":null,"path":"/topics/git-introduction/images/version-control-meme.png"},{"name":"RNAseq_histiry_image.key","modified_time":"2021-05-31 04:45:05 -0500","basename":"RNAseq_histiry_image","extname":".key","collection":null,"path":"/topics/graphpad/images/RNAseq_histiry_image.key"},{"name":"Rarithmetic_operators.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rarithmetic_operators","extname":".png","collection":null,"path":"/topics/graphpad/images/Rarithmetic_operators.png"},{"name":"Rautocompletion.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rautocompletion","extname":".png","collection":null,"path":"/topics/graphpad/images/Rautocompletion.png"},{"name":"Rcomment_uncomment.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rcomment_uncomment","extname":".png","collection":null,"path":"/topics/graphpad/images/Rcomment_uncomment.png"},{"name":"Rexport_plot.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rexport_plot","extname":".png","collection":null,"path":"/topics/graphpad/images/Rexport_plot.png"},{"name":"Rfile_imported.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rfile_imported","extname":".png","collection":null,"path":"/topics/graphpad/images/Rfile_imported.png"},{"name":"Rgeneral_functions.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rgeneral_functions","extname":".png","collection":null,"path":"/topics/graphpad/images/Rgeneral_functions.png"},{"name":"Rinstall_zip.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rinstall_zip","extname":".png","collection":null,"path":"/topics/graphpad/images/Rinstall_zip.png"},{"name":"Rlogic_operators.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rlogic_operators","extname":".png","collection":null,"path":"/topics/graphpad/images/Rlogic_operators.png"},{"name":"Rnew_script.png","modified_time":"2021-05-31 04:45:05 
-0500","basename":"Rnew_script","extname":".png","collection":null,"path":"/topics/graphpad/images/Rnew_script.png"},{"name":"Rrefresh_button.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rrefresh_button","extname":".png","collection":null,"path":"/topics/graphpad/images/Rrefresh_button.png"},{"name":"Rsave_script.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rsave_script","extname":".png","collection":null,"path":"/topics/graphpad/images/Rsave_script.png"},{"name":"Rsection_headings.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rsection_headings","extname":".png","collection":null,"path":"/topics/graphpad/images/Rsection_headings.png"},{"name":"Rtable_function.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rtable_function","extname":".png","collection":null,"path":"/topics/graphpad/images/Rtable_function.png"},{"name":"Rview_file.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rview_file","extname":".png","collection":null,"path":"/topics/graphpad/images/Rview_file.png"},{"name":"RNAseq_histiry_image.key","modified_time":"2021-05-31 04:45:05 -0500","basename":"RNAseq_histiry_image","extname":".key","collection":null,"path":"/topics/metagenomics/images/RNAseq_histiry_image.key"},{"name":"Rarithmetic_operators.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rarithmetic_operators","extname":".png","collection":null,"path":"/topics/metagenomics/images/Rarithmetic_operators.png"},{"name":"Rautocompletion.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rautocompletion","extname":".png","collection":null,"path":"/topics/metagenomics/images/Rautocompletion.png"},{"name":"Rcomment_uncomment.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rcomment_uncomment","extname":".png","collection":null,"path":"/topics/metagenomics/images/Rcomment_uncomment.png"},{"name":"Rexport_plot.png","modified_time":"2021-05-31 04:45:05 
-0500","basename":"Rexport_plot","extname":".png","collection":null,"path":"/topics/metagenomics/images/Rexport_plot.png"},{"name":"Rfile_imported.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rfile_imported","extname":".png","collection":null,"path":"/topics/metagenomics/images/Rfile_imported.png"},{"name":"Rgeneral_functions.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rgeneral_functions","extname":".png","collection":null,"path":"/topics/metagenomics/images/Rgeneral_functions.png"},{"name":"Rinstall_zip.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rinstall_zip","extname":".png","collection":null,"path":"/topics/metagenomics/images/Rinstall_zip.png"},{"name":"Rlogic_operators.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rlogic_operators","extname":".png","collection":null,"path":"/topics/metagenomics/images/Rlogic_operators.png"},{"name":"Rnew_script.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rnew_script","extname":".png","collection":null,"path":"/topics/metagenomics/images/Rnew_script.png"},{"name":"Rrefresh_button.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rrefresh_button","extname":".png","collection":null,"path":"/topics/metagenomics/images/Rrefresh_button.png"},{"name":"Rsave_script.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rsave_script","extname":".png","collection":null,"path":"/topics/metagenomics/images/Rsave_script.png"},{"name":"Rsection_headings.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rsection_headings","extname":".png","collection":null,"path":"/topics/metagenomics/images/Rsection_headings.png"},{"name":"Rtable_function.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Rtable_function","extname":".png","collection":null,"path":"/topics/metagenomics/images/Rtable_function.png"},{"name":"Rview_file.png","modified_time":"2021-05-31 04:45:05 
-0500","basename":"Rview_file","extname":".png","collection":null,"path":"/topics/metagenomics/images/Rview_file.png"},{"name":"AE1.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"AE1","extname":".png","collection":null,"path":"/topics/ngs-intro/images/AE1.png"},{"name":"AE2.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"AE2","extname":".png","collection":null,"path":"/topics/ngs-intro/images/AE2.png"},{"name":"CLcutadapt1.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"CLcutadapt1","extname":".png","collection":null,"path":"/topics/ngs-intro/images/CLcutadapt1.png"},{"name":"CLcutadapt2.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"CLcutadapt2","extname":".png","collection":null,"path":"/topics/ngs-intro/images/CLcutadapt2.png"},{"name":"CLcutadapt3.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"CLcutadapt3","extname":".png","collection":null,"path":"/topics/ngs-intro/images/CLcutadapt3.png"},{"name":"CLcutadapt4.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"CLcutadapt4","extname":".png","collection":null,"path":"/topics/ngs-intro/images/CLcutadapt4.png"},{"name":"CLcutadapt5.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"CLcutadapt5","extname":".png","collection":null,"path":"/topics/ngs-intro/images/CLcutadapt5.png"},{"name":"CountTable10.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"CountTable10","extname":".png","collection":null,"path":"/topics/ngs-intro/images/CountTable10.png"},{"name":"CountTable11.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"CountTable11","extname":".png","collection":null,"path":"/topics/ngs-intro/images/CountTable11.png"},{"name":"CountTable12.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"CountTable12","extname":".png","collection":null,"path":"/topics/ngs-intro/images/CountTable12.png"},{"name":"CountTable13.png","modified_time":"2021-05-31 04:45:05 
-0500","basename":"CountTable13","extname":".png","collection":null,"path":"/topics/ngs-intro/images/CountTable13.png"},{"name":"CountTable14.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"CountTable14","extname":".png","collection":null,"path":"/topics/ngs-intro/images/CountTable14.png"},{"name":"CountTable3B.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"CountTable3B","extname":".png","collection":null,"path":"/topics/ngs-intro/images/CountTable3B.png"},{"name":"CountTable3C.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"CountTable3C","extname":".png","collection":null,"path":"/topics/ngs-intro/images/CountTable3C.png"},{"name":"CountTable3D.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"CountTable3D","extname":".png","collection":null,"path":"/topics/ngs-intro/images/CountTable3D.png"},{"name":"CountTable3E.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"CountTable3E","extname":".png","collection":null,"path":"/topics/ngs-intro/images/CountTable3E.png"},{"name":"CountTable4.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"CountTable4","extname":".png","collection":null,"path":"/topics/ngs-intro/images/CountTable4.png"},{"name":"CountTable5.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"CountTable5","extname":".png","collection":null,"path":"/topics/ngs-intro/images/CountTable5.png"},{"name":"CountTable6.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"CountTable6","extname":".png","collection":null,"path":"/topics/ngs-intro/images/CountTable6.png"},{"name":"CountTable7.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"CountTable7","extname":".png","collection":null,"path":"/topics/ngs-intro/images/CountTable7.png"},{"name":"CountTable8.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"CountTable8","extname":".png","collection":null,"path":"/topics/ngs-intro/images/CountTable8.png"},{"name":"ENA1.png","modified_time":"2021-05-31 
04:45:05 -0500","basename":"ENA1","extname":".png","collection":null,"path":"/topics/ngs-intro/images/ENA1.png"},{"name":"ENA2.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"ENA2","extname":".png","collection":null,"path":"/topics/ngs-intro/images/ENA2.png"},{"name":"ENA3.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"ENA3","extname":".png","collection":null,"path":"/topics/ngs-intro/images/ENA3.png"},{"name":"FASTQCRNASeq10.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"FASTQCRNASeq10","extname":".png","collection":null,"path":"/topics/ngs-intro/images/FASTQCRNASeq10.png"},{"name":"FASTQCRNASeq11.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"FASTQCRNASeq11","extname":".png","collection":null,"path":"/topics/ngs-intro/images/FASTQCRNASeq11.png"},{"name":"FASTQCRNASeq5.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"FASTQCRNASeq5","extname":".png","collection":null,"path":"/topics/ngs-intro/images/FASTQCRNASeq5.png"},{"name":"FASTQCRNASeqB1.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"FASTQCRNASeqB1","extname":".png","collection":null,"path":"/topics/ngs-intro/images/FASTQCRNASeqB1.png"},{"name":"FASTQCRNASeqB2.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"FASTQCRNASeqB2","extname":".png","collection":null,"path":"/topics/ngs-intro/images/FASTQCRNASeqB2.png"},{"name":"FASTQCRNASeqB3.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"FASTQCRNASeqB3","extname":".png","collection":null,"path":"/topics/ngs-intro/images/FASTQCRNASeqB3.png"},{"name":"FASTQCRNASeqB4.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"FASTQCRNASeqB4","extname":".png","collection":null,"path":"/topics/ngs-intro/images/FASTQCRNASeqB4.png"},{"name":"FASTQCRNASeqB6.png","modified_time":"2021-05-31 04:45:05 
-0500","basename":"FASTQCRNASeqB6","extname":".png","collection":null,"path":"/topics/ngs-intro/images/FASTQCRNASeqB6.png"},{"name":"FASTQCRNASeqB7.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"FASTQCRNASeqB7","extname":".png","collection":null,"path":"/topics/ngs-intro/images/FASTQCRNASeqB7.png"},{"name":"FASTQCRNASeqB9.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"FASTQCRNASeqB9","extname":".png","collection":null,"path":"/topics/ngs-intro/images/FASTQCRNASeqB9.png"},{"name":"GEO1.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"GEO1","extname":".png","collection":null,"path":"/topics/ngs-intro/images/GEO1.png"},{"name":"GEO2.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"GEO2","extname":".png","collection":null,"path":"/topics/ngs-intro/images/GEO2.png"},{"name":"GEO3.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"GEO3","extname":".png","collection":null,"path":"/topics/ngs-intro/images/GEO3.png"},{"name":"GEO4.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"GEO4","extname":".png","collection":null,"path":"/topics/ngs-intro/images/GEO4.png"},{"name":"GEO5.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"GEO5","extname":".png","collection":null,"path":"/topics/ngs-intro/images/GEO5.png"},{"name":"GP18.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"GP18","extname":".png","collection":null,"path":"/topics/ngs-intro/images/GP18.png"},{"name":"GP22.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"GP22","extname":".png","collection":null,"path":"/topics/ngs-intro/images/GP22.png"},{"name":"GP23.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"GP23","extname":".png","collection":null,"path":"/topics/ngs-intro/images/GP23.png"},{"name":"GP25.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"GP25","extname":".png","collection":null,"path":"/topics/ngs-intro/images/GP25.png"},{"name":"GP26.png","modified_time":"2021-05-31 04:45:05 
-0500","basename":"GP26","extname":".png","collection":null,"path":"/topics/ngs-intro/images/GP26.png"},{"name":"GP28.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"GP28","extname":".png","collection":null,"path":"/topics/ngs-intro/images/GP28.png"},{"name":"GP9.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"GP9","extname":".png","collection":null,"path":"/topics/ngs-intro/images/GP9.png"},{"name":"Galaxy10b.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Galaxy10b","extname":".png","collection":null,"path":"/topics/ngs-intro/images/Galaxy10b.png"},{"name":"Galaxy12a.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Galaxy12a","extname":".png","collection":null,"path":"/topics/ngs-intro/images/Galaxy12a.png"},{"name":"Galaxy18b.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Galaxy18b","extname":".png","collection":null,"path":"/topics/ngs-intro/images/Galaxy18b.png"},{"name":"Galaxy6a.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Galaxy6a","extname":".png","collection":null,"path":"/topics/ngs-intro/images/Galaxy6a.png"},{"name":"IGV2.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"IGV2","extname":".png","collection":null,"path":"/topics/ngs-intro/images/IGV2.png"},{"name":"IGV3.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"IGV3","extname":".png","collection":null,"path":"/topics/ngs-intro/images/IGV3.png"},{"name":"IGV4.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"IGV4","extname":".png","collection":null,"path":"/topics/ngs-intro/images/IGV4.png"},{"name":"IGV5.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"IGV5","extname":".png","collection":null,"path":"/topics/ngs-intro/images/IGV5.png"},{"name":"IGV6.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"IGV6","extname":".png","collection":null,"path":"/topics/ngs-intro/images/IGV6.png"},{"name":"IGV8.png","modified_time":"2021-05-31 04:45:05 
-0500","basename":"IGV8","extname":".png","collection":null,"path":"/topics/ngs-intro/images/IGV8.png"},{"name":"MapRNASeq1.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"MapRNASeq1","extname":".png","collection":null,"path":"/topics/ngs-intro/images/MapRNASeq1.png"},{"name":"MapRNASeq2.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"MapRNASeq2","extname":".png","collection":null,"path":"/topics/ngs-intro/images/MapRNASeq2.png"},{"name":"MapRNASeq3.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"MapRNASeq3","extname":".png","collection":null,"path":"/topics/ngs-intro/images/MapRNASeq3.png"},{"name":"MapRNASeq4.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"MapRNASeq4","extname":".png","collection":null,"path":"/topics/ngs-intro/images/MapRNASeq4.png"},{"name":"MapRNASeq5.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"MapRNASeq5","extname":".png","collection":null,"path":"/topics/ngs-intro/images/MapRNASeq5.png"},{"name":"MapRNASeq6.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"MapRNASeq6","extname":".png","collection":null,"path":"/topics/ngs-intro/images/MapRNASeq6.png"},{"name":"MapRNASeq7.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"MapRNASeq7","extname":".png","collection":null,"path":"/topics/ngs-intro/images/MapRNASeq7.png"},{"name":"SRA1.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"SRA1","extname":".png","collection":null,"path":"/topics/ngs-intro/images/SRA1.png"},{"name":"SRA2.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"SRA2","extname":".png","collection":null,"path":"/topics/ngs-intro/images/SRA2.png"},{"name":"SRA6A.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"SRA6A","extname":".png","collection":null,"path":"/topics/ngs-intro/images/SRA6A.png"},{"name":"SRA6B.png","modified_time":"2021-05-31 04:45:05 
-0500","basename":"SRA6B","extname":".png","collection":null,"path":"/topics/ngs-intro/images/SRA6B.png"},{"name":"SRA6C.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"SRA6C","extname":".png","collection":null,"path":"/topics/ngs-intro/images/SRA6C.png"},{"name":"SRA6D.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"SRA6D","extname":".png","collection":null,"path":"/topics/ngs-intro/images/SRA6D.png"},{"name":"count_modes.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"count_modes","extname":".png","collection":null,"path":"/topics/ngs-intro/images/count_modes.png"},{"name":"fastqcTrim5.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"fastqcTrim5","extname":".png","collection":null,"path":"/topics/ngs-intro/images/fastqcTrim5.png"},{"name":"samtools2b.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"samtools2b","extname":".png","collection":null,"path":"/topics/ngs-intro/images/samtools2b.png"},{"name":"1214.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"1214","extname":".png","collection":null,"path":"/topics/protein-structure-analysis/images/1214.png"},{"name":"3P0G_A_PoseView_Input.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"3P0G_A_PoseView_Input","extname":".png","collection":null,"path":"/topics/protein-structure-analysis/images/3P0G_A_PoseView_Input.png"},{"name":"3SN6_withoutLysozyme.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"3SN6_withoutLysozyme","extname":".png","collection":null,"path":"/topics/protein-structure-analysis/images/3SN6_withoutLysozyme.png"},{"name":"Blastpdb.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Blastpdb","extname":".png","collection":null,"path":"/topics/protein-structure-analysis/images/Blastpdb.png"},{"name":"Groel.png","modified_time":"2021-05-31 04:45:05 
-0500","basename":"Groel","extname":".png","collection":null,"path":"/topics/protein-structure-analysis/images/Groel.png"},{"name":"Insulin.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Insulin","extname":".png","collection":null,"path":"/topics/protein-structure-analysis/images/Insulin.png"},{"name":"Insulin_hires.jpg","modified_time":"2021-05-31 04:45:05 -0500","basename":"Insulin_hires","extname":".jpg","collection":null,"path":"/topics/protein-structure-analysis/images/Insulin_hires.jpg"},{"name":"Modelling_results_step3.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Modelling_results_step3","extname":".png","collection":null,"path":"/topics/protein-structure-analysis/images/Modelling_results_step3.png"},{"name":"Modelling_sequence_template_step1.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Modelling_sequence_template_step1","extname":".png","collection":null,"path":"/topics/protein-structure-analysis/images/Modelling_sequence_template_step1.png"},{"name":"Modelling_template_selection_step2.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Modelling_template_selection_step2","extname":".png","collection":null,"path":"/topics/protein-structure-analysis/images/Modelling_template_selection_step2.png"},{"name":"Mol_desc_1DKX.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Mol_desc_1DKX","extname":".png","collection":null,"path":"/topics/protein-structure-analysis/images/Mol_desc_1DKX.png"},{"name":"Pdb_expdetails_1dkx.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Pdb_expdetails_1dkx","extname":".png","collection":null,"path":"/topics/protein-structure-analysis/images/Pdb_expdetails_1dkx.png"},{"name":"Pdb_firstresiduesmissing_1dkx.png","modified_time":"2021-05-31 04:45:05 
-0500","basename":"Pdb_firstresiduesmissing_1dkx","extname":".png","collection":null,"path":"/topics/protein-structure-analysis/images/Pdb_firstresiduesmissing_1dkx.png"},{"name":"Pdb_seqtab.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Pdb_seqtab","extname":".png","collection":null,"path":"/topics/protein-structure-analysis/images/Pdb_seqtab.png"},{"name":"Pdbdownloadfile1.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Pdbdownloadfile1","extname":".png","collection":null,"path":"/topics/protein-structure-analysis/images/Pdbdownloadfile1.png"},{"name":"Pdbsearchbox_RCSB.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Pdbsearchbox_RCSB","extname":".png","collection":null,"path":"/topics/protein-structure-analysis/images/Pdbsearchbox_RCSB.png"},{"name":"Pdf_uniprotview_button.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Pdf_uniprotview_button","extname":".png","collection":null,"path":"/topics/protein-structure-analysis/images/Pdf_uniprotview_button.png"},{"name":"ProteinPlusPoseView.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"ProteinPlusPoseView","extname":".png","collection":null,"path":"/topics/protein-structure-analysis/images/ProteinPlusPoseView.png"},{"name":"RMSD.gif","modified_time":"2021-05-31 04:45:05 -0500","basename":"RMSD","extname":".gif","collection":null,"path":"/topics/protein-structure-analysis/images/RMSD.gif"},{"name":"Seqselector.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Seqselector","extname":".png","collection":null,"path":"/topics/protein-structure-analysis/images/Seqselector.png"},{"name":"Training_1.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Training_1","extname":".png","collection":null,"path":"/topics/protein-structure-analysis/images/Training_1.png"},{"name":"Training_10.png","modified_time":"2021-05-31 04:45:05 
-0500","basename":"Training_10","extname":".png","collection":null,"path":"/topics/protein-structure-analysis/images/Training_10.png"},{"name":"Training_11.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Training_11","extname":".png","collection":null,"path":"/topics/protein-structure-analysis/images/Training_11.png"},{"name":"Training_12.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Training_12","extname":".png","collection":null,"path":"/topics/protein-structure-analysis/images/Training_12.png"},{"name":"Training_2.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Training_2","extname":".png","collection":null,"path":"/topics/protein-structure-analysis/images/Training_2.png"},{"name":"Training_3.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Training_3","extname":".png","collection":null,"path":"/topics/protein-structure-analysis/images/Training_3.png"},{"name":"Training_4.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Training_4","extname":".png","collection":null,"path":"/topics/protein-structure-analysis/images/Training_4.png"},{"name":"Training_5.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Training_5","extname":".png","collection":null,"path":"/topics/protein-structure-analysis/images/Training_5.png"},{"name":"Training_6.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Training_6","extname":".png","collection":null,"path":"/topics/protein-structure-analysis/images/Training_6.png"},{"name":"Training_7.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Training_7","extname":".png","collection":null,"path":"/topics/protein-structure-analysis/images/Training_7.png"},{"name":"Training_8.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Training_8","extname":".png","collection":null,"path":"/topics/protein-structure-analysis/images/Training_8.png"},{"name":"Training_9.png","modified_time":"2021-05-31 04:45:05 
-0500","basename":"Training_9","extname":".png","collection":null,"path":"/topics/protein-structure-analysis/images/Training_9.png"},{"name":"aligned-structures.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"aligned-structures","extname":".png","collection":null,"path":"/topics/protein-structure-analysis/images/aligned-structures.png"},{"name":"amino-acids.cdx","modified_time":"2021-05-31 04:45:05 -0500","basename":"amino-acids","extname":".cdx","collection":null,"path":"/topics/protein-structure-analysis/images/amino-acids.cdx"},{"name":"amino-acids.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"amino-acids","extname":".png","collection":null,"path":"/topics/protein-structure-analysis/images/amino-acids.png"},{"name":"blastpdb.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"blastpdb","extname":".png","collection":null,"path":"/topics/protein-structure-analysis/images/blastpdb.png"},{"name":"diffraction-pattern.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"diffraction-pattern","extname":".png","collection":null,"path":"/topics/protein-structure-analysis/images/diffraction-pattern.png"},{"name":"electron-density.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"electron-density","extname":".png","collection":null,"path":"/topics/protein-structure-analysis/images/electron-density.png"},{"name":"hemoglobin.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"hemoglobin","extname":".png","collection":null,"path":"/topics/protein-structure-analysis/images/hemoglobin.png"},{"name":"nmr-model-example.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"nmr-model-example","extname":".png","collection":null,"path":"/topics/protein-structure-analysis/images/nmr-model-example.png"},{"name":"nmr-noe.jpg","modified_time":"2021-05-31 04:45:05 
-0500","basename":"nmr-noe","extname":".jpg","collection":null,"path":"/topics/protein-structure-analysis/images/nmr-noe.jpg"},{"name":"nmr-peaks-to-structure.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"nmr-peaks-to-structure","extname":".png","collection":null,"path":"/topics/protein-structure-analysis/images/nmr-peaks-to-structure.png"},{"name":"occupancy.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"occupancy","extname":".png","collection":null,"path":"/topics/protein-structure-analysis/images/occupancy.png"},{"name":"pdb-file-format.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"pdb-file-format","extname":".png","collection":null,"path":"/topics/protein-structure-analysis/images/pdb-file-format.png"},{"name":"pdb-logo.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"pdb-logo","extname":".png","collection":null,"path":"/topics/protein-structure-analysis/images/pdb-logo.png"},{"name":"pdbdownloadfile1.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"pdbdownloadfile1","extname":".png","collection":null,"path":"/topics/protein-structure-analysis/images/pdbdownloadfile1.png"},{"name":"pdbsearchbox_RCSB.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"pdbsearchbox_RCSB","extname":".png","collection":null,"path":"/topics/protein-structure-analysis/images/pdbsearchbox_RCSB.png"},{"name":"saxs.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"saxs","extname":".png","collection":null,"path":"/topics/protein-structure-analysis/images/saxs.png"},{"name":"uniprot-logo.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"uniprot-logo","extname":".png","collection":null,"path":"/topics/protein-structure-analysis/images/uniprot-logo.png"},{"name":"uniprotsearchbox.png","modified_time":"2021-05-31 04:45:05 
-0500","basename":"uniprotsearchbox","extname":".png","collection":null,"path":"/topics/protein-structure-analysis/images/uniprotsearchbox.png"},{"name":"wwpdb-welcome-page.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"wwpdb-welcome-page","extname":".png","collection":null,"path":"/topics/protein-structure-analysis/images/wwpdb-welcome-page.png"},{"name":"xray-tech-setup.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"xray-tech-setup","extname":".png","collection":null,"path":"/topics/protein-structure-analysis/images/xray-tech-setup.png"},{"name":"Visualizing_protein_structures_with_YASARA_exercises.md","modified_time":"2021-05-31 04:45:05 -0500","basename":"Visualizing_protein_structures_with_YASARA_exercises","extname":".md","collection":null,"path":"/topics/protein-structure-analysis/tutorials/visualise-structures/Visualizing_protein_structures_with_YASARA_exercises.md"},{"name":"Dockerfile","modified_time":"2021-05-31 04:45:05 -0500","basename":"Dockerfile","extname":"","collection":null,"path":"/topics/python-programming/docker/Dockerfile"},{"name":"Python-Set-Operatioons.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"Python-Set-Operatioons","extname":".png","collection":null,"path":"/topics/python-programming/images/Python-Set-Operatioons.png"},{"name":"cells.PNG","modified_time":"2021-05-31 04:45:05 -0500","basename":"cells","extname":".PNG","collection":null,"path":"/topics/python-programming/images/cells.PNG"},{"name":"format_method_positional_parameters.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"format_method_positional_parameters","extname":".png","collection":null,"path":"/topics/python-programming/images/format_method_positional_parameters.png"},{"name":"myDictionary-cropped.png","modified_time":"2021-05-31 04:45:05 
-0500","basename":"myDictionary-cropped","extname":".png","collection":null,"path":"/topics/python-programming/images/myDictionary-cropped.png"},{"name":"myDictionary.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"myDictionary","extname":".png","collection":null,"path":"/topics/python-programming/images/myDictionary.png"},{"name":"plotting1.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"plotting1","extname":".png","collection":null,"path":"/topics/python-programming/images/plotting1.png"},{"name":"plotting2.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"plotting2","extname":".png","collection":null,"path":"/topics/python-programming/images/plotting2.png"},{"name":"plotting3.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"plotting3","extname":".png","collection":null,"path":"/topics/python-programming/images/plotting3.png"},{"name":"plotting4.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"plotting4","extname":".png","collection":null,"path":"/topics/python-programming/images/plotting4.png"},{"name":"plotting5.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"plotting5","extname":".png","collection":null,"path":"/topics/python-programming/images/plotting5.png"},{"name":"plotting6.png","modified_time":"2021-05-31 04:45:05 -0500","basename":"plotting6","extname":".png","collection":null,"path":"/topics/python-programming/images/plotting6.png"},{"name":"search-bundle.js","modified_time":"2021-05-31 04:46:09 -0500","basename":"search-bundle","extname":".js","collection":null,"path":"assets/js/search-bundle.js"},{"name":"search-bundle.js.gz","modified_time":"2021-05-31 04:46:09 -0500","basename":"search-bundle.js","extname":".gz","collection":null,"path":"assets/js/search-bundle.js.gz"},{"name":"search.png","modified_time":"2021-05-31 04:46:09 -0500","basename":"search","extname":".png","collection":null,"path":"assets/png/search.png"},{"name":"search.svg","modified_time":"2021-05-31 04:46:09 
-0500","basename":"search","extname":".svg","collection":null,"path":"assets/svg/search.svg"}],"categories":{},"collections":[{"files":[],"output":true,"label":"posts","directory":"/srv/jekyll/_posts","docs":[],"relative_directory":"_posts","permalink":"/:categories/:year/:month/:day/:title:output_ext"}],"time":"2021-05-31 04:46:10 -0500","html_pages":["\n","\n","\n","<ol id=\"markdown-toc\">\n  <li><a href=\"#overview-questions\" id=\"markdown-toc-overview-questions\">Overview Questions</a>    <ol>\n      <li><a href=\"#what-is-this-website\" id=\"markdown-toc-what-is-this-website\">What is this website?</a></li>\n      <li><a href=\"#what-are-the-tutorials-for\" id=\"markdown-toc-what-are-the-tutorials-for\">What are the tutorials for?</a></li>\n      <li><a href=\"#what-audiences-are-the-tutorials-for\" id=\"markdown-toc-what-audiences-are-the-tutorials-for\">What audiences are the tutorials for?</a></li>\n      <li><a href=\"#how-is-the-content-licensed\" id=\"markdown-toc-how-is-the-content-licensed\">How is the content licensed?</a></li>\n      <li><a href=\"#how-can-i-advertise-the-training-materials-on-my-posters\" id=\"markdown-toc-how-can-i-advertise-the-training-materials-on-my-posters\">How can I advertise the training materials on my posters?</a></li>\n      <li><a href=\"#how-do-i-use-this-material\" id=\"markdown-toc-how-do-i-use-this-material\">How do I use this material?</a></li>\n      <li><a href=\"#how-can-i-get-help\" id=\"markdown-toc-how-can-i-get-help\">How can I get help?</a></li>\n    </ol>\n  </li>\n  <li><a href=\"#for-instructors\" id=\"markdown-toc-for-instructors\">For Instructors</a>    <ol>\n      <li><a href=\"#where-do-i-start\" id=\"markdown-toc-where-do-i-start\">Where do I start?</a></li>\n      <li><a href=\"#how-can-i-fix-mistakes-or-expand-an-existing-tutorial-using-the-github-interface\" id=\"markdown-toc-how-can-i-fix-mistakes-or-expand-an-existing-tutorial-using-the-github-interface\">How can I fix mistakes or expand an 
existing tutorial using the GitHub interface?</a></li>\n      <li><a href=\"#sustainability-of-the-training-material-and-metadata\" id=\"markdown-toc-sustainability-of-the-training-material-and-metadata\">Sustainability of the training-material and metadata</a></li>\n    </ol>\n  </li>\n</ol>\n\n<h1 id=\"overview-questions\">Overview Questions</h1>\n\n<h2 id=\"what-is-this-website\">What is this website?</h2>\n\n<p>This website is a collection of hands-on tutorials that are designed to be interactive.</p>\n\n<p>This material is developed and maintained by the <a href=\"https://www.bits.vib.be/\">VIB Bioinformatics Core</a>.</p>\n\n<h2 id=\"what-are-the-tutorials-for\">What are the tutorials for?</h2>\n\n<p>These tutorials can be used for learning and teaching general data analysis, and for learning/teaching specific domains such as metagenomics and differential gene expression analysis with RNA-Seq data.</p>\n\n<h2 id=\"what-audiences-are-the-tutorials-for\">What audiences are the tutorials for?</h2>\n\n<p>There are two distinct audiences for these materials.</p>\n\n<ol>\n  <li><strong>Self-paced individual learners.</strong> These tutorials provide everything you need to learn a topic, from explanations of concepts to detailed hands-on exercises.</li>\n  <li><strong>Instructors.</strong> They are also designed to be used by instructors in teaching/training settings. 
Slides, and detailed tutorials are provided.</li>\n</ol>\n\n<h2 id=\"how-is-the-content-licensed\">How is the content licensed?</h2>\n\n<p>The content of this website is licensed under the <a href=\"https://creativecommons.org/licenses/by/4.0/\">Creative Commons Attribution 4.0 License</a>.</p>\n\n<h2 id=\"how-can-i-advertise-the-training-materials-on-my-posters\">How can I advertise the training materials on my posters?</h2>\n\n<p>We provide some QR codes and logos in the <a href=\"https://github.com/vibbits/training-material/tree/master/assets/images\">images folder</a>.</p>\n\n<h2 id=\"how-do-i-use-this-material\">How do I use this material?</h2>\n\n<p>Many topics include slide decks and if the topic you are interested in has slides then start there.  These will introduce the topic and important concepts.</p>\n\n<h2 id=\"how-can-i-get-help\">How can I get help?</h2>\n\n<p>If you have questions about this training material, you can reach us sending an email to bits@vib.be.</p>\n\n<h1 id=\"for-instructors\">For Instructors</h1>\n\n<p>This material can also be used to teach the content in a group setting to students and researchers.</p>\n\n<h2 id=\"where-do-i-start\">Where do I start?</h2>\n\n<p>Spend some time exploring the different tutorials and the different resources that are available. Become familiar with the structure of the tutorials and think about how you might use them in your teaching.</p>\n\n<h2 id=\"how-can-i-fix-mistakes-or-expand-an-existing-tutorial-using-the-github-interface\">How can I fix mistakes or expand an existing tutorial using the GitHub interface?</h2>\n\n<p>Please submit an issue via github.</p>\n\n<h2 id=\"sustainability-of-the-training-material-and-metadata\">Sustainability of the training-material and metadata</h2>\n\n<p>This repository is hosted on <a href=\"https://github.com/\">GitHub</a> using git as a <a href=\"https://en.wikipedia.org/wiki/Distributed_version_control\">DVCS</a>. 
Therefore the community is hosting backups of this repository in a decentralised way. The repository is self-contained and contains all needed content and all metadata.</p>\n","\n","\n","\n","\n","\n","\n","\n","\n","\n","\n","\n","\n","\n","\n","\n","\n","\n","\n","\n","\n","### ChiP-Seq Analysis ###\n\n[slides](http://data.bits.vib.be/pub/trainingen/NGSChIPSEQ/booklet/thomas-chollier_2020.pdf)\n","### How to fill the slide decks?\n\nPlease follow our\n[tutorial to learn how to fill the slides]({{ site.baseurl }}/topics/contributing/tutorials/create-new-tutorial-slides/slides.html)\n","### Protein Structure Analysis ###\n\n[slides](https://material.bits.vib.be/courses/?https://raw.githubusercontent.com/vibbits/material-liascript/master/slides-PSA.md)\n\n- Sequences, structures and databases\n- Experimental methods (X-rays, electrons and NMR)\n- Finding and visualising structures from the  Protein Data Bank\n- Comparing structures\n- Modelling mutations\n- Creating homology models","","","",".enlarge120[\n\n# ***De novo* Genome Assembly**\n\n]\n\n#### With thanks to T Seemann, D Bulach, I Cooke and Simon Gladman\n---\n.enlarge120[\n\n# ***De novo* assembly**\n\n]\n\n.pull-left[\n\n**The process of reconstructing the original DNA sequence from the fragment reads alone.**\n\n* Instinctively like a jigsaw puzzle\n\n  * Find reads which \"fit together\" (overlap)\n  * Could be missing pieces (sequencing bias)\n  * Some pieces will be dirty (sequencing errors)\n\n]\n\n.pull-right[ ![](../../images/Humpty.jpg) ]\n\n---\n\n# **Another View**\n\n![](../../images/newspaper.png)\n\n---\n\n# **Assembly: An Example**\n\n---\n\n# **A small \"genome\"**\n\n![](../../images/shakespear1.png)\n\n---\n\n# **Shakespearomics**\n\n![](../../images/shakespear2.png)\n\n---\n\n# **Shakespearomics**\n\n![](../../images/shakespear3.png)\n\n---\n\n# **Shakespearomics**\n\n![](../../images/shakespear4.png)\n\n---\n\n# **So far, so good!**\n\n---\n\n# **The Awful 
Truth**\n\n![](../../images/notsimply.png)\n\n## \"Genome assembly is impossible.\" - A/Prof. Mihai Pop\n\n---\n.enlarge120[\n\n# **Why is it so hard?**\n\n]\n\n.pull-left[\n* Millions of pieces\n  * Much, much shorter than the genome\n  * Lots of them look similar\n* Missing pieces\n  * Some parts can't be sequenced easily\n* Dirty Pieces\n  * Lots of errors in reads\n]\n\n.pull-right[ ![](../../images/worlds_hardest.png) ]\n\n---\n\n# **Assembly recipe**\n\n* Find all overlaps between reads\n  * Hmm, sounds like a lot of work..\n* Build a graph\n  * A picture of the read connections\n* Simplify the graph\n  * Sequencing errors will mess it up a lot\n* Traverse the graph\n  * Trace a sensible path to produce a consensus\n\n---\n\n![](../../images/olc_pic.png)\n\n---\n\n# **A more realistic graph**\n\n![](../../images/real_graph.png)\n\n---\n\n# .image-15[![](../../images/nofun.png)] **What ruins the graph?**\n\n* Read errors\n  * Introduces false edges and nodes\n\n* Non haploid organisms\n  * Heterozygosity causes lots of detours\n\n* Repeats\n  * If they are longer than the read length\n  * Causes nodes to be shared, locality confusion.\n\n---\n\n# **Repeats**\n\n---\n.enlarge120[\n# **What is a repeat?**\n]\n\n.pull-left[\n\n#### ***A segment of DNA which occurs more than once in the genome sequence***\n\n* Very common\n  * Transposons (self replicating genes)\n  * Satellites (repetitive adjacent patterns)\n  * Gene duplications (paralogs)\n\n]\n\n.pull-right[\n\n![](../../images/triplets.png)\n\n]\n\n---\n\n# **Effect on Assembly**\n\n![](../../images/repeat_effect.png)\n\n---\n.enlarge120[\n# **The law of repeats** .image-15[![](../../images/repeatafterme.png)]\n]\n\n## **It is impossible to resolve repeats of length S unless you have reads longer than S**\n\n## **It is impossible to resolve repeats of length S unless you have reads longer than S**\n\n---\n\n# **Scaffolding**\n\n---\n.enlarge120[\n# **Beyond contigs**\n]\n\n.pull-left[\n\nContig sizes are 
limited by:\n\n* the length of the repeats in your genome\n  * Can't change this\n\n\n* the length (or \"span\") of the reads\n  * Use long read technology\n  * Use tricks with other technology\n\n]\n\n---\n.enlarge120[\n# **Types of reads**\n]\n\n.pull-left[.enlarge120[**Example fragment**]]\n\n\n.remark-code[.enlarge120[atcgtatgatcttgagattctctcttcccttatagctgctata]]\n\n.pull-left[.enlarge120[**\"Single-end\" read**]]\n\n\n.remark-code[.enlarge120[**atcgtatg**atcttgagattctctcttcccttatagctgctata]]\n\nsequence *one* end of the fragment\n\n.pull-left[.enlarge120[**\"Paired-end\" read**]]\n\n\n.remark-code[.enlarge120[**atcgtatg**atcttgagattctctcttcccttatag**ctgctata**]]\n\nsequence both ends of the same fragment\n\n**We can exploit this information!**\n---\n\n.enlarge120[# **Scaffolding**]\n\n* **Paired end reads**\n  * Known sequences at each end of fragment\n  * Roughly known fragment length\n\n* **Most ends will occur in same contig**\n\n* **Some will occur in different contigs**\n  * ***evidence that these contigs are linked***\n---\n\n.enlarge120[# **Contigs to Scaffolds**]\n\n![](../../images/scaffolding.png)\n\n---\n\n.enlarge120[# **Assessing assemblies**]\n\n* We desire\n  * Total length similar to genome size\n  * Fewer, larger contigs\n  * Correct contigs\n\n* Metrics\n  * No generally useful measure. 
(No real prior information)\n  * Longest contigs, total base pairs in contigs, **N50**, ...\n\n---\n\n.enlarge120[# **The \"N50\"**]\n\n.enlarge120[***The length of that contig from which 50% of the bases are in it and shorter contigs***]\n\n* Imagine we have 7 contigs with lengths:\n  * 1, 1, 3, 5, 8, 12, 20\n\n* Total\n  * 1+1+3+5+8+12+20 = 50\n\n* N50 is the \"halfway sum\" = 25\n  * 1+1+3+5+8+**12** = 30 (>25) so **N50 is 12**\n\n---\n\n.enlarge120[# **2 levels of assembly**]\n\n* Draft assembly\n  * Will contain a number of non-linked scaffolds with gaps of unknown sequence\n  * Fairly easy to get to\n\n* Closed (finished) assembly\n  * One sequence for each chromosome\n  * Takes a **lot** more work\n  * Small genomes are becoming easier with long read tech\n  * Large genomes are the province of big consortia (e.g. Human Genome Consortium)\n\n---\n.enlarge120[# **How do I do it?**]\n---\n.enlarge120[\n# **Example**\n\n* Culture your bacterium\n\n\n* Extract your genomic DNA\n\n\n* Send it to your sequencing centre for Illumina sequencing\n  * 250bp paired end\n\n\n* Get back 2 files\n  * .remark-code[MRSA_R1.fastq.gz]\n  * .remark-code[MRSA_R2.fastq.gz]\n\n\n* ***Now what?***\n]\n\n---\n.enlarge120[# **Assembly tools**\n\n* **Genome**\n  * **Velvet, Velvet Optimizer, Spades,** Abyss, MIRA, Newbler, SGA, AllPaths, Ray, SOAPdenovo, ...\n\n\n* Meta-genome\n  * Meta Velvet, SGA, custom scripts + above\n\n\n* Transcriptome\n  * Trinity, Oases, Trans-abyss\n\n***And many, many others...***\n\n]\n\n---\n.enlarge120[\n# **Assembly Exercise #1**\n\n* We will do a simple assembly using **Velvet** in **Galaxy**\n* We can do a number of different assemblies and compare some assembly metrics.\n\n]\n","As we introduced a lot of new concepts it is important that you practice them.\n\n----\n\n> ### {% icon hands_on %} Exercise 7\n>\n> Write a program that does the following:\n> \n> 1. 
Ask the user for a full DNA sequence\n>     - Make sure the sequence contains only GACT\n> 2. Once you have a valid sequence\n>     - For each DNA fragment the user enters:\n>         - Check if it occurs in the full sequence\n>         - Print out the sequence position if so\n>         - Track each fragment\n>     - Keep on asking the user for DNA fragments, stop if they just press return\n> 3. As a summary, print out all fragments with their position that you tracked\n> \n> **Tips** to complete this exercise in case you get stuck.\n> - Use while loops: you can use the condition to decide when to end the loop depending on the user input\n> - Track the sequence fragment and position data using a list\n> - Use string methods!\n> - To check the full DNA sequence, you can count how many times each GACT letter occurs, add up these counts, and compare this value to the total length of the full DNA sequence\n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    > \n>    > ```python\n>    > # This variable will be used for the while loop\n>    > validSequence = False\n>    >  \n>    > # Keep on going as long as the DNA sequence is not valid\n>    > while not validSequence:\n>    >     # Get a string from the user\n>    >     fullDnaSequence = input(\"Please enter your full DNA sequence:\")\n>    >     fullDnaSequence = fullDnaSequence.upper()\n>    >     \n>    >     # Count the GACT characters in the sequence\n>    >     gactCount = 0\n>    >     for code in 'GACT':\n>    >         gactCount += fullDnaSequence.count(code)\n>    >  \n>    >     # Check if the number of GACT characters matches the full length of the sequence\n>    >     # and set validSequence to True if so - this will stop the while: loop\n>    >     if gactCount == len(fullDnaSequence):\n>    >         validSequence = True\n>    >     else:\n>    >         print(\"\\nInvalid sequence, only GACT allowed, try again!.\\n\")\n>    > \n>    > # Print some 
line breaks\n>    > print(\"\\n\\n\")\n>    >  \n>    > # Prime the list to track the DNA fragments and the variable for the while loop\n>    > dnaFragmentInfo = []\n>    > dnaFragment = input(\"Please give a DNA fragment to check:\")\n>    >  \n>    > while dnaFragment:\n>    >     \n>    >     # Check if present at all\n>    >     dnaFragmentCount = fullDnaSequence.count(dnaFragment)\n>    >     if dnaFragmentCount:\n>    >         currentDnaSequenceIndex = 0\n>    >         for i in range(dnaFragmentCount):        \n>    >             # Equivalent to currentDnaSequenceIndex = currentDnaSequenceIndex + fullDna...\n>    >             currentDnaSequenceIndex += fullDnaSequence[currentDnaSequenceIndex:].index(dnaFragment)\n>    >  \n>    >             print(\"\\n  Fragment {} present at position {}.\\n\".format(dnaFragment,currentDnaSequenceIndex + 1))\n>    >             dnaFragmentInfo.append((currentDnaSequenceIndex + 1,dnaFragment))\n>    >             currentDnaSequenceIndex += 1\n>    >     else:\n>    >         print(\"\\n  Fragment {} not present!\\n\".format(dnaFragment))\n>    >  \n>    >     dnaFragment = input(\"Please give a DNA fragment to check:\")\n>    > \n>    > # Print some line breaks\n>    > print(\"\\n\\n\")\n>    > \n>    > # Print out the fragment information again, first sort it\n>    > dnaFragmentInfo.sort()\n>    > for (dnaFragmentPosition,dnaFragment) in dnaFragmentInfo:\n>    >     print(\"Found {} at position {}\".format(dnaFragment,dnaFragmentPosition))\n>    > ```\n>    > </details>\n>\n{: .hands_on}\n","[Group exercises](http://data.bits.vib.be/pub/trainingen/NGSIntro/GroupExNoSol.pdf)\n\nYou can solve most quality issues found by FASTQC e.g. trimming contaminating adapters, low quality bases at the end of your reads, filtering low quality reads...\nThere's is a lot of debate on whether it is required to do this. 
Reads that are contaminated with adapter sequences will not map but if these reads make up a large fraction of the total number of reads they might slow down the mapping a lot. While it is true that mappers can use noisy info (still containing adapters, low quality bases...), the mapping results will be negatively affected by this noise.\nCleaning is in my opinion worthwhile especially when working with small reads and in case of extensive adapter contamination (almost always).\n\n## Quality control in Galaxy\n\nLinks:\n- [European Galaxy](http://usegalaxy.eu)\n- [Raw Arabidopsis data in European Galaxy](https://usegalaxy.eu/u/janick/h/rawdata)\n- [Groomed Arabidopsis data in European Galaxy](https://usegalaxy.eu/u/janick/h/groomeddata)\n- [Clean Arabidopsis data in European Galaxy](https://usegalaxy.eu/u/janick/h/cleandata)\n- [Raw E. coli data in European Galaxy](https://usegalaxy.eu/u/janick/h/ecoliraw)\n- [Groomed E. coli data in European Galaxy](https://usegalaxy.eu/u/janick/h/ecoligroomed)\n- [Filtered E. coli data in European Galaxy](https://usegalaxy.eu/u/janick/h/ecolifiltered)\n\n- [Main Galaxy](http://usegalaxy.org)\n- [Raw Arabidopsis data in main Galaxy](https://usegalaxy.org/u/janick/h/ngsdata)\n- [Groomed Arabidopsis data in main Galaxy](https://usegalaxy.org/u/janick/h/ngsgroomed-1)\n\nGalaxy is a bioinformatics server that contains many tools, data and analysis results. Before you can upload your data to Galaxy, you have to register or log in to Galaxy (see slides).\n\n### Upload data to Galaxy\n\nIf you want to work on your data in Galaxy, you have to first get the data into Galaxy. To accomplish this you can use the **Upload file** tool in the **Get data** section.\n**Instead I shared the file on Galaxy so you can import it using [this link](https://usegalaxy.eu/u/janick/h/rawdata).** Make sure that you are logged on to Galaxy before you do this. 
When you click this link you are redirected to a web page where you can import the file:\n\n ![](../../images/Galaxy6a.png)\n\n\n### The history\n\nData sets that are uploaded or created by running a tool appear in the history in the right Galaxy pane.\nTo give a history a new name, click the history's current name, type a new one and hit enter.\nClicking the name of a data set unfolds a preview, a short description and tools to manipulate the data.\n\n#### Icons in the History\n\n- Clicking the floppy (**Download**) icon will download the file to your computer\n- To visualize a file in the middle pane, click the eye (**View data**) icon next to the name of the file.\n\n\n#### Colors of files in the History\n\nData sets in the history have different colors representing different states.\n\n- **Grey**: The job is placed in the waiting queue. You can check the status of queued jobs by refreshing the History pane.\n- **Yellow**: The job is running.\n- **Green**: When the job has been run the status will change from yellow to green if completed successfully.\n- **Red**: When the job has been run the status will change from yellow to red if problems were encountered.\n\n\n### Running Groomer in Galaxy\n\nIf you select a tool in Galaxy it will automatically detect all data sets in your history that it can use as input. In the case shown below the tool does not recognize the fastq file in the history:\n\n ![](../../images/Galaxy10b.png)\n\nThe fact that the tool does not recognize the fastq file means that the fastq file is so messy that the tool can't read it. Remember that there is a tool to clean messy fastq files: **FASTQ Groomer** \n\nCheck the quality encoding in your fastq file (e.g. in FASTQC), and click the **Execute** button to start the tool:\n\n ![](../../images/Galaxy12a.png)\n\nGrooming takes long (30 min when Galaxy traffic is low). You can choose to wait but if it takes too long you can click the **Delete** button in the History (see slides) to stop the tool. 
I have provided the groomed file: import it in Galaxy using [this link](https://usegalaxy.eu/u/janick/h/groomeddata).\n\n\n### Using Trimmomatic in Galaxy\n\nTo clean your data use the **Trimmomatic** tool in the **Quality Control** section of tools. Click the name of the tool to display its parameters in the middle pane.\n\nSee [this page](http://wiki.bits.vib.be/index.php/Parameters_of_Trimmomatic) for an overview of the Trimmomatic parameters.\n\nA bit more explanation:\n\n- **The input file with the reads**: Galaxy will automatically suggest a file from your History that has the right format, in this case: a fastq file. If Galaxy doesn't make a suggestion it means it cannot find any files in your History with the right format.\n- **The sequence of the adapter**: provide a custom sequence. If you analyze your own data you know which adapter sequences were used. Since this is public data we don't really know the name of the adapter. However, remember that FASTQC gives you a list of contaminating adapter sequences so you have the sequence of the adapter. Choose **custom adapter sequence** and paste the adapter sequence from FASTQC. You can only enter one sequence.\n\n\n\nClick **Execute** to run the tool.\n\nIn the history you see a new item, colored in yellow as long as the tool is running. Regularly hit the **Refresh** button in the History to check if the tool has finished. Clipping should go fast, after a few minutes you should have the result.\n\n\n### Running FASTQC in Galaxy\n\nSearch for **FASTQC** in the tools pane and click the resulting **FastQC** link to open the parameter settings in the middle pane:\n\n ![](../../images/Galaxy18b.png)\n\nFASTQC automatically recognizes all files it can use as an input. Select the file you want to use.\nThe FASTQC implementation in Galaxy can take an optional file containing a list of contaminants. If you don't specify one, FASTQC will look for standardly used Illumina adapters. 
<!--As another option the tool takes a custom limits .txt file that allows setting the warning thresholds for the different modules and to specify which modules to include in the output.-->\n\nIn most cases you keep the default settings and click **Execute**.\n\n\n## Quality control in GenePattern\n\n[Genepattern](http://www.broadinstitute.org/cancer/software/genepattern/) is very similar to Galaxy. It's as user-friendly as Galaxy, allows analysis of NGS data just like Galaxy... \n\nIt provides easy access to hundreds of tools for different kinds of analyses (e.g. RNA-seq, microarray, proteomics and flow cytometry, sequence variation, copy number and network analysis) via a web browser.\n\n**Links**\n\n- [BITS Genepattern server](https://dev.bits.vib.be:8686/gp/pages/index.jsf)\n- [fasta file containing Arabidopsis adapter sequence](https://data.bits.vib.be/pub/trainingen/NGSIntro/adapter.fa)\n- [fasta file containing E. coli adapter sequence](https://data.bits.vib.be/pub/trainingen/NGSIntro/adapterEcoli.fa)\n- [Overview of Trimmomatic parameters](http://wiki.bits.vib.be/index.php/Parameters_of_Trimmomatic)\n\n\nConsult the [GenePattern tutorial](https://wiki.bits.vib.be/index.php/GenePattern_tutorial) for more info.\n\n### Running Groomer in GenePattern\n\nThe Broad Genepattern server does not contain the Groomer tool, but we have added the tool to our BITS Genepattern server. \n\n- Search for the Groomer tool in GenePattern.\n- Define the parameters: one of the parameters you need to define is **Input format**: the encoding of the fastq file you want to clean. The encoding is important because it determines the offset of the quality scores (ASCII offset 33 or ASCII offset 64). 
If you're not sure you can check the encoding of your file in the FastQC report (take into account that FastQC sporadically makes the wrong guess).\n ![](../../images/GP9.png)\n- Run the Groomer tool.\n\n### Running FastQC in GenePattern\n\n- Search for the FASTQC tool\n- Fill in the parameters\n- Run the FASTQC tool\n\nYou can open the resulting HTML report in your browser: \n\n- Click the name of the output file at the bottom of the page\n- Select **Open Link**\n ![](../../images/GP18.png)\n\n### Running Trimmomatic in GenePattern\n\nIn GenePattern you can improve the quality of your NGS data using the Trimmomatic tool. \n- Search for the Trimmomatic tool\n- Fill in the parameters: See [this page](http://wiki.bits.vib.be/index.php/Parameters_of_Trimmomatic) for an overview of the Trimmomatic parameters.\n- Run Trimmomatic\n\n## Removing adapters using command line tools\n\nSee [exercise on using cutadapt to trim adapter sequences](http://wiki.bits.vib.be/index.php/Linux_command_line#Improving_the_quality_of_the_data)\n","After quality control, the next step is to align the reads to a reference sequence. \nThe reference is in most cases the full genome sequence but sometimes, a library of EST sequences is used. In either way, aligning your reads to a reference sequence is called mapping.\nThe most used mappers are [BWA](http://bio-bwa.sourceforge.net/) and [Bowtie](http://bowtie-bio.sourceforge.net/bowtie2/index.shtml) **for DNA-Seq data** and [Tophat](http://tophat.cbcb.umd.edu/) , [STAR](https://github.com/alexdobin/STAR) , [STAR article](http://bioinformatics.oxfordjournals.org/content/early/2012/10/25/bioinformatics.bts635) , or [HISAT2](http://www.ccb.jhu.edu/software/hisat/index.shtml) **for RNA-Seq data**.\nMappers differ in methodology, parameters, how fast and how accurate they are and whether they tolerate spliced alignments or not (relevant for RNA-Seq). 
Bowtie is faster than BWA, but loses some sensitivity (does not map an equal amount of reads to the correct position in the genome as BWA). BWA and Bowtie cannot align spliced reads while Tophat, STAR and HISAT2 can.\nAt the moment STAR is the most popular RNASeq mapper and HISAT2 is being pushed over TopHat.\n\n## Mapping in Galaxy\n\nLinks:\n - [Mapped data for Arabidopsis in the European Galaxy](https://usegalaxy.eu/u/janick/h/cleandatamapped)\n - [paper on intron sizes in various organisms](https://www.ncbi.nlm.nih.gov/pubmed/10454621)\n - [Sorted and indexed data for *E.coli* in the European Galaxy](https://usegalaxy.eu/u/janick/h/ecolisorted)\n - [fasta file containing the E.coli K12 genome](http://data.bits.vib.be/pub/trainingen/NGSIntro/EcoliK12.fasta)\n - [Bowtie manual](http://bowtie-bio.sourceforge.net/manual.shtml)\n\n### Running RNA STAR in Galaxy\n\nSTAR has a large number of parameters, we'll give an overview of the most important ones:\n - **Single end or paired end data**: the parameters you have to set will adjust accordingly\n - **RNASeq Fastq file**: STAR automatically detects files it can use as input, select the file you want to map.\n - **Custom or built-in reference genome**: many reference genomes are built-in in Galaxy just select the correct organism from the list of reference genomes.\n - **Length of the genomic sequence around annotated junctions**: the default is 100 but the ideal value is **read length-1**.\n - **Count number of reads per gene**: map reads and create a count table (table with counts of how many reads map to each gene).\n - **Would you like to set output parameters (formatting and filtering)?**: in most cases **yes** because the default settings will most likely not be ideal for your data\n - **Would you like to set additional output parameters (formatting and filtering)?**: in most cases **yes** because the default settings will most likely not be ideal for your data\n - **Would you like unmapped reads included in 
the SAM?**: by default STAR does not save the unmapped reads, so if you want to analyze them (BLAST...) you need to change this setting.\n - **Maximum number of alignments to output a read's alignment results, plus 1**: default is 10 meaning that reads that map to more than 10 locations in the genome are excluded from the results.  Multimappers are common when you map short reads. What to do with them is a complicated issue. You could use them to represent expression of whole classes/families of RNAs (e.g. transposons, gene families...). It can be useful to have two separate files: one for unique mappers and one for multimappers.\n - **Maximum number of mismatches to output an alignment, plus 1**: maximum number of mismatches for a read (single-end) or a pair of reads (paired-end). Default is 10. The value you should choose is dependent on the read length. For short quality trimmed reads you typically allow 5% mismatches.\n - **Maximum ratio of mismatches to read length**: how many mismatches you allow in the alignment (number is represented as a fraction of the total read length). Typically you choose 0.05 (= 5%) but this depends on the quality of the reads. In case of reads with many sequencing errors you need to increase the fraction of mismatches you allow.\n - **Other parameters (seed, alignment, limits and chimeric alignment)**: choose **extended parameter list** because the default settings will most likely not be ideal for your data\n - **Alignment parameters: Maximum intron size**: maximum distance between reads from a pair when mapped to the genome.\n - **Two-pass mode: Use two pass mode to better map reads to unknown splice junctions**: for the most accurate mapping, you should run STAR in 2-pass mode. It allows to detect more reads mapping to novel splice junctions. 
The basic idea is to run STAR with standard parameters, then collect the junctions detected in this first pass, and use them as annotated junctions for the second pass mapping.\n - **Parameters related to chimeric reads**: chimeric reads occur when one read aligns to two distinct portions of the genome. In RNA-Seq chimeric reads may indicate the presence of chimeric genes. Many chimeric genes form through errors in DNA replication or DNA repair so that pieces of two different genes are combined. Chimeric genes can also occur when a retrotransposon accidentally copies the transcript of a gene and inserts it into the genome in a new location. Depending on where the new retrogene appears, it can produce a chimeric gene...\n\nClick **Execute** to start the mapping.\n\nSTAR produces 3 result files:\n - **bam** file containing all alignments (multimappers, reads that map to multiple locations, are printed at each location)\n - **tab** file containing all detected splice junctions\n - **log** file containing mapping statistics\n\n### Running Bowtie for Illumina (= Bowtie1) in Galaxy\n\nThis is an overview of the main parameters:\n - **Will you select a reference genome from your history or use a built-in index?** Galaxy has many built-in genomes for Bowtie 1 but you can also use a fasta file from the history when the organism you work is not supported.\n - **Is this library mate-paired?** single end or paired end ?\n - **FASTQ file** Galaxy will automatically detect potential input files, select the file you want to use as input.\n - **Bowtie settings to use** ask for full parameter list since the defaults are most likely not ideal for your data\n - **Trim n bases from high-quality (left) end of each read before alignment (-5)** trim bases from high-quality (left) end of each read before alignment, default is 0.\n - **Trim n bases from low-quality (right) end of each read before alignment (-3)** trim bases from low-quality (right) end of each read before alignment, 
default is 0.\n - **Alignment mode** when the default -n option is used, bowtie determines which alignments are valid according to the following policy: alignments may have no more than n mismatches (where n is a number 0-3, set with **Maximum number of mismatches permitted in the seed (-n)**) in the first l bases (where l is a number 5 or greater, set with **Seed length (-l)**) on the high-quality (left) end of the read. The first l bases are called the \"seed\". The sum of the Phred quality scores at all mismatched positions (not just in the seed) may not exceed e (set with **Maximum permitted total of quality values at all mismatched read positions (-e)**).\nIn -v mode, alignments may have no more than v mismatches, where v may be a number from 0 through 3 set using the **Maximum number of mismatches (-v)** option. Quality values are ignored.\n - **Suppress all alignments for a read if more than n reportable alignments exist (-m)**  default is no limit. Bowtie is designed to be very fast for small -m but can become significantly slower for larger values of -m\n\n### Download mapping results from Galaxy\n\nClick the name of the file containing the sorted alignments in the history.\nClick the **download** button at the bottom of the description. You should download two files: the bam file containing the mapping results and an index file (.bai) for fast access to the bam file. In Galaxy, indexing of bam files is done automatically. You need to download both files into the same folder. 
\n ![](../../images/IGV2.png)\n\n## Mapping in GenePattern\n\n**Links**:\n - [Parameters of STAR](https://wiki.bits.vib.be/index.php/Parameters_of_STAR)\n - [paper on intron sizes in various organisms](https://www.ncbi.nlm.nih.gov/pubmed/10454621)\n - [fasta file containing the E.coli K12 genome](http://data.bits.vib.be/pub/trainingen/NGSIntro/EcoliK12.fasta)\n - [Bowtie manual](http://bowtie-bio.sourceforge.net/manual.shtml)\n\n### Running STAR in GenePattern\n\n - Search for the STAR aligner tool\n - Fill in the parameters of STAR, you can find a detailed description of the parameters on [this page](https://wiki.bits.vib.be/index.php/Parameters_of_STAR)\n - Run STAR\n - Store the resulting bam file in your uploads folder\n - View the …align_summary.txt file in your browser to get an overview of the mapping results.\n\n### Running Bowtie_1 indexer in GenePattern\n\nSearch for the Bowtie_1 indexer tool. Here's a detailed description of the main parameters:\n\n - **fasta files** one or several fasta files containing the DNA sequence of the genome to index.\n - **index name** a name for the bowtie 1 index files.\n\nRun the indexer, it will produce 6 files:\n - <index name>.1.ebwt\n - <index name>.2.ebwt\n - <index name>.3.ebwt\n - <index name>.4.ebwt\n - <index name>.rev.1.ebwt\n - <index name>.rev.2.ebwt\n\nFor easy handling in GenePattern Bowtie_1.indexer puts all these files in a ZIP archive, which can be given as input to Bowtie_1.aligner. Store the resulting zip file in your uploads folder.\n\n### Running Picard SortSam in GenePattern\n\nSome downstream tools cannot handle raw bam files since they are so large and chaotic, they need sorted and indexed bam files. Bam files can be sorted and indexed with samtools or Picard. \n - Search for a tool that can sort sam or bam files\n - Sort the file, keep the results in bam format. 
Sorting will add an index to the bam file (this is the .bai file that is generated)\n - Download the sorted bam and bai files to your computer\n\n## Mapping via command line tools\n\nOn our Linux command line page you can find:\n[an exercise on mapping with Bowtie](http://wiki.bits.vib.be/index.php/Linux_command_line#Mapping_reads_with_Bowtie) via the command line.\n\nWe will handle the mapping in detail in advanced NGS trainings, so we are not going into more detail now.\n\n## Visualisation of mapping results in IGV\n\n - [bam-file for *Arabidopsis thaliana* from GenePattern](http://data.bits.vib.be/pub/trainingen/NGSIntro/GP_Athaliana.bam)\n - [bai-file for *Arabidopsis thaliana* from GenePattern](http://data.bits.vib.be/pub/trainingen/NGSIntro/GP_Athaliana.bai)\n - [bam-file for *Arabidopsis thaliana* from Galaxy](http://data.bits.vib.be/pub/trainingen/NGSIntro/Galaxy_Athaliana.bam)\n - [bai-file for *Arabidopsis thaliana* from Galaxy](http://data.bits.vib.be/pub/trainingen/NGSIntro/Galaxy_Athaliana.bai)\n - [bam-file for *E. coli* from GenePattern](http://data.bits.vib.be/pub/trainingen/NGSIntro/GP_Ecoli.bam)\n - [bai-file for *E. coli* from GenePattern](http://data.bits.vib.be/pub/trainingen/NGSIntro/GP_Ecoli.bai)\n - [bam-file for *E. coli* from Galaxy](http://data.bits.vib.be/pub/trainingen/NGSIntro/Galaxy_Ecoli.bam)\n - [bai-file for *E. coli* from Galaxy](http://data.bits.vib.be/pub/trainingen/NGSIntro/Galaxy_Ecoli.bai)\n\nIGV needs a sorted bam file and an index (.bai) file.\n\n - Open IGV by clicking its icon on the Desktop. Be patient, it might take a few minutes for the program to start.\n - If necessary change the genome in IGV from **Human hg19** to the one you used in the mapping.\n\n ![](../../images/IGV3.png)\n - Load the mapped reads via **File** in the top menu and **Load from File**.\n\n ![](../../images/IGV4.png)\n\nSelect the .bam file to open. 
You don't need to load the .bai file, it's sufficient that it is present in the same folder as the .bam file. \n - This loads the data into the center view. At this point, you can't see the reads, you have to zoom in to view them.\n - To zoom in on a gene type its accession number in the top toolbar and click **Go**:\n\n ![](../../images/IGV5.png)\n\n - Zooming in can be done using the zoom bar in the top toolbar:\n\n ![](../../images/IGV6.png)\n\nThe reads are represented by grey arrows, the arrow indicating the orientation of the mapping. Hovering your mouse over a read gives additional info on the mapping. The colored nucleotides indicate mismatches between the read and the reference.\n\nBy default IGV calculates and displays the coverage track (red) for an alignment file. When IGV is zoomed to the alignment read visibility threshold (by default 30 KB), the coverage track displays the depth of the reads displayed at each locus as a gray bar chart. If a nucleotide differs from the reference sequence in greater than 20% of quality weighted reads, IGV colors the bar in proportion to the read count of each base (A, C, G, T). You can view count details by hovering the mouse over a coverage bar:\n\n ![](../../images/IGV8.png)\n\n## Quality control of mapping results using Qualimap\n\nQualimap is very similar to FastQC. It has an easy-to-use user interface and works on any platform: Windows, Mac, Linux. 
It's installed on the BITS laptops: you can run it by clicking the icon on the desktop.\nYou can do several analyses in Qualimap: we will focus on the BAM Quality Control and the RNA-Seq Quality Control.\n\n### Starting a BAM QC analysis in Qualimap\n\n - [gtf-file for *Arabidopsis thaliana* from Ensembl Plants](http://data.bits.vib.be/pub/trainingen/NGSIntro/Arabidopsis_thaliana.TAIR10.31.gtf)\n\nIn the top menu, expand **File** and select **New analysis** and **BAM QC**\n\n ![](../../images/GP22.png)\n\nA parameters form is opened.\n\nSelect a .bam file as input file and leave all other parameters at their default setting:\n\n ![](../../images/GP23.png)\n\n - With the default settings the mapping is evaluated over the full reference sequence but you can limit the evaluation to certain regions by selecting the **Analyze regions** option and providing a gtf file containing the regions of interest.\n - There are parameters for specific types of NGS experiments e.g. stranded libraries (**Library strand specificity**) and paired-end reads (**Detect overlapping paired-end reads**).\n\nA BAM Quality Control report is generated, very similar to the report that FastQC produces. Let's take a look at some of the figures in the report:\n\n - **Coverage across reference**: In the top figure you see the coverage (red line; average coverage in a window of a certain size) across the reference sequence. In the bottom figure you see the GC content (black line) across the reference.\n - **Coverage histograms**: What percentage of the genome is not covered, covered at least once....\n\n### Starting a RNA-Seq QC analysis in Qualimap\n\nSpecifically for RNA-Seq data you can  do a RNA-Seq QC in Qualimap. In the top menu, expand **File** and select **New analysis** and **RNA-seq QC**\n\n ![](../../images/GP25.png)\n\nA parameters form is opened. \n\n - You need to provide an annotation file so Qualimap knows where the exons are located on the reference sequence. 
This annotation file is in gtf format and can be downloaded from the Ensembl or [EnsemblGenomes](http://ensemblgenomes.org/info/access/ftp) ftp site. GTF stands for general transfer format, used for linking features (exons, introns, genes, transcripts, repeats, mutations...) to locations in the genome. \n\n ![](../../images/GP26.png)\nSelect the .gtf file as annotation file\n - Select the .bam file as input file \n - Leave all other parameters at their default setting\n\n ![](../../images/GP28.png)\n\nA RNA-seq Quality Control report is generated.\n\n*Coverage Profile (Total)*: The plot shows mean coverage profile of the transcripts. All transcripts with non-zero coverage are used to calculate this plot.\n*Coverage Profile (Low)*: The plot shows mean coverage profile of 500 lowest-expressed genes.\n*Coverage Profile (High)*: The plot shows mean coverage profile of 500 highest-expressed genes.\n*Coverage Histogram (0-50x)*: Coverage of transcripts from 0 to 50X. If certain genes have higher coverage level they are added to the last column (50X).\n*Junction Analysis*: This pie chart shows analysis of junction positions in spliced alignments. Known category represents percentage of alignments where both junction sides are known. Partly known represents alignments where only one junction side is known. All other alignments with junctions are marked as Novel.\n\n[Solutions of Group Exercises](http://data.bits.vib.be/pub/trainingen/NGSIntro/Solutions.pdf)\n\n","[Download the slides for this training session](http://data.bits.vib.be/pub/trainingen/NGSIntro/NGSBroad.pdf).\n\nThe dataset comes from a 2014 publication on *Human Airway Smooth Muscle Transcriptome Changes in Response to Asthma Medications*.\nThe goal of the analysis is to find DE genes (differentially expressed: genes with different expression levels in one group of samples compared to other groups of samples). 
Typically the groups of samples represent different treatments: one consisting of biological replicates that have received a control treatment, others consisting of replicates that received a specific biological treatment.\n\nIn this experiment the data consists of four groups (**treatment**):\n - The **dex** group: samples from 4 cell lines after treatment with the glucocorticoid dexamethasone (dex), used as asthma medication\n - The **alb** group: samples from the same cell lines after treatment with albuterol (alb), another asthma medication\n - The **alb_dex** group: samples from the same cell lines after treatment with both asthma medications\n - The **untreated** group: samples from the same untreated cell lines cultured in parallel.\n\nSo all samples come from the same 4 cell lines (**cells**).\n```\n#   run_accession  read_count  samples            cells    treatment\n1   SRR1039508     22935521    CL1_untreated   CL1   untreated\n2   SRR1039509     21155707    CL1_Dex         CL1   Dex\n3   SRR1039510     22852619    CL1_Alb         CL1   Alb\n4   SRR1039511     21938637    CL1_Alb_Dex     CL1   Alb_Dex\n5   SRR1039512     28136282    CL2_untreated   CL2  untreated\n...\n```\nThe data comes from a paired-end sequencing experiment so we have two files for each sample. \nFor simplicity we will do the analysis on a single sample, SRR1039509, obtained from dexamethasone treated cell line 1.\n\n### Quality checks\n\nBefore you analyze the data, it is crucial to check the quality of the data.\nWe use the standard tool for checking the quality of NGS data generated on the Illumina platform: [FASTQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/)\n\nCorrect interpretation of the FASTQC report is very important.\nIf the quality of your data is good, you can proceed with the analysis.\n**!! If the quality of your data is very bad, don't immediately throw the data in the recycle bin but contact an expert and ask for his/her opinion. 
!!**\n\nDouble click the FASTQC icon on the Desktop and open the fastq file (it's in the summer folder of your home folder). FASTQC consists of multiple modules each checking a specific aspect of the quality of the data. On the first page you can select the module you wish to view.\nThe names of the modules are preceded by an icon that reflects the quality of the data. The icon indicates whether the results of the module seem normal (green tick), slightly abnormal (orange triangle) or very unusual (red cross).\n\nHowever, these evaluations must be interpreted in the context of what you expect from your library. A 'normal' sample as far as FastQC is concerned is random and diverse. Some experiments may be expected to produce libraries which are biased. You should treat the icons as pointers to where you should concentrate your attention on and understand why your library may not look normal.\n\n#### General information on the reads\n\n> How long are the reads in this file ?\n>  63 nucleotides\n>  ![](../../images/FASTQCRNASeqB1.png)\n\n#### Checking the quality scores of the reads\n\nPhred scores represent base call quality. The higher the score the more reliable the base call. Often the quality of reads degrades over the length of the read. Therefore, it is common practice to determine the average quality of the first, second, third,...nth base by plotting the distribution of the Phred scores on each position of the reads using box plots.\n\n> Evaluate the quality scores per position\n>  Go to the **Per base sequence quality** module: \n>  ![](../../images/FASTQCRNASeqB2.png)\nThe y-axis on the graph shows the Phred quality scores, the x-axis shows the position in the read. So again you see that the reads are 63 bases long.\n\nThe average Phred score is depicted by the blue line, the median Phred score by the red line. The yellow boxes contain 50% of all Phred scores on a certain position. As expected the quality is steadily declining. 
\n\nThe background of the graph divides the y-axis into very good quality calls (green), calls of reasonable quality (orange), and calls of poor quality (red; Phred score < 20). As you can see the Phred scores of this data set are very high.\n\nRemark: In new Illumina kits the sequence quality goes up a bit first before it steadily declines.\n\nInstead of showing the quality of each position separately, you can calculate the average Phred score of each read and show a cumulative plot of the average qualities of all the reads.\n\n> Evaluate the overall quality\n> Go to the **Per sequence quality scores** module: \n ![](../../images/FASTQCRNASeqB3.png)\nThe y-axis on the graph shows the number of reads, the x-axis shows the Phred score.\n\nMost reads have an average Phred score of 40. This is a very high score (Phred scores of Illumina calls range from -5 to 41).\n\n\nIllumina flow cells are divided into tiles. To see if there is a loss in quality associated with specific parts of the flow cell, FASTQC calculates average quality scores for each tile across all positions in the reads. \n\n> Evaluate the quality per tile\n> Go to the **Per tile sequence quality** module: \n ![](../../images/FASTQCRNASeqB4.png)\nThe y-axis on the graph shows the tile number, the x-axis shows the position in the reads.\n\nThe plot shows the deviation from the average tile quality. The colours are on a cold to hot scale, with blue being the average tile quality and other colours representing tiles where the quality was different from the average. In the example you see that a few tiles show poor quality over a few positions. A good plot should be blue all over. 
Although the plot isn't entirely blue the results of this module are still acceptable.\n \n\nReasons for seeing warnings or failures on this plot could be transient problems such as bubbles going through the flow cell, or they could be more permanent problems such as smudges or debris on/in the flow cell or a very high density of clusters in a tile. The most common cause of warnings in this module is the flow cell being overloaded.\n\nIt is recommended to ignore warnings/failures which mildly affect a small number of tiles for only a few cycles, and to only pursue larger effects which show high deviation in scores, or which persist for a high number of cycles.\n\n#### Checking duplicates\n\nIn a diverse library generated by shearing genomic DNA, most fragments will occur only once. A low level of duplication may indicate a very high level of coverage of some target sequences, but a high level of duplication indicates a bias (eg PCR overamplification, contamination of the library with adapter dimers...).\n\nThe **Sequence duplication levels** module counts the degree of duplication for every read and creates a plot showing the relative number of reads with different degrees of duplication.\n\n> Evaluate the sequence duplication levels\n> Go to the **Sequence duplication levels** module: \n ![](../../images/FASTQCRNASeq5.png)\nThe y-axis on the graph shows the percentage of occurrence, the x-axis shows the duplication level.\n\nThe blue line represents the counts of all duplicated sequences. The percentage is computed relative to the total number of reads.\n\nThe red line represents the number of **distinct** sequences that are duplicated. The percentage is computed relative to the total number of **distinct** sequences in the data (see slides).\n\nSeeing duplication in RNA-Seq data is normal. To sequence lowly expressed genes you must oversequence the genes with high expression levels. 
However, RNA-Seq libraries may be contaminated with adapters.\n\nThe presence of contaminating adapters will produce spikes on the far right of the plot. These peaks will appear in the blue trace as they make up a high proportion of the original library, but usually disappear in the red trace as they make up a small proportion of the deduplicated set (you only use 2 adapters to create a library).\n\nAdditionally, as in every RNA-Seq file you also see a substantial number of oversequenced reads with lower duplication levels.\n\n\nSince the reads are random fragments from the genome sequence, the contribution of A, C, G and T should be identical on each position.\n\n> Evaluate the per base sequence content\n> Go to the **Per base sequence content** module: \n\n ![](../../images/FASTQCRNASeqB6.png)\n\nThe y-axis on the graph shows the percentage of occurrence, the x-axis shows the position in the read.\n\nOn this plot you should see straight lines for the four nucleotides. In reality you often see that this is not the case for the first positions. Libraries produced by priming using random hexamers (nearly all RNA-Seq libraries) and those which were fragmented using transposases inherit an intrinsic bias in the first positions of the reads. This bias does not come from a single sequence, but because of enrichment of a number of different K-mers at the 5' end of the reads. So it isn't something you can correct by trimming (you do not have one specific sequence that you can remove from the reads). In most cases it doesn't adversely affect the downstream analysis but it will produce a warning or failure in this module. \n\n\nDuplicates often arise because libraries are contaminated with adapter sequences. You can check for contaminating sequences using the **Overrepresented sequences** module: it lists all sequences which make up more than 0.1% of the total. 
For each sequence in the list the module will look for matches in a database of common contaminants and will report the best hit it finds.\n\n> Which contaminants are found in this library ?\n> Go to the **Overrepresented sequences** module: \n\n ![](../../images/FASTQCRNASeqB7.png)\n\nAs you can see, a single illumina adapter was found contaminating the library to a small extent: 0,5% of the library consists of adapter sequences.\n\n\nThe **Overrepresented sequences** module shows contamination with full adapter sequences (= reads that completely correspond to adapters), but often the library also contains reads that have remnants of adapter sequences at their 3' ends. These reads are not detected by the **Overrepresented sequences** module. \n\nThis was the quality check of one file from one of the 16 samples. We do not have the time to do all quality checks. But in the real world, you would have to do this for each of the 32 files of this experiment.\n\n### Improving the quality of the data\nThere are many possible steps to improve the quality of the data. Due to time constraints, we are going to focus on\n\n - removing adapter sequences, both filtering full adapter sequences and trimming remnants of adapters from the 3' ends of the reads\n - filter reads of low quality\n\n\n\nThere are many tools to remove adapters from reads, but we chose cutadapt because it works on paired-end reads and it can do the two steps at the same time (removing adapters and filtering reads of poor quality).\n\nTo make it feasible to go through the complete RNA-Seq workflow during the training, we have limited the data set to reads that map to chromosome 22. The data come from a paired-end experiment so we have two files with reads. 
You can download these limited data sets: [data_set_1](http://data.bits.vib.be/pub/trainingen/NGSIntro/chr22_SRR1039509_1.fastq.gz) (first reads of a pair that map to chromosome 22) and [data_set_2](http://data.bits.vib.be/pub/trainingen/NGSIntro/chr22_SRR1039509_2.fastq.gz) (second reads of a pair that map to chromosome 22). On the bits laptops, the files are already present in the /home/bits/NGS/RNASeq/ folder.\n\nRemember that the **Overrepresented sequence** module of the FASTQC report showed contamination with the following TruSeq adapter in the first file of sample SRR1039509: \n```\nACACGTCTGAACTCCAGTCACTGACCAATCTCGTATGCCGTCTTCTGCTTGAAAAAAAAAAAA\n```\nWe will remove this adapter from the file containing the reads that map to chromosome 22.\n\nOpen the terminal.\n\n> Make a variable called adapter to hold the sequence of the adapter\n|-\n```\nadapter=ACACGTCTGAACTCCAGTCACTGACCAATCTCGTATGCCGTCTTCTGCTTGAAAAAAAAAAAA\n```\nRemember to use Shift + Insert to paste in the terminal !\n\n\n> Check the cutadapt option for defining the number of mismatches you allow (= error rate)\n> Cutadapt is not a regular bash command but a Python script so it doesn't have a manual. So to open the cutadapt help type:\n```\ncutadapt -h\n```\n\nScrolling down the help file shows that the **-e** option defines the maximum allowed error rate: the default is 0.1 meaning that it allows one mismatch every 10 nucleotides. Adapters are identified by aligning each read to the adapter sequence: if the frequency of mismatches in the alignment is below the allowed error rate then the adapter is trimmed from the read.\n\n\n> Check the option for defining the adapter sequence\n> In the help file you see that you have multiple options:\n\n - **-a** to trim adapter sequences at the 3' end of the reads. In most cases this is the end that is causing the problems: when small RNA fragments are sequenced, the resulting reads can be longer than the RNA fragments. 
As a result they will contain (parts of) the adapter at their 3'end. In long reads the adapter might even lie within the read:\n```\nMYSEQUEN                         (no adapter contamination)\nMYSEQUENCEADAP                   (part of adapter at 3' end)\nMYSEQUENCEADAPTER                (adapter at 3' end)\nMYSEQUENCEADAPTERSOMETHINGELSE   (adapter within the read)\n```\nCutadapt will cut the adapter (part) and all sequence following it resulting in:\n```\nMYSEQUEN\nMYSEQUENCE\nMYSEQUENCE\nMYSEQUENCE\n```\n\n - **-g** to trim adapter sequences at the 5' end of the reads. These adapters are expected to appear at the start of a read (where they can be just partially there) or somewhere within the read:\n```\nADAPTERMYSEQUENCE              (5' end)\nDAPTERMYSEQUENCE               (partial)\nTERMYSEQUENCE                  (partial)\nSOMETHINGADAPTERMYSEQUENCE     (within)\n```\nIn all cases, the adapter itself and the sequence preceding it will be removed, leaving in all examples above:\n```\nMYSEQUENCE\n```\n\n - **-b** to trim adapters at the 3' or 5' end of the read. If there is at least one base before the adapter, then the adapter is trimmed as a 3’ adapter and the adapter itself and everything following it is removed. 
Otherwise, the adapter is trimmed as a 5’ adapter and it is removed from the read, but the sequence after it remains:\n```\nBefore trimming \t        After trimming \t\nMYSEQUENCEADAPTERSOMETHING \tMYSEQUENCE \nMYSEQUENCEADAPTER \t        MYSEQUENCE\nMYSEQUENCEADAP \t                MYSEQUENCE\nMADAPTER \t                M\nADAPTERMYSEQUENCE \t        MYSEQUENCE\nPTERMYSEQUENCE \t                MYSEQUENCE\nTERMYSEQUENCE \t                MYSEQUENCE\n```\n\n\nSince we have contaminating adapter at the 3'end we'll take the -a option\n\n\nWe will use a few other options:\n\n - Discard trimmed reads that are shorter than 20 bases after trimming using the **-m** option \n - Trim low-quality bases at the 3'ends from reads before adapter removal if their Phred score is less than 10 using the **-q** option\n\n\n\n> Check the -q option in the help file\n> Scroll down to the **Additional modifications to the reads:** section to check the usage of the -q option:\n\n ![](../../images/CLcutadapt1.png)\n\n\n> Check the -m option in the help file\n> Scroll up to the **Options for filtering of processed reads:** section to check the usage of the -m option:\n\n ![](../../images/CLcutadapt2.png)\n\n\nRemember that we are working with paired-end reads !\n\n> Check the usage of cutadapt for paired-end reads in the help file\n> Scroll up to the start of the help file to check the usage of cutadapt for paired-end reads:\n\n ![](../../images/CLcutadapt3.png)\n\n\nSince we have to specify the location in the file system of two input and two output files, we are going to create a variable called folder for holding the path.\n\n> Create the variable folder\n|-\n```\nfolder=/home/bits/NGS/RNASeq/\n```\n\nRemember to use tab autocompletion.\n\n> Clean up the files using the knowledge you have obtained\n|-\n```\ncutadapt -a ${adapter} -q 10 -m 20 -o ${folder}chr22_SRR1039509_1t.fastq -p ${folder}chr22_SRR1039509_2t.fastq ${folder}chr22_SRR1039509_1.fastq.gz ${folder}chr22_SRR1039509_2.fastq.gz\n```\n\n 
- **-a** to specify the sequence of the adapter and to specify that we want to cut adapter sequences from the 3'ends of the reads\n - **-q** to specify the minimal quality score is 10. Bases at the 3'end of the reads with a quality score below 10 will be removed\n - **-m** to specify the minimal length of the read after trimming. Reads smaller than 20 bases will be removed\n - **-o** to specify the location in the file system where you want to write the output files\n - **-p** to specify the location in the file system where you want to write the results for the reads from the other end of the fragments. As such you specify that these are paired-end reads.\n\n\n\nIn the cutadapt stats you see we only trimmed one file (containing sequences from one end of the fragments). \n\n ![](../../images/CLcutadapt4.png)\n\nThat is because the sequences from the other end of the fragments contain another adapter: \n```\nGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATTAAAAAAAAAAAAAAAAAA\n```\n\n> Redefine the adapter variable\n|-\n```\nadapter=GTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATTAAAAAAAAAAAAAAAAAA\n```\nRemember to use Shift + Insert to paste in the terminal.\n\n\n> Run cutadapt again to remove the other adapter\n> Remember to switch the two files now:\n```\ncutadapt -a ${adapter} -q 10 -m 20 -o ${folder}chr22_SRR1039509_2trim.fastq -p ${folder}chr22_SRR1039509_1trim.fastq ${folder}chr22_SRR1039509_2t.fastq ${folder}chr22_SRR1039509_1t.fastq\n```\nRemember to use the up arrow to go back in the history.\n\n\nNow you see in the cutadapt stats that you have trimmed adapters from (both files) both ends of the fragments.\n\n ![](../../images/CLcutadapt5.png)\n\n### Check the quality of the cleaned reads\n\nI have done this also for the complete files and rechecked the trimmed reads in FASTQC. 
You can download [the report for the complete trimmed reads from sample SRR1039509](http://data.bits.vib.be/hidden/jhslbjcgnchjdgksqngcvgqdlsjcnv/ngsrnade2015/ex1/fastqc_SRR1039509_1/trimmed_reads/SRR1039509_1.fastq.pdf).\n\n> Are all the reads still 63 nt long after trimming ?\n> In the **Basic statistics** tab you see that the length of the reads varies between 20 (in the cutadapt command we set 20 as the minimal length for a read to be retained) and 63 (reads that were not trimmed):\n ![](../../images/FASTQCRNASeqB9.png)  \n\n> Have the quality scores of the reads significantly changed after trimming ?\n> The **Per base sequence quality** is similar to that of the untrimmed file, as is the **Per sequence quality**.\n\nQuality scores have of course changed a bit since we trimmed low quality bases, but the initial quality of the reads was so good that you don't really see the effect of the trimming.\n\n> Has the per base sequence content improved as a result of the trimming ?\n> The **Per base sequence content** - the tool to detect adapter contamination - plot has improved, it's even more stable now.\n\n ![](../../images/FASTQCRNASeq10.png) \n\n> What is the little bump you see in the Sequence length distribution plot ?\n> \nApparently many reads contain 3 bases that belong to the adapter. These 3 bases have been cut leaving reads of 60 nt long: this is the small peak you see on the plot at length 60. All intermediate lengths of adapter contamination have been detected but in such a small fraction of reads that you cannot see the influence of the trimming on the plot.\n\n> Are there any overrepresented sequences left ?\n> No.\n\n ![](../../images/FASTQCRNASeq11.png) \n\n> Are there any overrepresented heptamers ?\n> FASTQC still detects overrepresented heptamers although at much lower counts than before. \n\n ![](../../images/fastqcTrim5.png)\n\nFastQC confirmed the removal of the two adapters by cutadapt. 
\n\n### Mapping\n\n#### Obtaining the reference genome\n\nBefore we can do any mapping we need a reference sequence first. We will map the reads against the hg19 human genome build. Mapping requires a specially formatted file (hash database). This hash database can be derived from the reference genome using the bowtie2 tools. However, for some organisms like human the hash table can be obtained 'ready-to-use' from the bowtie2 website. If you also need a fasta copy of the hg19 genome, you can obtain it from the hash table using bowtie2. \nWe can download the hash table from the [bowtie2 website](ftp://ftp.ccb.jhu.edu/pub/data/bowtie2_indexes/hg19.zip) using the **wget** command. It takes about 90 minutes to download so we are not going to download it during the training, it is already present in the /home/bits/NGS/RNASeq/reference/ folder.\n\nGo to this folder and look at its contents. As you can see the file is a compressed .zip file\n \n> Unzip the reference genome file\n> To decompress a .zip file you need the **unzip** command:\n```\nunzip hg19.zip\n```\n\nIt will take a few minutes and it will generate 7 files:\n\n ![](../../images/MapRNASeq1.png) \n\nTo transform the hash table into a fasta sequence we use bowtie2. From the [bowtie2 documentation](http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml#the-bowtie2-inspect-index-inspector) we learn that we should use bowtie2-inspect without options to generate the fasta file.\n\n> Generate the human reference genome sequence in fasta format.\n> The required command simply is:\n```\nbowtie2-inspect hg19 > hg19.fa\n```\n\nThe **>** sign indicates that the output of the command (so the fasta file with the reference sequence) needs to be written to a file called **hg19.fa**.\nIt will take a few minutes to generate the file. \n\n ![](../../images/MapRNASeq2.png)\n\n#### Installing TopHat\n\nMapping RNA-Seq reads is done using the TopHat tool. 
So we need to install the [TopHat tool](http://ccb.jhu.edu/software/tophat/tutorial.shtml).\n\n> Go to the TopHat website and fetch the download link.\n> \n - Go to the [TopHat website](http://ccb.jhu.edu/software/tophat/tutorial.shtml)\n - Right click the Linux download link\n - Select **Copy Link Location**\n\n> Download the file.\n> \n - Go to the terminal\n - Navigate to the /usr/bin/NGS/ folder\n - Type **wget **\n - Press the Shift and Insert keys simultaneously to paste the url\n\nTopHat is downloaded as a .tar.gz file \n\n> Decompress the file\n> For decompressing a .tar.gz file you need the following command:\n```\ntar -xzvf tophat-2.1.0.Linux_x86_64.tar.gz\n```\nRemember to use tab autocompletion !\n\nThis creates a new folder:\n\n ![](../../images/MapRNASeq3.png) \n\nGo into the tophat folder and type:\n```\n./tophat\n```\n\nIf this opens the help of tophat, it means the software has been installed correctly. It does not mean that you can use the software now. Well you can but you will always have to type the commands from inside the tophat folder or provide the full path to the tophat folder. To avoid this we can create a symbolic link for tophat2.\n\n> Create a symbolic link for tophat2\n> For creating the link you need the following command:\n```\nln -s /usr/bin/NGS/tophat-2.1.0.Linux_x86_64/tophat2 /usr/local/bin/tophat2\n```\nRemember to use tab autocompletion !\n\nNow go to a different folder and type **tophat2**. If you see the help file, the link works.\n\n#### Installing samtools\n\nWhen you navigate to the **tophat** folder in /usr/bin/NGS/ you see that samtools is automatically installed when TopHat was installed:\n ![](../../images/MapRNASeq5.png)\n\nIf you see the samtools help page when you type\n```\n./samtools_0.1.18\n```\nit means that samtools is indeed installed\n ![](../../images/MapRNASeq6.png)\n\nIf you want to use samtools from anywhere in the file system you can create a soft link. 
\n\n> Create a soft link for samtools.\n> Create a link using the **ln -s** command:\n```\nln -s /usr/bin/NGS/tophat-2.1.0.Linux_x86_64/samtools_0.1.18 /usr/local/bin/samtools\n```\nGo up one directory in the file system and check if the command works. If you type\n```\nsamtools view\n```\n(one of the possible samtools commands) you should see the manual of the command.\n\n#### Mapping the reads\n\nWe are not going to do the actual mapping since it takes almost 25 minutes even with the chromosome22-limited datasets. If we were to map the reads we would use the following command:\n```\nfolder=/home/bits/NGS/RNASeq/\ntophat2 --no-coverage-search ${folder}reference/hg19 ${folder}chr22_SRR1039509_1.fastq.gz ${folder}chr22_SRR1039509_2.fastq.gz \n```\n\n - **--no-coverage-search**: is related to how TopHat finds splice junctions. I'm not going to go into detail here but in the TopHat manual the developers of TopHat say: \"We only suggest users use the **--coverage-search** option for short reads (< 45bp) and with a small number of reads (<= 10 million).\" Since we have the double amount of longer reads (63bp) we have to go for the **--no-coverage-search** option.\n - the first argument is the location of the hash table of the reference genome\n - the second argument is the (cleaned) fastq file containing the reads from one end of the fragments. As you can see TopHat can work directly on the compressed file.\n - the third argument is the (cleaned) fastq file containing the reads from the other end of the fragments\n\nOther useful options for Tophat:\n\n - **-p**: the number of processors (cpu) that TopHat can use for the mapping. The default is 1. This is ok for a laptop since laptops do not contain many cpu but of course the more cpu you give TopHat the faster the mapping. 
So it's better to do the mapping on a strong computer with many cpu\n - **-o**: if you want to store the results of the mapping in another folder\n\nThe mapping generates a new folder **tophat_out** containing 3 .bed files and 2 .bam files containing the resulting alignments:\n\n - **accepted_hits.bam**: a list of read alignments.\n - **unmapped.bam**: a list of reads that could not be mapped. As you can see the size of this file is quite small compared to the accepted_hits.\n - **junctions.bed**: a list of splice junctions between exons in [UCSC BED format](http://genome.ucsc.edu/FAQ/FAQformat.html#format1) (that can be opened as a track in the UCSC genome browser).\nTophat can find novel - not yet annotated - splice junctions based on the alignment of the reads to a reference genome. This is what Tophat is specifically good at, compared to mappers like bwa and bowtie which will only find annotated splice junctions. This is why we use Tophat for mapping RNA-Seq data.\n - **insertions.bed**: a list of insertions.\n - **deletions.bed**: a list of deletions.\n\n ![](../../images/MapRNASeq4.png)\nSince we haven't actually done the mapping, we do not have this folder. However, you can find the bam file with the read alignments in the /home/bits/NGS/RNASeq folder.\n\n#### Quality control of the mapping\n\nIt is vital to check the quality of mapping before proceeding with the RNASeq workflow. The mapping to a reference genome has sorted the reads and it is now possible to identify \n - the regions of the genome the reads originate from\n - duplicate reads\n - RNA degradation...\n\nSeveral program exist to perform quality control of bam files; e.g. 
RSeQC pubmed: 22743226, [QualiMap](http://qualimap.bioinfo.cipf.es/) pubmed: 22914218 , samtools, [deeptools](https://github.com/fidelram/deepTools/wiki/All-command-line-options) pubmed: 24799436 , [Picard](http://broadinstitute.github.io/picard/) which is part of the very popular GATK platform - pubmed: 20644199...\n\nWe are going to use **samtools** here.\n\n> Get the basic stats of the bam file.  \n> On the [samtools wiki](http://davetang.org/wiki/tiki-index.php?page=SAMTools)\n\n ![](../../images/samtools2b.png)\n\nYou can see that you need the **samtools flagstat** command for this. The bam file is in the /home/bits/NGS/RNASeq/ folder so we are going to reuse the folder variable that we created for the mapping:\n```\nfolder=/home/bits/NGS/RNASeq/\nsamtools_0.1.18 flagstat ${folder}accepted_hits_chr22.bam\n```\n\nor if you have done the mapping yourself: \n\n```\nsamtools_0.1.18 flagstat /usr/bin/NGS/tophat-2.1.0.Linux_x86_64/tophat_out/accepted_hits_chr22.bam\n```\n\nThe samtools flagstat command displays an overview of the alignment results on your screen. You just see that 100% of the reads were mapped. This is extremely high but it is of course because we reverse engineered our chromosome 22 limited data set. 
From the complete fastq files we took the reads that mapped to chromosome 22 so it's normal that we get an almost perfect mapping.\n\n ![](../../images/MapRNASeq7.png)\n\nThis overview deserves some explanation:\n\n - **nan** means **Not A Number** (e.g: divided by 0 )\n - **paired in sequencing** means reads that belong to a pair regardless of the fact that they are really mapped as a pair\n - **read1** means forward reads\n - **read2** means reverse reads\n - **properly paired** means that both mates of a read pair map to the same chromosome, oriented towards each other, and with a sensible insert size\n - **with itself and mate mapped** means that both reads of a pair map to the genome but they are not necessarily properly paired, they just map somewhere on the genome\n - **singletons** means that one of the reads of a pair is unmapped while its mate is mapped\n - **with mate mapped to a different chr** means reads with a mate mapped on a different chromosome\n - **with mate mapped to a different chr (mapQ >= 5)** means reads with a mate mapped on a different chromosome having a mapping quality greater than 5\n\n> Compare the number of forward and reverse reads in the paired-end experiment.  \n> the counts of forward and reverse reads are to be found on the lines ending with read1 and read2 respectively. As you see the number of forward reads exceeds the number of reverse reads by 55. \n\n> How many reads were mapped as a pair in the paired-end experiment?   \n> 814320 reads were properly mapped as a pair, that's 99,46% of the total number of reads.\n\nTools like Qualimap, RSeqQC and Picard will give much more detailed information on the quality of the mapping. Unfortunately we do not have time to use them.\n\n### Calculating a count table\n\nIn order to compute differential expression between groups of samples, we need to convert mapping results to read counts for each gene in each sample. 
The counting can also be done in R using various packages but will be slower as compared to command-line tools.\nWe will use the popular [HTSeq-count tool](http://www-huber.embl.de/users/anders/HTSeq/doc/count.html) to compute gene counts.\n\n#### Prepare the alignment file\n\nWe need to sort the bam file since we have paired-end reads. HTSeq assumes the file is sorted so that reads belonging to the same pair are in adjacent lines. If you don't sort the bam file by read name, HTSeq will think there are lot of reads with missing mates. \nIn the [samtools manual](http://www.htslib.org/doc/samtools.html) we can look up which command we need to do the sorting.\n\n> Sort the reads in the .bam file by name\n> As you can see in the manual the **samtools sort** command sorts .bam files:\n\n ![](../../images/CountTable7.png)\n\nThe input and output file are located in the /home/bits/NGS/RNASeq/ folder (or the /usr/bin/NGS/tophat-2.1.0.Linux_x86_64/tophat_out/ folder if you have done the mapping yourself). We are going to create a folder variable:\n```\nfolder=/home/bits/NGS/RNASeq/\nsamtools_0.1.18 sort -n ${folder}accepted_hits.bam ${folder}accepted_hits_sort\n```\n\nGo to the folder where input and output file are stored and check if the sorted .bam file was generated:\nAccording to [the HTSeq manual](http://www-huber.embl.de/users/anders/HTSeq/doc/count.html) the input file for HTSeq contains the aligned reads in **SAM** format. In our case the mapping generated a .bam file. 
Fortunately samtools contains scripts to convert BAM format to SAM.\n\nIn the [samtools manual](http://www.htslib.org/doc/samtools.html) we can look up which command we need to do the transformation.\n\n> Transform the .bam into a .sam file  \n> As you can see in the manual the **samtools view** command can transform any alignment format into standard SAM format:\n\n ![](../../images/CountTable8.png)\n\nWe are going to reuse the folder variable:\n```\nsamtools_0.1.18 view ${folder}accepted_hits_sort.bam > ${folder}accepted_hits.sam\n```\n\nGo to the folder where input and output file are stored and check if the .sam file was generated.\n\n#### Obtaining a reference annotation file\n\nTo calculate read counts we need a gtf file containing the annotation of all exons. You can obtain such files from genome annotation databases such as NCBI, Ensembl, and UCSC. The problem is that there are small differences between the formats of annotation files coming from different databases. These differences have implications for counting the reads.\nFor instance, we used pre-built index files from the Bowtie website for the mapping. These files have UCSC format. So it seems obvious to use UCSC annotation files for the counting. However, HTSeq prefers Ensembl gtf files. As stated in the [HTSeq documentation](http://www-huber.embl.de/users/anders/HTSeq/doc/count.html) using gtf file generated by UCSC will result in very low counts. In the UCSC files, the gene_id incorrectly contains the same value as the transcript_id. Hence, if a read maps to an exon shared by several transcripts of the same gene, this will appear to htseq-count as an overlap between different genes since the different transcripts have different gene_ids. The read will be considered ambiguous and not counted. Therefore, the counts will be incorrect.\nAs a solution, HTSeq recommends to use a gtf file from Ensembl. You can find Ensembl gtf files on the [Ensembl ftp server](ftp://ftp.ensembl.org/pub/). 
The version that we need is called grch37 (this corresponds to UCSC genome build hg19). So you can download the gtf file from [this web site](ftp://ftp.ensembl.org/pub/grch37/).\n\nNavigate to the /home/bits/NGS/RNASeq/reference/ folder:\n\n> Decompress the gtf file.  \n> .gz files are decompressed by the **gunzip** command:\n```\ngunzip Homo_sapiens.GRCh37.82.gtf.gz \n``` \nUse Tab autocompletion for the name of the file.\n\n> Look at the first 10 lines of the gtf file.  \n> Use the **head** command to preview the file:\n```\nhead Homo_sapiens.GRCh37.82.gtf\n```\nUse Tab autocompletion for the name of the file.\n\n ![](../../images/CountTable3B.png)\n\nAs you can see the first column of the file contains chromosome numbers. Ensembl uses 1, 2, 3... as chromosome IDs.\n\n> Look at the first 10 lines of the sam file.  \n> Use the **head** command to preview the file:\n```\nhead accepted_hits.sam\n```\nUse Tab autocompletion for the name of the file.\n\n ![](../../images/CountTable3C.png)\n\nAs you can see the third column of the file contains chromosome IDs but they have UCSC format: **chr**22 (remember that all reads come from chromosome 22 in our example). So we need to:\n\n - Filter the annotation for chromosome 22 from the gtf file to limit processing time.\n - Transform Ensembl chromosome IDs into UCSC format.\n\nFirst of all we'll give the gtf file a simple name to simplify processing.\n\n> Use the move command to rename the file to hg19_EnsGene.gtf  \n> Use the **mv** command to rename the file:\n```\nmv Homo_sapiens.GRCh37.82.gtf ./hg19_EnsGene.gtf\n```\nThe **./** defines to move the file to the current folder (the folder that you are in when you type the command). So you will move the file to the same folder but under another name, which corresponds to just renaming it.\n\n> Filter chromsome 22 annotations from the gtf file. Name the resulting file chr22_Ens.gtf\n> Chromosome 22 annotations are lines starting with **22**. 
Use the **grep** command to filter the file:\n```\ngrep \"^22\" hg19_EnsGene.gtf > chr22_Ens.gtf\n```\nThe **^** defines the start of a line. So **^22** means: search for lines that start with **22**.\n\n> Look at the first 10 lines of the filtered gtf file.  \n> Use the **head** command to preview the file:\n```\nhead chr22_Ens.gtf\n```\nUse Tab autocompletion for the name of the file.\n\n ![](../../images/CountTable3D.png)\n\n\nNow, we still need to transform the Ensembl chromosome IDs into UCSC format, meaning that we simply need to add the prefix **chr** to each line in the filtered gtf file. You'll need the **sed** command for this. Look at [the sed documentation](http://www.grymoire.com/Unix/Sed.html) before you try to do the substitution. To add the word **chr** to the start of each line, you essentially need to replace the start of a line by **chr**.\n\n> Add the prefix to each line of the gtf file. Name the resulting file chr22_Ens_corr.gtf\n> To do a replacement or substitution you need to use the **s** command, followed by what you want to replace and what to replace it with, each separated by a **/**. Remember from the filtering exercise that the start of a line is represented by **^**. So use the following command to make the substitution:\n```\nsed 's/^/chr/' chr22_Ens.gtf > chr22_Ens_corr.gtf\n```\nUse Tab autocompletion for the name of the file.\n\n> Look at the first 10 lines of the substituted gtf file.  \n> Use the **head** command to preview the file:\n```\nhead chr22_Ens_corr.gtf\n```\nUse Tab autocompletion for the name of the file.\n\n ![](../../images/CountTable3E.png)\n\n#### Installing HTSeq\n\nHTSeq is a Python script. Python scripts can be installed using the **pip install** command. Remember you need administrator privileges for installing tools.\n\n> Try to install HTSeq. What happens ?  
\n> Use the following command to install the tool:\n```\npip install HTSeq\n```\n\nAs you can see this generates an error and the tool is not installed.\n\n ![](../../images/CountTable4.png)\n\nLooking up the error in Google leads to [this web page](http://www.cyberciti.biz/faq/debian-ubuntu-linux-python-h-file-not-found-error-solution/), where you can find the solution to the problem: some dependencies are missing.\n\n> Install the missing dependencies and try again. What happens ?  \n> Use the following command to install the dependencies:\n```\napt-get install python-dev\n```\n\nThen try to install HTseq again:\n```\npip install HTSeq\n```\n\nAs you can see this generates a new error and the tool is not installed.\n\n ![](../../images/CountTable5.png)\n\nLooking up the error in Google leads to [this web page](http://ubuntuforums.org/archive/index.php/t-345201.html), where you can find the solution to the problem: the C compiler is missing.\n\n> Install the missing compiler and try again. What happens ?  \n> Use the following command to install the dependencies:\n```\napt-get install g++\n```\n\nThen try to install HTseq again:\n```\npip install HTSeq\n```\n\nAs you can see this does not generate an error. 
To check if the tool works type:\n```\nhtseq-count\n```\n\nIf this displays the help file, you know that the tool was correctly installed\n\n ![](../../images/CountTable6.png)\n\n#### Calculating the count table\n\n**HTSeq** counts reads in different **modes**: \n[HTSeq](http://www-huber.embl.de/users/anders/HTSeq/doc/_images/count_modes.png)\n\n{{Tip|We will use the **union** method}}\n\nIn the [HTSeq manual](http://www-huber.embl.de/users/anders/HTSeq/doc/count.html) we get an overview of the options we can use.\n\n> How to define that we want to use the union mode ?\n> The **-m** option allows to define the mode:\n ![](../../images/CountTable10.png)\n\n> How to define that this was not a strand-specific experiment ?\n> The **-s** option allows to define if a strand-specific library was used:\n ![](../../images/CountTable11.png)\n\n> How to define the minimum alignment quality score for a read to be counted ?\n> The **-a** option allows to define the minimum alignment score:\n ![](../../images/CountTable12.png)\n\nWe'll go for the default minimum alignment score of 10 (90% confidence).\n\n> How to define we want to count based on exon annotation ?\n> The **-t** option allows to define the feature to base the count on:\n ![](../../images/CountTable13.png)\n\nFor a .gtf file **exon** is the default. It means HTSeq will count the number of reads that align to each exon and then combine the counts for all exons of a transcript variant. \n\n> How to define the feature we want to use as an ID after the counting ?\n> The **-i** option allows to define the feature to use as ID.\n\nFor a .gtf file gene_id is the default: it means that the output of HTSeq will be a list of gene_ids and for each gene_id you'll see the number of reads that align to all its exons. \n\n> Calculate the count table\n> HTSeq was installed by **pip install** which automatically creates a link. So the HTSeq commands will work from anywhere in the file system. 
We will go to the folder that contains the input sam file: **/home/bits/NGS/RNASeq/** and run the command from there or create a variable folder containing the path.\nIn the [HTSeq manual](http://www-huber.embl.de/users/anders/HTSeq/doc/count.html) we get an overview of the options we can use. Default options should not be defined.\n```\nfolder=/home/bits/NGS/RNASeq/\nhtseq-count -m union -s no ${folder}accepted_hits.sam ${folder}reference/chr22_Ens_corr.gtf > ${folder}chr22_counts.txt\n```\n\n> View the first 10 lines of the resulting count table\n> \n```\nhead chr22_counts.txt\n```\n\n ![](../../images/CountTable14.png)\nYou nicely see the read counts for each gene...\n","### Your first command\n\n> Print the word hello to the screen\n\n```\necho hello\n```\n> Print the sentence “hello my friends to the screen (with open quotation and without end quotation)\nRemember you can use the up arrow to go back to previously typed commands \n```\necho \"hello my friends\n```\nNow the terminal hangs. We typed an incorrect command and the terminal does not know what to do. \n\n> What to type when the terminal hangs ?\n\nCtrl-C\nIf **Ctrl-C** fails, try hitting **ESC**. In most of the cases, this will do the trick.\n\n> Open the manual of the echo command ?\n\n```\nman echo\n```\nThe synopsis of this command is:\n```\necho [-n] [string ...]\n```\n\nThings in between square brackets are optional, so it means that you can use echo without options and arguments.\n\nWhen the manual page is longer than the terminal, you can scroll down the page one line at a time by pressing the down arrow key, or one page at a time by pressing the spacebar. To exit the man page, press **q** (for quit).\n\nThe manual explains that echo prints its argument by default to the screen and then puts the prompt on a new line. The way it does this is by appending a character called a newline (a special character that literally puts the text on a new line). 
Because echo is often used in programs to print out a sequence of strings not separated by newlines, there is an option to prevent the newline from being inserted.\n\n> By reading the man page, find the command to print hello without a newline, and verify that it works as expected.\nAgain remember to use the up arrow to go to previously typed commands.\n```\necho -n hello\n```\n\n> Open the manual of the sleep command, find how to make the terminal “sleep” for 5 seconds, and execute the command.\n \n```\nman sleep\n```\nAccording to the manual sleep has a required argument called number representing the number of seconds to sleep.\n```\nsleep 5\n```\n> Make the terminal sleep for 5000 seconds and rescue the terminal.\n \n```\nsleep 5000\n```\nThat’s more than an hour so use Ctrl-C to break off the command.\n\n### Navigating the Linux file system\n\nType the following command in the terminal:\n```\ncd\n```\ncd stands for change directory and is used for navigating the Linux file system\n\n> Which directory are you in ?\nTo view the name of the current working directory, type\n```\npwd\n```\npwd stands for print working directory.\nYou see that using cd without arguments leads you to your home directory, on the BITS laptops this is /home/bits.\n> Which directories are located in your home directory ?\nTo view a list of the files and directories that are located in the current working directory, type\n```\nls\n```\nls stands for **list** and is used for listing all files and directories in the current directory. 
\nOn the BITS laptops the home directory **/home/bits** contains a set of folders like Desktop, Documents, Downloads...\n\n> List all files and directories in your home directory that start with the letter D\n\n```\nls D*\n```\nD(star) means everything which name starts with a **D**\n\nA common pattern when using the command line is changing directories using **cd** and then immediately typing **ls** to view the contents of the directory.\n> List the detailed content of your home directory ?\n\n```\nls -l\n```\nthe l in -l stands for **long output**. \nAmong others, the detailed list shows a date and time indicating the last time a file was modified. The number before the date is the size of the file in bytes.\n> List the content of the /usr/local/bin directory ?\n\n```\nls /usr/local/bin\n```\n/usr/local/bin corresponds to a directory in the file system (/), with bin a subdirectory of local and local a subdirectory of usr.\n\nIf you have to reuse a variable often then it can be helpful to create a name for a variable, especially when the variable is long. Suppose you want to work in a directory called **Illumina_exp4_20042004_mapping_to_hg19_results**. To avoid repeating this long name over and over you can create a variable for it, give it a short name and use that in your commands. 
\n\n> Name the variable **folder**\n\nUse the following command: ```folder=Illumina_exp4_20042004_mapping_to_hg19_results```\n\nTo create a new directory use the **mkdir** (make directory) command.\n\n> Create the folder using the newly created variable\n\nIf you want to refer to a named variable in a command you have to precede the name by a **$** sign to indicate that what is following is a **reference** to a variable.\nSo use the following command: ```mkdir ${folder}```\nThe curly braces delineate the start and end of the variable name.\nCheck if the folder is created using the **ls** command.\n\n{{Wiki-img|NGS/Intro/CL3.png|500px}}\nTo remove a directory, use the **rm** (remove) command. You could use **rmdir** but this only works on empty folders. To remove a folder with the rm command you need to use the **-r** option. This stands for **recursively** which means it will remove the folder and its complete content. \n\n> Remove the Illumina_exp4_20042004_mapping_to_hg19_results directory.\n\nUse the variable as an argument of the rm command:\n```\nrm -r ${folder}\n```\nCheck if it's removed using the **ls** command.\nNow navigate to the **NGS** folder which is located in the **/home/bits/** folder.\n\n> Navigate to this location.\nSince you want to navigate, you need to use the **cd** command. 
Since the NGS folder is located in the folder that you are currently in, you can simply give the name of the folder (NGS) as an argument:\n```cd NGS```\nIf you want to move to a folder that's located in another location of the file system, you have to give the full path to the folder.\n\n> Go to the **/usr/bin** folder\n```cd /usr/bin```\n> Go back to your home folder\n```cd```\n\n### Manipulating files\n\nEven without a text editor, there are ways to create a file with text using the redirect operator **>**\n\n> Create a file called test1.txt containing the text \"Why do bioinformaticians work on the command line?\" using echo\n\n```\necho \"Why do bioinformaticians work on the command line?\" > test1.txt\n```\nThe redirect operator > takes the text output of echo and redirects its contents to a file called test1.txt\n> Check if it worked by viewing the content of the file on the screen\n```\ncat test1.txt\n```\nThe name cat is short for “concatenate”. The command can be used to combine the contents of multiple files, but we use it here to dump the content of a single file to the screen. Cat is a “quick-and-dirty” way to view the content of a file; less is a neater way. 
\n> Add the line \"Because they don't want to scare you with huge amounts of data!\" to the file and check if it worked\nTo add lines of text to a file, use the append operator **>>**:\n```\necho \"Because they don't want to scare you with huge amounts of data!\" >> test1.txt\ncat test1.txt\n```\nThe append operator >> appends the text output of echo to the file test1.txt\n\n> Create an empty file called test2.txt and check if it exists\nTo create an empty file, use the **touch** command:\n```\ntouch test2.txt\nls\n```\n> List the names of all text files in your current directory\n\n```\nls *.txt\n```\nHere *.txt automatically expands to all filenames that match the pattern “any string followed by .txt”.\n> Rename the file test2.txt to test_partII.txt using mv and check if it worked\nTo rename a file use the mv command, short for **move**:\n```\nmv test2.txt test_partII.txt\nls *.txt\n```\n> Copy the file test_partII.txt to test2.txt and check if it worked\nTo copy a file use the cp command, short for **copy**:\n```\ncp test_partII.txt test2.txt\nls *.txt\n```\nYou don't have to type out test_partII.txt, instead you can type something like test_-Tab thereby making use of tab completion. Tab completion involves automatically completing a word if there’s only one valid match on the system. For example, if the only file starting with the letters “test_” is test_partII.txt, test_-Tab refers to test_partII.txt\nEspecially with longer names, tab completion can save a huge amount of typing.\n\n> Remove the file test_partII.txt and check if it worked\nTo remove a file use the rm command, short for **remove**:\n```\nrm test_partII.txt\nls *.txt\n```\nDownload the file called exams.txt, containing the results of the spelling and maths exams of all 10-year olds of a school, into your home folder. 
Use wget to download the file from http://data.bits.vib.be/pub/trainingen/NGSIntro/exams.txt\n\n> Download the file.\n```\nwget http://data.bits.vib.be/pub/trainingen/NGSIntro/exams.txt\n```\n\n\n> Show the first 10 lines of the file.\n```\nhead exams.txt\n```\nTwo complementary commands for inspecting files are head and tail, which allow to view the beginning (head) and end (tail) of a file. The head command shows the first 10 lines of a file.\n\n> Show the last 10 lines of the file.\nSimilarly, tail shows the last 10 lines of a file.\n```\ntail exams.txt\n```\nOpen the manual of head to check out the options of head. Learn how to look at the first n lines of the file. \n\n> Save the first 30 lines of exams.txt in a file called test.txt\n```\nhead -n 30 exams.txt > test.txt\n```\n> Look at test.txt using the less command\n```\nless test.txt\n```\nThere are many commands to look at the full content of a file. The oldest of these programs is called **more**, and the more recent and powerful variant is called **less**. Less lets you navigate through the file in several ways, such as moving one line up or down with the arrow keys, pressing space bar to move a page down... Perhaps the most powerful aspect of less is the forward slash key /, which lets you search through the file from beginning to end. \n\n> Search for Jasper in test.txt\nThe way to do this in less is to type /Jasper\nThe last three essential less commands are G to move to the end of the file and 1G to move back to the beginning. To quit less, type **q** (for quit).\n\n> Look at the last 10 lines of the first 20 lines of exams.txt\n```\nhead -n 20 exams.txt | tail \n```\nThe command runs head -n 20 exams.txt and then pipes the result through tail using the pipe symbol **|** \n\nThe reason the pipe works is that the tail command, in addition to taking a filename as an argument, can take input from “standard in”, which in this case is the output of the command before the pipe. 
The tail program takes this input and processes it the same way it processes a file.\n\n### Running tools\n\nBioinformatics tools are just commands on the command line. You use them in exactly the same way as all the commands we have run up to now, by defining options and arguments. A list of options and arguments can be found in the help file. \n\n#### Installing and running sl\n\nWe have seen **ls**, the list command, and use it frequently to view the contents of a folder, but because of mistyping you sometimes end up with **sl**. How about getting a little fun in the terminal instead of **command not found**? This is a general linux command, you can install it from a repository. \n\n> Install sl\nFor installing you need superuser privileges !\n```\nsudo apt-get install sl\n```\n> Find out in the manual what sl stands for\n\n```\nman sl\n```\nYou can find the solution in the **description** section of the manual.\n> run the command\n\n```\nsl\n```\n:o)\nTry out some of the options !!\n\n#### Running blastp\nIn the folder /home/bits/Linux/ you find a file called [http://data.bits.vib.be/pub/trainingen/NGSIntro/sprot.fasta sprot.fasta] containing a set of protein sequences. We will use this file as a database for blast. The query sequence is the following: \n```\nMLLFAPCGCNNLIVEIGQRCCRFSCKNTPCPMVHNITAKVTNRTKYPKHLKEVYDVLGGSAAWE\n```\n\n> Create a fasta file containing the query sequence using echo called seq.fasta\n\n```\necho \">query seq\" > seq.fasta\ncat seq.fasta\necho MLLFAPCGCNNLIVEIGQRCCRFSCKNTPCPMVHNITAKVTNRTKYPKHLKEVYDVLGGSAAWE >> seq.fasta\ncat seq.fasta\n```\nBlast can be done via the [https://blast.ncbi.nlm.nih.gov/Blast.cgi blast website], but you can also download the blast tool and run it locally (on your computer) via the command line. For instance if you want to blast against your own database of sequences, you have to do it locally. 
Blast has been installed on the bits laptops.\n\nFirst you have to transform your own database (the sprot.fasta file in our case) into a database that can be searched by blast using the **makeblastdb** command.\n\n> Look at the help file of makeblastdb and find the options to define the input fasta file and the database type\n\n```\nmakeblastdb -help\n```\nYou have to define the input fasta file using the -in option and the type of sequences using the -dbtype option\n> Create the blast database\n\n```\nmakeblastdb -in sprot.fasta -dbtype prot\n```\nNow you can perform a blastp search using the **blastp** command. Write the results to a tabular text file with comments called output.txt\n\n> Look at the help file of blastp and find the options to define input, database, output and output format\n\n```\nblastp -help\n```\nYou need the -query, the -db, the -out and the -outfmt option\n> Perform the blast and open the results with less\n\n```\nblastp -query seq.fasta -db sprot.fasta -out output.txt -outfmt 7\nless output.txt\n```\n\n#### Running cutadapt\n\nIn this exercise we'll do some real NGS analysis on the SRR074262.fastq file that is stored in folder /home/bits/NGS/Intro. \n\n> Go to this folder and look at the first 10 lines of the file.\n\n```\ncd /home/bits/NGS/Intro\nhead SRR074262.fastq\n```\nThis data set contains a high number of adapter sequences. These are reads that consist solely or partly of adapter sequence. You have to remove this adapter contamination using command line tools like [https://code.google.com/p/cutadapt/ cutadapt]. This tool is installed on the bits laptops. It is not a regular bash command (it's a python program) so it doesn't have a manual but it does have a help file.\n\n> Check the help file of cutadapt for the option to define the adapter sequence and trim at the 3'ends of the reads.\nTo open the cutadapt help file type:\n```\ncutadapt -h\n```\nThe **-a** option trims adapter sequences at the 3' end of the reads. 
\nAt the top of the help file you see that the standard usage of the command is:\n```\ncutadapt -a ADAPTER -o output.fastq input.fastq\n```\nThe sequence of the adapter is GATCGGAAGAGCTCGTATGCCGTCTTCTGCTTGAAA\n\n> Trim the adapter sequence and store the trimmed sequences in a file called SRR074262trim.fastq\nGo to the folder where the input file is located and type:\n```\ncutadapt -a GATCGGAAGAGCTCGTATGCCGTCTTCTGCTTGAAA -o SRR074262trim.fastq SRR074262.fastq\n```\n> Look at the first lines of the trimmed file\nGo to the folder where the input file is located and type:\n```\nhead SRR074262trim.fastq\n```\n\n#### Running Picard\n\nThe trimmed fastq file is subsequently mapped resulting in a bam file that you can download from http://data.bits.vib.be/pub/trainingen/NGSIntro/1271011_reads.pair.1.list.accepted_hits.bam \n\n> Download the file via the command line\n\n```\nwget http://data.bits.vib.be/pub/trainingen/NGSIntro/1271011_reads.pair.1.list.accepted_hits.bam\n```\n> Rename the file SRR074262.bam\nRemember to use tab autocompletion !\n```\nmv 1271011_reads.pair.1.list.accepted_hits.bam SRR074262.bam\n```\nThis is a raw unsorted bam file, if we want to visualize the mapping results in IGV, we need to sort and index the file. We can do the sorting using one of [http://broadinstitute.github.io/picard/ the Picard tools], called SortSam. Picard can be downloaded from https://github.com/broadinstitute/picard/releases/download/2.8.2/picard-2.8.2.jar\n\n> Download the file\nRemember to use tab autocompletion !\n```\nwget https://github.com/broadinstitute/picard/releases/download/2.8.2/picard-2.8.2.jar\nll\n```\nFor the tools to run properly, you must have Java 1.8 installed. 
To check your java version run the following command:\n```\njava -version\n```\nRunning Java tools from the command line requires a special syntax: you have to start the command with **java** and then the name of the java tool and its options and arguments.\n\nJava jar-files are archives of multiple java files (similar to tar archives of multiple regular files). They require an even more elaborate syntax. You have to start the command with **java -jar** and then the name of the jar file and its options and arguments. As you can see the picard tools come as a jar-file.\n\n> Test the installation by opening the help file\n\n```\njava -jar picard-2.8.2.jar -h \n```\nBam files are enormous files that are hard to search through. The order of the reads in a bam file is the same as in the original fastq file. However, if you want to visualize the mapping results or if you want to calculate mapping statistics it's much more efficient to sort the reads according to genomic location. This can be achieved with the SortSam tool. Look in [https://broadinstitute.github.io/picard/command-line-overview.html the picard documentation] for the SortSam tool.\n\n> Sort the bam file to SRR074262sorted.bam\nRemember to use tab autocompletion !\n```\njava -jar picard-2.8.2.jar SortSam \\\n      I=SRR074262.bam \\\n      O=SRR074262sorted.bam \\\n      SORT_ORDER=coordinate\n```\nBam files contain duplicate reads unless you removed them during the quality control step. MarkDuplicates locates and tags duplicate reads in a bam or sam file. Duplicate reads originate from the same fragment and were typically introduced during library construction using PCR. Duplicate reads can also result from a single cluster on the flow cell, incorrectly detected as multiple clusters by the optical sensor of the sequencing instrument.\nMarkDuplicates compares sequences of reads and detects duplicates. The tool's output is a new SAM or BAM file, in which duplicates have been identified in the SAM flags field. 
If needed, duplicates can be removed using the REMOVE_DUPLICATES and REMOVE_SEQUENCING_DUPLICATES options. (See  [https://broadinstitute.github.io/picard/command-line-overview.html#MarkDuplicates the Picard documentation] for more details).\n\n> Remove duplicates from the sorted bam file\nRemember to use tab autocompletion !\n```\njava -jar picard-2.8.2.jar MarkDuplicates \\\n      I=SRR074262sorted.bam \\\n      O=SRR074262sortednodup.bam \\\n      M=marked_dup_metrics.txt \\\n      REMOVE_DUPLICATES=true\n```\nFor visualization and easy access you can build an index to the bam file using BuildBamIndex. Look in [https://broadinstitute.github.io/picard/command-line-overview.html the picard documentation] for the BuildBamIndex tool.\n\n> Build the bai file for SRR074262sortednodup.bam\nRemember to use tab autocompletion !\n```\njava -jar picard-2.8.2.jar BuildBamIndex \\\n      I=SRR074262sortednodup.bam \n```\nCheck if the files were generated.\n\n### File compression\n\n> Compress the SRR074262.bam file to .gz format\nRemember to use tab autocompletion !\n```\ngzip SRR074262.bam\nll\n```\n> and unzip it again\nRemember to use tab autocompletion !\n```\ngunzip SRR074262.bam.gz\nll\n```\n\n### Writing scripts\n\n#### Writing and executing bash scripts\n\nWe are going to make additions to the bash script you find below:\n```\n#this program pretends to hack sites\n!Define a variable str equal to \" 0  1  23  45  6 789\"\nclear\n!Print to screen: \"hacking www.bits.vib.be\"\n!Do nothing for 2 seconds\n!Print to screen: \"Server hacking module is loading\"\n!Do nothing for 2 seconds\n!Print to screen: \"Hack module is starting in 2 seconds\"\n!Do nothing for 1 second\n!Print to screen: \"1 second\"\n!Do nothing for 1 second\nping -c 3 www.bits.vib.be\n!Do nothing for 2 seconds\nnetstat\n!Do nothing for 1 second\nfor i in {1..1000}\ndo\nnumber1=$RANDOM\nlet \"number1 %= ${#str}\"\nnumber2=$RANDOM\nlet \"number2 %=4\"\n!Print to screen without newlines and with backslash 
escapes: \"\\033[1m${str:number1:1}\\033[0m\"\ndone\n!Print to screen: \"453572345763425834756376534\"\n!Do nothing for 3 seconds\n!Print to screen: \"www.bits.vib.be succesfully hacked!\"\n!Print to screen: \"PASSWORD ACCEPTED: token is 453572345763425834756376534\"\n```\n\nOpen gedit and paste the code.\n\n> Replace all lines that start with ! by the appropriate command\n\n```\n#this program pretends to hack sites\nstr=\" 0  1  23  45  6 789\"\nclear\necho \"hacking www.bits.vib.be\"\nsleep 2\necho \"Server hacking module is loading\"\nsleep 2\necho \"Hack module is starting in 2 seconds\"\nsleep 1\necho \"1 second\"\nsleep 1\nping -c 3 www.bits.vib.be\nsleep 2\nnetstat\nsleep 1\nfor i in {1..1000}\ndo\nnumber1=$RANDOM\nlet \"number1 %= ${#str}\"\nnumber2=$RANDOM\nlet \"number2 %=4\"\necho -n -e \"\\033[1m${str:number1:1}\\033[0m\"\ndone\necho \"453572345763425834756376534\"\nsleep 3\necho \"www.bits.vib.be succesfully hacked!\"\necho \"PASSWORD ACCEPTED: token is 453572345763425834756376534\"\n```\n> Add a shebang line to the top of the script\n\n```\n#!/usr/bin/env bash\n#this program pretends to hack sites\nstr=\" 0  1  23  45  6 789\"\nclear\n...\n``` \nSave the script as HackIt.sh\n\n> If necessary make executable\n\n```\nchmod 755 HackIt.sh\n```\n> Run the script\n\n```\nbash HackIt.sh\n```\nWhat if you want to \"hack\" another website ? 
The easiest way to allow for this is to pass the url as an argument of the bash command, so that's what we'll do.\n\nReopen the file in gedit\n\n> Replace www.bits.vib.be by $1\n\n```\n#!/usr/bin/env bash\n#this program pretends to hack sites\nstr=\" 0  1  23  45  6 789\"\nclear\necho \"hacking $1\"\nsleep 2\necho \"Server hacking module is loading\"\nsleep 2\necho \"Hack module is starting in 2 seconds\"\nsleep 1\necho \"1 second\"\nsleep 1\nping -c 3 $1\nsleep 2\nnetstat\nsleep 1\nfor i in {1..1000}\ndo\nnumber1=$RANDOM\nlet \"number1 %= ${#str}\"\nnumber2=$RANDOM\nlet \"number2 %=4\"\necho -n -e \"\\033[1m${str:number1:1}\\033[0m\"\ndone\necho \"453572345763425834756376534\"\nsleep 3\necho \"$1 succesfully hacked!\"\necho \"PASSWORD ACCEPTED: token is 453572345763425834756376534\"\n``` \n> Save and run the script again now giving www.kuleuven.be as an argument \n\n```\nbash HackIt.sh www.kuleuven.be\n```\n$1 refers to the first argument of the command. If you have two arguments you use $1 and $2 to represent them.\n\n#### Writing and executing Perl scripts\n\nWe are going to create and run the perl script you find below:\n```\n#This program predicts if a sequence is protein, nucleic acid or rubbish\n$seq = $ARGV[0];\nif ($seq =~ /[JO]/) {\n  print \"is not a sequence, first illegal character is $&\\n\";\n} elsif ($seq =~ /[EFILPQZ]/) {\n  print \"is protein\\n\";\n} else {\n  print \"is nucleic acid\\n\";\n}\n```\n\nOpen gedit and paste the code.\n\n> Add a shebang line to the top of the script\n\n```\n#!/usr/bin/env perl\n#This program predicts if a sequence is protein, nucleic acid or rubbish\n$seq = $ARGV[0];\nif ($seq =~ /[JO]/) {\n...\n``` \nSave the script as SeqIt.pl\n\n> If necessary make executable\n\n```\nchmod 755 SeqIt.pl\n```\n> Run the script using your first name in capitals as an argument\n\n```\nperl SeqIt.pl JANICK\n```\n\n#### Writing and executing Python scripts\n\nWe are going to make additions to the python script you find 
below:\n```\n#This program counts the number of amino acids in a protein sequence\n!Define variable mySequence equal to \"SFTMHGTPVVNQVKVLTESNRISHHKILAIVGTAESNSEHPLGTAITKYCKQELDTETLGTCIDFQVVPGCGI\"\n!Create a set myUniqueAminoAcids out of mySequence\nfor aaCode in myUniqueAminoAcids:\n  !Print to screen, use format to fill in the values: \"Amino acid {} occurs {} times.\"\n```\n\nOpen gedit and paste the code.\n\n> Replace all lines that start with ! by the appropriate command\n\n```\n#This program counts the number of amino acids in a protein sequence\nmySequence = \"SFTMHGTPVVNQVKVLTESNRISHHKILAIVGTAESNSEHPLGTAITKYCKQELDTETLGTCIDFQVVPGCGI\"\nmyUniqueAminoAcids = set(mySequence)\nfor aaCode in myUniqueAminoAcids:\n  print(\"Amino acid {} occurs {} times.\".format(aaCode,mySequence.count(aaCode)))\n```\n> Add a shebang line to the top of the script\n\n```\n#!/usr/bin/env python\n#This program counts the number of amino acids in a protein sequence\nmySequence = \"SFTMHGTPVVNQVKVLTESNRISHHKILAIVGTAESNSEHPLGTAITKYCKQELDTETLGTCIDFQVVPGCGI\"\nmyUniqueAminoAcids = set(mySequence)\n...\n``` \nSave the script as CountIt.py\n\n> If necessary make executable\n\n```\nchmod 755 CountIt.py\n```\n> Run the script\n\n```\npython CountIt.py\n```\nWhat if you want to \"count\" another protein ? 
The easiest way to allow for this is to pass the sequence as an argument of the python command, so that's what we'll do.\n\nReopen the file in gedit\n\n> Adjust the code to read the first argument of the python command using the sys library\n\n```\n#!/usr/bin/env python\n#This program counts the number of amino acids in a protein sequence\nimport sys\nmySequence = sys.argv[1]\nmyUniqueAminoAcids = set(mySequence)\nfor aaCode in myUniqueAminoAcids:\n  print(\"Amino acid {} occurs {} times.\".format(aaCode,mySequence.count(aaCode)))\n``` \n> Save and run the script again now giving QWEERTIPSDFFFGHKKKKLLLLLLLLLLLLLL as an argument \n\n```\npython CountIt.py QWEERTIPSDFFFGHKKKKLLLLLLLLLLLLLL\n```\n\nsys.argv[1] refers to the first argument of the command. If you have two arguments you use sys.argv[1] and sys.argv[2] to represent them.\n\n#### Installing and using Python tools\n\nInstalling Python-based tools is not done with apt-get, instead the command pip is used. If pip is not yet installed, the terminal will show an error message saying that pip is currently not installed. You can install pip using apt-get.\n\nAs an example we will install Biopython, a Python library for bioinformatics. See [http://biopython.org/wiki/Download the documentation] for more details. \n\n> Install biopython \n\nYou need superuser privileges for this\n```\nsudo pip install biopython\n```\nWe will write a small python script to check if Biopython was successfully installed. In the folder /home/bits/Linux/ you find a file called [http://data.bits.vib.be/pub/trainingen/NGSIntro/sprot.fasta sprot.fasta] containing a set of protein sequences that we will use as input. Move to the folder containing the file.\n\nWe will use SeqIO module of Biopython to parse the fasta file with the protein sequences. 
Check out [http://biopython.org/wiki/SeqIO the tutorial of the module].\n\n```\n!Import the SeqIO module of the Bio library\n!For every record in the sprot.fasta file do:\n!Print the id of the seq_record to the screen\n!Print the length of the sequence to the screen\n```\n\nOpen gedit and paste the code.\n\n> Replace all lines that start with ! by the appropriate command\n\n```\nfrom Bio import SeqIO\nfor seq_record in SeqIO.parse(\"sprot.fasta\",\"fasta\"):\n     print(seq_record.id)\n     print(len(seq_record))\n```\n> Add a shebang line to the top of the script\n\n```\n#!/usr/bin/env python\nfrom Bio import SeqIO\nfor seq_record in SeqIO.parse(\"sprot.fasta\",\"fasta\"):\n...\n``` \nSave the script as ParseIt.py in the folder that contains the input file.\n\n> If necessary make executable\n\n```\nchmod 755 ParseIt.py\n```\n> Run the script\n\n```\npython ParseIt.py\n```\n\n### Compressing and decompressing files\n\nSome files or tools come in **.zip** format, how to decompress them ? \n\nIn the **/usr/bin/tools** folder you can find the zipped version of the FastQC tool. To unzip it, you have to use the **unzip** command.\n\nThe **/usr/bin/** folder belongs to the root user, not to the bits user. Therefore only root is allowed to do manipulations in this folder. Switch to root using the **su** command or type **sudo** in front of your commands. The system will ask for the password: bitstraining on the BITS laptops. \n\n> Decompress the FastQC tool with unzip.\nFirst look at the unzip manual to get an idea about the working of the command. \n```man unzip```\nTo unzip the file you can use the simple command: ```unzip name_of_the_zip_file```. Remember to use tab autocompletion.\nThis will generate a folder called FastQC in /usr/bin/tools.\nAfter decompression use **ls** and **cd** to take a look at the content of the newly created **FastQC** folder. 
You will see the fastqc command in this folder.\n\n> Make sure that you can read, write and execute the fastqc command and that other people can read and execute it.\nTo see the current permissions of the command:\n```ls -l``` \nThe command that allows you to change the access permissions of files and directories is **chmod** (change mode). chmod has two mandatory arguments:\n\n - A three digit number representing the access permissions you want to set. Each digit refers to a different audience: \n\n - first digit refers to the owner of the file\n - second digit refers to the group the owner belongs to\n - third digit refers to all others\n\nThe numbers themselves represent the permissions:\n\n - 7 full access: read, write and execute\n - 6 read and write\n - 5 read and execute\n - 4 read only\n - 3 write and execute\n - 2 write only\n - 1 execute only\n - 0 no access\n \n\n - The name of the file for which you want to change the access permissions\n\n \n{{Wiki-img|NGS/Intro/LCLExtra2.png|400px}}\n\nAs you can see **root** is the owner of the file. This is why you need to log on as superuser (= root) to be able to change root's files. \n\n### Sorting files\n\nWe want to sort the file exams.txt from highest to lowest score on maths.\n\n> Sort the file based on score on maths. Write results to a file called examssort1.txt\nYou have to **sort** the lines in the file according to the maths score. So you want to sort the file based on the numbers in the second column: it means that you cannot use the default sort command (this will sort the lines based on the content of the first column) but you have to use an option that allows you to specify the column you wish to sort on.\nWhen you look in the manual you see that you can use the -k option for this: \n```\nsort -k2 exams.txt\n```\nThis will sort the file according to the values in the second column, but it only prints the sorted lines to the screen; the original file is left unchanged. 
To save the sorted list in a new file, examssort1.txt, use the **redirect operator: >**\n```\nsort -k2 exams.txt > examssort1.txt\n```\n> Use the head command to look at the sorted file.\n\n```\nhead examssort1.txt\n```\nYou can see that the sorting was not done correctly: it was done alphabetically, treating the numbers in the second column as characters, instead of numbers. This means that we are still missing an option that allows for numerical sorting.\n\n> Sort the file numerically based on score on maths.\n```\nsort -k2 -n exams.txt > examssort1.txt\nhead examssort1.txt\n```\nThis looks a lot better, but we still have to reverse the order since we want the scores from high to low. \n\n> Sort the file numerically from highest to lowest score on maths.\nFor this we need to add a third option to the **sort** command.\nWhen you look in the manual you see that you can use the -r option for this:\n\n```\nsort -k2 -n -r exams.txt > examssort1.txt\nhead examssort1.txt\n```\n> Show the results of the 10 students with the highest scores on the maths exam using a single line of commands.\n\nThis means that you have to combine the **head** command and the **sort** command from the previous exercise into one single command. Remember that you can combine commands by writing them in the order they have to be performed, so in our case first **sort** then **head**, separated by the **pipe operator: |**\n\n```\nsort -k2 -n -r exams.txt | head\n```\n> Show only the names of the 10 students with the highest scores on the maths exam using a single line of commands.\n\nTo leave out gender and scores you have to use the **cut** command. To specify which columns to cut you can use the -f option. Please note that the -f option specifies the column(s) that you want to retain ! As an argument you have to specify the name of the file you want to cut.\nIn the manual you can see that TAB is the default delimiter for the cut command. 
So if you have a tab-delimited text file, as in our case, you do not need to specify the delimiter. Only if you use another delimiter you need to specify it.\n\n```\nsort -k2 -n -r exams.txt | head | cut -f3\n```\n\n\n**The case of chromosomes and natural sorting.**\n'sort' will sort chromosomes as text; adding a few more parameters gives you the sort you need.\n\n> Write a list of human chromosomes (values: 22 to 1 X Y MT) to the screen. Use {end..begin} to define a numerical range.\n\nRemember that you can use **echo** to print text to the screen, so to generate text. Try\n```\necho {22..1} X Y MT\n```\nand see what happens...\nYou don't want the numbers next to each other in one row, you want them in a column underneath each other. This means you want to replace the blanks by end-of-lines. \n\n> Replace blanks by end-of-lines. Use the sed command for this.\n\nLook up the command for replacing text in the slides. Blanks are represented by **\\ ** (back slash followed by a blank) and end-of-lines are represented by **\\n** (back slash followed by n). To replace all blanks by an end-of-line you need to add the **g** option (see [http://sed.sourceforge.net/sed1line.txt sed tutorial] for more info). So \n```\nsed \"s/\\ /\\n/g\"\n```\nshould do the replacement. Of course you need to combine the two commands using the output of echo as input in sed. Look in the slides or the cheat sheet how to do this.\nHowever, you do not want to print the text to the screen you want to print the text to a file. 
Look in the slides or the cheat sheet how to do this and try to combine the three parts of the command.\n\n> Write chromosomes as a column to a file called chroms.txt\n\nThe correct solution is: \n```\necho {22..1} X Y MT | sed \"s/\\ /\\n/g\" > chroms.txt\n```\nThe s in the sed argument refers to substitution: you want to substitute blanks by end-of-lines, it is followed by the character you want to replace (a blank or \"\\ \"), then the character you want to replace it with (an end-of-line or \"\\n\"), then you add g to apply the substitution globally, in other words to do the substitution more than once so each time a blank is encountered.\nIt prints the chromosome numbers as a column to the file chroms.txt\n\n> Look at the file using the less command.\n\n```\nless chroms.txt\n```\nRemember to use q to leave a less page. \n\n> Sort the chromosome file by using a simple sort. Write results to chromssort.txt\n\n```\nsort chroms.txt > chromssort.txt\nhead chromssort.txt\n```\nNot good! This is a tricky problem that always comes up when you are working with chromosome numbers e.g. when sorting bam/sam files, annotation files, vcf files...\n\n> Modify the sort command so that the sorting of the chromosomes is done in the correct way.\n\nMost people solve it by specifying that you want sort to do natural sorting using the -V option:\n```\nsort -V chroms.txt > chromssort.txt\nhead chromssort.txt\n```\nNice !\nNow try with chr in front.\n\n> Create a file with values chr22 to chr1 chrX chrY chrMT into one column called chroms2.txt in one single command\n\n```\necho chr{22..1} chrX chrY chrMT | sed \"s/\\ /\\n/g\" > chroms2.txt\nhead chroms2.txt\n```\n> Sort the file into a new file called chromssort2.txt\n\n```\nsort -V chroms2.txt > chromssort2.txt\nhead chromssort2.txt\n```\n\n### Getting files from the internet\n\nTo download data via a link on the internet you can use the **wget** command.\nFor NGS analysis you often need to download genomic sequence data from the internet. 
As an example we are going to download the E.coli genome sequence from the iGenomes website: ftp://igenome:G3nom3s4u@ussd-ftp.illumina.com/Escherichia_coli_K_12_MG1655/NCBI/2001-10-15/Escherichia_coli_K_12_MG1655_NCBI_2001-10-15.tar.gz\n\nDownload this file into the folder NGS/ChIPSeq/ in your home directory. \n\n> Download the data into this folder. \n\nGo to this folder and use the wget command to download the data:\n```\ncd /home/bits/NGS/ChIPSeq/\nwget ftp://igenome:G3nom3s4u@ussd-ftp.illumina.com/Escherichia_coli_K_12_MG1655/NCBI/2001-10-15/Escherichia_coli_K_12_MG1655_NCBI_2001-10-15.tar.gz\nll\n```\nIn the same way you can download NGS data from the internet. We are not going to actually do this because \nNGS data sets are enormous and can take hours to download. Interrupting the download is done with {{key press|Ctrl}} + C\n\n> Decompress the file. \n\n```\ntar -xzvf Escherichia_coli_K_12_MG1655_NCBI_2001-10-15.tar.gz\nll\n```\nThis creates a new folder called Escherichia_coli_K_12_MG1655.\nGo into this folder and look at the whole genome fasta sequence\n\n> Look at the fasta sequence. \nUse **cd** to navigate the folders and **head** to look at the file\n```\ncd Escherichia_coli_K_12_MG1655\nll\ncd NCBI\nll\ncd 2001-10-15\nll\ncd Sequence\nll\ncd WholeGenomeFasta\nll\nhead genome.fa\n```\n\n### Installing tools\n\nThe FastQC tool was installed by unzipping it. Most tools can be installed using the **make** command. There are many ways to install software on Linux:\n\n - via the software manager, an application with a very easy user friendly interface\n - via the **apt-get** command\n - software packages written in Python are installed via the **pip install** command\n\n\nThese methods handle the installation and removal of software on Linux distributions in a simplified way. They fetch the software from software repositories on the internet. 
However, these repositories do not always contain the most up-to-date version of software packages, especially not for niche software like bioinformatics tools.\n\nSo to be on the safe side, it is recommended that you download the latest version of a tool from its website (using wget) and use **make** to install it. In that way, you have full control over the version of the tool that you are installing.\n\nThis is not true for pip. Pip does the difficult steps in the installation for you and accesses an up-to-date package repository, so Python programs can safely be installed using pip.\n\nDownload and install all packages in the **tools** folder of the **/usr/bin/** folder. This is a folder owned by root so it is a good idea to switch to superuser again.\n\n#### Installing TopHat\n\nIn the Introduction training we use RNA-Seq reads. Mapping RNA-Seq reads is done using the TopHat tool. So we need to install the [http://ccb.jhu.edu/software/tophat/tutorial.shtml TopHat tool]. We are going to do this in the /usr/bin/NGS/ folder so we need to be superuser for this.\n\n> Go to the TopHat website and fetch the download link.\n\n - Go to the [http://ccb.jhu.edu/software/tophat/tutorial.shtml TopHat website]\n - Right click the Linux download link\n - Select **Copy Link Location**\n\n> Download the file into the /usr/bin/NGS/ folder.\n\n - Go to the terminal\n - Navigate to the /usr/bin/NGS/ folder\n - Type **wget **\n - Press the Shift and Insert keys simultaneously to paste the url\n\nTopHat is downloaded as a .tar.gz file \n\n> Decompress the file\nFor decompressing a .tar.gz file you need the following command:\n```\ntar -xzvf tophat-2.1.1.Linux_x86_64.tar.gz\n```\nRemember to use tab autocompletion !\n\nThis creates a new folder called tophat-2.1...\nGo into the tophat folder and type:\n```\n./tophat\n```\n\nIf this opens the help of tophat, it means the software has been installed correctly. It does not mean that you can use the software now. 
Well you can but you will always have to type the commands from inside the tophat folder like we do here or provide the full path to the tophat folder. The dot slash (./) in front of the command means use the tophat **that is located in this folder**. It tells the command line where it can find the script (./ = the current directory = /usr/bin/NGS/tophat-2.1.1.Linux_x86_64/). To avoid this we can create a symbolic link for tophat2 (see later).\n\n#### Installing samtools\n\nWhen you navigate to the **tophat** folder in /usr/bin/NGS/ you see that samtools is automatically installed when TopHat was installed:\n{{Wiki-img|NGS/Intro/MapRNASeq5.png|600px}}\n\nIf you see the samtools help page when you type\n```\n./samtools_0.1.18\n```\nit means that samtools is indeed installed\n{{Wiki-img|NGS/Intro/MapRNASeq6.png|600px}}\n\n\n[http://wiki.bits.vib.be/index.php/Introduction_to_ChIP-Seq_analysis Installing tools for the ChIP-Seq training]\n#### Installing cutadapt\n\nCutadapt is a Python program that removes adapter sequences from NGS reads.\nIt has already been installed on the bits laptops but if you need to install it, use [http://wiki.bits.vib.be/index.php/Installing_cutadapt these instructions].\n\n### Quality control of NGS data\n\n#### Checking the quality of the Introduction training data using FASTQC\n\nIn the /home/bits/NGS/Intro directory you can find a file called SRR074262.fastq (the file containing Arabidopsis RNA-Seq reads), that was used in the exercises on FastQC in Windows. FastQC is a tool that checks the quality of fastq files, containing NGS data.\n\nWe will now try to do the same FastQC analysis from command line in Linux. FastQC is a java-based tool that needs java to be able to run. 
\n\n> Check if the correct version of java is installed\nIn command line you can check if java is installed on your laptop using the following command:\n```\njava -version\n```\nYou should see something like:\n```\njava version \"1.8.0_101\"\nJava(TM) SE Runtime Environment (build 1.8.0_101-b13)\nJava HotSpot(TM) 64-Bit Server VM (build 25.101-b13, mixed mode)\n```\nIf you get an error then you don't have java installed. If the version listed on the first line is less than 1.5 then you will have problems running FastQC and you need to update java on your laptop.\n\n> Run FastQC\nTo run FastQC as a GUI just like in Windows type:\n```\nfastqc\n```\nThis opens the FastQC GUI and you could load a fastq file via the GUI to get its quality report. However, you can also use fastqc as a command via the command line. \n\n> Open the file SRR074262.fastq to obtain the sequence of the contaminating adapter.\nCheck [http://wiki.bits.vib.be/index.php/Quality_control_of_NGS_data#Exercise_1:_Quality_control_of_the_data_of_the_introduction_training the exercise on FastQC in Windows] for details on the quality report that is generated\nThe big plus of running FastQC from command line is that command line allows you to combine and run a set of commands as a program by writing a command script.\n\n#### Automating FASTQC analyses\nIf you have many FASTQ files to check you might prefer running FASTQC from command line so you can loop over your files and process the reports automatically. \n\n> View the help files of the fastqc command\nAs for most commands the -h option nicely opens the help file:\n```\nfastqc -h\n```\n\n{{Wiki-img|NGS/Intro/CLFastQC1.png|500px}}\n\nTo run via command line you can simply specify a list of files to process:\n```\nfastqc somefile.fastq someotherfile.fastq\n```\nYou can specify as many files as you like. If you don't specify any files the program will open the GUI.\n\nHowever, there are a few options that might be helpful to use. 
Since FASTQC can process FASTQ, SAM and BAM files, it is always safer to tell it upfront which format to expect.\n\nWe will generate FASTQC reports for the two FASTQ files in the /home/bits/NGS/RNASeq/ folder.\n\n> Decompress the files\nFirst you have to decompress the fastq files. In the cheat sheet look up the command for decompressing a .gz file\n```\ngunzip chr22_SRR1039509_1.fastq.gz\ngunzip chr22_SRR1039509_2.fastq.gz\n```\nDecompression of the files results in two .fastq files that can be used as inputs generating the FASTQC reports.\n\n> Generate the FASTQC reports for the two fastq files.\nAs you can see in the help file of fastqc, the -f option allows you to specify the format of the input file(s). \n```\nfastqc -f fastq chr22_SRR1039509_1.fastq chr22_SRR1039509_2.fastq\n```\nThe two .html files contain the FASTQC reports and can be opened in a browser. \n\n> Open the first report in firefox via command line\n\n```\nfirefox chr22_SRR1039509_1_fastqc.html\n```\nBy default, FastQC will create an HTML report with embedded graphs, but also a zip file containing individual graphs and additional data files containing the raw data from which the plots were drawn.\n\n> Remove the .html and the .zip files \n\n```\nrm *.zip\nrm *.html\n```\nIf you have many files you might want to use a for-loop instead of typing all file names into the command.\n\n> Write a for-loop to process the two FASTQ files.\nFirst go back to the folder that contains the fastqc command and make sure you are operating as superuser.\nTake a close look at the syntax of the for-loop that is described in the slides. We are going to use the syntax for looping over files in a folder. Don't forget the `*` to loop over all fastq files in the specified folder:\n```\nfor file in /home/bits/NGS/RNASeq/*.fastq\ndo\nfastqc -f fastq ${file}\ndone\n```\nDon't forget the **$** since file is just a variable that refers to the actual files in the folder. 
Write every line on a different line in the terminal.\n\nWhen you go to the /home/bits/NGS/RNASeq folder you should see the same html and zip files as in the previous exercise. The two .html files contain the FASTQC reports and can be opened in a browser.\nIf you want to save your reports in a folder other than the folder which contains your fastq files you can specify an alternative location by using the **-o** option.\n\n> Create a new folder called FASTQCresults\n\n```\nmkdir FASTQCresults\n```\n> Create a variable output. Its value is the path to the newly created folder.\n\n```\noutput=/home/bits/NGS/RNASeq/FASTQCresults/\n```\n> Write a for-loop to analyze the quality of the fastq files and write the report to the new folder\n\nAdjust the code of the for-loop to write the results to the newly created folder\n```\nfor file in /home/bits/NGS/RNASeq/*.fastq\ndo\nfastqc -f fastq -o ${output} ${file}\ndone\n```\nDon't forget the **$** since output and file are variables. Write every line on a different line in the terminal.\nWhen you go to the /home/bits/NGS/RNASeq/FASTQCresults folder you should see the same html and zip files as in the previous exercise. The two .html files contain the FASTQC reports and can be opened in a browser.\n\nIn this way you can process hundreds of FASTQ files automatically. You can even write a script to process the reports and create a general overview of the quality of the complete experiment.\n\nIn the **Templates** directory of the /usr/bin/tools/FastQC/ you will find a file called **header_template.html** which you can edit to change the look of the report. This file contains all information concerning the layout of the FASTQC reports like the header for the report, the CSS section... and you can alter this however you see fit.\n\n\n#### Improving the quality of the data\nIn this exercise we go back to the data set of the Intro training in folder /home/bits/NGS/Intro. 
\nAlmost all NGS data sets contain a high number of contaminating adapter sequences. You can remove these adapters using command line tools like [https://code.google.com/p/cutadapt/ cutadapt]. See [http://wiki.bits.vib.be/index.php/Installing_cutadapt installation instructions].\n\n> Check the help file for the option that defines the number of mismatches you allow (= error rate).\nTo open the cutadapt help files (it's not a regular bash command so it doesn't have a manual) type:\n```\ncutadapt -h\n```\n\nScrolling down the help file shows that the **-e** option defines the maximum allowed error rate: the default is 0.1 meaning that it allows one mismatch every 10 nucleotides. Adapter sequences are identified by aligning each read to the adapter sequence: if the frequency of mismatches in the alignment is below the allowed error rate then the adapter sequence is trimmed from the read.\n> Check the option you need for defining the adapter sequence\nIn the help file you see that you have multiple options:\n\n - **-a** to trim adapter sequences at the 3' end of the reads. In most cases this is the adapter that's causing the problems: when small RNA fragments are sequenced, the resulting reads can be longer than the RNA fragments. As a result they will contain (parts of) the 3’ adapter. In longer reads the adapter might even lie within the read:\n```\nMYSEQUEN                         (no adapter contamination)\nMYSEQUENCEADAP                   (part of adapter at 3' end)\nMYSEQUENCEADAPTER                (adapter at 3' end)\nMYSEQUENCEADAPTERSOMETHINGELSE   (adapter within the read)\n```\nCutadapt will cut the adapter (part) and all sequence following it resulting in:\n```\nMYSEQUEN\nMYSEQUENCE\nMYSEQUENCE\nMYSEQUENCE\n```\n\n - **-g** to trim adapter sequences ligated at the 5' end of the reads. 
These adapters are expected to appear at the start of a read (where they can be just partially there) or somewhere within the read:\n```\nADAPTERMYSEQUENCE              (5' end)\nDAPTERMYSEQUENCE               (partial)\nTERMYSEQUENCE                  (partial)\nSOMETHINGADAPTERMYSEQUENCE     (within)\n```\nIn all cases, the adapter itself and the sequence preceding it will be removed, leaving in all examples above:\n```\nMYSEQUENCE\n```\n\n - **-b** to trim adapters at the 3' or 5' end of the read. If there is at least one base before the adapter, then the adapter is trimmed as a 3’ adapter and the adapter itself and everything following it is removed. Otherwise, the adapter is trimmed as a 5’ adapter and it is removed from the read, but the sequence after it it remains:\n```\nBefore trimming \t        After trimming \t\nMYSEQUENCEADAPTERSOMETHING \tMYSEQUENCE \nMYSEQUENCEADAPTER \t        MYSEQUENCE\nMYSEQUENCEADAP \t                MYSEQUENCE\nMADAPTER \t                M\nADAPTERMYSEQUENCE \t        MYSEQUENCE\nPTERMYSEQUENCE \t                MYSEQUENCE\nTERMYSEQUENCE \t                MYSEQUENCE\n```\n\n\nSince we probably have contaminating adapter at the 3' end we'll take the -a option\nAt the top of the help file you see that the standard usage of the command is:\n```\ncutadapt -a ADAPTER -o output.fastq input.fastq\n```\nYou can find the sequence of the adapter in the FastQC report of SRR074262.fastq\n\n> Trim the adapter sequence using the default error rate, store the trimmed sequences in a file SRR074262trim.fastq\nSo in our case the command is: \n```\ncutadapt -a GATCGGAAGAGCTCGTATGCCGTCTTCTGCTTGAAA -o SRR074262trim.fastq SRR074262.fastq\n```\nNote that the default error rate means that you allow max. 10% mismatches in the alignment of adapter and read. 
\n\n> How many reads consisted solely of adapter sequence (and were consequently completely removed) ?\nThe output of the cutadapt command is:\n```\nThis is cutadapt 1.8.1 with Python 2.7.6\nCommand line parameters: -a GATCGGAAGAGCTCGTATGCCGTCTTCTGCTTGAAA -o SRR074262trim.fastq SRR074262.fastq\nTrimming 1 adapter with at most 10.0% errors in single-end mode ...\nFinished in 66.92 s (7 us/read; 8.62 M reads/minute).\n\n### Summary\n\nTotal reads processed:               9,619,406\nReads with adapters:                 2,327,902 (24.2%)\nReads written (passing filters):     9,619,406 (100.0%)\n\nTotal basepairs processed:   346,298,616 bp\nTotal written (filtered):    271,141,022 bp (78.3%)\n\n### Adapter 1\n\nSequence: GATCGGAAGAGCTCGTATGCCGTCTTCTGCTTGAAA; Type: regular 3'; Length: 36; Trimmed: 2327902 times.\n\nNo. of allowed errors:\n0-9 bp: 0; 10-19 bp: 1; 20-29 bp: 2; 30-36 bp: 3\n\nBases preceding removed adapters:\n  A: 6.1%\n  C: 1.5%\n  G: 1.8%\n  T: 3.0%\n  none/other: 87.5%\n\nOverview of removed sequences\nlength    count    expect    max.err    error counts\n3    156030    150303.2    0    156030\n4    48693    37575.8    0    48693\n5    12005    9394.0    0    12005\n6    8702    2348.5    0    8702\n7    6686    587.1    0    6686\n8    5546    146.8    0    5546\n9    5958    36.7    0    5484 474\n10    5479    9.2    1    4539 940\n11    4197    2.3    1    3737 460\n12    4038    0.6    1    3713 325\n13    3392    0.1    1    3158 234\n14    2730    0.0    1    2531 199\n15    2801    0.0    1    2625 176\n16    2384    0.0    1    2221 163\n17    1887    0.0    1    1759 128\n18    1998    0.0    1    1848 150\n19    1572    0.0    1    1447 123 2\n20    1257    0.0    2    1079 107 71\n21    1141    0.0    2    1029 90 22\n22    730    0.0    2    671 46 13\n23    504    0.0    2    471 21 12\n24    549    0.0    2    499 37 13\n25    495    0.0    2    441 39 15\n26    587    0.0    2    538 35 14\n27    657    0.0    2    585 53 19\n28    711  
  0.0    2    633 40 26 12\n29    764    0.0    2    687 49 24 4\n30    889    0.0    3    760 85 33 11\n31    887    0.0    3    739 94 42 12\n32    579    0.0    3    466 65 37 11\n33    438    0.0    3    347 36 38 17\n34    700    0.0    3    541 85 53 21\n35    5390    0.0    3    4652 507 171 60\n36    2037526    0.0    3    1870684 129754 20094 16994\n```\nIn the last line you see the number of reads with 36 bases aligned to the adapter sequence. Since that is the total of the read (the reads are 36bp long) it means that over 2 million reads only consist of adapter sequence, 1.870.684 being completely identical to the adapter, 129.754 containing 1 mismatch with the adapter...\n> Open the trimmed sequences in FastQC\nTo open the FastQC GUI type the fastqc command\n```\nfastqc\n```\nYou can compare the results with these of the original reads on [http://wiki.bits.vib.be/index.php/Quality_control_of_NGS_data the Quality control of NGS data wiki page].\n\n> Are all the reads still 36 nt long after trimming ?\nIn the **Basic statistics** tab you see that the length of the reads varies as was to be expected after trimming\n\n{{Wiki-img|NGS/Intro/fastqcTrim1.png|400px}}  \n> Have the quality scores of the reads significantly changed after trimming ?\nThe **Per base sequence quality** is similar to that of the untrimmed file, as is the **Per sequence quality**. The latter one just shows a lower number of sequences since the 2 million reads that consisted solely of adapter sequence are no longer taken into account.\n\n{{Wiki-img|NGS/Intro/fastqcTrim2.png|400px}} \n\nQuality scores have changed a bit of course since you removed bases and reads from the data set but you did not trim based on quality but based on similarity to an adapter sequence so the scores of the trimmed reads are similar to those of the untrimmed reads. 
If you had trimmed low quality bases, the quality scores would have been higher in the trimmed reads.\n> Has the per base sequence content improved as a result of the trimming ?\nThe **Per base sequence content** plot - the tool to detect adapter contamination - has greatly improved although it is still not considered stable enough.\n\n{{Wiki-img|NGS/Intro/fastqcTrim3.png|400px}} \n> What are the bumps you see in the Sequence length distribution plot ?\n\nThis question is related to the results of the trimming:\n```\nOverview of removed sequences\nlength    count    expect    max.err    error counts\n3    156030    150303.2    0    156030\n4    48693    37575.8    0    48693\n5    12005    9394.0    0    12005\n...\n33    438    0.0    3    347 36 38 17\n34    700    0.0    3    541 85 53 21\n35    5390    0.0    3    4652 507 171 60\n36    2037526    0.0    3    1870684 129754 20094 16994\n```\nAs you can see here over 2 million reads corresponded to adapter over their entire length and as a result were trimmed to length zero. This is the large peak at length zero on the plot. Over 150000 reads contain 3 bases that belong to the adapter. These 3 bases have been cut leaving reads of 33 nt long: this is the small peak you see on the plot at length 33. All intermediate lengths of adapter contamination have been detected but in such a small fraction of reads that you cannot see the influence of the trimming on the plot.\n\n{{Wiki-img|NGS/Intro/fastqcTrim4.png|400px}} \nFASTQC calls a failure for this plot because it knows the file contains Illumina data and it expects the reads to have the same lengths. The software does not consider the fact that this is no longer true after trimming.\n\n> Are there any overrepresented sequences left ?\nThe 2 million sequences that were initially detected as contaminating adapters are still in the list but now as sequences with zero length. 
The other contaminating sequences are of course still present but at very low counts.\n\n{{Wiki-img|NGS/Intro/fastqcTrim6.png|400px}} \n> Are there any overrepresented hexamers ?\nFASTQC still detects overrepresented hexamers although at much lower counts than before. These are probably parts of the remaining overrepresented sequences.\n\n{{Wiki-img|NGS/Intro/fastqcTrim5.png|400px}}\n\n### Linking files\n\n#### Linking FastQC\n\nIn the previous exercise you had to specify the path of the fastqc command, otherwise the operating system was not able to find (and thus execute) the command. You can avoid having to specify the path every time you want to execute a command by creating a link to the command using the **ln** command.\nYou can make soft or hard links, for what we want to achieve a soft link is fine. When you place a link to the command in /usr/local/bin you will be able to run the program from any location by just typing\n```\nfastqc\n```\nSo the overall format of the command is as follows:\n```\nln -s (soft link) path_where_fastqc_is (source path) /usr/local/bin/fastqc (destination path)\n```\n\n> What's the command you would need for creating this soft link ? \nWhen you look in the manual of **ln** you see that for creating a soft link you need the **-s** option. So you use the following command: \n```\nln -s /usr/bin/tools/FastQC/fastqc /usr/local/bin/fastqc\n```\nCheck if you can run the fastqc command from any location now.\n\n#### Linking Tophat2\n\nIf you don't create a symbolic link you have to specify the full path of the command when you want to run it, otherwise the operating system is not able to find (and thus execute) the command. You can avoid having to specify the full path every time you want to execute a command by creating a link to the command using the **ln** command. For creating symbolic links you need superuser powers!\nYou can make soft or hard links, for what we want to achieve a soft link is fine. 
When you place a link to the command in /usr/local/bin/ you will be able to run the program from any location by just typing its name.\nSo the overall format of the command is as follows:\n```\nln -s (soft link) path_where_command_is (source path) /usr/local/bin/name (destination path)\n```\n\n> Create a symbolic link for tophat2\nFor creating the link you need the following command:\n```\nsudo ln -s /usr/bin/NGS/tophat-2.1.1.Linux_x86_64/tophat2 /usr/local/bin/tophat2\n```\nRemember to use tab autocompletion !\nNow type **tophat2**. If you see the help file, the link works.\n\nIf you mess up the link you have to remove it before you can try again using the following command:\n```\nsudo unlink /usr/local/bin/tophat2\n```\n\n\n#### Linking samtools\n\nWe will also do the same for samtools to use samtools from anywhere in the file system. \n\n> Create a symbolic link for samtools\nCreate a link using the **ln -s** command:\n```\nsudo ln -s /usr/bin/NGS/tophat-2.1.1.Linux_x86_64/samtools_0.1.18 /usr/local/bin/samtools-0.1.18\n```\nCheck if the command works. If you type\n```\nsamtools-0.1.18 view\n```\n(one of the possible samtools commands) you should see the manual of the command.\nIn many cases you will have several versions of samtools running on your laptop. That's why I don't call the tool samtools but I choose the full name including the version number.\n\n[http://wiki.bits.vib.be/index.php/Introduction_to_ChIP-Seq_analysis#Linking_tools Linking tools for the ChIP-Seq training]\n\n\n### Mapping reads\n\n#### Mapping reads of the ChIP-Seq training with Bowtie\n\n### Mapping reads with Bowtie\n\n*Exercise created by Morgane Thomas Chollier*\n\n#### Obtaining the reference genome\nIf you are going to follow the ChIP-Seq training, skip this part: you are going to do these steps during the ChIP-Seq training. The fasta file containing the reference genome is called Escherichia_coli_K12.fasta and is stored in the /home/bits/NGS/ChIPSeq/ folder on the BITS laptops. 
Alternatively you can use the file that you downloaded via wget in exercise 3.\n\nIf you are not going to follow the ChIP-Seq training, go on and see how to obtain the reference genome. \n\nBack to the ChIP-Seq data of *E. coli*. In this experiment we want to see which genomic regions are bound to transcription factor FNR. However, at this point what we have is a set of reads that are identified by their location of the flow cell. To answer our question we should link the reads to regions in the genome.\nTo obtain their genomic coordinates, we will map each read on the reference genome sequence\nAs said before, for Illumina reads the standard mappers are BWA and Bowtie (version 1 and 2). In this exercise we will use Bowtie version1. \nCheck out the [http://wiki.bits.vib.be/index.php/Linux_command_line#Installing_Bowtie installation instructions for Bowtie].\n\nBowtie1 was installed and a symbolic link was created so the command should work from anywhere in the file system when you type bowtie-1.1.2\n\n> What happens when you type the bowtie command ?  \nThis prints the help of the program. However, the help file is a bit difficult to read ! If you need to know more about the program, it's easier to directly check [http://bowtie-bio.sourceforge.net/manual.shtml the  manual on the website]\nBowtie needs a reference sequence to align each read on it. \n\n> Which *E. coli* strain was used in the experiment ?  \nGo to [http://journals.plos.org/plosgenetics/article?id=10.1371/journal.pgen.1003565 the paper] and check the part **Strains and growth conditions** in the **Materials and methods** section. There you see that the experiment was done using *E. coli* K-12 MG1655.\n\n{{Wiki-img|NGS/Intro/mapping1.png|600px}}\nSo we need the genome sequence of *E. coli* K-12 MG1655 and it needs to be in a specific format (=index) for bowtie to be able to use it. 
Several pre-built indexes are available to download on [http://bowtie-bio.sourceforge.net/manual.shtml the bowtie webpages] or the [http://support.illumina.com/sequencing/sequencing_software/igenome.html iGenomes website].\n\nAlthough the *E. coli* sequence is available we will not use it to show you how you should proceed if you don't find your reference sequence here. In that case you will need to make the index file yourself. \n\nIf you can't find your reference on the iGenomes website you have to download it from:\n\n - [http://genome.ucsc.edu/ UCSC]\n - [http://www.ensembl.org/index.html Ensembl]\n - [http://www.ncbi.nlm.nih.gov/ NCBI]\n\nSince Ensembl focuses on higher eukaryotes, we are going to download the genome from NCBI.\n\n> Which reference sequence was used in the experiment ?  \nGo to [http://journals.plos.org/plosgenetics/article?id=10.1371/journal.pgen.1003565 the paper] and check the part **High-throughput RNA sequencing (RNA-seq) analysis**. There you see that the reads were mapped to an NCBI sequence with version number **U00096.2**.\n\n{{Wiki-img|NGS/Intro/mapping2.png|600px}}\n\n> Search for this sequence on NCBI ?  \nGo to [http://www.ncbi.nlm.nih.gov/ the NCBI website], select the **Nucleotide** database, type **U00096.2** as a search term and click **Search**. In the record of this sequence you see that an updated version is available. Click the **See current version** link. \n\n{{Wiki-img|NGS/Intro/mapping3.png|500px}}\n\nThis sequence is not a RefSeq sequence (the high quality part of NCBI Genbank). You can see that because the version number does not contain an underscore and all RefSeq version numbers contain an underscore.\n\n> Is there a RefSeq sequence available ?  \nIn [http://www.ncbi.nlm.nih.gov/nuccore/U00096.3 the record of the current version], scroll down to the **Related information** section in the right menu. There you see that a RefSeq sequence is available. Click the **Identical RefSeq** link. 
\n\n{{Wiki-img|NGS/Intro/mapping4.png|600px}}\n\nThis brings us to a RefSeq record with version number NC_000913.3. Note that we will not take this latest version but the previous one (NC_000913.2), because the available tools for visualization have not been updated yet to the latest version. This will not affect our results.\n\n> Download the sequence of the previous version of the RefSeq record in FASTA format  \nSearch the **Nucleotide** database for **NC_000913.2**\n\n{{Wiki-img|NGS/Intro/mapping5.png|600px}}\n\n{{Wiki-img|NGS/Intro/mapping6.png|600px}}\n\n\n - In the record expand the **Send to** section (red).\n - Select **File** as destination (green). This means that you download the data on your computer.\n - Select **FASTA** format (blue).\n - Click **Create File**.\n\nThis creates a file called **sequence.fasta** in the **Downloads** folder in your **Home** folder. Copy the downloaded file to the folder where the fastq files are located (/home/bits/NGS/ChIPSeq on the BITS laptops) and rename it as **Escherichia_coli_K12.fasta**.\n\n#### Writing a bash script to map the reads to the reference genome\n\nSuppose that you expect to be doing many NGS experiments on *E. coli*. Each time we analyze a data set, we will have to map the reads against the *E. coli* genome. The best way to ensure that you can reuse commands during the next analysis, is to combine them into a script (= small program). Since the script will consist of command line (= bash) commands, the script is called a bash script.\n\nYou cannot do the mapping directly on the .fasta file, you need to index the file first. Reference genomes from the Bowtie /iGenomes website are already indexed so when you get your reference there you can skip this step. Reference genomes downloaded from NCBI, Ensembl or UCSC need to be indexed using the bowtie-build command.  \n\nIndexing a reference genome is a one-time effort: you do not have to repeat it each time you do a mapping. 
This is why we are not going to include the indexing in the script.  \n\n> Create a variable called folder containing the path to the folder that contains the E. coli fasta file\n\n```\nfolder=/home/bits/NGS/ChIPSeq/\n```\n> Check out the manual of the bowtie-1.1.2-build command to see the arguments it takes\nSince we have created a soft link for the bowtie-1.1.2-build command, the command should work from any location in the Linux file system. To see the help file just type the command:\n```\nbowtie-1.1.2-build \n```\nIn the help file you see that you need to specify the reference genome that you want to index as an input (in our case the E. coli fasta file) and that you have to specify the output file.\n```\nUsage: bowtie-build [options]* <reference_in> <ebwt_outfile_base>\n    reference_in            comma-separated list of files with ref sequences\n    ebwt_outfile_base       write Ebwt data to files with this dir/basename\n```\nWe will give the output files the same name as our input file: Escherichia_coli_K12\n\n> Prepare an indexed reference sequence for E. coli using the bowtie-build command, use the folder variable\nSo as an input the command expects the name of the input and the output file. \n```\nbowtie-1.1.2-build ${folder}Escherichia_coli_K12.fasta ${folder}Escherichia_coli_K12\n```\nbowtie-build will index the Escherichia_coli_K12.fasta generating a whole set of .ebwt files whose name all start with Escherichia_coli_K12. We will write a bash script to do the rest of the mapping.\n\nWriting a script can be done in any text editor. 
On the BITS laptops you can use gedit:\n\n - Click the **Menu** at the bottom left corner of the desktop\n - Type **gedit** in the text search box\n - Click the **Text Editor** button\n\n{{Wiki-img|NGS/Intro/script1.png|300px}}\nThe first thing you do when you write a script is define all the variables you need.\nWe need the following variables:\n\n - The **folder** that contains the reference genome.\n - The name of the **input** fastq file you want to map (if it's in the same folder as the reference as it is in our case). If the fastq file is in another folder you have to specify the full path to the file.\n\n> Create the required variables \n```\nfolder=/home/bits/NGS/ChIPSeq/\ninput=SRR576933\n```\nMake sure that the file containing the indexed reference genome and the fastq files containing the *E. coli* reads are located in the same folder.\n\n> Check the help file for bowtie-1.1.2 \n Go back to the terminal and type\n```\nbowtie-1.1.2\n```\n> What is the first argument bowtie expects ?\nAs first argument bowtie expects the path to the ebwt files (= the genome index files) so in our case that's Escherichia_coli_K12\n```\nUsage: \nbowtie [options]* <ebwt>\n```\n> What is the second argument bowtie expects ?\nAs second argument bowtie expects the information of the input file containing the reads, in our case SRR576933.fastq. Bowtie can be used to map single end reads as we have but also to map paired end reads. In the case of paired end reads you have two fastq files, one with the upstream reads and one with the downstream reads. That's why you can specify two input files m1 and m2. 
In our case it's just one file.\n```\nUsage: \nbowtie [options]* <ebwt> {-1 <m1> -2 <m2> \n\n  <m1>    Comma-separated list of files containing upstream mates (or the\n          sequences themselves, if -c is set) paired with mates in <m2>\n  <m2>    Comma-separated list of files containing downstream mates (or the\n          sequences themselves if -c is set) paired with mates in <m1>\n```\n> What is the final argument bowtie expects ?\nAs final argument bowtie expects the output file which is in our case SRR576933.sam\n```\nUsage: \nbowtie [options]* <ebwt> {-1 <m1> -2 <m2> | --12 <r> | <s>} [<hit>]\n  <hit>   File to write hits to (default: stdout)\n\n```\nYou need to tell bowtie which type of file your input file is.  \n\n> What is the option for doing this ?\nVia the option: -q indicates the input file is in FASTQ format.\n```\nUsage: \nInput:\n  -q                 query input files are FASTQ .fq/.fastq (default)\n```\nFastq is the default, so you don't have to explicitly set this option. If you don't specify it in your command bowtie will automatically assume your input is in fastq format.\n\nYou need to tell bowtie the maximum number of mismatches you allow in the alignments of your reads to the reference.  
\n\n> What is the option for doing this ?\nVia the option: -v \n```\nAlignment:\n  -v <int>           report end-to-end hits w/ <=v mismatches; ignore qualities\n```\nIf you set this option's argument to 2, it means that bowtie will allow two mismatches anywhere in the read, when aligning the read to the genome sequence.\n\nThen we want to set an option that allows you to define a number of bases that should be trimmed from the 3' ends of the reads before the alignment is done.\n\n> What is the option for doing this ?\nVia the option: -3 \n```\n  -3/--trim3 <int>   trim <int> bases from 3' (right) end of reads\n```\nWe want to set this option to trim the last base from the 3' ends of the reads before the alignment is done.\n\nWe also want to specify that we only want reads that map specifically to one location in the genome in our output.\n\n> What is the option for doing this ?\nVia the option: -m \n```\n  -m <int>           suppress all alignments if > <int> exist (def: no limit)\n```\nFinally we want to specify that the output should be SAM format.\n\n> What is the option for doing this ?\nVia the option: -S \n```\nSAM:\n  -S/--sam           write hits in SAM format\n```\n> Write the error channel to a file called SRR576933.out\nThis is not a bowtie option: use the shell redirection operator 2> \n```\n2> SRR576933.out \n```\nIn the script you use the variables you have created instead of the actual file name SRR576933\n\n> Map the reads to the indexed reference sequence ?\n\nSo the full script becomes:\n```\nfolder=/home/bits/NGS/ChIPSeq/\ninput=SRR576933\nbowtie-1.1.2 ${folder}Escherichia_coli_K12 -q ${folder}${input}.fastq  -v 2 -m 1 -3 1 -S 2> ${folder}${input}.out > ${folder}${input}.sam\n```\nWe asked the mapper to create a sam file with mapping results. In the same way we could create a bam file. While SAM files can be inspected using Linux commands (head, less, grep, ...), BAM format is compressed and requires a special parser to read the file. 
Samtools is used to view bam files but it can also be used to analyze sam files.\n\nLook at this [http://davetang.org/wiki/tiki-index.php?page=SAMTools very informative wiki on samtools] and the [http://samtools.sourceforge.net/samtools.shtml official manual of samtools]. The manual does not document some of the commands, so it is better to first look in the wiki to find the command you need and then look in the manual to have an overview of the options it uses.\n\nWe will use samtools to get a rough idea of the quality of the mapping. Look at the samtools wiki to see which command you need for getting the basic statistics of a sam file.\n\n> Command to get the basic stats of the mapping file.  \nOn the [http://davetang.org/wiki/tiki-index.php?page=SAMTools samtools wiki] \n\n{{Wiki-img|NGS/Intro/samtools2b.png|300px}}\n\nyou can see that you need the **samtools flagstat** command for this.\nHowever samtools flagstat expects a bam file as input. So look at the samtools wiki to see which command you need for transforming a sam into a bam file.\n\n> Command to convert sam into bam files.  \nOn the [http://davetang.org/wiki/tiki-index.php?page=SAMTools samtools wiki] you can see that you need the **samtools view** command for this. \nFor the exact command you need to know if the sam file contains a header. Let's assume that the sam file indeed contains a header (it does, I checked). 
The symbolic link for samtools is samtools-0.1.18  Notice that we include the version number of bowtie and samtools in the symbolic link because we have mutiple versions of bowtie and samtools installed on the laptops.\n\n> Add the command for transforming the sam into a bam file to your script\n\n```\nfolder=/home/bits/NGS/ChIPSeq/\ninput=SRR576933\nbowtie-1.1.2 ${folder}Escherichia_coli_K12 -q ${folder}${input}.fastq  -v 2 -m 1 -3 1 -S 2> ${folder}${input}.out > ${folder}${input}.sam\nsamtools-0.1.18 view -bS  ${folder}${input}.sam > ${folder}${input}.bam\n```\n> Add the command for analyzing the bam file to your script\n```\nfolder=/home/bits/NGS/ChIPSeq/\ninput=SRR576933\nbowtie-1.1.2 ${folder}Escherichia_coli_K12 -q ${folder}${input}.fastq  -v 2 -m 1 -3 1 -S 2> ${folder}${input}.out > ${folder}${input}.sam\nsamtools-0.1.18 view -bS  ${folder}${input}.sam > ${folder}${input}.bam\nsamtools-0.1.18 flagstat ${folder}${input}.bam\n```\nBash scripts all have one characteristic: the first line of a bash script is always the following:\n```\n#!/bin/bash\n```\nThis tells the system which program should be used to interpret the script (in this case: /bin/bash)\n \n\n> Add this line to your script\nSo the full script becomes:\n```\n#!/bin/bash\nfolder=/home/bits/NGS/ChIPSeq/\ninput=SRR576933\nbowtie-1.1.2 ${folder}Escherichia_coli_K12 -q ${folder}${input}.fastq  -v 2 -m 1 -3 1 -S 2> ${folder}${input}.out > ${folder}${input}.sam\nsamtools-0.1.18 view -bS  ${folder}${input}.sam > ${folder}${input}.bam\nsamtools-0.1.18 flagstat ${folder}${input}.bam\n```\nSave the script as \"my_mapping\" in the /home/bits/NGS folder.\n{{Wiki-img|NGS/Intro/script2.png|400px}}\n\n> Check permissions of the script and change them if needed.\nGo to the folder where you have saved the script: /home/bits/NGS and type\n```\nll\n```\nThe script is not executable:\n{{Wiki-img|NGS/Intro/script4.png|300px}}\nMake it executable by typing:\n```\nchmod 755 my_mapping\nll\n```\nTo run the script make 
sure you are in the folder containing the script (/home/bits/NGS) and type:\n```\n./my_mapping\n```\nThe mapping should take a few minutes as we work with a small genome. For the human genome, we would need either more time, or a dedicated server.\n\nThe samtools flagstat command displays an overview of the alignment results on your screen. The results are not very informative because the data set comes from a single-end sequencing experiment. You just see that 62% of the reads were mapped. This may seem low but remember that we haven't done any cleaning on the file. According to FASTQC the file contains about 30% of adapter sequences that will not map.\n\nRepeat the analysis for the control sample SRR576938.fastq. These two fastq files come from a ChIP-Seq experiment, the first contains the reads of the ChIP sample, the second of the control sample, which consists of fragmented genomic DNA. You need both to identify regions in the genome that are represented more in the ChIP reads than in the control (these are the regions that bind to the transcription factor).\n\n> Repeat the analysis for sample SRR576938.fastq ?\nRepeating the mapping is easy now: the only thing you need to do is change the value of the input variable in the script:\n\n - Reopen the script in gedit\n - Change the name of input file\n - Save the changes\n - In the terminal go to the folder containing the script (/home/bits/NGS)\n - Run the script by typing:\n```\n./my_mapping\n```\n\n \n> How many reads of the control sample were mapped ?\nIn the flagstat results, you see that 95% of the reads were mapped. This is of course ok but you expected a high percentage here since the control sample is nothing more than the reference genome cut up into small pieces. \nAt this point, you have two sam and two bam files, one for the treated sample, one for the control sample. 
\n\nFor paired-end data flagstat results are much more informative, see an example below:\n\n{{Wiki-img|NGS/Intro/samtools3.png|500px}}\n\nThis overview deserves some explanation:\n\n - **nan** means **Not A Number** (e.g: divided by 0 )\n - **paired in sequencing** means reads that belong to a pair regardless of the fact that they are really mapped as a pair\n - **read1** means forward reads\n - **read2** means reverse reads\n - **properly paired** means that both mates of a read pair map to the same chromosome, oriented towards each other, and with a sensible insert size\n - **with itself and mate mapped** means that both reads of a pair map to the genome but they are not necessarily properly paired, they just map somewhere on the genome\n - **singletons** means that one of the reads of a pair is unmapped while its mate is mapped\n - **with mate mapped to a different chr** means reads with a mate mapped on a different chromosome\n - **with mate mapped to a different chr (mapQ >= 5)** means reads with a mate mapped on a different chromosome having a mapping quality greater than 5\n\n> Compare the number of forward and reverse reads in the paired-end experiment.  \nthe counts of forward and reverse reads are to be found on the lines ending with read1 and read2 respectively. As you see the number of reverse reads exceeds the number of forward reads by 439. \n> How many reads were mapped as a pair in the paired-end experiment?   \n12.911.388 reads were properly mapped as a pair, that's 85,68% of the total number of reads\n\nYou can find similar info in the SRR576933.out file in the ChIPSeq folder (using the **less** command), which also contains some statistics about the mapping.\n\n> How many reads were mapped according to this file ?\nYou see that 62% of the reads was mapped, which is good considering 30% of the reads contained adapter sequences. Type **q** to leave the less editor. 
This result is in agreement with the result of the samtools flagstat command.\n\n\n#### Visualize mapping in IGV\n\nIGV is installed on the bits laptops and can be run using the **igv** command.\n```\nigv\n```\n\nThis opens the graphical user interface of the tool (similar to what we have with firefox during the class). Be patient, it might take a few minutes for the program to start.\n\nWe open the bam file that was generated by the Picard modules in IGV. The bam file contains Arabidopsis reads. This means we have to visualize them on the Arabidopsis genome. Change the genome in IGV from Human hg19 to A. thaliana (TAIR10).\n\n{{Wiki-img|NGS/Intro/IGV3.png|700px}}\n\nThis should display the Arabidopsis genome in the top and the bottom view.\nNow it's time to load the mapped reads via **File** in the top menu and **Load from File**.\n\n{{Wiki-img|NGS/Intro/IGV4.png|300px}}\n\nSelect the .bam file to open. You don't need to load the .bai file, it's sufficient that it is present in the same folder as the .bam file. \nThis loads the data into the center view. At this point, you can't see the reads, you have to zoom in to view them.\nAccording to the [http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0025279#s3 supplemental material] accompanying the paper describing this data set, AT1G02930 is highly expressed in all samples and differentially expressed during the defense response in *A. thaliana*. So we will zoom in on this gene. You can do this by typing the accession number in the top toolbar and clicking **Go**:\n\n{{Wiki-img|NGS/Intro/IGV5.png|700px}}\n\nThe reads for this gene are now visualized in the center view. You can zoom in even more using the zoom bar in the top toolbar:\n\n{{Wiki-img|NGS/Intro/IGV6.png|700px}}\n\nZoom in until you see the nucleotides of the reference sequence.\n\n{{Wiki-img|NGS/Intro/IGV7.png|700px}}\n\nThe reads are represented by grey arrows, the arrow indicating the orientation of the mapping. 
Hovering your mouse over a read gives additional info on the mapping. The colored nucleotides indicate mismatches between the read and the reference. Alignments that are displayed with light gray borders and white fill, have a mapping quality equal to zero. Interpretation of this mapping quality depends on the mapper as some commonly used mappers use this convention to mark a read with multiple alignments. In such a case, the read also maps to another location with equally good placement. It is also possible the read could not be uniquely placed but the other placements do not necessarily give equally good quality hits. \n\nBy default IGV calculates and displays the coverage track (red) for an alignment file. When IGV is zoomed to the alignment read visibility threshold (by default 30 KB), the coverage track displays the depth of the reads displayed at each locus as a gray bar chart. If a nucleotide differs from the reference sequence in greater than 20% of quality weighted reads, IGV colors the bar in proportion to the read count of each base (A, C, G, T). You can view count details by hovering the mouse over a coverage bar:\n\n{{Wiki-img|NGS/Intro/IGV8.png|700px}}\n","# Bitmap vs Vector images\n## Bitmap \n- Pixels in a grid/map\n- Resolution dependent\n- Restricted to rectangle\n- Resizing reduces visual quality\n- Easily converted\n- Minimal support for transparency\n- Popular file formats: BMP, GIF, JPEG, JPG, PNG, TIFF\n\nBit depth or color depth is the amount of data assigned to every pixel (e.g. 1-bit = black/white, 4-bit = 16 colors/shades of grey, etc.) The more data, the more realistic your image will be. 
More data per pixel also means larger files.\n\n## Vector\n- Scalable\n- Resolution independent\n- No background\n- Inappropriate for photo-realistic images\n- XML based text format\n- Popular file formats: SVG, AI, CGM, DXF, WMF, EMF\n\n# Pixels\nResolution = number of pixels =  how much detail an image holds\nPPI: pixels per inch\n- Screen pixel density (monitor/smartphone)\n- Tells you how large an image is\n\nDPI: dots per inch\n- Print-out dots density (inkjet/laser printer)\n- Printer settings\n\nAn image at 72 PPI will look fine on a monitor, but printing is another matter! Print it on paper and you will notice the difference between 72 DPI and 300 DPI.\n\n# File formats and compression\n## JPG/JPEG\n- Supports 16 million colours (24-bit)\n- Lossy compression (information is lost from original file)\n- Small file size (compressed)\n- Photographs\n## BMP\n- Supports 8/16/24-bit\n- Uncompressed file format\n- Large file size\n## TIFF\n- Tagged Image File Format\n- All colour and data information is stored\n- Uncompressed (lossy and lossless compression is possible)\n- Very large file size\n## GIF\n- Graphics Interchange Format\n- Only 256 colours possible (8-bit)\n- Replaces multiple occurring patterns with one\n- Small file size\n- Animation\n## PNG\n- Portable Network Graphics\n- 256 / 16M colours\n- 8-bit transparency\n- Lossless compression\n## SVG\n- Scalable Vector Graphics\n- XML-based format\n- Lossless data compression\n- Creatable and editable with a text editor\n- Can contain both bitmap and vector data\n## PDF\n- Portable Document Format\n- Can contain both bitmap and vector data\n## RAW/DNG\n- Digital Negative (DNG) is a universal RAW file format\n- Raw image file (without white balance, color saturation, contrast settings, …)\n- RAW files can be camera brand specific\n- Large file size\n- Multiple options without taking the picture again\n## Publication vs Presentation\nKey features for publications:\n- Raw/uncompressed image file (e.g. 
TIFF)\n- High quality image (300 PPI) and resolution\n- Lossless compression (e.g. PNG)\n- Compression is sometimes allowed (check journal website!)\n\nKey features for presentation:\n- Normal quality image (72 PPI) and smaller resolution (max width: 1920 pixels)\n- Compression is allowed (e.g. JPEG)\n- Smaller file size\n\n# Guidelines on image editing\nScientifically accepted image manipulations are described in guidelines. VIB also has a document to guide you in what is and what isn't acceptable when adjusting your images. Some examples are:\n- No specific feature within an image may be enhanced, obscured, moved, removed or introduced\n- Adjustments of brightness, contrast or color balance are acceptable if they are applied to the whole image as long as they do not misrepresent information in the original\n- Grouping of images from different parts of the same or different gels, fields or exposures must be made explicit by the arrangement of the figure (dividing lines)\n- The original data must be made available by the author when asked to provide it, otherwise acceptance of the publication may be revoked\n\nYou can find all the VIB guidelines [here](http://data.bits.vib.be/pub/trainingen/GIMP_Inkscape/VIB_guidelines.pdf).","# What is Inkscape?\nInkscape is professional quality vector graphics software which runs on Windows, Mac OS X and GNU/Linux. It is used by design professionals and hobbyists worldwide, for creating a wide variety of graphics such as illustrations, icons, logos, diagrams, maps and web graphics. Inkscape uses the W3C open standard SVG (Scalable Vector Graphics) as its native format, and is free and open-source software.\nDuring this training we will use **Inkscape 0.92** on Windows. To download the most recent version, browse to the [Inkscape Download page](https://inkscape.org/en/download/). 
For Windows 10 S: the Inkscape app is also available in the Microsoft Store.\n## External training material\n- [Online Inkscape tutorials](https://inkscape.org/en/learn/tutorials/).\n- [Nick Saporito Inkscape tutorials for beginners](https://www.youtube.com/playlist?list=PLynG8gQD-n8BMplEVZVsoYlaRgqzG1qc4)\n- [Nick Saporito Inkscape intermediate/advanced tutorials](https://www.youtube.com/playlist?list=PLynG8gQD-n8AFcLFAkvqJYnQUiBweRh1y)\n\n## User Interface\nInkscape is a single-window program. Drawing tools are on the left hand side, option docks are on the right. \nIn the central window, you have the drawing area with an A4 page as the default document layout. To select another format for e.g. posters, go to **File - Document Properties**. Next to the document size, you can adjust the background colour (default: transparent).\n\n## Import Images\nYou can import scalable vector graphic files (.svg) and also GraphPad Prism graphs (.emf or .pdf format).\nInkscape is not used for editing images like GIMP. If you import bitmap images, note that they are not scalable like vector objects!\n\n## Drawing lines and objects\nYou can draw a line with the Draw Bezier tool. You can make your own shape or just draw a line or path. On top of your drawing area you can select the Mode: Regular Bezier curves, Spiro paths, straight line segments and paraxial line segments. When selecting the straight line mode, you can hold the Ctrl button to make your line snap every 15 degrees around your first/previous point.\nYou can draw shapes by using the Rectangle tool, Ellipse tool and the Create Stars and Polygons tool. On top of the drawing area you can specify your polygon and star properties, size and lock aspect ratio. Here the Ctrl key is useful as well, for creating squares and circles or specifying the position of your object.\nWhen you have an object (polygon or others) you can select a color for the stroke and inside of the object. 
Selecting an object using the Selection tool will give you more options on top of the view area. You have the option to rotate, flip, change dimensions and XY position (in different units). You can change the position of the selected object compared to others (move up/down). \n\n## Paths\nA path consist of lines and nodes. These lines can be straight or curved and you can make an object using paths ( closed path). When in Path mode you have several options; add or remove a node, joining or breaking nodes apart and changing the node properties. You can also change the segment (line between nodes) properties with the options on top of the screen. \nYou can convert an object into a path to gain more flexibility by selecting the object and go to **Path – Object to path**. Afterwards you can use the object tool or the path tool to manipulate the object. \n\n## Fill and stroke\nPaths, lines and objects can be given a plain color, patterns, gradient color or left blank/transparent. You can also configure the stroke style and color. Click **Object – Fill and Stroke** to see all the options. Paths/lines can be transformed into arrows using the Stroke style option **Markers**.\n\n## Text\nAt the left there is also a Text tool available. With this tool you can create and change text, it's colour, font, style and size. After entering text, you’re able to manipulate it like an object. You can also attach text into a frame by selecting both objects and click on **Text – Flow into Frame**.\nYou can also align text to a path. Select both text and path and click **Text – Put on Path**. Once the text in aligned to the path it stays adaptable and can be removed from the path; **Text - Remove from Path**.\nText is an object at first. 
When you select **Path - Object to path** you can modify your text like any other object that is converted into a path.\n\n## Grouping, aligning and arranging object/paths\nTo group several object you must select them all (hold Shift) and select **Object – Group**. To unite several paths you must select **Path – Combine**. Both options are the same and allow you to manipulate objects/paths as one. Both actions can be reversed (Ungroup / Break Apart).\nSeveral object must be aligned before you group them, think of text inside a box. To display the options, go to **Object - Align and Distribute**. When multiple objects are selected, you can align the top, bottom, left and right edges of the objects. Aligning on the central axes is also possible, this in both horizontal as vertical direction. The aligned objects always need an anchor, this can be changed in the box on top of the toolbox (Relative to:). This anchor can be an object (first, last, smallest or biggest) or the page, a selection or the complete drawing. Distributing objects works in a similar way, but manages the space between objects. For paths you can only align the nodes.\nAligning or distributing objects allows you to manipulate the X and Y position of your objects. There is also a virtual Z axis. When you have multiple objects with different colours, you can move the one above the other. Every new object you draw will be on top of all the rest. To raise an object one step or to the top, you can use the buttons on top of your screen. The same can be done to lower an object one step or to the bottom.\n\n## Path Effects and operations\nWhen you want to distribute/multiply an object along a guideline, there is a tool called Path Effects. First draw and select the object or group of objects and past it in the clipboard (Ctrl + C). Draw or select your path (guideline) and select **Path – Path Effects**. Click on the '+' sign and select the effect **Pattern Along Path**. 
In the new box on the right: select 'Repeated' on the option Pattern copies. Now click on 'Paste path' to paste the object you want to multiply. Note that only the shape is pasted, not the color. When adjusting the color, it will affect the entire path. To copy the colour, use Crtl+C again on your original, select your path of objects and go to **Edit - Paste Style - Paste Style**. There are also standard patterns to distribute along a path. When clicking on the '+' sign to add an effect, select ‘Gears’ or ‘Hatches (rough)’. Each of these effects have their own options to create an effect and to adjust the pattern.\nWhen it comes to paths, you can do much more than combining them. When you want to cut one shape out of another shape, you can use the options in the Path menu; Union, Difference, Intersection, Exclusion, Division and Cut Path.\n\n## Diagrams\nTo make a diagram with objects (circles, rectangles, stars, etc.) connected by lines, there is the Diagram connector tool. First you must draw and align the objects to create your diagram. Then select the Diagram connector tool. Every object can be selected by clicking in the white box in the middle of the object. Once connected the lines will follow the object if you move it to another place. The lines can be used as a path, therefore you can also modify them to e.g. dashed lines, arrows, etc.\n\n# Exercises\n> ### {% icon hands_on %} Hands-on: Exercise 1\n> Image 1 PNG: [Image 1](http://data.bits.vib.be/pub/trainingen/GIMP_Inkscape/Drawing.png)\n> Image 1 SVG: [Image 1 SVG](http://data.bits.vib.be/pub/trainingen/GIMP_Inkscape/Drawing.svg)\n> Task: Reproduce the top strand. 
Afterwards, reproduce the bottom strand using the first one.\n{: .hands_on}\n> ### {% icon hands_on %} Hands-on: Exercise 2\n> Image 2 PNG: [Image 2](http://data.bits.vib.be/pub/trainingen/GIMP_Inkscape/Drawing2.png)\n> Image 2 SVG: [Image 2 SVG ](http://data.bits.vib.be/pub/trainingen/GIMP_Inkscape/Drawing2.svg)\n> Task: Reproduce one of the sets of this image. Afterwards, reproduce the others using the first set.\n{: .hands_on}\n> ### {% icon hands_on %} Hands-on: Exercise 3\n> Image infographic 1: [Image 1](http://data.bits.vib.be/pub/trainingen/GIMP_Inkscape/procent_bars.png)\n> Image infographic 2: [Image 2](http://data.bits.vib.be/pub/trainingen/GIMP_Inkscape/circle_infographic.png)\n> Image infographic 3: [Image 3](http://data.bits.vib.be/pub/trainingen/GIMP_Inkscape/flower_diagram.png)\n> Task: Try to reproduce one of these images using the video tutorial series from Nick (see top of this page).\n{: .hands_on}","Using the mapping results we can define the peaks, the regions with a high density of reads in the ChIP sample, where the transcription factor was bound.\nThere are multiple programs to perform the peak calling. Some are more directed towards histone marks (broad peaks) while others are specific to narrow peaks (transcription factors). Here we will use MACS because it's known to produce generally good results, and it is well-maintained by the developer.\n\nMACS is installed on GenePattern. Check the documentation on GenePattern or read the manual on [the MACS github](https://github.com/taoliu/MACS/). Let's see the parameters of MACS before launching the peak calling.\n\n> How to define the input files?  \n> **treatment** and **control**: the treatment mapped read file (SRR576933.bam) and the control mapped read file (SRR576938.bam)\n\nNote that the bam files need to be sorted according to genomic location. 
At this point they are not, the reads are in the same order as they were in the fastq file, according to position on the flow cell.\n\n> Which Picard tool can we use to sort the files? \n> You can use Picard.SortSam for this.\n\n> Sort the bam files so that they can be used as input for MACS. \n> Use the default parameter settings.\n\nLet's go over the different parameters of MACS: \nThe **effective genome size** is the size of the genome considered \"usable\" for peak calling. This value is given by the MACS developers on their website. It is smaller than the complete genome because many regions are excluded (telomeres, highly repeated regions...). The default value is for human (2700000000), so we need to change it. As the value for <i>E. coli</i> is not provided, we will take the complete genome size **4639675**.\n\nMACS needs the length of the fragments, which are longer than the read length, because the sequencer sequences only parts starting from the end of the fragments. MACS2 does this by making a model of enrichment of reads in the ChIP sample versus the background, searching pairs of peaks within a bandwidth of 300 bases with an enrichment ratio between 5 and 50. If there are not enough pairs of peaks, as is the case in our data, you can fall back on using a preset fragment length by setting the **model** parameter to **no**. The default of **shift 0 extsize 200** is adequate for ChIPSeq. It means that reads are extended to a length of 200 bases before they are counted.\n\nThe **duplicates** specifies how MACS should treat the reads that are mapped to the exact same location (duplicates). The manual specifies that keeping only 1 representative of these \"stacks\" of reads is giving the best results.\n\nThe **make BedGraph** parameter will output a file in BEDGRAPH format to visualize the peak profiles in a genome browser. 
There will be one file for the treatment, and one for the control.\n\n**FDR** and **FDR for broad peaks** indicates that MACS will report peaks if their associated p-value is lower than the value specified here. Use a relaxed threshold as you want to keep a high number of peaks (even if some of them are false positives).\n\n> Perform peak calling on the sorted bam files. \n> Set the parameters as described above: \n\n- Load sorted bam files for treatment and control\n- Set effective genome size to 4639675\n- Don't use a model\n- Make a bedgraph file\n\nLook at the files that were created by MACS.\n\n> Which files contains which information?\n> \n- macs_summits.bed: location of the summit base for each peak (BED format).If you want to find the motifs at the binding sites, this file is recommended. The file can be loaded directly to the UCSC genome browser. Remove the beginning track line if you want to analyze it by other tools.\n- macs_peaks.xls: peak coordinates with more information, to be opened with Excel. Information include:\n\n- chromosome name\n- start position of peak\n- end position of peak\n- length of peak region\n- absolute peak summit position\n- pileup height at peak summit\n- -log10(pvalue) for the peak summit (e.g. pvalue =1e-10, then this value should be 10)\n- fold enrichment for this peak summit against random Poisson distribution with local lambda\n- -log10(qvalue) at peak summit\n\nCoordinates in XLS is 1-based which is different from BED format. \n\n- MACS_peaks.narrowPeak is a BED file which contains the peak locations together with peak summit, p-value, and q-value. You can load it to the UCSC genome browser. Definition of some specific columns are:\n\n- 5th: integer score for display. 
It's calculated as int(-10*log10pvalue) or int(-10*log10qvalue) depending on whether -p (pvalue) or -q (qvalue) is used as score cutoff.\n- 7th: fold-change at peak summit\n- 8th: -log10pvalue at peak summit\n- 9th: -log10qvalue at peak summit\n- 10th: relative summit position to peak start\n\nThe file can be loaded directly to the UCSC genome browser. Remove the beginning track line if you want to analyze it by other tools.\n- The MACS_treat_pileup.bdg and MACS_control_lambda.bdg files are in bedGraph format which can be imported to the UCSC genome browser or be converted into even smaller bigWig files. The MACS_treat_pileup.bdg contains the pileup signals (normalized) from the ChIP sample. The MACS_control_lambda.bdg contains local biases estimated for each genomic location from the control sample.\n","## Introduction to GenePattern\n\n### Access GenePattern\n\nYou can work on our [BITS Genepattern server](https://dev.bits.vib.be:8686/gp/pages/index.jsf). Ask the trainer for login details.\n\n### The GenePattern user interface\n\nLogging in brings you to the GenePattern homepage: \n\n![loggingingp](../../images/GP2b.png)\n\n\n- Click the GenePattern icon at the top of the page (red) to return to this home page at any time.\n- The upper right corner shows your user name (green).\n- The navigation tabs (blue) provide access to other pages.\n\nWe'll zoom in on the navigation tabs: \n\n- The Modules tab gives access to the tools that you can run. Enter the first few characters of a module in the search box to locate a tool. Click the Browse modules button to list the tools.\n\n![modulesgp](../../images/GP3b.png)\n\n- The Jobs tab shows an overview of the analyses that you have done by showing the tools that you have run, together with a list of output files that were generated.\n\n![jobsgp](../../images/GP4.png)\n\n- The **Files** tab shows a list of files you can use as input for the tools. 
These are files that you have uploaded from your hard drive or files that were generated as the output of a tool and that were saved to the **Files** tab. In your case the Files tab contains a folder **uploads**. \n\n### Searching a tool in GenePattern\n\nYou can find a module by typing its name into the search box on the **Modules** tab: \n\n![modulesgp](../../images/GP4a.png)\n\nSearching a tool makes its name appear in the main window.\n\n### Running tools in GenePattern\n\nClicking the name of the tool will open its parameter form in the main window.\n\n![groomer](../../images/GP5.png)\n\nFill in the parameters and click **Run** to start the analysis.\n\nAs long as the tool is running you see an arched arrow in the top right corner: \n\n![groomer](../../images/GP11.png)\n\nWhen the tool has finished the arched arrow is replaced by a checkmark and the file(s) containing the results appear at the bottom: \n\n![groomersrr074262](../../images/GP12.png)\n\nNote that apart from the file containing the results, other files are generated e.g. stdout.txt containing the error log of the tool. You can consult the error log in case of problems.\n\n#### Check the documentation\n\nTo obtain a description of the parameters of a tool and their default values click the Documentation link at the top of the page. \n\n![groomerdoc](../../images/GP16.png)\n\n#### Define input files\n\nMany input files are located in the shared data folder. In the parameter form of a tool, you will find the **Add Paths** or **URLs button** in the **input files** section: \n\n![groomerinput](../../images/GPAddPaths.png)\n\nClick the button and expand **BITS trainingdata Chipseq**: \n\n![groomerinput](../../images/GPSharedData.png)\n\n### Store the output of a tool in GenePattern \n\nCopy the file in the uploads folder on the Files tab to store it permanently and allow to use it as input for other tools. 
Output files that are not saved in the uploads folder are stored for 7 days on the server and are visible via the Jobs tab.\n\nWhen a tool has finished output files are generated at the bottom of the page. \n\n- Click the name of the output file. \n\n![groomerinput](../../images/GP14.png)\n\n- Select Copy to Files Tab\n\n![groomerinput](../../images/GP13.png)\n","## Mapping reads with Bowtie\n\nExercise created by Morgane Thomas Chollier\n\n### Obtaining the reference genome\n\nIn the ChIP-Seq experiment of *E. coli* we want to see which genomic regions are bound to transcription factor FNR. However, at this point what we have is a set of reads that are identified by their location on the flow cell. To answer our question we should link the reads to regions in the genome to obtain their genomic coordinates. This process is called mapping.\nFor Illumina reads the standard mappers are BWA and Bowtie (version 1 and 2).\n\n> Which version of Bowtie are we going to use?\n> We will use Bowtie version 1 as this version was designed for mapping short reads (< 50nt) and our reads are short (36nt).\n\nThe Bowtie_1 aligner is installed on GenePattern. Check the documentation on GenePattern or read [the manual on the Bowtie website](http://bowtie-bio.sourceforge.net/manual.shtml).\n\nBowtie needs the complete genome, in FASTA format as a reference sequence to align the reads to.\n\n> Which *E. coli* strain was used in the experiment?\n> Go to [the paper](http://journals.plos.org/plosgenetics/article?id=10.1371/journal.pgen.1003565) and check the part **Strains and growth conditions** in the **Materials and methods** section. There you see that the experiment was done using *E. coli* K-12 MG1655.\n\n![](../../images/mapping1.png)\n\nThe genome sequence of *E. coli* K-12 MG1655 needs to be in a specific format (=index) for bowtie. 
Several pre-built indexes are available to download on [the bowtie webpages](http://bowtie-bio.sourceforge.net/manual.shtml) or the [iGenomes website](http://support.illumina.com/sequencing/sequencing_software/igenome.html).\nAlthough the *E. coli* sequence is available we will not use it to show you how you should proceed if you don't find your reference sequence on this website. In that case you will need to make the index yourself. \n\nIf you can't find your reference on the iGenomes website you have to download it from:\n- [UCSC](http://genome.ucsc.edu/)\n- [Ensembl](http://www.ensembl.org/index.html)\n- [NCBI](http://www.ncbi.nlm.nih.gov/)\n\nSince Ensembl focuses on higher eukaryotes, we are going to download the genome from NCBI.\n\n> Which reference sequence was used in the experiment ?  \n> Go to [the paper](http://journals.plos.org/plosgenetics/article?id=10.1371/journal.pgen.1003565) and check the part **High-throughput RNA sequencing (RNA-seq) analysis**. There you see that the reads were mapped to an NCBI sequence with accession number **U00096**.\n\n![](../../images/mapping2.png)\n\n> Search for this sequence on NCBI ?  \n> Go to [the NCBI website](http://www.ncbi.nlm.nih.gov/), select the **Nucleotide** database, type **U00096** as a search term and click **Search**.\n\nNCBI Nucleotide is notorious for the amount of errors it contains both in sequences and in annotations. Therefore, if available you should always use sequences from RefSeq, the clean subset of NCBI’s Nucleotide database. This sequence is not a RefSeq sequence. You can see that because the accession number does not contain an underscore and all RefSeq accession numbers contain an underscore.\n\n> Is there a RefSeq sequence available ?  \n> In [the Nucleotide record](http://www.ncbi.nlm.nih.gov/nuccore/U00096.3), scroll down to the **Related information** section in the right menu. There you see that a RefSeq sequence is available. Click the **Identical RefSeq** link. 
\n\n![](../../images/mapping4.png)\n\nThis brings us to a RefSeq record with accession number NC_000913.\n\n> Download the sequence of the RefSeq record in FASTA format  \n> \n![](../../images/mapping6.png)\n\n- In the record expand the **Send to** section (red).\n- Select **File** as destination (green). This means that you download the data to your computer.\n- Select **FASTA** format (blue).\n- Click **Create File**.\n\nIf all goes well, this creates a file called **sequence.fasta** in the **Downloads** folder of your computer. \n\n> Upload the downloaded file to your **Uploads** folder in GenePattern. \n> \n- Go to the **Files** tab in GenePattern.\n- Drag and drop the file onto the **Drag Files Here** section.\n- Select the **Uploads** folder and click **Select**\n\n![](../../images/GPUpload.png)\nIf all goes well you should see the following message\n![](../../images/GPUpload2.png)\n\nIf the upload takes too long use the fasta file from the **SHARED_DATA** folder in GenePattern.\n\n### Indexing the reference genome\n\nYou cannot do the mapping directly on the .fasta file, you need to index the file first. Reference genomes from the Bowtie/iGenomes website are already indexed so when you get your reference there you can skip this step. Reference genomes downloaded from NCBI, Ensembl or UCSC need to be indexed using the Bowtie_1 indexer tool.  \nIndexing a reference genome is a one-time effort: you do not have to repeat it each time you do a mapping.  \nCheck the documentation of the Bowtie_1 indexer to see the parameters it takes. The documentation shows that you need to specify: \n- the reference genome that you want to index as an input (in our case the *E. coli* fasta file)\n- the name of the indexed output file\n\nGive the output file the same name as the input file: **Escherichia_coli_K12**. 
The Bowtie indexer will generate a zip file containing a whole set of .ebwt files whose name all start with Escherichia_coli_K12.\nCopy the zip-file to your **Uploads** folder.\n\n### Mapping the reads\n\nOpen the Bowtie_1 aligner parameter form. \n\n> Use the indexed E.coli genome for mapping\n> The first parameter of the Bowtie 1 aligner parameter form are the genome index files (= the zipped ebwt files in your **Uploads** folder). \n- Go to the **Files** tab\n- Click the **Upload your own file** button in the **bowtie1 index** section of the bowtie 1 parameter form\n- Drag and drop the zip file to the **Drag your files here** section\n\n![](../../images/GPBowtie1.png)\n\n> How to define the input file(s) ?\n> Bowtie needs an input file containing the reads (in our case SRR576933.fastq). Bowtie can map single end reads like we have but also paired end reads. In the case of paired end reads you have two fastq files, one with the upstream reads and one with the downstream reads. That's why you can specify two input files: **reads pair 1** and **reads pair 2**. We just select **SRR576933.fastq** from the **SHARED_DATA** folder as input for **reads pair 1**.\n\n![](../../images/GPBowtie2.png)\n\nYou need to tell bowtie what type of file your input file is.\n\n> What is the parameter for doing this ?\n> Via the parameter called **input format** you can specify that the input file is in fastQ format.\nFastQ is the default, so you don't have to explicitly set this option.\n\nBowtie has two modes of mapping. The simplest strategy is called v-mode alignment: you align complete reads (from the first to the last base aka end-to-end) to the reference and you count the number of mismatches in this alignment. In this mode quality values are ignored and you need to tell bowtie the maximum number of mismatches you allow.  
\n\n> Do a v-mode mapping allowing 2 mismatches in the alignments.\n> - Expand the **advanced customization of run** parameters\n> - Set **alignment mode** to **v-mode**\n> - Set **max mismatches for vmode alignment** to **2**. This means that bowtie will allow two mismatches anywhere in the alignments. The value for this parameter must be a number from 0 through 3.\n\n![](../../images/GPBowtie3.png)\n\nRemember because the base quality at the 3'end of the reads is lower, base calls at the 3'ends are often incorrect. This will inevitably lead to mismatches in the alignments. Reads with more than 2 mismatches will not be reported. To avoid losing too many reads during the mapping we can either trim low quality bases from the 3' ends of the reads before the alignment is done or use a mapping strategy that takes into account the quality scores of the bases.\n\nThis strategy is called n-mode alignment. It's the default mode. It aligns seeds, the first N bases of the reads at the high quality 5'end, to the reference. You have to set the length of the seeds and the maximum number of mismatches allowed in the seed alignment. Additionally the sum of the quality scores at all mismatched positions (not just in the seed) is calculated and you can set a maximum for this parameter. In this way, reads with mismatches with high quality scores will not be reported whereas mismatches with low scores are more or less ignored.\n\nThe FASTQC report showed that the last base is of low quality. Since the reads are 36 bases long we could use seeds of 35 bases for the mapping.\n\n> Do an n-mode mapping with seeds of 35 bases allowing 2 mismatches in the seeds.\n> - Expand the **advanced customization of run** parameters\n> - Set **alignment mode** to **n-mode**\n> - Set **seed length for nmode alignment** to **35**\n> - Set **max mismatches in seed** to **2**. This means that bowtie will allow two mismatches in the alignments of the seeds (the first 35 bases of the reads) to the reference. 
The value for this parameter must be a number from 0 through 3.\n\n![](../../images/GPBowtie4.png)\n\nWe also need to specify that we only want to report reads that map specifically to one location in the reference.\n\n> What is the parameter for doing this ?\n> Via the parameter called **report alignments** you can specify that the output file should contain reads **only mapping at unique location**.\n\nBy default, bowtie will include unmapped reads in the output file. That's unnecessary since no one uses these unmapped reads.\n\n> How to exclude unmapped reads from the output file?\n> Via the parameter called **include unaligned** in the **output** section you can specify not to include unmapped reads in the output file.\n\nWe want to get a rough idea of the quality of the mapping. Look at the stdout.txt file that was generated by bowtie to get the basic statistics of the mapping.\n\n![](../../images/GPBowtie5.png)\n\nYou see that 62% of the reads were mapped. This may seem low but remember that we haven't done any cleaning on the file. According to FASTQC the file contains about 30% of adapter sequences that will not map.\n\n> How many multi-mapped reads were originally present in the sample?\n> Multimappers are reported as **reads with alignments suppressed due to -m**. Behind the scenes the **report alignments** parameter in the form is translated into a bowtie -m option that is run at command line. This option is used to guarantee that reported alignments are unique. Specifying -m 1 instructs bowtie to refrain from reporting any alignments for reads having more than 1 reportable alignment. \n\nThe output of Bowtie is a sam file. The SAM format corresponds to large text files, that can be compressed (\"zipped\") into .bam files that take up to 4 times less disk space and are usually sorted and indexed for fast access to the data they contain. The index of a .bam file is named .bai and some tools require these index files to process the .bam files. 
So we need to transform the .sam file with our mapping results to a .bam file. You can use one of the tools from the Picard toolbox for this. \n\n> Convert the sam to a bam file.\n> You can use the tool Picard.SamToBam for this.\n\nRepeat the analysis for the control sample SRR576938.fastq. These two fastq files come from a ChIP-Seq experiment, the first contains the reads of the ChIP sample, the second of the control sample, which consists of fragmented genomic DNA. You need both to identify regions in the genome that are represented more in the ChIP reads than in the control (these are the regions that bind to the transcription factor).\nSuppose that you have many fastq files that you need to map to the *E. coli* genome. The best way to ensure that you can reuse tools and parameter settings during the next analysis, is to combine them into a pipeline.\n\n> Create a pipeline to map a ChIPSeq data set.\n> - In the top menu select **Modules & Pipelines**\n> - Click **New Pipeline**\n\n> ![](../../images/GPPL.png)\n\n> - In the **Search Modules** section search for the modules you need: first **Bowtie_1_aligner** and then **Picard.SamToBam**\n> - Click a tool to open its parameter form in the right pane of the pipeline editor.\n> - You can set values for the parameters or you can allow users to give values for the parameters when they use the pipeline. For the Bowtie_1_aligner allow users to define the index and the input fastq file by checking the boxes in front of these parameters. 
After you have done this you should see a user icon appearing in front of these parameters in the middle pane of the pipeline editor.\n\n![](../../images/GPPL2.png)\n\n> - Use the same settings for the remaining parameters as you used for mapping SRR576933.fastq\n> - Connect the sam output of bowtie as input file in Picard.\n> - Click the **Properties** button at the top to open the **Editing pipeline** parameters in the right pane.\n> - Type a name for the pipeline and hit Enter\n> - Click the **Save** button at the top.\n> - The pipeline has now become a module that you can search for and run in GenePattern. Exit the pipeline editor by clicking the **GenePattern** button at the top.\n\n![](../../images/GPPL3.png)\n\nNow you can use the pipeline as a regular module.\n\n> Repeat the analysis for sample SRR576938.fastq using the ChIPSeqMapping pipeline.\n> Repeating the mapping is easy, the only thing you need to do is define the index and the input file:\n> - Open the parameter form of the ChIPSeqMapping pipeline\n> - Drag and drop the zip file with the indexed genome to the **Drag your files here** section\n> - Use **SRR576938.fastq** from the **SHARED_DATA** folder as input file\n> - Run the pipeline\n\n> How many reads of the control sample were mapped ?\n> In the stdout.txt file generated by bowtie, you see that 95% of the reads were mapped. This is of course ok but you expected a high percentage here since the control sample is nothing more than the reference genome cut up into small pieces. \n\nAt this point, you have two sam and two bam files, one for the treated sample, one for the control sample.\n","For visualization with deepTools we need a bam file in which the order of the reads is determined by genomic location. We have created such a bam file in the peak calling step using the SortSam tool from the Picard suite.\nThe bam file still contains duplicate reads (=reads that map to exactly the same position in the genome). 
Such reads represent technical duplicates often caused by biased PCR amplification during the library prep or by fragments coming from repetitive elements in the genome... Since we are going to quantify the reads (we look for regions that are enriched in the ChIP sample) these technical duplicates will distort the quantifications. So they should be removed from the .bam file\nAdditionally an index file should be created to allow for fast and easy access to the sorted and processed .bam file.\n\n> Which tool from the Picard suite can be used to mark/remove duplicates?\n> Picard MarkDuplicates can be used to remove duplicates. \n\n> Remove duplicates and index the .bam files?  \n> - Use the sorted .bam files as input files\n> - Indicate that the files are sorted according to coordinates\n> - Remove the sequencing duplicates, duplicates generated by PCR\n> - Create an index file\n\nMarkDuplicates generates an error but you can ignore the error. Open the metrics.txt file that is generated by MarkDuplicates.\n\n> How many duplicates were found in the ChIP sample?  \n> \n ![](../../images/DuplicationMatrics.png)\n\nNow we will plot a Lorenz curve with DeepTools to assess the quality of the ChIP. It answers the question: “Did my ChIP work?” Did the antibody-treatment enrich sufficiently so that the ChIP signal can be differentiated from the background signal in the control sample? This is a valid question since around 90% of all fragments in a ChIP experiment will represent the genomic background. \n\nFor factors that enrich well-defined, narrow regions, the plot can be used to assess the strength of the ChIP, but the broader the enrichments, the less clear the plot will be. 
Vice versa, if you do not know what kind of signal to expect, the plot will give you an indication of how careful you have to be during downstream analyses to separate biological noise from meaningful signal.\n\nThe tool randomly samples genome regions (bins) of a specific length in indexed BAM files and calculates the sum of all reads that map in a bin. These sums are sorted according to their rank and a profile of cumulative sums is plotted.\n\n> Which tool from the DeepTools toolset are you going to use for this? \n> Run DeepTools [plotFingerprint](http://deeptools.readthedocs.io/en/latest/content/tools/plotFingerprint.html) to draw the Lorenz curve.\n\n> Create a Lorenz curve for the ChIP sample \n> - You have to provide both the .bam and the .bai file as input! \n> - The **nsamples** parameter represents the number of bins that are sampled from the genome. It has to be smaller than the genome size divided by the size of the bins (default 500nt). The size of the *E. coli* genome is 4639675 nt. So we will set this parameter to 9000.\n> - Other parameters can be kept at default settings\n\nAn experiment with perfect uniform distribution of reads along the genome (without enrichment) and infinite sequencing coverage should generate a straight diagonal line. A very specific and strong ChIP enrichment will be indicated by a prominent and steep rise of the curve towards the highest rank. This means that a big chunk of reads from the ChIP sample is located in few bins.\nBelow you see a few examples on how to interpret this curve:\n\n ![](../../images/DTLorenz.png)\n\n**What do you think about the fingerprint plot that was generated on the *E. coli* data?**\n","## Downloading a data set for the ChIP-Seq training\n\n### Download the data from GEO\n\nFor the ChIP-Seq training, we are going to use the data set that is described in the article of Myers et al., 2013 [6]. 
The data consists of reads from ChIP enriched genomic DNA fragments that interact with FNR, a well-studied global transcription regulator of anaerobiosis. As a control, reads from fragmented genomic DNA were used.\n\nNGS datasets are (usually) made freely accessible, by depositing them into specialized databases. Sequence Read Archive (SRA) located in USA and hosted by NCBI, and its European equivalent European Nucleotide Archive (ENA) located in England hosted by EBI both contains raw, unprocessed reads.\n\nProcessed reads from functional genomics datasets (transcriptomics, genome-wide binding such as ChIPSeq,...) are deposited in Gene Expression Omnibus (GEO) or its European equivalent ArrayExpress. <p>The article contains the following sentence at the end of the Materials and Methods section:\n\"All genome-wide data from this publication have been deposited in NCBI’s Gene Expression Omnibus (GSE41195).\"\nIn this case GSE41195 is the identifier that allows you to retrieve the dataset from the NCBI GEO (Gene Expression Omnibus) database.\n\nGEO hosts processed data files from experiments related to gene expression studies, based on NGS or microarrays. The files of NGS experiments can include alignments, peaks and/or counts.\n\nGo to the [GEO page](http://www.ncbi.nlm.nih.gov/geo/)\n\n> ### {% icon hands_on %} Download the data of the experiment with GEO ID GSE41195 \n>\n> - Type the ID in the search box on the GEO home page\n> - Click Search\n>   ![searchGEO](../../images/GEO1.png)\n> - This redirects you to the GEO record of the full experiment consisting of microarrays, tiling arrays and a ChIP-Seq experiment.\n>  ![searchGEO](../../images/GEO2.png)\n> - In the Experiment type section you can see that this GEO record indeed reports a mixture of expression analysis and ChIP-Seq experiments.\nScroll to the bottom of the page:\n     ![searchGEO](../../images/GEO3.png)\n> 4. You can see that the ChIP-Seq data have their own GEO ID: GSE41187\n> 5. 
Click the ChIP-Seq data ID: GSE41187.\nThis brings us on the GEO record of the ChIP-Seq experiment.\nIn the GEO record scroll down to the Samples section:\n>   ![searchGEO](../../images/GEO4.png)\n> For time's sake, we will focus in the training on a single sample: FNR IP ChIP-seq Anaerobic A\n> 6. Click the GEO ID GSM1010219 of the sample that we will use in the training\n> This brings us to the GEO record of the sample.\n> 7. Scroll to the bottom of GEO record of the sample to the Relations section:\n>  ![searchGEO](../../images/GEO5.png)\n>  GEO only contains processed data, no raw data. The raw data is stored in the SRA database. In the Relations section you can find the SRA identifier of this data set. For the training we would like to have a fastq file containing the raw data.\n> 8. Copy the SRA identifier\n{: .hands_on }\n\n### Download the data from ENA at EBI\n\nAlthough direct access to the SRA database at the NCBI is doable, SRA does not store sequences in a FASTQ format. So, in practice, it's simpler (and quicker!!) to download datasets from the ENA database (European Nucleotide Archive) hosted by EBI (European Bioinformatics Institute) in UK. 
ENA encompasses the data from SRA.\n\nSRA identifiers are also recognized by ENA so we can download the file from EBI.\n\nGo to the ENA website at [EBI](http://www.ebi.ac.uk/)\n\n> ### {% icon hands_on %} Download the data with SRA ID SRX189773\n> \n> - Type the ID in the search box on the EBI home page\n> - Click the **search icon**\n    This returns two results: a link to the record of the experiment and a link to the record of the run:\n>\n>   ![resultssearchENA](../../images/ENA2.png)\n> - Click the first result (red)\n>   ![resultssearchENA3 -80width](../../images/ENA3.png)\n>   The table at the bottom of the page contains a column called Fastq files (ftp)\n> - Click the link in this column to download the data in fastq format\n{: .hands_on }\n\n\n\nFor the training you do not have to download the data, it's already on the GenePattern server.\n\nTo download the replicate and the control data set, we should redo the same steps starting from the GEO web page of the ChIP-Seq experiment (click the sample ID of the FNR IP ChIP-seq Anaerobic B and the anaerobic INPUT DNA sample). The fastq file of the control sample is also available on the GenePattern server.\n","## Choosing a genome browser\n\nThere are several options for genome browsers, divided between the local browsers (need to install the program, eg. IGV) and the online web browsers (eg. UCSC genome browser, Ensembl). We often use both types, depending on the aim and the localisation of the data.\nNote that if you're working on a non-model organism, the local viewer will be the only choice. If the aim is to share the results with your collaborators, view many tracks in the context of many existing annotations, then the online genome browsers are more suitable.\n\n## Viewing the aligned reads in IGV\n\nOpen IGV. Be patient, it might take a few minutes for the program to start.\nChange the genome in IGV from **Human hg19** to the one you used in the mapping.\n\n> Load the desired genome. \n> Load the *E. 
coli* genome as reference (from the file Escherichia_coli_K_12_MG1655.fasta, downloaded to build the bowtie index).\n\n- Top menu: **Genome** -> **Load Genome from File**\n ![](../../images/IGVLoadGenome.png)\n- The loaded genome appears in the top left panel:\n ![](../../images/IGVLoadGenome2.png)\n\nYou can also visualize the annotation (genes) in IGV. You can obtain a file with annotations from the Refseq record.\n\n> Download the annotations from RefSeq in GFF3 format. \n> Go to the [RefSeq record](https://www.ncbi.nlm.nih.gov/nuccore/NC_000913.3) of the *E. coli* genome.\n> - Expand the **Send to** section at the top of the page.\n> - Choose **File** as destination.\n> - Select **GFF3** format.\n ![](../../images/NCBIGFF3.png)\n\nYou can also [download the GFF3 file](http://data.bits.vib.be/pub/trainingen/NGSIntro/Ecoli_annotations.gff3) from our website.\n\nIf you want to load the .gff3 file and visualize the annotation properly in IGV, it’s necessary to comment (or remove) the third line:\n```\n##sequence-region NC_000913.3 1 4641652\n##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=511145\n## NC_000913.3\tRefSeq\tregion\t1\t4641652\t.\t+\t.\tID=NC_000913.3:1..4641652;Dbxref=taxon:511145;Is_circular=...\nNC_000913.3\tRefSeq\tgene\t190\t255\t.\t+\t.\tID=gene-b0001;Dbxref=ASAP:ABE-0000006,ECOCYC:EG11277...\n```\n\nYou can visualize reads in IGV as long as they are sorted according to genomic location. Download the two sorted and indexed bam files (for SRR576933 and SRR576938) from GenePattern to your computer and load them in IGV.\n\n> Load the annotation and the bam files of the ChIP and the control sample. \n> - Top menu: **File** -> **Load from File**:\n>  ![](../../images/IGVLoadFile.png)\n> - You should see the track now.\n> - Do the same for the .bam files.\n> Note that you have to download the .bai files too and store them in the same folder as the .bam files. 
You do not have to explicitly open the .bai files in IGV but they have to be in the same folder as the .bam files or IGV will throw an error.\n> - Zoom in until you see the reads\n\n ![](../../images/IGVbams.png)\n\n**Browse around in the genome. Do you see peaks?**\n\n> Go to the following gene: pepT.\n> Type **pepT** in the box at the top (red) and click **Go**:\n ![](../../images/IGVpepT.png)\n\nDo the same for gene ycfP.\nLooking at .bam files does not allow you to directly compare the two samples as data are not normalized. To generate normalized data for visualization you can use bamCoverage from deepTools (it's available in GenePattern). It generates BigWig files out of .bam files.\n\n> Create a BigWig file from the sorted and indexed .bam file of the ChIP sample\n> The bamCoverage tool has the following parameters:\n> - **input file** is the sorted and indexed .bam file to process\n> - **index** is the accompanying .bai file\n> - **output format** is the output file type, we want to generate a **BigWig** file\n> - **genomeSize** **4639675** nt for *E. coli*\n> - **normalize**: different overall normalization methods; we will use the **RPGC** method corresponding to 1x average coverage\n> - **skip noncovered**: skip non-covered regions (without mapped reads) in the genome? Set to **yes**.\n> - **extend reads**: extend reads to fragment size, in our case **200** nt.\n> - **ignore duplicates**: reads that map to the same location in the genome will be considered only once. Set this to **yes**.\n\nRepeat for the control (again you see the benefit of creating a pipeline for repeating the same steps on multiple samples).\nDownload the BigWig files, start a new session in IGV and load the BigWig files in IGV.\n\n> View the BigWig files of the ChIP and the control sample in IGV\n> - Top menu: **File** -> **New session**\n> - Top menu: **File** -> **Load from File**. 
Load the two BigWigs and the .gff3 with the annotation.\n> - Right click the names of the BigWig tracks and select **Autoscale**.\n\n**Go back to the genes we looked at earlier: pepT, ycfP. Look at the shape of the signal.**\n\n## Viewing the peaks in IGV\n\nDownload the bdg files generated by MACS from GenePattern to your computer and rename them with the extension .bedgraph.\n\n> Download the bdg files. \n> Click the names of the bdg files in the **Files** tab and select **Save File**\n ![](../../images/GPDownloadBdg.png)\n\nReplace .bdg by .bedgraph otherwise the file will not be recognized by IGV.\nOpen a **new session** in IGV. Reload the .gff3 file with the annotation.\n\n> View the bedgraph files. \n> Load the control bedgraph file:\n\n- Top menu: **File** -> **Load from File**:\n ![](../../images/IGVLoadFile.png)\nYou might get a warning that the file is big. Simply click the **Continue** button.\n\n- You should see the track (in blue):\n ![](../../images/IGVLoadFile2.png)\n\nRepeat this step to load the treatment bedgraph file. You should now see the 2 tracks (in blue):\n ![](../../images/IGVLoadFile3.png)\n\nDownload and view the BED file containing the peak locations.\n\n> View the bed file with the peak locations. \n> Save the file from GenePattern to your computer and load the bed file into IGV.\nA new track with discrete positions appears at the bottom:\n ![](../../images/IGVLoadFile4.png)\n\nThe end result should look like this: 3 tracks with data (the bedgraph files of the 2 samples and the peaks file) and 1 track with annotation:\n ![](../../images/IGVLoadFile5.png) \n\n**Go back again to the genes we looked at earlier: pepT, ycfP. Do you see peaks?**\n","## Motif analysis\n\nFor the motif analysis, you first need to extract the sequences corresponding to the peaks. There are several ways to do this (as usual...). If you work on a UCSC-supported organism, the easiest is to use RSAT fetch-sequences. 
Here, we will use Bedtools, as we have the genome of interest at our disposal (Escherichia_coli_K12.fasta). However, we have to index the fasta file first to make it easy to access.\n\n> Which tool can be used to index the fasta file ?  \n> When you search for modules containing the word *fasta* you find a tool called SAMtools.FastaIndex that can index a reference sequence in fasta format and this is exactly what we need. \n\nUse this tool to index the *E. coli* genome and copy the resulting .fai file to the **Files** tab (in the same folder as the fasta file).\n\n> How to extract sequences corresponding to the peaks ?  \n> Use the BEDTools.fastaFromBed module for this.\n> - The **input file** is the fasta file of the *E. coli* genome that you uploaded to the server.\n> - The **bed file** is the bed file with the peaks that was generated by MACS (narrowPeak)\n\nSave the resulting .fa file to your computer.\n\nTo detect transcription factor motifs, you will use the **Regulatory Sequence Analysis Tools**. It has a specific teaching server recommended for trainings: [http://pedagogix-tagc.univ-mrs.fr/rsat/](http://pedagogix-tagc.univ-mrs.fr/rsat/)\nYou will use the program **peak-motifs**.\n\n> How to find the peak-motifs program \n> In the left menu, click on NGS ChIP-seq and then click on peak-motifs. A new page opens, with a form\n\nThe default peak-motifs web form only displays the essential options. There are only two mandatory parameters.\n\n> Fill the mandatory options \n> - The **title box**, which you will set as **FNR Anaerobic** . 
\n> - The **sequences**, that you will upload from your computer, by clicking on the button Choose file, and select the file FNR_anaerobic_combined_peaks.fa from your computer.\n\nWe will now modify some of the advanced options in order to fine-tune the analysis according to your data set.\n\n> Fill the advanced options \n> \n- Open the \"Reduce peak sequences\" title, and make sure the **Cut peak sequences: +/- ** option is set to **0** (we wish to analyze our full dataset) \n- Open the “Motif Discovery parameters” title, and check the oligomer sizes 6 and 7 (but not 8). Check \"Discover over-represented spaced word pairs [dyad-analysis]\"\n- Under “Compare discovered motifs with databases”, remove \"JASPAR core vertebrates\" and add RegulonDB prokaryotes (2015_08) as the studied organism is the bacterium *E. coli*.\n\n> Launch the analysis \n> - You can indicate your email address in order to receive notification of the task submission and completion. This is particularly useful because the full analysis may take some time for very large datasets. \n> - Click on the button “GO”. As soon as the query has been launched, you should receive an email confirming the task submission and providing a link to the future result page.\n\nThe Web page also displays a link. You can already click on this link. The report will be progressively updated during the processing of the workflow.\n","## Quality control of the data of the ChIP-Seq training\n\nUse FASTQC inside GenePattern to get basic information on the data (read length, number of reads, global quality of the datasets).\n\nRead the GenePattern tutorial for more details on how to use GenePattern.\nThe data is already present on the GenePattern server. 
When you open a tool in GenePattern, you will find the **Add Paths or URLs button** in the **input files** section: \n\n![inputfiles](../../images/GPAddPaths.png)\n\nClick the button and expand BITS trainingdata Chipseq: \n\n![inputfiles](../../images/GPSharedData.png)\n\nThe fastq file of the control data set is also available in the shared data folder (SRR576938.2.fastq)\n\n> ### {% icon hands_on %} Generate and view the FASTQC report of SRR576933.2.fastq in GenePattern \n>\n> - Search for **FASTQC** in the **Modules** section and open the parameter form.\n> - Use the fastq file from the Shared Data folder as input file.\n> - Leave the other parameters at their default values.\n> - Run FASTQC\n> FASTQC will generate a zip file and a html file. You can open the HTML report in your browser: \n> - Click the name of the output file at the bottom of the page.\n> - Select **Open Link**\n>   ![searchGEO](../../images/GP18.png)\n{: .hands_on }\n\nThe only parameter you might want to change if you work on your own data is the contaminants file. It contains a long list of known adapter sequences (see the Documentation in GenePattern). If for some reason the adapters you used are not in the list, you have to provide them as a fasta file. \n\n### {% icon question %} How many reads does the file contain?  \n\n<details markdown='1'>\n  <summary>Check the answer.</summary>\n\nThis is one of the results of the Basic statistics module in FASTQC (red): \n\n![fastqc9b](../../images/FASTQC9b.png)\n\nKnowing that it is recommended for ChIPSeq to have around 30 million reads, the number of reads in this fastq file seems very low. \n</details>\n{: .question }\n\n### {% icon question %} Should we be concerned about the low number of reads in the sample?  \n\n<details markdown='1'>\n  <summary>Check the answer.</summary>\n\nNo it's not a problem because the sample comes from E. coli. This bacterium has a very small genome so 3 million reads will still generate high coverage. 
However, if this was a human or mouse sample the number of reads would be way too low and we would indeed be concerned. \n\n</details>\n{: .question }\n\n### {% icon question %} What is the length of the reads?  \n\n<details markdown='1'>\n  <summary>Check the answer.</summary>\n\nThis is one of the results of the Basic statistics module in FASTQC (green): \n\n![fastqc9b](../../images/FASTQC9b.png)\n\nAgain, you see that the data set consists of very short reads although this data set is very recent. This is because it has been shown that elongating the reads does not improve your results in ChIP-Seq analysis. It will just cost you more money.\n\n</details>\n{: .question }\n\n### {% icon question %} Are there any positions with low sequence quality?  \n\n<details markdown='1'>\n  <summary>Check the answer.</summary>\n\nThis is shown in the Per base sequence quality module in FASTQC:\n\n![fastqcpositions](../../images/FASTQC11b.png)\n\nThe overall sequence quality is good, although it drops sharply at the last position, but this is normal in Illumina data, so this feature is not raising hard concerns.\n\n</details>\n{: .question }\n\n### {% icon question %} What could be the cause of the failure in the per base sequence content plot?  \n\n<details markdown='1'>\n  <summary>Check the answer.</summary>\n\nThe content of the 4 nucleotides is far from constant over all positions: \n\n![fastqcadapters](../../images/FASTQC12b.png)\n\nThis typically points to the presence of adapter or other contaminating sequences in your reads. \n\n</details>\n{: .question }\n\n### {% icon question %} Which FASTQC module allows you to confirm this suspicion?  \n\n<details markdown='1'>\n  <summary>Check the answer.</summary>\n\nThe **Overrepresented sequences** module will show if your read file is enriched in known contaminants.\n\n</details>\n{: .question }\n\n### {% icon question %} What does this module tell you?  
\n\n<details markdown='1'>\n  <summary>Check the answer.</summary>\n\nThe **Overrepresented sequences** module shows a high percentage of adapter sequences (29%!).\n\n![fastqcadapters](../../images/FASTQC13b.png)\n\nAgain you see that adapter contamination is a frequently occurring problem of Illumina NGS data.\n\n</details>\n{: .question }\n\n### {% icon question %} What about sequence duplication levels?  \n\n<details markdown='1'>\n  <summary>Check the answer.</summary>\n\nThere is sequence duplication. Adapter contamination will be partly responsible for the high duplication levels (the blue peaks at the far right of the plot) but the main cause lies in the technique itself. Typically, after ChIP, you end up with a very small initial amount of DNA (antibodies are not that effective, many cleanup steps in the protocol,...) and you have to do PCR to get your library up to a proper size for sequencing. So naturally, you expect many clones of the same DNA fragment due to the small initial size of the library. \n\n![fastqcadapters](../../images/FASTQC17b.png)\n\n</details>\n{: .question }\n\nNow do the same for the control data set: **SRR576938.2.fastq**.\n\nIn theory one expects that regions with high read count in the ChIP sample represent the regions that were enriched by the immunoprecipitation, i.e. the regions that were bound to the protein. However many studies have shown that the read count is affected by many factors, including GC content, mappability, chromatin structure, copy number variations... To account for these biases, a control sample is used consisting of fragmented genomic DNA that was not subjected to immunoprecipitation or that was precipitated using a non-specific antibody.\n\n### {% icon question %} How many reads does the control data set contain?  \n\n<details markdown='1'>\n  <summary>Check the answer.</summary>\n\nThis is one of the results of the **Basic statistics** module in FASTQC. 
You see that the control data set contains about twice as many reads as the ChIP data set.\n\nThe ChIP and control samples are usually sequenced at different depths, generating files with different total number of reads. This means that these two samples have to be made comparable later on in the analysis by normalization (see ChIP-Seq training).\n\n</details>\n{: .question }\n\n### {% icon question %} What is the length of the reads in the control data set?  \n\n<details markdown='1'>\n  <summary>Check the answer.</summary>\n\nThis is one of the results of the **Basic statistics** module in FASTQC. You see that the control data set contains reads of 36 bases just like the ChIP data set.\n</details>\n{: .question }\n\n### {% icon question %} Are there any positions with low sequence quality?  \n\n<details markdown='1'>\n  <summary>Check the answer.</summary>\n\nThis is shown in the Per base sequence quality module in FASTQC: \n\n![fastqc9b](../../images/FASTQC14b.png)\n\nThe overall sequence quality is good, although it drops sharply at the last position, but this is normal in Illumina data, so this feature is not raising hard concerns.\n\n</details>\n{: .question }\n\n### {% icon question %} Why did the per base sequence quality plot raise a failure in the ChIP sample and not in the control?  \n\n<details markdown='1'>\n  <summary>Check the answer.</summary>\n\nIn the slides you can see that the thresholds for a warning are: \n\n- end of box < 10\n- median < 25\n\n![fastqcpositions](../../images/FASTQC14c.png)\n\nOn the figure you see that the culprit is the median:\n\n- In the ChIP sample the median Phred score of the last position is 21 (so below 25) raising a failure\n- In the control sample the median Phred score of the last position is 26 (so above 25)\n\n</details>\n{: .question }\n\n### {% icon question %} Which FASTQC module gives a failure?  \n\n<details markdown='1'>\n  <summary>Check the answer.</summary>\n\nThe **Per tile sequence quality** module. 
The quality of one of the tiles is consistently different from the rest of the tiles\n\n![fastqcadapters](../../images/FASTQC15b.png)\n\n</details>\n{: .question }\n\n### {% icon question %} Is this also the case in the ChIP sample?  \n\n<details markdown='1'>\n  <summary>Check the answer.</summary>\n\nYes, you see exactly the same problem in the ChIP sample. Since both samples were probably loaded on the same lane, it seems normal that you see the same problem in the ChIP sample.\n\n![fastqcadapters](../../images/FASTQC15c.png)\n\n</details>\n{: .question }\n\n### {% icon question %} Why does the Sequence duplication levels module give a failure in the control sample?  \n\n<details markdown='1'>\n  <summary>Check the answer.</summary>\n\nThe duplication levels in the control data set are high. \n\n![fastqcadapters](../../images/FASTQC15d.png)\n\nThere is a high number of sequences with low duplication levels which could be due to high coverage. Remember that you are working with E. coli which has a small genome.\n\n</details>\n{: .question }\n\n### Estimation of coverage\n\nKnowing your organism size is important to evaluate if your data set has sufficient coverage to continue your analyses, e.g. for the human genome (3 Gb), 10 million reads are considered sufficient.\n\n### {% icon question %} What is the size of the genome of the E. coli K-12 strain substr. MG1655?  \n\n<details markdown='1'>\n  <summary>Check the answer.</summary>\n\n- Go to the [NCBI website](http://www.ncbi.nlm.nih.gov/)\n- Select the **Genome** database to search in\n- Type **Escherichia coli** in the search term box\n- Click **Search**\n\n![fastqcadapters](../../images/Genome2.png)\n\nThe genome is 4.64 Mbase. \n\n</details>\n{: .question }\n\nThe FASTQC report has shown that the fastq files of the ChIP and control sample contain 3.6 and 6.7 million reads respectively. 
As you aim for 10 million reads for 3 Gb in human, we can assume that these data sets contain enough reads for proper analysis. ","# 1. Routine usage\nAs mentioned in the first chapter, there are three conceptual areas in Git: the development area, the staging area and the commit repository. The routine usage is depicted in the figure below. When we want to save a file from the development area on our computer to the commit repository, we'll always have to add it to the staging area first, before we can commit it. The usual routine looks like this: \n\n\n---\n\n<center><img src=\"../../images/conceptual_areas_push.png\" width=\"1000\" /></center>\n\n---\n\n\nThese commands will subsequently add the file `<file>` to the **staging area** and then commit it to the **commit repository**. If we wouldn't pass along the `-m`-message parameter, Git would have opened the editor asking to write the commit message there. It's good practice to write a short, but powerful commit message that helps your future self to determine what has changed in this commit. \n\nThe last step is to take these commits, essentially representing the folder with all the committed files, and push them to GitHub. Up until now we kept track of our code locally on our computer. Why do we want to store this project and all of its files on GitHub? Imagine that you lose your computer now, you've also lost your project (and all the files in it). Less drastically, if you would just like to share your project with your colleagues or with the whole world, we need to publish it somewhere on the internet. And that is exactly what GitHub does for us. Here's what it looks like (once everything is set) when we would use the appropriate commands on GitHub. \n\n```\ngit add <file>\ngit commit -m \"some text that explains what has changed\"\ngit push\n```\n\nThat's all you need to know: `add-commit-push` x repeat. This repetition represents 90% of how we interact with Git & GitHub. 
\n\n\nBefore we can start adding, committing and pushing, we have to start a version controlled project/repository. There are two ways of **initializing a new Git repository** which only has to be performed once right at the start:\n- Clone a GitHub repository (from GitHub): see Section 2\n- Initialize Git on a folder on your computer: see Section 4   \nBoth options will work just fine and it depends on your preferences or maybe the situation of the project which one is preferable. The first option can be used if you're about to start a new project, the second option can be used when you already have some files in a project which you now want to start version controlling. \n\n\n\n# 2. Create a new repository from GitHub\n\nGo to your GitHub homepage and click on the '+' icon in the upper right corner and select 'New repository'. The following screen will pop up.\n\n\n---\n\n<center><img src=\"../../images/02-2-create-repository.PNG\" /></center>\n\n---\n\n\nWe already filled in a repository name and an optional description. You can choose to already publish your repository, however as this is a meaningless repository, we will choose not to. When you're about to start a new project, there are three things to consider:\n- For a new repository, it's a good practice to initialize the repository with a [README file](https://docs.github.com/en/github/creating-cloning-and-archiving-repositories/about-readmes). This file will eventually include a (general) description about the project, what others can expect to find in the project and how they can use it. \n- Adding a `.gitignore` file is something we will cover later, however for now it suffices to know that the `.gitignore` file will contain some code which tells git to exclude certain files from tracking and avoids uploading them to GitHub.\n- Adding a license makes sense when your project becomes public. It defines under which license the content is made available. 
More information on licenses is available [here](https://elearning.bits.vib.be/courses/writing-a-data-management-plan/lessons/licences/).\n\nIn our case, we will initialize the repository with a README file and click 'Create repository', which will then look like this:\n\n---\n\n<center><img src=\"../../images/02-3-create-readme-repository.PNG\" /></center>\n\n---\n\nThis is the home page of our GitHub repository. From here we can already do a lot, like changing or uploading files. We initialized a GitHub repository with a README file and we can see that we have only one file in this repository: a `README.md` file. By default the text in this README file is the title of the repository and the description that we created here above. Notice that it's a Markdown-file as we can see by the `.md` extension, similar to an ordinary text file on your computer with a `.txt` extension. Markdown is enriched text allowing us to create formatted text using plain-text. More information related to markdown can be accessed from the Github guides [here](https://guides.github.com/features/mastering-markdown/).  \n\n\nNow that we created the repository in GitHub, we want to work on it on our computer. Therefore we need to download it, i.e. we have to **clone** it to our computer. Click on the green button 'Clone' and choose any of the options:\n- Clone: with https link or with **SSH**. This will download the repository and all its contents, keeping the link to the GitHub repository. \n- Open with GitHub Desktop (this might be interesting for you at a later stage).\n- Download: will download all of the contents in a zipped file, however loses the connection to the repository.  \n\nWith the Git Bash (or Terminal), navigate with `cd` to the folder where you want to keep your project folder and type the following:\n```\ngit clone <link>\n```\nwith `<link>` being the link from GitHub that will look something like this for SSH: `git@github.com:username/repository-name.git`. 
This command is only used once in the beginning and creates a new folder on your computer with all the contents from GitHub (the README file). \n\n---\n\n> ### {% icon hands_on %} Exercise 1\n>\n>  Create a new GitHub repository, give it a name and initialize it with a `README`-file. Upload [this file](../../../../assets/files/git-introduction/plot1.R) to the repository on GitHub. What is GitHub asking you to do? Which stage is omitted when uploading a file directly to GitHub?  \n> \n> Clone the repository to your computer. How many files are there in your local repository?\n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    > Click on upload files and drag the file into the screen. GitHub is asking to add a commit message which defines the changes that you'll do to your repository. In this case we'll add the very brief *Upload R script* message. Notice that there is no staging area when you upload a file directly on GitHub. \n>    > \n>    > <center><img src=\"../../images/solution1.PNG\"/></center>\n>    > \n>    > Click on 'Commit changes' and find the two files: `README.md` and `plot1.R` in your repository. Now, we can find the clone link via the green 'Clone' button. In our Terminal we type the following command to start using the repository locally on our computer: \n>    > ```\n>    > git clone <link>\n>    > ```\n>    > in which you change `<link>` to the link that you copied from GitHub. There should be two files in your local repository as well.   \n>    > On a Windows computer we have a folder that contains the following files:\n>    > <center><img src=\"../../images/folder1.PNG\"/></center>\n>    >  \n>    > </details>\n>\n{: .hands_on}\n---\n\n# 3. Our first commit\nOur local copy (clone) of the GitHub repository is now able to communicate with the GitHub repository. Every change within this repository is traceable, whether it is a new file or changes to a file. 
When we make changes in our local repository (e.g. create a new file), you have to add the file to the staging area first (`git add`) and then commit it (`git commit`) before pushing it (`git push`) to GitHub. \n\n\n## 3.1 Staging\nLet's add a new file to our folder on our computer locally. Download [this file](../../../../assets/files/git-introduction/plot2.R) and add it in the folder where also the `plot1.R` file is located. It contains some R code for plotting a new figure.  \n\nThe first thing we will have to do now, is to stage the file into the staging area.  Remember that this is an intermediate area before committing the file to the repository. In a next section we will learn why this staging area can be useful. \n\nNow we have two options, depending on the situation:\n  1. `git add <file>` : will add a **specific** file to the staging area\n  2. `git add .` : will add **all** the changed or new files to the staging area\n\nIn this case, we can choose either of both options as we have only added one file. As this is a new file, `git add` will not only add it to the staging area, but it will also tell Git that it needs to keep track of changes that happen in this file. \n\n## 3.2 Committing\nOur new file is now in the staging area, ready to be committed. For this, we have to use the following command:\n```\ngit commit -m \"some descriptive yet short message\"\n```\nWe added a parameter `-m` (message) to the command followed by a descriptive text. This text informs our future selves or our colleagues of what changes were done. In this case it could be: \"added plot2.R script\". We make this message as explanatory as possible, yet as short as possible. Some tips and general best practices in writing commit messages are described in [this link](https://chris.beams.io/posts/git-commit/). 
\n\n---\n> ### {% icon question %} Question\n> \n>  Which of the following commit messages would be most appropriate for a hypothetical commit made to our `README.md` file?\n>   - “Update README file”\n>   - “Added line ‘We use this repository as an example’ to README.md”\n>   - “Added purpose description to the README file”\n>\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    > One can argue on the appropriateness of commit messages as it is subjective. In this case however, the third option seems most ideal. It's both not too generic and not too specific. \n>    >\n>    > </details>\n>\n{: .question}\n\n---\n\n\n\n> ### {% icon question %} Question\n> \n> What has happened after committing?\n>   - We saved a version of the file which is now visible on GitHub.com\n>   - We saved a version of the file which is now stored in our commit repository\n>\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    > We've been working locally up until now and didn't push the commits to the GitHub repository, hence it's still in our commit repository. \n>    >\n>    > </details>\n>\n{: .question}\n\n---\n\n\n> ### {% icon question %} Question\n> \n> What would have happened if we forgot about the message argument when committing a file (`-m`)\n>\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    > If the `-m` parameter was not added, git will launch a text editor and ask to write a message. We can not make a commit without providing a message. \n>    >\n>    > </details>\n>\n{: .question}\n\n---\n\n\n\n## 3.3 Push commits to GitHub\nRecall that when we added the first file on GitHub (exercise 1), it was immediately committed and showed up right away in the GitHub repository. When we change or add files on our computer and commit them, GitHub doesn't know this yet. 
Hence, we have to do one final step: \n```\ngit push\n```\nHave a look in the GitHub repository and verify that the new file is now in our repository. \n\n\n## 3.4 Stage-commit-push\nWe've learned how to make a GitHub repository, clone it to our computer, add a file, commit it and push it back to GitHub. This is everything you need to know for a routine usage of Git(Hub) on one of your projects. In order to grasp this concept a bit better, we'll repeat it by making changes on both files in the next exercise. \n\n\n---\n\n> ### {% icon hands_on %} Exercise 2\n>\n>  Add a title to both files (\"# Title plot 1\" and \"# Title plot 2\"). You can choose how you do this: e.g. open the files in a text editor and add the line on top of the file. Follow the routine steps to push your changes to our GitHub repository, however to make it a bit more difficult, you need to store the changes of both files in separate commits. \n> \n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    > After adding the titles, use the following commands \n>    > ```\n>    > git add plot1.R\n>    > git commit -m \"Added a title to plot1.R files\"\n>    > git add plot2.R\n>    > git commit -m \"Added a title to plot2.R files\"\n>    > git push\n>    > ```\n>    > We first added the changes of `plot1.R` in the staging area, then we commit those changes in a given commit. Afterwards, we add the changes of `plot2.R` in the staging area and subsequently commit them. Finally, we use push to push all the latest commits together towards GitHub. \n>    > </details>\n>\n{: .hands_on}\n---\n\n## 3.5 Commit all tracked files at once\nOne thing we haven't really said until now is that Git actually keeps track of the changes that you make to files as soon as you have told Git to do so. The first thing you have to do when you add a new file, is to tell Git to keep track of changes made in this file. 
If you do not do this, Git will know that there is a new file, but it will classify it as *untracked*. After adding it to the staging area a first time, it will always keep track of the changes in this file. \n\nOn the premise that Git is already keeping track of the files, you can simply do `git commit -a -m \"some informative text\"` in which `-a` stands for add all changes in all files to the staging area and commit them at once. \n\n\n\n# 4. Create a new repository from your computer\nAs discussed here above, you can also create a Git repository from your computer. This is especially useful when we already have a project with a bunch of files which we now want to start version controlling. The first thing that we will do is **initialize Git** on this folder. Alternatively, make a new folder which will contain the files of an imaginary project in case you don't have one yet. In Git Bash (Windows) or in your Terminal (Mac, Linux), move to the project folder with `cd` and use the following command: \n\n```\ngit init\n```\n\nUnfortunately, it is not possible to create a GitHub repository from our computer. Hence, we need to open GitHub and create a new repository and DO NOT initialize it with a `README.md`, `.gitignore` or a license. It is important that it is empty in the beginning. We can add those files later.\n\nOnce created, GitHub will suggest commands that you might want to use on the Terminal to push our first changes to this GitHub repository. \n\nWe already initialized Git in our folder, so we can skip this step:\n```\ngit init\n```\n\nThe following steps basically ask us to commit our first changes. Given that we edited the README file:\n```\ngit add README.md\ngit commit -m \"first commit\"\n```\n\nHere comes the tricky part. 
We will learn about branches in [Chapter 5](https://material.bits.vib.be/topics/git-introduction/tutorials/5_branches/tutorial.html), however it suffices for now to understand that each branch carries a name and the default one is now called `main` where it earlier was called `master`. The following command will overwrite the name of the branch to `main`. \n```\ngit branch -M main\n```\n\nThen, we need to link the repository on our computer to the one on GitHub with: \n```\ngit remote add origin git@github.com:tmuylder/testtt.git\n```\n\nAnd finally push our commit to GitHub. The argument `-u` or `--set-upstream` will set the remote as upstream (see later):\n```\ngit push -u origin main\n```\n\n\n> ### {% icon question %} Question\n> \n> What if we want to create a new folder inside the folder which we are using for version controlling? Do we need to initialize Git inside this subfolder as well? \n>\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    > It is important to note that `git init` will keep track of all the subdirectories and their files that are in the folder. Thus, you don't need to create a git repository for each folder or subdirectory. Git repositories can interfere with each other if they are “nested”: the outer repository will try to version-control the inner repository. This will lead to errors.\n>    >\n>    > </details>\n>\n{: .question}\n\n\n> ### {% icon question %} Question\n> \n> How can we know whether a folder is already initialized with Git, meaning that we are already version controlling the project? \n>\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    > If we use  `ls -al` we get a list of all files and directories, including the hidden ones. A `.git` folder is present when the project is being version controlled. Git uses this special directory to store all the information about the project like the history of all commits. 
If we ever delete the `.git` sub-directory, we will lose the project’s history. \n>    >\n>    > Another possibility is to use the `git status` command which results in *fatal: not a git repository...* if the project is not being version controlled. \n>    >\n>    > </details>\n>\n{: .question}\n\n\nBefore starting with the next exercise we also want to stress the importance of not uploading data to GitHub. It's good practice to have links to data, however not the data itself. GitHub is not your next cloud storage instance. \n\n---\n\n> ### {% icon hands_on %} Exercise 3\n>\n>  Find a folder on your computer with some files that you want to version control, initialize Git on that folder and make it (privately) available on GitHub. \n> \n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    > See the steps in Section 4.  \n>    > </details>\n>\n{: .hands_on}\n\n---\n\n\n# 5. The strength of the staging area\nNow you're probably wondering why it's useful to have that many steps to save a file (add, commit, push). We will give a practical example based on the figure below: \n\n---\n\n<center><img src=\"../../images/staging_area.png\" /></center>\n\n---\n\nImagine that you're working on a project with multiple Python scripts and you're working on all of them. In this case your folder in your development area contains the files  `scriptA.py`, `scriptB.py` and `scriptC.py`. The changes that you made in script A and script C are somehow related, but script B is not. It's good practice to make commits in which changes that are related to each other are bundled. Hence, in this case we want to make one commit with the changes from file A and C. Now we can simply add scripts A and C to the staging area and commit it. The changes in script B will remain unsaved until we commit the changes in a separate commit. 
\n\nIt's always better to have more commits; in case you want to remove part of your work in a later stage, or you want to start your work again from a specific commit. \n\n\n# 6. Pull\nImagine that you change something in a file on GitHub, or upload a new file online via GitHub. We would want to include these changes or that file in the folder on our computer as well. For this we need to use the `pull` command to pull in the changes from GitHub. \n\nLet's go back to our repository on GitHub. We will make a change in the repository on GitHub and then pull these changes back into the repository on our computer (i.e. the project folder on our computer). \n\nClick on the `README.md` file in the list of files and click the pencil icon on the upper right. The file will open in an editor mode and we can change the title from *introduction-github* to *Introduction GitHub* or we can add some more descriptive text. Note that a README file is by default a markdown-file. Markdown is a text file with lay-outing options. If you haven't heard of it before, it's worth some [further reading](https://guides.github.com/features/mastering-markdown/).\n\nSave the changes by committing them as depicted here below:\n\n---\n\n<center><img src=\"../../images/commitReadme.PNG\"/></center>\n\n---\n\nGitHub is now one commit ahead of our local repository. Hence, we have to pull this commit into our local repository. We can do this by using the following command:\n```\ngit pull\n```\n\nOpen the file `README.md` and check whether the changes have merged in.  \n\n---\n\nLet's go to the [next session](https://material.bits.vib.be/topics/git-introduction/tutorials/4_history_status/tutorial.html)!","# What is GIMP?\nGIMP is short for **GNU Image Manipulation Program**. It is a free and Open-source, cross-platform image editor available for GNU/Linux, MacOS and Windows operating systems. During this training we will use **GIMP 2.10** on Windows. 
To download the most recent version for your OS, browse to the [GIMP Download page](https://www.gimp.org/downloads/).\n## External training material\n- [GIMP Manual page](https://www.gimp.org/docs/).\n- [GIMP 2.10 Basics on YouTube](https://www.youtube.com/watch?v=2EPIUyFJ4ag)\n- [Nick Saporito GIMP Tutorials](https://www.youtube.com/playlist?list=PLynG8gQD-n8Dl23X0o1HFu_5PmBl79niz)\n\n## User Interface\nGIMP has a 'Single-window' mode, this allows you to switch from multiple windows (for e.g. multiple monitors) to a single window. When the 'Single-window' mode is disabled, you have separate windows for toolboxes, view area and dockable dialogs. When enabled you have one window with all tools, options and dockable dialogs attached to the central view area. For beginners, we would advise the 'Single-window' enabled.\nOn the left panel you have the 'Toolbox' (if not present: **Windows - Toolbox** or press **Ctrl + B**) and underneath the 'Tool Options' dialog. Selecting a tool will result in a different Tool Option bar. Every tool has its own set of parameters and functions, best to keep them close to each other. \nOn the right-hand panel you can find other 'dockable dialogs'. These are easy to move, remove and re-introduce if necessary. To get a list of all 'dockable dialogs' go to **Windows – Dockable Dialogs - ...** . If you want a full screen view of your image select **Windows – Hide Docks**. \n\n## Import data and image properties\nTo import an image: **File – Open**\nWhen you select an image (any file type) in the import window, you get a preview and information on the right side. Click **Open** and the image(s) will be displayed in the middle box at zoom level 100% (1 pixel image = 1 pixel screen) or fitted to your windows. To zoom use Ctrl + mouse scroll up or down. Multiple images in GIMP are displayed in different tabs on top of the View Area.\nBefore you export your image, make sure it has the right resolution and pixel density. 
**Image - Image Properties** will give you all the information your image holds. This information can be very useful when you open an image from an unknown source.\n\n## Selection\nRectangular selection has several options and shortcut keys. The first icons in the tool options are the selection modes: add to selection (Shift), subtract from selection (Ctrl) and intersect with selection (Shift+Ctrl). More options are: feathering edges, rounding of the corners, expand from center, lock aspect ratio, size and position and if necessary to highlight the selection). The Ellipse selection tool has more or less the same options.\nThere are other selection tools available: Free Selection, Select by Color, Fuzzy Selection, Scissor Selection, Foreground Selection. Those tools have different tool options and are only used in specific cases.\n\n## Transforming\nThere are several ways to transform your image or selection; rotating, scaling, shearing and flipping. You can transform a selection, a layer or the image. When using the rotation tool, you have several options in the dockable dialog below. An important option is “Clipping” this will change the aspect ratio of your image after rotating. \nAnother way of rotating an entire image is: **Image – Transform – ...** then you have the option to flip (horizontal/vertical) or rotate (90°/180°). The entire image will be rotated including the selection and image orientation. \n\n## Layers\nMake sure you have the dockable dialog ‘Layers’ in your window. All options for layers can be found in the menu bar “Layer”. You can make a new blank layer or duplicate the current layer (e.g. copy of original image to compare or as back-up). In the dockable dialog you can hide or show a layer (eye button), rename them or move them up and down in the layer stack. 
If you want to link/connect two or more layers, you can use the chain button (next to the eye button).\nTo copy a selection to a new layer, perform a regular copy/paste action of that selection (Ctrl+C and then Ctrl+V) and select **Layer - To New Layer**\nIf you want to merge all layers into one layer you can select **Image – Merge Visible Layers**.\n\n## Brightness and contrast\nIn the menu bar you can find **Colors** . This menu has multiple options to manipulate your image; \n- Color Balance will change the cyan, magenta and yellow color levels of your image\n- Brightness and Contrast will change brightness and contrast and you can save these settings as a favorite \n- Threshold will reduce your image to two colors by using a threshold value\n- Adjust color curve will change the gamma setting of your image\n- Posterize will change the number of colors (2-256)\n\n## Guides and cropping\nYou can split your image in different sub-images. This can be done by using 'Guides'. To create such a break-line, go to **Image - Guides - New Guide... or (by Percent)...**. You can create a horizontal or vertical guide at the value/percentage you enter. A guide will be displayed as a blue dashed line. To chop your image in multiple parts, go to **Filters- Web- Slice** (Older versions: Image - Transform - Guillotine). The sub-images will be generated in the folder you selected.\nIf you only want a selection of your image without all the rest you can crop by clicking **Image – Crop to Selection** or use the Crop tool from the Toolbox.\n\n## Scaling and print size\nWhen you want to scale your image to a smaller resolution you can select **Image – Scale Image**. There you can scale in pixels (or another unit) and you can lock the aspect ratio (chain symbols).\nIf you want to change the print size to make your image suitable for publication you can select **Image - Print Size...**. 
There you can change the dimension/resolution and pixel density of your image.\n\n## Remove background color\nIf you download an image of a company or university logo, it might have a white (or any other color) background. This can be very annoying when the destination background is different. In order to remove the background color, we first have to add an alpha channel to this image: **Layer - Transparency - Add Alpha Channel** - If the Alpha channel is already present, skip this step. Now you're able to get a transparent background using the option: **Image - Color to Alpha**. In the new window you can select the color which you would like to convert to transparent pixels. You can either select by clicking the color bar or use the color picker icon.\n\n## Exporting\nSelect **File – Export as…**\nIf you click on the '+' next to Select File Type, you have a list of all possible extensions in which you can export your image. Each of those file formats has different compression options.\n\n# Exercises on image manipulations in GIMP\n\n> ### {% icon hands_on %} Hands-on: Exercise 1\n> Source file: [Image 1](http://data.bits.vib.be/pub/trainingen/GIMP_Inkscape/original_file.tif)\n> Task: Split this image in 2 parts, one for each gel. Make sure the bands are horizontal and export the 2 new images in the same file format as the original. You can adjust brightness and contrast to make all the bands more visible.\n{: .hands_on}\n> ### {% icon hands_on %} Hands-on: Exercise 2\n> Source file: [Image 2](http://data.bits.vib.be/pub/trainingen/GIMP_Inkscape/Exercise1.1.jpg)\n> Task: Rotate this image 45 degrees and crop an image of 500x500 pixels out of the original. Make sure the printing resolution is set to 300 ppi and export this image as a PNG file. 
Adjust brightness and contrast to make this image look better.\n{: .hands_on}\n> ### {% icon hands_on %} Hands-on: Exercise 3\n> Source file: [Image 3](http://data.bits.vib.be/pub/trainingen/GIMP_Inkscape/Exercise1.2.jpg)\n> Task: Cut this image in 4 equal parts. Know that the printing width is 150 mm and the journal demands a minimum of 300 ppi for all 4 images. Also export each of them in a different file format without losing image quality. Adjust brightness and contrast to your own opinion.\n{: .hands_on}\n> ### {% icon hands_on %} Hands-on: Exercise 4\n> Source file: [Image 4](http://data.bits.vib.be/pub/trainingen/GIMP_Inkscape/Exercise1.3.jpg)\n> Task: Adjust brightness and contrast of this image and export it in a way to make the file as small as possible. Use preferably lossless compression (try lossy compression to compare file size), there is no restriction on file formats. Be sure your image is exported with at least 300 ppi.\n{: .hands_on}\n> ### {% icon hands_on %} Hands-on: Exercise 5\n> Source file: select from the internet\n> Task: Download an image from your most favorite brand and remove the white (or other color) background. Export this new image in a format that supports transparent pixels.\n{: .hands_on}","# Introduction\n{:.no_toc}\n\n<!-- This is a comment. -->\n\nThe goal of this exercise is to appreciate how protein interactions can be studied through visual inspection and other software tools. Protein interactions can be classified into different groups regarding the molecular properties and functions of the interacting partners. (These groups are intertwined in several cases.) 
Some examples include:\n\n- the interactions of proteins with other proteins, small molecules, carbohydrates, lipids or nucleic acids;\n- Receptor-ligand interactions;\n- Antigen-antibody interactions;\n- Enzymatic interactions, enzyme-inhibitor interactions.\n\n## Exploring the structure of a nanobody-stabilized active state of the β2 adrenoceptor - the ligand \n\nWe will start with exploring one crystal structure of the β2 adrenoceptor. Together with the Steyaert lab from VIB, Kobilka published several crystal structures of the β2 adrenoceptor in its various activation states (Rasmussen et al. Nature 2011, 477)\n\n\n> ### {% icon hands_on %} Get the structure\n>\n> 1. Download the crystal structure 3P0G from the PDB into YASARA. \n>\n>    ```\n>    File - Load - PDB file from internet    \n>    ```\n>    As you can immediately appreciate, it is a bigger crystal structure with more than one molecule. \n>\n{: .hands_on}\n\n> ### {% icon question %} Questions\n>\n> 1. How many molecules are present in the crystallized structures? \n> 2. And how many chain identifiers are used? \n>\n> <details markdown=\"1\">\n> <summary>{% icon solution %} Solution\n> </summary>\n> \n> 1. There are three molecules, chain A Beta-2 adrenergic receptor; Endolysin, chain B Camelid Antibody Fragment, and a small molecule ligand. \n>    Also have a look at PDBe [3P0G](https://www.ebi.ac.uk/pdbe/entry/pdb/3p0g) which gives a very nice overview of the structure and its composition.\n> 2. Only two chain identifiers A and B. Sometimes, this leads to issues depending on the software you might want to use for downstream processing.\n> \n> </details>\n>\n>\n{: .question}\n\nSome software routines need separate chain identifiers for molecular entities to work correctly, so we suggest renaming the small molecule to chain L.\n\n\n> ### {% icon hands_on %}  \n>\n> 1. Activate the Head-up display\n> 2. Select Rename\n> 3. Enter 'L' to proceed with the renaming. 
\n>\n{: .hands_on}\n\nWe first have a look whether we can find out if there are specific interactions of the small molecule ligand with the adrenoreceptor.\n\nIn order to do so, we first have to add Hydrogens to all present molecules.\n\n> ### {% icon hands_on %}  \n>\n> 1. Edit - Add - hydrogens to : All \n> 2. Change the display of the ligand to Sticks\n> 3. Select the amino acids of the binding pocket i.e. a sphere of 10 Angstrom around the ligand\n>    ```\n>    Select – in sphere around – Residue and drag with the mouse until the display says 10 Å\n>    ``` \n> 4. ```\n>    View – show interactions – hydrogen bonds of - Residues\n>    ```\n>    select 'Selected' in the panel Belongs to or has\n>    and press OK in the subsequent window\n>\n{: .hands_on}\n\nGiven that hydrogen bonding is dependent on the definition of a hydrogen bond in the program, it is not a bad idea to use other tools to compare the analysis. There are many options to do this online if you look at published crystal structures. Next to the tools which are directly linked out from the web site of the crystal structure at the PDB database you can use the [ProteinPlus server](http://proteinsplus.zbh.uni-hamburg.de/)\n\nGo to the web site of ProteinPlus and enter the PDB code 3P0G into the search box. After clicking on Go, you should be presented with an overview of the tools the ProteinPlus server provides.\n\nWe do not go into great detail on all the tools but only mention PoseView. 
With this tool, you can prepare an automatic sketch of the small molecule-protein interactions.\n\n<figure id=\"figure-1\"><img src=\"../../images/ProteinPlusPoseView.png\" alt=\"Protein Plus Server\"><figcaption><span class=\"figcaption-prefix\">Figure 1:</span> Overview of 3P0G</figcaption></figure>\n<figure id=\"figure-2\"><img src=\"../../images/3P0G_A_PoseView_Input.png\" alt=\"Zoom on ligand of 3P0G\"><figcaption><span class=\"figcaption-prefix\">Figure 2:</span> Zoom on ligand co-crystallized with 3P0G</figcaption></figure>\n\n\n> ### {% icon question %} Questions\n>\n> 1. Between which amino acids and the ligand do you see hydrogen bonds using YASARA? \n> 2. According to PoseView, between which amino acids and the ligand do you see hydrogen bonds?\n> 3. What other interactions are presented in the sketch?\n> 4. Inspect the visualisation in Yasara: Do you see the interactions in Yasara as well?\n>\n> <details markdown=\"1\">\n> <summary>{% icon solution %} Solution\n> </summary>\n> \n> 1. In YASARA, you observe hydrogen bonds between Asp113A as well as the carbonyl function of Asn312A and the charged amine function.\n>    \n> 2. PoseView indicates hydrogen bonds between Asp113A as well as the carbonyl function of Asn312A and the charged amine function. Furthermore, hydrogen bonds are indicated between the phenolic OH and Ser207A and Ser203A as well as the amine function and Ser203A.\n> \n> 3. Furthermore, hydrophobic interactions are indicated for the methylbenzyl moiety and pi-pi interactions of Phe290A and the phenolic moiety.\n>\n> 4. With YASARA Structure license, those hydrophobic interactions can also be visualised. \n> </details>\n{: .question}\n\n\n# Exploring the structure of a nanobody-stabilized active state of the β2 adrenoceptor - the nanobody \n\nIn order to estimate the binding energy between the nanobody and the β2 adrenoceptor, we can use the FoldX tool AnalyseComplex. 
It is recommended to calculate these binding energies on energy-minimized structures. To illustrate the effect of the energy minimization, we compare the interaction energy of the current crystal structure and its minimized structure.\n\n\n## Use the FoldX tool AnalyseComplex \n\n> ### {% icon hands_on %} \n>\n> 1. Given that energy-minimization takes a while for this rather large complex,\n>     please download the Yasara scene [here](http://data.bits.vib.be/pub/trainingen/PSA/3P0G_1.sce)  \n>    \n>    Calculate the interaction energies between the chain A and B of the object 3P0G \n>    and the RepairObj1, respectively. \n>\n>    ```\n>    Analyze - FoldX - Interaction energy of molecules\n>    ```\n{: .hands_on}\n\n> ### {% icon question %} Questions\n>\n> 1. What is the dG in the two cases? \n> 2. Any idea why the difference is rather huge?\n>\n> <details markdown=\"1\">\n> <summary>{% icon solution %} Solution\n> </summary>\n> \n> 1. first case (X-ray structure): Interaction energy between molecule(s) A and B in object 1 = -9.86 (kcal/mol)\n>    second case: \n>    Interaction energy between molecule(s) A and B in object 2 = -20.19 (kcal/mol)\n> 2. Through the energy minimisation of the Repair Object function, the interactions of the amino acids are optimised.  \n> </details>\n{: .question}\n\nThis command also creates a list of residues forming the interface of the two proteins. 
Hit space to see the list of residues in the interface.\n\nTip: This list can also be useful if you want to make visualisations of the interaction site.\n\n```\nPlugin>interface residues between A and B\nPlugin>TA66 TA68 IA72 IA127 RA131 AA134 IA135 TA136 SA137 PA138 FA139 KA140 QA142 YA219 VA222 EA225 AA226 LA266 KA267 EA268 AA271 LA272 TA274 LA275 IA278 IA325 YA326 RA328 SA329 PA330 SB27 IB28 FB29 SB30 IB31 TB33 AB50 IB51 eB52 SB56 TB57 NB58 YB100 AB102 VB103 LB104 YB105 EB106 YB107\n```\n\n# Comparing the active and the inactive conformation of the β2 adrenoceptor \n\nIn case there is still time, I would recommend using some of the skills you learned today to create a superposition of the inactive and active conformation of the β2 adrenoceptor. We take one of the crystal structures which are available: 3SN6\n\n```\nFile - Load - PDB file from Internet\n```\n\nYou will be kind of overwhelmed once the structure is loaded into YASARA. In order to get a first quick overview, click on the 'Center' button in the menu of YASARA (5th button from the left). Then, it is time to look at the PDB entry of 3SN6 in the PDB database to have a first idea on what molecules are in the PDB file.\n\nAs you see on the website [3SN6](http://www.rcsb.org/pdb/explore/explore.do?structureId=3SN6), the chain R consists of 2 molecules, the β2 adrenoceptor and lysozyme. \nIn the corresponding article, it is stated that 'the unstructured amino terminus of the β2AR is replaced with T4 lysozyme (T4L)'.\n\nSince this is an extra molecule in the crystal structure which disturbs our view, we will delete it.\n\nAfter the manipulation, the overall picture should look roughly like this.\n\n<figure id=\"figure-3\"><img src=\"../../images/3SN6_withoutLysozyme.png\" alt=\"Superposition\"><figcaption><span class=\"figcaption-prefix\">Figure 3:</span> Overview of 3SN6 without lysozyme</figcaption></figure>\n\nIn the following step, we structurally align only the receptors. 
The rest of the structures will move along.\nIt is suggested to use the first chain A from 3P0G as target. In order to do a structural alignment, it is suggested to use the first chain A from 3P0G as target.\n\n```\nAnalyze - Align - Pairwise, based on structure - Molecules with MUSTANG\n```\n\nInvestigate the differences in TM helices and the binding of the nanobody compared to the subunit of the G protein.\n\nTip: Color the secondary structures to better identify the individual chains/units of G protein. \n\n# Conclusion\n{:.no_toc}\n\nSum up the tutorial and the key takeaways here. We encourage adding an overview image of the\npipeline used.\n","## Search for a structure\n\n### Via [UniProt](http://www.uniprot.org/)\nThe way of searching for a specific protein structure depends on the data you already have. You might already have the PDB ID (a unique identifier), that's an easy one. But mostly you have the protein name or you just have a sequence. In the last cases I recommend to start from the UniProt website at <http://www.uniprot.org>, which is the best annotated protein database in the world. Our first model protein will be the molecular chaperone DnaK from *E. coli*. Below is an image of the UniProt search box where you can start your search for proteins.\n\n<figure id=\"figure-1\"><img src=\"../../images/uniprotsearchbox.png\" alt=\"uniprotsearchbox.png\"><figcaption><span class=\"figcaption-prefix\">Figure 1:</span> Search box</figcaption></figure>\n\n> ### {% icon hands_on %} Explore a PDB structure on the Uniprot web site\n>\n> 1. Go to the UniProt website and search for the DnaK protein\n> - The UniProt search engine returns a list of DnaK protein sequences from a variety of organisms. An entry with accession code **P0A6Y8** and entry name **DNAK_ECOLI** should be near the top of this list.\n> 2. Click on the *accession code* (column Entry) to view the protein page of this DnaK from the model organism *Escherichia coli*.\n> 3. 
Click on *Structure* in the left-side menu and then look at the *3D structure databases* table.\n{: .hands_on }\n\n### {% icon question %} Guidelines which PDB structures to select\n\nWhich structures (give the 4-character PDB ID) of the C-terminal domain of DnaK should preferentially be used for analysis and why?\n> <details markdown=\"1\">\n> <summary>{% icon solution %} Solution\n> </summary>\n>\n> Usually, the recommended selection criteria are an X-ray structure with high resolution (i.e. a low resolution value in Ångstrom) and a low $R_{free}$ factor. Furthermore, the PDB database has pre-calculated a validation report for all of the structures.\n>\n> As an example, have a look at https://www.ebi.ac.uk/pdbe/entry/pdb/4EZX under the section 'Experiments and Validation'. For many PDB structures, there is also a re-done structure available with a vast amount of information on the quality of the X-ray structure and suggested 'better' models e.g. (https://pdb-redo.eu/db/4ezx). In our case, we could opt for the structures 1DKX and 4EZX.\n>\n> This is a difficult example since there are so many high resolution structures available. So, it is recommended to study the articles and compare the available structures to find your favorite structure for further analysis.\n> </details>\n{: .solution}\n{: .question }\n\n\n### Via the Protein Data Bank by PDB ID\n\nYou can find structural information directly at the PDB database. The web site of the PDB consortium is located at <http://www.wwpdb.org>. This web site provides links to all members of the PDB (left side). It is a question of taste which resource you start off with. For X-ray structures, it is currently PDBe, RCSB PDB, PDBj. For NMR structures, you find the BMRB. 
In today's course, we focus on the PDB resources only.\n\nBelow is an image of the RCSB search box <http://www.rcsb.org/pdb/home/home.do> where you can start your search for structures.\n\n<figure id=\"figure-2\"><img src=\"../../images/pdbsearchbox_RCSB.png\" alt=\"Pdbsearchbox_RCSB.png\"><figcaption><span class=\"figcaption-prefix\">Figure 2:</span> PDB Search Box</figcaption></figure>\n\nThe PDB file with ID **1DKX** contains the atomic coordinates of the molecular chaperone (DnaK) from *E. coli*.\n\n### {% icon hands_on %} Search a structure on the RCSB web site\n\n1. Go to the PDB website and type 1DKX in the search box\n{: .hands_on }\n\n\nThis will lead you to the same page we got earlier through UniProt.\n\n### Via the Protein Data Bank by sequence\n\nIn lots of cases we only have a sequence of which we would like to find out if there is structural information. The PDB can be searched using a sequence as input. Here is the sequence of the C-terminal substrate binding domain of DnaK:\n```\n    DVKDVLLLDVTPLSLGIETMGGVMTTLIAKNTTIPTKHSQVFSTAEDNQSAVTIHVLQGE\n    RKRAADNKSLGQFNLDGINPAPRGMPQIEVTFDIDADGILHVSAKDKNSGKEQKITIKAS\n    SGLNEDEIQKMVRDAEANAEADRKFEELVQTRNQGDHLLHSTRKQVEEAGDKLPADDKTA\n    IESALTALETALKGEDKAAIEAKMQELAQVSQKLMEIAQQQHAQQQTAGADASANNAKDD\n    DVVDAEFEEVKDKK\n```\nThe PDB allows sequence searches through the same search box we used before.\n\n<figure id=\"figure-3\"><img src=\"../../images/pdbsearchbox_RCSB.png\" alt=\"Pdbsearchbox_RCSB.png\"><figcaption><span class=\"figcaption-prefix\">Figure 3:</span> PDB Search Box</figcaption></figure>\n\nThere is also an Advanced Search section, with a Blast/Fasta option in the Sequence Features section.\n\n<figure id=\"figure-4\"><img src=\"../../images/blastpdb.png\" alt=\"Blastpdb.png\"><figcaption><span class=\"figcaption-prefix\">Figure 4:</span> BLAST</figcaption></figure>\n\n> ### {% icon hands_on %} Hands-on: BLAST search for PDB structure\n>\n> 1. Go to the Advanced Search section\n> 2. 
Please select 'Sequence BLAST/PSI-BLAST' in the Query type drop down.\n>    This method allows you to change some parameters for the search.\n> 3. Copy and paste the sequence in the ''Sequence'' field\n> 4. Press ''Submit query''.\n> 5. You should see the same structures popping up as you saw in the UniProt page of DnaK.\n{: .hands_on}\n\n# The PDB file\n\n## Introduction\n\nA PDB (Protein Data Bank) file is a plain text file that contains the\natom coordinates of a solved 3D structure of a protein or even DNA. Such\ncoordinate files can be obtained at the Protein Data Bank at\n<http://www.rcsb.org/pdb>. Each PDB file has a unique identifier (ID)\nconsisting of 4 characters, the first one is always a number. Note: It\nhas been announced that the 4 character code will change in the future\n<https://www.wwpdb.org/news/news?year=2017\\#5910c8d8d3b1d333029d4ea8>.\n\nThe PDB file with ID **1DKX** contains the atomic coordinates of the\nmolecular chaperone (DnaK) from *E coli*.\n\n> ### {% icon hands_on %} Hands-on: BLAST search for PDB structure\n>\n> 1. Go to the PDB website at <http://www.rcsb.org/pdb>\n> 2. Type 1DKX in the search and try to answer the following questions.\n{: .hands_on}\n\n### {% icon question %} Questions\n\n1. How many molecules were solved in this PDB file? What kind of molecules are these (proteins, peptides, DNA, ...)?\n2. Does the structure represent the full protein? If not, how many residues are missing? Hint: Click on the UniProt KB link in the Sequence tab to see the full sequence.\n3. Was this structure solved by X-Ray or NMR?\n4. What is the atomic resolution and R-factor?\n\n> <details markdown=\"1\">\n> <summary>{% icon solution %} Solution\n> </summary>\n> 1. Two, called polymers or chains: they are polypeptides ![Type](../../images/Mol_desc_1DKX.png)\n> 2. 
To answer this question you can go to the sequence tab at the top:\n>    - ![Uniprot view](../../images/Pdb_firstresiduesmissing_1dkx.png)\n>    - Summary: a large chunk of the N-terminus is missing from the structure, the C-terminus is virtually complete.\n> 3. X-RAY diffraction, as shown by Experimental Details\n> 4. Atomic resolution: 2.00 Ångstrom and R-factor of 0.206\n> </details>\n{: .question}\n\n\n## Downloading the structure\n\nThe file that holds the 3D coordinates can be downloaded by clicking on\n*Download files* in the top right corner and then choosing *PDB file (text)*.\nFor convenience, save this file on your desktop. The filename is the\n4-character unique PDB ID.\n\n![Pdbdownloadfile1.png](../../images/pdbdownloadfile1.png)\n\n> ### {% icon hands_on %} Hands-on: Open downloaded PDB file in an editor\n> 1.   Open this file with a text editor, e.g. WordPad is an excellent tool for that.\n> 2. Do you see the different sections in the PDB file? Analyse some ATOM lines and try to explain what kind of data is in each column.\n{: .hands_on}\n\nAdditional exercises on searching PDB can be found on [the basic bioinformatics exercises page](http://wiki.bits.vib.be/index.php/Exercises_on_Protein_Structure).\n","## Install Python and PovRay\n\nPython and PovRay should be installed already, so you can skip this part.\n\nThe programming language Python must be installed to use some very useful YASARA features. Simply start YASARA as administrator. Right click the YASARA icon on the desktop and choose \"Run as administrator\". 
Once the program is opened, click\n\n```\nHelp > Install program > Python\n```\n\nPovRay is used to make high quality publication-ready images and should be downloaded first with:\n\n```\nHelp > Install program > PovRay\n```\n\n## Tutorial movie\n\nPlay the movie \"Working with YASARA\":\n\n```\nHelp > Play help movie > General: Working with YASARA\n\n```\n\n## Scene styles\n\nOpen the PDB with code 1TRZ in YASARA.\n```\nFile > Load > PDB file from Internet\n```\nIf this option is not there, it means you haven't installed Python yet. Please check above.\n\nThe molecule will be loaded and presented in the ball style. Different scene styles exist to rapidly change the view:\n\n* F1: Ball\n* F2: Ball & Stick\n* F3: Stick\n* F4: C-alpha\n* F5: Tube\n* F6: Ribbon\n* F7: Cartoon\n* F8: Toggle sidechains on/off (press multiple times and see what happens)\n\n**Be careful!** If you have just made a nice close-up of e.g. an active site where you show some residues and hide others, and put some atoms in balls while others are in sticks, you will lose everything when you press one of the F-keys!!! The F-keys change the viewing style without asking.\n\nTry all the different scene styles!\n\n## Showing and hiding residues\n\nThe function keys F1-F3 show all atoms and residues by default. The keys F4-F7 do not explicitly show atoms and residues but are merely an impressionistic representation of the structure. The F8 key does, to a certain extent, show atoms, but only of side chains, not main chain atoms.\nMostly, when doing structure analysis, we want to show only the most interesting residues, the ones we want to analyze, and hide all the others.\n\nThe structure of insulin was crystallized together with some water molecules. In many cases, it is no problem to permanently delete those waters. To visualize the waters, select an atom view such as F1, F2 or F3. 
See the red water (oxygen) atoms floating around the surface?\n```\nEdit > Delete > Waters\n```\n\nThen select the base scene style without any explicit atoms, e.g. tube style (F5). Press F5. This is our representation of the backbone.\n\nThere are several ways to show the residues of interest:\n\n1. From the menu\n```\n   View > Show atoms in > Residue\n```\n   Select Cys7 from Molecule **A** and press OK\n2. From the sequence selector ![seqselector.png](../../images/Seqselector.png)\n   Hover the mouse on the bottom of the screen, you will see the sequence selector opening. Open it permanently by pressing the blue nailpin on the left side of it. Search for Cys7 from Molecule **B**, right-click and select:  \n```\n   Show > Residue\n```\n\nNow show the atoms of His5 in Molecule B using a method of choice.\n\nAnd now that we're on it, what is special about the two cysteines we just visualized?\n\n**Hiding** individual atoms or residues works in the same way as showing them, only now you should go to **Hide atoms** in the menus.\n\n## Showing and hiding secondary structure\n\nMost published molecular images show a detailed active site and all the\nrest is hidden for clarity. From the previous exercise we show the atoms\nof 3 residues (let's assume this is our active site). Now secondary\nstructure of the rest of the molecule is also still visible. To hide all\nthat, we do not have to hide atoms, but hide the secondary structure\n(the F5 tube view) from the rest of the structure. Atoms and residues in\nYASARA are not the same as the term 'secondary structure'. Atoms and\nresidues are balls and sticks, 'secondary structure' is an artistic\nimpression of the structure (beta sheet arrows, helix ribbons, ...). If\nyou get this concept, you are a YASARA master.\n\nSo let's hide many of the secondary structure, but keep just a few\nstretches around our active site. Our active site is Cys7 (A), Cys7 (B)\nand His 5 (B). This can be done in several ways. 
Since we would have to\nhide almost everything, I propose to hide first everything and then show\nagain those stretches that we want. But if you have a better idea, I\nwould like to hear it.\n\nHide all secondary structure:\n```\n   View > Hide secondary structure of > All\n```\n\nThen show stretches of residues 2-10 in Mol B and residues 4-10 in Mol A\nin tube view as:\n```\n    View > Show secondary structure > Tube through > Residue\n```\nThen select the correct stretches of residues by keeping the CTRL key\npressed to select multiple residues.\n\nThere are still some metal-bound histidines flying around that weren't\nhidden because they are metal bound (a YASARA specific thing). Hide\nthose histidines by clicking on one of the sidechain atoms, then\nright-click and select:\n\n```\n   Hide atoms > Residue\n```\n\nThe nasty dative bonds and metals can be removed simply by deleting all\nof them:\n\n```\n   Edit > Delete > Residue > Name\n```\n\nIn the name column select all the metals and ions you can find.\n\nEt voilà, a publication ready image!\n\n<figure id=\"figure-1\"><img src=\"../../images/Insulin_hires.jpg\" alt=\"Insuline\"><figcaption><span class=\"figcaption-prefix\">Figure 1:</span> Insuline</figcaption></figure>\n\n## Labels\n\nYou can put labels on the residues you want to highlight by going to the\nmain menu or selecting an atom from a residue and right-click. In the\nlatter case you select:\n\n```\n   Label > Residue\n```\n\nNote that *residue name* and *residue number* is automatically selected.\nChange the height to 0.5 or so and select a nice color for the label.\nPresto\\!\n\n## Colors\n\nYou can color on all levels: atoms, residues, molecules and objects. So\nbe careful, if you color a residue, all of its atoms will get that\ncolor. If you color a molecule, all atoms in that molecule will get that\ncolor.\n\nLet's color the secondary structure (the backbone in our case) of our\nactive site in orange. 
But the sidechains should keep their Element\ncolors. So we shouldn't color entire residues, but only a selected atom\nset. Therefore our selection will be at the atom level, not the residue\nlevel. Go to:\n\n```\n    View > Color > Atom > Belongs to or has > Backbone\n```\n\nThen select the orange color (color code 150) and select 'Apply unique\ncolor'. Hopefully, it is a satisfying result.\n\n## Saving all the beautiful work\n\nIt would be a pity that you spent hours creating fancy molecular\ngraphics for that next Nature paper while you can't continue on the work\nthe next day. That's why YASARA can save the entire Scene including\norientations, colors, views, everything. To save the current scene, go\nto:\n\n```\n    File > Save as > YASARA Scene\n```\n\nChoose a filename such as MyInsulin.sce\n\nTo load the work again in YASARA go to:\n\n```\n    File > Load > YASARA Scene\n```\n    \nCareful: loading a Scene will erase everything else!\n\n## Creating high quality images\n\nTo save the current view to a high quality publication ready image file,\ngo to:\n\n```\nFile > Save as > Ray-traced hires screenshot\n```\n\nThis requires that the PovRay program has been installed. See the first\nitem on this page.\n\nUsually, you prefer to have a transparent background, so check the\nrespective box.\n\n## Distances\n\n**Distances** between atoms are calculated as follows:\n\n  - select the first atom\n  - keep CTRL pressed and select the second atom.\n  - left of the screen indicates the 'Marked Distance' in Angstrom.\n\n<!-- end list -->\n\n### {% icon question %} Questions\n\nWhat is the distance between the C-alpha (CA) atoms of Tyr19 and Leu16?\n\n> <details markdown=\"1\">\n> <summary>{% icon solution %} Solution\n> </summary>\n> To solve the question you need to select a view that shows you atoms\nincluding C-alphas. Possible views or scene styles that show these atoms\ncan be F1 (ball), F2 (ball\\&stick), F3 (stick) and F4 (C-alpha). 
The\nviews F5-F8 won't show you any CA's explicitly. Try it.\n> So you've probably noticed that pressing the CTRL button allows you to select multiple atoms. This is important for the next exercise.\n> The distance is 5.8 Ångstrom.\n> </details>\n{: .question}\n\n## Hydrogen bonds\n\nTo show hydrogen bonds, YASARA needs the actual hydrogens to be present.\nIn NMR structures these are normally there. But in X-Ray structures\nhydrogens are missing. Luckily YASARA can add the hydrogens for you.\n\nSelect tube view (F5) and toggle on the sidechains with F8.\n\nAdd hydrogens with:\n\n```\nEdit > Add > Hydrogens to all\n```\n\nThen show the hydrogen-bonds:\n\n```\nView > Show interactions > Hydrogen bonds of > All > OK\n```\n\nIf the view is too chaotic for you, toggle off the sidechains with F8\n(press until the sidechains are hidden).\n\n### {% icon question %} Questions\n\nDo you see the typical helix and beta sheet pattern?\n\nArg22 from Molecule/Chain B is making a hydrogen-bonded electrostatic interaction (salt bridge) with another residue. Which residue?\n\n> <details markdown=\"1\">\n> <summary>{% icon solution %} Solution\n> </summary>\n> The interaction partner is Glu17 from chain A.\n> </details>\n{: .question}\n\n\n\nTo remove the hydrogen bonds, you have multiple choices:\n\n```\nView > Hide hydrogen bonds of > All\n```\n\nor just delete all hydrogens (this will also delete all hydrogen bonds):\n\n```\nEdit > Delete > Hydrogens\n```\n\n## Surfaces\n\nIt can be very useful and informative to show the molecular surface of a\nprotein. You can visualize cavities, ligand binding sites, etc ... To\nshow the molecular surface of one monomer of dimeric insulin, go to:\n\n```\nView > Show surface of > Molecule\n```\n\nSelect in the *Name* column A and B (these are the two chains in 1\nsubunit). Press *Continue with surface color* and make sure Alpha is\n100. 
Any number lower than 100 will create transparency in the surface\n(could be nice as well).\n\n## Molecular graphics exercise\n\nTry to reproduce the following image of the 1TRZ insulin structure\n(hints below):\n\n<figure id=\"figure-2\"><img src=\"../../images/Insulin.png\" alt=\"insulin.png\"><figcaption><span class=\"figcaption-prefix\">Figure 2:</span> Insuline</figcaption></figure>\n\nHints:\n\n  - choose the proper secondary structure scene style (F6 was used here)\n  - find the correct orientation first\n  - color all backbone atoms in gray\n  - find the residue numbers of the 2 colored helices\n  - color those residues magenta\n  - show the sidechain atoms and the CA of the two histidines and the\n    glutamate\n  - color the sidechain atoms of all residues in the Element color\n  - label the histidines and the glutamate\n  - if you need some help how to change the parameters for the label,\n    please have a look at Help -\\> Show user manual and search in\n    Commands / Index\n\n## More coloring\n\nDownload GroEL via PDB code 1WE3 in YASARA.\n\nTry to reproduce (approximately) the following image (hints below):\n\n<figure id=\"figure-3\"><img src=\"../../images/Groel.png\" alt=\"groel.png\"><figcaption><span class=\"figcaption-prefix\">Figure 3:</span> GroEL</figcaption></figure>\n\nHints:\n\n  - load the PDB as File \\> Load \\> PDB file from internet\n  - zoom out and find the correct orientation\n  - delete the ADP, DMS and Mg molecules (are treated as residues in\n    YASARA). So Edit \\> Delete \\> Residue \\> Adp ...\n  - color by molecule (every molecule will get another color) and color\n    by gradient (now you need to specify 2 colors, the begin and end\n    color).\n  - choose a first color (eg. color with code 0)\n  - choose a second color (eg. 
color with code 300, so you go over the\n    entire color wheel spectrum)\n\nMore exercises can be found on the [basic bioinformatics exercises\npage](http://wiki.bits.vib.be/index.php/Exercises_on_Protein_Structure).\n\n# Conclusion\n{:.no_toc}\n\nNow, you have explored the YASARA interface and acquainted with basic visualisations. You have identified how you can visualise secondary structure elements, surfaces, and hydrogen bonds. And most importantly, you can create publication-ready figures using Yasara.\n","## Introduction\n{:.no_toc}\n\n<!-- This is a comment. -->\n\nThe goal of homology modeling is to predict the 3D structure of a protein that comes close to what would be achieved experimentally with X-Ray experiments.\n\nMain principles of homology modeling\n\n- We predict the structure of a protein sequence on the basis of the structure of another protein with a similar sequence (the template)\n- If the sequences are similar, the structures will have a similar fold\n- Structure is more conserved than sequence\n\n## Main ingredients for homology modelling \n\n### The sequence\n\nLast week my colleague sequenced a plant protein. He is not a bioinformatician. Yet, he would like to know what the structure might look like to do some rounds of rational mutagenesis. Let's try to address the problem for him.\n \nHe came up with this sequence:\n\n```\nSVCCPSLVARTNYNVCRLPGTEAALCATFTGCIIIPGATCGGDYAN\n```\n\n### Searching for the template structure\n\nActually, the first step is to check whether the PDB already contains the structure of this sequence. That would be easy so we don't have to model anything. We will use Blast again to search with the sequence.\n\n> ### {% icon hands_on %} Hands-on: BLAST search for PDB structure\n>\n> 1. Go to the Advanced Search section\n> 2. Please select 'Sequence BLAST/PSI-BLAST' in the Query type drop down.\n>    This method allows you to change some parameters for the search.\n> 3. 
Copy and paste the sequence in the ''Sequence'' field\n> 4. Press ''Submit query''.\n> 5. You should see the same structures popping up as you saw in the UniProt page of DnaK.\n{: .hands_on}\n\nA suitable template structure to make a high quality model should have following properties:\n\n- The highest possible sequence identity from all structures in the PDB when aligned to the target sequence\n- A good resolution (and R-factor): if many identical template structures exist with the same sequence, filter by resolution\n- Is solved by X-RAY, not NMR.\n\n> ### {% icon question %} Questions\n>\n> 1. Is there a difference in the number of identities, positives and gaps between the two remaining x-ray structures? \n> 2. What is the PDB ID with the highest resolution, does not have insertions or deletions and should thus be the better template structure? \n>\n> <details markdown=\"1\">\n> <summary>{% icon solution %} Solution\n> </summary>\n>\n> 1. **TODO** \n> 2. **TODO** \n>\n> </details>\n>\n>\n{: .question}\n\n\n## Aligning target and template sequence and template selection\n\nThe alignment is the most crucial part of homology modeling. We will not explain what an alignment is and how you make it, this should be known. In an alignment, we put homologous sequences on top of each other in a text file. The point is that amino acids that are on top of each other in the same column are assumed to have the equivalent position in the 3D structure. So if the template sequence has an Ala at position 3, where the target sequence has a Phe, homology modelling tools will use the backbone of the template structure and replace the sidechain at position 3 from Ala to Phe.\n\nHomology modelling evolved over the years and many online tools for homology modelling are available. 
In my experience, homology modelling can be rather difficult and needs expert knowledge depending on the actual situation (sequence conservation, available templates, etc.).\n\nCan you imagine what could be the reasons?\n\n# Building the homology model with Swiss Model \n\nOur current request for homology modelling is a rather safe one, so we can use an automatic server for homology modelling. There are many automatic tools available and many of them compete in regular competitions like lastly, the 12th Community Wide Experiment on the Critical Assessment of Techniques for Protein Structure Prediction (CASP12) - [1].\n\nIn our example, we take the [Swiss Model server](https://swissmodel.expasy.org/interactive). SWISS-MODEL is a fully automated protein structure homology-modelling server, accessible via the ExPASy web server, or from the program DeepView (Swiss Pdb-Viewer). The purpose of this server is to make Protein Modelling accessible to all biochemists and molecular biologists worldwide.\n\n> ### {% icon hands_on %} Hands-on: Template selection step with Swiss Model \n>\n> 1. Browse to the [Swiss Model server](https://swissmodel.expasy.org/interactive) \n> 2. On the first page, paste the sequence of our unknown protein in the field 'Target Sequence' and give the project a name. \n>    <figure id=\"figure-1\"><img src=\"../../images/Modelling_sequence_template_step1.png\" alt=\"Swiss Model Start page -80width\"><figcaption><span class=\"figcaption-prefix\">Figure 1:</span> Start page of Swiss Model</figcaption></figure>\n> 3. Click 'Search templates' to initiate the first step. \n>    Thereafter, the server identifies structural template(s) and gives an overview list of hits \n>    which you can select the templates from.\n>\n{: .hands_on}\n\n> ### {% icon question %} Question\n>\n> Which of the 10 (at the time of writing) possible template structures would you select as template for the model building process? 
\n>\n> <details markdown=\"1\">\n> <summary>{% icon solution %} Solution\n> </summary>\n> \n> We suggest as template **1jxx.1.A** given that it is an X-ray structure with high resolution and a very high \n> sequence identity (X-ray, 0.9 Å, 78.26 %).\n> </details>\n{: .question}\n\n\n> ### {% icon hands_on %} Hands-on: Model Building Step and Visualisation \n>\n> 1. Once you have selected the template, hit 'Build Model' to start the homology modelling procedure. \n>    The server will align the target sequence with the template structure(s), build a model and evaluate it. \n>    These steps require specialized software and integrate up-to-date protein sequence and structure databases. \n>    Each of the above steps can be repeated interactively until a satisfying modelling result is achieved. \n>    ![download model -80width](../../images/Modelling_template_selection_step2.png)\n> 2. Once the model has been built, you can download it.\n>    ![download model -80width](../../images/Modelling_results_step3.png)\n> 3. If the Swiss Model server is too busy at the moment you execute the request, you can download the model from\n>    [here](https://zenodo.org/record/3551850#.Xdqs4ehKiUk).\n> 4. Load the created model into YASARA. \n>    Perform a structural alignment with your reference e.g. 1CRN and try to detect the differences through manipulating the visualisations.\n>    ![structural alignment](../../images/1214.png)\n{: .hands_on}\n\n\n# Conclusion\n{:.no_toc}\n\nHomology modelling evolved over the years and many online tools for homology modelling are available. You have used the Swiss Model service with a reasonably simple modelling request. Often, in research projects, homology modelling can be rather difficult and needs expert knowledge depending on the actual situation (sequence conservation, available templates, etc.).\n","## Introduction\n{:.no_toc}\n\n<!-- This is a comment. -->\n\nMutations in proteins can have various origins. 
Naturally occurring mutations are random and can have any kind of effect on the protein structure and/or function. Mutations can have no effect at all, be stabilizing or destabilizing. In the last two cases, these can lead to diseases.\n\nBut we can also make mutations in the wet lab to study the effect of a single residue position on protein stability, interaction with a peptide ligand etc ... Such site-directed mutagenesis in the wet lab is hard labour and costs money, I don't have to explain that to you. So wouldn't it be easier, cheaper and more rational if you could predict the effect of some mutations first with bioinformatics and then test the really interesting ones in the lab?\n\nFoldX is a molecular modeling tool that can quantitatively predict the change in free energy (kcal/mol) upon mutation. These values approach experimentally determined values. FoldX is a non-interactive command line program. In other words, not user friendly. But the bright news is that I recently developed a YASARA plugin for FoldX, so that all predictions are just a few clicks away. And the nice thing is, it's all free!\n\n## P53 as example protein \n\nIn this section we will let the FoldX plugin loose on some real world examples and give you step-by-step instructions on how to proceed and analyze the results. We will use the P53 tumor suppressor protein as our example molecule. In a first exercise you will make a point mutation with FoldX and determine if the mutation is stabilizing or destabilizing for the P53 structure. 
In a second exercise you will design a mutation in the P53 structure at the DNA binding interface and determine how the mutation affects the interaction energy of P53 with the DNA strand.\n\n## Get data\n\n> ### {% icon hands_on %} Hands-on: Data download\n>\n> Download the file [2AC0.sce](https://zenodo.org/record/3551686/files/2AC0.sce?download=1).\n>\n{: .hands_on}\n\n## What do FoldX energies mean?\n\n\nBefore we start, some basic information about FoldX energies is necessary.\n\nFirst of all, FoldX energies are expressed in kcal/mol.\n\nThe main focus of FoldX is the prediction of free energy changes, e.g. what happens to the free energy of the protein when we mutate an Asp to a Tyr? FoldX will then calculate the free energy of the wild type (WT) and the mutant (MT) and make the difference:\n\n```\nddG(change) = dG(MT) - dG(WT)\n```\n\nFoldX is trained using experimental values to predict ddG(change). It is important to realize that dG(WT) and dG(MT) are meaningless numbers as such. These do not correlate with experimental values. Only ddG(change) does.\n\nAs a rule of thumb we use:\n\n\n**ddG(change) > 0 : the mutation is destabilizing**\n\n**ddG(change) < 0 : the mutation is stabilizing**\n\n\nThe error margin of FoldX is approximately 0.5 kcal/mol, so changes in that range are insignificant. \n\n## How to minimize the structure with FoldX\n\nFoldX assumes that the starting structure has been energy minimized. Although crystal structures with high resolution represent the form with a low energy, FoldX performs best when we minimize it just before we do the predictions. This FoldX procedure is called RepairPDB and should be done on each structure you want to perform calculations on.\n\n> ### {% icon hands_on %} Energetically minimise the structure of P53 bound to DNA\n> \n> Open the YASARA scene 2AC0.sce in YASARA. This is a part of a tetrameric complex of the transcription factor P53 bound to DNA. 
I removed 3 of the 4 P53 structures for simplicity and visualized some nice features.\n> \n> Load the scene with:\n> \n> ```\n> File > Load > YASARA Scene\n> ```\n> <figure id=\"figure-1\"><img src=\"../../images/Training_1.png\" alt=\"monomer bound to DNA -80width\"><figcaption><span class=\"figcaption-prefix\">Figure 1:</span> P53 monomer bound to DNA</figcaption></figure>\n> \n> To Repair (or minimize) the structure with FoldX go to:\n> ```\n> Analyse > FoldX > Repair object \n> ```\n> \n> <figure id=\"figure-2\"><img src=\"../../images/Training_2.png\" alt=\"Select the object for repairing -80width\"><figcaption><span class=\"figcaption-prefix\">Figure 2:</span> Select the object for repairing</figcaption></figure>\n> \n> And select the only object in the list.\n{: .hands_on}\n\nWhen the Repair is finished, the Repaired Object is placed in Object 2 (see top right corner) and superposed with the original Object 1. Take a look at the sidechains and see what FoldX has done while Repairing.\n\nIf you feel the repair takes too long (more than 10 minutes) due to a slow computer, download and open this YASARA Scene with the [Repaired Object](https://zenodo.org/record/3551686/files/2AC0_Repaired.sce?download=1).\n\nBecause we will continue working with this Repaired Object, we can now hide the entire Object 1 by toggling the Visibility column in the top right corner head-up display (HUD).\n\n# How to analyze a mutation \n\nFoldX has mutated the Ala to Trp and the structure with the Trp mutation has been loaded in the next Object (3) and is superposed with the wild type (WT, Object 2). We selected an option to show the VdW clashes in WT and mutant. The atoms that give rise to steric clashes are colored in red. 
Toggle the Visibility of Object 2 (WT) and Object 3 (mutant) and see how many clashes we introduced by mutating the Ala to Trp.\n\n\n<figure id=\"figure-3\"><img src=\"../../images/Training_7.png\" alt=\"Zoomed-in-view on the original Ala159, no Vander Waals clashes here -80width\"><figcaption><span class=\"figcaption-prefix\">Figure 3:</span> Zoomed-in-view on the original Ala159, no Vander Waals clashes here</figcaption></figure>\n\n<figure id=\"figure-4\"><img src=\"../../images/Training_8.png\" alt=\"Zoomed-in-view on the mutated Ala159Trp, lots of red Vander Waals clashes here -80width\"><figcaption><span class=\"figcaption-prefix\">Figure 4:</span> Zoomed-in-view on the mutated Ala159Trp, lots of red Vander Waals clashes here</figcaption></figure>\n\n\n> ### {% icon question %} Questions\n>\n> Van der Waals clashes are red colored atoms. \n> Do you see a difference around the mutation site between WT and mutant? \n>\n> <details markdown=\"1\">\n> <summary>{% icon solution %} Solution\n> </summary>\n> \n> Toggle the Visibility of WT and mutant to see the differences. \n> Open the Console by pressing the spacebar twice and see the free energy change of the mutation. \n> Anything above a change of +0.5kcal/mol is already assumed to be destabilizing.\n> In the console - to open press spacebar twice - we see an energy change of +29 kcal/mol.\n> <figure id=\"figure-5\"><img src=\"../../images/Training_9.png\" alt=\"In the console - to open press spacebar twice - we see an energy change of +29 kcal/mol. 
-80width\"><figcaption><span class=\"figcaption-prefix\">Figure 5:</span> Open the console to explore the situation.</figcaption></figure>\n> This is clearly a destabilizing mutation.\n> </details>\n{: .question}\n\n\n# Study the effect of a second mutation \n\nHide Object 3 by toggling its Visibility so that only Object 2 (the repaired WT) is visible.\nFirst turn on all atoms in the molecules G and H (DNA) again as you did previously, because the FoldX run has hidden them (it rearranged the view to show the VdW clashes).\n\nShow the sidechain of Arg273 of Object 2 by searching for it in the sequence selector, then right-click on it and go to:\n\n\n```\nShow atoms > Sidechain and CA and zoom in on Arg273\n```\n\nNotice how the positively charged Arginine is making an electrostatic interaction with the negative phosphate from the DNA backbone.\n\n<figure id=\"figure-6\"><img src=\"../../images/Training_10.png\" alt=\"R273 makes an electrostatic interaction with the DNA phosphate groups. -80width\"><figcaption><span class=\"figcaption-prefix\">Figure 6:</span> R273 makes an electrostatic interaction with the DNA phosphate groups.</figcaption></figure>\n\nLet's see what would happen to the interaction energy between the DNA and P53 when we mutate this Arginine to Alanine.\n\nRight-click on this Arg273 in the sequence selector and go to:\n\n```\nFoldX > Mutate residue\n```\n\nA number of menus is now presented and here is what you need to do in each menu:\n\n1. Select Calculate interaction energy change\n2. Select Ala\n3. 'Move neighbours' and 'Show disrupted and new hydrogen bonds'\n4. Don't change any numerical options in the last menu\n\n<figure id=\"figure-7\"><img src=\"../../images/Training_11.png\" alt=\"View of the first options menu with 'Show new and disrupted hydrogen bonds' selected. 
-80width\"><figcaption><span class=\"figcaption-prefix\">Figure 7:</span> View of the first options menu with 'Show new and disrupted hydrogen bonds' selected.</figcaption></figure>\n\n> ### {% icon question %} Questions\n> \n> 1. What is the change in interaction energy between P53 and DNA chain G upon mutation?\n>    And what is the reason?\n> 2. Why doesn't the mutation affect the interaction with DNA chain H?\n>\n>\n> <details markdown=\"1\">\n> <summary>{% icon solution %} Solution\n> </summary>\n>\n> 1. Toggle the Visibility between this mutant and the WT structure and see how the hydrogen bonding changes and check the output in the Console. \n>     <figure id=\"figure-8\"><img src=\"../../images/Training_12.png\" alt=\"Mutation\"><figcaption><span class=\"figcaption-prefix\">Figure 8:</span> Change in interaction energy</figcaption></figure>\n>     We see that the mutation decreases the interaction with DNA strand G by approximately 1 kcal/mol\n>     since we lost 1 hydrogen bond.\n> \n> 2. ***TODO***  \n> \n> </details>\n>\n>\n{: .question}\n\n# Conclusion\n{:.no_toc}\n\nInstead of DNA-protein, FoldX can of course also calculate interaction energy changes in protein-protein or peptide-protein complexes.\n\n","## Structural comparison and RMSD \nWe compare structures by structurally aligning them on top of each other. That is, we\nalign structurally equivalent atoms. For now, we will only use CA atoms as a representation of the backbones. \nBut YASARA can also align on any type of atom you want. You always need to specify:\n\n-  source object(s): the structure(s) that need to be rotated and translated to superpose on another structure\n-  target object: the structure to superpose on\n\nAn optimal alignment is found when the root-mean-square deviation (RMSD) is at a minimum. 
\nThe RMSD is given as:\n<figure id=\"figure-1\"><img src=\"../../images/RMSD.gif\" alt=\"RMSD\"><figcaption><span class=\"figcaption-prefix\">Figure 1:</span> calculation of RMSD</figcaption></figure>\nwhere R is the distance between the two atoms of a structurally equivalent atom pair (CA in our case) and n is the total number of atom pairs.\n\n> ### {% icon hands_on %} Hands-on: Data download\n>\n> 1. Download the following adapted PDB files from [Zenodo](https://zenodo.org/record/3550492#.XdeNL1dKiUk) \n>\n>    ```\n>     1DKX_1.pdb 1DKY_1.pdb 1DKZ_1.pdb 3DPO_1.pdb 3DPP_1.pdb \n>    ```\n>\n{: .hands_on}\n\n## Aligning multiple structures using YASARA\n\nNow load all of them in YASARA:\n\n```\nFile > Load > PDB File\n```\n\nand select the CA (C-alpha) view (F4) and superpose with the MUSTANG algorithm:\n\n\n```\nAnalyze > Align > Objects with MUSTANG\n```\n\n\nIn the first window you have to select the source objects that will be repositioned. Select Objects 2 till 5. In the second window you select the target Object to superpose on. That would then be the first object.\n\nNotice that YASARA prints the RMSD of every structural alignment in the lower Console. Open the Console by pressing the spacebar once or twice to extend it.\n\nColor the atoms by their B-factor:\n\n```\nView > Color > Atom > Belongs to or has > All\nThen choose BFactor in the next window and press 'Apply unique color'.\n```\n\nHigh BFactors are yellow, low BFactors are blue.\n\n### {% icon question %} Questions\n\nDo you see a correlation between the BFactors and the variability in the structure?\n\n> <details markdown=\"1\">\n> <summary>{% icon solution %} Solution\n> </summary>\n> \n> ![Structural alignment](../../images/aligned-structures.png) \n>\n> \n> </details>\n\n\n# Conclusion\n{:.no_toc}\n\nStructural alignment of related structures is a very efficient approach to spot similarities and differences of structurally related proteins.\n","# 1. 
Introduction\nWhat if we have files that we do not want Git to track for us, like backup files or intermediate files created during data analysis? Remember that GitHub is not your next cloud storage infrastructure. Hence, (big) data should not be uploaded on GitHub. In fact, there's a strict file size limit of 100MB so you won't even be able to do so. \n\nRegardless of the above, it is often useful if your data is in the same projectfolder. And you can't help the fact that Jupyter Notebooks makes intermediate checkpoints (.ipynb_checkpoints) in the same folder of the notebook. \n\nGit has a file, the `.gitignore` file in which we can write expressions that define the files it should ignore. This chapter will briefly discuss the `.gitignore` file with a few simple examples. \n\n# 2. Expressions\nImagine the following project folder structure:\n\n```\n project-folder/\n    |\n    |- .git/\n    |- .ipynb_checkpoints/\n    |- .Rhistory/\n    |\n    |- data/\n    |   |- R1.fastq\n    |   |- dataset.csv\n    |\n    ...\n```\n\nLet's discuss how to ignore a specific file and how we can use symbols to generalize the ignoring behaviour.     \n\n- **Ignore a file**:\n\nThe easiest would be to define the file or the path to the file. E.g. the fastq file can be ignored by adding `data/R1.fastq` to the `.gitignore` file. \n\nSimilar to a file, a folder can also be ignored. The folders `data/` and `.ipynb_checkpoints/` can be ignored by adding the following lines:\n```\ndata/\n.ipynb_checkpoints/\n``` \n\n- **`*`, `!` and `#`**:\n\nThe asterisk is often used in `.gitignore` files and represents a wildcard. E.g. `*.csv` will ignore any csv file in your folder and subfolders. The asterisk can precede a file format in which case it will ignore all the files with that format (e.g. ignore all csv, fastq, sam, bam, xlsx, pdf, etc. files). \n\nAn exclamation mark is used for exceptions. 
The following lines of code will ignore all files in the data folder, except for the `dataset.csv` file:\n```\ndata/\n!data/dataset.csv\n```\n\nDocumentation lines are preceded by a `#`. \n\n# 3. Standard files\n\nIt's always good to think this through and manually add the files or folders that need to be ignored. However, it's also useful to know that there are standardized `.gitignore` files. These files have been created based on a specific programming environment. They are all accessible in [this repository](https://github.com/github/gitignore) and contain `.gitignore` files for Python, R, Ruby, Java, Perl, C++, amongst many others. These files can also be added on the fly to a new repository by initializing the repository with one of these files (see figure below). \n\n--- \n\n<center><img src=\"../../images/gitignore.PNG\" /></center>\n\n---\n\nLet's continue with the [next session](https://material.bits.vib.be/topics/git-introduction/tutorials/8_github_rstudio/tutorial.html)!\n","# Basic statistics with GraphPad Prism \n{:.no_toc}\n\nThis introductory video has been created during a livestream session in March 2020. We cover basic statistics, advanced statistics, graphs, curve fitting and survival analysis.\n\n<iframe src=\"https://www.youtube.com/embed/7KqYZ9P9eIk\" allowfullscreen=\"\" allow=\"accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture\" height=\"515px\" style=\"display: inline-block;\" width=\"800px\" title=\"\"></iframe>\n","# 1. Getting started\nAs mentioned in the first chapter, we will first introduce Git in a terminal session. Linux and Mac users can open a terminal directly, Windows users have to open the 'Git Bash' program which will act like a normal Linux terminal. \n\nIf we want to use git from the command line, we always start with typing  `git` followed by a verb defining a more specific command. These commands can be anything like staging, committing, pushing, etc. 
If we want to have an overview of the most common Git commands we can enter `git --help`.\n\nBefore we can get started, we have to personalize a couple of configurations (e.g. we need to tell git who we are). Git comes with a configuration file that allows us to control all aspects of how Git looks and operates. There are three different levels on which we can do configurations, for example we could set configurations on a specific project (in one folder) or we could set them on a more global level where the configurations are applicable for all our projects. We will only edit the global configurations file here which is fine for pretty much all cases. \n\nWe can have a look at our global config file with the following command:\n```\ngit config --global --list\n```\n\nHowever, if this is the first time it will result in a failure, telling us that this file does not exist. If we just run the following commands, Git will create the configuration file automatically and add our GitHub username and the email address of our account, respectively.\n\n```\ngit config --global user.name \"yourgithubusername\"\ngit config --global user.email \"your_email@domain.com\"\n```\n\nWith these settings we can already get started, however passing on information from and to GitHub in this manner is not really secure. Using the SSH protocol, we can connect and authenticate to remote servers and services in a secure way. With SSH keys, we can connect to GitHub without supplying our username or password at each visit. If you want to create one, follow the brief instructions below or find them at [GitHub](https://help.github.com/en/github/authenticating-to-github/checking-for-existing-ssh-keys). An SSH key pair consists of a private key and a public key. The private key must stay secured on your computer at all times; the public key can be shared with third-party software to connect to it. \n\n1. List all the files (using Git Bash) to see if existing SSH keys are present. 
\n\n```\nls -al ~/.ssh\n```\nIf there is a public SSH key present (file ending in `.pub`) we're all set and can continue to step 3, otherwise we will now generate a public key.\n\n2. The following will create a new ssh key, using the provided email as a label.\n```\nssh-keygen -t ed25519 -C \"your_email@domain.com\"\n```\nWhen you're prompted to \"Enter a file in which to save the key,\" press Enter. This accepts the default file location.\nThen it will ask us to type a secure passphrase, press Enter to skip this step. As long as no-one other than you has access to the key, you do not require a passphrase. \n\nNow we have our SSH keys, we can let them be managed by the `ssh-agent`. Ensure the ssh-agent is running with:\n```\neval `ssh-agent -s`\n```\n\nAdd your SSH private key to the ssh-agent.\n```\nssh-add ~/.ssh/id_ed25519\n```\n\n3. Lastly, we need to add the public key to GitHub. Copy paste the content of the public key file manually or with: \n```\nclip < ~/.ssh/id_ed25519.pub\n```\nThen, go to GitHub, click on your profile picture and go to settings. In the user settings sidebar, click **SSH and GPG keys** and select **New SSH key**. Choose a descriptive title (e.g. personal computer) and paste your key into the \"Key\" field and save your changes by clicking on **Add SSH key** and confirming with your password. \n\n\nAnother thing we can edit in the configuration file is the editor. An editor is the program we want Git to use when we have to add a message or solve conflicts. During this course we will ommit the usage of editors as much as possible, though it does not mean that something might go wrong and we need to interact with our editor. \n```\ngit config --global core.editor <editor>\n```\nin which we replace `<editor>` with `vim`, `notepad`, `emacs`, `atom`, `sublime` or any other editor you prefer.  
\n\nThe next chapter is considered further reading material and will be discussed later in the course, however since it is related to the configurations file, we have mentioned it here. \n\n# 2. Aliases\n\nThe configuration file is also a place where we can make our own aliases. An alias is a new command tailored to your wishes. It often consists of an existing Git command (e.g. `git log`) followed by a bunch of variables. This omits that we have to type a long command the whole time. Here are some useful aliases for a structured history overview:\n\n```\ngit config --global alias.hist \"log --pretty=format:'%h %ad | %s%d [%an]' --graph --date=short\"\ngit config --global alias.oneline \"log --graph --decorate --pretty=oneline --abbrev-commit --all\"\ngit config --global alias.mylog1 \"log --pretty=format:'%h %s [%an]' --graph\"\ngit config --global alias.mylog2 \"log --pretty=format:'%Cgreen%h%Creset %ai | %s %Cblue[%an] %Cred%d' --date=short -n 10 --color\"\ngit config --global alias.mylog3 \"log --decorate --pretty='format:%C(auto) %h %d %s %Cgreen(%cr by %cn)%Creset' --graph --all\"\n```\nOnce they are set, you can use them whenever you like. E.g.: running `git hist` gives us the same result as `git log --pretty=format:'%h %ad | %s%d [%an]' --graph --date=short`. \n\n\nIf at some point we are not happy any more about an alias, we can delete it with the following command:\n```\ngit config --global --unset alias.<command>\n```\n\n---\n\nLet's go to the [next session](https://material.bits.vib.be/topics/git-introduction/tutorials/3_first_commit/tutorial.html)!","qbase+ is software to visualize and analyze qPCR data. 
It allows you to perform various types of analyses:\n  - statistical analysis of gene expression\n  - advanced copy number analysis\n  - miRNA profiling\n  - ChIP-qPCR analysis\n# Installation and licensing\nYou can find the installation instructions on [VIB qbase+ support page](https://www.bits.vib.be/index.php/software-overview/qbaseplus)\nVIB only offers qbase+ to VIB scientists, you need a valid VIB email address to run the software. Biogazelle (the company who has developed the software) have written a manual with instructions on how to use the software. Download [Biogazelle's user manual](https://www.biogazelle.com/system/files/manuals/qbaseplus_manual_0.pdf). Before you can download the manual you have to log on to [the qbase+ website](https://www.qbaseplus.com/) using your qbase+ account. Use your VIB email address for setting up this account.\n# Training material\n  - [slides](http://data.bits.vib.be/pub/trainingen/qbasePLUS/qbase_2018.pdf)\n  \n  **Extra**\n  - [clean log10 transformed CNRQs](http://data.bits.vib.be/pub/trainingen/qbasePLUS/Log10CNRQsClean.xlsx) for checking normality in Prism\n  - [clean untransformed CNRQs](http://data.bits.vib.be/pub/trainingen/qbasePLUS/CNRQsClean.xlsx) for visualization in Prism\n  - [R script](http://data.bits.vib.be/pub/trainingen/qbasePLUS/qPCR.R) for analysis and visualization\n  - [log10 transformed CNRQs of control samples](http://data.bits.vib.be/pub/trainingen/qbasePLUS/resultslog.csv) for analysis and visualization in R\n  - [log10 transformed CNRQs of treated samples](http://data.bits.vib.be/pub/trainingen/qbasePLUS/resultslogTreated.csv) for analysis and visualization in R\n","Since normalization of qPCR data is based on the assumption that the reference targets have the same expression level in all samples it is crucial that the expression of the chosen reference genes is stable.\nHowever, none of the so-called **housekeeping** genes is universally stably 
expressed.\n\n[Genevestigator](https://www.genevestigator.com/gv/), both the commercial and the free version, contains a tool, called RefGenes, that allows to identify candidate reference genes that display very stable expression in the context that you are working in, typically a certain tissue of a certain organism.\n\nGenevestigator is a platform that contains curated public microarray data from thousands of experiments/conditions.\n\nRefGenes allows you to select the conditions that are relevant for you, e.g. mouse liver, human fibroblasts, or Arabidopsis thaliana leaves. In a next step, RefGenes identifies the genes with the most stable expression in the selected conditions.\n\n## Starting the RefGenes tool\n\n| How to start the RefGenes tool ? |\n| :-------------------------------- |\n| - Open the [RefGenes page](http://www.refgenes.org/rg/).\n - Click **start GENEVESTIGATOR**\n - Click the **Install/Start** button\n - This will automatically open a Genevestigator startup page. Keep this page open during the analysis. Closing this page will close Genevestigator.\n - Login. Also for the free version you need to create an account (use your academic email for this since you will need your vib email to get access to the commercial version).\n - Genevestigator is opened automatically\n\n## The Genevestigator user interface\n\nThe Genevestigator consists of the following components:\n  - **Sample Selection** panel: to choose the experimental conditions you're interested in (green)\n  - **Gene Selection** panel: to choose the genes you're interested in (blue)\n  - Center panel shows an overview of all available tools (purple). Once you have selected a tool, the panel will show the results of the analysis that is done by the tool.\n  - **Home** button (red) allows to return to the overview of the tools at any time. 
The text next to the home button indicates the toolset that you have selected.\n\nClick the **RefGenes** tool at the bottom.\n\n## Using the RefGenes tool to find reference genes\n\n### STEP 1: Choose samples from a biological context similar to those in your qPCR experiment\n\n| How to choose the samples you want to analyze ? |\n| :-------------------------------- |\n|\n - Click the **New** button in the **Sample Selection** panel. The selection of samples defines which data are used for the analysis.\n - Select the organism you're interested in (in this example: human)\n - Select the array type you want to analyze (in this example: human 133_2).\nFor most organisms Genevestigator contains expression data from multiple types of microarrays, e.g. different generations of Affymetrix GeneChips®. On these arrays, genes are sometimes represented by different sets of probes. To keep the analysis results easily interpretable, data from different array types are not mixed.\n - Click the **Select particular conditions** button to select all samples with a certain annotation, e.g. all data from a certain tissue type.\n - Select the type of conditions (red) you want to base your selection on (in this example: Anatomy). For each type (anatomy, neoplasms, perturbations, development...) you can browse the corresponding ontologies and select the desired condition(s) (green) (in this example: cardiac muscle).\n - Click **OK**\n\nNote that you can select multiple tissues.\nWhen you select samples for use in the RefGenes tool, you have to focus on microarrays from samples that were collected in conditions similar to those in your qPCR experiment. Don't make too general a selection, e.g. all human samples: you might end up with genes that are stable in most conditions but not in yours. Don't make a very specific selection either, e.g. human heart samples from patients taking the same medication as yours. 
If you want to broaden your study later on with samples from other patients, your reference genes might not be valid anymore. It is recommended to select reference genes in the same organism and the same / a similar tissue type as the one that you used in your experiments.\n\n### STEP 2: Select the gene(s) you want to measure in your qPCR experiment\n\nThis step is not essential, but it helps you to see whether your target gene(s) is (are) strongly or weakly expressed in the conditions of interest selected in STEP1. This allows you to search for candidate reference genes in a similar range of expression.\n\n| How to choose the genes you want to analyze ? |\n| :-------------------------------- |\n|\n - Click the **New** button in the **Gene Selection** panel.\n - Enter the name of your target gene in the text area (in this example: GOT1) and click **OK**\n - Open the RefGenes tool (if you haven't done that already). A red box plot representing the distribution of the expression levels of GOT1 in the 68 selected human heart samples appears in the center panel. As you can see, this gene is highly expressed in heart.\n\n\n\n\n### STEP 3: Find candidate reference genes\n\nThe reference genes that are suggested by GeneVestigator have the\nfollowing characteristics:\n\n  - They have the most stable expression levels across all selected samples (a small boxplot)\n  - Their overall expression level is similar to that of the target gene(s) of your qPCR experiment\n| How to find the candidate reference genes ? |\n| :-------------------------------- |\n|Click the **Run** button in the RefGenes tool. RefGenes will show the top 20 most stable genes with similar expression levels:\n\n\n\n## Exercises\n\n### Finding candidate reference genes in the free version of Genevestigator\n\nNow we will make a more elaborate exercise on finding candidate reference genes. 
We will do the analysis in the free version of RefGenes but the analysis in the commercial version is very similar.\nSuppose we want to compare the expression stability of the 4 commonly used reference genes for qPCR on mouse liver samples (ACTB, GAPDH, HPRT and TUBB4B) to that of 4 reference genes that are suggested by Genevestigator.\nTo this end we open the RefGenes tool and select the liver samples of the mouse 430_2 arrays.\n\n| Check the expression stability of the 4 commonly used reference genes ? |\n| :-------------------------------- |\n|\n - Click the **New** button in the **Gene Selection** panel to create a new selection. The selection of samples defines which data are used for the analysis.\n - Enter the name of your target gene in the text area (for example: ACTB) and click **OK**\n\nWhen you are using the commercial version, you may enter multiple genes at the same time, in the free version you have to enter them one by one. This means that you have to add the first gene as described above and then add the next gene by clicking the **Add** button and so on...\n\nFinally you end up with an expandable list of the genes you asked for and you can tick or untick them to control the display of their expression data in the main window. When you tick the 4 commonly used reference genes you can see how stable they are expressed in the 651 mouse liver samples that are stored in Genevestigator:\n\nAs you can see, the expression levels of the commonly used reference genes in the selected mouse liver samples is pretty variable which is also confirmed by their relatively high SD values.\nOften there are multiple probe sets for the same gene. When you use the free version you may only choose one probe set per gene so you have to make a choice. 
How to make that choice ?\nAffymetrix probe set IDs have a certain meaning: what comes after the underscore tells you something about the quality of the probes:\n\n  - **_at** means that all the probes of the probe set hit one known transcript. This is what you want: probes specifically targeting one transcript of one gene\n  - **_a_at** means that all the probes in the probe set hit alternate transcripts from the same gene. This is still ok the probes bind to multiple transcripts but at least the transcripts come from the same gene (splice variants)\n  - **_x_at** means that some of the probes hit transcripts from different genes. This is still not what you want: the expression level is based on a combination of signals of all the probes in a probe set so also probes that cross-hybridize\n  - **_s_at** means that all the probes in the probe set hit transcripts from different genes. This is definitely not what you want: if the probes bind to multiple genes you have no idea whose expression you have measured on the array\n\nSo I always ignore probe sets with s or x. If you have two specific probe sets for a gene, they should more or less give similar signals. If this is not the case, I base my choice upon the expression level that I expect for that gene based on previous qPCR results.\n\nAs you can see, each of these 4 commonly used reference genes has a high expression level. Most genes do not have such high expression levels. In most qPCR experiments your genes of interest will have low or medium expression levels, so these reference genes will not be representative for the genes of interest.\n\nReference genes should ideally have similar expression levels as the genes of interest. Therefore, we will select the four most stably expressed genes with a medium expression level (between 8 and 12) according to the RefGenes tool.\n\n| Select the 4 most stably expressed candidate reference gene with medium expression levels. 
|\n| :-------------------------------- |\n|\n - Untick all target genes.\n - Click the **Run** button at the top of the main window and check if the range is set correctly\n\nSelect the 4 candidates with the lowest SD: Then, we performed qPCR on a representative set of 16 of our liver samples to measure the expression of these 8 candidate reference genes and analyzed the data ([See how to select the best reference genes using geNorm in qbase+](http://wiki.bits.vib.be/index.php/Analyzing_data_from_a_geNorm_pilot_experiment_in_qbase%2B)).\n\n\n### Finding candidate reference genes in the commercial version of Genevestigator\n\nWe will do the same exercise as above in the commercial version of Genevestigator. The difference between the free and commercial version of RefGenes is the number of target genes you can select. In the free version you have to select one gene and then gradually add all other genes one at a time. The commercial version allows you to load as many target genes as you want simultaneously. As a consequence, you can select multiple probe sets for the same gene.\nAll VIB scientists have free access to the commercial version of Genevestigator via their VIB email address. If you don't know your VIB email address, check [the Who's Who of VIB](http://www.vib.be/en/whoiswho/Pages/default.aspx).\n\n  - Open a browser and go to the [Genevestigator website](https://www.genevestigator.com/)\n  - If it's your **first time to access Genevestigator**, create an account by clicking **join now** button. You will be redirected to a new window in which you will give some personal information including a valid VIB email address. Click **Register** and check your email to activate your new account. 
Go back to the [GeneVestigator website](https://www.genevestigator.com/)\n  - Choose the research field you want to investigate: **pharma/biomedical** or **plant biology** by clicking the corresponding button\n  - Click **Start**\n  - Use your VIB email address and password to login to Genevestigator.\n  - This will automatically open a Genevestigator startup page in your browser. Keep this page open during the analysis. Closing this page will close Genevestigator.\n  - Genevestigator is opened automatically\n\nOpen the RefGenes tool by clicking its icon in the **Further tools** section and select the liver samples of the mouse 430_2 arrays [as explained in the previous exercise](http://wiki.bits.vib.be/index.php/Using_GeneVestigator_to_select_candidate_reference_genes#STEP_1:_Choose_samples_from_a_biological_context_similar_to_those_in_your_qPCR_expriment).\n| Check the expression stability of the 4 commonly used reference genes ? |\n| :-------------------------------- |\n| - Click the **New** button in the **Gene Selection** panel to create a new selection. The selection of samples defines which data are used for the analysis.\n - Enter the names of the 4 commonly used reference genes in the text area and click **OK**\n\nI still remove probe sets with an _s or _x since they do not specifically bind to one single gene:\nFinally you end up with an expandable list of the genes you asked for and you can tick or untick them to control the display of their expression data in the main window. 
By default all probe sets are ticked so you can see how stable the commonly used reference genes are expressed in the 651 mouse liver samples that are stored in Genevestigator:\nAs you can see, the expression levels of the commonly used reference genes in the selected mouse liver samples is pretty variable which is also confirmed by their relatively high SD values.\n\nThe next step of selecting the 4 most stable candidate reference genes with medium expression levels is exactly the same as described above for the free version of RefGenes.\n\n| Create a new gene selection with 20 found candidate reference genes and call it mouse_references. |\n| :-------------------------------- |\n|Click the **New** button at the top of the main window to create a new selection.\n\nTo change the name of the selection right click the name in the **Gene selection** panel and select **Rename**\n\n| Identify perturbations where the mouse_references genes show more than 1,5 fold differential expression using the Condition perturbations tool. |\n| :-------------------------------- |\n|Click the **Home** button at the top to go back to the tools overview page.\n\nClick the **Perturbations** tool in the **Condition Search tools** section\n\n\nMake a **New Sample selection** including all mouse 430_2 arrays.\nUntick all genes except for the first one and filter the long heatmap for at least 1.5 fold change differential expression:\n\n\nYou now get a list of mouse samples in which the gene is not stably expressed so you can check if any of these samples is related to the samples in your study. 
Hover your mouse over the name of a sample to see more details about the sample.\nYou can do this for each of the candidate reference genes and select the ones that best fit your needs.\n\n[Exercise on selecting reference genes for metacaspases in Arabidopsis thaliana](http://wiki.bits.vib.be/index.php/GV_Exercise.1).\n\n\nIn a geNorm pilot experiment you analyze a set of candidate reference genes in a representative set of samples that you want to test in your final experiment. Based on the M-values and CVs that are calculated by qbase+, you can choose the genes that most satisfy the criteria for a good reference gene.\n\n### Exercise 1: reference genes for mouse liver\n\nWe come back to the 8 candidate reference genes that we selected for mouse liver:\n\n  - 4 commonly used reference genes: ACTB, TUBB4B, GAPDH and HPRT\n  - 4 candidate reference genes with very stable medium expression levels selected based on expression data coming from more than 600 microarrays of mouse liver samples using Genevestigator: Gm16845, MUSK, OTOP3, EDN3\n\nWe have measured their expression in a representative set of 16 of our mouse liver samples, each in triplicate. We will now analyze the stability of these candidate reference genes in our samples.\n\n#### Creating a new Experiment\n\n| Create a new Experiment called GeNormMouse in Project1 |\n| :------------------------------------------- |\n| Open qbase+ or, if the software is already open, click the Launch Wizard button.\n\nYou can find the details on how to create a new experiment in Creating a project and an experiment\n\n#### Loading the data into qbase+\n\nThe data is stored in [the RefGenes folder](http://data.bits.vib.be/pub/trainingen/qbasePLUS/DataTraining/RefGenes.zip). It consists of 8 Excel files, one file for each candidate reference gene. If you are not working on a BITS laptop, download and unzip the folder.\n\n| Import the data. These files are in qBase format. 
|\n| :------------------------------------------- |\n| You can find the details on how to start the data import in Loading data into qbase+\n\nUnlike the previous exercise, qbase+ does not allow you to do a quick import this time. In the Import Run window Manual import is selected:\nMake sure that Upload file to Biogazelle support for further analysis is NOT selected and click Next.\nMake sure the correct File type is selected (qBase) and click Finish.\nThis file contains the data of the geNorm pilot experiment. In the pilot experiment, 8 candidate reference genes were measured in 16 representative mouse liver samples.\n#### Analyzing the geNorm pilot data\n\n| Specify the aim of the experiment. |\n| :------------------------------------------- |\n| In this experiment we want to select the ideal reference genes for our next experiments so we choose selection of reference genes (geNorm)\n\n| Check the quality of the replicates (use default parameter settings). |\n| :------------------------------------------- |\n| You can find the details on how to check the quality of the replicates in the Checking the quality of technical replicates and controls section of Analyzing gene expression data in qbase+\n\nWe haven't included any positive or negative controls so you don't need to show their details.\n\n| Select the Amplification efficiencies strategy you want to use. |\n| :------------------------------------------- |\n| You can find the details on how to select the Amplification efficiencies strategy in the Taking into account amplification efficiencies section of Analyzing gene expression data in qbase+\n\nWe haven't included dilution series nor do we have data from previous qPCR experiments regarding the amplification efficiencies so we choose to use the same efficiency for all genes.\nIt is of course better to include a dilution series for each gene to have an idea of the amplification efficiencies of each primer pair.\n\n| Convert all genes to Reference genes. 
|\n| :------------------------------------------- |\n| You can convert all the genes simultaneously by selecting Use all targets as candidate reference genes\n\nClick Finish.\n\n| Which genes are you going to use as reference targets in further experiments ? |\n| :------------------------------------------- |\n| Upon clicking Finish, the geNorm window containing the analysis results is automatically opened. The geNorm window consists of three tabs. The tabs are located at the bottom of the window: geNorm M, geNorm V and Interpretation.\nThe first tab, geNorm M, shows a ranking of candidate genes according to their stability, expressed in M values, from the most unstable genes at the left (highest M value) to the best reference genes at the right (lowest M value):\nThe second tab, geNorm V, shows a bar chart that helps determine the optimal number of reference genes to be used in subsequent analyses:\n\nThe number of reference genes is a trade-off between practical considerations and accuracy. It is a waste of resources to quantify more genes than necessary if all candidate reference genes are relatively stably expressed and if normalization factors do not significantly change when more genes are included. However, Biogazelle recommends the minimal use of 3 reference genes and stepwise inclusion of more reference genes until the next gene has no significant contribution to the normalization factors.\nTo determine the need for including more than 3 genes for normalization, pairwise variations Vn/n+1 are calculated between two sequential normalization factors. Simply stated: V is a measure of the added value of adding a next reference gene to the analysis. A large variation means that the added gene has a significant effect and should be included.\nIn normal experiments like the Gene expression experiment (see Analyzing gene expression data in qbase+), we only have 3 reference genes so we will see only 1 bar here. 
But in this geNorm pilot experiment, we analyzed 8 candidate reference genes, so we see 6 bars.\nAll pairwise variations are very low, so even the inclusion of a third gene has no significant effect. Based on a preliminary experiment that was done by Biogazelle, 0.15 is taken as a cut-off value for V, below which the inclusion of an additional reference gene is not required. Normally this threshold is indicated by a green line on the geNorm V bar chart. However since all V-values fall below the threshold in this geNorm pilot experiment, you don’t see this line on the bar chart.\nSo, these results mean that for all subsequent experiments on these samples, two reference genes, EDN3 and MUSK, would be sufficient. However, as stated before, Biogazelle recommends to always include at least three reference genes in case something goes wrong with one of the reference genes (so also include Gm16845). |\nThese are artificial data. But when you read [the paper by Hruz et al., 2011](http://www.biomedcentral.com/1471-2164/12/156/abstract) you see that the genes that are selected by Genevestigator are often outperforming the commonly used reference genes.\n\n### Exercise 2: reference genes for human heart\n\n#### Creating a new Experiment\n\n| Create a new Experiment called GeNormHuman in Project1        |\n| :------------------------------------------------------------ |\n| You can find the details on how to create a new experiment in Creating a project and an experiment |\n\n#### Loading the data into qbase+\n| Import [Run6](http://data.bits.vib.be/pub/trainingen/qbasePLUS/DataTraining/Run6.xls) . This file is in qBase format. |\n| :------------------------------------------- |\n| You can find the details on how to start the data import in Loading data into qbase+. Unlike the previous exercise, qbase+ does not allow you to do a quick import this time. 
In the Import Run window Manual import is selected:\n\nMake sure that Upload file to Biogazelle support for further analysis is NOT selected and click Next. Select the correct File type (qBase) and click Finish. This file contains the data of the geNorm pilot experiment. In the pilot experiment, 10 candidate reference genes were measured in 20 representative samples.\n\n#### Analyzing the geNorm pilot data\n\n| Specify the aim of the experiment.        |\n| :---------------------------------------- |\n| In this experiment we want to select the ideal reference genes for our next experiments so we choose selection of reference genes (geNorm) |\n\n| Check the quality of the replicates and the controls (use default parameter settings). |\n| :------------------------------------------- |\n| You can find the details on how to check the quality of the replicates in the Checking the quality of technical replicates and controls section of Analyzing gene expression data in qbase+\n\nAll replicates and controls have met the quality criteria so there's no need to inspect them further. |\n| Select the Amplification efficiencies strategy you want to use. |\n| :------------------------------------------- |\n| You can find the details on how to select the Amplification efficiencies strategy in the Taking into account amplification efficiencies section of Analyzing gene expression data in qbase+. We haven't included dilution series nor do we have data from previous qPCR experiments regarding the amplification efficiencies so we choose to use the same efficiency (E=2) for all genes. |\n\nIt is of course better to include a dilution series for each gene to have an idea of the amplification efficiencies of each primer pair.\n| Convert all genes to Reference genes.                                                                         
|\n| :------------------------------------------------------------------------------------------------------------ |\n| You can convert all the genes simultaneously by selecting Use all targets as candidate reference genes |\n\nClick Finish.\n\n| Which genes are you going to use as reference targets in further experiments ? |\n| :------------------------------------------- |\n| Upon clicking Finish, the geNorm window containing the analysis results is automatically opened. The geNorm window consists of three tabs. The tabs are located at the bottom of the window: geNorm M, geNorm V and Interpretation.\nThe first tab, geNorm M, shows a ranking of candidate genes according to their stability, expressed in M values, from the most unstable genes at the left (highest M value) to the best reference genes at the right (lowest M value):\nThe second tab, geNorm V, shows a bar chart that helps determining the optimal number of reference genes to be used in subsequent analyses:\n\nThe number of reference genes is a trade-off between practical considerations and accuracy. It is a waste of resources to quantify more genes than necessary if all candidate reference genes are relatively stably expressed and if normalization factors do not significantly change when more genes are included. However, Biogazelle recommends the minimal use of the 3 most stable candidate reference genes and stepwise inclusion of more reference genes until the next gene has no significant contribution to the normalization factors.\nTo determine the need of including more than 3 genes for normalization, pairwise variations Vn/n+1 are calculated between two sequential normalization factors. Simply stated: V is measure of the added value of adding a next reference gene to the analysis. 
A large variation means that the added gene has a significant effect and should be included.\nIn normal experiments like the Gene expression experiment, see Analyzing_gene_expression_data_in_qbase+, we only have 3 reference genes so we will see only 1 bar here. But in this geNorm pilot experiment, we analyzed 10 candidate reference genes, so we see 8 bars.\nAll pairwise variations are very low, so even the inclusion of a third gene has no significant effect. Based on a preliminary experiment that was done by Biogazelle, 0.15 is taken as a cut-off value for V, below which the inclusion of an additional reference gene is not required. Normally this threshold is indicated by a green line on the geNorm V bar chart. However since all V-values fall below the threshold in this geNorm pilot experiment, you don’t see this line on the bar chart.\nSo, these results mean that for all subsequent experiments on these samples, two reference genes, HPRT1 and GADP, would be sufficient. However, as stated before, Biogazelle recommends to always include at least three reference genes in case something goes wrong with one of the reference genes (so also include YHWAZ). \n\n\n\nIn this example we will analyze data from an artificial expression study containing the following samples:\n  - 6 treated samples: treated1, treated2, ... treated6\n  - 6 control samples: control1, control2, ... control6\n\nIn this study, the expression of the following genes was measured:\n  - 4 commonly used reference genes: ACTB, HPRT, GAPDH, and TUBB4. 
We have seen in [the previous exercise](http://wiki.bits.vib.be/index.php/Analyzing_data_from_a_geNorm_pilot_experiment_in_qbase%2B#Exercise_1:_reference_genes_for_mouse_liver) that the expression of these reference genes in mouse liver samples is not as stable as generally thought.\n  - 3 genes of interest:\n      - Low: a gene with low expression levels\n      - Medium: a gene with moderate expression levels\n      - HighVar: a gene with low and very noisy expression\n\nIn general, the lower the expression level, the more noisy the qPCR results will become. For each of the genes of interest we have included a run in which a 2-fold difference in expression between control and treated samples was created (Low1, Medium1 and HighVar1) and a run with a 4-fold difference in expression (Low2, Medium2 and HighVar2).\nThere are three technical replicates per reaction. In a second experiment we used [the reference genes that were obtained via Genevestigator](http://wiki.bits.vib.be/index.php/Using_GeneVestigator_to_select_candidate_reference_genes#Finding_candidate_reference_genes_in_the_free_version_of_Genevestigator) and that proved to be [more stably expressed in mouse liver samples than the commonly used references](http://wiki.bits.vib.be/index.php/Analyzing_data_from_a_geNorm_pilot_experiment_in_qbase%2B#Exercise_1:_reference_genes_for_mouse_liver).\nThe data can be found in the NormGenes folder on the BITS laptops or can be downloaded: [from our website](http://data.bits.vib.be/pub/trainingen/qbasePLUS/DataTraining/NormGenes.zip).\n\n#### Creating a new experiment\n\n| Create a new Experiment called NormGenes1 in Project1 |\n| :---------------------------------------------------- |\n| You can find the details on how to create a new experiment in Creating a project and an experiment |\n\n#### Loading the data\n\n| Import Run1 to Run5. These files are in qBase format. 
|\n| :---------------------------------------------------- |\n| You can find the details on how to import the data file in the Loading the data into qbase+ section of Analyzing data from a geNorm pilot experiment in qbase+ |\n\nWe are going to compare expression in treated versus untreated samples so we need to tell qbase+ which samples are treated and which not. To this end, we have constructed [a sample properties file](http://data.bits.vib.be/pub/trainingen/qbasePLUS/DataTraining/Sample_Properties_Norm.xlsx) in Excel containing the grouping annotation as a custom property called Treatment.\n\n| Import the Sample Properties file. |\n| :---------------------------------------------------- |\n| You can find the details on how to import the data file in the Adding annotation to the data section of Loading data into qbase+.\n\n| Select to import the custom property. |\n| :---------------------------------------------------- |\n| So as you can see we have 6 treated and 6 untreated samples and we have measured the expression of the 4 commonly used reference genes and 6 genes of interest:\n\n#### Analyzing the data\n\n| Which amplification efficiencies strategy are you going to use ?     |\n| :------------------------------------------------------------------- |\n| You don't have data of serial dilutions of representative template to build standard curves so the only choice you have is to use the default amplification efficiency (E = 2) for all the genes. |\n\n| Appoint the reference genes. |ACTB, GAPDH, HPRT and TUBB4B are the reference genes:\nYou can find the details on how to appoint reference targets in the Normalization section of Analyzing gene expression data in qbase+ |\n\n| Is the stability of the reference genes ok ?                                                                 
|\n| :----------------------------------------------------------------------------------------------------------- |\n| The M and CV values of the reference genes are shown in green so the stability of the reference genes is ok. |\n\n| Which scaling strategy are you going to use ? |Since you have a treated and a control group, it seems logical to use the average of the control group for scaling.\n\nYou can find the details on how to specify the scaling strategy in the Scaling section of Analyzing gene expression data in qbase+\nLook at the target bar charts.\n\n| In the target bar charts plot the average expression level of each group. |In the Grouping section at the bottom of the chart you can select Plot group average: Now do exactly the same for the second experiment with the same genes of interest but with other reference genes. This means that you have to return to the Analysis wizard. To this end, click the Launch wizard button at the top of the page:\n\n| Create a new Experiment called NormGenes2 in Project1 |\n| :---------------------------------------------------- |\n| You can find the details on how to create a new experiment in Creating a project and an experiment |\n\n| Import Run5 to Run9. These files are in qBase format. |\n| :---------------------------------------------------- |\n| You can find the details on how to import the data file in the Loading the data into qbase+ section of Analyzing data from a geNorm pilot experiment in qbase+ |\n\n| Import the Sample Properties file.                    |\n| :---------------------------------------------------- |\n| You can find the details on how to import the data file in the Adding annotation to the data section of Loading data into qbase+. Select to import the custom property. |\n\nSo as you can see we have 6 treated and 6 untreated samples and we have measured the expression of the 4 new reference genes and 6 genes of interest:\n| Appoint the reference genes.
|EDN3, Gm16835, MUSK and OTOP3 are the reference genes:\n| :---------------------------------------------------- |\n| You can find the details on how to appoint reference targets in the Normalization section of Analyzing gene expression data in qbase+ |\n\n| Is the stability of the reference genes ok ?                                                                 |\n| :----------------------------------------------------------------------------------------------------------- |\n| The M and CV values of the reference genes are shown in green so the stability of the reference genes is ok. |\n\nAs you can see the M and CV values of these reference genes are much lower than those of the 4 commonly used reference genes pointing to the fact that these genes are more stably expressed. It's not that the commonly used reference genes are bad references. Then qbase+ would not display them in green. It's just that the other reference genes are more stable. But this can have a big impact on the results of your analysis...\n\n| Use the average of the control group for scaling |You can find the details on how to specify the scaling strategy in the Scaling section of Analyzing gene expression data in qbase+\n\nPlot the average expression level of each group. Now we will compare the target bar charts of the second and the first experiment to assess the influence of the stability of the reference targets on the analysis results.\n\n| How to display the target bar charts of the second and the first experiment next to each other ? |You can display the bar charts next to each other by clicking the tab of the bar chart of the second experiment. Drag the tab to the right while you hold down the mouse button until you see an arrow at the right side of the qbase+ window and a dark grey box in the right half of the qbase+ window. Release the mouse button when you see the arrow and the box. Now the two bar charts should be next to each other.
Some laptop screens are too small to nicely display the two bar charts next to each other. If this is the case, switch to full screen mode by double clicking the tab of the first experiment. |\n\nNow you can compare the expression of each gene in the first and in the second experiment.\n\nWhen we do this for HighVar1 for instance, you see that the average expression levels of both groups are the same in the first and the second experiment (check the scales of the Y-axis\\!). Both experiments detect the two-fold difference in expression level between the groups. However, the error bars are much larger in the first experiment than in the second. The variability of the reference genes does have a strong influence on the errors and the size of the error bars will influence the outcome of the statistical test to determine if a gene is differentially expressed or not. The larger the error bars, the less likely it is that the test will say that the groups differ.\n\nRemember that the error bars represent 95% confidence intervals:\n  - if the error bars of the two groups do not overlap: you are certain that the difference between the means of the two groups is significant\n  - if they do overlap: you know nothing with certainty: the means can be different or they can be the same. Of course the more they overlap the smaller the chance that there is a significant difference between the groups.\n\nCheck out the results of HighVar2. Here, you clearly see the influence of the reference genes. Again, the fourfold difference in expression is detected by both experiments but:\n\n  - the least stable reference genes (experiment 1) give large overlapping error bars\n  - the most stable reference genes (experiment 2) give smaller, barely overlapping error bars\n\nThis means that in experiment 2, a statistical test will probably declare that HighVar2 is differentially expressed while in experiment 1 this will not be the case.
We will test this assumption by performing a statistical test.\n\n#### Statistical analysis of differential expression\n\n| Use a non-parametric test to identify DE genes in experiment 1 ? |\n| :---------------------------------------------------- |\n| You can find full details on statistical analyses in qbase+ in the statistical analysis section of analyzing gene expression data in qbase+. In brief, you need to perform the following steps:\n\nOpen the Statistical wizard\n\nThe goal of this analysis is to compare the mean expression levels of our genes of interest in treated and untreated samples\n\nUse the Treatment property to identify treated and untreated samples\n\nAnalyze all genes of interest\n\nUse the default settings to perform the non-parametric Mann-Whitney test\n\nAs you can see, none of the genes is considered DE by the very conservative non-parametric test. Additionally most genes have the same p-value. That's normal when you don't have many replicates. In our case, we have 6 replicates. Non-parametric tests are based on a ranking of the data values and there are not so many ways to rank 6 data points. This is why you see the same p-values for many genes.\nAs said before, the non-parametric test is very stringent. If the data do come from a normal distribution, the test will generate false negatives. Some of the genes might have been labeled not DE while in fact they are DE so you might have missed some differential expression. The choice of statistical test with 6 biological replicates depends on what you prefer: false negatives or false positives. Most people will choose false negatives since they don't want to invest time and money in research on a gene that was labeled DE while in fact it is not DE.\n\nSuppose I don't mind false positives but I don't want to miss any potential DE genes. In that case, it's better to go for a t-test.
Let's repeat the test now choosing a parametric t-test.\n| Use a t-test to identify DE genes in experiment 1 ? |\n| :---------------------------------------------------- |\n| You can find full details on statistical analyses in qbase+ in the statistical analysis section of analyzing gene expression data in qbase+.\nIn brief, you need to perform the following steps:\nOpen the Statistical wizard\nThe goal of this analysis is to compare the mean expression levels of our genes of interest in treated and untreated samples\nUse the Treatment property to identify treated and untreated samples\nAnalyze all genes of interest\nDescribe the data set as log-normally distributed\n\nStill none of the genes is considered DE but you do see that the p-values of the t-test are lower than those of the Mann-Whitney test.\n\n| Use a non-parametric test to identify DE genes in experiment 2 ? |\n| :---------------------------------------------------- |\n| You can find full details on statistical analyses in qbase+ in the statistical analysis section of analyzing gene expression data in qbase+.\nIn brief, you need to perform the following steps:\n\nOpen the Statistical wizard\nThe goal of this analysis is to compare the mean expression levels of our genes of interest in treated and untreated samples\nUse the Treatment property to identify treated and untreated samples\nAnalyze all genes of interest\nUse default settings\n\nNow you see that 4 out of the 6 genes are considered DE. This is also what we expected since 3 of our genes of interest have a 4-fold difference in expression level between the two groups. It's understandable that it's hard to detect 2-fold differences in expression especially when the expression of the gene is somewhat variable as is the case for Low1 and HighVar1 but a 4-fold difference is a difference that you would like to detect.\n| Use a t-test to identify DE genes in experiment 2 ?
|\n| :---------------------------------------------------- |\n| You can find full details on statistical analyses in qbase+ in the statistical analysis section of analyzing gene expression data in qbase+.\nIn brief, you need to perform the following steps:\n\nOpen the Statistical wizard\nThe goal of this analysis is to compare the mean expression levels of our genes of interest in treated and untreated samples\nUse the Treatment property to identify treated and untreated samples\nAnalyze all genes of interest\nDescribe the data as log-normally distributed\n\nAgain the t-test generates lower p-values than the Mann-Whitney test but realize that choosing the t-test when the data is not normally distributed will generate false positives \\!","#### Create a project\n\nWhen you use qbase+ for the first time, you can't do anything unless you\ncreate a project to store your experiments in.\n\n| Create a new Project |\n| :----------------------------------- |\n| When you double click the qbase+ icon, the software starts up and automatically opens the Start page where you can create a new project by clicking the Create new project button : This will create a new project with a default name like Project 1 . |\n\n#### Create an experiment\n\nTo open actual data (one/more runs) in qbase+, creating a project is not sufficient. You need to create an experiment in this project to hold the run data.\n\n| Create a new Experiment called GeneExpression in the new project. |\n| :----------------------------------- |\n| Select the Create a new qbase+ experiment option  in the Start page. Type a name for the new experiment . Click the Next button at the bottom of the page . This will create the experiment.\n\nWhen you leave the **Start page**, the **Import run** page is automatically opened allowing you to import the actual qPCR data into qbase+.\n\n#### Loading the data\n\nFirst a few quick words about the data set.
We’ll be working with data coming from 3 runs (plates in the qPCR instrument): [Run1](http://data.bits.vib.be/pub/trainingen/qbasePLUS/DataTraining/Run1.xls), [Run2](http://data.bits.vib.be/pub/trainingen/qbasePLUS/DataTraining/Run2.xls) and [Run3](http://data.bits.vib.be/pub/trainingen/qbasePLUS/DataTraining/Run3.xls)\n\nThe data consist of Cq values for:\n\n  - 3 reference target genes: Stable, Nonregulated, and Flexible\n  - 3 target genes of interest: Duvel, Leffe, and Palm\n\neach measured twice (= technical replicates) in 16 different samples. Half of the samples have undergone a treatment, half of them are untreated control samples.\n\nThe data set also contains a series of standard samples consisting of a four-fold dilution series of cDNA for each target gene. These measurements allow to generate a standard curve from which target-specific amplification efficiencies can be calculated. Finally, negative controls (No Template Controls) have been measured. The goal of the analysis is to identify target genes of interest that have different expression levels in the treated samples compared to the untreated control samples.\n\n| In GeneExpression load CFX run files [Run1](http://data.bits.vib.be/pub/trainingen/qbasePLUS/DataTraining/Run1.xls), [Run2](http://data.bits.vib.be/pub/trainingen/qbasePLUS/DataTraining/Run2.xls) and [Run3](http://data.bits.vib.be/pub/trainingen/qbasePLUS/DataTraining/Run3.xls).\n| :---------------------------- |\n|  \n* Click the **Import runs** button  to open the **Import Run** window\n* Click the **Browse** button  to go to the directory that stores the files containing the qPCR data\n\nSelect the 3 run files simultaneously by holding the **Ctrl** key on your keyboard during the selection in Windows or the command button in MacOSX.\n\nClick the **Open** button \n\nNow you go back to the **Import Run** window, click the **Next** button (purple)\n\n* \nqbase+ tries to recognize the format of the selected import files. 
If only one format matches the files (as in our case CFX), it is selected and the quick import option is enabled. Click the **Finish** button.\n\nIn the Imported run names area on the **Import run** page you should now see the names of the 3 run files. If these are the correct files, click the **Next** button at the bottom of the page.\n\n#### Adding annotation to the data\n\nWhen you leave the **Import run** page, you are redirected to the **Sample target list** page, which gives you an overview of the targets (= genes) and samples qbase+ detected when reading in the datafiles.\nTake a look at the data. You see that the list of samples and targets matches the description of the qPCR experiment at the top of this page. The samples in this experiment are divided into two groups: samples that received some kind of treatment and untreated control samples. This information was not included in the run files so qbase+ does not know which sample belongs to which group. However, this is relevant information: in our analysis we are going to compare the expression of our genes of interest between treated and untreated samples. This means that qbase+ needs the grouping annotation to be able to perform the analysis we want to do. So we have to give qbase+ this annotation: we can do this by adding a custom sample property. To do this we need to create a sample properties file with a specific format that is described in [the tutorial](http://data.bits.vib.be/pub/trainingen/qbasePLUS/TutorialIII.pdf). 
You can find the file in the qbase+ folder on the BITS laptops or you can [download the file here](http://data.bits.vib.be/pub/trainingen/qbasePLUS/Sample_Properties_file.xlsx).\n\n| How to add the grouping annotation ?\n| :---------------------------- |\n|  To import the file containing the grouping annotation:\n\n* select **Add samples and targets** \n* click **Import sample list** \n* browse to the folder that contains the samples file\n* select the file and click **Open**\n* click **Next**\n\nIn the **Importing samples** window, you have to tell qbase+ which sample annotation you want to import from the sample properties file\n\nIn our case we could import Quantities (this annotation is available in the sample properties file) but the quantities of the standard samples were included in the run files so qbase+ has already imported this annotation from the run files during data import.\nWe definitely need to import the Custom properties since they were not a part of the run files. The Treatment property will tell qbase+ which samples belong to the group of control samples and which samples belong to the group of treated samples.\nClick the **Next** button at the bottom of the page to finish the import.\n\nAt this point you don't see the custom annotation that you have imported, you will see it later in the analysis during scaling\nLeaving the **Sample target list** page takes you to the **Run annotation** page, where you have to confirm again that the sample and gene names are ok. If this is not the case you can adjust the annotation here.\n\nClick the **Next** button at the bottom of the page\n\nOur data file contains all required annotation:\n\n  - Cq values\n  - sample and target names\n  - sample types\n  - quantities for the standard samples\n  - grouping of the samples\n\n\n\nOnce runs are imported, you can start analyzing the data.
Data consist\nof Cq values for all the wells.\n\n#### Specifying the aim of the experiment\n\nOn the **Aim** page you tell the software what type of analysis you want to do. Different types of analyses require different parameters, parameter settings and different calculations. By selecting the proper analysis type, qbase+ will only show the relevant parameters and parameter settings.\n\nSince we are doing a **gene expression analysis** in this exercise, this is the option we should select. Click the **Next** button at the bottom of the page to go to the **Technical quality control** page.\n\n#### Checking the quality of technical replicates and controls\n\nThe **Technical quality control** page handles the settings of the requirements that the data have to meet to be considered high quality. For instance the maximum difference between technical replicates is defined on this page. If there are technical replicates in the data set, qbase+ will detect them automatically (they have the same sample and target name) and calculate the average Cq value. In theory, technical replicates should generate more or less identical signals.\n\n| How to set the maximum difference in Cq values for technical replicates ?\n| :---------------------------- |\n|  The quality criterion that the replicates must meet to be included for further analysis is one of the parameters in qbase+.
You can set it on the **Technical quality control** page:\n\nThe default maximum allowed difference in Cq values between technical replicates is 0.5\n\nAdditionally, you can do quality checks based on the data of the positive and negative controls.\n| How to set quality requirements for the control samples ?\n| :---------------------------- |\n|  On the same **Technical quality control** page you can define the minimum requirements for a well to be included in the calculations:\n\n* **Negative control threshold** : minimum allowed difference in Cq value between the sample with the highest Cq value and the negative control with the lowest Cq value: the default is 5 which means that negative controls should be more than 5 cycles away from the sample of interest.\n* **Lower and upper boundary** : allowed range of Cq values for positive controls.\n\nExcluded means that the data are ignored in the calculations.\n\n| How to check if there are wells that do not meet these criteria ?\n| :---------------------------- |\n|  You can see flagged and excluded data by ticking the **Show details…** options  on the **Technical quality control** page and clicking the **Next** button (purple) at the bottom of the page.\n\nQbase+ will open the results of the quality checks for the replicates  and the controls  on two different tabs. These tabs show lists of samples that failed the quality control criteria. When you open the replicates tab  you can get an overview of the flagged  or the excluded (purple) wells. Select the **failing**  wells.\n\nWhen the difference in Cq between technical replicates exceeds 0.5, the wells end up in the flagged or failing list. They are included in calculations unless you exclude them by unticking them. You see that the two replicates of Palm in Sample05 have very different Cq values. 
All other bad replicates are coming from standard samples.\nIf you are finished checking the data quality, click **Next** to go to the **Amplification efficiencies** page.\n\n#### Taking into account amplification efficiencies\n\nQbase+ calculates an amplification efficiency (E) for each primer pair (= gene). Genes have different amplification efficiencies because:\n\n  - some primer pairs anneal better than others\n  - the presence of inhibitors in the reaction mix (salts, detergents…) decreases the amplification efficiency\n  - inaccurate pipetting\n\nQbase+ has a parameter that allows you to specify how you want to handle amplification efficiencies on the **Amplification efficiencies** page.\n\n| How to specify the amplification efficiencies strategy you want to use ?\n| :---------------------------- |\n|  Since we have included a dilution series for creating a standard curve in our qPCR experiment, we will select\n\n* **Use assay specific amplification efficiencies**\n* **Calculate efficiencies from included standard curves**\n\nAmplification efficiencies are calculated based on the Cq values of a serial dilution of representative template, preferably a mixture of cDNAs from all your samples. Since you know the quantity of the template in each dilution, you can plot Cq values against template quantities for each primer pair. 
Linear regression will fit a standard curve to the data of each gene, and the slope of this curve is used to calculate the amplification efficiency.\n\n| How to check the amplification efficiencies of the genes ?\n| :---------------------------- |\n|  Once you have made this selection, qbase+ starts calculating the efficiencies and the results are immediately shown in the **calculation efficiencies** table.\n\nIn this way, one amplification efficiency (E) for each gene is calculated and used to calculate **Relative Quantities (RQ)**:\n∆Cq is calculated for each well by subtracting the Cq of that well from the average Cq across all samples for the gene that is measured in the well. So ∆Cq is the difference between the Cq value of a gene in a given sample and the average Cq value of that gene across all samples. Cq is subtracted from the average because in this way high expression will result in a positive ∆Cq and low expression in a negative ∆Cq. \n**So at this point the data set contains one RQ value for each gene in each sample.**\n\nClick **Next** to go to the **Normalization** page.\n\n#### Normalization\n\nDifferences in amplification efficiency are not the only source of variability in a qPCR experiment. Several factors are responsible for noise in qPCR experiments e.g. differences in:\n\n  - amount of template cDNA between wells\n  - RNA integrity of samples\n  - efficiency of enzymes used in the PCR or in the reverse\n    transcription\n\nNormalization will eliminate this noise as much as possible. In this way it is possible to make a distinction between genes that are really upregulated and genes with high expression levels in one group of samples simply because higher cDNA concentrations were used in these samples.\nIn qPCR analysis, normalization is done based on housekeeping genes.\n\nHousekeeping genes are measured in all samples along with the genes of interest. In theory, a housekeeping gene should have identical RQ values in all samples. 
In reality, noise generates variation in the expression levels of the housekeeping genes. This variation is a direct measure of the noise and is used to calculate a normalization factor for each sample.\nThese normalization factors are used to adjust the RQ values of the genes of interest accordingly so that the variability is eliminated.\n\nThese adjusted RQ values are called **Normalized Relative Quantities (NRQs)**. In qbase+ housekeeping genes are called reference genes. In our data set there are three reference genes: Stable, Non-regulated and Flexible. On the **Normalization page** we can define the normalization strategy we are going to use, appoint the reference genes and check their stability of expression.\n\n| How to specify the normalization strategy you want to use ?\n| :---------------------------- |\n|  You can specify the normalization strategy you want to use on the Normalization method page:\n\n* **Reference genes** normalization is based on the RQ values of the housekeeping genes\n* **Global mean** normalization calculates normalization factors based on the RQ values of all genes instead of only using the reference genes. This strategy is recommended for experiments with more than 50 random genes. Random means that the genes are randomly distributed over all biological pathways.\n* **Custom value** normalization is used for specific study types. This strategy allows users to provide custom normalization factors such as for example the cell count.\n* **None** means that you choose to do no normalization at all. 
This option should only be used for single cell qPCR.\n\nWe have incorporated 3 housekeeping genes in our experiment so we select the **Reference genes** strategy.\n\n| How to appoint reference targets ?\n| :---------------------------- |\n|  You have to indicate which targets should be used as reference genes since qbase+ treats all genes as targets of interest unless you explicitly mark them as reference genes on the Normalization method page:\n\nWe have measured 3 housekeeping genes: Stable, Flexible and Non-regulated so we tick the boxes in front of their names.\n\nIt's not because you have appointed genes as reference genes that they necessarily are **good** reference genes. They should have stable expression values over all samples in your study. Fortunately, qbase+ checks the quality of the reference genes. For each appointed reference gene, qbase+ calculates two indicators of expression stability\n\n  - **M** (geNorm expression stability value): calculated based on the pairwise variations of the reference genes.\n  - **CV** (coefficient of variation): the ratio of the standard deviation of the NRQs of a reference gene over all samples to the mean NRQ of that reference gene.\n\nIt is considered that the higher these indicators the less stable the reference gene.\n\n| Are Flexible, Stable and Nonregulated good reference targets ?\n| :---------------------------- |\n|  M and CV values of the appointed reference genes are automatically calculated by qbase+ and shown on the Normalization method page:\n\nThe default limits for M and CV were determined by checking M-values and CVs for established reference genes in a pilot experiment that was done by Biogazelle. Based on the results of this pilot experiment, the threshold for CV and M was set to 0.2 and 0.5 respectively.\nIf a reference gene does not meet these criteria it is displayed in red. 
As you can see the M and CV values of all our reference genes exceed the limits and are displayed in red.\n\nIf the quality of the reference genes is not good enough, it is advised to remove the reference gene with the worst M and CV values and re-evaluate the remaining reference genes.\n\n| Which reference target are you going to remove ?                                                                                                                                    |\n| :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| Both the M-value and the CV are measures of variability. The higher these values the more variable the expression values are. So we will remove the gene with the highest M and CV. |\n\nYou can remove a reference gene simply by unticking the box in front of its name.\n\n| Are the two remaining reference genes good references ?\n| :---------------------------- |\n|  After removing Flexible as a reference gene the M and CV values of the two remaining reference genes decrease drastically to values that do meet the quality criteria. M and CV values that meet the criteria are displayed in green.\n\nThis exercise shows the importance of using a minimum of three reference genes.
If one of the reference genes does not produce stable expression values as is the case for Flexible, you always have two remaining reference genes to do the normalization.\n\n[See how to select reference genes for your qPCR experiment](http://wiki.bits.vib.be/index.php/Using_GeneVestigator_to_select_candidate_reference_genes).\n\n**So after normalization you have one NRQ value for each gene in each sample.**\n\nClick **Next** to go to the **Scaling** page.\n\n#### Scaling\n\nRescaling means that you calculate NRQ values relative to a specified reference level.\n\nQbase+ allows you to rescale the NRQ values using one of the following as a reference:\n\n  - the sample with the minimal expression\n  - the average expression level of a gene across all samples\n  - the sample with the maximal expression\n  - a specific sample (e.g. untreated control)\n  - the average of a certain group (e.g. all control samples): this is\n    often how people want to visualize their results\n  - positive control: only to be used for copy number analysis\n\nAfter scaling, the expression values of the choice you make here will be set to 1 e.g. when you choose **average** the average expression level across all samples will be set to 1 and the expression levels of the individual samples will be scaled accordingly.\n\n| How to scale to the average of the untreated samples ?\n| :---------------------------- |\n|  You can specify the scaling strategy on the **Scaling** page. Select **Scale to group** and set the **Scaling group** to the **untreated** samples . This is one of the reasons why you need the grouping annotation.\n\nRescaling to the average of a group is typically used to compare results between 2 groups, e.g. treated samples against untreated controls. 
After rescaling, the average of the NRQs across all untreated samples is 1 and the NRQs of the treated samples are scaled accordingly.\n\nClick **Next** to go to the **Analysis** page.\n\n#### Visualization of the results\n\nOne of the things you can select to do on the **Analysis** page is viewing the relative expression levels (= scaled NRQs) of each of the genes in a bar chart per gene. It is recommended to visualize your results like this.\n\nIt is possible to view the relative expression levels of all genes of interest on the same bar chart. You can use this view to see if these genes show the same expression pattern but you cannot directly compare the heights of the different genes because each gene is independently rescaled\\!\n\n| How to visualize single gene expression bar charts ?\n| :---------------------------- |\n|  Select **Visually inspect results For individual targets** on the **Analysis** page and click **Finish**\n\n| How to visualize the expression levels of Palm in each sample ?\n| :---------------------------- |\n|  Select **Visually inspect results For individual targets** on the **Analysis** page and click **Finish**\n\nThe **Target** select box allows you to select the gene you want to view the expression levels of. Relative expression levels are shown for each sample. Error bars are shown and represent the technical variation in your experiment (variation generated by differences in amounts pipetted, efficiency of enzymes, purity of the samples...).\n\nYou see that Palm has a low expression level and a very large error bar in Sample05 because the two replicates of this sample had very different Cq values. 
You can group and colour the bars according to a property.\n\n| How to group the bars of Palm according to treatment (so treated at one side and untreated at the other side)\n| :---------------------------- |\n|  In the **Grouping** section you can specify the property you want to group by.\n\n| How to view average expression levels in each group ?\n| :---------------------------- |\n|  In the **Grouping** section you can choose to plot individual samples as shown above but you can also choose to **plot group average** expression levels.\n\nThe error bars that you see here represent biological variation and will be used later on in the statistical analysis. The error bars are 95% confidence intervals which means that they represent the range that will contain with 95% certainty the real average expression level in that group of samples.\nThe nice characteristic of 95% confidence intervals is the following:\n\n  - if they do not overlap you are sure that the expression levels in the two groups are significantly different, in other words the gene is differentially expressed\n  - if they do overlap you cannot say that you are sure that the expression levels are the same. You simply don’t know if the gene is differentially expressed or not.\n\n| Assess the effect of switching the Y-axis to a logarithmic scale for Palm.\n| :---------------------------- |\n|  In the **Y axis** section you can specify if you want a linear or logarithmic axis.\nAs you can see you do not change the expression values, you just change the scale of the Y axis. Switching the Y-axis to a logarithmic scale can be helpful if you have large differences in NRQs between different samples\n\n| Assess the effect of switching the Y-axis to a logarithmic scale for Flexible.\n| :---------------------------- |\n|  Switch to the bar charts of Flexible. 
By switching the Y-axis to logarithmic you can now see more clearly the differences between samples with small NRQs.\n\n#### Statistical analysis\n\nOnce you generate target bar charts you leave the **Analysis wizard** and you go to the regular qbase+ interface. Suppose that you want to perform a statistical test to prove that the difference in expression that you see in the target chart is significant.\nAt some point, qbase+ will ask you if your data is coming from a normal distribution. If you don't know, you can select **I don't know** and qbase+ will assume the data are not coming from a normal distribution and perform a stringent non-parametric test.\nHowever, when you have **7 or more replicates per group**, you can check if the data is normally distributed using a statistical test. If it is, qbase+ will perform a regular t-test. The upside is that the t-test is less stringent than the non-parametric tests and will find more DE genes. However, you may only perform it on normally distributed data. If you perform the t-test on data that is not normally distributed you will generate false positives i.e. qbase+ will say that genes are DE while in fact they are not. Performing a non-parametric test on normally distributed data will generate false negatives i.e. you will miss DE genes.\n\nChecking if the data is normally distributed can be easily done in GraphPad Prism. To this end you have to export the data.\n| How to export the data ?\n| :---------------------------- |\n|  To export the results click **the upward pointing arrow** in the qbase+ toolbar:\nYou want to export the normalized data so select **Export Result Table (CNRQ)**:\nYou will be given the choice to export results only (CNRQs) or to include the errors (standard error of the mean) as well . We don't need the errors in Prism so we do not select this option.\nThe scale of the Result table can be linear or logarithmic (base 10) . 
Without user intervention, qbase+ will automatically log10 transform the CNRQs prior to doing statistics. So we need to check in Prism if the log transformed data are normally distributed.\nAdditionally, you need to tell qbase+ where to store the file containing the exported data. Click the **Browse** button for this .\n\nExporting will generate an Excel file in the location that you specified. However, the file contains the results for all samples and we need to check the two groups (treated and untreated) separately. The sample properties show that the even samples belong to the treated group and the odd samples to the untreated group.\nThis means we have to generate two files:\n\n  - [a file containing the data of the untreated samples](http://data.bits.vib.be/pub/trainingen/qbasePLUS/resultslog.csv)\n  - [a file containing the data of the treated samples](http://data.bits.vib.be/pub/trainingen/qbasePLUS/resultslogTreated.csv)\n\nNow we can open these files in Prism to check if the data is normally distributed.\n\n| How to import the data of the untreated samples in Prism ?\n| :---------------------------- |\n|  \n* Open Prism\n* Expand **File** in the top menu\n* Select **New**\n* Click **New Project File**\n* In the left menu select to create a **Column** table. Data representing different groups (in our case measurements for different genes) should always be loaded into a column table.\n* Select **Enter replicate values, stacked into columns** (this is normally the default selection) since the replicates (measurements for the same gene) are stacked in the columns.\n* Click **Create**\n\nPrism has now created a table to hold the data of the untreated samples but at this point the table is still empty. 
To load the data:\n\n* Expand **File** in the top menu\n* Select **Import**\n* Browse to the resultslog.csv file, select it and click **Open**\n* In the **Source** tab select **Insert data only**\n* Since this is a European csv file commas are used as decimal separators so in contrast to what its name might imply, semicolons and not commas are used to separate the columns in the csv file (you can open the file in a text editor to take a look). In American csv files dots are used as decimal separator and the comma is used to separate the columns. Prism doesn't know the format of your csv file so you have to tell it the role of the comma in your file. Select **Separate decimals**\n* Go to the **Filter** tab and specify the rows you want to import (the last rows are those of the standard and the water samples, you don't want to include them)\n* Click **Import**\n\nAs the file is opened in Prism you see that the first column containing the sample names is treated as a data column. Right click the header of the first column and select **Delete**\n\n| How to check if the data of the untreated samples comes from a normal distribution ?\n| :---------------------------- |\n|  \n* Click the **Analyze** button in the top menu\n* Select to do the **Column statistics** analysis in the **Column analyses** section of the left menu\n* In the right menu, deselect **Flexible**. It's a bad reference gene so you will not include it in the qbase+ analysis so there's no point checking its normality (it is probably not normally distributed). In that respect you could also deselect the other two reference genes since you will do the DE test on the target genes and not on the reference genes.\n* Click **OK**\n* In the **Descriptive statistics** and the **Confidence intervals** section deselect everything except **Mean, SD, SEM**. These statistics are not what we are interested in: we want to know if the data comes from a normal distribution. 
The only reason we select Mean, SD, SEM is because if we make no selection here Prism throws an error.\n* In the **Test if the values come from a Gaussian distribution** section select the **D'agostino-Pearson omnibus test** to test if the data are drawn from a normal distribution. Although Prism offers three tests for this, the D'Agostino-Pearson test is the safest option.\n* Click **OK**\n\nPrism now generates a table to hold the results of the statistical analysis: As you can see, the data for Palm are not normally distributed.\n\nSince we found that there's one group of data that does not follow a normal distribution, it's no longer necessary to check if the treated data are normally distributed but you can do it if you want to. We will now proceed with the statistical analysis in qbase+. Statistical analyses can be performed via the **Statistics wizard**.\n\n| How to open the Statistics wizard ?\n| :---------------------------- |\n|  You can open it in the **Project Explorer** (window at the left):\n\n* expand **Project1** if it's not yet expanded\n* expand the **Experiments** folder in the project if it's not yet expanded\n* expand the **GeneExpression** experiment if it's not yet expanded\n* expand the **Analysis** section if it's not yet expanded\n* expand the **Statistics** section\n* double click **Stat wizard**\n\nThis opens the **Statistics wizard** that allows you to perform various kinds of statistical analyses.\n\n| Which kind of analysis are you going to do ?                                                                                                                                                                                                                                                      
|\n| :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |\n| On the **Goal** page: Select **Mean comparison** since you want to compare expression between two groups of samples so what you want to do is comparing the mean expression of each gene in the treated samples with its mean expression level in the untreated samples. Click **Next**. |\n\n| How to define the groups that you are going to compare ?\n| :---------------------------- |\n|  On the **Groups** page: specify how to define the two groups of samples that you want to compare. Select **Treatment** as the grouping variable to compare treated and untreated samples. Click **Next**.\n\n| How to define the genes that you want to analyze ?\n| :---------------------------- |\n|  On the **Targets** page: specify for which targets of interest you want to do the test. Deselect **Flexible** since you do not want to include it in the analysis. It's just a bad reference gene. Click **Next**.\n\nOn the **Settings** page you have to describe the characteristics of your data set, allowing qbase+ to choose the appropriate test for your data. \n\nThe first thing you need to tell qbase+ is whether the data was drawn from a normal or a non-normal distribution. Since we have 8 biological replicates per group we can do a test in Prism to check if the data are normally distributed.\n\n| Which gene(s) is/are differentially expressed ?\n| :---------------------------- |\n|  On the **Settings** page you describe the characteristics of your data set so that qbase+ can choose the ideal test for your data. For our data set we can use the default settings. Click **Next**. 
In the results **Table** you can see that the p-value for Palm is below 0.05 so Palm is differentially expressed.\n\n\n\nIn this example we will analyze data from another expression study with the following characteristics:\n\nAll samples fit in a single run: [Run7](http://data.bits.vib.be/pub/trainingen/qbasePLUS/DataTraining/Run7.xls)\nWe have the following samples:\n\n  - 5 control samples: control1, control2…\n  - 5 treated samples: treated1, treated2…\n  - 1 no template control: NTC\n\nThe expression of the following genes was measured:\n\n  - 2 reference genes: refgene1 and refgene2\n  - 2 genes of interest: gene1 and gene2\n\nThere are two technical replicates per reaction\n\n#### Creating a new experiment\n\n| Create a new Experiment called GeneExpression2 in Project1\n| :---------------------------- |\n| You can find the details on how to create a new experiment in Creating a project and an experiment\n\n#### Loading the data\n\n| Import [Run7](http://data.bits.vib.be/pub/trainingen/qbasePLUS/DataTraining/Run7.xls). This file is in qBase format.                    
|\n| :-------------------------------------------------------------------------------------------------------------------------------------- |\n| You can find the details on how to import the data file in the **Loading the data into qbase+** section of Analyzing data from a geNorm pilot experiment in qbase+ |\n\n#### Adding sample annotation\n\nDownload the [the sample properties file](http://data.bits.vib.be/pub/trainingen/qbasePLUS/DataTraining/Sample_Properties_GE2.xlsx).\n| Add a custom sample property called Treatment.\n| :---------------------------- |\n| You can find the details on how to add a custom sample property in the **Adding annotation to the data** section of Loading data into qbase+\n\n#### Analyzing the data\n\n| Choose the type of analysis you want to perform.\n| :---------------------------- |\n| \n\n| Check controls and replicates.\n| :---------------------------- |\n| First set the minimum requirements for controls and replicates  You see that 6 replicates do not meet these requirements . Select to **Show details and manually exclude bad replicates**\nAll negative controls pass the test . Positive controls were not included in this analysis. Qbase+ will now open the results for the failing replicates: as you can see the difference in Cq values between these replicates is not that big. 
They fail to meet the requirement just slightly.\n\n| Which amplification efficiencies strategy are you going to use ?\n| :---------------------------- |\n| You don't have data of serial dilutions of representative template to build standard curves so the only choice you have is to use the default amplification efficiency (E = 2) for all the genes.\n\n| Appoint the reference genes as reference targets.\n| :---------------------------- |\n| You can find the details on how to appoint reference targets in the **Normalization** section of Analyzing gene expression data in qbase+\n\n| Is the stability of the reference genes ok ?\n| :---------------------------- |\n| In the **Reference target stability window** the M and CV values of the reference genes are shown in green so the stability of the reference genes is ok. You can find the details on how to check reference target stability in the **Normalization** section of Analyzing gene expression data in qbase+\n\n| Which scaling strategy are you going to use ?\n| :---------------------------- |\n| Since you have a treated and a **control** group, it seems logical to use the average of the control group for scaling. 
You can find the details on how to specify the scaling strategy in the **Scaling** section of Analyzing gene expression data in qbase+\n\nLook at the target bar charts.\n\n| In the target bar charts group the samples according to treatment.\n| :---------------------------- |\n| You can find the details on how to group the samples in the **Visualization of the results** section of Analyzing gene expression data in qbase+\n\nThe samples of each group are biological replicates so you might want to generate a plot that compares the average expression of the treated samples with the average expression of the untreated samples.\n\n| In the target bar charts plot the group averages instead of the individual samples.\n| :---------------------------- |\n| In the **Grouping** section at the bottom of the chart you can select **Plot group average**:\n\n| Are there any genes for which you see a clear difference in expression between the two groups ?\n| :---------------------------- |\n| For gene 1, the mean expression levels in the two groups are almost the same and the error bars completely overlap.\n\nWhen you look at the title of the Y-axis, you see that 95% confidence intervals are used as error bars. In case of 95% confidence intervals you can use the following rules:\n\n* if they do not overlap: you are certain that the difference between the means of the two groups is significant\n* if they do overlap: you know nothing with certainty: the means can be different or they can be the same\n\nSo for gene 1 the means are very close but just based on the plot we may not make any conclusions with certainty. For gene 2, the mean expression levels in the two groups are very different and the error bars do not overlap. 
So the 95% confidence intervals do not overlap meaning that we can be certain that the difference between the means of the two groups is significant.\n\n| Use a statistical test to compare the expression levels between the two groups of samples ?\n| :---------------------------- |\n| You only have 5 replicates per group so you cannot test if the data comes from a normal distribution. Qbase+ will assume they're not normally distributed and perform a non-parametric Mann-Whitney test.\n\nThe p-value of gene2 is smaller than 0.05 so it has a statistically significant difference in expression levels in treated samples compared to untreated samples. For gene1 the p-value is 1 so we have no evidence to conclude that the expression of gene1 is different in treated compared to untreated samples. You can find the details on how to compare the means of the two groups in the **Statistical analysis** section of Analyzing gene expression data in qbase+\n","### Exercise 1: simple gene expression study\n\nIn my qPCR experiment I want to study the expression of 12 genes of interest in 8 samples of interest. I want to use 2 PCR replicates for each reaction.\n\n> How many 96 well plates do I need for this experiment ?\n> > I have 12 genes in 8 samples which gives a total of 96 reactions (one plate). I want to perform each reaction twice (2 PCR replicates) so I need two plates. However, I need to include reference genes in my experiment, preferably more than one. I can put these reference genes on a separate plate, I do not have to include them on each plate.\nIdeally, you need to include 3 reference genes so having 8 samples and 2 replicates this gives an additional 48 reactions. Thus, I need three 96 well plates to perform this experiment.\n\n| Do I need to include IRCs (inter-run calibrators) ?                      |\n| :--------------------------------------------------- |\n| No, I can easily fit all samples of the same gene on the same plate so I don't need to include IRCs. 
|\n\n### Exercise 2: a large study\n\nIn my qPCR experiment I want to study the pattern of expression of 96 genes (genes of interest and reference genes) in 96 samples of interest, divided into a few groups. I want to use 2 PCR replicates for each reaction.\n\n| Do I need to include IRCs (inter-run calibrators) ?                    |\n| :------------------------------------------------------- |\n| No, I can fit all samples of the same gene on the same plate so I don't need to include IRCs. |\n\nI want to include PCR replicates.\n\n| Do I need to include IRCs when I work on a 96 well plate ?                                                                                    |\n| :-------------------------------------------------------------------------------------------------------------------------------------------- |\n| Yes, I have 192 reactions per gene so I cannot place them on the same plate. Remember that replicates have to be located on the same plate \\! |\n\n| Do I need to include IRCs when I work on a 384 well plate ?                        |\n| :--------------------------------------------------------------------------------- |\n| No, I have 192 reactions per gene so I can even place two genes on the same plate. |\n\nI want to include no template controls but I don't want to increase the\nnumber of plates.\n\n| What is the most elegant strategy to make room for including negative controls ?      |                                                      \n| :------------------------------------------------------------------------------------ |\n| This kind of study screen for expression patterns and requires statistical analysis. Since you have many samples divided over a few groups it means you have many biological replicates so you could easily do without the PCR replicates. By doing so you preserve the biological variability which is often far greater than the technical variation. 
|\n\n### Exercise 3: how to fill plates ?\n\nIn my qPCR experiment I want to study the pattern of expression of 5 genes (genes of interest and reference genes) in 38 samples (samples of interest and control samples). I want to use 2 PCR replicates for each reaction.\n\n| What is the minimum number of 96 well plates I need for this experiment ? |\n| :------------------------------ |\n| 5 genes * 38 samples * 2 replicates = 380 reactions.\nI need a minimum of 4 plates for this experiment.\n\n| If I use the minimum number of 96 well plates do I need to include IRCs ?                                                      |\n| :----------------------------------------------------------------------------------------------------------------------------- |\n| Yes, 5 genes spread over 4 plates with 72 reactions per gene means that at least one gene will be spread over multiple plates. |\n\n| What can I do to avoid inter-run variability ?                                                                                                                                                                                         |\n| :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| I can use 5 plates and fill them with one gene each. They will not be completely filled (72 reactions) but at least I do not have to use IRCs (which are additional reactions that also cost money) and I have no inter-run variation. |\n\nSuppose there's only one 96-well plate left in your lab. You have 10 samples (samples of interest + control samples) and you want to make the most of what you have.\n\n| How many genes of interest would you measure ? 
|\n| :------------------------------ |\n| Since you want to make most of what you have, let's assume you are omitting PCR replicates.\nTheoretically, you could fit 9 genes on your 96-well plate. However, to avoid pipetting mistakes I would measure only 8 genes so I can work with one row / gene. This is very handy for multichannel pipets.\n\n### Exercise 4: a growing study\n\nIn my qPCR experiment I want to study the pattern of expression of 24 genes (genes of interest and reference genes) in 48 samples (samples of interest and control samples). I want to use 2 PCR replicates for each reaction.\n\n| How many genes can I analyze on one 384 well plate ? |\n| :------------------------------ |\n| 48 samples * 2 replicates = 96 reactions per gene.\nI can analyze 4 genes on each 384 well plate.\n\nEach week I receive 2 additional samples to analyze.\n\n| Do I analyze them immediately after I get them ? |\n| :------------------------------ |\n| No. Since the samples are placed on different plates as in the previous experiment, you have to use IRCs. You typically need 3 IRCs and a no template control sample. It means that if you want to analyze these 2 samples you have to include 4 additional samples for each gene. This is a lot of overhead for just 2 samples !\nTry to avoid this: it's better to wait a few weeks until you have 6 or 8 or even more samples.\n\n### Exercise 5: a diagnostic copy number screen\n\nIn diagnostic screens all samples are important: you cannot leave out samples and all measurements need to be of the highest quality possible. In my qPCR experiment I want to study copy number variation of 16 genes\n(genes of interest and reference genes) and 2 calibrator samples (samples with known copy number). Since we need high quality data we will use 4 technical replicates.\n\n| Are we going to use sample maximization ?                                   |                   \n| :------------------------------------------------------------------------- |\n| No. 
In contrast to gene expression studies, where we want to compare expression levels of a gene between different groups of samples, copy number analyses do compare genes. It means that in this case the sample maximization approach (placing all samples of the same gene on the same plate) is not valid. Instead we use a gene maximization approach here (placing same sample for different genes on the same plate). |\n\n| How many samples can I fit on a 384 well plate ? |\n| :------------------------------ |\n| We have 16 (genes) * 4 (replicates) = 64 reactions per sample.\nThis means that we can fit 6 samples on a 384 well plate: 4 unknowns and 2 calibrators.\n\n### Exercise 6: fix experiments with bad or missing data\n\nIn my qPCR experiment I want to study gene expression of 6 genes (3 genes of interest and 3 reference genes) in 20 samples (samples of interest and control samples). I want to use 2 technical replicates. One of my genes of interest failed completely and I want to repeat the measurements for this gene in a new run.\n\n| Do I need to include IRCs ?                                                                                |\n| :--------------------------------------------------------------------------------------------------------- |\n| No. We can put the 20 samples of the gene that failed on a single plate so we do not have to include IRCs. |\n\n| Do I need to include reference genes ?                                                                 |\n| :----------------------------------------------------------------------------------------------------- |\n| No. We just repeat all samples for the gene that failed and replace the old data with the new results. |\n\nOne of the reference genes failed completely.\n\n| What should I do ?                                                                                                                                                                                                                                 
                                                    |\n| :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| Depending on the quality of the two remaining reference genes, you should either do nothing or do the same as in the previous example where one of your genes of interest failed. If the two remaining reference genes are stable you can do the normalization with the two remaining reference genes. |\n\nThree samples failed completely.\n\n| What's the first thing I need to do ?                                                                                                                             |\n| :---------------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| Since they failed completely, they are probably of low quality. Therefore, you have to prepare the samples again, check their quality and then use them for qPCR. |\n\n| Do I need to include IRCs ?                                                                                             |\n| :---------------------------------------------------------------------------------------------------------------------- |\n| Yes. If you want to compare these samples with the samples that didn't fail, you have to perform inter-run calibration. |\n\nThree samples failed for one of the genes of interest\n\n| What is the first question I need to ask ? |\n| :----------------------------------------- |\n| Is the gene expressed in these samples ?   |\n\n| Is it possible the RNA of these three samples was of low quality ?        
|\n| :------------------------------------------------------------------------ |\n| Not likely, the measurements for the other genes in these samples are ok. |\n\nThree samples failed for one of the reference genes\n\n| Can I use the measurements of that reference gene in the non-failing samples for normalization ?                                         |\n| :--------------------------------------------------------------------------------------------------------------------------------------- |\n| No, qbasePLUS requires that you use the same reference genes for all samples so you have to discard all samples for that reference gene. |\n\n### Exercise 7: dilution series for calculating amplification efficiencies\n\nIn my qPCR experiment I want to study 8 new genes for which I had to design new primer pairs in 12 samples (samples of interest and control samples). I want to use 2 technical replicates and 96 well plates.\n\n| What is the first thing I need to do ?                                                        |\n| :-------------------------------------------------------------------------------------------- |\n| Perform a pilot experiment to determine the amplification efficiencies of these primer pairs. |\n\nFor this I need a dilution series of representative cDNA template.\n\n| How many dilutions would you include ?                                           |\n| :------------------------------------------------------------------------------- |\n| A dilution series with 6 dilutions for 8 genes nicely fits into a 96 well plate. |\n\nA few weeks after my initial qPCR experiment I want to test these 8 genes in a new set of samples.\n\n| Do I have to repeat the pilot experiment ?      |\n| :---------------------------------------------- |\n| No, dilution series do not need to be repeated. 
|","You need to do inter-run calibration if you want to compare samples from different runs e.g.:\n\n  - when it is not possible to get all samples for the same gene on the same plate\n  - when you do additional runs weeks or months after your initial experiment\n\nOf course there is a lot of variability between runs on a qPCR instrument:\n\n  - thermal block is not always heating uniformly\n  - quality of the lamp, the filters and the detector decreases over time\n  - data analysis settings on the qPCR instrument (baseline correction and threshold) can be slightly different\n  - efficiency of reagents (polymerase, fluorophores) is variable\n  - optical properties of the plastic plates vary\n\nFortunately, inter-run calibration allows you to eliminate most of this variability.\n\nIn this experiment we will analyze the data from the gene expression experiment (see Analyzing gene expression data in qbase+) together with data from 2 runs (Run4 and Run5) that were done weeks after the initial gene expression experiment.\n\nBecause the data comes from two different experiments spread over time, we have included three inter-run calibrators on the plates: Sample01, Sample02 and Sample03.\n\nThe principle of the IRCs is very similar to that of the reference genes:\nIn theory, the IRCs should have the same NRQ in each run. 
In practice, the difference in NRQ between two runs is a measure of the inter-run variation and can be used to adjust the NRQs to remove the inter-run variation.\n\n#### Creating a new Experiment\n| Import [Run1](http://data.bits.vib.be/pub/trainingen/qbasePLUS/DataTraining/Run1.xls), [Run2](http://data.bits.vib.be/pub/trainingen/qbasePLUS/DataTraining/Run2.xls), [Run3](http://data.bits.vib.be/pub/trainingen/qbasePLUS/DataTraining/Run3.xls) (all three in CFX format), [Run4](http://data.bits.vib.be/pub/trainingen/qbasePLUS/DataTraining/Run4.xls) and [Run5](http://data.bits.vib.be/pub/trainingen/qbasePLUS/DataTraining/Run5.xls) (the latter two are in qBase format).\n| :----------------------------- |\n| Since the data is in files of two different formats, you have to do a separate import for each format. So first import Run1, Run2 and Run3, then import Run4 and Run5. You can find the details on how to import CFX files in Loading data into qbase+.\nThe details of importing qBase files are in Analyzing data from a geNorm pilot experiment in qbase+\n\n#### Analyzing the data\n\n| Use assay specific amplification efficiencies.\n| :----------------------------- |\n| You can find the details on how to convert the targets in the **Taking into account amplification efficiencies** section of Analyzing gene expression data in qbase+\n\nIn Analyzing gene expression data in qbase+ we have already checked the stability of the reference genes (see **Normalization** section). 
We determined that Flexible did not show stable expression.\n\n| Convert Stable and Nonregulated to Reference targets.\n| :----------------------------- |\n| You can find the details on how to convert the targets in the **Normalization** section of Analyzing gene expression data in qbase+\n| Appoint Sample01, Sample02 and Sample03 as IRCs.\n| :----------------------------- |\n| Leave the Analysis wizard by clicking the **Close wizard** button in the top menu.\n\n - Expand **Intermediate results** (red) in the **Project Explorer**\n - Double click **Interrun calibration** (green)\n\nThis opens the **Interrun calibration window**:\n\n - Click the **New** button (blue) to create a IRC\n - Once the IRC is created you have to appoint samples to it: select **Sample01** in the list of **Other samples**\n - Click the **Add Sample** button (purple)\n - Remember that you cannot give IRCs the same name in different runs: the software would think that they are technical replicates spread over different plates (which is not allowed). Therefore, in Run4 and Run5 we have given Sample01 another name: Sample01_2. Select **Sample01_2** in the list of **Other samples**\n - Click the **Add Sample** button (purple)\n\nYou have appointed the first IRC (grey), now do the same for the other two IRCs.\n\nRemember that for each target the variability of the normalized\nexpression levels of the IRCs between different runs will be used to\nadjust the other normalized expression levels of that target gene. 
The\nadjustment is done by amplifying the normalized expression levels with a\ncalibration factor that is calculated based on the normalized expression\nlevels of the IRCs.\nSince variability between runs is the same for each IRC, you expect that\nall IRCs measure the variability between the runs to the same extent,\nhence leading to similar calibration factors.\n\n| Do these IRCs generate similar calibration factors ?\n| :----------------------------- |\n| Open the **Calibration Factors** tab (red) of the **Interrun calibration window** and look at the result for Duvel:\n\nYou see that IRC2 returns a substantially different calibration factor in Run5 (green) so the validity of this IRC should be interpreted with care.\nFor Leffe the IRCs also gives inconsistent results in Run5. Switch to the results for Leffe by selecting **Leffe** in the **Targets** list (blue)\n| Do you still see the same expression pattern for Palm as you did in the first three runs ?\n| :----------------------------- |\n| Open the target bar chart for Palm.\n\nYou see that the pattern Palm showed in the first three runs (sample01 to sample16): high expression in the odd and low expression in the even samples is reversed in the samples from Run4 and Run5 (sample17 to sample25). In the latter runs you see high expression in the even and low expression in the odd samples. However, without annotation for Run4 and Run5 (which samples are treated and which not) it's impossible to interpret the bar chart.\n\n1. [Link](http://youtu.be/OJFsuZqNUHs)","The following exercise will make you familiar with the Primer3Plus software for designing primers for PCR. 
Primer3Plus is the user-friendly version of Primer3, the standard software for primer design.\n\n### Criteria for qPCR primers\n\nPrimers for qPCR have to follow all the guidelines for regular primers and an additional set of rules specific for qPCR primers:\n\n  - qPCR products are small: 80-160 bp\n  - use intron or exon-exon junction spanning primers to detect genomic DNA contamination in the RNA samples. Primers of intron spanning primer pairs are located at both sides of an intron and will therefore generate a larger product on genomic DNA (containing the intron). Primer pairs containing an exon-exon junction spanning primer will not generate a PCR product on genomic DNA since the exon-exon junction only exists in the cDNA.\n  - primer length between 9 and 30 bp with an optimum at 20 bp\n  - melting temperature (Tm) of the primers between 58 and 60°C with an optimum at 59°C\n  - maximum Tm difference between the primers of a pair: 2°C\n  - GC content of the primers between 30 and 80% with an optimum at 50%\n  - the 5 nucleotides at the 3' end of the primers should have no more than 2 G or C bases\n  - avoid runs of 4 or more identical nucleotides (especially Gs)\n  - primers must specifically target the region you want to amplify\n\nThere are many programs for designing primers, the most important ones:\n\n  - [Primer3](http://frodo.wi.mit.edu/) \\[1\\] or use its user-friendly version: [Primer3Plus](http://primer3plus.com/cgi-bin/dev/primer3plus.cgi)\\[2\\]\n  - [PrimerBLAST](http://www.ncbi.nlm.nih.gov/tools/primer-blast/index.cgi?LINK_LOC=BlastHome)\\[3\\]\n\nThe major downside of Primer3 and Primer3Plus is the fact that you have to check the specificity of the primers yourself. Primer3 will suggest a number of primer pairs that fulfill all of the above requirements, but Primer3 will not check the specificity of the primers. So you have to use BLAST to check the specificity of the suggested primer pairs. 
Very often, the selected primers are not specific and you have to repeat the entire Primer3 analysis.\nIf you use Primer3 and do the BLAST yourself, BLAST against Refseq sequences unless they are not available for the organism you work with or you have reasons to believe that they are not complete (i.e. they do not represent the full genome). For model organisms, you can BLAST against the Refseq database. Limit the database to sequences from the organism you work with.\nAdditionally, it is especially important to check that the primers are specific at the 3' end because that's the site where the polymerase will attach nucleotides. So it is recommended to not use primers that contain long identical stretches (\\> 15nt for primers of 20nt long) to other regions in the genome, and certainly not if these stretches comprise the last nucleotide at the 3' end of the primer.\nFor these exercises we will use PrimerBLAST since [it uses the same algorithm to pick primers as Primer3](http://www.ncbi.nlm.nih.gov/tools/primer-blast/primerinfo.html) \\[4\\] and does the specificity check for you\\!\n\n## Designing qPCR primers for the fruit fly tap gene\n\n### Designing qPCR primers using PrimerBLAST\n\nThe RefSeq entry NM_079400 contains the sequence of the D. melanogaster mRNA coding for tap, the target of Poxn. Tap encodes a bHLH protein expressed in larval chemosensory organs and involved in the response to sugar and salt. We wish to amplify the region encoding the Helix-loop-helix domain. In the sequence of the RefSeq record, the domain is located between position +577 and +745.\nWe want to design qPCR primers for measuring the expression level of the hlh domain using SYBR green. Remember that it is advised to design intron/exon-exon junction spanning primers for qPCR experiments that are based on fluorescent labels to detect/avoid amplification of contaminating genomic DNA.\n\n| Check in NCBI's Gene database if the hlh domain contains any introns ? 
|\n| :------------------------------ |\n|To know the location of the introns, you need the genomic sequence instead of the mRNA sequence.\n\n - Go to [the NCBI RefSeq record](https://www.ncbi.nlm.nih.gov/nuccore/NM_079400).\n - In the right menu click the link to the **Gene** record\n - In the **Genomic regions, transcripts and products** section you can see that the gene contains no introns: the transcript is not chopped up into pieces when aligned to the genome. Click [here](https://www.ncbi.nlm.nih.gov/gene/39934) for an example of a gene with introns.\n\nNext, we will design primers to measure the expression of the hlh domain.\n\n| Go to Primer BLAST by using the link in the Refseq record |\n| :------------------------------ |\n|Go back to the RefSeq mRNA record. There, you can go directly to PrimerBLAST by clicking the **Pick Primers** link in the **Analyze this sequence** section of the right menu.\n\nSince you want to measure the expression of the hlh domain you want\nprimers that are located inside the domain.\n\n| Define the range of the sequence in which you want to design primers. |\n| :------------------------------ |\n|You have to specify the range as follows:\n\n| Define the primer parameters to comply with the rules of qPCR primer design: product size and Tm. |\n| :------------------------------ |\n|To comply with the rules for qPCR primer design, you have to change the settings for PCR product size and melting temperature:\n\n| The PrimerBLAST automatically decides to check primer specificity in the Drosophila (organism ID: 7227) RefSeq mRNA database which is exactly what you want. For the qPCR you are going to use RNA samples from fruitfly. This means that the primers will only come into contact with Drosophila mRNAs so you only have to check their specificity in this database. Make sure the last 2 nucleotides are completely specific. 
|\n| :------------------------------ |\n|You want to ensure that the 3' end of the primers really is specific:\n\nThe PrimerBLAST gives you a set of 9 primer pairs that are specific (according to the criteria that you have specified) and that fulfill all other requirements that you have defined. Look at the detailed report of the first primer pair:\nAll parameters are quite self-explanatory except for the Self complementary and Self 3'complementarity scores.\n\n  - The first score represents the local alignment score when aligning a primer to itself. The scoring system gives 1.00 for a match, -1.00 for a mismatch. This means that the lower the score (the more mismatches), the less likely that the primer binds to itself.\n  - The second score represents the global alignment score when aligning a primer to itself. Here again, the lower the score, the better.\n\nThe scores are followed by information on the specificity of the primer: alignments of the two primers to all target sequences from the database that match the criteria that you specified. In these alignments dots represent matching nucleotides while letters represent mismatches. A specific primer pair will have two alignments (one for each primer): both perfect alignments (all dots) to the sequence you want to amplify.\n\n### Analyzing primer characteristics using OligoAnalyzer\n\n[OligoAnalyzer](https://eu.idtdna.com/calc/analyzer) is a tool implemented by ID\\&T (who sell primers) to check the characteristics of your primers. Take the first primer that is suggested by Primer-BLAST, the pair resulting in a product of 100bp.\n\n| What's the Tm of the first primer ? |\n| :------------------------------ |\n|Copy the sequence of the first primer in the **Sequence** box, adjust the concentrations to these that are typically used in PCR (see slides) and click **Analyze**:\nAs you can see the predicted melting temperature is 63.9 ºC, which is slightly different from the prediction made by BLAST. 
There are many different methods to predict Tm and each method will give a different result. Assumed concentrations of primers and ions have an enormous impact on the Tm prediction. So don't worry about these differences: these are theoretical calculations anyway, the only way to determine Tm values is by doing actual PCR. As long as the difference in Tm between the two primers is not too large, everything is fine.\n\n| What's the Tm of the second primer ?                                                                                                                                             |\n| :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| Copy the sequence of the second primer in the **Sequence** box and click **Analyze**. The predicted melting temperature is also 63.9 ºC , the same Tm as the first primer. |\n\nRemember that the second primer had a large Self complementarity score according to PrimerBLAST.\n\n| Check the self-complementarity of the second primer in OligoAnalyzer ? |\n| :------------------------------ |\n|Click **Self-Dimer**:\n\nYou see that the highest scoring alignment indeed has 6 matches, giving a score of 6 as predicted by PrimerBLAST.\n\n| Do you expect this self-complementarity will give problems in the PCR ? |\n| :------------------------------ |\n|No, the complementarity is concentrated at the center of the primer, not at the 3' end. Since polymerases add bases at the 3’ end of the primer, the primer duplex cannot be extended so it will not give rise to aspecific products. 
[ID&amp;T](https://eu.idtdna.com/pages/docs/default-source/default-document-library/idt_self-dimer_and_hetero-dimer_results_guide.pdf) recommends to avoid complementary stretches of more than 2 bp at the 3’ end.\nHowever, even if the primer dimer cannot be extended, it could interfere when its formation competes with the annealing of primer and target. This is only the case when the stability of the dimer is similar to the stability of a perfectly matched primer-target duplex. The stability of the perfectly matched duplex is shown as a Maximum Delta G at the top of the results. So non-extendable dimer structures that are much shorter than the intended duplex, as we have here, are not going to disrupt the PCR reaction.\nIt is advised to review all possible interactions between primers so both Self-Dimer (primers binding to themselves) and Hetero-Dimer (primers binding to each other) interactions between primers are examined.\n\n| Is it likely that the primers bind to each other ? |\n| :------------------------------ |\n|Click **Hetero-Dimer**:\n\nThis opens a text box to enter the second primer. Click **Analyze**. There is one structure (the fourth one) that looks problematic because there is a stretch of 3 matching nucleotides at the 3'end of one of the primers.\n\nSo you might consider taking a look at the second pair of primers that PrimerBLAST suggests. On the other hand, this structure has a relatively high free energy (delta G). The structure with the lowest total free energy, the target-primer duplex, is most important because it will dominate in solution. Structures with higher free energy are less stable and will be present in smaller amounts in the reaction mixture.\n\nTake a look at the second primer pair that was suggested by PrimerBLAST.\n\n| Is it likely that these primers bind to each other ?                                 
|\n| :----------------------------------------------------------------------------------- |\n| No these primers do not form duplex structures that could pose a problem during PCR. |\n\n## Designing qPCR primers for the human F9 gene\n\n### Designing qPCR primers using PrimerBLAST\n\nThe RefSeq entry NM_000133.3 contains the sequence of the human mRNA coding for coagulation factor F9. The gene contains 8 coding exons and gives rise to a transcript of 2780 bp encoding a protein of 461 amino acids.\nNext, we want to design primers to measure the expression of the F9 gene.\n\nGo to [the RefSeq record of this transcript](http://www.ncbi.nlm.nih.gov/nuccore/NM_000133.3) to study its structure. When you scroll down to the **features** section you see that the CDS is located from position 40 to position 1415. Since RNA degradation starts at the 5'end of transcripts, we don't want to pick primers at the 5'end. On the other hand, we don't want to pick primers in the long 3'UTR either because it doesn't contain any introns (the exons are all coding) and we want to design exon-exon junction or intron spanning primers.\nLet's try to find exon-exon junction spanning primers between position 400 and 1600, with optimal anneal temperature = 60.\n\n| Find primers that fulfill the above defined criteria |\n| :------------------------------ |\n|Go to [PrimerBLAST](http://www.ncbi.nlm.nih.gov/tools/primer-blast/index.cgi?LINK_LOC=BlastHome) and fill in the form as follows:\n\nExclude predicted sequences in the database to search in .\n\n| Find primers that fulfill the above defined criteria |\n| :------------------------------ |\n|Go to [PrimerBLAST](http://www.ncbi.nlm.nih.gov/tools/primer-blast/index.cgi?LINK_LOC=BlastHome) and fill in the remainder of the form as follows:\n\nThe PrimerBLAST gives you a set of 10 primer pairs. 
Look at the detailed\nreport of the first primer pair:\n\nAs you can see the primers are not specific: they can bind to various other targets albeit with lower affinity because of the mismatches. The best option seems to be primer pair 7, which binds to both F9 transcript variants and potentially to one unintended target, but as you can see the last nucleotide at the 3' end of both primers is specific.\n\n### In silico PCR in the UCSC Browser\n\nWe will proceed using the third primer pair Primer-BLAST suggests. You can visualize the PCR product (and additional annotation) in the UCSC Genome Browser using [UCSC's In Silico PCR tool](http://genome.ucsc.edu/cgi-bin/hgPcr).\nSelect the most recent version of the human genome and paste the sequences of forward and reverse primers in their respective boxes. Click **submit**\nNormally, this returns the location and the sequence of the PCR product but our primer pair doesn't return a match. When you think about it, this was to be expected since we are working with exon-exon junction spanning primers that are not able to match the genome sequence. So checking SNPs is not so straight-forward in the case of exon-exon junction spanning primers.\nWe will repeat the primer search now searching for intron-spanning primers to show you how to use the in silico PCR tool. Taking into account the fact that the results for the exon-exon junction spanning primers were so messy we will make the search more stringent this time:\n\n  - We will set the minimum number of mismatches to 4\n  - and at least 3 mismatches in the last 3 bps at the 3'end\n\n| Find intron spanning primers that fulfill the above defined criteria |\n| :------------------------------ |\n|Go back to the Primer-BLAST and fill in the form like in the previous exercise except that they should span an intron:\n\nPrimer-BLAST returns 10 primer pairs. 
Again the seventh primer pair is\nthe specific one.\n\n| Take the seventh suggested primer pair and check for SNPs in the UCSC Browser |\n| :------------------------------ |\n|Go to [UCSC's In Silico PCR tool](http://genome.ucsc.edu/cgi-bin/hgPcr) and paste the sequences of forward and reverse primers in their respective boxes.\nThis time the search finds a PCR product:\n\nClicking the location visualizes the PCR product in the UCSC genome browser. Remove unnecessary tracks by right clicking the box in front of them and selecting **hide**\n\nAdd tracks showing relevant annotation like position of SNPs...\n\nSetting the SNPs track from **hide** to **full** shows the SNPs in the browser. Center the forward primer by grabbing and dragging it to the center.\n\nZoom in to **base** display to see if the forward primer is matching any SNPs.\n\nAs you can see the forward primer does match two SNPs but none of them are located near the 3'end of the primer.\n\n1.  <http://frodo.wi.mit.edu/>\n2.  <http://primer3plus.com/cgi-bin/dev/primer3plus.cgi>\n3.  <http://www.ncbi.nlm.nih.gov/tools/primer-blast/index.cgi?LINK_LOC=BlastHome>\n4.  <http://www.ncbi.nlm.nih.gov/tools/primer-blast/primerinfo.html>","# 1. Introduction\n\nRstudio is a popular platform for downstream data-analysis, statistics, machine learning and more scientific related analysis using the R language. If you're unfamiliar with R and Rstudio, some materials on this website that will get you started are accessible via [this link](https://material.bits.vib.be/topics/R/). Until now we have focused on the core principles of Git & GitHub, which gives us enough knowledge to start integrating in other platforms. \n\nThere are three plausible scenarios:\n1. You have a version controlled project on your computer which you want to integrate in Rstudio\n2. You have a version controlled project on GitHub which you want to integrate in Rstudio locally \n3. 
You have an Rstudio project that you now want to start version controlling\n\nCreating a version controlled project in Rstudio from each of these scenarios is discussed in section 2: *Starting a project*. Git's features in Rstudio are explored further in section 3: *Git's features in Rstudio*.\n\nWe will use the repository that we created in the previous chapters of this tutorial. A sample repository is also downloadable [here](https://github.com/vibbits/introduction-github). Download the repository as a ZIP-file and extract it.  \n\n# 2. Starting a project \n\n## 2.1 Integrating a version controlled project in Rstudio (scenario 1 & 2)\nLet's start by making a new project (File > New project...). The following screen pops up:\n\n---\n\n<center><img src=\"../../images/rstudio-1.PNG\" /></center>\n\n---\n\nThere are two options relevant for us to create a project in RStudio initialized with GitHub:\n- **Existing Directory**: The preferred choice when a project folder already exists and which has previously been initialized with Git. \n- **Version Control**: Ideal for creating a new R project based on a repository in GitHub. \n\nGiven the situation that there is a folder on our computer, created during this tutorial and initialized with Git, we will go for the first option. Select **Existing Directory**, browse to the location of the project folder and create the project. (If you've downloaded the sample repository mentioned above, this option does not hold as it only downloads the files)\n\n**Alternatively**, if we were to choose to create a new R project based on a GitHub repository, you would need to select **Version Control**, followed by *Git* and then copy the link of the GitHub repository from the green *Clone or Download* button and add it as the repository URL, and finally create the project. 
Using the sample repository for this option would mean that we need to fill in the following link as repository URL: *https://github.com/vibbits/introduction-github.git*.\n\n---\n\n<center><img src=\"../../images/rstudio-2.PNG\" /></center>\n\n---\n\nNotice that after creating the repository, a `.gitignore` file is added on the fly containing the following 4 lines. These lines will make sure that irrelevant information related to Rstudio is neglected.   \n```\n.Rproj.user\n.Rhistory\n.RData\n.Ruserdata\n``` \n\n## 2.2. Initiating version controlling on an existing Rstudio project (scenario 3)\nA third option assumes that you already have an R/Rstudio project. Click on *Tools > Version control > Project Setup...*. In the new screen, select Git as the version control system as depicted below and select yes when asked \"Do you want to initialize a new git repository for this project?\". Rstudio will need to restart for the changes to take place.\n\n---\n\n<center><img src=\"../../images/rstudio-7.PNG\" /></center>\n\n---\n\nThis approach will initialize Git on the project. As discussed in chapter 3, this local repository does not exist on GitHub yet, hence we can't push our commits to GitHub. In order to do so, we'll have to make a repository on GitHub first (see chapter 3.2.). This repository should be initialized without(!!) a README file, `.gitignore` file or license. Copy the link that GitHub created for the new repository (e.g. https://github.com/vibbits/rstudio-project.git). In Rstudio, find a *Git* tab in the upper right corner and click on *New Branch* (or the icon next to it).  \n\n---\n\n<center><img src=\"../../images/rstudio-8-1.PNG\" /></center>\n\n---\n\nClick on *add remote* in the new screen, paste the GitHub link and add the name of the project. \n\n---\n\n<center><img src=\"../../images/rstudio-9.PNG\" /></center>\n\n---\n\nFinally, add the name of the new branch *main* and hit create. Select *overwrite* when asked.  
\n\n---\n\n<center><img src=\"../../images/rstudio-8-1.PNG\" /></center>\n\n---\n\n# 3. Git's features in Rstudio\n\nBy initializing Git on an Rstudio project, there appears a *Git* tab in the upper right corner as depicted below. The tab consists of the main actions that can be performed with Git (the window might be too small to contain the keywords related to the symbol). Neglecting the *diff* keyword which is out of scope for this tutorial, we can find the following actions: *Commit, Pull, Push, History* and *More* followed by *New Branch*, the name of the branch (*main*) and a refresh button.\n\n- **Stage**: The only action we're missing is the *staging*. Rstudio & Git actually continuously process the files within the project searching for new changes. If there is a new change it will appear in the list in the screen as depicted here for the `.gitignore` file. \n- **Commit**: Opens a new screen that controls the staging area and committing. \n- **Pull**: Pulls upstream changes from the GitHub repository into our, this local repository.\n- **Push**: Pushes previous commits to the GitHub repository.\n- **History**: Neatly visualizes the history log of the repository. Each commit, branch, contributor is reviewed in this screen. \n- **More**: Allows us to revert (undo) changes to a previous commit or ignore selected files (discussed below).\n- **New Branch**: Creates a new branch. \n\n---\n\n<center><img src=\"../../images/rstudio-3.PNG\" /></center>\n\n---\n\n\n# 4. Routine usage\n\nRecall the routine usage: *stage-commit-push*. Staging changes in Rstudio is done by simply checking the tickmarks in the list. This approach makes it very user-friendly to stage changes that are related with each other and that should be contained within the same commit. 
\n\n--- \n\n<center><img src=\"../../images/rstudio-4.PNG\" /></center>\n\n---\n\nSubsequently, click on commit and find a similar screen:\n\n--- \n\n<center><img src=\"../../images/rstudio-5.PNG\" /></center>\n\n---\n\nLet's explore this screen for a while: \n- We can find a *history* tab summarizing all the previous commits in this repository. As this project already existed before, it also contains the commits from before the integration in RStudio. \n- Next to that tab we can switch the branch, generally we leave this untouched as we're already in the preferred branch. \n- The *staging* tab allows us to stage and unstage specific files, even after they were staged in a previous step.\n- The *revert* tab is neglected in this tutorial\n- *Ignore* allows us to edit the `.gitignore` file by simply selecting the file that we want to ignore and clicking on *Ignore*. \n\nIf you're happy with the changes and the staging area, a commit message is written in the right tab and finalized by hitting the *Commit* button. A message will pop up summarizing the commit in a technical way. \n\nIf the commit has to appear on GitHub we need one more step. Click on *Push* and find your new status of the project in the GitHub repository.\n\n\n---\n\n> ### {% icon hands_on %} Exercise \n>\n> Add the `.gitignore` file to the staging area and exploit the *Ignore* button to add the *Rproj* file to the `.gitignore` file. Write a commit message, and commit and push your changes to GitHub. If the *Rproj* file already is in the `.gitignore` file, make a new example R-script which you can ignore. \n>\n>\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    > \n>    > Select *File > New File > R Script*, write something like `# test` and save the file. When they are saved, they will appear in the Git-tab. Select the files in the Git-tab and click on *More > Gitignore*. 
When you do this, the explicit name of the file will appear in the gitignore file. *Click* on Save. Now the gitignore file will appear in the Git-tab, ready to be staged, and the new file (or *Rproj* file) has disappeared from it. \n>    > The rest of the workflow remains the same. Click on the tickmarks to stage the files, click on commit, write a message in the designated textbox and push your changes to the repository on GitHub. \n>    > \n>    > \n>    > </details>\n> \n{: .hands_on}\n\n---\n \n\n","## What is Linux?\nLinux is a very popular operating system in bioinformatics. In this training you will learn why that is and how it can help you with your bioinformatics analysis. After this training you will be able to:\n- install software on Linux\n- use command line to run tools\n- use command line to handle files\n- write small scripts to automate your analysis\n\n## Linux installation\n### Live modus\nWant to test a Linux distribution? Follow this procedure: \n- Grab a USB key and put your Linux distribution (e.g. Ubuntu) on it. \n- Boot your computer from that bootable USB key, and you have a full linux OS to play around with. This 'live modus' is an easy way to test the new stuff linux has to offer. \n- Before you test anything else, check if your hardware works (printer, sound,...) and check internet connection. \n- Secondly, do you like the desktop environment? Does it suit your needs? Play around and test. \nDone testing? Just reboot your computer, remove the USB key, and the original operating system will start up again as if nothing has happened...\n\n### Virtual machine\nGo to <https://www.virtualbox.org> and choose Downloads. Download the correct installer for your platform and install VirtualBox on your computer.\nSometimes VirtualBox displays errors when starting. Or, when trying VirtualBox for the first time, a virtual machine might not start. 
These problems might be related to not having virtualization enabled on your CPU.\nAll the latest processors and motherboards support virtualization technology (vt-x/amd-v). In many cases, VirtualBox requires this to be enabled. To do so, you have to reboot your computer, and get into the BIOS menu. In the BIOS menu, you should enable virtualization. Where this setting is located is different between computers, so check your hardware vendor for the BIOS options, or browse around in your BIOS menu until you find it. Most of the time it is named in a decent way. Enable the option, and boot your computer.\n\nWe need to download an .iso file, which is a (binary) copy of an installation DVD containing your distribution of choice. You can find it in the downloads section of the distribution's web page. You can download it using a direct download, depending on your preference and the options offered by the distribution's web page.\nYou can run Linux in 'live modus' (see instructions above) and install it directly on your virtual machine. Afterwards you have to reboot your virtual machine to get out of the live modus.\n\n### Dual boot\nMulti-booting allows more than one operating system to reside on one computer, for example if you have a primary operating system and an alternate system that you use less frequently. Another reason for multi-booting can be to investigate or test a new operating system without switching completely. Multi-booting allows a new operating system to configure all applications needed, and migrate data before removing the old operating system.\n\n## Training material\n[slides](http://data.bits.vib.be/pub/trainingen/Linux/Command_line_2019.pdf)\n\nOn the training there is a Linux Ubuntu installation available on a cloud environment. 
To access Linux we use Google Chrome and the 'VNC Viewer for Google Chrome' application.\nWhen you launch the application, you have to enter an IP address, this will be mentioned on the training.\n\n### Additional information\n- [Linux Beginner's Cheat page](https://wiki.bits.vib.be/index.php/Linux_Beginner%27s_Cheat_page)\n- [The practical command line cheat sheet](https://wiki.bits.vib.be/index.php/The_practical_command_line_cheat_sheet)\n- [AWK](https://wiki.bits.vib.be/index.php/AWK)\n- [Terminal keyboard shortcuts](http://data.bits.vib.be/pub/trainingen/cheat-sheets/Bash_Keyboard_Shortcuts.pdf)","## A script\nA script is just a plain text file. I will show this below. It contains written instructions, that can be understood by a programming language, in our case **bash** .\n\n### An example script \n\n> Create a text file named 'buddy' in your home with following content:\n```\nbadday=\"Cheer up\"\ngoodday=\"Doing great\"\necho \"$badday, $USER !\"\necho \"$goodday, $USER !\"\n```\n\n> One way of doing this is:\n```\nnano buddy\n```\n\n> and copy of the contents of the header above. Save the contents by pressing <ctrl>+O. Close nano with <ctrl>+x\n> What type of file did you create?\n```\nfile buddy\nbuddy: ASCII text\n```\n\n> That file contains plain text. To execute the commands in that file, feed it as an argument to the program 'bash'.\n```\nbash buddy\nCheer up, bits !\nDoing great, bits !\n```\n\nFew things to notice:\n- in the script, we have defined 2 **variables** 'badday' and 'goodday'\n- their values can be displayed by the program **echo** which takes as an argument the name of the variable preceded by a **$** sign.\n- the $USER variable, is an **environment variable**. They can be used in scripts. Env variables are typically written in capitals.\n\n### Getting more professional\nWe can make this easier. 
If you start your script with the symbols '#!' and next specify the path to the interpreter, the terminal will feed this script automatically to the right interpreter for you! To see what this means, follow these steps.\n> Find out the path to the program bash\n```\nwhich bash\n/bin/bash\n```\n\nNow we know the path to bash, we have to provide this path, on the very first line, preceded by **#!** (shebang or crunchbang). If you have another type of script, let's say perl, you find out the path to perl, and add this path behind a #! on the very first line.\n> Open the text file 'buddy', and add at the start of the file '#!' followed by the path to bash:\n```\nnano buddy\n```\n\n... edit the text\n```\ncat buddy\n#!/bin/bash\nbadday=\"Cheer up\"\ngoodday=\"Doing great\"\necho \"$badday, $USER !\"\necho \"$goodday, $USER !\"\n```\n\n> Before turning the text file into a script, set the execute permission (to allow execution) with chmod\n```\nchmod +x buddy\n```\n\n> What type of file is your script?\n```\nfile buddy \nbuddy: Bourne-Again shell script, ASCII text executable\n```\n\nBy setting the **shebang**, the interpreter on the command line knows that this is a bash script! \n> Now run your script as if it were a program (./<script_name>)\n```\n./buddy\nCheer up, bits !\nDoing great, bits !\n```\n\nTo make it more readable, often the extension **.sh** is given to the text file. Note that this is not necessary! Linux does not define file types by extensions.\n> Rename your script to 'buddy.sh'\n```\n$ mv buddy buddy.sh\n```\n\n> **Alternative (less typing!)**\n```\n$ mv buddy{,.sh}\n```\n\n### A good habit\n\n>The last line of your script should be 'exit 0'. If bash reaches this line, it means that the script was successfully executed. Add it by opening the file with 'nano' and modifying its contents.\n```\n$ cat buddy.sh \n#!/bin/bash\nbadday=\"Cheer up\"\ngoodday=\"Doing great\"\necho \"$badday, $USER !\"\necho \"$goodday, $USER !\"\nexit 0\n```\n\n> Alternative. 
Less typing! \n```\necho \"exit 0\" >> buddy.sh\n```\n\nThis was our first bash script! I hope it was a painless experience.\n\n## Download a Perl script\nMany bioinformatics programs are written in python or perl. It's quick to type some python or perl code in a text file, and get your job done. Those scripts are **text files**. You can download and store scripts on your computer. Usually these files have .py or .pl extension. As long as you have python or perl on your system (by default in Linux!), you can run the scripts. \n\n### Run perl code\nLet's try a small script below.\n\n- Download a simple perl script [here](http://data.bits.vib.be/pub/trainingmaterial/introduction_to_linux_for_bioinformatics/motifs_new.pl)\n- Download the dna file [here](http://data.bits.vib.be/pub/trainingmaterial/introduction_to_linux_for_bioinformatics/dna.txt)\n- Save the file, under ~/Downloads for now.\n- Open Geany on your computer, and copy the script code to Geany.\n- Execute the script by clicking the little 'gear' box. For this script, you will need to download the dna.txt file as input.\n- The results of the script appear in a small window. It will ask for an input (depending on your script). Enter the required details.\n\n### Extract some lines ## \n\n> Download the bed file [here](http://data.bits.vib.be/pub/trainingen/Linux/TAIR9_mRNA.bed) via command line \n```\nwget http://data.bits.vib.be/pub/trainingen/Linux/TAIR9_mRNA.bed\n```\n\n> Look at the first 10 lines of this file. 
\n```\n$ head TAIR9_mRNA.bed \nchr1\t2025600\t2027271\tAT1G06620.1\t0\t+\t2025617\t2027094\t0\t3\t541,322,429,\t0,833,1242,\nchr5\t2625558\t2628110\tAT5G08160.1\t0\t-\t2625902\t2627942\t0\t6\t385,143,144,186,125,573,\t2167,1523,1269,928,659,0,\nchr5\t2625558\t2628110\tAT5G08160.2\t0\t-\t2625902\t2627942\t0\t7\t258,19,143,144,186,125,573,\t2294,2167,1523,1269,928,659,0,\nchr4\t12006985\t12009520\tAT4G22890.5\t0\t+\t12007156\t12009175\t0\t10\t370,107,97,101,57,77,163,98,80,263,\t0,802,1007,1196,1392,1533,1703,1945,2120,2272,\nchr4\t12007040\t12009206\tAT4G22890.2\t0\t+\t12007156\t12009175\t0\t9\t315,113,97,101,57,77,163,98,101,\t0,741,952,1141,1337,1478,1648,1890,2065,\nchr4\t12006985\t12009518\tAT4G22890.3\t0\t+\t12007156\t12009175\t0\t10\t370,113,97,101,57,77,163,98,80,257,\t0,796,1007,1196,1392,1533,1703,1945,2120,2276,\nchr4\t12006985\t12009520\tAT4G22890.4\t0\t+\t12007156\t12009175\t0\t10\t370,104,97,101,57,77,163,98,80,263,\t0,805,1007,1196,1392,1533,1703,1945,2120,2272,\nchr4\t12006985\t12009520\tAT4G22890.1\t0\t+\t12007156\t12009175\t0\t10\t370,113,97,101,57,77,163,98,80,263,\t0,796,1007,1196,1392,1533,1703,1945,2120,2272,\nchr2\t14578539\t14581727\tAT2G34630.2\t0\t+\t14578688\t14581632\t0\t11\t293,93,81,72,132,87,72,86,133,189,275,\t0,797,1120,1320,1488,1711,1898,2165,2435,2649,2913,\nchr2\t14578629\t14581727\tAT2G34630.1\t0\t+\t14579725\t14581632\t0\t11\t203,96,81,72,132,87,72,86,133,189,275,\t0,704,1030,1230,1398,1621,1808,2075,2345,2559,2823,\n```\n\nThis is a typical bioinformatics text file, with every row divided in field by tabs. 
\n> Extract all lines that start with chr1 from the TAIR9_mRNA.bed and put them in a new text file “chr1_TAIR9_mRNA.bed”.\n```\n $ grep \"^chr1\" TAIR9_mRNA.bed > chr1_TAIR9_mRNA.bed\n```\n\n### Checking the data ##\n\n> Download human chromosome 21 from [this link](https://data.bits.vib.be/pub/trainingen/Linux/Homo_sapiens.dna.chromosome21.zip) and unzip the file.\n```\nwget https://data.bits.vib.be/pub/trainingen/Linux/Homo_sapiens.dna.chromosome21.zip\n```\n\n```\nunzip Homo_sapiens.dna.chromosome21.zip\n```\n\nEntries in a fasta file start with > \n> How many entries are in that fasta file? Remember you can combine commands with a |.\n```\ngrep \"^>\" Homo_sapiens.GRCh37.73.dna.chromosome.21.fa | wc -l\n```\n\n### How many?\n\nUse the TAIR9_mRNA.bed file used in the first exercise. Remember it looks like this\n```\nchr1\t2025600\t2027271\tAT1G06620.1\t0\t+\t2025617\t2027094\t0\t3\t\nchr5\t2625558\t2628110\tAT5G08160.1\t0\t-\t2625902\t2627942\t0\t6\t\nchr5\t2625558\t2628110\tAT5G08160.2\t0\t-\t2625902\t2627942\t0\t7\t\nchr4\t12006985\t12009520\tAT4G22890.5\t0\t+\t12007156\t12009175\t0\t10\t\nchr4\t12007040\t12009206\tAT4G22890.2\t0\t+\t12007156\t12009175\t0\t9\t\n```\n\nIf you want to find entries that lie on the + strand of a certain chromosome, you need to find lines that start with the chromosome number and that contain a + sign. The number of characters between the chromosome number and the + sign is variable.\n> How many genes are lying on the + strand of the first chromosome?\n> Since you need to use the + sign to represent a set of characters of variable length, you need extended regular expressions (egrep, i.e. grep -E) for this:\n```\ngrep -E \"^chr1.+\\+\" TAIR9_mRNA.bed | wc -l\n```\n\n### More complex extraction\n\nGet the last exon size for all mRNA records in Arabidopsis. Use TAIR9_mRNA.bed for this: this file contains the exon sizes. See the [.BED page](https://wiki.bits.vib.be/index.php/.bed) to check that the field we need is field 11. 
This contains a comma separated list of the sizes of all the exons of an mRNA\n> Get the exon sizes for all mRNA records in Arabidopsis. Write them to a file called exons.txt\n```\nawk '{ print $11 }' TAIR9_mRNA.bed > exons.txt\n```\n\n> Take a look at the first 10 lines of exons.txt\n```\nhead exons.txt\n```\n\nIf we try to print the last field with awk, using ',' as a delimiter, things go wrong:\n```\nawk -F',' '{ print $NF }' exons.txt > lastexons.txt\n```\n\nThe reason is that the last field is empty, because the lines end with a ','. We need to remove the last ',' and can use sed for this.\n> Remove the last comma from the lines and save in a file called exonsclean.txt. You want to substitute the comma at the end of the line by nothing:\n```\nsed 's/,$//' exons.txt > exonsclean.txt\nhead exonsclean.txt\n```\n\n> Fetch the last field from exonsclean.txt and save in a file called lastexons.txt\n```\nawk -F',' '{ print $NF }' exonsclean.txt > lastexons.txt\nhead lastexons.txt\n```\n\n> Sort exon sizes from largest to smallest into a file called lastexonssort.txt\n```\nsort -nr lastexons.txt > lastexonssort.txt\nhead lastexonssort.txt\n```\n\nYou can use uniq to summarize the results\n```\nuniq -c lastexonssort.txt | head\n      2 6885\n      1 5616\n      1 5601\n      1 5361\n      1 5239\n      1 4688\n      2 4470\n      1 4446\n      1 4443\n      1 4275\n```\n\n### Analyzing a short read alignment\n\nSAM ('sequence alignment map') file format is the format which summarizes the alignment of reads to a reference genome. It is one of the key files in NGS analysis, and you can learn a lot from it. 
See the [SAM page](https://wiki.bits.vib.be/index.php/.sam) for a description of this format.\n> Download the sam file from [here](http://data.bits.vib.be/pub/trainingen/Linux/sample.sam)\n```\nwget http://data.bits.vib.be/pub/trainingen/Linux/sample.sam \n```\n\n> How many lines has the SAM file?\n```\nwc -l sample.sam\n```\n\n100015 lines\n\n> How many lines start with '@', which is the comment symbol in the SAM format?\n```\ngrep '^@' sample.sam | wc -l\n```\n\n15 lines\n\nYou can use grep to skip the lines starting with '@', since they are comment lines.\n```\ngrep -v '^@' sample.sam | head\n```\n\n> Write the FLAG field (second field) to a file called flags.txt: pipe the grep results to awk to print the second field.\n```\ngrep -v '^@' sample.sam | awk '{ print $2 }' > flags.txt\nhead flags.txt\n```\n\n> Sort and summarize (using uniq) flags.txt.\n```\nsort -nr flags.txt | uniq -c\n```\n\n> Sort the results on number of times observed (the first field). We build on the previous command, and just pipe the output to sort -nr. We do not have to use the option -k, since sort compares lines starting from the first field by default.\n```\nsort -nr flags.txt | uniq -c | sort -nr \n```\n\n### Advanced\nWe use the TAIR9_mRNA.bed to answer this.\nFirst we check how many different genes are in the file. A gene has the code ATG. Splice variants have the same AT number but different version number (the numbers after the . are different). We are not interested in splice variants so want to remove the .1, .2... before counting. You can do this by using the . as a field delimiter.\n> Remove everything after the . and save in a file called TAIRpart.txt\n```\nawk -F'.' 
'{ print $1 }' TAIR9_mRNA.bed > TAIRpart.txt\nhead TAIRpart.txt\n```\n\nNow you need to summarize the fourth column of this file and count the lines of the result\n> How many different genes are in the file?\n```\ncut -f4  TAIRpart.txt | sort | uniq | wc -l\n```\n\n27379\n\nWhen you look at TAIR9_mRNA.bed you see that the fifth column contains 0.\n> Check if there is any entry that contains another number in that column? (summarize will give you the answer)\n```\ncut -f5 TAIR9_mRNA.bed | sort -nr | uniq -c\n```\n\nNo\nAnother example: Show all Arabidopsis mRNA with more than 50 exons\n```\nawk '{ if ($10>50) print $4 }' TAIR9_mRNA.bed\n```\n\n> Print the number of exons (field number 10) of mRNAs from the first chromosome.\n```\ngrep '^chr1'  TAIR9_mRNA.bed | awk '{ print $10 }' \n```\n\n> Obtain AT numbers (field 4) and exon info (field 11)\n```\nawk '{ print $4,\",\",$11 }'  TAIR9_mRNA.bed \n```\n\n## Bash Aliases to enhance your productivity\n\nYou specify aliases in the **.bashrc file** in your home directory. \n```\nalias myalias=\"<my fancy command>\"\n```\n\nChange 'my fancy command' to a real command!!\nBefore you can use your new aliases, you have to reload the .bashrc file. You do this by \n```\n$ source ~/.bashrc\n```\n\nor \n```\n$ . ~/.bashrc\n```\n\nNow, let's do this exercise.\nSometimes you might want to open a big text file from the end on, and start scrolling towards the top. We will create an **alias** for this in this exercise.\n> Create an alias that starts scrolling from the bottom. Tip: it's less and the appropriate option you must configure. Read through the man page of less. To help you: you can search for the string \"at the end\". Open the man page of less \n```\n$ man less\n```\n\n> Type \"/at the end\" and <ENTER>. Less will search in the content for \"at the end\". 
Examine the entries with the string./ Go to the following result by typing \"/\" followed by ENTER.\n> The option is: add the alias by opening .bashrc with an editor, and adding the line:\n```\nalias sell=\"less +G\"\n```\n\n> When you have changed the content of .bashrc, it needs to be reloaded. Close your terminal and fire it up again. OR execute:\n```\n$ . ~/.bashrc\n```\n\n```\n$ source ~/.bashrc\n```\n\n> We now have **sell** to our disposal, which starts scrolling large text files from the end of the file.\n```\n$ sell /var/log/syslog\n```\n\n### Show all aliases on your system\n\nForgot an alias? To see all your aliases, run the command \n```$ alias```.\n\n## Writing loops\n\n**For** loops are used to repeat commands a number of times. We will start with two simple examples.\n> Write a for loop to create 3 files: test1.txt, test2.txt, test3.txt\n```\nfor i in 1 2 3\ndo\ntouch test$i.txt\ndone\nls -l\n```\n\n> Write a for loop to create 3 folders: folder1, folder2, folder3\n```\nfor i in 1 2 3\ndo\nmkdir folder$i\ndone\nls -l\n```\n\n","## Tutorial on the linux file system\n\n### Which protocol achieves highest compression ratio?\nLet's do a little test. Download [this compressed file](http://data.bits.vib.be/pub/trainingmaterial/introduction_to_linux_for_bioinformatics/data_linux_training.tar.gz).\n\n> Create a folder named 'Compression_exercise' in your home. Copy the downloaded tar.gz to it.\n```\n$ cd\n$ mkdir Compression_exercise\n$ cp Downloads/data_linux_training.tar.gz Compression_exercise/\n```\n\n> Unpack the data_linux_training.tar.gz  file.\n```\n$ tar -xvzf data_linux_training.tar.gz \n```\n\nAlternative: you can specify the options without the '-' sign.\n```\n$ tar xvfz data_linux_training.tar.gz \n```\n\n> Decompress the file DRR000542_1.fastq.subset.gz\n```\n$ gunzip DRR000542_1.fastq.subset.gz\n```\n\n> Copy the DRR000542_1.fastq.subset file to a new file called 'bzip2_test.fastq'. 
Compress this file with bzip2.\n```\n$ bzip2 bzip2_test.fastq\n```\n\n**Tip!** If you would like to know how long the command took to finish, use \"time\"\n```\n$ time bzip2 bzip2_test.fastq\nreal\t0m5.878s\nuser\t0m5.728s\nsys\t0m0.112s\n```\n\nThree different times are given. What matters to you is the line 'real', also called the wall-clock time.\n> Copy DRR000542_1.fastq.subset file to a new file called gzip_test.fastq and compress with gzip.\n```\n$ time gzip gzip_test.fastq\nreal\t0m5.878s\nuser\t0m5.728s\nsys\t0m0.112s\n```\n\nA relatively unknown package is lrzip, 'long range zip', which achieves very good results on big files. Let's try that one also!\n> Copy DRR000542_1.fastq.subset file to a new file called lrzip_test.fastq and compress with lrzip.\n```\n$ lrzip lrzip_test.fastq\nThe program 'lrzip' is currently not installed.  You can install it by typing:\nsudo apt-get install lrzip\n```\n\n**apt-get** is the command line tool to install software on Debian distro's. Equivalent to the software center.\n```\n$ sudo apt-get install lrzip\n[sudo] password for joachim: \nReading package lists... Done\nBuilding dependency tree       \nReading state information... Done\nThe following packages were automatically installed and are no longer required:\nlibnet-ip-perl diffstat libnet-dns-perl libparse-debianchangelog-perl\ngir1.2-unique-3.0 kde-l10n-engb python-webpy libnet-domain-tld-perl\nlibemail-valid-perl libapt-pkg-perl python-flup kde-l10n-zhcn\nUse 'apt-get autoremove' to remove them.\nThe following NEW packages will be installed:\nlrzip\n0 upgraded, 1 newly installed, 0 to remove and 0 not upgraded.\nNeed to get 159 kB of archives.\nAfter this operation, 313 kB of additional disk space will be used.\nGet:1 http://be.archive.ubuntu.com/ubuntu/ precise/universe lrzip amd64 0.608-1 [159 kB]\nFetched 159 kB in 0s (780 kB/s) \nSelecting previously unselected package lrzip.\n(Reading database ... 
662617 files and directories currently installed.)\nUnpacking lrzip (from .../lrzip_0.608-1_amd64.deb) ...\nProcessing triggers for man-db ...\nSetting up lrzip (0.608-1) ...\n```\n\nNow we can compress:\n```\n$ time lrzip lrzip_test.fastq\nOutput filename is: lrzip_test.fastq.lrz\nlrzip_test.fastq - Compression Ratio: 6.724. Average Compression Speed:  0.563MB/s.\nTotal time: 00:03:02.97\nreal\t3m3.026s\nuser\t3m1.947s\nsys\t0m0.804s\n```\n\n> Compare the sizes of the different resulting compressed files.\n```\n$ ls -lh *zip*\n-rw------- 1 bits bits 17M Oct 22 14:06 bzip2_test.fastq.bz2\n-rw------- 1 bits bits 21M Oct 22 14:06 gzip_test.fastq.gz\n-rw------- 1 bits bits 104M Oct 22 14:06 lrzip_test.fastq\n-rw------- 1 bits bits 16M Oct 22 14:10 lrzip_test.fastq.lrz\n```\n\nDecide for yourself whether the extra time needed for higher compression is worth the gain in compression.\n> Put the three files in a newly created folder 'results', and make an archive of it.\n```\n$ mkdir results\n$ mv  *{bz2,q.gz,lrz} results/\n$ ls results/\nbzip2_test.fastq.bz2  gzip_test.fastq.gz  lrzip_test.fastq.lrz\n$ tar cvf results.tar results/\n$ rm -rf results/\n$ ls -lh\ntotal 281M\n-rw------- 1 bits bits 104M May  4  2011 ERX000016.test.fastq\n-rw-r--r-- 1 bits bits 21M Oct 22 14:02 ERX000016.test.fastq.tar.gz\n-rw------- 1 bits bits 104M Oct 22 14:06 lrzip_test.fastq\n-rw-r--r-- 1 bits bits 53M Oct 22 14:28 results.tar\n```\n\n### Symbolic links\nSymbolic links (symlinks) point to a file, making the file accessible in another directory than where the file is. So you can avoid copying! When the original file is deleted, the symlink is dead. When you remove the symlink, the original file is still present. \nThe syntax for symbolic links is:\n```\n$ ln -s /home/bits/data/large.fastq /home/bits/Projects/ProjectA/\n```\n\nTip: when using **ln**, preferably provide absolute paths. 
If you want to use relative paths, first go to the directory you want the link to be in, and create the link using a relative path (using '.' and '..' to make the path).\nSymbolic links are removed as follows:\n```\n$ unlink /home/bits/Projects/ProjectA/large.fastq\n```\n\nIn contrast, there is also something called a \"hard link\" (ln without the -s option). A hard link is a second name for the same data: the data is only gone once every hard link to it has been deleted. Hard links cannot point to directories or to files on another file system, so 'ln -s' is mostly used.\n\n### Linking data instead of copying\nIn the Rice Example directory (should be available under your home): download [this annotation file](http://rice.plantbiology.msu.edu/pub/data/Eukaryotic_Projects/o_sativa/annotation_dbs/pseudomolecules/version_7.0/all.dir/all.gff3) into the 'Genome data'/'Annotation' directory. Make a symbolic link to this file in the 'Genome data'/'Sequence' directory. Read the first 10 lines from the symbolic link file.\n> When you have tried yourself, see the solution.\n```\n$ cd Rice\\ Example/\n~/Rice Example $ ls\nbin  Genome data\n~/Rice Example $ cd Genome\\ data/Annotation/\n~/Rice Example/Genome data/Annotation $ ls\n~/Rice Example/Genome data/Annotation $ wget http://rice.plantbiology.msu.edu/pub/data/Eukaryotic_Projects/o_sativa/annotation_dbs/pseudomolecules/version_7.0/all.dir/all.gff3\n--2013-10-28 11:45:26--  http://rice.plantbiology.msu.edu/pub/data/Eukaryotic_Projects/o_sativa/annotation_dbs/pseudomolecules/version_7.0/all.dir/all.gff3\n           => `all.gff3'\nResolving http://rice.plantbiology.msu.edu (http://rice.plantbiology.msu.edu)... 35.8.196.190\nConnecting to http://rice.plantbiology.msu.edu (http://rice.plantbiology.msu.edu)|35.8.196.190|:21... connected.\nLogging in as anonymous ... Logged in!\n==> SYST ... done.    ==> PWD ... done.\n==> TYPE I ... done.  ==> CWD (1) /pub/data/Eukaryotic_Projects/o_sativa/annotation_dbs/pseudomolecules/version_7.0/all.dir ... done.\n==> SIZE all.gff3 ... 81498659\n==> PASV ... done.    ==> RETR all.gff3 ... 
done.\nLength: 81498659 (78M) (unauthoritative)\n100%[======================================>] 81,498,659  1.34M/s   in 65s     \n2013-10-28 11:46:33 (1.20 MB/s) - `all.gff3' saved [81498659]\n~/Rice Example/Genome data/Annotation $ ls ..\nAnnotation  Sequence\n~/Rice Example/Genome data/Annotation $ cd ../Sequence/\n~/Rice Example/Genome data/Sequence $ ln -s ../Annotation/all.gff3 .\n~/Rice Example/Genome data/Sequence $ ls -l\ntotal 381300\nlrwxrwxrwx 1 bits bits 22 Oct 28 11:49 all.gff3 -> ../Annotation/all.gff3\n-rw-r--r-- 1 bits bits 390444160 Mar  8  2013 IRGSPb5.fa.masked\n-rw-r--r-- 1 bits bits 55 Mar  8  2013 IRGSPb5.fa.masked.gz.md5\n~/Rice Example/Genome data/Sequence $ head all.gff3 \n##gff-version 3\nChr1\tMSU_osa1r7\tgene\t2903\t10817\t.\t+\t.\tID=LOC_Os01g01010;Name=LOC_Os01g01010;Note=TBC%20domain%20containing%20protein%2C%20expressed\nChr1\tMSU_osa1r7\tmRNA\t2903\t10817\t.\t+\t.\tID=LOC_Os01g01010.1;Name=LOC_Os01g01010.1;Parent=LOC_Os01g01010\nChr1\tMSU_osa1r7\texon\t2903\t3268\t.\t+\t.\tID=LOC_Os01g01010.1:exon_1;Parent=LOC_Os01g01010.1\nChr1\tMSU_osa1r7\texon\t3354\t3616\t.\t+\t.\tID=LOC_Os01g01010.1:exon_2;Parent=LOC_Os01g01010.1\nChr1\tMSU_osa1r7\texon\t4357\t4455\t.\t+\t.\tID=LOC_Os01g01010.1:exon_3;Parent=LOC_Os01g01010.1\nChr1\tMSU_osa1r7\texon\t5457\t5560\t.\t+\t.\tID=LOC_Os01g01010.1:exon_4;Parent=LOC_Os01g01010.1\nChr1\tMSU_osa1r7\texon\t7136\t7944\t.\t+\t.\tID=LOC_Os01g01010.1:exon_5;Parent=LOC_Os01g01010.1\nChr1\tMSU_osa1r7\texon\t8028\t8150\t.\t+\t.\tID=LOC_Os01g01010.1:exon_6;Parent=LOC_Os01g01010.1\nChr1\tMSU_osa1r7\texon\t8232\t8320\t.\t+\t.\tID=LOC_Os01g01010.1:exon_7;Parent=LOC_Os01g01010.1\n```\n\n### Introduction: symbolic links to easily install manually applications\nIf a package is not available via a package manager, manual installation might be an option. I put manually applications in '''/opt'''. Next, I link them to a correct location on our system, usually '''/usr/local/bin'''. 
Below you have some examples of this, which you can try out yourself.\n\nIf you want to manually install apps, **/opt** is the advised directory. However, only the administrator ('root') can write to /opt. You can check that the /opt directory belongs to root with <pre>ls -ld /opt</pre>\nTo be able to copy and write stuff into /opt, we need root permissions. To do so, precede your commands with **sudo**, as exemplified in the next exercise below. When we do that, our password will first be asked. Next, the command is executed with root permissions. In this way, we can edit contents in root-owned directories! You are a sudoer!\n\n### Transpose, a tool to transpose\nTranspose is an extremely convenient text tool to transpose tabular data. We will use it later. The code is hosted on [SourceForge](http://sourceforge.net/projects/transpose/).\n> Download transpose installation file (zip) via the browser. Copy it to /opt using **sudo cp**.\nGo to the SourceForge website with the browser, and click on the Download button.\n```\nDownloads $ sudo cp transpose-2.0.zip /opt\n[sudo] password for joachim: \n Downloads $\n```\n\nWe need to precede the *cp* command with the *sudo* command, since only the root user can copy into */opt*.\n> Unpack the installation in /opt, compile the binary and test it with 'transpose --help'. Use sudo to do so.\n```\n$ pwd\n/opt\n$ ls trans*\ntranspose-2.0.zip\n$ sudo unzip transpose-2.0.zip \nArchive:  transpose-2.0.zip\n   creating: transpose-2.0/\n   creating: transpose-2.0/win32-bin/\n  inflating: transpose-2.0/win32-bin/transpose.exe  \n   creating: transpose-2.0/src/\n  inflating: transpose-2.0/src/transpose.c  \n  inflating: transpose-2.0/README \n```\n\nThe zip file is now unpacked. Let us now compile the code. ALWAYS have a look at the README file for this.\n```\n$ cd transpose-2.0\n$ head README\nTo Compile:\n\tgcc transpose.c -o transpose\nTo Install - Just copy into your path. 
e.g.:\n\tcp transpose /usr/local/bin/\n$ cd src/\n$ sudo gcc transpose.c -o transpose\n```\n\nThe program **gcc** compiles the human readable code in the file **transpose.c** and produces a binary file out of it, called **transpose**.\n> We can now run the binary file from within the directory.\n```\n$ ./transpose --help\n       Description:     \n\tThis software is released under the GPL license\n\tReshapes delimited text data - amongst other things, it can transpose a matrix of plain text data.\n```\n\n> Create a symbolic link to the newly created binary to /usr/local/bin. This directory collects binaries/commands to be used on the command line.\n```\n$ sudo ln -s /opt/transpose-2.0/src/transpose /usr/local/bin\n$ which transpose \n/usr/local/bin/transpose\n```\n\n","## Tutorial on the linux command line\nWe will first hold your hand: type over these commands below step by step, and watch what they do.\nUse **cd** to change the current working directory (user bits). To create your own directories use the **mkdir** (make directory) command.\n\n```\n$ cd ~\n$ mkdir sequences\n$ cd sequences\n$ mkdir proteins\n$ cd proteins\n$ pwd\n/home/bits/sequences/proteins\n$ cd ../..\n$ pwd\n/home/bits\n```\n\nTo create a new file, use the **touch** command:\n```\n$ cd ~/sequences/proteins/\n$ touch my_sequence.txt\n$ ls -l\n-rw-r--r-- 1 bits users 0 Sep 19 15:56 my_sequence.txt\n```\n\nIn the last command above, the **-l** (a lowercase “L”, not a “1” (one)) option was used with the ls command. The -l indicates that you want the directory contents shown in the “long listing” format.\nMost commands accept options. But which options can you use? The command **man** helps you. Type **man** followed by the command name. E.g. **man ls** to see what options are available for the ls command. You get a the list of options. Keep pressing Space until the page stops scrolling, then enter “q” to return to the command prompt.\nLuckily, most tools have the **--help** option. 
(ls --help for example). These 2 methods should help you further. To see what options can be used with ls, enter **man ls**.\n\n```\n$ man ls\n```\n\nTo delete a file, use the **rm** (remove) command:\n\n```\n$ cd ~/sequences/proteins/\n$ ls\nmy_sequence.txt\n$ rm my_sequence.txt\n$ ls\n$\n```\n\nTo remove a directory, use the **rmdir** (remove directory) command. The directory needs to be empty to do this.\n```\n$ cd ~/sequences/\n$ ls\nproteins\n$ rmdir proteins\n$ ls\n$\n```\n\nTo copy a file, use the **cp** (copy) command:\n\n```\n$ cd ~/sequences\n$ touch testfile1\n$ ls\ntestfile1\n$ cp testfile1 testfile2\n$ ls\ntestfile1 testfile2\n```\n\nTo rename a file, or to move it to another directory, use the **mv** (move) command:\n```\n$ cd \n$ touch testfile3\n$ mv testfile3 junk\n$ mkdir testdir\n$ mv junk testdir\n$ ls testdir\njunk\n```\n\nTo download a file, use the **wget** command:\n```\n$ cd ~/Downloads\n$ wget http://data.bits.vib.be/pub/trainingen/Linux/sample.sam\n$ ls sample.sam\nsample.sam\n$\n```\n\nThe commands covered so far represent a small but useful subset of the many commands available on a typical Linux system.\n\n### Make a project folder structure\nWe assume that you start from your home folder.\n> Create the following directory structure\n> <figure id=\"figure-1\"><img src=\"https://wiki.bits.vib.be/images/c/c4/Dirstructureex1.png\" alt=\"tree-structure\"><figcaption><span class=\"figcaption-prefix\">Figure 1:</span> Tree</figcaption></figure>\n```\n$ mkdir -p docs/{pdf/{man,readme},html/{man,readme}}\n```\n\nThe '{' and '}' can group arguments but you can also create the structure step by step.\n> The little tree figure above is created with the 'tree' command. 
Display such a tree.\n```\ntree /home/bits/docs/\n```\n\n## Downloading and storing bioinformatics data\n\n### Create a project folder \nThe first thing to do when you start a bioinformatics project, is to create a structure of folders to put your data in an organised fashion.\n\n### Downloading\nAs an example, we will download the rice genome from the Rice Annotation Project database. But first create the folder structure.\n> Create following folder structure.\n```\n $ mkdir \"Rice Example\"\n $ cd Rice\\ Example\n $ mkdir Genome\\ data\n $ cd Genome\\ data\n $ mkdir Sequence\n $ mkdir Annotation\n $ cd\n```\n\n** Be aware of white spaces on the command line!**\nOn the command line, programs, options and arguments are separated by **white spaces**. If you choose to use a folder name containing a white space, it will interpret every word as an option or argument. So you have to tell Bash to **ignore the white space**. This can be done by: putting **strings between quotes** like ' or \" **escape** a white space with \\. See the examples above.\nHence, you might save yourself some trouble (and typing!) by putting _ instead of white spaces in names. Also make sure to use tab expansion, wherever possible!\n\n### Download the genome data directly on the command line\nYou can fetch the rice genome [from this link](http://rapdb.dna.affrc.go.jp/download/archive/build5/IRGSPb5.fa.masked.gz).\n> Download the genome data to the \"Rice example\"/\"Genome data\"/Sequence folder. Use **wget** to download from the link.\n> Right-click on the download link, and copy the download link. 
The download link is: http://rapdb.dna.affrc.go.jp/download/archive/build5/IRGSPb5.fa.masked.gz\n> Go the directory and execute wget\n```\n$ cd      ## to go back to the home directory\n$ cd Ric<tab>\n$ cd Gen<tab>/Seq<tab>\n$ wget http://rapdb.dna.affrc.go.jp/download/archive/build5/IRGSPb5.fa.masked.gz\n--2013-10-15 09:36:01--  http://rapdb.dna.affrc.go.jp/download/archive/build5/IRGSPb5.fa.masked.gz\nResolving rapdb.dna.affrc.go.jp (rapdb.dna.affrc.go.jp)... 150.26.230.179\nConnecting to rapdb.dna.affrc.go.jp (rapdb.dna.affrc.go.jp)|150.26.230.179|:80... connected.\nHTTP request sent, awaiting response... 200 OK\nLength: 122168025 (117M) [application/x-gzip]\nSaving to: `IRGSPb5.fa.masked.gz'\n100%[======================================>] 122,168,025  973K/s   in 2m 40s  \n2013-10-15 09:38:42 (747 KB/s) - `IRGSP-1.0_genome.fasta.gz' saved [122168025/122168025]\n$ ls\nIRGSPb5.fa.masked.gz\n```\n\nAllright. We have fetched our first genome sequence!\n\n### Did your data get through correctly?\nLarge downloads or slow downloads like this can take a long time. Plenty of opportunity for the transfer to go wrong. Therefore, large downloads should always have a **checksum** mentioned. You can find the md5 checksum on the downloads page. The md5 checksum is an unique string identifying (and calculated from) this data. Once downloaded, you should calculate this string yourself with **md5sum**.\n```\n$ md5sum IRGSPb5.fa.masked.gz\n7af391c32450de873f80806bbfaedf05  IRGSPb5.fa.masked.gz\n```\n\nYou should go to the rice genome download page, and compare this string with the MD5 checksum mentioned over there. You can do this manually. Now that you know the concept of checksums, there is an easier way to verify the data using **md5sum**. Can you find the easier way?\n> Search how to use md5sum to check the downloaded files with the .md5 file from the website. 
Check the man page\n```\n$ man md5sum\n```\n\nIt does not say much: in the end it refers to \n```\n$ info coreutils 'md5sum invocation'\n```\n\nReading the options, there is one option sounding promising:\n```\n`-c'\n`--check'\n     Read file names and checksum information (not data) from each FILE\n     (or from stdin if no FILE was specified) and report whether the\n     checksums match the contents of the named files. \n```\n\nThis way we can check the download:\n```\n$ wget http://rapdb.dna.affrc.go.jp/download/archive/build5/IRGSPb5.fa.masked.gz.md5\n--2013-10-15 09:47:02--  http://rapdb.dna.affrc.go.jp/download/archive/build5/IRGSPb5.fa.masked.gz.md5\nResolving rapdb.dna.affrc.go.jp (rapdb.dna.affrc.go.jp)... 150.26.230.179\nConnecting to rapdb.dna.affrc.go.jp (rapdb.dna.affrc.go.jp)|150.26.230.179|:80... connected.\nHTTP request sent, awaiting response... 200 OK\nLength: 55 [application/x-gzip]\nSaving to: `IRGSPb5.fa.masked.gz.md5'\n100%[======================================>] 55          --.-K/s   in 0s      \n2013-10-15 09:47:03 (757 KB/s) - `IRGSPb5.fa.masked.gz.md5' saved [55/55]\n$ ls\nIRGSPb5.fa.masked.gz  IRGSPb5.fa.masked.gz.md5\n$ md5sum -c IRGSPb5.fa.masked.gz.md5 \nIRGSPb5.fa.masked.gz: OK\n```\n\n## Ensuring integrity of downloads\nA handy tool to use is the [DownThemAll](https://addons.mozilla.org/nl/firefox/addon/downthemall/) addon for Firefox, in which you have to provide the checksum at the time of download. It will automatically verify the download against that checksum once it is finished.\nThe Short Read Archive (SRA), storing NGS data sets, makes use of [Aspera](http://asperasoft.com/technology/transport/fasp/) to download data at great speeds, ensuring integrity. 
To download from SRA using aspera in linux, follow [this guide from EBI](http://www.ebi.ac.uk/ena/about/read_download).\n\n### Extracting the data\n> What type of file have you downloaded?\n```\n$ file IRGSPb5.fa.masked.gz\nIRGSPb5.fa.masked.gz: gzip compressed data, was \"IRGSPb5.fa.masked\", from Unix, last modified: Wed Aug 18 03:45:47 2010\n```\n\nIt is a compressed file. Files are compressed to save storage space. Before using these files, you have to decompress them. What can you do with this type of file? Check the command apropos.\n```\n$ apropos gzip\ngzip (1)             - compress or expand files\nlz (1)               - gunzips and shows a listing of a gzip'd tar'd archive\ntgz (1)              - makes a gzip'd tar archive\nuz (1)               - gunzips and extracts a gzip'd tar'd archive\nzforce (1)           - force a '.gz' extension on all gzip files\n```\n\n**apropos** is a command that helps you discover new commands. In case you have a type of file that you don't know about, use apropos to search for corresponding programs.\n> Decompress the file. Check the man page of gzip. From the man page:<pre>gunzip [ -acfhlLnNrtvV ] [-S suffix] [ name ...  ]</pre>\n```\n$ gunzip IRGSPb5.fa.masked.gz \n$ ls\nIRGSPb5.fa.masked  IRGSPb5.fa.masked.gz.md5\n```\n\n","# 1. Status\nGit can display the state of your working directory and staging area. The command that we'll use for this is `git status` and depending on the situation the output will look different, but it will always give you some informative status description.\n\n```\n$ git status\nOn branch main\nYour branch is up to date with 'origin/main'.\n\nnothing to commit, working tree clean\n```\nThe first sentence tells us that we're on the `main` branch, which is the default branch name in Git. More on branches later. The second sentence tells us that our local branch is exactly the same as our origin. 
This means that all of the files and folders within our local project are identical to the ones in the remote GitHub repo. Lastly, git tells us that there is nothing to commit, which makes sense as we don't have any changes at the moment. \n\n\nLet's make some changes to one of our files again.  Check the status again with `git status`.\n```\n$ git status\nOn branch main\nYour branch is up to date with 'origin/main'.\n\nChanges not staged for commit:\n  (use \"git add <file>...\" to update what will be committed)\n  (use \"git restore <file>...\" to discard changes in working directory)\n        modified:   plot1.R\n\nno changes added to commit (use \"git add\" and/or \"git commit -a\")\n```\nThis time, git tells us that there are changes in the file `plot1.R` and they are not in the staging area. There are two options here:\n- Use `git add plot1.R` to add the changes to the staging area \n- Use `git restore plot1.R` to remove the changes from your working directory. This will undo the changes that you made since the last time you committed it. \n\nAdd the file to the staging area and check the status again with `git status`\n\n```\n$ git status\nOn branch main\nYour branch is up to date with 'origin/main'.\n\nChanges to be committed:\n  (use \"git restore --staged <file>...\" to unstage)\n        modified:   plot1.R\n```\nThe file is now in the staging area and we have two options:\n- Use `git commit -m \"some informative text\"` to commit the changes to the commit repository\n- Use `git restore --staged plot1.R` to remove the file from the staging area.\n\nLet's do the latter, check the status again and then remove the changes from your working directory. \n\n\n\n\n\n# 5. The history (log)\nBesides checking the current state of your project with `git status`, there is also a possibility to have a look in your commit history. In order to list all your previous commits, enter `git log`. 
The output is a long list containing several blocks like this:\n```\ncommit e2d7e9a0b4614a6bee6b3ffd7583237125671dc1\nAuthor: username <user@xyz.com>\nDate:   Wed Jan 01 01:23:45 2020 +0200\n\n    The informative commit message\n```\n`git log` lists all commits made to a repository in reverse chronological order. Each commit starts with an identifier which is a unique code for each commit (hash). Besides the identifier, the commit’s author and date are given, and the commit message is given.\n\nIf we have pushed the commits to our GitHub repository (online) we will see the last commit ID somewhere in the upper right corner. This is a verification for us so we know that the remote repository is up to date with the local repository. Can you also find an overview of all commits in GitHub? \n\n---\n\n> ### {% icon question %} Question\n> \n> Why is it useful to have the author's name and e-mail address in the history log?\n>\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    > It's obvious that in this local project we've been doing all the changes & commits. However at a certain point you might collaborate with someone else on the same project. In this case it's useful to know who made which changes. \n>    >\n>    > </details>\n>\n{: .question}\n\n---\n\nGit log can be extended with many other parameters, e.g. combine it with the `--oneline` argument, or add `--graph` to display the commit history as a text-based graph and `--decorate` to indicate which commits are associated with the current HEAD, the current branch main, or other Git references. Git's aliases are very useful in this case as the way the history is displayed is very personal. With this information you should understand the last section of [Chapter 2](https://material.bits.vib.be/topics/git-introduction/tutorials/2_configurations/tutorial.html) better and create your own alias. 
\n\n\n\n---\n\n**Intermezzo / extra reading**:\n\nWhen the output of git log is too long to fit in your screen, git uses a program to split it into pages of the size of your screen. When this “pager” is called, you will notice that the last line in your screen is a :, instead of your usual prompt.\n- To get out of the pager, press `Q`.\n- To move to the next page, press `Spacebar`.\n- To search for `some_word` in all pages, press `/` and type `some_word`. Navigate through matches pressing `N` (next).\n\n---\n\nLet's continue with the [next session](https://material.bits.vib.be/topics/git-introduction/tutorials/5_branches/tutorial.html)!\n\n\n\n\n\n\n","# 1. Introduction\nHave you also been in a similar and recognizable situation as depicted below? Saving different versions of your files and scripts is essential to keep track of changes, though it can become chaotic very quickly if we do not use the excellent tools we have available to us. Git is one of these excellent tools. It works similar to [Google Docs'](https://support.google.com/drive/answer/2409045?co=GENIE.Platform%3DDesktop&hl=en) history feature in which Google automatically saves your document and the changes that happened at a particular moment in time. However, Git allows you to control and decide yourself when changes are worth saving, hence making it much more powerful and flexible. Each change is saved together with a message that enables you or your collaborators to keep an overview of the history of the project.  \n\n\n---\n\n<center><img src=\"../../images/version-control-meme.png\"/></center>\n\n---\n\nGit is an open-source tool that keeps track of the changes made to your project files throughout their history. \n\nWhy should you version control? \n- **Keeping track of changes** to your files done by yourself or your collaborators. At any moment you can exploit the history of the project to see who wrote what on a particular day. 
It even allows you to go back to a specific version or undo specific edits. \n- **Synchronizes files between different people or infrastructures** (i.e. laptops, servers, ...), making it a powerful collaborating system. \n- **Testing new code/changes**. Git can control multiple alternative versions of the same project in which you can make some changes and only when you or your collaborators are happy with them, you can include them in the main version.\n\n\nThere is a major difference between Git and GitHub though. Git is software that works on your computer, whereas GitHub is a service for connecting and uploading/downloading files much like saving files in the cloud. There are some alternatives for Git ([link](https://www.g2.com/products/git/competitors/alternatives)) which will not be discussed in this course, and there are some for GitHub with GitLab and Bitbucket as main competitors. These alternatives essentially share the same concepts and therefore we choose the tools that enjoy the most traction in the community, namely Git and GitHub. In this course we will learn how Git works on your computer, giving us a proper understanding of its functionalities. Grasping these concepts is important if we want to use Git in other apps (e.g. in [Chapter 8](https://material.bits.vib.be/topics/git-introduction/tutorials/8_github_rstudio/tutorial.html) we will learn how GitHub and RStudio interact).\n\n# 2. Installations \nFor this course we will explore version controlling in a mixture of [Git](https://git-scm.com/) via the command-line and [GitHub](https://github.com/). The former requires some basic understanding of the Linux command line. If you're not familiar with the Linux command line, you can have a look at the materials [here](https://material.bits.vib.be/topics/linux/). After discussing Git's essential features, we'll introduce how you can set up a collaboration with externals or colleagues, how to integrate version controlling in RStudio, etc. 
\n\n- Git can be installed for any OS (Windows, Mac or Linux) from [this link](https://git-scm.com/downloads). Please keep the recommended and default settings as is. \n- Make an account on [GitHub](https://github.com/). \n\nWe will address further configurations in the next chapter. \n\n# 3. Three conceptual areas\nBefore diving in, let's have a look at how Git works. It's important to understand the three conceptual areas that exist locally when using Git on your computer: the development area, the staging area and the repository containing the commits. We already know that we want to use Git for keeping track of changes in our files. To keep track of those changes we need to run through these conceptual areas: first we edit a file on our computer (development area), then we tell Git about it (add it to the staging area) and lastly we commit those changes (commits repository). Let's have a closer look: \n\n![Three conceptual areas](../../images/conceptual_areas.png)\n\n1. The **development area** is where your coding happens. Usually this is a folder with multiple files on your computer. Git will never change anything at this level, actually it won't really do anything. The only thing Git does is remembering that it needs to keep track of changes made in this folder or its files. However, for this we first need to initialize Git on this folder (only once in the beginning).  \n2. The **staging area** is an intermediate stage which assembles the files that contain changes. We can select one or multiple files with changes and stage them for a commit. This means that we're telling Git that we will want to save those changes. Hence, imagine that we want to save a file, we first have to add it to the staging area before we can commit it.  \n3. Files that are in the staging area are then committed to what we'll call the **commit repository**. Once we have done that, we stored a specific version of the committed files. 
Committing is a synonym for saving the files in the Git terminology. The repository with commits contains a list of all the commits that we have done in a project. It's neatly structured in a history log which we can call at any point. Notice that all of this is still happening on our computer. \n\n\nHere's an example. Let's assume that we're starting a new project. Usually that also means that you make a new folder on your computer where you will keep all the files related to the project. The first thing you have to do is to tell Git that it has to keep track of this folder. In this step, we're initializing Git on this folder. Now, you just made your first file. Even though it is stored on your computer, it's not automatically saved in Git. First, you'll have to add it to the staging area and afterwards you need to commit it to the repository. When we initialized Git on the folder, a new folder `.git/` was created which will store the different versions. That allows us to only have the latest version of the files visible on our computer and all of its histories in the `.git/` folder.   \nIf we make a second file, the only thing we have to do is to add it to the staging area and then commit it. \n\nNotice that the repository is not yet visible on [github.com](https://github.com/). For this we would still need a fourth and last step, namely pushing the commits repository from your computer to GitHub. By pushing your commits repository, you will push the files within the project to GitHub. After this last step, your project and all of the files are accessible in a GitHub repository.\n\nDuring our adventure through Git & GitHub we'll use some specific terminology. Confused about what all these new words mean? 
Check out the [GitHub glossary](https://help.github.com/en/github/getting-started-with-github/github-glossary).\n\n\n---\n\nLet's go to the [next session](https://material.bits.vib.be/topics/git-introduction/tutorials/2_configurations/tutorial.html)!\n","# Data structures in R\n{:.no_toc}\n\nThe power of R lies not in its ability to work with simple numbers but in its ability to work with large datasets.  R has a wide variety of data structures including scalars, vectors, matrices, data frames, and lists.\n\n### Matrices\nA matrix is a table, the columns are vectors of equal length. \nAll columns in a matrix must contain the same type of data. The top row, called the header, contains column labels. Rows can also have labels. Data values are called elements. Indices are often used as column and row labels.\n\n### Creating a matrix\nTo create a matrix M use the matrix() function\n```\nM <- matrix(data,nrow=r,ncol=c,byrow=FALSE)\n```\n\nIt takes a long list of arguments:\n- *data* usually is a vector of elements that will fill the matrix\n- *nrow* and *ncol*: dimensions (number of rows and columns). Only one dimension argument is needed. If there are 20 elements in the *data* vector and *ncol=4* then R will automatically calculate that there should be 5 rows. \n- *byrow*: how the matrix is filled, *byrow=TRUE* fills the matrix row by row whereas *byrow=FALSE* fills the matrix column by column\n\n> ### {% icon hands_on %} Hands-on: Demo\n>\n> From the demo script run the **Data creation: matrices** section\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 8a\n>\n> 1. Create a 2x2 matrix named mat containing numbers 2,3,1,5\n> 2. Print the matrix\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  mat<-matrix(c(2,3,1,5),nrow=2,ncol=2)\n>    >  mat\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 8b\n>\n> 1. 
Create a 2x3 matrix named onemat consisting of all ones\n> 2. Print the matrix\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  onemat<-matrix(1,nrow=2,ncol=3)\n>    >  onemat\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 8c\n>\n> 1. Create a 3x3 matrix containing numbers 1,2,3,4,5,6,7 \n> 2. Retrieve all elements that are larger than 3\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  m <- matrix(c(1,2,3,4,5,6,7),ncol=3) \n>    >  m[m > 3]\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n### Data frames\nJust like a matrix, a data frame is a table where each column is a vector. But a data frame is more general than a matrix: data frames are used when columns contain different data types, while matrices are used when all data is of the same type. \n\n> ### {% icon comment %} Comment\n>\n> R has a number of built-in data frames like mtcars. \n{: .comment}\n\n### Creating a data frame\nTo create a data frame D use the function data.frame() with the vectors we want to use as columns:\n```\nD <- data.frame(column1,column2,column3)\n```\n\n> ### {% icon comment %} Comment\n>\n> The columns of a data frame are all of equal length\n{: .comment}\n\nYou can provide names (labels) for the columns:\n```\nD <- data.frame(label1=column1,label2=column2,label3=column3)\n```\n\n> ### {% icon comment %} Comment\n>\n> As an argument of data.frame() you use label=vector_to_add: the equals (and not the assignment) operator is used because you are naming columns not creating new variables. \nIf you don't define labels (as in the first example), the names of the vectors are used as column names. 
\n{: .comment}\n\n> ### {% icon hands_on %} Hands-on: Demo\n>\n> From the demo script run the **Data creation: data frames** section\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Exercise 9a\n>\n> Create a data frame called Plant_study containing days and Plants_with_lesions. Name the columns Days and Plants.\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  Plant_study <- data.frame(Days=days,Plants=Plants_with_lesions)\n>    >  Plant_study\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Exercise 9b\n>\n> Create a data frame called Drug_study consisting of three columns: ID, treatment and smoking\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  Drug_study <- data.frame(ID,treatment,smoking)\n>    >  Drug_study\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Exercise 9c\n>\n> Create a data frame genomeSize containing genome sizes and print it. \n> - The first column is called organism and contains Human,Mouse,Fruit Fly, Roundworm,Yeast \n> - The second column size contains 3000000000,3000000000,135600000,97000000,12100000\n> - The third column geneCount contains 30000,30000,13061,19099,6034\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  organism <- c(\"Human\",\"Mouse\",\"Fruit Fly\", \"Roundworm\",\"Yeast\")\n>    >  size <- c(3000000000,3000000000,135600000,97000000,12100000)\n>    >  geneCount <- c(30000,30000,13061,19099,6034) \n>    >  genomeSize <- data.frame(organism,size,geneCount)\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 9d\n>\n> Create a data frame ab and print it. 
\n> - The first column is called a and contains 1,3,2,1\n> - The second column is called b and contains 2,3,4,1\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  a <- c(1,3,2,1)\n>    >  b <- c(2,3,4,1)\n>    >  ab <- data.frame(a,b)\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n##### Referring to the elements of a data frame\nReferring to elements of a data frame can be done in the same way as for matrices, using row and column **indices** in between square brackets. The only difference is that in data frames you can also use the **labels** of the columns to retrieve them.\n\nTo retrieve the element on the second row, first column:\n```\nD[2,1]\n```\n\nTo select all values from one dimension leave the index blank, e.g. all elements of the first column:\n```\nD[,1]\n```\n\n> ### {% icon comment %} Comment\n>\n> If you want to retrieve **all** the rows you don't write any index before the comma inside the square brackets.\n{: .comment}\n\nYou can also use column labels for retrieving elements. Column names have to be written between quotes:\n```\nD[,\"label1\"]\n```\n\nYou can also use the range function to select elements:\n```\nD[2:4,1]\n```\n\nThe **$** symbol can be used to retrieve a column based on its label e.g. 
to retrieve column label1 from D:\n```\nD$label1\n```\n\n> ### {% icon comment %} Comment\n> With $ you do not have to put quotes around the column name\n{: .comment}\n\nSince the result of $ is a vector, you can address a specific element of a column using its index:\n```\nD$label1[2]\n```\nretrieves the second element of the column called label1\n\nSpecific for data frames is the **subset()** function that can be used to select columns that satisfy a logical operation:\n```\nsubset(D,select=columns to extract)\nsubset(D,logical expression,columns to extract)\n```\n\n> ### {% icon hands_on %} Hands-on: Demo\n>\n> From the demo script run the **Data extraction: data frames** section\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Exercise 10a\n>\n> 1. Retrieve the data for the Volvo 142E from mtcars \n> 2. Retrieve the gas usage (mpg column) for the Volvo 142E \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  mtcars[\"Volvo 142E\",]\n>    >  mtcars[\"Volvo 142E\",\"mpg\"]\n>    >  ```\n>    > </details>\n>\n>    > ### {% icon question %} Question\n>    >\n>    >  What will happen when you run this code ?\n>    > ```\n>    >  mtcars[\"Volvo 142E\"]\n>    >  \n>    >  ```\n>    {: .question}\n>    > ### {% icon question %} Question\n>    >\n>    >  What will happen when you run this code ?\n>    > ```\n>    >  mtcars[Volvo 142E,]\n>    >   \n>    >  ```\n>    {: .question}\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Exercise 10b\n>\n> 1. Retrieve the IDs of the smoking patients in Drug_study\n> 2. Retrieve ID and treatment of the smoking patients \n> 3. Retrieve the smoking behavior of all the patients\n> 4. Change the treatment of the fourth patient to A\n> 5. Add a column called activity with values: 4, NA, 12.1, 2.5\n> 6. 
Use subset() to retrieve the full ID and treatment columns\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  subset(Drug_study,smoking==TRUE,ID)\n>    >  subset(Drug_study,smoking==TRUE,c(ID,treatment))\n>    >  Drug_study$smoking\n>    >  Drug_study$treatment[4] <- \"A\"\n>    >  Drug_study$activity <- c(4,NA,12.1,2.5)\n>    >  subset(Drug_study,select=c(ID,treatment))\n>    >  ```\n>    > </details>\n>\n>    > ### {% icon question %} Question\n>    >\n>    >  What will happen when you run this code ?\n>    > ```\n>    >  Drug_study[Drug_study$smoking==TRUE,\"ID\"]\n>    >   \n>    >  ```\n>    {: .question}\n>    > ### {% icon question %} Question\n>    >\n>    >  What will happen when you run this code ?\n>    > ```\n>    >  Drug_study[Drug_study$smoking==TRUE,ID]\n>    >   \n>    >  ```\n>    {: .question}\n>    > ### {% icon question %} Question\n>    >\n>    >  What will happen when you run this code ?\n>    > ```\n>    >  Drug_study[,\"smoking\"]\n>    >  \n>    >  ```\n>    {: .question}\n>    > ### {% icon question %} Question\n>    >\n>    >  What will happen when you run this code ?\n>    >  ```\n>    >  Drug_study[4,\"treatment\"] <- \"B\"\n>    >  ```\n>    {: .question}\n>    > ### {% icon question %} Question\n>    >\n>    >  What will happen when you run this code ?\n>    > ```\n>    >  Drug_study[\"activity\"] <- c(4,NA,12.1,2.5)\n>    >   \n>    >  ```\n>    {: .question}\n>    > ### {% icon question %} Question\n>    >\n>    >  What will happen when you run this code ?\n>    > ```\n>    >  subset(Drug_study,c(ID,treatment))\n>    >   \n>    >  ```\n>    {: .question}\n>    > ### {% icon question %} Question\n>    >\n>    >  What will happen when you run this code ?\n>    > ```\n>    >  subset(Drug_study,,c(ID,treatment))\n>    >   \n>    >  ```\n>    {: .question}\n{: .hands_on}\n\n\n> ### {% icon comment %} Comment\n>\n> The order of the arguments is important except 
when you specify their names. \n{: .comment}\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 10c\n>\n> On which days did we observe more than 2 infected plants in the plant experiment? Answer this question with and without using the subset() function.\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  > Plant_study[Plant_study$Plants > 2,\"Days\"]\n>    >  > subset(Plant_study,Plants > 2,Days)\n>    >  ```\n>    > </details>\n>\n>    > ### {% icon question %} Question\n>    >\n>    >  What will happen when you run this code ?\n>    > ```\n>    >  Plant_study[Plant_study[\"Plants\"] > 2,\"Days\"]\n>    >   \n>    >  ```\n>    {: .question}\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 10d\n>\n> 1. Create vector q by extracting the a column of data frame ab (exercise 9) with and without subset().\n> 2. Retrieve the second element of column a of data frame ab\n> 3. Add column c with elements 2,1,4,7 to data frame ab\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  q <- ab$a\n>    >  subset(q,select=a)\n>    >  ab$a[2]\n>    >  ab$c <- c(2,1,4,7)\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n##### Removing elements from a data frame\nTo remove elements from a data frame use negative indices just as in a vector e.g. to remove the second row from data frame D use:\n```\nD <- D[-2,]\n```\n\n> ### {% icon comment %} Comment\n>\n> The minus sign only works with numbers not with column labels. \n{: .comment}\n\nTo remove columns based on labels assign them to NULL:\n```\nD$genome <- NULL\n```\n\n> ### {% icon comment %} Comment\n>\n> Setting a column to NULL is done via an assignment so the removal is permanent. 
\n{: .comment}\n\n> ### {% icon comment %} Comment\n>\n> Instead of removing elements you can also define the elements you want to keep.\n{: .comment}\n\n> ### {% icon hands_on %} Hands-on: Demo\n>\n> From the demo script run the **Data removal: data frames** section\n{: .hands_on}\n\n##### Reordering columns in a data frame\nReordering columns is a special case of retrieving columns, e.g. for a data frame that has 4 columns you can switch the position of the second and third column as follows:\n```\nD2 <- D[ ,c(1,3,2,4)]\n```\n\n> ### {% icon comment %} Comment\n>\n> The first comma means keep all the rows, and the 1,3,2,4 refer to column indices. \n> You can use indices or labels to refer to the columns. \n{: .comment}\n\nYou can also use subset():\n```\nD2 <- subset(D,select=c(1,3,2,4))\n```\n\n> ### {% icon hands_on %} Hands-on: Demo\n>\n> From the demo script run the **Column reordering: data frames** section\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Exercise 11a\n>\n> Switch the position of the second and the third column of Drug_study\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  Drug_study[,c(1,3,2)]\n>    >   \n>    >  ```\n>    > </details>\n>\n>    > ### {% icon question %} Question\n>    >\n>    >  What will happen when you run this code ?\n>    > ```\n>    >  subset(Drug_study,select=c(1,3,2))\n>    >   \n>    >  ```\n>    {: .question}\n{: .hands_on}\n\n### Lists\nA list is an ordered collection of objects (of any data type: string, numbers, vectors, matrices, data frames). Lists can even contain other lists as objects! A list allows you to gather a variety of objects under one name. 
It is not mandatory but very useful to give each object in a list a label.\n\n##### Creating a list\nTo create a list L use the list() function:\n```\nL <- list(label1=object1,label2=object2,label3=object3)\n```\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 12a\n>\n> 1. Create a list called myList with the following objects: 5, 6, the word seven, the matrix mat.\n> 2. Print the list.\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  myList<-list(5,6,\"seven\",mat)\n>    >   \n>    >  ```\n>    > </details>\n>\n>    > ### {% icon question %} Question\n>    >\n>    >  What will happen when you run this code ?\n>    > ```\n>    >  subset(Drug_study,select=c(1,3,2))\n>    >   \n>    >  ```\n>    {: .question}\n{: .hands_on}\n\n##### Referring to the elements of a list\nReferring to the elements of a list can be done in exactly the same way as for data frames, using row and column **indices or labels** in between square brackets. However, since a list can contain other lists or data frames you have to use **double square brackets** [[ ]] to retrieve elements. \n\n> ### {% icon comment %} Comment\n>\n> The $ operator also works to access the objects of a list.\n{: .comment}\n","# Genome assembly with Velvet: Background\n{:.no_toc}\n\nVelvet is one of a number of *de novo* assemblers that use short read sets as input (*e.g.* Illumina Reads). The assembly method is based on the manipulation of de Bruijn graphs, via the removal of errors and the simplification of repeated regions.\n\n> ### {% icon comment %} Comment\n>\n> For information about Velvet, you can check its (nice) [Wikipedia page](https://en.wikipedia.org/wiki/Velvet_assembler).\n{: .comment}\n\nFor this tutorial, we have a set of reads from an imaginary *Staphylococcus aureus* bacterium with a miniature genome (197,394 bp). 
Our mutant strain read set was sequenced with the whole genome shotgun method, using an Illumina DNA sequencing instrument. From these reads, we would like to rebuild our imaginary *Staphylococcus aureus* bacterium via a *de novo* assembly of a short read set using the Velvet assembler.\n\n> ### Agenda\n>\n> In this tutorial, we will deal with:\n>\n> 1. TOC\n> {:toc}\n>\n{: .agenda}\n\n# Get the data\n\nWe will now import the data that we will use for the tutorial.\n\n> ### {% icon hands_on %} Hands-on: Getting the data\n>\n> 1. Create and name a new history for this tutorial.\n> 2. Import from [Zenodo](https://doi.org/10.5281/zenodo.582600) or from the data library the files:\n>    - [`mutant_R1.fastq`](https://zenodo.org/record/582600/files/mutant_R1.fastq)\n>    - [`mutant_R2.fastq`](https://zenodo.org/record/582600/files/mutant_R2.fastq)\n>\n>    > ### {% icon tip %} Tip: Importing data via links\n>    >\n>    > * Copy the link location (Right-click on the filename then \"Copy Link Address\")\n>    > * Open the Galaxy Upload Manager\n>    > * Select **Paste/Fetch Data**\n>    > * Paste the link into the text field\n>    > * Change the data-type to **fastqsanger**\n>    > * Press **Start**\n>    {: .tip}\n>\n> 3. Change the name of the files to `mutant_R1` and `mutant_R2`.\n>\n>    As a default, Galaxy uses the link as the name of the new dataset. It also does not link the dataset to a database or a reference genome.\n>\n>    {% include snippets/rename_dataset.md %}\n>\n> 4. Inspect the content of a file.\n>\n>    > ### {% icon tip %} Tip: Inspecting the content of a dataset\n>    >\n>    > * Click on the {% icon galaxy-eye %} (eye) icon next to the relevant history entry\n>    > * View the content of the file in the central panel\n>    {: .tip}\n>\n>    > ### {% icon question %} Questions\n>    >\n>    > 1. What are four key features of a FASTQ file?\n>    > 2. 
What is the main difference between a FASTQ and a FASTA file?\n>    >\n>    > > <details markdown=\"1\">\n>    > > <summary>{% icon solution %} Solution\n>    > > </summary>\n>    > > 1. Each sequence in a FASTQ file is represented by 4 lines: 1st line is the id, 2nd line is the sequence, 3rd line is not used, and 4th line is the quality of sequencing per nucleotide\n>    > > 2. In a FASTQ file, not only are the sequences present, but information about the quality of sequencing is also included.\n>    > > </details>\n>    >\n>    {: .question}\n>\n{: .hands_on}\n\nThe reads have been sequenced from an imaginary *Staphylococcus aureus* bacterium using an Illumina DNA sequencing instrument. We obtained the 2 files we imported (`mutant_R1` and `mutant_R2`)\n\n> ### {% icon question %} Question\n>\n> Why do we have 2 files here if we only sequenced the bacteria once?\n>\n> > <details markdown=\"1\">\n> > <summary>{% icon solution %} Solution\n> > </summary>\n> > 1. The bacteria has been sequenced using paired-end sequencing. The first file corresponds to forward reads and the second file to reverse reads.\n> > </details>\n>\n{: .question}\n\n# Evaluate the input reads\n\nBefore doing any assembly, the first questions you should ask about your input reads include:\n\n- What is the coverage of my genome?\n- How good is my read set?\n- Do I need to ask for a new sequencing run?\n- Is it suitable for the analysis I need to do?\n\nWe will evaluate the input reads using the FastQC tool. This tool runs a standard series of tests on your read set and returns a relatively easy-to-interpret report. We will use it to evaluate the quality of our FASTQ files and combine the results with MultiQC.\n\n> ### {% icon hands_on %} Hands-on: FastQC on a fastq file\n>\n> 1. **FastQC** {% icon tool %} with the following parameters\n>    - \"Short read data from your current history\" to (**Multiple datasets**) `mutant_R1.fastq` and `mutant_R2.fastq`\n>\n> 2. 
**MultiQC** {% icon tool %} with the following parameters\n>    - \"Software name\" to `FastQC`\n>    - \"Result file\" to the raw data files generated by FastQC\n>\n{: .hands_on}\n\nMultiQC generates a webpage combining reports for FastQC on both datasets. It includes these graphs and tables:\n\n- General statistics\n\n    This is important in setting maximum k-mer size for an assembly.\n\n    > ### {% icon comment %} Getting the length of sequences\n    >\n    > * Click on **Configure Columns**\n    > * Check **Length**\n    > * Close the window\n    {: .comment}\n\n    > ### {% icon question %} Questions\n    >\n    > 1. How long are the sequences?\n    > 2. What is the average coverage of the genome, given our imaginary *Staphylococcus aureus* bacterium has a genome of 197,394 bp?\n    >\n    > > <details markdown=\"1\">\n    > > <summary>{% icon solution %} Solution\n    > > </summary>\n    > > 1. The sequences are 150 bp long\n    > > 2. We have 2 x 12,480 sequences of 150 bp, so the average genome coverage is: 2 * 12480 * 150 / 197394, or approximately 19 X coverage.\n    > > </details>\n    >\n    {: .question}\n\n- Sequence Quality Histograms\n\n    Dips in quality near the beginning, middle or end of the reads may determine the trimming/cleanup methods and parameters to be used, or may indicate technical problems with the sequencing process/machine run.\n\n    <figure id=\"figure-1\"><img src=\"../../images/fastqc_per_base_sequence_quality_plot.png\" alt=\"Sequence Quality Histograms with the mean quality value across each base position in the read\"><figcaption><span class=\"figcaption-prefix\">Figure 1:</span> The mean quality value across each base position in the read</figcaption></figure>\n\n    > ### {% icon question %} Questions\n    >\n    > 1. What does the y-axis represent?\n    > 2. 
Why is the quality score decreasing across the length of the reads?\n    >\n    > > <details markdown=\"1\">\n    > > <summary>{% icon solution %} Solution\n    > > </summary>\n    > > 1. The y-axis represents the quality score for each base (an estimate of the error during sequencing).\n    > > 2. The quality score is decreasing across the length of the reads because the sequencing becomes less and less reliable at the end of the reads.\n    > > </details>\n    >\n    {: .question}\n\n- Per Sequence GC Content\n\n    High GC organisms tend not to assemble well and may have an uneven read coverage distribution.\n\n- Per Base N Content\n\n    The presence of large numbers of Ns in reads may point to a poor quality sequencing run. You will need to trim these reads to remove Ns.\n\n- k-mer content\n\n    The presence of highly recurring k-mers may point to contamination of reads with barcodes or adapter sequences.\n\n\n> ### {% icon comment %} Comment\n>\n> For a fuller discussion of FastQC outputs and warnings, see the [FastQC website link](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/), including the section on each of the output [reports](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/3%20Analysis%20Modules/), and examples of [\"good\"](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/good_sequence_short_fastqc.html) and [\"bad\"](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/bad_sequence_fastqc.html) Illumina data.\n{: .comment}\n\nWe won't be doing anything to these data to clean them up as there isn't much need. Therefore we will get on with the assembly!\n\n\n# Assemble reads with Velvet\n\nNow, we want to assemble our reads to find the sequence of our imaginary *Staphylococcus aureus* bacterium. We will perform a *de novo* assembly of the reads into long contiguous sequences using the Velvet short read assembler.\n\nThe first step of the assembler is to build a de Bruijn graph.
For that, it will break our reads into k-mers, *i.e.* fragments of length *k*. Velvet requires the user to input a value of *k* (k-mer size) for the assembly process. Small k-mers will give greater connectivity, but large k-mers will give better specificity.\n\n> ### {% icon hands_on %} Hands-on: Assemble the reads\n>\n> 1. **FASTQ interlacer** {% icon tool %} with the following parameters\n>    - \"Type of paired-end datasets\" to `2 separate datasets`\n>    - \"Left-hand mates\" to `mutant_R1.fastq`\n>    - \"Right-hand mates\" to `mutant_R2.fastq`\n>\n>    Currently our paired-end reads are in 2 files (one with the forward reads and one with the reverse reads), but Velvet requires only one file, where each read is next to its mate read. In other words, if the reads are indexed from 0, then reads 0 and 1 are paired, 2 and 3, 4 and 5, etc. Before doing the assembly *per se*, we need to prepare the files by combining them.\n>\n> 2. **velveth** {% icon tool %} with the following parameters\n>    - \"Hash Length\" to `29`\n>    - \"Input Files\": click on `Insert Input Files`\n>    - \"file format\" to `fastq`\n>    - \"read type\" to `shortPaired reads`\n>    - \"Dataset\" to the pairs output of **FASTQ interlacer**\n>\n>    The tool takes our reads and breaks them into k-mers.\n>\n> 3. **velvetg** {% icon tool %} with the following parameters\n>    - \"Velvet Dataset\" to the output of **velveth**\n>    - \"Using Paired Reads\" to `Yes`\n>\n>    This last tool actually does the assembly.\n{: .hands_on}\n\nTwo files are generated:\n\n- A \"Contigs\" file\n\n    This file contains the sequences of the contigs longer than 2k.
In the header of each contig, a bit of information is added:\n    - the k-mer length (called \"length\"): For the value of k chosen in the assembly, a measure of how many k-mers overlap (by 1 bp each overlap) to give this length\n    - the k-mer coverage (called \"coverage\"): For the value of k chosen in the assembly, a measure of how many k-mers overlap each base position (in the assembly).\n\n    ![Contigs output](../../images/image10.png)\n\n- A \"Stats\" file\n\n    This is a tabular file giving for each contig the k-mer lengths, k-mer coverages and other measures.\n\n    ![Contigs stats output](../../images/image11.png)\n\n# Collect some statistics on the contigs\n\n> ### {% icon question %} Question\n>\n> 1. How many contigs have been built?\n> 2. What is the mean, min and max length of the contigs?\n>\n> > <details markdown=\"1\">\n> > <summary>{% icon solution %} Solution\n> > </summary>\n> > 1. 190\n> > 2. To compute this information, we can use the Datamash tool on the 2nd column (length). Be careful with the first line, the header. As a result, we obtain: 597.82 as mean, 1 as min and 12904 as max. It would mean that the smallest contig has a length of 1 bp, even smaller than k. The length on the 2nd column corresponds to the length of the contig in k-mers. This means that the smallest contig has a length of 1 k-mer, i.e. 29 bp. So to obtain the real length, we need to add k-1 to the length. We then obtain a mean contig length of 625.82 bp, a min contig of 29 bp and a max contig of 12,932 bp.\n> > </details>\n>\n{: .question}\n\nThis table is limited, but we will now collect more basic statistics on our assembly.\n\n> ### {% icon hands_on %} Hands-on: Collect fasta statistics on our contigs\n>\n> 1.
**Quast** {% icon tool %} with\n>    - \"Contigs/scaffolds output file\" to the output of **velvetg**\n>    - \"Type of data\" to `contig`\n>    - \"Reference File\" to `wildtype.fna`\n>    - \"Type of organism\" to `Prokaryotes`\n>    - \"Lower Threshold\" to `500`\n>    - \"Thresholds\" to `0,1000`\n{: .hands_on}\n\nThis tool generates 5 output files, but we will focus on the HTML report and the Icarus viewer.\n\n> ### {% icon question %} Question\n>\n> 1. What is represented in the Icarus viewer?\n>\n> > <details markdown=\"1\">\n> > <summary>{% icon solution %} Solution\n> > </summary>\n> > 1. Icarus is a novel genome visualizer for accurate assessment and analysis of genomic draft assemblies. It draws contigs ordered from longest to shortest, highlights N50, N75 (NG50, NG75) and long contigs larger than a user-specified threshold\n> > </details>\n>\n{: .question}\n\nThe HTML report reports many statistics computed by QUAST to assess the quality of the assembly:\n\n- Statistics about the quality of the assembly when compared to the reference (fraction of the genome, duplication ratio, etc)\n- Misassembly statistics, including the number of misassemblies\n\n    A misassembly is a position in the contigs (breakpoints) that satisfy one of the following criteria:\n    - the left flanking sequence aligns over 1 kbp away from the right flanking sequence on the reference;\n    - flanking sequences overlap on more than 1 kbp\n    - flanking sequences align to different strands or different chromosomes\n\n- Unaligned regions in the assembly\n- Mismatches compared to the reference genomes\n- Statistics about the assembly *per se*, such as the number of contigs and the length of the largest contig\n\n> ### {% icon question %} Question\n>\n> 1. How many contigs have been constructed?\n> 2. Which proportion of the reference genome do they represent?\n> 3. How many misassemblies have been found?\n> 4. Has the assembly introduced mismatches and indels?\n> 5. 
What are N50 and L50?\n> 6. Is there a bias in GC percentage induced by the assembly?\n>\n> > <details markdown=\"1\">\n> > <summary>{% icon solution %} Solution\n> > </summary>\n> > 1. 190 contigs have been constructed, but only 47 have a length > 500 bp.\n> > 2. The contigs represent 87.965% of the reference genome.\n> > 3. 1 misassembly has been found: it corresponds to a relocation, *i.e.* a misassembly event (breakpoint) where the left flanking sequence aligns over 1 kbp away from the right flanking sequence on the reference genome.\n> > 4. 8.06 mismatches per 100 kbp and 4.03 indels per 100 kbp are found.\n> > 5. N50 is the length for which the collection of all contigs of that length or longer covers at least half an assembly. In other words, if contigs were ordered from small to large, half of all the nucleotides will be in contigs this size or larger. And L50 is the number of contigs equal to or longer than N50: L50 is the minimal number of contigs that cover half the assembly.\n> > 6. The GC % in the assembly is 33.64%, really similar to the one of the reference genome (33.43%).\n> > </details>\n>\n{: .question}\n\n# Discussion\n\n> ### {% icon hands_on %} (Optional) Hands-on: Rerun for values *k* ranging from 31 to 101\n>\n> 1. **velveth** {% icon tool %} with the same parameters as before except\n>    - \"Hash Length\" to a value between 31 and 101\n> 2. **velvetg** {% icon tool %} with the same parameters as before\n> 3. **Quast** {% icon tool %} with the same parameters as before\n{: .hands_on}\n\nWe have completed an assembly on this data set for a number of k values ranging from 29 to 101.
A few of the assembly metrics appear below.\n\n<figure id=\"figure-2\"><img src=\"../../images/number_of_contigs.png\" alt=\"contigs\"><figcaption><span class=\"figcaption-prefix\">Figure 2:</span> Number of contigs in the assembly for various k-mer sizes</figcaption></figure>\n\n<figure id=\"figure-3\"><img src=\"../../images/largest_contig.png\" alt=\"largest_contig\"><figcaption><span class=\"figcaption-prefix\">Figure 3:</span> Largest contig in each of the assemblies by k-mer size</figcaption></figure>\n\n<figure id=\"figure-4\"><img src=\"../../images/total_bp.png\" alt=\"total_bp\"><figcaption><span class=\"figcaption-prefix\">Figure 4:</span> Total number of base pairs in all the contigs for each assembly by k-mer size</figcaption></figure>\n\n<figure id=\"figure-5\"><img src=\"../../images/n50.png\" alt=\"n50\"><figcaption><span class=\"figcaption-prefix\">Figure 5:</span> N50 metric for each of the assemblies by k-mer size</figcaption></figure>\n\n> ### {% icon question %} Questions\n>\n> 1. Are there any distinct features in the charts?\n> 2. Does it look like one assembly might be better than some of the others?\n>\n{: .question}\n\nThe reasons for these patterns will be discussed in detail in the De Bruijn graph assembly slides and tutorial.\n","# Manipulation of variables \n{:.no_toc}\n\n### General functions\nThe big difference between R and other programming languages is that functions in R are designed to be applied to variables rather than to individual values to avoid loops e.g. if we want to log transform a whole dataset we can do this using a single operation:\n```\n> v <- c(1,10,100,1000,10000)\n> log10(v)\n[1] 0 1 2 3 4\n```\nThe log10() function is written in such a way that it can be applied on a vector. 
This is true for all functions and operators in R:\n```\n> v - 1\n[1] 0     9    99   999  9999\n```\nR has built-in functions for virtually any standard mathematical task.\n \n<figure id=\"figure-1\"><img src=\"../../images/Rgeneral_functions.png\" alt=\"general_functions\"><figcaption><span class=\"figcaption-prefix\">Figure 1:</span> Overview of built-in functions</figcaption></figure>\n\nArithmetic operators can be used on variables. Provided that the variables have the same dimensions, you can do element-wise addition, subtraction, multiplication and division of two vectors or tables. Element-wise means that the calculation is performed on the equivalent positions between the two variables: first element + first element, second element + second element etc.\n\n```\n> v1<-c(1,2,3)\n> v2<-c(4,5,6)\n> z<-v1+v2\n> z\n[1] 5 7 9\n```\n\nIf you perform operations on vectors with different lengths (not recommended) then the vector with the shorter length is recycled to the length of the longer vector so that the first element of the shorter vector is appended to the end of that vector (a way of faking that it is of equal length to the longer vector) and so forth. You will get a warning, but R does let you perform the operation:  \n\n```\n> x1 <- c(1,2,3)\n> x2 <- c(3,4)\n> x3 <- x1 + x2\nWarning message: \nIn x1 + x2:\n  longer object length is not a multiple of shorter object length\n> x3\n[1] 4 6 6\n```\n\n> ### {% icon hands_on %} Hands-on: Demo\n>\n> From the demo script run the **Operations on variables** section\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Exercise 13a\n>\n> 1. Calculate log base2 of the activity in Drug_study\n> 2.
Round the result to the nearest integer\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >  ```\n>    >  log.act <- (log2(Drug_study$activity))\n>    >  round(log.act)\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 13b\n>\n> 1. Create vector v as the sum of newVector and threes using an arithmetic operator \n> 2. Print the content of v\n> 3. Do the same for newVector and vector x2 with elements 3,1\n> 4. Join the elements of newVector and threes into 1 vector q\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >  ```\n>    >  v <- newVector + threes\n>    >  v\n>    >  x2 <- c(3,1)\n>    >  newVector + x2 \n>    >   q <- c(newVector,threes)\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Exercise 13c\n>\n> 1. Add a column called geneDensity to genomeSize containing the number of bp per gene for every organism \n> 2. Round the numbers to the nearest integer\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  dens.fl <- genomeSize$size / genomeSize$geneCount\n>    >  genomeSize$geneDensity <- round(dens.fl)\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\nSome functions only work on vectors. 
For instance sort() will sort data from smallest to largest (arguments allow other ordering) and order() returns the indices of the sorted elements:\n```\nx\n[1] 1 3 11 1 7\nsort(x)\n[1] 1 1 3 7 11\norder(x)\n[1] 1 4 2 5 3\n```\nIn the sorted vector the first element is also the first element of the original vector, the second element of the sorted vector has index 4 in the original vector etc.\nTo sort a data frame use order() inside square brackets:\n```\nmtcars[order(mtcars$mpg),]\n```\nTo sort on two columns (first on mpg, then on wt): \n```\nmtcars[order(mtcars$mpg,mtcars$wt),]\n```\nTo sort in descending order place a minus sign in front of the variable:\n```\nmtcars[order(mtcars$mpg,-mtcars$wt),]\n```\n\nSelect the **labels** of a vector or table using names(). For tables rownames() and colnames() can access or set either the row or the column labels. Both functions will not work on vectors. \n\nThe length() function retrieves the number of elements of a vector. Used on data frames it doesn't throw an error but returns the number of columns instead. \n\nThe same is true for match(x,y). It compares x and y and returns a vector with the same length as x containing: \n-  NA for elements of x that are not in y  \n- the index in y for elements in x that are in y\n\nOn data frames it will not do an element-wise comparison but a column-wise comparison: \n```\nmatch(D1,D2) \n```\nwill return a vector with length equal to the number of columns in D1 containing:\n- NA for columns of D1 that are not in D2\n- the index in D2 for columns in D1 that are in D2 (so the complete column has to match, not the individual elements)\n\nIt is important to see the difference between the + operator and sum(). The former works element-wise on two variables, the latter calculates the sum of all elements of one vector.\n\nThere are also functions to be used only on tables, e.g.
\n- dim() returns how many rows and columns a table has, nrow() and ncol() will get these values individually\n- t() transposes matrices (exchanges rows and columns), the output is a transposed matrix: the columns are the rows of the original matrix and vice versa\n\nUse merge() to join two data frames. Let's say D1 has a column A with values. Data frame D2 has the same values stored in column A. Merge the two data frames on the basis of this common column:\n```\nnewD <- merge(D1,D2)\n```\nIf (some of) the values of the common column differ, merge() will ignore these values. Use argument *all.x* to add an extra row for every different value to the resulting data frame. All rows where the values of the two data frames don't correspond, will be filled up with NA values.\n\nMost functions operate on numbers but there are also functions for manipulating text, e.g. \n```\npaste(x,y,sep=\" \") \n```\t\nconcatenates two strings x and y (glues them together into one string) separating them by the character defined by *sep*. Arguments *x* and *y* can be strings but they can also be vectors. If they are vectors, they are concatenated element-wise to give a character vector result.\n\nFurthermore there are also functions specific for factors. For instance to select the names of the categories (levels) of a factor use levels() and table() to create a contingency table. \n```\n table(cell_phone_data$own, cell_phone_data$grade)\n```\n\n<figure id=\"figure-2\"><img src=\"../../images/Rtable_function.png\" alt=\"table_function\"><figcaption><span class=\"figcaption-prefix\">Figure 2:</span> Example of a contingency table</figcaption></figure>\n\n> ### {% icon hands_on %} Hands-on: Exercise 13d\n>\n> You repeat the plant study experiment this time having the following numbers of plants developing lesions: 1, 6, 6, 5, 4\n> 1. Add these data as a third column to the data frame \n> 2. Relabel columns to Day, Infected and Repeat\n> 3. Use paste() to add the word 'day'
to the elements of the Day column. Look at the documentation first !\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  Plant_study$repeated <- c(1,6,6,5,4)\n>    >  names(Plant_study) <- c(\"Day\",\"Infected\",\"Repeat\")\n>    >  ?paste\n>    >  Plant_study$Day <- paste(Plant_study$Day,\"day\",sep=\"\")\n>    >  ```\n>    > </details>\n>\n>    > ### {% icon question %} Question\n>    >\n>    >  What will happen when you run this code ?\n>    >  ```\n>    >  paste(Plant_study[,\"Day\"],\"day\",sep=\"\")\n>    >  ```\n>    {: .question}\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Exercise 13e\n>\n> 1. Change the label of the second column of Drug_study to drug\n> 2. How many rows does Drug_study contain?\n> 3. Order the rows according to decreasing activity\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  colnames(Drug_study)[2] <- \"drug\"\n>    >  nrow(Drug_study)\n>    >  Drug_study[order(Drug_study$activity,decreasing=TRUE),]\n>    >  ```\n>    > </details>\n>\n>    > ### {% icon question %} Question\n>    >\n>    >  What happens when you run this code ?\n>    >  ```\n>    >  colnames(Drug_study$ID) <- \"id\"\n>    >  ```\n>    {: .question}\n>    > ### {% icon question %} Question\n>    >\n>    >  What happens when you run this code ?\n>    >  ```\n>    >  colnames(Drug_study[2]) <- \"blabla\"\n>    >  ```\n>    {: .question}\n>    > ### {% icon question %} Question\n>    >\n>    >  What will happen when you run this code ?\n>    >  ```\n>    >  Drug_study[order(Drug_study$activity),\"ID\"]\n>    >  ```\n>    {: .question}\n>    > ### {% icon question %} Question\n>    >\n>    >  What will happen when you run this code ?\n>    >  ```\n>    >  n <- order(Drug_study$activity,decreasing=TRUE)\n>    >  Drug_study[n,]\n>    >  ```\n>    {: .question}\n{: .hands_on}\n\n> ### {% icon hands_on %} 
Hands-on: Extra exercise 13f\n>\n> 1. Sort the elements of z from smallest to largest\n> 2. Now use order(z). What's the difference with the previous exercise?\n> 3. How many elements does z contain?\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  sort(z)\n>    >  order(z)\n>    >  length(z) \n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 13g\n>\n> Add a new row to data frame ab containing values: 3,4,7\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  d <- c(3,4,7)\n>    >  ab <- rbind(ab,d)\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 13h\n>\n> 1. How many rows and columns are in the built-in data frame CO2 (data on CO2 uptake by plants)\n> 2. Use levels() to retrieve the names of the Treatment categories\n> 3. Create a contingency table with counts (number of plants) in every category of CO2 that is defined by Type and Treatment\n> 4. Use unique() to count how many plants were studied\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  dim(CO2)\n>    >  levels(CO2$Treatment)\n>    >  table(CO2$Type,CO2$Treatment)\n>    >  length(unique(CO2$Plant))\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n### Functions helpful for working with large data sets\nResearch in biology/medicine often generates very large data sets. When you work with very large data sets, it is often useful to show only a small part of the data set;\n- head() shows the first 6 elements (vector) or rows (table) of a variable \n- tail() prints the last 6 elements or rows\n\n> ### {% icon hands_on %} Hands-on: Exercise 14a\n>\n> 1. View the first 6 rows of the mtcars data frame\n> 2. 
Return TRUE if mtcars contains cars with 6 gears and FALSE if not\n> 3. How many cars with 3 gears are in mtcars?\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >  ```\n>    >  head(mtcars)\n>    >  nrow(subset(mtcars,gear==6))!=0\n>    >  nrow(subset(mtcars,gear==3))\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n### Functions for finding indices of specific elements\nThere are functions that help you locate specific values, the which functions:\n```\nwhich.min(x)\nwhich.max(x)\n```\nreturn the location (index) of the minimum, maximum or a specific value of a vector x. So max() will return the highest value in the data, which.max() will return the index of the highest value in the data.\n\nThe argument of which() is a logical expression and which() will return the indices of the elements for which the logical expression is TRUE. \n```\nx <- c(1,5,8,4,6)\nx\n# [1] 1 5 8 4 6\nwhich(x == 5)\n# [1] 2\nwhich(x != 5)\n# [1] 1 3 4 5\n```\n\n> ### {% icon hands_on %} Hands-on: Exercise 15a\n>\n> Get the data of the patient with the highest activity in Drug_study\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  Drug_study[which.max(Drug_study$activity),]\n>    >  \n>    >  ```\n>    > </details>\n>\n>    >    {: .question}\n>    > ### {% icon question %} Question\n>    >\n>    >  What will happen when you run this code ?\n>    > ```\n>    >  n <- which.max(Drug_study$activity)\n>    >  Drug_study[n,]\n>    >  ```\n>    {: .question}\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Exercise 15b\n>\n> 1. Get the index of the column called cyl in mtcars\n> 2. 
Create a data frame that contains the car with the lowest mpg for each category of cyl\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  which(names(mtcars)==\"cyl\")\n>    >  C4m <- mtcars[order(mtcars$cyl,mtcars$mpg),][1,]\n>    >  C6 <- subset(mtcars,cyl==6)\n>    >  C6m <- C6[which.min(C6$mpg),]\n>    >  C8m <- mtcars[order(-mtcars$cyl,mtcars$mpg),][1,]\n>    >  rbind(C4m,C6m,C8m)\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n### Checking and converting types of variables\nTo check the data structure of an object you can use str() and the generic class() function:\n```\nclass(c(10,12,30))\n# [1] \"numeric\"\nclass(c(\"alana\",\"britt\",\"chris\"))\n# [1] \"character\"\nclass(c(TRUE,TRUE,FALSE))\n# [1] \"logical\"\n```\n\nYou can also use the specific is. functions e.g. is.numeric(), is.character(), is.Date(), is.vector(), is.matrix(), is.data.frame() etc.\n\nThe is.na(x) function returns TRUE when an element of x is missing:\n```\nx <- c(1,2,3,NA)\nis.na(x)\n# [1] FALSE FALSE FALSE TRUE\n```\nTo recode values to missing values you don't need is.na(). Select the rows that contain the value you want to recode, e.g. 99, and change the value using an assignment:\n```\ndata$v1[data$v1==99] <- NA\n```\nTo exclude missing values you can use is.na() but there are alternatives. The problem with missing values is that when you apply arithmetic functions on variables that contain missing values they will return missing values and you will have no result. To circumvent this problem many functions have the *na.rm* argument. If you set *na.rm=TRUE* missing values are deleted before calculations are done.\n```\nmean(x) \t\t\t\n# NA\nmean(x,na.rm=TRUE) \t\n# 2\n```\nThe function na.omit() allows you to create a new vector without missing values.
If you apply this function on a data frame it will remove complete rows that contain one or more NA-values.\n```\nnewdata <- na.omit(x)\n```\nYou can convert the data type of an object by using the as. functions e.g. as.numeric(), as.character(), as.Date(), as.vector(), as.matrix(),\nas.data.frame() etc.\n\n> ### {% icon hands_on %} Hands-on: Demo\n>\n> From the demo script run the **Checking and converting data types** section\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Exercise 16a\n>\n> We created a vector containing the days of the week and loaded this into a data frame called Plant_study. If we want to replace the days of the week by real dates, how should we proceed?\n> \n> To create a Date object in R:\n> - define the date as a string in the following format: 1970-01-01\n> - transform the string into a date by using as.Date()\n> 1. Replace the days of the week by the dates of this week\n> 2. What type of data is Plant_study ?\n> 3. Convert Plant_study into a matrix called PS\n> 4. Did the conversion work? Look at the matrix to see if there is a problem. \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  Plant_study$Days <- as.Date(c(\"2019-01-09\",\"2019-01-10\",\"2019-01-11\",\"2019-01-12\",\"2019-01-13\"))\n>    >  class(Plant_study)\n>    >  PS <- as.matrix(Plant_study)\n>    >  PS\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 16b\n>\n> 1. Check the data type of the second column of Drug_study. Retrieve the column using a comma.\n> 2. Convert the second column into a vector. \n> 3. What is different now? 
Look at the vector.\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  class(Drug_study[,2])\n>    >  v <- as.vector(Drug_study[,2])\n>    >  v\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Exercise 16c\n>\n> Instead of deleting missing values with na.omit() you can select the non-missing values.\n> 1. Create a vector with a missing value \n> 2. Multiply all elements with 2. What happens?\n> 3. Check if the 2nd element is missing\n> 4. Delete the missing value using is.na() and the strategy above\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  x <- c(1,2,3,NA)\n>    >  x*2\n>    >  is.na(x[2])\n>    >  x[!is.na(x)]\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 16d\n>\n> 1. Check if z is a vector or a data frame \n> 2. Check if z contains numbers or characters\n> 3. Convert z into a matrix\n> 4. Convert the elements of z into characters\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  is.vector(z)\n>    >  is.data.frame(z) \n>    >  is.character(z)\n>    >  is.numeric(z)\n>    >  as.matrix(z) \n>    >  as.character(z)\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 16e\n>\n> 1. Create a vector called words containing Hello, Hi \n> 2. Convert the words into numbers. What happens?\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  words <- c(\"Hello\",\"Hi\")\n>    >  as.numeric(words) \n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\nR is smart enough to catch you if you try to do an illogical conversion, such as convert characters to numbers. 
It does the conversion but the data is converted to NA values.\n","# 1. Introduction\nIn this chapter we will discuss a strategy for collaborating on projects. These strategies are especially useful when we do not have authorisation to change the content of someone else's project, even though we still have a useful edit/suggestion in mind.  \n\nImagine that you're starting a project with some colleagues and you want to version control the project. If it were to be a document where each of you needs to write part of it, you could simply start a Google Doc. For coding purposes the situation is a bit more complex. There might be a base version of the code already to which you need to add separate parts, however you always need to test whether your part is working together with the rest of the code. \n\nFor this purpose, GitHub encourages the Fork & Pull workflow. Basically one **forks** a central repository, making it a personal forked repository. This repository can constantly be up to date with the central repository by merging those upstream changes in your personal forked repository.  \n\nAfter you forked a repository, it will appear as a new repository in your GitHub account. The next step would be to **clone** the repository locally so you can work on the project from your computer. It's always a good idea to make changes in a **new branch** and keep the *main* branch clean. Hence, after cloning the repository, you could make a new branch. Editing the files, staging, committing and pushing your changes remains the same and they will appear in your new personal forked repository. \n\nWhen you are happy about your changes, when all the commits are pushed to your forked repository, these changes can be merged back into the central repository by creating a **pull request**. The main author can now decide whether he/she is happy about your suggestions and can include (part of) them. 
This workflow leaves the central repository untouched until the moment you want to incorporate changes.\n\n---\n\n![Representation of forking & pulling](../../images/fork_pull.png)\n\n---\n\n\nTwo important terms in this fork & pull workflow are:\n- `upstream`: generally refers to the original repository that you have forked\n- `origin`: is your fork: your own repository on GitHub  \n\nAs mentioned in section 4.4, the \"origin\" is used to refer to the GitHub original repository's URL. This also holds here. The remote `origin` refers to your fork on GitHub, not the original repository it was forked from. \n\nTo summarize the above, the Fork & Pull workflow consists of the following steps:\n1. Fork\n2. Clone\n3. Branch\n4. Stage-commit-push\n5. Pull request\n\n# 2. Fork\nLet's explore GitHub first. GitHub is like the Facebook of programmers. You can see someone's account, what that person has been working on, find new projects (relatable to a Facebook page), etc. Exploring new repositories is possible by clicking on the 'Explore' button in the navigation bar. Searching a specific repository or searching for an account, on the other hand, is possible by simply typing it in the search bar in the navigation bar. \n\n---\n<center><img src=\"../../images/nav-bar.PNG\" /></center>\n\n---\n\nSearch for the VIB Bioinformatics Core account 'vibbits' and find the repository 'fork-repository'. This repository was made specifically for learning the concept of forking. Fork it by clicking the fork button in the upper right corner.\n\n---\n<center><img src=\"../../images/fork-button.PNG\" /></center>\n---\n\n\nThe repository has been successfully forked if you see something similar to the figure below. The icon represents a fork, followed by your GitHub account name and the name of the repository. The second line tells us that the upstream repository is the 'vibbits/fork-repository'. \n\n---\n\n<center><img src=\"../../images/forked-repository.PNG\" /></center>\n---\n\n\n# 3.
Changes\nClone this repository locally, make a branch (e.g. name it *yourname*) and do some edits in that branch. Add your name, accountname or initials and the date to the `participants.txt` file. For this exercise we will only edit the `participants.txt` file. The flow here remains the same: stage-commit-push. After these changes took place, we will have a similar situation \n\n---\n\n<center><img src=\"../../images/edited-forked-repository.PNG\" /></center>\n---\n\n\nIn normal circumstances it is possible that the upstream repository has changed in the meantime. The indicator would then note that there are new commits in the upstream (`1 commit behind vibbits:main`), while the branch/repository itself is one commit ahead.  \n\n---\n\n<center><img src=\"../../images/forked-repository-ahead.PNG\" /></center>\n---\n\nThis does not (really) affect the pull request. In any case, the following step is to create a pull request.\n\n# 4. Pull request\nThe two repositories have diverged during the previous steps. Now it's time to create a pull request between these repositories. After clicking the **Pull request** button, a new screen pops up that looks very similar to the one seen in Chapter 5 (Branching & merging). Note that most developers do not really appreciate it if you try to merge your changes straight into the *main* branch. Usually, they would write some suggestions as to how we can collaborate on a project preferably. Let's assume that the developers of this repository expect you to merge changes into the *dev* branch; then it would look something like this:\n\n---\n\n<center><img src=\"../../images/forked-pull-request.PNG\" /></center>\n---\n\nGitHub tells us:\n- It compared the main branch of the forked repository (in my case *tmuylder/fork-repository*) with the upstream (base) repository *vibbits/fork-repository*. 
\n- It's able to merge these two branches without any conflicting errors\n- It summarizes the changes that have been done in the branch that will be merged into the upstream.  \n\nIf all seems good, we can create the pull request. In the case that there are any conflicting errors, they will need to be solved first. Afterwards we only need to add a message that accompanies the pull request. \n\nA brief overview of the pull request is given in the following screen which either allows you to merge the pull request into the upstream repository yourself or which requests the maintainer of the upstream repository to review and merge the pull request. In the latter case, the maintainer will thereafter receive a notification showing the pull request. An overview of all pending pull requests in which you are involved can be consulted on the [pull requests](https://github.com/pulls) tab of the navigation bar.   \n\n\n# 5. Overview\n\nTo briefly summarize, the steps that we took were: *fork > clone > branch > edit-stage-commit-push > pull request (> merge)*; together they represent a strategy for collaborating on projects. These strategies are especially useful when we do not have authorisation to change the content of someone else's project, even though we still have a useful edit/suggestion in mind.    \nWhat if the upstream repository changed while you were working on your local repository? In this case a pull request should be done in which the receiving branch is your forked repository. Hence, the order of the branches as depicted in the figure above would be swapped.    \n\n\n\n---\n\n> ### {% icon hands_on %} Exercise \n>\n> Merge upstream changes in your forked repository. This approach is useful if you are working on a project that is prone to lots of changes and you need to keep up to date. \n> Note: This exercise can only be performed if the repository `vibbits/fork-repository` has changed after you forked it.  
\n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    > You need to merge any upstream changes into your version, and you can do this with a pull request on GitHub too. This time though you will need to switch the bases of the  comparison around, because the changes will be coming from the upstream version to yours. First find the following notification in your repository and click on pull request:  \n>    > <center><img src=\"../../images/Exercise-fork-1.PNG\" /></center>\n>    > In my case, the order is not how it's supposed to be and the message reads: \"There isn't anything to compare. vibbits:main is up to date with all commits from tmuylder:main.\". Click on *switching the base* in order to insert the changes from the upstream in your forked repository.  \n>    > \n>    > A message similar to the following will allow to create a pull request and subsequently merge the changes into your forked repository. \n>    > \n>    > \n>    > <center><img src=\"../../images/Exercise-fork-2.PNG\" /></center>\n>    > \n>    > \n>    > </details>\n> \n{: .hands_on}\n\n---\n \n\nLet's continue with the [next session](https://material.bits.vib.be/topics/git-introduction/tutorials/7_gitignore/tutorial.html)!\n","## 5.1 Introduction\n\nSo far we've seen variables where you essentially assign a value to a name that you can use in the program. It is also possible to assign groups of values to a name, in Python these are called *lists* and *tuples* - variables that contain multiple values in a fixed order. Python also has *sets*, which are also variables that contain multiple values, but in no particular order. In [section 8](8_Dictionaries.ipynb) we will also discuss dictionaries. By means of a brief summary, already in this stage; there are four collection data types in Python:\n- `List` is a collection which is ordered and changeable. Allows duplicate members. 
Use square brackets [] for lists.\n- `Tuple` is a collection which is ordered and unchangeable. Allows duplicate members. Use normal brackets () for tuples.\n- `Set` is a collection which is unordered and unindexed. No duplicate members. Use curly brackets {} for sets. \n- `Dictionary` is a collection which is unordered, changeable and indexed. No duplicate members. Use curly brackets {} for dictionaries (see [section 8](8_Dictionaries.ipynb)).\n\nThey are useful in different circumstances and each data-type has its own advantage. On a small-scale example this might not be noticeable, however on a larger scale using the right data-type can save you a lot of time. \n\n\n\n\n\n## 5.2 Lists and range\n\nYou can make your own Python list from scratch:\n\n\n```python\nmyList = [5,3,56,13,33]\nmyList\n```\n\nYou can also use the `range()` function. Try this:\n\n\n\n```python\nmyList = list(range(10))\nmyList\n```\n\nYou should get the following output: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]. This is a list of integers - you can recognize a list by the square [ ] brackets. **Note** that Python always starts counting from 0. The command above will give you a series of integers starting from 0 and stopping at the number you defined, however with this number **not** included in the list. Hence, it stops at 9. You can start from a different number as well:\n\n\n```python\nmyList = list(range(3,12))\nmyList\n```\n\nor increase the step size (the default step size is 1):\n\n\n\n\n```python\nmyList = list(range(1,12,2))\nmyList\n```\n\nAn important feature of lists is that they are flexible - you can add and remove values, change the order, ... . You can do such modifications by calling a *method* from the list itself. 
Some examples of methods are:\n- Add elements\n    - `append()` to append an item to the end of the list\n    - `insert()` to add an item at the specified index\n    - `extend()` to extend an item\n- Delete elements\n    - `remove()` to remove the specified item\n    - `pop()` to remove the specified index (or the last item if index is not specified)\n    - `del` keyword removes the specified index\n    - `clear()` method empties the list\n- Sorting:\n    - `sort()` will sort the list in an ordered way\n    - `reverse()` will reverse the order of the list\n- Copy of a list with the `copy()` method\n\n\n\n```python\nmyList = []             # Create an empty list\nmyList.append(5)        # Add a single value to the back of the list\nmyList\n```\n\n\n```python\nmyList.insert(0,9)      # Insert a value in the list at index (element position) 0\nmyList\n```\n\n\n```python\nmyList.extend([99,3,5]) # Extend the list with another list\nmyList\n```\n\n\n```python\nmyList[0]               # Return the first element in the list (counting starts at zero) \n```\n\n\n```python\nmyList[2]               # Return the third element in the list\n```\n\n\n```python\nmyRemovedElement = myList.pop(3)  # Remove the fourth element in the list and return it\nprint(\"I removed {}\".format(myRemovedElement))\nmyList\n```\n\n\n```python\nmyList.sort()           # You can sort the elements in a list - this will change their order\nmyList\n```\n\n\n```python\nmyList.reverse()        # Or reverse the order of the list\nmyList\n```\n\nYou can also select a slice from a list - this will give you a new list:\n\n\n```python\nmyList = list(range(15))\n \nmyListSlice = myList[3:6]\nmyListSlice\n```\n\n\n```python\nmyListCopy = myList[:]\nprint(myListCopy)\n```\n\n\n```python\nprint(myList[-4:])     # This will select the fourth-last to the last element in the list\n```\n\nThere are two other methods you can use on lists:\n- `index()` returns the index of the first element with the specified value\n- 
`count()` returns the number of elements with the specified value\n\n\n```python\nmyList = list(range(1,15))\nmyList\n```\n\n\n```python\nmyList.count(10)   # Will count the amount of times the value 10 occurs in this list\n```\n\n\n```python\nmyList.count(\"A\")  # This always works, and will return 0 if nothing is found\n```\n\n\n```python\nmyList.index(10)   # Will give the index of the element with value 10 - in this case 9 because the list index starts at 0.\n```\n\n\n```python\n#print(myList.index(\"A\"))  # This will crash the program - the value to look for has to be present in the list!!!\n```\n\n\n\n---\n\n> ### {% icon hands_on %} Exercise 5.2.1\n>\n> Take the list [54,56,2,1,5223,6,23,57,3,7,3344], sort it in reverse order (largest value first) and print out the third value.\n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```python\n>    >  # Take the list [54,56,2,1,5223,6,23,57,3,7,3344], sort it in reverse order (largest value first) and print out the third value.\n>    >  myList = [54,56,2,1,5223,6,23,57,3,7,3344]\n>    >  \n>    >  myList.sort()\n>    >  myList.reverse()\n>    >  \n>    >  print(myList[2])\n>    >  #The first element is at index 0, the third at index 2!\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n---\n\n\n## 5.3 Tuples  \nSimilar to *lists* are *tuples* - essentially they are the same, except that a tuple cannot be modified once created. This can be useful for values that don't change, like (part of) the alphabet for example:\n\n\n```python\nmyTuple = (\"A\",\"B\",\"C\",\"D\",\"E\",\"F\")\nmyTuple\n```\n\nImportant to remember is that if you create a tuple with one value you have to use a comma:\n\n\n```python\nmyTuple = (\"My string\",)\nmyTuple\n```\n\n\n```python\nmyWrongTuple = (\"My string\")  # The brackets here don't do anything.\nmyWrongTuple\n```\n\nA tuple is indicated by round brackets **( )**. 
You can interconvert between lists and tuples by using `list()` and `tuple()`:\n\n\n\n\n```python\nmyTuple = (\"A\",\"B\",\"C\",\"D\",\"E\",\"F\")\nmyList = list(range(10))\n \nmyNewTuple = tuple(myList)\nmyNewList = list(myTuple)\n \nprint(\"{} and {}\".format(myList, myNewTuple))\nprint(\"{} and {}\".format(myTuple, myNewList))\n```\n\nYou can find out the length (number of elements) in a list or tuple with `len()`:\n\n\n```python\nmyTuple = (\"A\",\"B\",\"C\",\"D\",\"E\",\"F\")\nmyTupleLength = len(myTuple)\nmyTupleLength\n```\n\nTuples are faster during iteration procedures due to their immutability. \n\n\n\n---\n\n> ### {% icon hands_on %} Exercise 5.3.1\n>\n> Start with the tuple `('a','B','c','D','e','F')`, sort it, take the fourth value out, and print the result.\n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```python\n>    >  # Start with the tuple ('a','B','c','D','e','F'), sort it, take the fourth value out, and print the result.\n>    >  myTuple = ('a','B','c','D','e','F')\n>    >  myList = list(myTuple)\n>    >  myList.sort()\n>    >  #print(myList)\n>    >  \n>    >  print (\"Removing {}\".format(myList.pop(3)))\n>    >  print (\"Result is {}\".format(str(tuple(myList))))\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n---\n\n\n\n\n## 5.4 Strings\n**Strings are a bit like lists and tuples** \n\nStrings are really a sequence of characters, and they behave similar to lists:\n\n\n```python\nmyString = \"This is a sentence.\"\n \nmyString[0:5]          # Take the first five characters\n```\n\n\n```python\nmyString.count(\"e\")    # Count the number of 'e' characters\n```\n\n\n```python\nmyString.index(\"i\")    # Give the index of the first 'i' character\n```\n\nYou cannot re-assign strings as you do with lists though, the following example does not work:\n\n\n```python\nmyString = \"   This is a sentence.  
\"\n```\n\n\n```python\nprint(myString.upper())       # Upper-case all characters\n```\n\n\n```python\nprint(myString.lower())       # Lower-case all characters\n```\n\n\n```python\nprint(myString.strip())       # Strip leading and trailing spaces/tabs/newlines\n```\n\n\n```python\nprint(myString.split())       # Split the line into elements - default is splitting by whitespace characters\n```\n\n\n```python\nprint(myString.replace(' is ',' was '))  # Replace ' is ' by ' was '. Spaces are necessary, otherwise the 'is' in 'This' will be replaced!\n```\n\nA list with all string methods and a full description can be found in the [Python documentation](https://docs.python.org/3/library/stdtypes.html#string-methods), or simply type `dir(myString)`\n\n\n```python\ndir(myString)\n```\n\n\n---\n> ### {% icon hands_on %} Exercise 5.4.1\n>\n> Ask the user for two words, then check whether they are the same (upper or lower case should not matter),if not check whether they have the same first letter (again case should not matter). If not, then print their length. \n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```python\n>    >  # Ask the user for two words, then check whether they are the same (upper or lower case should not matter),if not check whether they have the same first letter (again case >    >  should not matter). If not, then print their length. 
\n>    >  firstWord = input(\"Give first word:\")\n>    >  secondWord = input(\"Give second word:\")\n>    >  \n>    >  print(len(firstWord))\n>    >  \n>    >  if firstWord.upper() == secondWord.upper():\n>    >      print(\"Words are the same (ignoring case).\")\n>    >  elif firstWord[0].upper() == secondWord[0].upper():\n>    >      print(\"Words share the same first letter (ignoring case).\")\n>    >  else:\n>    >      print(\"Word lengths are {} and {}\".format(int((len(firstWord))),int(len(secondWord))))\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n---\n\n## 5.5 Sets  \nVery useful as well are sets. These are unordered and unindexed (so the order in which you put in elements doesn't matter), and it is much easier to compare them to each other. Because sets cannot have multiple occurrences of the same element, it makes sets highly useful to efficiently remove duplicate values from a list or tuple and to perform common math operations like unions and intersections.\n\n![sets](../../images/Python-Set-Operatioons.png)  \n\nSource: https://www.learnbyexample.org/python-set/\n\nYou initialise them by using **set()** on a list or tuple:\n\n\n```python\nmySet1 = set(range(10))\nmySet2 = set(range(5,20))\n \nprint(mySet1)\nprint(mySet2)\n \nmySet1.add(5)  # Elements in a set are unique - the set will not change because it already has a 5\n \nprint(mySet1.intersection(mySet2))\nprint(mySet1.union(mySet2))\n```\n\n\n```python\ndir(mySet1)\n```\n\nThe principle of using intersection and union is the same as the Venn diagrams you probably saw in school... 
You can also make a set out of a string:\n\n\n```python\nmyString = \"This is a sentence.\"\n \nmyLetters = set(myString)\nmyLetters    # Note that an upper case T and lower case t are not the same!\n```\n\nThere are more things you can do with sets which we will not go into here, see the [Python sets](https://docs.python.org/3/library/stdtypes.html#types-set) documentation for more information.\n\n---\n> ### {% icon hands_on %} Exercise 5.5.1\n>\n> Which letters are shared between the words \"perspicacious\" and \"circumlocution\"?\n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```python\n>    >  # Which letters are shared between the words \"perspicacious\" and \"circumlocution\"?\n>    >  firstWord = \"perspicacious\"\n>    >  secondWord = \"circumlocution\"\n>    >  \n>    >  firstLetterSet = set(firstWord)\n>    >  secondLetterSet = set(secondWord)\n>    >  \n>    >  print(firstLetterSet.intersection(secondLetterSet))\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n---\n\n","## 3.1 Introduction  \nThere are several ways to present the output of a program, data can be printed in a human-readable form, or written to a file for future use. Sometimes users want more control over the formatting of output, rather than simply printing space-separated values. There are several ways to format output which we will cover in this section.\n\nThe following figure (which I shamelessly copied from [here](https://www.python-course.eu/python3_formatted_output.php)) helps to visualize the `.format()` argument. If you don't understand it completely, don't worry, we'll cover it in this section:\n\n<center><img src=\"../../images/format_method_positional_parameters.png\" /></center>\n\nEverything between the double quotation marks is what will be printed (thus the `print()` statement is missing). 
Between curly brackets you can find lay-out options for the arguments, the arguments themselves are given within the `.format()` statement. The first number defines the argument that will be printed (Python starts counting at 0), the number behind the colon (`:`) defines the number of characters that is foreseen for the argument, and lastly the number behind the point (`.`) is only applicable for floats and defines the amount of decimals that will be printed. E.g.: `1:8.2f` will print the second argument (index 1) in a field of 8 characters, two of which are decimals, formatted as a float. If the argument has fewer than 8 characters, then whitespace will be used for padding. \n\n## 3.2 Using .format()\nThe following example gives the most basic use form of the `.format()` statement. \n\n\n\n```python\nprint(\"My name is {}.\".format(\"Jane\"))\n```\n\nThe above doesn't do anything interesting; you can however put a number in between the curly brackets `{}` to force the output to take up a number of characters. Try this:\n\n\n```python\nprint(\"My name is {:>10}.\".format(\"Jane\"))\n```\n\nYou'll now see that you force an area of 10 characters to put the name. If the name is shorter, the remaining empty characters will be whitespaces. If the name would be longer, the number will be overruled. Note that the > character in the .format() form can be used to determine the alignment (use < for left align, > for right align and ^ for centered). \n\nThere are a number of differences between the old Python (version <2.5) and the version you're using now (Python 3.7). In older scripts you might see different print statements. Instead of using the `.format()` statement, it used to be a `%`-symbol to position information in the right place. E.g.:\n```print(\"My name is %s.\" % \"Jane\")```  \n\n\n----\n\n> ### {% icon hands_on %} Hands-on: Exercise 3.2.1\n>\n> Try to print the text: *Percent of alignment: 100%* using a formatting character for the number 100. 
\n> If this worked out successfully, try to align it explicitly to the right with five whitespaces. \n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```python\n>    >  # part 1\n>    >  print(\"Percent of alignment: {}%\".format(100))\n>    >  \n>    >  # part 2\n>    >  print(\"Percent of alignment: {:>8}%\".format(100))\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n----\n\n\n\n\n## 3.3 Formatting numbers  \nHere are some examples of formatting integers (digits):\n\n\n```python\nprint(\"This is {:d}.\".format(252))\nprint(\"This is {:d} and {:d}.\".format(25,30))\n```\n\nHere are some examples of formatting decimal numbers (floating point):\n\n```python\nmyFloat = 4545.4542244\n \nprint(\"Print the full float {},\\ncut off decimals {:5.2f},\\nor determine the characters before the decimal {:10.1f}.\".format(myFloat,myFloat,myFloat))\n \n# Or in old style\n# print(\"Print the full float %f,\\ncut off decimals %.2f,\\nor determine the characters before the decimal %10.1f.\" % (myFloat,myFloat,myFloat))\n```\n\n## 3.4 Special characters  \nFor some characters it is necessary to use what are called 'escape' codes because you cannot type the character normally from the keyboard. Try this:\n\n```python\nprint(\"The \\ sign\\ncan\\talso\\tbe\\tprinted.\")\n```\n\nHere the \\\\ will print a backslash (however Python might think you are trying to insert a special code and in order to be safe it's better to type a double \\\\\\\\), the \\n will print a new line, \\t a tab character.\n\n\nEscape codes are necessary if you are trying to print a single or double quote:\n\n```python\nprint(\"He said: \\\"Hello\\\".\")\n```\n","## 9.1 Introduction\n\nMore often than not the data you need for your program will come from somewhere else - either from user input or a file. 
Especially for more complex data, it becomes essential to be able to read in data files, do something with the data, and write out a new file with modified information or a set of analysis results.\n\n## 9.2 Reading files\n \nTo read in a file you have to create a *file handle*. This is a sort of connection to the file that you can use to pull data from it. You create a connection to a file by using the **open()** function. Whenever you're done using the file, it's good practice to close the file handle. \n\n\n```python\n# Open the file\nfileHandle = open(\"data/readfile.txt\")  \n# Close the file\nfileHandle.close()\n# Nothing happened...\n```\n\nAll this does is create the connection; the file has not been read yet. In order to read in a file, there are a couple of possibilities:\n- `readline()` - read the next line of the file as one string (the first line on the first call). \n- `readlines()` - read all of the lines in the file. Each line is one string. The lines are combined as a list of lines (strings). \n- `read()` - read the whole file as one string. \nEach method has its advantage. E.g. if you're searching for the presence of a word or string in a file, given that the file is not too big, you can use *read*. If you want to process an enormously big file and from each line you need to extract, process and save the information, then it's better to read line by line with *readline* within a for-loop. Try to understand the difference between these methods while you go through this section. \n\nGiven the file `readfile.txt` in a folder named data:\n\n``` \nThis is the first line.\nHere is a second one. \nAnd there is also a third line. \n```\n\n1. Using `read`:\nNote that the three different lines are read in one long string. This is how the `read` function works. \n\n```python\nfileHandle = open(\"data/readfile.txt\")  \nfileHandle.read()\n```\n\n\n```python\nfileHandle.close()\n```\n\n\n2. Using `readline`:\nReadline reads in the following line. It starts with the first one. 
When you call the method again, it will print the second line. It's important to understand this as you can exploit this method in a for-loop to access each line separately.\n\n```python\nfileHandle = open(\"data/readfile.txt\")   \nfileHandle.readline()\n```\n\n```python\nfileHandle.readline()\n```\n\n```python\nfileHandle.close()\n```\n\n\n3. Using `readlines`:\nInstead of reading the lines of a file one by one, you can also do it in one go. As explained above, each line is one string and all of the lines/strings are stored in a list. \n```python\nfileHandle = open(\"data/readfile.txt\")   \nfileHandle.readlines()\n```\n\n\n```python\nfileHandle.close()\n```\n\nKnowing this we can move on to more complex examples. First make sure to find the PDB file *TestFile.PDB* in your data folder or download [this fake PDB coordinate file for a 5 residue peptide](http://wiki.bits.vib.be/images/3/3a/TestFile.pdb) and save it in the data directory. \n\nIn the example below we will read all the lines in the file (as separated by a newline character), and store them in the variable *lines*. Each element in this list corresponds to one line of the file! When this is done, we close the file. \n\n\n```python\n# Read in the file per line\nfileHandle = open(\"data/TestFile.pdb\")\nlines = fileHandle.readlines()\n \n# Close the file\nfileHandle.close()\n \n# Print number of lines in the file\nprint(\"There are:\", len(lines), \"lines in the file\")\n\n# Loop over the lines, and do some basic string manipulations\nfor line in lines:\n    line = line.strip()  # Remove starting and trailing spaces/tabs/newlines\n    print(line)\n```\n\n\n```python\nline = lines[10]\nline = line.strip().split()\nline[-1]\n```\n\nNow you can do many other things with the data in the file. E.g. if you want to count the number of times a carbon element appears in the file. 
\n\n\n```python\n# Open the file\nfileHandle = open(\"data/TestFile.pdb\")\n \n# Read all the lines in the file (as separated by a newline character), and store them in the lines list\n# Each element in this list corresponds to one line of the file!\nlines = fileHandle.readlines()\n \n# Close the file\nfileHandle.close()\n \n# Initialise the line counter\nlineCount = 0\n \n# Loop over the lines\nfor line in lines:\n    columns = line.strip().split()\n    if columns[-1] == 'C':       # Alternatively, use \"if ' C ' in line:\"\n        print(line, end='')     # Using the 'end' argument in the print because the line already contains a newline at the end\n                                # otherwise will get double spacing.\n        lineCount += 1\n\nprint(\"Number of lines with ' C ': {}\".format(lineCount))\n```\n\nYou should find 75 lines - note that in this case, for those who know the PDB format a bit, you're finding all carbon atoms.\n\nAlternatively, you can use the with() statement to open files. The example here above would then become:\n```python\nwith open(\"data/readfile.txt\") as fileHandle:\n    for line in fileHandle:\n        print(line)\n```\nThis method is often used as it does not require you to keep track of the open file in your mind, as well as clearer syntax.\n\n## 9.3 Writing a file\nWriting a file is very similar, except that you have to let Python know you are writing this time by adding the `'w'` parameter in the `open()` function. Actually Python needs two arguments, however it assumes that if you only give one parameter (the file that it has to read), the other one is `'r'` which stands for *reading* mode. 
\n\nFor the sake of the example, we're writing a new file and call it `writefile.txt`:\n\n```python\nf = open('data/writefile.txt','w')\nf.write('Now we have a new file \\n')\nf.write('Because Python automatically makes this file and writes some text to it.')\nf.write('Btw, if you don\\'t specify the newline characters, it will append the string at the end of the last line')\nf.close()\nf = open('data/writefile.txt')\ntext = f.read()\nprint(text)\nf.close()\n```\n\n**Be careful** - if the file exists already it will be overwritten without warning!\n\nThe file is written to the directory you're executing the program in - have a look!\n\n\n----\n\n> ### {% icon hands_on %} Exercise 9.3.1\n>\n> Read in the file `TestFile.pdb`, and write out all lines that contain 'VAL' to a new file.\n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution 1\n>    > </summary>\n>    >\n>    >  ```python\n>    > # Read the file\n>    > f = open(\"data/TestFile.pdb\",\"r\")\n>    > g = open('data/withval.pdb','w')\n>    > \n>    > # Loop over the lines\n>    > for line in f:\n>    >     if 'VAL' in line:      # Alternatively, use \"if ' C ' in line:\"\n>    >         if 'ATOM' in line:\n>    >             g.write(line)\n>    > f.close()\n>    > g.close()\n>    >  ```\n>    > </details>\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution 2\n>    > </summary>\n>    >\n>    >  ```python\n>    > # Open the file\n>    > fileHandle = open(\"data/TestFile.pdb\")\n>    > \n>    > # Read all the lines in the file (as separated by a newline character), and store them in the lines list\n>    > # Each element in this list corresponds to one line of the file!\n>    > lines = fileHandle.readlines()\n>    >  \n>    > # Close the file\n>    > fileHandle.close()\n>    >  \n>    > # Track the lines with VAL\n>    > linesToWrite = []\n>    >  \n>    > # Loop over the lines\n>    > for line in lines:\n>    >     if line.count(\"VAL\"):      # 
Alternatively, use \"if ' C ' in line:\"\n>    >         linesToWrite.append(line)\n>    > \n>    > # Write out the lines\n>    > fileHandle = open(\"data/fileWithVAL.pdb\",'w')\n>    > for line in linesToWrite:\n>    >     fileHandle.write(line)\n>    > \n>    > # Close the file\n>    > fileHandle.close()\n>    >  ```\n>    > </details>\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution 3\n>    > </summary>\n>    >\n>    >  ```python\n>    > # Read the file\n>    > f = open(\"data/TestFile.pdb\",\"r\")\n>    > \n>    > # Track the lines with VAL\n>    > linesToWrite = []\n>    > \n>    > # Loop over the lines\n>    > for line in f.readlines():\n>    >     if line.count(\"VAL\"):      # Alternatively, use \"if ' C ' in line:\"\n>    >         linesToWrite.append(line)\n>    > \n>    > # Write out the lines\n>    > fileHandle = open(\"data/fileWithVAL.pdb\",'w')\n>    > for line in linesToWrite:\n>    >     fileHandle.write(line)\n>    > \n>    > # Close the file\n>    > fileHandle.close()\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n\n\n## 9.4 Advanced file reading and interpretation \n\n> ### {% icon hands_on %} Exercise 9.4.1\n>\n> Read in the TestFile.pdb file, print out the title of the file, and find all atoms that have coordinates closer than 2 angstrom to the (x,y,z) coordinate (-8.7,-7.7,4.7). 
Print out the model number, residue number, atom name and atom serial for each; the model is indicated by:\n> ```\n> MODEL     1\n> ```\n> lines, the atom coordinate information is in:\n> ```\n> ATOM      1  N   ASP A   1     -10.341  -9.922   9.398  1.00  0.00           N\n> ```\n> lines, where column 1 is always ATOM, column 2 is the atom serial,  column 3 the atom name, column 4 the residue name, column 5 the chain code, column 6 the residue number, followed by the x, y and z coordinates in angstrom in columns 7, 8 and 9.\n> \n> note that the distance between two coordinates is calculated as the square root of (x1-x2)²+(y1-y2)²+(z1-z2)².\n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution \n>    > </summary>\n>    >\n>    >  ```python\n>    > # Open the file\n>    > fileHandle = open(\"data/TestFile.pdb\")\n>    >  \n>    > # Read all the lines in the file (as separated by a newline character), and store them in the lines list\n>    > # Each element in this list corresponds to one line of the file!\n>    > lines = fileHandle.readlines()\n>    >  \n>    > # Close the file\n>    > fileHandle.close()\n>    >  \n>    > # Initialise some information\n>    > searchCoordinate = (-8.7,-7.7,4.7)\n>    > modelNumber = None\n>    >  \n>    > # Loop over the lines, and do some basic string manipulations\n>    > for line in lines:\n>    >     line = line.strip()  # Remove starting and trailing spaces/tabs/newlines\n>    >     \n>    >     # Only do something if it's not an empty line\n>    >     if line:\n>    >         cols = line.split()   # Split the line by white spaces; depending on the format this could be commas, ...\n>    >  \n>    >     # Print the title\n>    >     if cols[0] == 'TITLE':\n>    >         title = line.replace(cols[0],'')\n>    >         title = title.strip()\n>    >         print(\"The title is '{}'\".format(title))\n>    >  \n>    >     # Track the model number\n>    >     elif cols[0] == 'MODEL':\n>    >         
modelNumber = int(cols[1])\n>    >  \n>    >     # For atom lines, calculate the distance\n>    >     elif cols[0] == 'ATOM':\n>    >  \n>    >         # Set some clear variable names and convert to the right type\n>    >         atomSerial = int(cols[1])\n>    >         atomName = cols[2]\n>    >         residueNumber = int(cols[5])\n>    >         x = float(cols[6])\n>    >         y = float(cols[7])\n>    >         z = float(cols[8])\n>    >  \n>    >         # Calculate the distance\n>    >         distance = ((x - searchCoordinate[0]) ** 2 + (y - searchCoordinate[1]) ** 2 + (z - searchCoordinate[2]) ** 2 ) ** 0.5\n>    >         if distance < 2.0:\n>    >             print(\"Model {}, residue {}, atom {} (serial {}) is {:.2f} away from reference.\".format(modelNumber,residueNumber,atomName,atomSerial,distance))\n>    > \n>    >  ```\n>    > </details>\n>\n{: .hands_on} \n\n\n\n## 9.5 Next session\nConclusion\n","## 4.1 Introduction\nPrograms start to become more interesting if you can do different things depending on the input. For this, you have to use *conditions*, which we will discuss in this section. \n\nDecisions will be taken based on a condition. In this perspective, we highlight the importance of understanding booleans **True** and **False**, as well as the **None**-keyword once more.\n\n## 4.2 If statement\n\nThe **if** condition allows you to only execute a bit of code if a (set of) condition(s) is satisfied. Python syntax requires that you put a colon : after the **if**, and that the *block* of code that is conditional is *indented* with the same amount of spaces (or tabs). Python doesn't really care about the number of spaces or tabs, as long as you're consistent. Jupyter notebook uses tabs, hence it is best to follow along. Now try this:\n\n\n```python\nx = 5\n \nif x == 5:\n    print(\"x is five!\")\n\nif x!=5:\n    print(\"x is not five!\")\n```\n\nyou will see that only the block of code under x == 5 is printed out. 
You can of course make the conditions more complex and combine them with **and** and **or**:\n\n\n\n```python\nx = 5\ny = 10\n \nif (y / x) == 2:\n    print(\"y divided by x is 2!\")\n\nif y == 10 or x == 2:\n    print(\"x is two or y is ten\")\n    \nif y == 10 and x == 2:\n    print(\"x is two and y is ten\")\n\nprint(\"The end\")\n```\n\nHere you see that the blocks for the first two conditions (which are True) are executed, but not the third. The last line of code is always printed off - it's on the same level as the start of the code, and not conditional.\n\n## 4.3 Indentation\nPython relies on indentation (whitespace at the beginning of a line) to define scope in the code. Other programming languages often use (curly) brackets for this purpose. The level of indentation is crucial, and Python will immediately give an error if there are inconsistent levels of indentation in the code. Try this:\n\n\n```python\nx = 5\ny = 10\n \nif (y / x) == 2:\n  print(\"y divided by x is 2!\")\n   print (\"And x is {}!\".format(x))\n```\n\nNote that this can also happen if you start mixing space and tab characters!\n\n\n\n---\n\n> ### {% icon hands_on %} Exercise 4.3.1\n>\n> Write a program where you ask the user for x and y, make sure that y is not zero, and print out x/y. \n> \n> ```python\n> # Modify the code below on the ... locations:\n> xString = input(...)\n> yString = input(...)\n> \n> x = ...(xString)\n> y = ...(yString)\n> \n> if ... :\n>     print(\"Error, your y-number is 0\")\n> if ... : \n>     print(\"x divided by y = {:.2f}\".format(...))\n> ```\n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```python\n>    >  # Write a program where you ask the user for x and y, make sure that y is not zero, and print out x/y. 
\n>    >  \n>    >  xString = input(\"Give a number: \")\n>    >  yString = input(\"Give another number that is not zero: \")\n>    >  \n>    >  x = float(xString)\n>    >  y = float(yString)\n>    >  \n>    >  if y == 0:\n>    >      print(\"Error, your y-number is 0\")\n>    >  if y != 0:\n>    >      result = x/y\n>    >      print(\"x divided by y = {:.2f}\".format(result))\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n---\n\n## 4.4 Elif statement \n\nOnce you have an **if**-condition, you can directly follow it up with an **elif** (else if) condition. This is not the same as another **if**-statement. An **elif** is only executed if the previous if (and other preceding elifs) are not True. In the example below the code in section 4.2 is adapted. Now the second and third if-statements are replaced by elifs.\n\n\n```python\nx = 5\ny = 10\n \nif (y / x) == 2:\n    print(\"y divided by x is 2!\")\nelif y == 10 or x == 2:\n    print(\"x is two or y is ten\")\nelif y == 10 and x == 2:\n    print(\"x is two and y is ten\")\n\nprint(\"The end\")\n```\n\nNow only the code under the first condition is executed, not the second (the third is not True and is in any case irrelevant). If we switch the conditions around a bit:\n\n\n\n\n```python\nx = 5\ny = 10\n \nif y == 10 and x == 2:\n    print(\"x is two and y is ten\")\nelif y == 10 or x == 2:\n    print(\"x is two or y is ten\")\nelif (y / x) == 2:\n    print(\"y divided by x is 2!\")\n\nprint(\"The end\")\n```\n\nThe first condition is not True, so the second is evaluated. This one is True, so it is executed, and the text 'x is two or y is ten' is printed. For clarity it is often useful to leave some space before and after the (set of) condition(s) - it makes the code easier to 'read' afterwards.\n\n\n\n\n\n---\n> ### {% icon hands_on %} Exercise 4.4.1\n>\n> Write a program where you ask the user for two words. 
Compare the words; if they are the same, print a message, if the first or second word is 'Stop', then also print a message. \n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```python\n>    >  # Write a program where you ask the user for two words. Compare the words; if they are the same, print a message, if the first or second word is 'Stop', then also print a message.\n>    >  print(\"Give two words.\")\n>    >  firstWord = input(\"Write a word: \")\n>    >  secondWord = input(\"Write another word: \")\n>    >  \n>    >  if firstWord == secondWord:\n>    >      print(\"These words are the same\")\n>    >  elif firstWord == \"Stop\" or secondWord == \"Stop\":\n>    >      print(\"Your word was Stop, hence we stopped here\")\n>    >  \n>    >  print(\"The end\")\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n---\n\n\n\n## 4.5 Else statement\nYou can also end an **if** (with or without **elif**s) with an **else** condition. The block of code following else is only executed if the previous (set of) conditions are all False. Try this:\n\n\n```python\nx = 7\n \nif not (x % 2):\n    print(\"x is divisible by two!\")\nelif not (x % 3):\n    print(\"x is divisible by three!\")\nelse:\n    print(\"x is not divisible by two...\")\n\nprint (\"x is {}\".format(x))\n```\n\nYou can modify the value of x a bit to see what else can happen. Can you spot a problem with this example? What will happen if x can be divided by both two and three? 
What can you do to solve this problem?\n\n\n\n> ### {% icon hands_on %} Exercise 4.5.1\n>\n> Modify the code above so it prints that it is divisible by two and three when this is the case.\n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```python\n>    >  # If a value can be divided by two and three, only the block of code under the first condition will be executed, so you will not find out whether your value can be divided by three! There are several solutions to this, for example:\n>    >  x = 12\n>    >   \n>    >  if not (x % 2):\n>    >      print(\"x is divisible by two!\")\n>    >      if not (x % 3):\n>    >          print(\"x is divisible by three!\")\n>    >  elif not (x % 3):\n>    >      print(\"x is divisible by three!\")\n>    >  else:\n>    >      print(\"x is not divisible by two or three...\")\n>    >  \n>    >  print (\"x is {}\".format(x))\n>    >  \n>    >  # This is not a very elegant solution however, as you are repeating the same bit of code twice to find out whether the value can be divided by three. This one might be   slightly better:\n>    >  x = 12\n>    >  \n>    >  if not (x % 2):\n>    >      print(\"x is divisible by two!\")\n>    >  \n>    >  if not (x % 3):\n>    >      print(\"x is divisible by three!\")\n>    >  \n>    >  if (x % 2) and (x % 3):\n>    >      print(\"x is not divisible by two or three...\")\n>    >  \n>    >  print (\"x is {}\".format(x))\n>    >  \n>    >  # However you still have to repeat the conditions, which would become very tedious (and error-prone) if you were to try division by many values. 
The next example is a bit more verbose but cleaner and more 'extendable' for other values:\n>    >  x = 12\n>    >  xDivisible = False\n>    >   \n>    >  if not (x % 2):\n>    >      print(\"x is divisible by two!\")\n>    >      xDivisible = True\n>    >  \n>    >  if not (x % 3):\n>    >      print(\"x is divisible by three!\")\n>    >      xDivisible = True\n>    >  \n>    >  if not xDivisible:\n>    >      print(\"x is not divisible by two or three...\")\n>    >  \n>    >  print (\"x is {}\".format(x))\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n","# 1. What's a branch?\nThe idea of branching is that we can create a copy of the project in which we can add a new feature. This branch is a completely separate version of your project and lives next to your original version. If the new feature is working properly we can merge it back into the project. It's a great way of testing new changes in some code when you're not sure whether it will work, and in the meanwhile not messing up the code that you already have. \n\n---\n\n<center><img src=\"../../images/branching.png\" /></center>\n\n---\n\nThe original repository is now called the *main* branch; historically it was called the *master* branch.   \n\nA new GitHub repository is initialized by default with one branch: the *main* branch. All the changes in our project that we did so far, have hence always been in this main branch. Remember that when we did `git status` we read a line saying that we were on the main branch. \n\nIf we would make a new branch, we can name it however we like (e.g. *new-feature*). There are two ways of doing this: locally or on the GitHub website. We will first show you the latter (section 2) and afterwards how to do it locally via Git Bash or the Terminal (section 4). \n\nA repository can have numerous branches. 
Branches are ways of organising work on a project: you can have a branch for a new feature, for trying out something new, for exploring an issue - anything at all.\n\nIt’s a good practice to create a new branch for every new bit of work you start doing, even if it’s a very small one. It’s especially useful to create a new branch for every new feature you start working on. Branches are of course disposable, you can always remove them. \n\n# 2. Branching on GitHub\nWe can make a new branch on GitHub. \n1. Click the button: 'Main'\n2. In 'Find or create a branch...' type `new-feature` (or any other name)\n3. Click 'Create branch': new-feature\n\n---\n\n<center><img src=\"../../images/newbranch-github.PNG\" /></center>\n\n---\n\nGitHub will now display `new-feature`. It's very important to understand that any changes that happen in this branch, will not be influencing the main branch. \n\n---\n\n> ### {% icon hands_on %} Exercise 5\n>\n>  Edit the `plot2.R` file again, however make sure you're in the *new-feature* branch. Add the following lines that will make a new plot. These lines will allow us to investigate the relation between the weight, horsepower and miles per gallon variables of `mtcars` dataset in R. 
\n> \n> ```R\n> # Install requirements & plotting of 3D scatterplot\n> install.packages(\"scatterplot3d\")\n> library(scatterplot3d)\n> attach(mtcars)\n> scatterplot3d(wt,hp,mpg, pch=16, highlight.3d=TRUE,\n>               type=\"h\", main=\"3D Scatterplot\")\n> ```\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    > Edit the file `plot2.R` by clicking on the pencil icon and add the following lines: \n>    > \n>    > <center><img src=\"../../images/solution5.PNG\" /></center>\n>    > Commit your changes with a useful commit message and save by clicking the green 'Commit changes'-button.\n>    > \n>    > </details>\n> \n{: .hands_on}\n\n---\n\nSwitch back to your *main* branch and have a look to the `plot2.R`-file. It shouldn't contain these changes. \n\n\n# 3. Merging branches on GitHub\nBefore exploring how we make branches on our computer locally, we'll merge the changes in the *new-feature* branch into the *main* branch. Branches are merged by making a **pull request**. In this section we will explain how to do a pull request, often shorted to PR. \n\nWhether you are on the *main* or *new-feature* branch, doesn't matter. In both cases you should see the following yellow screen. Alternatively, go to 'Pull requests' and find it there. \n\n---\n\n<center><img src=\"../../images/pull-request-pre.PNG\" /></center>\n\n---\n\nClick on **compare & pull requests** or go to the section **Pull requests** and create a **New pull request** (select the branches you want to incorporate). A new screen pops-up with the following information.\n\n---\n\n<center><img src=\"../../images/pull-request-1.PNG\" /></center>\n\n---\n\n- The pull request should be interpreted as a request to pull the new branch and all of its changes into the main branch.   \n- The base where it would be pulled towards is `base: main`. The branch where the changes are deriving from is `compare: new-feature`.   
\n- Note that GitHub checks the compatibility of the branches: in this case there are no conflicting edits and the branches can be merged together.   \n- Give a descriptive title text and if appropriate some additional comment. \n\nUnderneath the pull request related information, GitHub also gives you a summary of the changes that were done. \n- Each commit from the branch *new-feature* (i.e. only added these 7 lines in this case)\n- Display of the file and a visual representation of what changed in that commit. \n\n---\n\n<center><img src=\"../../images/pull-request-2.PNG\" /></center>\n\n---\n\n\n\nClick on **Create pull request** to finalize the creation of the PR. Note that the branches are not merged yet, one more comment before we do that! We know that GitHub allows us to collaborate on projects. Here we can find some of the features that GitHub is providing us to start collaborating. We could for example start a conversation here and discuss the PR, select a (couple of) reviewer(s), add assignees who authored, add labels representing what type of edits were done in the branch, etc. Essentially these are valuable for organizing bigger projects; keep track of who's working on what and who needs to review specific changes, etc.     \n\nFinally, we verify the merge pull request commit and you give your consent to GitHub to merge both branches by clicking 'Merge pull request'.\n\n---\n\n<center><img src=\"../../images/pull-request-3.PNG\" /></center>\n\n---\n\nIt might be possible that in a project with several people, you are not authorized to make changes to the *main* branch. In this case you will always have to work in a separate branch and someone else will get this last message. He or she will then decide whether this pull request should be merged. \n\n# 4. Branching locally\nBesides the possibility of making branches on GitHub, we can also do it locally on our computer. 
As we've made changes to the repository on GitHub, we'll start with pulling the changes into our local repository. Use `git pull` in your project folder. \n \nThere is always an indication in the Terminal or Git Bash of which branch we are in (i.e. *main*). Here are the most important commands related to making branches and switching between different branches:\n1. Listing all the existing branches is possible with `git branch -a`\n2. `git checkout -b <new-branch>`: will create a new branch and move into this branch. \n3. `git branch <new-branch>`: will create a new branch, but will remain in the current branch (i.e. the *main* branch in this case)\n4. With `git checkout <branch>` we will switch from one branch to the other. \n\nLet's start with listing all the existing branches (1). \n```\n* main\nremotes/origin/HEAD -> origin/main\nremotes/origin/main\nremotes/origin/new-feature\n```\nThe first branch is our local *main* branch in which we are currently working (as denoted by the asterisk \\*). The three others relate to the branches that exist **remotely** on GitHub. If we want to work on the branch *new-feature* we will have to import it first with: `git checkout new-feature`. Git will understand that there is a remote branch with the same name and you want to work on this one. \n\n**Note** that if you use `git checkout -b new-feature`, you would have created a new branch with the same name as the remote branch. This is error prone and will lead to problems! Hence, it is really important that you switch to the existing branch and not create a new one!\n\n## 4.1. Example workflow\nAn example workflow is depicted in the figure below and is discussed in the following points. \n\n---\n\n<center><img src=\"../../images/conceptual_areas_branching.png\" width=\"1000\" /></center>\n\n---\n\n**1. Make a new branch:**\n```\ngit checkout -b <new-branch>\n```\nGit will make a new branch with the name `<new-branch>` and tell you it switched to the new branch. 
If you want to change branches, just type `git checkout` followed by the name of the branch, e.g. `git checkout main`.\n\n**2. Make some changes:**\n  - Add a new file\n  - Edit an existing file\n\n**3. Stage changes:**  \nUse the following  command to simply add all the new or changed files. \n```\ngit add -A\n```\n \n**4. Commit staging area:**  \nCommit all the staged files with:\n```\ngit commit -m \"some useful commit message\"\n```\n\n**5. Push commits to GitHub:**\n\n```\ngit push origin <new-branch>\n```  \nor alternatively:\n```\ngit push --set-upstream origin <new-branch>\n```\nThe `git push` command is now a bit longer. The first time we want to publish a new local branch on a remote repository (GitHub), we need to be explicit and tell Git to add the `<new-branch>` to the origin. In Git, the \"origin\" is used to refer to the GitHub original repository's URL and makes it much easier to talk about. \n\nNext time you want to push your commits from *new-branch*, you won’t need to be explicit - you can simply do `git push`, because now *new-branch* exists on GitHub and both branches know how to communicate with each other. \n\n\n---\n\n> ### {% icon hands_on %} Exercise 6\n>\n>  Make a new branch and make sure you're in the branch. Rewrite the README.md file so it contains the following text. Once the changes have been committed and pushed to GitHub, create a pull request and merge the changes into the main branch.  
\n> \n> ```\n> # Downstream data-analysis R\n> This repository contains all the scripts for the downstream data analysis of my project.\n> ```\n> \n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  \n>    >  ```\n>    >  git checkout -b readme\n>    >  ```\n>    >  Do the necessary changes\n>    >  ```\n>    >  git add README.md\n>    >  git commit -m \"changed README file completely\"\n>    >  git push origin readme\n>    >  ```\n>    >  Find the new branch in your GitHub repository. From there the solution is identical as discussed here above. \n>    > </details>\n>\n{: .hands_on}\n---\n\nAs a final note on merging branches, we mention here that it is obviously also possible to merge branches on our computer locally. For this, we refer to some further reading materials linked [here](https://git-scm.com/book/en/v2/Git-Branching-Basic-Branching-and-Merging).\n\n\n# 5. Deleting branches \n\n## 5.1. Via GitHub\n\nIf a branch is of no more use, we can delete it. To find all the existing branches in GitHub, click on *branches* in the top left corner of the repository.\n\n---\n\n<center><img src=\"../../images/deleting-branch-1-update.PNG\" /></center>\n\n---\n\n\nAfter successfully merging our changes in the *main* branch, the old one(s) can be deleted. Click on the waste bin:\n\n---\n\n<center><img src=\"../../images/deleting-branch-2.PNG\" /></center>\n\n---\n\nGo back to the main tab of the repository and find that the branch has been deleted. \n\n\n## 5.2. Locally\n\nDeleting a branch is as simple as typing the following command:\n\n```\ngit branch -d <name-of-the-branch>\n```\n\nIf git refuses to do so, there is a forced way to do it as well by using the capital `-D` parameter. \n\n\n---\n\n\nLet's continue with the [next session](https://material.bits.vib.be/topics/git-introduction/tutorials/6_forks/tutorial.html)!","# 12. 
Plotting figures\n*This chapter is based on the materials from [this book](https://www.packtpub.com/eu/big-data-and-business-intelligence/become-python-data-analyst) and [this website](https://python-graph-gallery.com/8-add-confidence-interval-on-barplot/)*\n\nMatplotlib is a Python 2D plotting library which produces publication quality figures. Although Matplotlib is written primarily in pure Python, it makes heavy use of NumPy and other extension code to provide good performance even for large arrays.\n\nWe will start with the basic concepts: figures, subplots (axes) and axis. The following line of code allows the figures to be plotted in the notebook results\n\n\n```python\n%matplotlib inline\n```\n\n`matplotlib.pyplot` is a collection of command style functions that make matplotlib work like MATLAB. Each pyplot function makes some change to a figure: e.g., creates a figure, creates a plotting area in a figure, plots some lines in a plotting area, decorates the plot with labels, etc. In matplotlib.pyplot various states are preserved across function calls, so that it keeps track of things like the current figure and plotting area, and the plotting functions are directed to the current subplot.\n\nThe first thing we have to do is import the library, of course. \n\n\n```python\nimport matplotlib.pyplot as plt\n```\n\n\n```python\nplt.plot([1, 2, 3, 2.5])\nplt.ylabel('some numbers')\n```\n\n<center><img src=\"../../images/plotting1.png\" /></center>\n\n`plot()` is a versatile command, and will take an arbitrary number of arguments. 
For example, to plot x versus y, you can issue the command:\n\n\n\n```python\nx_list = list(range(1,10))\ny_list = [pow(i, 2) for i in x_list]\nprint(x_list)\nprint(y_list)\n```\n\n\n```python\nplt.plot(x_list, y_list)\nplt.title(\"Title of the plot\")\n```\n\n<center><img src=\"../../images/plotting2.png\" /></center>\n\nUsing the pyplot interphase, you build a graph by calling a sequence of functions and all of them are applied to the *current subplot*, like so:\n\n\n```python\nplt.plot([1, 2, 3, 4], [10, 20, 25, 30], color='lightblue', linewidth=3)\nplt.scatter([0.3, 3.8, 1.2, 2.5], [11, 25, 9, 26], color='darkgreen', marker='^')\nplt.xlim(0.5, 4.5)\nplt.title(\"Title of the plot\")\nplt.xlabel(\"This is the x-label\")\nplt.ylabel(\"This is the y-label\")\n# Uncomment the line below to save the figure in your currentdirectory\n# plt.savefig('examplefigure.png')\n```\n<center><img src=\"../../images/plotting3.png\" /></center>\n\nWhen working with just one subplot in the figure, generally is OK to work with the pyplot interphase, however, when doing more complicated plots, or working within larger scripts, you will want to explicitly pass around the *Subplot (Axes)* and/or *Figure* object to operate upon.\n\n\n\n```python\ndef gc_content(file):\n    \"\"\"Calculate GC content of a fasta file (with one sequence)\"\"\"\n    sequence=\"\"\n    with open(file, 'r') as f:\n        for line in f:\n            if line.startswith('>'):\n                seq_id = line.rstrip()[1:]\n            else:\n                sequence += line.rstrip()\n    \n    A_count = sequence.count('A')\n    C_count = sequence.count('C')\n    G_count = sequence.count('G')\n    T_count = sequence.count('T')\n    N_count = sequence.count('N')\n    GC_content = (sequence.count('G') + sequence.count('C')) / len(sequence) * 100\n    AT_content = (sequence.count('A') + sequence.count('T')) / len(sequence) * 100\n    print(\"The GC content of {} is\\t {:.2f}%\".format(seq_id, GC_content))    \n    
return GC_content, AT_content, A_count, C_count, G_count, T_count, N_count\n    \n\nGC_content, AT_content, A_count, C_count, G_count, T_count, N_count = gc_content('../data/gene.fa')\nprint(GC_content)\nprint(AT_content)\nprint(A_count)\nprint(C_count)\nprint(T_count)\nprint(G_count)\n\n```\n\n\n```python\ntotal_count = A_count + C_count + G_count + T_count\nA_perc = A_count/total_count*100\nC_perc = C_count/total_count*100\nG_perc = G_count/total_count*100\nT_perc = T_count/total_count*100\nheight = [A_perc, C_perc, G_perc, T_perc]\nbars = ('A','C','G','T')\nplt.bar(bars, height)\n\nplt.xlabel('Nucleotide')\nplt.ylabel('Percentage of occurence (%)')\nplt.title('Distribution of nucleotides in fasta sequence')\n\nplt.show()\n```\n<center><img src=\"../../images/plotting4.png\" /></center>\n\n```python\ntotal_count = A_count + C_count + G_count + T_count\nA_perc = A_count/total_count*100\nC_perc = C_count/total_count*100\nG_perc = G_count/total_count*100\nT_perc = T_count/total_count*100\nheight = [A_perc, C_perc, G_perc, T_perc]\nbars = ('A','C','G','T')\n#plt.bar(bars, height, color=('green','red','yellow','blue'))\nplt.bar(bars, height, color=('#1f77b4','#ff7f0e','#2ca02c','#d62728'))\n\nplt.xlabel('Nucleotide')\nplt.ylabel('Percentage of occurence (%)')\nplt.title('Distribution of nucleotides in fasta sequence')\n\nplt.show()\n```\n<center><img src=\"../../images/plotting5.png\" /></center>\n\n```python\n# libraries\n#import numpy as np\nimport matplotlib.pyplot as plt\n \n# width of the bars\nbarWidth = 0.3\n \n# Choose the height of the blue bars\nexperimentA = [10, 9, 2]\n \n# Choose the height of the cyan bars\nexperimentB = [10.8, 9.5, 4.5]\n \n# Choose the height of the error bars (bars1)\nyer1 = [0.5, 0.4, 0.5]\n \n# Choose the height of the error bars (bars2)\nyer2 = [1, 0.7, 1]\n \n# The x position of bars\nr1 = list(range(len(experimentA)))\nr2 = [x + barWidth for x in r1]\n \n# Create blue bars\nplt.bar(r1, experimentA, width = 0.3, color = 'blue', 
edgecolor = 'black', yerr=yer1, capsize=5, label='Experiment A') # Capsize is the width of errorbars\n \n# Create cyan bars\nplt.bar(r2, experimentB, width = 0.3, color = 'cyan', edgecolor = 'black', yerr=yer2, capsize=7, label='Experiment B')\n \n# general layout\nplt.xticks([x + barWidth/2 for x in r1], ['cond_A', 'cond_B', 'cond_C'])\nplt.ylabel('effect')\nplt.legend()\n \n# Show graphic\nplt.show()\n\n```\n\n<center><img src=\"../../images/plotting6.png\" /></center>\n","## 1.1 Why Jupyter\nJupyter is an interactive code environment that allows you to write code and get immediate feedback from it. It's one of the most popular environment for Python programming. Especially for training purposes, as it interactively gives you your code and some informative text together. \n\n## 1.2 Installation\nThe easiest way to install Python and Jupyter is to install [Anaconda](https://docs.anaconda.com/anaconda/install/) (Navigator) on your computer. Anaconda Navigator contains several (GUI) applications like Jupyter in which you can run your Python code. As a side note, Anaconda is also a package manager which makes it ideal for reproducibility purposes as well. \n\nNowadays, Jupyter comes in two versions. More often you will hear about Jupyter Notebooks which is the precursor of Jupyter Lab. The latter has a couple of advantages, however for stability reasons we'll be using Jupyter Notebooks for now. \n\n> ### {% icon hands_on %} Installation instructions\n>\n> To install all prerequisites for this course \n> 1. Go to [Anaconda](https://www.anaconda.com/distribution/), scroll a bit down and select the right distribution system (Windows, MacOS or Linux), and download the Python 3.7 version. Follow the installation instructions.  \n> 2. You should be able to find Jupyter Notebooks within the installed apps now. Otherwise, open the Anaconda Navigator & launch a Jupyter Notebook\n> 3. Jupyter Notebooks opens a tab with a list of your folders. 
Make and/or select a folder in which you want to keep the training materials.\n> 4. Find the training materials on our Github repository: [Gentle hands on python](https://github.com/vibbits/gentle-hands-on-python)\n> 5. Click the button 'Clone or Download' and select 'Download ZIP'. Finally, extract the zipped file within the folder you just selected or created. \n> 6. In Jupyter Notebook you should see the materials now. \n>\n{: .hands_on}\n\n\n## 1.3 Getting familiar with Jupyter Notebooks\n\n**a. Make a new notebook**  \n\nNavigate to a folder and click on the right New --> Python 3. A new Notebook now pops up with an empty cell. In this cell you can directly input some Python code. Try out the following: \n\n```python\n1+1\n```\n\nClick on the triangle symbol on the top of the notebook or type 'Shift+Enter' to run the code. The output will immediately appear on the screen and should look like this. \n<center><img src=\"../../images/cells.PNG\" /></center>\n\nAlso, a new cell will have appeared in the notebook. A notebook is actually a set of cells in which you can input code. \n\nIf you want another cell, you can click the '+' symbol on top of the notebook. Other interesting symbols up there are the stop symbol and the reload symbol. Whenever your code is stuck, you can stop it right there, or whenever you want to restart in a clean and fresh environment, you hit that restart button. \n\n**b. Code or Markdown**\n\nThere are two modes that a cell can have. \n- A cell is by default in **Code** modus. This means that the environment expects a Python code as input and it will interpret it and give you some output upon running that cell.\n- The **Markdown** mode is a kind of text modus. In here you can type any kinds of text and edit it so headers, bold or italic texts, quotes, images are possible to integrate. It's called rich text. E.g. If you double click this text, you will see the Markdown code of this text. \n\n**c. 
Command or Edit mode**\n\nTo switch between these modes, hit 'Esc' or 'Enter'. When you hit 'Enter', you'll get into the Edit mode, the cell will have a blue border around it and you're free to edit the content of that cell (both in python code or markdown code). If you hit 'Esc', your cell will be in the Command mode and you can use shortcuts to edit your notebook:\n- a (above): add a new cell above\n- b (below): add a new cell below\n- dd: remove the cell\n- z: undo the previous action\nthese are just a few of them. \n\nThe blue bar on the left of your cell indicates which cell is selected. In command mode, you can move through your cells with the up and down arrow keys. \n\nLastly, within the command mode, type 'y' to change the cell to a Python code cell and type 'm' to change the cell to a Markdown code cell. \n\n**d. Running a cell**\n\nTo stress the importance of the 'stop' button on top of this notebook, run the following code below. While it is running, the code has an asterisk which means it's still being executed and your notebook won't be able to process any other code in another cell. In order to stop it, because it's an infinite loop, hit the stop button or type 'ii' in command mode. \n\n\n```python\nimport time\nwhile True:\n    print(\"Hello\")\n    time.sleep(3)\n```\n\n## 1.4 Examples\nThe above will suffice for the Jupyter environment introduction. We will dive into our first examples before diving into the first chapter of our Python adventure. \n\nA program needs information (input) to run, and then needs to export its results so that you know what happened (output). The easiest way to do this is to send a 'text message' to the screen; this is possible with the print command which we will introduce here.\n\nIn this section we also discuss some basics of Python syntax, and the errors that occur if you don't get it right.\n\n**a. 
Let's do some math**\n\nPython is very intuitive and flexible in a way that there is no need of special colons, nor do you have to take spaces into account. Just note that Python is indent-sensitive, but we will get back to this. \n\n\n```python\n1+1\n```\n\n\n```python\n2 - 5\n```\n\n\n```python\n3  * 4\n```\n\n\n```python\n10/2\n```\n\n**b. Writing a message**  \n\nThe print command allows you to write data to the console screen. Try the following example:\n\n\n```python\n# Print 'Hello world' to the screen\nprint(\"Hello world\")\n```\n\nNotice that lines starting with a `#` symbol are not displayed, nor evaluated by Python. They usually contain extra information concerning the code. \n\n\n```python\n# What happens if you leave out the quotation marks? \nprint(Hello world)\n```\n\nYou should get the following error: **SyntaxError: invalid syntax**. This is because Python doesn't understand what Hello and world mean.\n\n**c. Writing numbers**\n\nYou can also print out numbers as text messages to the screen. You do not need quotation marks in this case; just the number is enough. If your number does not have a decimal point (.) in it, it's called an **integer**, if it does have a decimal point, it is a **float**. \n\n\n```python\n# Print an integer and a floating point \nprint(5)\nprint(3.1415)\n```\n\n**Note**  \nIn Python, programs often start with:\n```\n#!/usr/bin/python\n```\nThis line is called the 'Shebang' and tells the operating system where it can find the Python language interpreter so it can run the program without you having to specify where Python is. With Jupyter Lab/Notebooks we already have a Python environment so we do not need to redefine it every time. \n\n## 1.5 JupyterLab\n\nWith recent improvements, the environment grew a little bit more powerful to a full interface, called JupyterLab. 
You can see all of the files that are within a folder within a file explorer, you can open a Terminal window which is a Linux machine where you can install any packages that you would need. You can also make a text file or edit text files that are in your folder. However, the simplest option is still to open a Python console where you directly insert python code. \n\n","## 8.1 Introduction\n\nSo far we've seen variables that store one value or a series of values (see section 5: lists, tuples and sets). There is another way of storing information where you associate one value with another value; in Python this is called a dictionary. Dictionaries provide a very useful way of quickly connecting different values to each other.\n\n\n## 8.2 Dictionary creation & usage\n\nIt is best to think of a dictionary as a set of *key:value* pairs, with the requirement that the keys are unique (within one dictionary). Dictionaries are initiated by using curly brackets {}, and each pair of *key:value*s is separated with a comma. This is what a dictionary looks like:\n\n\n![Gentle-hands-on-introduction-to-Python-Programming Python Dictionary](../../images/myDictionary-cropped.png)\n\n\n\n\n\n```python\nmyDictionary = {'A': 'Ala', 'C': 'Cys', 'D': 'Asp'}\nmyDictionary\n```\n\nYou can recall values by using square brackets [ ] with the name of the key, or use the `get()`-method. \n\n\n```python\nmyDictionary['A']\n```\n\n\n```python\nmyDictionary.get('C')\n```\n\nIf you would like to add a new pair of key-value: \n\n\n```python\nmyDictionary['E'] = 'Glu'\nmyDictionary\n```\n\nNote however that keys are unique and if you try to add a *key:value*-pair with a key that already exists in the dictionary and a different value, it will overwrite the value. \n\n\n```python\nmyDictionary['A'] = 'Glu'\nmyDictionary\n```\n\nSo keys are unique, values are not!\n\nDictionaries, like lists, have several useful built-in methods. 
The most frequently used are listed here below:\n- `keys()`\tto list the dictionary's keys\n- `values()` to list the values in the dictionary\n- `get()`\tcall the value of a specified key\n- `pop()`\tto remove the specified key and its values\n\nListing the keys within a dictionary: \n```python\nmyDictionary = {'A': 'Ala', 'C': 'Cys', 'D': 'Asp', 'E': 'Glu'}\nmyDictionary.keys()\n```\n\nPython tells us that the list is still in a dictionary-keys data structure type. If you would like to extract the keys for further processing, it's probably better to transform them into a list:\n```python\nlist(myDictionary.keys())\n```\n\nSimilarly for the values of a dictionary: \n```python\nlist(myDictionary.values())\n```\n\nWe've already exploited the `get` method, with `pop` we can remove a *key-value* pair:\n\n```python\nmyDictionary.pop('E')\nmyDictionary\n```\n\nIf you try to access a key that doesn't exist, Python will give an error:\n\n\n```python\nmyDictionary = {'A': 'Ala', 'C': 'Cys', 'D': 'Asp', 'E': 'Glu'}\n \nmyDictionary['B']\n```\n\nYou should therefore always check whether a key exists:\n\n\n\n```python\n# Newlines don't matter when initialising a dictionary...\nmyDictionary = {\n     'A': 'Ala',\n     'C': 'Cys',\n     'D': 'Asp',\n     'E': 'Glu',\n     'F': 'Phe',\n     'G': 'Gly',\n     'H': 'His',\n     'I': 'Ile',\n     'K': 'Lys',\n     'L': 'Leu',\n     'M': 'Met',\n     'N': 'Asn',\n     'P': 'Pro',\n     'Q': 'Gln',\n     'R': 'Arg',\n     'S': 'Ser',\n     'T': 'Thr',\n     'V': 'Val',\n     'W': 'Trp',\n     'Y': 'Tyr'}\n\nif 'B' in myDictionary.keys():\n    print(myDictionary['B'])\nelse:\n    print(\"myDictionary doesn't have key 'B'!\")\n```\n\nHowever, it's much cleaner if you use the `get()` method as it doesn't return an explicit error if a key doesn't exist in your dictionary. Instead it will return a `None`-value. 
\n```python\ntype(myDictionary.get('B'))\n```\n\n---\n\n> ### {% icon hands_on %} Exercise 8.2.1 \n>\n> Use a dictionary to track how many times each amino acid code appears in the following sequence:\n> ```\n> SFTMHGTPVVNQVKVLTESNRISHHKILAIVGTAESNSEHPLGTAITKYCKQELDTETLGTCIDFQVVPGCGISCKVTNIEGLLHKNNWNIED  \n> NNIKNASLVQIDASNEQSSTSSSMIIDAQISNALNAQQYKVLIGNREWMIRNGLVINNDVNDFMTEHERKGRTAVLVAVDDELCGLIAIADT\n> ```\n> Tip: use the one-letter code as key in the dictionary, and the count as value.\n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    > ```python\n>    > # Use a dictionary to track how many times each amino acid code appears in the following sequence:\n>    > # SFTMHGTPVVNQVKVLTESNRISHHKILAIVGTAESNSEHPLGTAITKYCKQELDTETLGTCIDFQVVPGCGISCKVTNIEGLLHKNNWNIEDNNIKNASLVQIDASNEQSSTSSSMIIDAQISNALNAQQYKVLIGNREWMIRNGLVINNDVNDFMTEHERKGRTAVLVAVDDELCGLIAIADT\n>    > # Tip: use the one-letter code as key in the dictionary, and the count as value. 
\n>    > mySequence = \"SFTMHGTPVVNQVKVLTESNRISHHKILAIVGTAESNSEHPLGTAITKYCKQELDTETLGTCIDFQVVPGCGISCKVTNIEGLLHKNNWNIEDNNIKNASLVQIDASNEQSSTSSSMIIDAQISNALNAQQYKVLIGNREWMIRNGLVINNDVNDFMTEHERKGRTAVLVAVDDELCGLIAIADT\"\n>    >  \n>    > # First way to do this, using sets (condensed)\n>    > aminoAcidCount = {}\n>    > myUniqueAminoAcids = set(mySequence)\n>    > for aaCode in myUniqueAminoAcids:\n>    >     print(\"Amino acid {} occurs {} times.\".format(aaCode,mySequence.count(aaCode)))\n>    >     aminoAcidCount[aaCode] = mySequence.count(aaCode)\n>    > ```\n>    > \n>    > </details>\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    > ```python\n>    > # Another way to do this, a little bit more elaborate and using the myDictionary as a reference for iteration\n>    > mySequence = \"SFTMHGTPVVNQVKVLTESNRISHHKILAIVGTAESNSEHPLGTAITKYCKQELDTETLGTCIDFQVVPGCGISCKVTNIEGLLHKNNWNIEDNNIKNASLVQIDASNEQSSTSSSMIIDAQISNALNAQQYKVLIGNREWMIRNGLVINNDVNDFMTEHERKGRTAVLVAVDDELCGLIAIADT\"\n>    > \n>    > myDictionary = {\n>    >      'A': 'Ala',\n>    >      'C': 'Cys',\n>    >      'D': 'Asp',\n>    >      'E': 'Glu',\n>    >      'F': 'Phe',\n>    >      'G': 'Gly',\n>    >      'H': 'His',\n>    >      'I': 'Ile',\n>    >      'K': 'Lys',\n>    >      'L': 'Leu',\n>    >      'M': 'Met',\n>    >      'N': 'Asn',\n>    >      'P': 'Pro',\n>    >      'Q': 'Gln',\n>    >      'R': 'Arg',\n>    >      'S': 'Ser',\n>    >      'T': 'Thr',\n>    >      'V': 'Val',\n>    >      'W': 'Trp',\n>    >      'Y': 'Tyr'}\n>    > \n>    > lengthDict = len(myDictionary.keys())\n>    > for aa in range(lengthDict):\n>    >     aaCode = list(myDictionary.keys())[aa]\n>    >     aaCount = mySequence.count(aaCode)\n>    >     print(\"Amino acid {} occurs {} times.\".format(aaCode,aaCount))\n>    > ```\n>    > \n>    > </details>\n>\n{: .hands_on}\n\n\n## 8.3 A practical example of dictionaries\nAn practical example of dictionaries can be found 
in Biopython. Imagine that we want to extract some information from a GenBank file ([NC_005816](https://www.ncbi.nlm.nih.gov/nuccore/NC_005816/))   \n\n\n```python\n# Imports the SeqIO object from Biopython\nfrom Bio import SeqIO\n\n# Reads in (just one record of) the GenBank file\nrecord = SeqIO.read(\"data/NC_005816.gb\",\"genbank\")\nprint(record)\n```\n\nThe SeqRecord object (which we see here) has an id, name and description as well as a sequence. For other (miscellaneous) annotations, the SeqRecord object has a dictionary attribute *annotations*. Most of the annotations information gets recorded in the annotations dictionary.\n\n\n```python\nprint(record.id)\nprint(record.name)\nprint(record.description)\n#print(record.seq)\n```\n\n\n```python\nrecord.annotations\n```\n\n\n```python\nrecord.annotations['organism']\n```\n\n\n```python\nrecord.annotations['source']\n```\n\n(In general, `organism` is used for the scientific name (in Latin, e.g. *Arabidopsis thaliana*), while `source`\nwill often be the common name (e.g. thale cress). In this example, as is often the case, the two fields are\nidentical.)\n\n\n```python\nrecord.annotations['accessions'] # This could be a list of values, hence the list. \n```\n\n## 8.4 More with dictionaries\nAs mentioned here above, the value associated with a key can consist of a list with values (instead of one single value). In the example below we save the information of an experiment in a dictionary. The key that saves the *date* information contains a `list` of fictive dates (01-01-2020 and 02-01-2020):\n\n\n```python\nTriplicateExp1 = {'name': 'experiment 1', 'pH': 5.6, 'temperature': 288.0, 'volume': 200, 'calibration':'cal1', 'date':['01-01-2020','02-01-2020']}\nTriplicateExp1\n```\n\nFor the keys, however, the data structures should be immutable (so tuples are OK, lists are not). 
Recall that keys have to be unique; if you add a key that already exists, the old entry will be overwritten:\n\n\n```python\ndates = ('date1','date2') # tuple\n\nTriplicateExp1[dates] = ['01-01-2020','02-01-2020']\nTriplicateExp1\n```\n\nIt is also possible to have a so-called nested dictionary, in which there is a dictionary within a dictionary. Here we make two more dictionaries with information about the triplicate experiment. The information of each experiment is thus assembled in a separate dictionary. Then, the three dictionaries are combined into one dictionary. \n\n\n```python\nTriplicateExp2 = {'name': 'experiment 2', 'pH': 5.8, 'temperature': 286.0, 'volume': 200, 'calibration':'cal1', 'date':'03-01-2020'}\nTriplicateExp3 = {'name': 'experiment 3', 'pH': 5.4, 'temperature': 287.0, 'volume': 200, 'calibration':'cal1', 'date':'04-01-2020'}\nTriplicate = {\n    'exp1':TriplicateExp1,\n    'exp2':TriplicateExp2,\n    'exp3':TriplicateExp3\n}\nTriplicate\n```\n","## 2.1 Introduction\n\nJust printing things is not that interesting, what you really want to do with a computer program is manipulate data. This is why variables are so important - they allow you to assign information to a name that you can re-use later on.\n\nIn this section we will introduce the basic types of variables and how you can manipulate them. Just to get started, we give an overview of the different **built-in data types** that are present in Python and which you can assign to a variable. 
Although this variety of data types exists, not all of them will be discussed in this course.\n\n- Text type:       `str`\n- Numeric types:   `int`, `float`, `complex`\n- Sequence types:  `list`, `tuple`, `range`\n- Mapping types:   `dict`\n- Set types:       `set`, `frozenset`\n- Boolean types:   `bool`\n- Binary types:    `bytes`, `bytearray`, `memoryview`\n\nIn this section, we will cover the text type, numeric types (complex are out of scope) and booleans.\n\n**Operators** can be anything from:\n- Arithmetic: additions, subtractions, multiplications, divisions, remainders and power\n- Comparison: equal to, not equal to, greater than, less than, etc. \n- Logical: AND, OR and NOT used for conditional statements\n- Identity: `is`, `is not`\n\n**Note**:  \nThis section doesn't really include any exercises. Try to follow and code along while we scroll through the examples so you start to get a feeling for it.\n\n## 2.2 Strings\nWe already saw strings in the previous section. You can assign a string to a variable like this:\n\n\n```python\n# Assign the sequence AGAATCGATACGA to a variable and print the variable.  \nmySequence = \"AGAATCGATACGA\"\nprint(mySequence)\n```\n\nWhat happens here is that you assign a **value**: \"*AGAATCGATACGA*\" to a **variable**: `mySequence` and then print it out. You can now keep on using `mySequence` throughout your program. 
Note that `mySequence` is not quoted because it is now part of the program, try this for example:\n\n\n```python\n# Repeat the above, but this time put the variable in quotation marks when you put in the print statement and see what happens\nmySequence = \"AGAATCGATACGA\"\nprint(\"mySequence\")\n```\n\nYou will now still assign the value \"*AGAATCGATACGA*\" to the variable `mySequence`, but because of the quotes you then print off the string \"mySequence\", not the variable.\n\nYou can assign strings in the following ways:\n\n\n```python\nmyString1 = \"Hello world!\"\nmyString2 = 'Hello sun!'\nmyString3 = \"\"\"Hello\nuniverse.\"\"\"\nprint(myString1)\nprint(myString2)\nprint(myString3)\n```\n\nThe single and double quotes are essentially the same. If you use triple double quotes - \"\"\" - you can assign a string over multiple lines.\n\n\n```python\n# Try assigning a string over multiple lines without using the triple double quotes and see what happens.\nmyString = \"Hello\nuniverse.\"\n```\n\nThis will give a SyntaxError, as Python 'reads' each line separately, and it doesn't find the ending (on the first line) and starting (on the second line) quote. Using the escape codes, you can however do the following:\n\n\n\n```python\n# Try to print two words in two different lines without using three \"\" marks. \nmyString = \"Hello\\nuniverse.\"\nmyString\n```\n\n## 2.3 Strings from user input\n\nPython provides a very simple way to get user input. This input is always returned as a string, so try the following:\n\n\n```python\n# Use input to ask for a sequence string, then print the input sequence\nmySequence = input(\"Give me a sequence:\")\nprint(mySequence)\n```\n\n## 2.4 Integers\nIntegers are non-decimal numbers. 
Python will recognize numbers in the code automatically, so you can do:\n\n\n```python\n# Assign integer 5 to a variable myInteger\nmyInteger = 5\nprint(myInteger)\n```\n\nAs described in the introduction, you can also do standard mathematical operations on integers. Mathematical operations are even possible within a print statement.\n\n\n```python\n5 + 5  # Addition\n```\n\n\n```python\n5 - 8  # Subtraction\n```\n\n\n```python\n2 * 5  # Multiplication\n```\n\n\n```python\n4 / 2  # Division\n```\n\n\n```python\n5 % 2  # Modulus, remainder of division\n```\n\n\n```python\n2 ** 3 # Power\n```\n\nIt doesn't matter if you use variables or integers for this:\n\n\n```python\nx = 5\ny = 2\n```\n\n\n```python\nx + 5  # Addition\n```\n\n\n```python\nx - 8  # Subtraction\n```\n\n\n```python\ny * x  # Multiplication\n```\n\n\n```python\n4 / y  # Division\n```\n\n\n```python\n5 % y  # Modulus, remainder of division\n```\n\n\n```python\ny ** 3 # Power\n```\n\nIn order to print an integer inside a string, you could simply use the following expression in which the string is separated from the integer with a comma.\n\n\n```python\nfirstResult = 5 * 4\nprint(\"The result is\", firstResult,\".\")\n```\n\nHowever, there is another way using the `.format()` method. The format method allows you to change the lay-out of the output that it prints. We will use it a lot during this course, here you see it in the most simplest form. The variable that you want to print is given within the rounded brackets of the format method, and the location in the string to where it prints is given with curly brackets:\n\n\n```python\nfirstResult = (5 * 4)\nprint(firstResult)\nprint(\"The result of the first calculation is {}.\".format(firstResult))\n\nsecondResult = (5 * (4 + 3) - 2)\nprint(secondResult)\nprint(\"The result of the second calculation is {}.\".format(secondResult))\n```\n\nNote here the precedence of operations; * and / take precedence over + and -. 
You can use () to change the results.\n\n## 2.5 Floats\n\nFloats (floating point numbers) are decimal numbers that behave in the same way as integers, except that they are more accurate\n\n\n```python\n# Assign float 5.5 to the myFloat variable\nmyFloat = 5.5 \nmyFloat\n```\n\n\n```python\ntype(myFloat)\n```\n\nMathematical operations are the same:\n\n\n```python\n5.2 + 4.8  # Addition\n```\n\n\n```python\n5.2 - 8.3  # Subtraction\n```\n\n\n```python\n2.0 * 5.11212  # Multiplication\n```\n\n\n```python\n4.2 / 2.7  # Division\n```\n\n\n```python\n5.4 % 2.0  # Modulus, remainder of division\n```\n\n\n```python\n4 ** 0.5 # Power\n```\n\nAlso floats can be incorporated in a string with the `.format()` statement. You can determine the number of characters before and after the decimal point as well, however we will cover this in the next section. \n\n\n```python\nmyFloat = 4545.4542244\nprint(\"Print the full float {},\\ncut off decimals {:.2f},\\nor determine the characters before the decimal {:10.1f}.\".format(myFloat,myFloat,myFloat))\n```\n\nNote here that we put three formatting characters in the string; we then also need three values to print out. \n\n## 2.6 Floats, integers and strings  \nYou can also force a conversion between the different value types float, integers and strings with the `str()`, `int()` and `float()` conversions:\n\n\n```python\n# Use the int() and float() statements to switch the value types and print out the values. 
Do you notice any differences?\nmyFloat = 4.5\nmyFloat\n```\n\n\n```python\nint(myFloat) # Note that it will print the result of the operation; myFloat remains a float!\n```\n\n\n```python\nmyInteger = 5\nmyInteger\n```\n\n\n```python\nmyOtherFloat = float(myInteger)\nmyOtherFloat\n```\n\nThe same is possible to convert between strings with `str()`, you can also convert strings back to integers and floats but only if the content of the string is an integer or float:\n\n\n```python\n# Convert a float and an integer to a string with the str() statement \nmyFloat = 4.5\nmyFloatString = str(myFloat)\nmyInteger = 5\nmyIntegerString = str(myInteger)\nprint(\"My strings are {} and {}\".format(myFloatString,myIntegerString))\nprint(\"My string converted to integer is {}\".format(int(myIntegerString)))\nprint(\"My string converted to float is {}\".format(float(myFloatString)))\n```\n\n\n---\n> ### {% icon hands_on %} Exercise 2.6.1\n>\n> Write a program where you ask for a number, convert it to an integer, and print out in a formatted string what your number is.\n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```python\n>    >  myFloatString = input(\"Give me a number:\")\n>    >  myInteger = int(float(myFloatString))\n>    >  print(\"My number in integer form is {}\".format(myInteger))\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n--- \n\nYou can also add, subtract, divide and multiply a variable by a number or other variable directly. 
These are the so-called assignment operators.\n\n\n```python\nmyFloat = 6\nmyString = \"ABC\"\n \nmyFloat += 5   # Same as myFloat = myFloat + 5\nprint(myFloat)\n \nmyString += \"DE\"  # Addition works for strings as well\nprint(myString)\n \nmyFloat -= 5   # Same as myFloat = myFloat - 5\nprint(myFloat)\n \nmyFloat /= 2   # Same as myFloat = myFloat / 2\nprint(myFloat)\n \nmyFloat *= 2   # Same as myFloat = myFloat * 2\nprint(myFloat)\n```\n\nFinally, you can check what data type a variable is by using `type()`:\n\n\n```python\nmyInteger = -6\nmyFloat = 5.22\nmyString = \"Text!\"\n \nprint(myInteger, type(myInteger))\nprint(myFloat, type(myFloat))\nprint(myString, type(myString))\n```\n\nNote here that you can print multiple values by using a comma in between the values.\n\n\n\n---\n> ### {% icon hands_on %} Exercise 2.6.2\n>\n> See what happens if you try to print a float as an integer, and an integer as a string. \n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```python\n>    >  myFloat = 11.4\n>    >  myIntFloat = int(myFloat)\n>    >  print(\"My float as integer {}\".format(myIntFloat)) \n>    >  #This works\n>    >  myInt  = 12\n>    >  print(\"My integer as string {}\".format(str(myInt)))\n>    >  #This works as well... but:\n>    >  myString = \"Hello\"\n>    >  print(\"My string as float {}\".format(float(myString)))\n>    >  #will fail and give a ValueError - Python cannot convert \"Hello\" into a float.\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n--- \n\n\n## 2.7 Booleans \nFinally, there are the boolean variables `True` and `False`. \nPython returns booleans when comparing values. In the code below python checks whether the comparison is `True`, when this is the case it will print out the boolean True. 
In order to do a comparison, we use **comparison operators** like `==, >, <, <=, >=, !=`\n\n\n```python\nmyBoolean = True\nmyBoolean\n```\n\n\n```python\ntype(myBoolean)\n```\n\n\n```python\nmyInteger = 5\nmyInteger == 6   # This means 'is myInteger equal to 6?'\n```\n\n\n```python\nmyInteger < 6    # This means 'is myInteger smaller than 6?'\n```\n\n\n```python\nmyInteger > 6    # This means 'is myInteger greater than 6?'\n```\n\n\n```python\nmyInteger <= 6   # This means 'is myInteger smaller or equal to 6?'\n```\n\n\n```python\nmyInteger >= 6   # This means 'is myInteger greater or equal to 6?'\n```\n\n\n```python\nmyInteger != 6   # This means 'is myInteger not equal to 6?'\n```\n\nSimilarly to comparison operators, you can also use `is` and `is not`, which are the **identity operators**, and `not` to negate a comparison:\n\n\n```python\nmyInteger = 5\n```\n\n\n```python\nmyInteger is 6    # Same as ==\n```\n\n\n```python\nmyInteger is not 6   # Same as !=\n```\n\n\n```python\nnot myInteger > 6    # Same as <=\n```\n\nIf you want to combine multiple comparisons, it is possible to use the logical operators `and` and `or`. With the `and` operator both comparisons have to be True for the result to be True. With the `or` operator, only one has to be True for the result to be True.\n\n\n```python\nx = 5\ny = 6\n```\n\n\n```python\nx == 5 and y > 2    # Both have to be True for the result to be True\n```\n\n\n```python\nx != 5 or y > 2     # Only one has to be True for the result to be True\n```\n\n## 2.8 Nothing\n\nFinally, we highlight the `None` value which is comparable to the `null` values of other programming languages. In the code below we show that None, which you could interpret as nothing, is still something else than the value 0 or e.g. an empty string. \n\n\n```python\nmyNothing = None\nmyNothing\n```\n\n\n```python\ntype(myNothing)\n```\n\n\n```python\ntype(None)\n```\n\n\n```python\n0 == None\n```\n\n\n```python\n\"\" == None\n```\n\nHowever, the opposite of None is still True. 
\n\n\n```python\nnot None\n```\n\nReally 0 is still an integer, \"\" a string, so `None` is really nothing:\n\n","## 10.1 Introduction\n\nSo far we've been writing 'sequential' code, basically following the flow of the code from the top to the bottom of the program. Sometimes, however, you want to re-use code elsewhere without copy/pasting a bit of code. You can do this with functions; a function holds a block of code that can be called from other places. Functions are essential for larger projects and code maintenance - if there's a problem with that piece of code, for example, you only have to fix it in one place.\n\n## 10.2 Functions\n\nWe've already been using built-in Python functions, for example **abs()** or **len()**. However, in this section we will build our own functions. Generally, the syntax when calling a function is the name of the function followed by round brackets **( )**. When you're writing your own function, in essence it would look like this:\n\n```python\ndef name_function():\n    \"Some information about the function\"\n    \n    print(\"This is a very simple function\")\n```\n\nInformation is given to a function by means of an argument and this is passed on in the round brackets. In the example above an argument is not defined, hence whenever you call the function it will print the same text. Arguments are defined within the parentheses and are separated by commas in case there are multiple arguments. Before exploiting functions with arguments, let's have a look at an example with no arguments that always prints the same text when you call the function. \n\n```python\ndef silly_function():\n    \"This is some information about the silly function that will print out some silly text\"\n    text = \"Some silly text\"\n    print(text)\n```\nNotice that nothing happened now. This is because we're not calling the function, we just defined it. 
In order to call the function, we use the following expression:\n\n```python\nsilly_function()\n```\n\n\nInformation about the function can be retrieved by using the `help()` function. \n\n\n```python\nhelp(silly_function)\n```\n\nThe following code is an example of a function that will take some value as an argument and return the absolute value:\n```python\ndef myAbsFunc(someValue):\n    \"myAbsFunc takes a number as input and will return the absolute value\"\n    if someValue < 0:\n        someValue = -someValue\n    return someValue\n```\n\nSo here we've emulated the Python built-in abs() function with myAbsFunc(). Within a function you can use **return** to 'send back' a value, which can then be used somewhere else in the code. \n\n\n```python\nmyAbsFunc(-10)\n```\n\nIt works exactly the same as a built-in Python function. \n\n\n```python\nabs(-10)\n```\n\nFunctions can also make code more 'readable', as you can give them a name that is easy to understand so that it's clear what is happening without having to examine the code. \n\n\n```python\ndef getMeanValue(valueList):\n    \"\"\"\n    Calculate the mean (average) value from a list of values.\n    Input: list of integers/floats\n    Output: mean value\n    \"\"\"\n    meanValue = sum(valueList)/len(valueList)\n    \n    return meanValue\n\ngetMeanValue([4,6,77,3,67,54,6,5])\n```\n\n\n```python\ngetMeanValue([3443,434,34343456,32434,34,34341,23])\n```\n\nNote that it's a good practice to add a comment (in this case a multi-line one) to the top of the function that describes what it does, what it takes as input and what it produces as output. This is especially important for more complex functions. 
You can invoke the information with `help(function_name)`\n\n\n```python\ndef compareMeanValueOfLists(valueList1,valueList2):\n \n    \"\"\"\n    Compare the mean values of two lists of values.\n    Input: valueList1, valueList2\n    Output: Text describing which of the valueLists has the highest average value\n    \"\"\"\n \n    meanValueList1 = getMeanValue(valueList1)\n    meanValueList2 = getMeanValue(valueList2)\n \n    if meanValueList1 == meanValueList2:\n        outputText = \"The mean values are the same ({:.2f}).\".format(meanValueList1)\n    elif meanValueList1 > meanValueList2:\n        outputText = \"List1 has a higher average ({:.2f}) than list2 ({:.2f}).\".format(meanValueList1,meanValueList2)\n    else:\n        # No need to compare again, only possibility left\n        outputText = \"List2 has a higher average ({:.2f}) than list1 ({:.2f}).\".format(meanValueList2,meanValueList1)\n \n    return outputText\n```\n\n\n```python\nvalueList1 = [4,6,77,3,67,54,6,5]\nvalueList2 = [5,5,76,5,65,56,4,5]\ncompareMeanValueOfLists(valueList1,valueList2)\n\n```\n\nYou can call functions within functions, or basically anywhere in your code, even in conditions, ...:\n\n```python\nif getMeanValue(valueList1) > 26 :\n    print(\"The mean value of list 1 is greater than 26.\")\n```\n\n\n---\n\nThere are several ways to solve this problem, however it might be easier to do it with `zip()` ;). \n> ### {% icon hands_on %} Exercise 10.2.1\n>\n> The Hamming distance between two strings of equal length is the number of positions at which the corresponding characters are different. In a more general context, the Hamming distance is one of several string metrics for measuring the edit distance between two sequences. \n> \n> The Hamming distance between:\n> \n> \"karolin\" and \"kathrin\" is 3.\n> \n> Write a function called \"hamming_distance\":\n> - which accepts two strings, and \n> - raises an error if the lengths are unequal. 
\n> - Furthermore the function will return an integer that represents the number of mismatches between the two sequences. \n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution 1\n>    > </summary>\n>    >\n>    >  ```python\n>    > # string1 and string2 should be the same length.\n>    > def hamming_distance(string1, string2): \n>    >     \"\"\"Return the Hamming distance between equal-length sequences.\"\"\"\n>    >     \n>    >     if len(string1) != len(string2):\n>    >         raise ValueError(\"Undefined for sequences of unequal length.\")\n>    >     \n>    >     # Start with a distance of zero, and count up\n>    >     distance = 0\n>    >     # Loop over the indices of the string\n>    >     L = len(string1)\n>    >     for i in range(L):\n>    >         # Add 1 to the distance if these two characters are not equal\n>    >         if string1[i] != string2[i]:\n>    >             distance += 1\n>    >     # Return the final count of differences\n>    >     return distance\n>    > \n>    > seq1 = \"GATCATAGA\"\n>    > seq2 = \"CATCATACA\"\n>    > print(hamming_distance(seq1,seq2))\n>    >  ```\n>    > </details>\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution 2\n>    > </summary>\n>    >\n>    >  ```python\n>    > # string1 and string2 should be the same length.\n>    > def hamming_distance(string1, string2): \n>    >     \"\"\"Return the Hamming distance between equal-length sequences.\"\"\"\n>    >     \n>    >     assert len(string1) == len(string2), \"Undefined for sequences of unequal length.\"\n>    >     \n>    >     # Start with a distance of zero, and count up\n>    >     distance = 0\n>    >     # Loop over the paired characters of both strings\n>    >     for s1,s2 in zip(string1,string2):\n>    >         if s1 != s2:\n>    >              distance +=1\n>    >     # Return the final count of differences\n>    >     return distance\n>    > \n>    > seq1 = 
\"GATCATAGA\"\n>    > seq2 = \"CATCATACA\"\n>    > print(hamming_distance(seq1,seq2))\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n---\n\nThere are several ways to solve this problem, however it might be easier to do it with the `zip()` function.\n\n\n\n---\n\n> ### {% icon hands_on %} Exercise 10.2.2 \n>\n> Write a function that calculates the GC content of the sequence in a fasta file. For this example you can use [this fasta file](data/gene.fa) which contains the genetic sequence of a bone gla protein. The function must accept a fasta file as input file and will print the following:\n> \n> ```\n> The GC content of HSBGPG Human gene for bone gla protein (BGP) is\t 63.53%\n> ```\n> \n> The method [.startswith()](https://www.tutorialspoint.com/python/string_startswith.htm) might help. The function should read the lines of the fasta file and if it starts with a '>' define the text that comes afterwards as the sequence ID. The other lines are part of the sequence. After reading through the lines, you can easily define the GC content by counting the bases and taking the average. 
\n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution 1\n>    > </summary>\n>    >\n>    >  ```python\n>    > # solution one\n>    > def gc_content(file):\n>    >     \"\"\"Calculate GC content of a fasta file (with one sequence)\"\"\"\n>    >     sequence=\"\"\n>    >     with open(file, 'r') as f:\n>    >         for line in f:\n>    >             if line.startswith('>'):\n>    >                 seq_id = line.rstrip()[1:]\n>    >             else:\n>    >                 sequence += line.rstrip()\n>    >     GC_content = (sequence.count('G') + sequence.count('C')) / len(sequence) * 100\n>    >     print(\"The GC content of {} is\\t {:.2f}%\".format(seq_id, GC_content))    \n>    > \n>    >     \n>    > gc_content('data/gene.fa')\n>    >  ```\n>    > </details>\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution 2\n>    > </summary>\n>    >\n>    >  ```python\n>    > # solution two - very similar to one. \n>    > def gc_content(file):\n>    >     f = open(file, 'r')\n>    >     sequence=\"\"\n>    >     for line in f.readlines():  \n>    >         if line.startswith('>'):\n>    >             seq_id = line.rstrip()[1:]\n>    >         else:\n>    >             sequence += line.rstrip()\n>    >     GC_content = (sequence.count('G') + sequence.count('C')) / len(sequence) * 100\n>    >     print(\"The GC content of {} is\\t {:.2f}%\".format(seq_id, GC_content))    \n>    > \n>    >     \n>    > gc_content('data/gene.fa')\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n---\n\n## 10.3 Flexibility in functions\n\nIn the functions so far we've been using values (arguments) that are passed in and are required for the function to work. If you're not sure how many arguments the user will give, you can use an asterisk `*`. However, make sure that your code is flexible to access the number of arguments that the user is giving as input. 
In the example below we use the * asterisk to define a flexible number of arguments, and we use a for-loop to access each argument:\n\n\n```python\ndef MeanValue(*valueList):\n    \"\"\"\n    Calculate the mean (average) value from a list of values.\n    Input: list of integers/floats\n    Output: mean value\n    \"\"\"\n    meanValues = []\n    \n    for eachList in valueList:\n        meanOfList = sum(eachList)/len(eachList)\n        meanValues.append(meanOfList)\n        \n    return meanValues\n```\n\n\n```python\nMeanValue([1, 2, 3], [4,5,6])\n```\n\n\n```python\nMeanValue([1, 2, 3], [4,5,6], [7, 8, 9])\n```\n\nA second way of making flexible functions is by using *keywords* in a function; these are not required for the function to work because they are given a default value in the function definition. You can then set these keywords if necessary; consider the following example.\n\n\nBy default the parameter sortedList is `False` which means that Python will not make a sorted list in the function below, unless you explicitly ask it by setting the parameter to `True`. 
\n\n\n```python\ndef MeanValue(*valueList, sortedList = False):\n    \"\"\"\n    Calculate the mean (average) value from a list of values.\n    Input: list of integers/floats\n    Output: mean value\n    \"\"\"\n    meanValues = []\n\n    for eachList in valueList:\n        meanOfList = sum(eachList)/len(eachList)\n        meanValues.append(meanOfList)\n        \n    if sortedList == False:\n        print('I calculated all the mean values of your lists, however did not sort them')\n    else:\n        meanValues.sort()\n        print('I calculated the mean values and also sorted them')\n    return meanValues\n```\n\n\n```python\nvalueList1 = [4,6,77,3,67,54,6,5]\nvalueList2 = [5,5,76,5,65,56,4,5]\nvalueList3 = [5,9,75,8,65,34,4,4]\n```\n\n\n```python\nMeanValue(valueList1, valueList2, valueList3)\n```\n\n\n```python\nMeanValue(valueList1, valueList2, valueList3, sortedList = True)\n```\n\nUsing these keywords makes the function a lot more flexible - you can make the function do things (or not) depending on them.\n\n","## Extra exercises on functions\n\nThis chapter contains some extra exercises on functions. In the end, practice makes perfect...\n\n\n\n---\n\n> ### {% icon hands_on %} Exercise 10.2.1\n>\n> Download [this matrix file](http://wiki.bits.vib.be/images/4/4e/Matrix.txt) (`Matrix.txt`) and save it in your directory. Then write a function to read a matrix file in this format, reorder the rows by the values in the given column, and printing out the result. The function should take as argument a file name and a column number. 
\n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```python\n>    > def sortMatrixByColumn(fileName,columnNumber):\n>    >     #\n>    >     # Read the tab-delimited file and store the values\n>    >     #\n>    >  \n>    >     fin = open(fileName)\n>    >     lines = fin.readlines()\n>    >     fin.close()\n>    >  \n>    >     #\n>    >     # Convert the data from the file into a Python list\n>    >     #\n>    >  \n>    >     matrix = []\n>    > \n>    >     for matrixRow in lines:\n>    >         # Tab-delimited, so split line by \\t - this will give a list of strings\n>    >         matrixColumns = matrixRow.rstrip().split(\"\\t\") \n>    >  \n>    >         # Add a row to the matrix\n>    >         matrix.append([])\n>    >  \n>    >         # Add the columns, but convert the strings from the file into a float\n>    >         for matrixValue in matrixColumns:\n>    >             matrix[-1].append(float(matrixValue))\n>    >  \n>    >     #\n>    >     # Now sort by column - but have to track the row number as well!\n>    >     #\n>    >  \n>    >     selectedColumnValues = []\n>    >  \n>    >     for rowNumber in range(len(matrix)):\n>    >  \n>    >         selectedColumnValues.append((matrix[rowNumber][columnNumber],rowNumber))\n>    >  \n>    >         selectedColumnValues.sort()\n>    >  \n>    >     #\n>    >     # Now print out the new matrix - the column value is now not interesting\n>    >     # we want the row number!!\n>    >     #\n>    >  \n>    >     for (columnValue,rowNumber) in selectedColumnValues:  \n>    >         columnValueStrings = []\n>    >         for value in matrix[rowNumber]:\n>    >             columnValueStrings.append(\"{:.3f}\".format(value))\n>    >         print(\"\\t\".join(columnValueStrings))\n>    >  \n>    >  \n>    > sortMatrixByColumn(\"data/matrix.txt\",3)\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n---\n\n\n\n---\n\n> ### {% icon hands_on 
%} Exercise 10.2.2\n>\n> \n> Modify the program to read in the TestFile.pdb file by using separate functions to \n> 1. get the title, \n> 2. dissect the information from the ATOM line and \n> 3. to calculate the distance to the reference distance\n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```python\n>    > def getTitle(line,cols):\n>    >  \n>    >     # Gets the title\n>    >  \n>    >     title = line.replace(cols[0],'')\n>    >     title = title.strip()\n>    >  \n>    >     return (\"The title is '%s'\" % title)\n>    >  \n>    > def getAtomInfo(cols):\n>    >  \n>    >     # Get relevant information from an ATOM line and convert to the right type\n>    >  \n>    >     atomSerial = int(cols[1])\n>    >     atomName = cols[2]\n>    >     residueNumber = int(cols[5])\n>    >     x = float(cols[6])\n>    >     y = float(cols[7])\n>    >     z = float(cols[8])\n>    >  \n>    >     return (atomSerial,atomName,residueNumber,x,y,z)\n>    >  \n>    > def calculateDistance(coordinate1,coordinate2):\n>    >  \n>    >     # Calculate the distance between two 3 dimensional coordinates\n>    >  \n>    >     return ((coordinate1[0] - coordinate2[0]) ** 2 + (coordinate1[1] - coordinate2[1]) ** 2 + (coordinate1[2] - coordinate2[2]) ** 2 ) ** 0.5\n>    >  \n>    > \n>    > # Open the file\n>    > fileHandle = open(\"data/TestFile.pdb\")\n>    >  \n>    > # Read all the lines in the file (as separated by a newline character), and store them in the lines list\n>    > # Each element in this list corresponds to one line of the file!\n>    > lines = fileHandle.readlines()\n>    >  \n>    > # Close the file\n>    > fileHandle.close()\n>    >  \n>    > # Initialise some information\n>    > searchCoordinate = (-8.7,-7.7,4.7)\n>    > modelNumber = None\n>    >  \n>    > # Loop over the lines, and do some basic string manipulations\n>    > for line in lines:\n>    >  \n>    >     line = line.strip()  # Remove 
starting and trailing spaces/tabs/newlines\n>    >  \n>    >     # Only do something if it's not an empty line\n>    >     if line:\n>    >         cols = line.split()   # Split the line by white spaces; depending on the format this could be commas, ...\n>    >  \n>    >         # Print off the title\n>    >         if cols[0] == 'TITLE':\n>    >             print(getTitle(line,cols))\n>    >  \n>    >         # Track the model number\n>    >         elif cols[0] == 'MODEL':\n>    >             modelNumber = int(cols[1])\n>    >  \n>    >         # For atom lines, calculate the distance\n>    >         elif cols[0] == 'ATOM':\n>    >             (atomSerial,atomName,residueNumber,x,y,z) = getAtomInfo(cols)\n>    >  \n>    >             # Calculate the distance\n>    >             distance = calculateDistance((x,y,z),searchCoordinate)\n>    >  \n>    >             if distance < 2.0:\n>    >                 print(\"Model {}, residue {}, atom {} (serial {}) is {:.2f} away from reference.\".format(modelNumber,residueNumber,atomName,atomSerial,distance))\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n---\n\n","### Introduction\n\n#### NGS data repositories\n\nFirst of all, you need data to analyze. 
You can generate your own data but there's a lot of NGS data available on the internet.\n\nThe main repositories for NGS data:\n\n{|class=\"wikitable\"\n| align=\"center\" style=\"background:#f0f0f0;\"|''''''\n| align=\"center\" style=\"background:#f0f0f0;\"|'''NCBI - US'''\n| align=\"center\" style=\"background:#f0f0f0;\"|'''EBI - Europe'''\n|-\n| ||||Close-by so faster downloads\n|-\n|'''Gene expression database'''||[http://www.ncbi.nlm.nih.gov/geo/ GEO]||[http://www.ebi.ac.uk/arrayexpress ArrayExpress]\n|-\n|Contain processed NGS data, no raw data||ID starts with G||ID starts with E-\n|-\n|'''NGS sequence database'''||[http://www.ncbi.nlm.nih.gov/sra SRA]||[http://www.ebi.ac.uk/ena ENA]\n|-\n|Contain raw NGS data||ID starts with SR||ID starts with ER\n|-\n| ||ENA IDs also used by SRA||SRA IDs also used by ENA \n|-\n| ||stores reads in sra format||stores reads in fastq format\n|}\n\n\nBoth GEO and SRA use multiple types of IDs, ordered according to a certain hierarchy:\n\n{|class=\"wikitable\"\n| align=\"center\" style=\"background:#f0f0f0;\"|'''GEO ID'''\n| align=\"center\" style=\"background:#f0f0f0;\"|'''points to'''\n| align=\"center\" style=\"background:#f0f0f0;\"|'''definition'''\n|-\n|ID starts with GSE||experiment||Data of a full NGS experiment consisting of multiple samples The samples belong to different groups that are to be compared e.g. treated and control samples\n|-\n|ID starts with GSM||sample||Data of one single sample \n|-\n| align=\"center\" style=\"background:#f0f0f0;\"|'''SRA ID'''\n| align=\"center\" style=\"background:#f0f0f0;\"|'''points to'''\n| align=\"center\" style=\"background:#f0f0f0;\"|'''definition'''\n|-\n|ID starts with SRP||study||Studies have an overall goal and may comprise several experiments. \n|-\n|ID starts with SRX||experiment||An Experiment describes what was sequenced and the method used.\nInfo on the source of the DNA, samples, sequencing platform and the processing of the data. 
\n|-\n|ID starts with SRR||run||Data of a particular sequencing experiment.\nExperiments may contain many runs depending on the number of instrument runs that were needed.\n|}\n\n\nThere are two other resources of NGS data:\n\n- [https://insilicodb.org/ In Silico DB] from the ULB <ref name=\"insilicoDB\">https://insilicodb.org/</ref>\n- [http://www.illumina.com/science/data_library.ilmn Illumina's NGS data library] <ref name=\"Illumina Sequence Data Library\">http://www.illumina.com/science/data_library.ilmn</ref>\n\n\n\nIf you have an article describing an NGS dataset that is of interest to you, you should search in the article for a sentence mentioning the ID of the data in one of these databases.\n\n\n#### Metadata of NGS data sets\n\nYou do not only need the data, you also need extra information to be able to do the analysis. For instance, you need to know where each sample comes from: in clinical datasets it is important to know if the reads are coming from a patient or from someone in the control group...\nThis kind of information is called metadata and is stored together with the actual data.\n\n### Exercise 1: Downloading a data set for the introduction training\n\nFor the introduction training we will use a data set containing short Illumina reads from *Arabidopsis thaliana* infected with a pathogen, *Pseudomonas syringae*, versus mock treated controls. 
The data set is described in [the article of Cumbie et al., 2011](http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3188579/).\n\nThe authors provide an ArrayExpress ID (**E-GEOD-25818**) in the section **Analysis of a pilot RNA-Seq experiment**, but this ID points to Affymetrix microarray data and not to NGS data:\nGo to [the ArrayExpress home page](https://www.ebi.ac.uk/arrayexpress/)\n\n> Find the description of the experiment with ArrayExpress ID E-GEOD-25818 ?\n|-\n|\n- Type the ID in the search box on the ArrayExpress home page\n- Click **Search**\n\n{{Wiki-img|NGS/Intro/AE1.png|500px}}\n\nYou see that the experiment is stored as a **Transcription profiling by array** experiment (red) and that **Affymetrix GeneChip Arabidopsis Genome [ATH1-121501]** is the platform that was used (green).\n- Click the **Click for detailed sample information and links to data** link (blue)\n\n{{Wiki-img|NGS/Intro/AE2.png|500px}}\n\nYou see that you will download .CEL files, the file type for storing raw Affymetrix **microarray** data.\n\n|}\n\n{{Warning | So you see that IDs that are provided in articles are not always accurate !}}\n\nFortunately I could find the data in NCBI's SRA database, so we know the SRA ID. 
Since the connection with NCBI is too slow, we will do the download from ENA using the SRA ID.\n\nGo to [the EBI website](http://www.ebi.ac.uk/).\n\n> Download the data set with SRA ID SRR074262 from ENA ?\n\n- Type **SRR074262** in the search box\n- Click **Search**\n\n{{Wiki-img|NGS/Intro/SRA1.png|500px}}\n\nSince we are using an SRA run ID as a search term, we do a very specific search so the search returns a single SRA record:\n\n{{Wiki-img|NGS/Intro/SRA2.png|500px}}\n\n- Click the SRA run ID on the results page to go to [http://www.ebi.ac.uk/ena/data/view/SRR074262&display=html the ENA record containing the actual data of the run]\n- Scroll to the table at the bottom of the page\n- Click the link in the **Fastq files (ftp)** column (red): \n\n{{Wiki-img|NGS/Intro/SRA6A.png|700px}}\n\n|}\n\nIt can take some time to download the file since it's very big. Firefox will give you an estimate on how long it's going to take. If it takes too long, cancel the download and use the file that is already present on the BITS laptops in the **/Documents/NGSdata** folder as **SRR074262.fastq**.\n\nIn a normal analysis we would of course download all 6 data files of this study. It's only because of time limits that we will only use a single sample during the training. 
If you are analyzing the 6 samples you need to take a look at the metadata to know which samples represent controls and which samples represent the treatment (in this case treatment with a plant pathogen).\n\nIn ENA and SRA, annotation is found in the record of the NGS study.\n\n> Go to the ENA record of the study the downloaded sample belongs to and look at the grouping of the samples.\n|-\n|\n\n- Click the SRA ID of the study the downloaded sample belongs to (green) to access the record of the study:\n\n{{Wiki-img|NGS/Intro/SRA6A.png|700px}}\n\n- Click the **Select columns** link on the **Read files** tab to visualize all the fields with metadata that you can visualize.\n{{Wiki-img|NGS/Intro/SRA6B.png|200px}}\n\n- Deselect the fields that you are not interested in and select the fields you want to view; If you are interested in the grouping of the samples you need to select **Library name**(red): \n\n{{Wiki-img|NGS/Intro/SRA6C.png|700px}}\n\nThis adds a column called **Library name** in the table containing the grouping annotation of the samples.\n- If you want to know exactly what the names mean, you have to consult [http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0025279 the paper that describes the analysis of the data set]. In the **RNA preparation and sequencing** section you see that hrcC means infected with the pathogen, while MgCL2 represent the control treatment\n{{Wiki-img|NGS/Intro/SRA6D.png|500px}}\n\n\nThe sample that we have downloaded for the introduction training thus comes from the group of infected samples. \n|}\n\n\n### Exercise 2: Downloading a data set for the ChIP-Seq training\n*Exercise created by Morgane Thomas-Chollier*\n\nFor the ChIP-Seq training, we are going to use the data set that is described in [http://www.ncbi.nlm.nih.gov/pubmed/23818864 the article of Myers et al., 2013] <ref>http://www.ncbi.nlm.nih.gov/pubmed/23818864</ref>. 
The article contains the following sentence at the end of the Materials and Methods section:\n\"All genome-wide data from this publication have been deposited in NCBI’s Gene Expression Omnibus (GSE41195).\"\nIn this case **GSE41195** is the ID of the experiment in the GEO database.\n\nGo to [http://www.ncbi.nlm.nih.gov/geo/ the GEO home page]\n\n> Download the data of the experiment with GEO ID GSE41195 ?\n|-\n|\n- Type the ID in the search box on the GEO home page\n- Click **Search**\n\n{{Wiki-img|NGS/Intro/GEO1.png|200px}}\n\nThis redirects you to the GEO record of the experiment.\n\n{{Wiki-img|NGS/Intro/GEO2.png|400px}}\n\nIn the **Experiment type** section you can see that this GEO record is a mixture of expression analysis and ChIP-Seq.\n- Scroll to the bottom of the page:\n\n{{Wiki-img|NGS/Intro/GEO3.png|400px}}\n\nYou can see that the data of the ChIP-Seq experiment have their own identifier: GSE41187\n\n- Click the ChIP-Seq data identifier.\nThis brings us on the GEO record of the ChIP-Seq experiment.\n- Scroll down to the **Samples** section:\n\n{{Wiki-img|NGS/Intro/GEO4.png|400px}}\n\nNote that GEO contains the grouping annotation here in the **Samples** section.\nFor time's sake, we will focus on a single sample: FNR IP ChIP-seq Anaerobic A\n- Click the ID **GSM1010219** of that sample to go to the GEO record of the sample\n- Scroll to the bottom of the page to the **Relations** section:\n\n{{Wiki-img|NGS/Intro/GEO5.png|400px}}\n\nGEO only contains processed NGS, no raw data. The corresponding raw data is stored in the SRA database. In the **Relations** section you can find the SRA ID of this data set. For the training we need a fastq file containing raw data.\n- Copy the SRA ID of the ChIP-Seq experiment (SRX189773)\n\n|}\n\nAgain, it will take too long to download the data from NCBI. 
So we will do the download from EBI.\n\nGo to [http://www.ebi.ac.uk/ the EBI website].\n\n> Download the data with SRA ID SRX189773 ?\n|-\n|\n- Type the ID in the search box on the EBI home page\n- Click **Search**\n\n{{Wiki-img|NGS/Intro/ENA1.png|300px}}\n\nThis returns two results: a link to the record of the experiment and a link to the record of the run:\n\n{{Wiki-img|NGS/Intro/ENA2.png|400px}}\n\n- Click the record of the full experiment (red)\n\n{{Wiki-img|NGS/Intro/ENA3.png|600px}}\n\nThe table at the bottom of the page contains a column called **Fastq files (ftp)**\n- Click the link in this column to download the data in fastq format\n\n|}\n\nIt took only a few minutes to download the data on my laptop at work, but the internet connection at work will be faster than the one in the training room. Firefox will give you an estimate of the time it takes for the download. If it is too long, cancel the download and use the file that has already been downloaded and is available on the BITS laptops:\n\n- on Windows: in the **/Documents/NGSdata** folder as **SRR576933.fastq**\n- In Linux: in the **/home/bits/NGS/ChIPSeq** folder as **SRR576933.fastq**\n\n\nChIP-Seq always compares the ChIP sample to a control sample, consisting of genomic DNA isolated from cells that were cross-linked and fragmented under the same conditions as the ChIP sample or of DNA fragments isolated in a “mock” ChIP reaction using an antibody that reacts with an irrelevant, non-nuclear protein.\n\nIn this data set, control samples consist of full genomic DNA. To download a control sample, we should redo the same steps starting from the GEO record of the ChIP-Seq experiment and click the GEO sample ID of the **anaerobic INPUT DNA** sample... 
However, the fastq file is available in the same data folders (SRR576938.fastq)\n\n\n### Downloading data sets via Linux command line\n\nSee Linux command line training pages\n\n### Downloading data sets via R\n\n*Exercise created by Stephane Plaisance*\n\nOnce you know the SRA or ENA ID of the data set you can download the data and the metadata automatically via an R script.\nSee [http://wiki.bits.vib.be/index.php/NGS_RNASeq_DE_Exercise.1#Obtain_data_and_metadata_from_ENA_using_R the exercises of the RNA-Seq training] to learn how to do this.\n","## 6.1 Introduction\n\nAnother important feature of computer programs is that they can do the same thing over and over again with different information. This is possible by using loops in your code; essentially a loop is executed until it runs out of data or the code decides to break out of it.\n\n\n\n## 6.2 For loop\nNow that we have these variables that can hold multiple elements (previous exercise), it would be useful to be able to loop over them one by one. This is possible with the **for** loop:\n\n\n\n\n```python\n# Make a list of integers from 0 to 9 with steps of 1 (0, 1, 2, ..., 9)\nmyList = range(10) \n \n# for each value (myElement) in this list (myList); do the following:\nfor myElement in myList: \n    # Print that value\n    print(\"Counting {}\".format(myElement))  \n```\n\nIn the first iteration myElement will take up the first value of myList and perform the code that is indented (in this case it will print *counting 0*), then it will go back to the start of the loop and take up the second value of myList (which is 1) and perform again the code that is indented (*counting 1*), etc.  \n\nNote that again we have to use indentation, as there is a block of code that is only relevant within the for loop. \n\nPython will always need a list, tuple, set or dictionary to iterate through and it will work exactly the same with tuples (see example below). 
The iterator will always take up the value of the list/tuple/set/dict! \n\n\n```python\nmyTuple = (\"A\",\"B\",\"C\",\"D\",\"E\",\"F\")\n \nfor myElement in myTuple:\n    print(\"Letter {}\".format(myElement))\n```\n\nBecause you can access individual elements within a list or tuple, you can also count the element index in the list or tuple, so that you know both index and value. If you want to iterate over a list of letters, in this case it's in a tuple type, you'll first have to find the length of the list and then use range to make a list of integers that can be used as an index. \n\n\n```python\nmyTuple = (\"A\",\"B\",\"C\",\"D\",\"E\",\"F\")\nmyTupleLength = len(myTuple)\n \nfor tupleIndex in range(myTupleLength):\n    myElement = myTuple[tupleIndex]\n    print(\"Letter {} is at position {}\".format(myElement,tupleIndex + 1))  # We have to add 1 to the index here because Python starts at zero...\n```\n\nPython has a built-in function `enumerate()` which eases this task for you as a programmer. For the tuple which we defined above, you could make the following table with indices and accompanying values:\n\n<center>\n\n| index | value |\n|---|---|\n| 0 | A |\n| 1 | B |\n| 2 | C |\n| 3 | D |\n| 4 | E |\n| 5 | F |\n</center>\n\n`enumerate()` mimics this table and you can use it in this way which immediately gives you the indices:\n\n\n```python\nmyTuple = (\"A\",\"B\",\"C\",\"D\",\"E\",\"F\")\nfor line in enumerate(myTuple):\n    print(line)\n```\n\nThe enumerate function has some similarities with dictionaries, especially in how to access a value. Don't worry if you're confused with the square brackets, we'll cover this in Chapter 8. \n\n\n```python\nmyTuple = (\"A\",\"B\",\"C\",\"D\",\"E\",\"F\")\nfor line in enumerate(myTuple):\n    print(\"Letter {1} is at position {0}\".format(line[0]+1, line[1])) # For the sake of exercising I switched the format positions for once. 
\n```\n\n----\n\n**Intermezzo:**\n\n\nBefore starting with exercises, we want to highlight the if-conditions from chapter 4 again, especially the fact that Python interprets the integer 0 to be `False`, the integer 1 is interpreted as `True` and any other integer different than 0 and 1 is considered to be not `False` (it's also not `True` though)\n\n\n```python\na = 0\na == False\n```\n\n\n```python\nb = 1\nb == True\n```\n\n\n```python\nc = 2\nc == True\n```\n\n\n```python\nc == False\n```\n\n\n```python\nc != True\n```\n\n\n```python\nc != False\n```\n\nWhy is this important to know? We see sometimes code similar to the one below in which an arithmetical operation is evaluated in an `if` statement. If the result of this arithmetical operation is an integer like 2, 3, etc. we know now how we can deploy this knowledge to evaluate the statement.  \n\n\n\n```python\nc = 2 \nif c != False:  # \n    print(\"C is equal to\", c)\n```\n\n\n```python\na = 0\nif a:\n    print(\"A is equal to 0\")\n```\n\n\n```python\nb = 1\nif b:   \n    print(\"B is equal to\", b)\n```\n\n\n```python\nif not a:\n    print(\"A is still equal to 0\")\n```\n\n----\n\nNow we want to find out if a number is divisible by another number. In the code below, we will iterate over each value in the list of numbers. If the remainder after division is 0 (comparison is True), we print the number out. \n\n\n\n```python\nmyNumbers = range(1,50)\nmyDivider = 17\n \nfor myNumber in myNumbers:\n    if not (myNumber % myDivider):  # Nothing left after division, so number is divisible.\n        print(\"Number {} cannot be divided by {}!\".format(myNumber,myDivider))\n```\n\nHere we now have two levels of code besides the main one; the **if** is checked for every value, but the print is only executed for numbers divisible by myDivider.\n\n\nYou can also control the loop by using **continue** and **break**. 
They alter the flow of a normal loop:\n\n\n```python\nmyNumbers = range(1,100)\n \nfor myNumber in myNumbers:\n    if myNumber == 5:   \n        continue     # This means that the code within the for loop will be ignored if myNumber is equal to 5, we 'jump back' to the start and use the next number (6)\n    print(myNumber)\n\n    if myNumber == 8:\n        break        # This means we will exit the loop alltogether, all other values after this one will not be dealt with.\n\n```\n\n---\n> ### {% icon hands_on %} Exercise 6.2.1\n>\n> Write a program where you print out all positive numbers up to 1000 that can be divided by 13, or 17, or both. The output should be printed as : `Number 13 is divisible by [13]`. If you want a little more challenge, the output should be printed as `Number 884 is divisible by 13, 17`\n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution 1\n>    > </summary>\n>    >\n>    >  ```python\n>    >  myNumbers = range(1,100) # should be 1001\n>    >  myDividers = (13,17)    # We will loop over these in the loop itself, so it's easy to add new numbers to this\n>    >   \n>    >  for myNumber in myNumbers:\n>    >      validDividers = []     # In this list we will put all the valid dividers\n>    >      for myDivider in myDividers:\n>    >          if not (myNumber % myDivider):\n>    >                validDividers.append(myDivider)\n>    >      if validDividers:      # This means that the list has to have values in it\n>    >          print(\"Number {} is divisible by {}\".format(myNumber,validDividers))       \n>    >  ```\n>    > </details>\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution 2\n>    > </summary>\n>    >  ```python\n>    >  # Extra: The output is not very nice here as you print off the list with the square brackets, you could try the following bit of code under the if validDividers: condition:\n>    >  myNumbers = range(1,100) #should be 1001\n>    >  myDividers = (13,17)    
# We will loop over these in the loop itself, so it's easy to add new numbers to this\n>    >   \n>    >  for myNumber in myNumbers:\n>    >      validDividers = []     # In this list we will put all the valid dividers\n>    >      for myDivider in myDividers:\n>    >          if not (myNumber % myDivider):\n>    >                validDividers.append(myDivider)\n>    >      if validDividers:      # This means that the list has to have values in it\n>    >          # First make strings out of the integers; this is valid Python syntax where you make a list out of a list    \n>    >          validDividerStrings = [\"{}\".format(validDivider) for validDivider in validDividers]\n>    >  \n>    >          # Now you can join the elements of a list (if they are strings) together using the .join() method for a string:\n>    >          validDividerString = ', '.join(validDividerStrings)\n>    >   \n>    >          print(\"Number {} is divisible by {}\".format(myNumber,validDividerString))\n>    >      \n>    >      ######### Or as an alternative for the nice printing:\n>    >          #if len(validDividers) == 1:\n>    >          #    print(\"number is div by {}\".format(validDividers[0]))\n>    >          #elif len(validDividers) == 2:\n>    >          #    print(\"number x is div by {}, {}\".format(validDividers[0],validDividers[1]))\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n---\n\n\n\n\n---\n\n> ### {% icon hands_on %} Exercise 6.2.2\n>\n> Write a program where you find, for each positive number up to 50, all numbers that can divide each number. E.g. 16 can be divided by 1, 2, 4, 8 and 16. 17 can be divided by... 
\n> \n> It's fine if you print the output like this: \n> ```\n> Number 1 can be divided by 1!\n> Number 2 can be divided by 1!\n> Number 2 can be divided by 2!\n> Number 3 can be divided by 1!\n> ```\n> However, you can also try to print the output like this:\n> ```\n> Number 4 can be divided by 1, 2, 4!\n> ```\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution 1\n>    > </summary>\n>    >\n>    >  ```python\n>    >  # Write a program where you find, for each positive number up to 50, all numbers that can divide each number. E.g. 16 can be divided by 1, 2, 4, 8 and 16. 17 can be divided by...\n>    >  myNumbers = range(1,5) #should be 51\n>    >   \n>    >  for x in myNumbers:\n>    >      dividers = []\n>    >      for y in range(1,x+1):\n>    >          if not (x % y):\n>    >              dividers.append(y)\n>    >   \n>    >      for divider in dividers:\n>    >          print (\"Number {} can be divided by {}!\".format(x,divider))\n>    >  ```\n>    > </details>\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution 2\n>    > </summary>\n>    >\n>    >  ```python\n>    >  # The output is again not very nice here, you can replace the last two lines by this for nicer output:\n>    >  myNumbers = range(1,5)\n>    >   \n>    >  for x in myNumbers:\n>    >      dividers = []\n>    >      for y in range(1,x+1):\n>    >          if not (x % y):\n>    >              dividers.append(y)\n>    >   \n>    >      #for divider in dividers:\n>    >      dividerList = \", \".join([str(divider) for divider in dividers])\n>    >          \n>    >      print (\"Number {} can be divided by {}!\".format(x,dividerList))\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n\n## 6.3 While loop\nA **while** loop is dependent on a condition, as long as this condition is evaluated as `True` the loop will continue. 
Its structure is very similar to the for-loop we saw here above.\n\n```python\nresult = 0\nwhile result < 10:\n    # add 1 to the result\n    result += 1\n    print(result)\n```\n\n\nThis is an endless loop:\nFYI, if you execute this, you'll end up in an eternal loop. To break the loop, press the stop button.\n\n\n```python\nwhile True:\n    print(\"Endless...\")\n```\n\nWhile loops are more flexible than for loops, as you can make them end whenever necessary depending on code within the loop itself:\n\n\n\n\n```python\nbaseValue = 2\npowerValue = 1\npowerResult = 0\nwhile powerResult < 1000:\n    powerResult = baseValue ** powerValue\n    print(\"{} to the power {} is {}\".format(baseValue,powerValue,powerResult))\n    powerValue += 1 # Add one to itself - this kind of step is crucial in a while loop, or it will be endless!\n```\n\nNote that the last value printed is greater than 1000: the while condition is only checked at the start of the loop. You should check where the first result is calculated as this may impact the result! Here we changed the order of calculating the value. 
We *initialized* the loop and put the calculation at the very end:\n\n\n\n```python\nbaseValue = 2\npowerValue = 1\npowerResult = 0\npowerResult = baseValue ** powerValue\n\nwhile powerResult < 1000:\n    print(\"{} to the power {} is {}\".format(baseValue,powerValue,powerResult))\n    powerValue += 1 # Add one to itself - this kind of step is crucial in a while loop, or it will be endless!\n    powerResult = baseValue ** powerValue\n```\n\n---\n\n> ### {% icon hands_on %} Exercise 6.3.1 \n>\n> Try to reproduce a for-loop (the example of numbers divisible by 17) by using a while-loop.\n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    > \n>    >  ```python\n>    >  # Try to reproduce a for-loop (the example of numbers divisible by 17) by using a while-loop.\n>    >  myNumber = 1\n>    >  myDivider = 17\n>    >  \n>    >  while myNumber <= 50:\n>    >      if not (myNumber % myDivider): # Nothing left after division, so number is divisible.\n>    >          print(\"{} is divisible by {}\".format(str(myNumber),str(myDivider)))\n>    >      myNumber += 1\n>    >  \n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n---\n\n\n---\n\n> ### {% icon hands_on %} Exercise 6.3.2\n>\n> Write a program where you start with a list of numbers from 1 to 100, and you then remove every number from this list that can be divided by 3 or by 5. Print the result.  \n> Tip: you have to make a copy of the original list here, otherwise Python will get 'confused' when you remove values from the list while it's looping over it. Use `[:]` for this purpose.  \n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    > ```python\n>    > # Write a program where you start with a list of numbers from 1 to 100, and you then remove every number from this list that can be divided by 3 or by 5. 
Print the result.\n>    > # Tip: you have to make a copy of the original list here, otherwise Python will get 'confused'\n>    > # when you remove values from the list while it's looping over it\n>    > \n>    > myNumberList = list(range(1,101))\n>    > \n>    > for number in myNumberList[:]:  \n>    >     if not (number % 3) or not (number % 5):\n>    >         myNumberList.pop(myNumberList.index(number))\n>    > \n>    > print(myNumberList)\n>    > ```\n>    > </details>\n>\n{: .hands_on}\n\n---\n\n\n---\n> ### {% icon hands_on %} Exercise 6.3.3\n>\n> Write a program where you ask the user for an integer (whole number), and keep on asking if they give the wrong input. Check whether the number can be divided by 7, and print the result.\n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```python\n>    > # Write a program where you ask the user for an integer (whole number), and keep on asking if they give the wrong input. Check whether the number can be divided by 7, and print the result.\n>    > myNumberList = range(1,101)\n>    >  \n>    > # Keep on checking until you have a number, prime the while loop as well\n>    > isNumber = False\n>    > while not (isNumber):\n>    >     inputString = input(\"Give a number:\")\n>    >     if inputString.isdigit():\n>    >         isNumber = True\n>    >         number = int(inputString)\n>    >     else:\n>    >         print(\"Incorrect, not a whole number, try again.\")\n>    >     if not (number % 7):\n>    >         print(\"{} can be divided by 7!\".format(number))\n>    >     else: \n>    >         print(\"Number not divisible by 7\")\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n---\n\n## 6.4 Iterating through two files at the same time\nPython has a built-in function, `zip`, which allows you to iterate through multiple sequences (e.g. lists or strings) at the same time. 
For two strings, it would look like this:\n\n\n```python\nx = 'abcde'\ny = 'fghij'\n\ncount = 0\nfor i,j in zip(x,y):\n    count += 1\n    print(\"Iteration: {}. The value i is {}, and the value j is {}\".format(count, i, j))\n```\n\nAnd the principle is practically the same for three (or more) strings. \n\n\n```python\nx = 'abcde'\ny = 'fghij'\nz = 'klmno'\n\ncount = 0\nfor i,j,k in zip(x,y,z):\n    count += 1\n    print(\"Iteration: {}. The value i is {}, the value j is {} and the value k is {}\".format(count, i, j, k))\n```\n\n\n","# Introduction\n{:.no_toc}\n\n<!-- This is a comment. -->\n\n## Introduction to data selection and preservation\n\nResearch should be transparent and you should always be able to revert back to your data if necessary and be able to show others how you came to your results. Therefore, your research data with all information reasonably necessary for verification needs to be preserved.\n\nWith well-managed and preserved research data, you can defend yourself against allegations of mistakes. You can also prevent wrong conclusions from further spreading into the scientific community if there really are mistakes.\n\n## Long term data preservation\n\nResearch data can be preserved for different reasons such as verification and/or possible reuse. It can be your own wish or that of your university, funder or journal.\n\n**Verification**\nTODO: adapt this part\n\nThe Netherlands Code of Conduct for Academic Practice (VSNU) states that raw data from research must be kept available for a minimum of ten years. This statement is also included in the Utrecht University Policy framework for research data: “Archived research data are to be retained for a minimum of ten years, commencing from the date that the research results are published.”\n\n**Reuse**\nIt may be worthwhile to make (part of) your data available for a longer period of time and/or for a wider audience. 
Data which are suitable to keep for reuse are interpretable data on which new research can be based, independent of the publication.\n\nOn the one hand, making research data reusable will need extra effort. On the other hand, possible reuse, even by your future self, might bring you lots of benefits and credits. Consider if your data is worth the effort of making it reusable or if preserving and archiving for verification is enough.\n\nReuse is explained more in depth in the next part of this course: ‘Availability for reuse’. In this part we will focus on selection and preservation of research data for verification purposes.\n\n## Data package\n\nKeeping data for verification serves the specific goal of having transparent, reproducible research.\n\n**Alternatives to preserving raw data**\nIf preserving your raw data poses problems, alternatives can also ensure verification. For instance, transcripts of recorded interviews could hold all important information and may be less privacy-sensitive, so it is reasonable to preserve those instead of the recordings themselves. Also, if raw data is very large, preserving your data only in some processed form could be an alternative, combined with, for instance, a demonstrable quality check on the processing.\n\n**The contents of your data package**\n\nTODO: add image for illustration/zenodo?\n\nOthers should be able to understand what you did. It is not enough to just provide data. Instead you should preserve a package with everything included that is necessary to reproduce your results. 
Think of including the following:\n\n* Primary (raw) data;\n* Secondary (processed) data;\n* Protocols;\n* Computer code/scripts;\n* Lab journals;\n* Metadata and/or codebooks describing the data;\n* An overview of the contents of the data package, stating which file contains what information, and how these are related.\n\nThe data should contain a reference to any publication which is based on the data.\n\nTo make understanding your data less dependent on information in the publication, you can also add information on:\n\n* Collection methods;\n* Procedures;\n* Experimental protocol;\n* Your research question;\n* Stimuli used;\n* Sample descriptions.\n\nThis is especially practical if the data package can be found and used on its own account. This is the case if it is published in a data repository or data journal as a data package for reuse.\n\nDo not forget to explicitly state who is responsible for the content of the data package, who is to be contacted in case of a request for access, and under what conditions access is granted.\n\n## Where to preserve what type of data?\n\nDuring your research, you generate research results that can be made available for others.\n\nA paper or publication is the most traditional way of making results available, but it is by no means the only way. A relatively new way of making results available is using a public data repository.\n\nAs you have just learned, preserving your data may serve the purpose of verification or reuse. Public data repositories cater to both needs. In addition, they handle requests to view or use your data, which means you do not have to take care of such requests yourself.\n\nIn the example below, you find a workflow for experimental research. What information can be made available in what place? Drag the items on the right to the correct place in the figure. 
Please note that some items can be used more than once.\n\nTODO: add H5P quiz and PDF solution?\n\n### Accounting for data of others\n\nIf you are permitted to use data from other parties, you will have to account for those as well if your research is to be verifiable and reproducible by others. You may recognise this from chapter 1 of this course: Data collection: Discover existing data, weblecture ‘Assessing usefulness of research data of others’ (5 of 10).\n\nYou have the following options:\n\n* If the used data is preserved correctly somewhere for the coming ten years, refer to the data repository in question.\n* If it is not taken care of, contact the responsible persons, negotiate correct preservation in a data repository for ten years, and refer to that repository.\n* If this isn’t possible, try to arrange a local copy that you preserve yourself.\n* If this isn’t allowed, you will not be able to present the data in case of questions. Therefore, you should ask yourself whether you can actually use the data.\n\n<figure id=\"figure-1\"><img src=\"../../images/Cont_5_Share_SelectPreserve_Chart10years.png\" alt=\"alt-t\"><figcaption><span class=\"figcaption-prefix\">Figure 1:</span> Preserve for 10 years</figcaption></figure>\n\n**Accounting for data of others on websites**\n\nIf you find interesting information on a website that you want to refer to, it is possible that this information will not be future proof.\n\nThe link or web address might change over time (link rot). Or the information on a website is updated, changed or replaced with other content (content drift).\n\nIt is possible to archive web pages on a web archive like the [Internet Archive](https://archive.org/web/). You can capture a web page as it appears now for use as a trusted citation in the future (save a page). You will get an alternative link, pointing to the archived, static version of the page. 
Use this alternative link as a reference to the online information.\n\n## How to preserve your data correctly\n\nIn order for the data to survive for the long term, an active preservation regime has to be applied. The bad news is, data automatically gets lost over time.\n\nThere are five main ways your data can be lost:\n\n* Digital sources degrade over time ('bit rot');\n* File formats and software become outdated;\n* The media on which your data is stored becomes outdated or defective;\n* Disaster strikes the storage location;\n* The person that understands the data finds another job or data simply becomes forgotten.\n\nIn this video below you will learn how to minimise the risk of losing data. You are also given good preservation practices.\n\n<iframe src=\"https://www.youtube.com/embed/qENaO0Lk6eo\" allowfullscreen=\"\" allow=\"accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture\" height=\"515px\" style=\"display: inline-block;\" width=\"800px\" title=\"\"></iframe>\n\n## Match the solutions to the data loss\n\nFrom the weblecture you learned how to prevent data loss. Can you recall all applicable active regimes, as explained in the weblecture?\n\nBelow you see a list of solutions to prevent data loss. Underneath that list you see a list of risks for data loss. Please add the number of each solution to the correct risk.\n\n**Solutions to prevent data loss**\n\n1. Have multiple copies. Use a checksum to identify faulty copies\n2. Use preferred file formats that can be opened by a wide range of software. Update the file format to a current one.\n3. Move data to fresh media well before the media’s expiration date.\n4. Have multiple copies. Move data to fresh media well before the media’s expiration date.\n5. Document your data well.\n6. 
Advertise the content in a data catalogue.\n\nTODO: add quiz text solution\n\n### Write your data management plan for your data preservation\n\nGo to DMPonline and open your draft data management plan created in the Introduction.\n\nYou have now completed the module on data selection and preservation. You should be able to complete the following questions in the section ‘Data selection and preservation’:\n\n* Which data should be preserved and/or shared?\n* How and where will you keep your data for the long term?\n","# OTU creation using LotuS \n{:.no_toc}\n\nIn this tutorial we will create a genus abundance table from two 454 sequencer runs using a pipeline called LotuS. A genus abundance table contains counts of different genera in several samples – rows are the different genera and columns the samples. As a simple example, take a look at this table:\n\n|                     |        |       |        |       |      |\n|:--------------------|:-------|:------|:-------|:------|:-----|\n|Genus\t              | bl10   |bl11   |bl12\t|bl128\t|bl13  |\n|Bacteria             |24      |52     |39\t|63\t|181   |\n|Bacteroides\t      |169     |27     |7\t|42\t|6     |\n|Porphyromonadaceae   |370     |346    |621\t|565\t|224   |\n\nThis table tells us how often we observe unclassified Bacteria, Bacteroides and unclassified Porphyromonadaceae in the 5 samples bl10, bl11, bl12, bl128 and bl13. A matrix like this will be created from raw sequence data within this tutorial and used in the next tutorial on numerical ecology.\n\n## The data\n\nIn a recent experiment, we sequenced 73 samples in two 454 runs; the raw fasta and quality files are in `/home/VIBTrainingX/metagenomics/` on the bits server. For each run we have a fasta (.fna) and quality (.qual) file. Go to this directory using the command `cd` and become aware of the files required from the experimenter (command `ls`). 
You can always take a look at files and their contents using viewing commands like `less`.\n\nThe sequence files were multiplexed before the experiment, that is, a small nucleotide sequence – the barcode – was attached to each read, specific for each experiment. A mapping file is typically used, containing the link between a sequence barcode and the name of the experiment and is essential to demultiplex the fasta files. \n\n## The tools\n\nLotuS is actually a set of tools that were installed in the `/opt/` folder. First go to [the lotus website](http://psbweb05.psb.ugent.be/lotus/) and familiarize yourself with the basic documentation.\n\nTo start the exercises, go to the directory where Lotus is installed. \n```bash\ncd /opt/lotus-1.62/lotus_pipeline/\n```\n\nFrom this directory you can run all the tools. To reach all data files (e.g. input files) you have to provide the path to the files: `~/metagenomics/`\n\n## The analysis\n\n### Creation of Mapping file. \n\n[An Excel file](http://data.bits.vib.be/pub/trainingen/metagenomics/Mice_experiment.xlsx) is provided, with some basic experiment annotation. The fwd primer is given as `ACTYAAAKGAATTGACGG`, but if you search for the primer sequence in the reads (in one of the .fna files) you will not find it because you need to reverse complement the primer sequence first using [this tool](http://www.bioinformatics.org/sms/rev_comp.html). So you see annotation provided by the provider is not always correct.\n \nLotus needs experiment annotation to map input files to barcodes. Based on the documentation on [the Lotus website](http://psbweb05.psb.ugent.be/lotus/documentation.html#MapFile), create a mapping file for this experiment. This means that you need to replace the column headers of the Excel file with terms that are accepted by Lotus and that you have to indicate that there is a .fna and a .qual file for each run. The header line should be preceded by a `#`. 
The mapping file should at least contain four columns with the following headers:\n\n* SampleID\n* BarcodeSequence\n* fnaFile\n* qualFile\n\nSave the file as a tab-delimited text file.\n\nYou can always test the validity of your mapping file with the command \n```bash\n./lotus.pl -check_map [your mapping file]\n```\n\nIf you have fastq files as input the fnaFile and qualFile columns would be replaced by one fastqFile column.\n\n### Changing  the data format of the input files.\n\nSometimes you need to transcribe data from one format to another. For instance we could transform the fasta files (.fna) to fastq files (.fq). This can be done with the program `sdm`, that is part of the LotuS pipeline. Take a look at the sdm help by typing `./sdm` and exploring the options, e.g.\n \n```bash\n./sdm -help_commands\n```\n\nThen, using command line arguments, transcribe the fasta + qual files of the Anh experiment into fastq files. Take a look at output and log files created by sdm.\n\n> ### {% icon hands_on %} Hands-on: Exercise 1 \n>\n>  > ### {% icon question %} Question\n>  > \n>  > How to transform fasta + qual files into fastq files ?\n>  >\n>  > > <details markdown=\"1\">\n>  > > <summary>{% icon solution %} Solution\n>  > > </summary>\n>  > > ```bash\n>  > > sudo ./sdm -i_fna ~/metagenomics/Anh.1.fna -i_qual ~/metagenomics/Anh.1.qual -o_fastq t1.fq\n>  > > ```\n>  > > </details>\n>  >\n>  {: .question }\n{: .hands_on }\n\nIn the lotus_pipeline folder the fastq file t1.fq was generated, to take a look at the file use\n```bash\nhead t1.fq\n```\n\nDo the same for the t1.log file: you see that sdm is not only used to transform fasta into fastq files but it is also capable of doing quality filtering on the raw reads files.\n\n### Setting up a quality filter of the input sequence files.\n\nSince we want to make sure the quality filtering of the input file is strict, LotuS offers several quality filtering options. 
Quality settings are different for different data formats, that's why Lotus offers a file with specific settings for each platform. Since we have 454 data we will take a look at the file sdm_454.txt.\n```bash\nless sdm_454.txt\n``` \n\nRead the comments (lines starting with “#”) to each option and think which quality filtering options might be important in order to create OTUs from the raw sequences. (Hint: an OTU is a clustering of similar sequences with the aim to have one cluster of sequences for each species that was originally present in the samples. Take into account that sequencing machines make errors and that PCR amplification of the 16S rDNA is similarly error-prone). Think about a set of parameters, including the statistical information from step 2, and save these in your copy of sdm_options.txt for later use.\n\nCheck the sdm [quality filter settings](http://psbweb05.psb.ugent.be/lotus/documentation.html#SDMconfig). Some of the default filter settings are:\n\n* MinSeqLength=250 : Only use reads at least 250 nt long after processing (remember we are working with long reads from 454 sequencing)\n* TruncateSequenceLength = 250 : Cut all reads after 250 nt\n* QualWindowWidth = 50 and QualWindowThreshold = 25 : Remove all reads where average quality is <= 25 over a 50bp window\n* maxAccumulatedError = 0,5 : Remove all remaining bases when accumulated error score >= 0,5\n* maxHomonucleotide = 8 : Remove all reads with a homonucleotide run (repeat of same nt) >= 8\n* RejectSeqWithoutFwdPrim = TRUE : Remove all reads that do not contain the forward primer\n\n### Demultiplexing  and quality filter the input files.\n\nFor this step you will need the mapping file from Step 1 and the file with the quality filtering settings for 454 data. Use the sdm command to demultiplex and filter all input files at the same time into a local folder ''demultDir''. 
First create the folder where the demultiplexed files will be written in ~/metagenomics/:\n```bash\nmkdir ~/metagenomics/demultDir\n```\n\nSince the mapping file contains information on all files, you have to provide an input path to the folder that contains the input files (.fna + .qual) to sdm.\n\n> ### {% icon hands_on %} Hands-on: Exercise 2 \n>\n>  > ### {% icon question %} Question\n>  > How to demultiplex and quality filter files ? \n>  >\n>  > > <details markdown=\"1\">\n>  > > <summary>{% icon solution %} Solution\n>  > > </summary>\n>  > > ```bash\n>  > > ./sdm -i_path ~/metagenomics/ -o_fastq t1.fq -o_demultiplex ~/metagenomics/demultDir/ -map ~/metagenomics/map.txt -options sdm_454.txt \n>  > > ```\n>  > > Input is the folder containing the .fna and .qual files. The demultiplexing will fill the demultDir folder with fastq files.\n>  > > </details>\n>  >\n>  {: .question }\n{: .hands_on }\n\nDiscuss the output files and what each of these represents. In this experiment multiple samples were sequenced in the same lane. Two lanes were used, each containing 37 samples. After sequencing, this results in two files with reads. To know which sample a read comes from, unique bar codes are incorporated into the adapter sequences. One specific bar code for each sample. In this step reads from different samples (but from the same lane thus in the same fasta file) are split over separate fastq files, one for each sample. \n\n### Mapping file creation when sequence provider provides demultiplexed files.\n\nNow that you have demultiplexed the files into a single folder, you might be aware that sequence providers often deliver files in this format: already demultiplexed into single files. In this case slight modifications to the mapping file are enough to change the input from non-demultiplexed large file(s) to demultiplexed-many-small-files.\n\nNote that lotus has a special script that creates the mapping file for you in this case. The script is autoMap.pl. 
It is used to link SampleIDs to demultiplexed files. Run autoMap.\n\n```bash\n./autoMap.pl ~/metagenomics/demultDir/ ~/metagenomics/automap.txt 1,1\n```\n\n### Running Lotus.\n\nWe will run Lotus on the demultiplexed files. Use the mapping file you generated in Step 5 and the settings file sdm_454.txt. Use the utax taxonomy to assign a phylogeny to the derived OTUs. Run lotus from within the /opt/lotus_pipeline/ folder and save the output in the folder ''testR''.\n\n> ### {% icon hands_on %} Hands-on: Exercise 3 \n>\n> > ### {% icon question %} Question\n> > How to run lotus\n> >\n> > > <details markdown=\"1\">\n> > > <summary>{% icon solution %} Solution\n> > > </summary>\n> > > ```bash\n> > > sudo ./lotus.pl -i ~/metagenomics/demultDir/ -o ~/metagenomics/testR/ -m ~/metagenomics/automap.txt \n> > > ```\n> > > Input is the folder containing the .fna and .qual files. The demultiplexing will fill the demultDir folder with fastq files.\n> > > </details>\n> >\n> {: .question }\n{: .hands_on }\n\nIn case you haven't done any quality filtering yet, you can still do it now. The command would then be:\n```bash\nsudo ./lotus.pl -i ~/metagenomics/demultDir/ -o ~/metagenomics/testR/ -m ~/metagenomics/automap.txt -s sdm_454.txt\n```\n\n* Peek at the file hiera_RDP (using `head`). The file maps each OTU to a genus.\n* Peek at the file OTU.txt (using `head`). The first line contains the number of reads that represent OTU_1 in each sample.\n* Peek at the file otus.fa (using `head`). It contains the reads representing each OTU. You can use this file to blast the sequences to check if they are really from the OTU they were assigned to.\n* Go to the folder higherLvl. This contains the data that we are going to use in the Ecology analysis.\n* Go to the folder LotuSLogs. This contains the settings of the analysis. For instance, peek at the file demulti.log: it shows how many reads were rejected... The file citations.txt contains the references for reporting your LotuS results. 
\n\n### Using a different taxonomy assignment on a finished run.\n\nIn this step we want to reassign the taxonomy to a LotuS run, but keep exactly the same OTUs. In this exercise, assign the OTUs to the Silva taxonomy. \n\nThis option is useful, if e.g. you want to keep your work on a given OTU set (as well as the phylogenetic tree), but want to try out what would have happened if you had used e.g. Silva as reference database instead of utax.\n\n> ### {% icon hands_on %} Hands-on: Exercise 4 \n>\n> > ### {% icon question %} Question\n> > How to reassign the taxonomy with Silva as reference database? \n> >\n> > > <details markdown=\"1\">\n> > > <summary>{% icon solution %} Solution\n> > > </summary>\n> > > ```bash\n> > > sudo ./lotus.pl -i ~/metagenomics/demultDir/ -o ~/metagenomics/testR2/ -m ~/metagenomics/automap.txt -s sdm_454.txt -refDB SLV -redoTaxOnly 1\n> > > ```\n> > > Input is the folder containing the .fna and .qual files. The demultiplexing will fill the demultDir folder with fastq files.\n> > > </details>\n> >\n> {: .question }\n{: .hands_on }\n\n### Using  a custom database.\n\nResearch on honey bee gut communities uses very specific taxonomic names for already known bacteria. In order to accommodate their naming scheme, we will use a very specific database that contains 16S sequences of bacteria mostly found in the honey bee gut. Download the [bee taxonomy in tax format](http://psbweb05.psb.ugent.be/lotus/packs/DB/beeTax/beeTax.tax) and [bee taxonomy in fna format](http://psbweb05.psb.ugent.be/lotus/packs/DB/beeTax/beeTax.fna).\n\nUse the two provided files (fna, tax) to redo the taxonomy again, but this time assign first using the honey bee DB; everything with only a low-scoring hit should then be assigned with the SILVA database. \n\n> ### {% icon hands_on %} Hands-on: Exercise 5 \n>\n> > ### {% icon question %} Question\n> > Use honey bee taxonomy database ? 
\n> >\n> > > <details markdown=\"1\">\n> > > <summary>{% icon solution %} Solution\n> > > </summary>\n> > > ```bash\n> > > sudo ./lotus.pl -i XX -o ~/metagenomics/testR3/ -redoTaxOnly 1 \\\n> > > -m ~/metagenomics/LASI_Spring_2_bees_barn_3_map_lts_5.txt \\\n> > > -refDB ~/metagenomics/beeTax.fna,SLV -tax4refDB ~/metagenomics/beeTax.tax \n> > > ```\n> > > Input is the folder containing the .fna and .qual files. The demultiplexing will fill the demultDir folder with fastq files.\n> > > </details>\n> >\n> {: .question }\n{: .hands_on }\n\n### Get  everything assigned!\n\nIn this step we want to assign every OTU sequence to a database target – and we don’t care about false positive assignments! Of course this is per se wrong, but in some cases you just want to know what the best hit would be, even if it is only 90% similar to your OTU sequence. LotuS provides several options that allow tweaking towards more lenient assignments. Find all options related to this and try to create the most extreme case with these options, by reassigning the taxonomy again as in the previous step.\n\n### Try a different sequence clustering algorithm.\n\nNow rerun lotus, but try to optimize for a lot of small, hard defined OTUs (that might correspond to something like strain level). Which clustering algorithm might be suitable? Which clustering cutoffs make sense? For this specific run, use the first mapping file you created (step 1) and the non-demultiplexed input files. Save this output in the folder ''testR4''.\n\n### Your own best run.\n\nNow that you have run LotuS with various databases and options, go back to the output folders of the different runs and look at the statistics provided in the ''LotuSLogs'' subfolder. Based on this, tune the sdm filtering parameter file from step 3 (again), choose the database you think best suited/most interesting, and choose a clustering algorithm. With these settings, run the sample set again, saving the output in folder ''testrun1.3''. 
You can use this output folder in the following session on numerical ecology.\n\nOnce the LotuS run has finished, go to the specified output folder and copy the genus.txt from the output folder to your home folder: \n```\ncp testrun1.3/ higherLvl/genus.txt ~\n```\n\n### Using Illumina data as input.\n\nIn all the analyses so far we were using 2 x 454 runs from an outdated next generation sequencing technology. For the next exercise we will look at the output of an Illumina miSeq sequencing platform, that is still being used a lot nowadays.\n\nSet up the mapping file, using [the provided Miseq.xlsx file](http://data.bits.vib.be/pub/trainingen/metagenomics/Miseq.xlsx). Run LotuS, after you set up a custom sdm configuration file and using a combination of parameters you learned about in previous steps.\n\nThis run might take somewhat longer to finish. Be sure you set it to use all the cores of your computer and let it run over the lunch break.\n\nCongratulations, now you know how to process raw sequence files to meaningful summary tables, that can be directly analyzed in R or even Excel! In the next tutorial this matrix will be analyzed with the help of R, after the lunch break.\n\n","# Tools  \n{:.no_toc}\n\n## Lotus pipeline \nLotuS offers a lightweight complete 16S/18S/ITS pipeline to\n- Demultiplex and filter fasta or fastq sequences\n- Denoise, remove chimeric sequences and cluster sequences into very high quality OTUs that perform at a similar level to mothur / dada2\n- Determine taxonomic origin of each OTU using >5 specialized and general purpose database or statistical algorithms\n- Construct OTU, genus, family, class, order and phylum abundance tables in .txt or .biom format\n- Reconstruct OTU phylogenetic tree\n\nMore information at [LotuS home page](http://psbweb05.psb.ugent.be/lotus/downloads.html)\n\n## usearch \n\nDownload [usearch version 8](http://www.drive5.com/usearch/download.html) and copy the executable in a folder e.g. 
/usr/bin/tools/ which you can reach (you might need to be superuser for this)\n\nMake executable:\n```\nsudo chmod +x /usr/bin/tools/usearch8.1.1861_i86linux32\n```\n\nCreate a symbolic link into the folder where Lotus will search for it:\n\n```\nsudo ln -s /usr/bin/tools/usearch8.1.1861_i86linux32 /usr/bin/tools/lotus_pipeline/bin/usearch_bin\n```\n\n## R package \n\nYou also need R with the vegan package installed.\n","# Ecology Analysis using vegan \n{:.no_toc}\n\nIn this exercise we will look at a data matrix of 16S rRNA counts in 74 samples.\n\nThis dataset is the microbiota composition of 74 mice from 5 different mouse strains. The original research aim was to define the effect that the mouse genome has on the microbiota and what the effect of living in the same cage would be. However, we found much stronger trends in the data, and these we will look at in this exercise.\n\nThe 454 data was already compiled into a matrix with genus abundance per sample in a previous step. This matrix is called a feature abundance matrix, or abundance matrix for short. We will do an ecology-oriented analysis of the data, in later steps also taking metadata (experimental, environmental or clinical data that was collected for each sample, independent of the DNA) into account. The aim of this tutorial is to get an idea of the very basic steps of ecological data analysis using the programming language R.\n\nThe genus abundance table (Genus.txt) can be found in the folder /home/VIBTrainingX/NGS/metagenomics/higherLvl on the server. Those who are working on their own laptop can download it [from the lotus website](http://psbweb05.psb.ugent.be/lotus/data/LotuStutorial.tar.gz).\n\n1. Set the folder with the provided files as your working directory in R using `setwd`. This way required files can be easily loaded. To find out how to use this command, you can type ?setwd() to open the help. 
If there are other R-commands that you want to know more about, you can open the R-help for that command by entering in the R-prompt `?command`. This will be very useful when working with R, make sure to use this a lot as you can only learn more :o). \n\n\n> ### {% icon hands_on %} Hands-on: Exercise 1 \n>\n>  > ### {% icon question %} Question\n>  >\n>  > How to set the working directory in R \n>  >    > <details markdown=\"1\">\n>  >    > <summary>{% icon solution %} Solution\n>  >    > </summary>\n>  >    > ```R\n>  >    > setwd(\"dir_to_data\")\n>  >    > ```\n>  >    > </details>\n>  >\n>  {: .question }\n{: .hands_on }\n\n2. Load the provided data into the matrix M (Genus.txt, actual genus abundance data), using the read.delim command, saving the loaded table as `M`. Make sure, the row names are correctly read in. As R reads the matrix as an object of class data.frame, we convert M from a data.frame to a matrix `M=as.matrix(M)`. This is important for some of the following calculations, where we need a `matrix` class object. \n\n> ### {% icon hands_on %} Hands-on: Exercise 1 \n>\n>  > ### {% icon question %} Question\n>  >\n>  > How to read in data as matrix ? \n>  >\n>  > > <details markdown=\"1\">\n>  > > <summary>{% icon solution %} Solution\n>  > > </summary>\n>  > > ```R\n>  > > # read in data as matrix\n>  > > M = read.delim(file=\"Genus.txt\",row.names=1)\n>  > > M = as.matrix(M) \n>  > > ```\n>  > > </details>\n>  >\n>  {: .question }\n{: .hands_on }\n\nThe matrix you loaded represents the number of 16S sequences assignable to each genus, which we could find in the samples. Also note that not all genera are real genera, but partly assigned unknown sequences. With these groups we do not know if this is a single genus or in fact several genera or in extreme cases even several classes, that just all fall under the same phylum tag. 
What are the advantages and disadvantages of keeping such undefined groups in the data?\nUse the function `edit(M)` to better view the abundance matrix.\n\n3. Let’s look at some  basic features of the abundance matrix. The `summary(M)` command is a good start, but also look at total row and column counts (`colSums`, `rowSums` command). To see how the genera are distributed within each sample, we will plot a sample-wise density plot.We will be using a combination of the `density`, `lines` and `lapply` functions, to draw the densities of values found in each sample. Let’s start with looking at the density of the first sample. In R you can access specific columns by writing the matrix coordinates in square brackets. For example `M[1,]` shows the first row of a matrix, `M[,7]` shows the 7th column etc:\n\n> ### {% icon hands_on %} Hands-on: Exercise 3 \n>\n>  > ### {% icon question %} Question\n>  >\n>  > How to estimate density of first sample ? \n>  >\n>  > > <details markdown=\"1\">\n>  > > <summary>{% icon solution %} Solution\n>  > > </summary>\n>  > > ```R\n>  > > # estimate density of first sample\n>  > > densityOfSample1 = density(M[,1])\n>  > > ```\n>  > > </details>\n>  >\n>  {: .question }\n{: .hands_on }\n\n\nLook at the object densityOfSample1 by simply entering the object name into the command prompt. Next try to visualize it with `plot(densityOfSample1)`. In this plot you see that most genera are at 0 abundance, some genera have an abundance <10 and some rare genera actually occur with a higher frequency, one genus even having ~1100 16S reads assigned to it. Which genus is this?\n\nAlternatively you can also use the function `hist`, to plot a histogram of the abundances. 
Try to do this now.\n\n> ### {% icon hands_on %} Hands-on: Exercise 4 \n>\n>  > ### {% icon question %} Question\n>  >\n>  > How to plot histogram of abundances ?\n>  >\n>  > > <details markdown=\"1\">\n>  > > <summary>{% icon solution %} Solution\n>  > > </summary>\n>  > > ```R\n>  > > # plot histogram of abundances\n>  > > hist(M[,1], nclass = 50)\n>  > > ```\n>  > > </details>\n>  >\n>  {: .question }\n{: .hands_on }\n\nWe can use the `apply` command, to apply the density command to every column of M, which will return a list of density objects. The second argument to the `apply` function is the `margin` and is set to 2, which tells the `apply` function that we want to work on columns (margin = 2) and not on rows (margin = 1). Save this into object  `S_densities`.\n\n> ### {% icon hands_on %} Hands-on: Exercise 5 \n>\n>  > ### {% icon question %} Question\n>  >\n>  > How to estimate densities of all samples ? \n>  >\n>  > > <details markdown=\"1\">\n>  > > <summary>{% icon solution %} Solution\n>  > > </summary>\n>  > > ```R\n>  > > # estimate densities of all samples\n>  > > S_densities = apply(M,2,density)\n>  > > ```\n>  > > </details>\n>  >\n>  {: .question }\n{: .hands_on }\n\n\n\nTo plot this start with:\n```R\n# open a new plot window and set range of x and y axis\nplot(1,1,type=\"n\",ylim=c(0,3),xlim=c(0,5000)) \n```\n\nThis will open a new plotting window, already set to the range of x and y coordinates (xlim, ylim) we will need in this example. In this case we just want to plot a blank space, this is done with the `type=n` argument. Try to replace the argument by `type=p`, to actually see that point! S_densities is a list, so we use `lapply` (list apply), in combination with the `lines` function, try this now to plot all the density lines into the open plot window.\n\n> ### {% icon hands_on %} Hands-on: Exercise 6 \n>\n>  > ### {% icon question %} Question\n>  >\n>  > How to plot density distributions of all samples ? 
\n>  >\n>  > > <details markdown=\"1\">\n>  > > <summary>{% icon solution %} Solution\n>  > > </summary>\n>  > > ```R\n>  > > # plot density distributions of all samples\n>  > > lapply(S_densities,lines)\n> > > ```\n>  > > </details>\n>  >\n>  {: .question }\n{: .hands_on }\n\n\nWhat you should see now in the plot window is the density distribution of all samples. The lines function is adding new lines, while a plot function makes a completely new plot. Try to replace the `lines` with `plot` to see this (it’s very fast, so keep a close eye on your plot). How are these lines already telling us something about the differences between the communities of each sample?\n\n4. Maybe you noticed that the `colSums` command showed that the totals are not equal. What does this mean? In this state the data is actually not comparable among each other. One way to `correct` the data for this shortcoming is to normalize the matrix. In this step we will normalize the abundance matrix into variable M1: \n\n```R\n# normalize matrix: divide each column by the total of that column\nM1 = sweep(M,2,colSums(M),\"/\")\n```\n\nThe `sweep` command is extremely useful, as it will apply a simple arithmetic operation (like divide) in a matrix column- or row-wise with a vector of your choice. So it is very similar to `apply`, but takes more basic functions. In this case we will divide each column by the sum of the column, this is called normalization.\n\nNow we will compare these matrices using the `barplot` function. For this we need to open another graphical window, using the `X11` function:\n```R\n# create barplot of original and normalized data\nbarplot(M)\nX11()\nbarplot(M1)\n```\n\nWhat do you notice about the sample composition? What does the graph mean? Discuss where you would want to normalize the data (and where not).\n\nClose all open plots.\n\nNow replot the sample-wise density plot (as you did in step 3), but start the plot with these adapted x and y ranges. 
Additionally we will this time label the x- and y-axis:\n\n```R\n# open a new plot and define ranges and titles of x and y axis\nplot(1,1,type=\"n\",ylim=c(0,80),xlim=c(0,1),xlab=\"relative genus abundance\", ylab=\"Frequency of genera\") \n``` \n\nYou will notice that the graph looks different from your previous plot. What changed due to the normalization? Are the samples more similar to each other using M or M1? \n\nIf you spot a difference in species abundance between two samples using matrix M, is this difference real, does it have scientific value?\n\nFor the next step the R-library vegan is required. It is a set of functions specifically designed for ecological data analysis. The package has been installed on the bits laptops. If you were to install the package, you could do so using the command: `install.packages(\"vegan\")`. More details on the [vegan web site](http://cc.oulu.fi/~jarioksa/softhelp/vegan.html). Load vegan, using the `library` command.\n\n\n> ### {% icon hands_on %} Hands-on: Exercise 7 \n>\n>  > ### {% icon question %} Question\n>  >\n>  > How to load the vegan package ?\n>  >\n>  > > <details markdown=\"1\">\n>  > > <summary>{% icon solution %} Solution\n>  > > </summary>\n>  > > ```R\n>  > > # load vegan package\n>  > > library(vegan)\n>  > > ```\n>  > > </details>\n>  >\n>  {: .question }\n{: .hands_on }\n\n\nLet’s try to put the differences we observed in sample density into numbers. To do this, ecologists rely on the concept of diversity. Diversity describes the evenness of species distributions as well as the richness of species that are observed in a given ecological system. We will first calculate the Shannon diversity, using vegan’s `diversity` command. Try to do this per sample, using the `apply` function again. Save the result in object `div`.\n\n> ### {% icon hands_on %} Hands-on: Exercise 8 \n>\n>  > ### {% icon question %} Question\n>  >\n>  > How to calculate Shannon diversity index for each sample using the normalized data ? 
\n>  >\n>  > > <details markdown=\"1\">\n>  > > <summary>{% icon solution %} Solution\n>  > > </summary>\n>  > > ```R\n>  > > # calculate Shannon diversity index for each sample using the normalized data \n>  > > div = apply(M1,2,diversity,index=\"shannon\")\n>  > > ```\n>  > > </details>\n>  >\n>  {: .question }\n{: .hands_on }\n\nNow we can see in action what these indices are actually doing for us. Plot the density of the sample with the lowest and highest diversity in red and blue on your previous density plot of M1, this you do by first finding out which diversity indexes are the maximum and minimum values using the `which.max` and `which.min` functions on the object `div`. Don’t forget to have the last density plot still open (or replot it from step 4 on M1), then add the lowest sample as a blue line and the highest sample as a red line, using the `lines` command. \n\n> ### {% icon hands_on %} Hands-on: Exercise 9 \n>\n>  > ### {% icon question %} Question\n>  >\n>  > Find samples with lowest and highest Shannon diversity index and add them to the density plot ? \n>  >\n>  > > <details markdown=\"1\">\n>  > > <summary>{% icon solution %} Solution\n>  > > </summary>\n>  > > ```R\n>  > > # find samples with lowest and highest Shannon diversity index and add them to the density plot\n>  > > which.min(div) #should be bl16\n>  > > which.max(div) #should be bl48\n>  > > lines(density(M1[,\"bl16\"],adjust =0.5),col=\"blue\")\n>  > > lines(density(M1[,\"bl48\"],adjust =0.5),col=\"red\")\n>  > > ```\n>  > > </details>\n>  >\n>  {: .question }\n{: .hands_on }\n\nYou can now readjust the window by changing the `ylim` and `xlim` attribute in the plot function, if necessary (tip, try to rerun using `ylim=c(0,180)`). Try to explain why the colored samples have the highest & lowest diversity. 
What does this tell about an ecosystem (remember that these are genus abundances)?\nRaise your hand if you reached this step.\n\nA different way to normalize the data is to sample exactly equal amounts of 16S rDNA for each sample in this experiment. Of course in practice this is impossible to do, but we can simulate this, by randomly selecting a subset of 16S rDNA. This is called rarefaction. Rarefy your original abundance matrix (M) into M2, using 1000 reads per sample, using the `rrarefy` function of vegan. Note that you need to transpose (command `t()`) the matrix, before giving it to `rrarefy`. Transpose the matrix back and save it as `M2`.\n\n> ### {% icon hands_on %} Hands-on: Exercise 10 \n>\n>  > ### {% icon question %} Question\n>  >\n>  > How to normalize via rarefaction ?\n>  >\n>  > > <details markdown=\"1\">\n>  > > <summary>{% icon solution %} Solution\n>  > > </summary>\n>  > > ```R\n>  > > # Alternative way of normalization\n>  > > M2 = t(rrarefy(t(M),sample=1000))  #vegan needs the transposed matrix, and we need it transposed back\n>  > > ```\n>  > > </details>\n>  >\n>  {: .question }\n{: .hands_on }\n\nUse `colSums(M2)` to check if the rarefaction worked. The main use of rarefaction is in calculating diversity and richness correctly, for this we will look in the following step at observed richness.\n\nThe concept of observed richness within a sample is pretty simple (but useful): richness describes the number of different species that occur at least once in a sample. 
This can be calculated in two steps:\n\n```R\n# Species present in sample: TRUE or 1 if species is present, FALSE or 0 if species is absent\nOnceOrMoreOftenPresent = M1>0\n``` \n\nThe sum of each column in this matrix will tell us how many species were detected in total within the respective sample, use the `apply` and `sum` functions , saving the result in `rich1`.\n\n> ### {% icon hands_on %} Hands-on: Exercise 11 \n>\n>  > ### {% icon question %} Question\n>  >\n>  > How to calculate the sum of each column ?\n>  >\n>  > > <details markdown=\"1\">\n>  > > <summary>{% icon solution %} Solution\n>  > > </summary>\n>  > > ```R\n>  > > # Calculate sum of each column\n>  > > rich1 = apply(OnceOrMoreOftenPresent,2,sum)\n>  > > ```\n>  > > </details>\n>  >\n>  {: .question }\n{: .hands_on }\n\n\nCompare the richness values in `rich1` to the richness obtained on the rarefied matrix `M2`, calculated with a shortened command:\n\n```R\n# Calculate number of present species in each sample using the rarefied data\nrich2 = apply(M2>0,2,sum)\n``` \n\nCompare rich1 and rich2 in a matrix value by value. We use the `cbind` command to bind two vectors column wise together, so we get a matrix with 2 columns. Order this matrix by the richness values in rich1, using the `order` command and accessing the vector representation with `[]` square brackets.\n\n```R\n# Create new matrix with two columns: rich1 and rich2 and order rows according to rich1 values\ncbind(rich1,rich2)[order(rich1),]\n```\n\nWhat does the second part of the formula do? What happens if you change that to order(rich2)?\n\nDiscuss which richness values have the highest value to the researcher and why the order is very different between these two richness estimates. Is one way clearly wrong?\n\nWhy did we choose 1000 as cutoff for the sequences per sample? What is the maximum value we could choose? \n\nFirst samples are clustered to see underlying data structures. 
For this tutorial we will choose a hierarchical clustering, based on a Bray-Curtis distance between samples, using the function `vegdist`. Make sure the distances are calculated between Samples and not Genera.\n\nNext, use the function `hclust` on the distance matrix, saving the output in variable `cluster`, and subsequently plot the clustering of the samples (using `plot`).\nTake a guess of how many groups there might be in this clustering?\n\n> ### {% icon hands_on %} Hands-on: Exercise 11 \n>\n>  > ### {% icon question %} Question\n>  >\n>  > How to cluster samples and plot results ?\n>  >\n>  > > <details markdown=\"1\">\n>  > > <summary>{% icon solution %} Solution\n>  > > </summary>\n>  > > ```R\n>  > > # cluster samples and plot results\n>  > > BCD = vegdist(t(M1), method=\"bray\")\n>  > > cluster = hclust(BCD)\n>  > > plot(cluster)\n>  > > ```\n>  > > </details>\n>  >\n>  {: .question }\n{: .hands_on }\n\nTo visualize the samples and their relatedness to each other in a two-dimensional space, we can use an ordination to visualize the data in a low dimensional space. The dimensionality of the original matrix (73 genera=73 dimensions) is reduced to two dimensions. If you know what a PCA (Principal component analysis) is, this step will use a conceptually similar, but methodologically quite different technique to perform an ordination of the data, NMDS (non-metric multidimensional scaling).\n\nStart by calculating a 2-dimensional NMDS of the data using M1, using the Bray-Curtis distance in the function `metaMDS`, saving the result to `nmds`. 
Again, make sure that samples are being ordinated and not Genera.\n\n> ### {% icon hands_on %} Hands-on: Exercise 11 \n>\n>  > ### {% icon question %} Question\n>  >\n>  > How to calculate the NMDS ?\n>  >\n>  > > <details markdown=\"1\">\n>  > > <summary>{% icon solution %} Solution\n>  > > </summary>\n>  > > ```R\n>  > > # calculate NMDS\n>  > > nmds = metaMDS(t(M1),distance = \"bray\") #actual NMDS command, matrix needs to be transformed to conform with vegan’s standards\n>  > > ```\n>  > > </details>\n>  >\n>  {: .question }\n{: .hands_on }\n\nTake a look at the `nmds` object and explore some of its features (e.g. type `str(nmds)` to see what variables are stored within the NMDS object). Try to find out what the `stress` of your ordination is. What does stress stand for (tip: go to the R help on metaMDS)? Next we can visualize the NMDS, similar to what you get out of PCA’s, displaying samples only:\n```R\n# plot NMDS\nplot(nmds,display =\"sites\")\n```\n\nThe important difference of NMDS compared to PCA is, that NMDS works with any kind of distance metric, while PCA can only use Euclidean distances between samples. A second important feature of NMDS is, that this method finds non-parametric, monotonic relationships between objects; in short: it doesn’t assume a specific data distribution. Why might these two features be important for ecologists? \n\nYou might have noticed that you see two clusters, similar to the hierarchical clustering of the data. We can get for each sample the identity within the two clusters using the `cutree` commands, specifying k=2 (2 clusters). This can be plotted into the NMDS with the following command:\n\n```R\n# identify clusters\nmemb = cutree(cluster, k = 2)\nordispider(nmds,memb)\n```\n\nCongratulations, you have just visualized the mouse enterotypes. Next we are going to look closer at these. 
If you want to know the exact methods to detect enterotypes in your data visit [http://enterotype.embl.de/enterotypes.html](http://enterotype.embl.de/enterotypes.html).\n\nIn the last step, we will test for all the genera in the matrix whether they show significant differences between two clusters. The scientific question we are posing here is: what are the significant differences in the gut microbiota between enterotypes? We will use a non-parametric test (Kruskal-Wallis) to do the tests, as ecological data is in most cases not normally distributed. This test is very similar to the Student's t-test, and the interpretation works just the same way. Use the function `kruskal.test` to test the first genus (`M1[1,]`) for significant differences between the two cluster groups (in object `memb`). Save the output of this command in variable `Kt`.\n\n> ### {% icon hands_on %} Hands-on: Exercise 12 \n>\n>  > ### {% icon question %} Question\n>  >\n>  > How to test if there is a difference between the two clusters for the first genus ? \n>  >\n>  > > <details markdown=\"1\">\n>  > > <summary>{% icon solution %} Solution\n>  > > </summary>\n>  > > ```R\n>  > > # Test if there is a difference between the two clusters for the first genus\n>  > > Kt = kruskal.test(M1[1,],memb)\n>  > > ```\n>  > > </details>\n>  >\n>  {: .question }\n{: .hands_on }\n\n\nLook at the output of this function. This will show you a human readable summary of the test and the result. You can access elements of a list (`Kt` is a list in this case) using the `$` operator. Try to extract the p-value from the `Kt` object.\n\nOnce you know how, we can start to calculate the significance for every genus in the M1 matrix. These p-values we will store in a newly created vector `pvals`. Let’s add the first 2 p-values to the vector:\n\n```R\n# Test if there is a difference between the two clusters for the first and second genera. 
Store p-values in a vector.\npvals = c()\npvals[1] = kruskal.test(M1[1,], memb)$p.value\npvals[2] = kruskal.test(M1[2,], memb)$p.value\n```\n\nSince doing this 73 times takes a long time, we will be using a for-loop to `loop` over the matrix and do this for us. We could as well use the apply function, but the syntax would get a little more complicated, since we are only interested in a subpart of the result, the $p.value part. Try to write a for-loop, to calculate the p-value 73 times.\n\n> ### {% icon hands_on %} Hands-on: Exercise 13 \n>\n>  > ### {% icon question %} Question\n>  >\n>  > How to test if there is a difference between the two clusters for all genera ? \n>  >\n>  > > <details markdown=\"1\">\n>  > > <summary>{% icon solution %} Solution\n>  > > </summary>\n>  > > ```R\n>  > > # Test if there is a difference between the two clusters for all genera\n>  > > for (i in 1:dim(M1)[1])\n>  > > {\n>  > >         pvals[i] = kruskal.test(M1[i,], memb)$p.value\n>  > > }\n>  > > ```\n>  > > </details>\n>  >\n>  {: .question }\n{: .hands_on }\n\nAs an additional help, you can add the name of the taxa to the pvals vector using the names command (that will name a vector):\n\n```R\n# Add names to the vector\nnames(pvals) = dimnames(M1)[[1]] \n```\n\nWhich taxa are significantly different?\n\nIn this case we will use the normalized M1 matrix, can you explain why we do not use the M or M2 matrix? Would either be wrong to use?\n\nIn total we were testing in 73 genera, if their p-value was below a threshold of 0.05. What is the chance of observing data with a p-value >0.05 by random chance? How many genera do you expect to be below this threshold by random chance? \n\nTo avoid statistical errors of this kind, we will use a Benjamini-Hochberg multiple testing correction, implemented in the R function `p.adjust`. 
Save the result as `qvals`.\n\n> ### {% icon hands_on %} Hands-on: Exercise 14 \n>\n>  > ### {% icon question %} Question\n>  >\n>  > How to perform multiple testing correction of p-values using Benjamini-Hochberg method ? \n>  >\n>  > > <details markdown=\"1\">\n>  > > <summary>{% icon solution %} Solution\n>  > > </summary>\n>  > > ```R\n>  > > # Multiple testing correction of p-values using Benjamini-Hochberg method\n>  > > qvals = p.adjust(pvals,method = \"BH\")\n>  > > ```\n>  > > </details>\n>  >\n>  {: .question }\n{: .hands_on }\n\nWhat do you see in this test? What would you report on this dataset, based on these values?\n\nTry sorting the q-values to see the most significant differences first:\n```R\n# Sorting q-values\nsort(qvals)\n```\n\nNow that you have finished the tutorials, you should be able to analyze any new dataset of amplicon data, using the LotuS pipeline and performing a basic analysis with R, including\n* Data normalization\n* Clustering analysis\n* Ordination\n* Univariate statistics\nYou can always expand upon these concepts, using this tutorial as starting point. Just remember that R is a very flexible language, and all these commands can be expanded for new purposes and visualizations.\n\n### Data sources\nAll the material provided in this tutorial is from a metagenomic study on mice knockouts. Further analysis of the data can be found in the reference below.\n\n### Reference \n\nHildebrand, F., Nguyen, A. T. L., Brinkman, B., Yunta, R. G., Cauwe, B., Vandenabeele, P., … Raes, J. (2013). Inflammation-associated enterotypes, host genotype, cage and inter-individual effects drive gut microbiota variation in common laboratory mice. Genome Biology, 14(1), R4. doi:10.1186/gb-2013-14-1-r4\n\n","# Introduction\n{:.no_toc}\n\n<!-- This is a comment. -->\n\n## Introduction to data collection\n\nBy now you will have obtained some idea of what research data management is all about. 
Now we will have a more in-depth look into the different phases of your research by starting with data collection.\n\nData collection involves understanding the different types of data you collect. Depending on the nature of your research, there are different methods of collecting data and thus different types of data.\n\nYour data may be physical (paper records or archival forms) or digital (database contents or Excel data). The source of your data may be external, you collect it yourself or you generate it from a machine.\n\nWhen you write your data management plan you will need to take into account the type of data you collect, the source of the data, and how you will process and analyse your data.\n\nYou can watch the video below, provided by TU Delft, about data collection. The video stops at 1:12.\n\n<iframe src=\"https://www.youtube.com/embed/AqnVrnVdv2Y\" allowfullscreen=\"\" allow=\"accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture\" height=\"515px\" style=\"display: inline-block;\" width=\"800px\" title=\"\"></iframe>\n\n## Preferred formats for your research data\n\nThis part is based on the online Research Data Management training 'MANTRA' of The University of Edinburgh (CC BY: [https://mantra.edina.ac.uk/](https://mantra.edina.ac.uk/)) and Managing Data @ Melbourne.\n\n<figure id=\"figure-1\"><img src=\"../../images/01_Preferred-formats_Learning_Objective.png\" alt=\"Introduction \"><figcaption><span class=\"figcaption-prefix\">Figure 1:</span> Learning objectives</figcaption></figure>\n\n---\nThe file formats you use to generate your research data will influence how you can manage them over time, i.e. a program or application must be able to recognise the file format in order to access your data within the file.\nFor example, a web browser is able to process and display a file in the HTML file format so that it appears as a web page. If the browser encounters another file type, it may need to call on a special plug-in to view it. 
Or it may simply let you download the file to view if it can recognise it in another program.\n\nTo identify the file format, files usually have a file name extension, or suffix that follows a full stop in the file name and contains three or four letters, like for example:\n\nTODO: add PDF with links to preferred file formats\n\n* .txt    text\n* .pdf    portable document format\n* .jpg    joint photographic experts group\n* .csv    comma separated values\n* .html   hypertext markup language\n* .xml  extensible markup language  \n* .rtf  rich text format\n\n<figure id=\"figure-2\"><img src=\"../../images/02_Preferred-formats_proprietary-formats-01.png\" alt=\"Proprietary formats\"><figcaption><span class=\"figcaption-prefix\">Figure 2:</span> Background on proprietary and open formats</figcaption></figure>\n\n---\n\n<figure id=\"figure-3\"><img src=\"../../images/02_Preferred-formats_proprietary-formats-02.png\" alt=\"Proprietary formats\"><figcaption><span class=\"figcaption-prefix\">Figure 3:</span> Background on proprietary and open formats</figcaption></figure>\n\n### {% icon question %} Question\n\nDetermine which format is proprietary and which is an open format\n\n.xml .pdf .psd .odf .ppt .docx .csv .xls\n\n<details markdown='1'>\n  <summary>Check your answers!</summary>\n\n1. Proprietary: .psd, .docx, .xls, .ppt \n2. 
Open format: .csv, .xml, .odf, .pdf\n\n</details>\n\nTODO: list of preferred formats\n\n<figure id=\"figure-4\"><img src=\"../../images/03_Preferred-formats-file-conversion.png\" alt=\"Risks of file conversion\"><figcaption><span class=\"figcaption-prefix\">Figure 4:</span> Risks of file conversion</figcaption></figure>\n\n### {% icon question %} Question\n\nWhile file conversion or migration sometimes has to be done, there are also risks.\n\nWhich ones can you think of?\n\n<details markdown='1'>\n  <summary>Check your answers!</summary>\n\n* file size may change and even become surprisingly large\n* blanks used as missing data code\n* special characters and end of line returns may change\n* relation among items in a table and among tables may be lost\n* layers, color fidelity and resolution may be lost or changed in image files\n* fonts, footnotes and links to other documents may change\n* frame rate, sound quality, codecs and wrappers may be altered in multimedia files\n* last characters in rows (due to row size limitations) may be altered\n\n</details>\n\n### {% icon hands_on %} Hands On \n\nConvert the following .docx file to the preferred format .txt: [PreferredFormatsExcersizePenguinDOC.docx](../../images/PreferredFormatsExcersizePenguinDOC.docx)\n\n1. Convert this docx file to the preferred format .txt\n2. Open the text file in an editor\n3. Is all formatting preserved OK?\n\n<details markdown='1'>\n  <summary>Check your answers!</summary>\n\nNo, the format Microsoft Word creates saves the layout together with the textual and other elements. The .txt format created by Word is only the textual information in your file.\n\n</details>\n\n### {% icon hands_on %} Hands On  \n\nConvert the following .docx file to the preferred format .odt: [PreferredFormatsExcersizePenguinDOC.docx](../../images/PreferredFormatsExcersizePenguinDOC.docx)\n\n1. Convert this docx file to the preferred format .odt\n2. Open the .odt file\n3. 
Is all formatting preserved OK?\n \n<details markdown='1'>\n  <summary>Check your answers!</summary>\n \nNo, ODT files are formatted using the OASIS OpenDocument XML-based standard. When you open an OpenDocument Text file in Word, it might not have the same formatting as it did in the original application it was created in. This is because of the differences between applications that use the OpenDocument Format.  \n \n</details>\n\n![Data compression](../../images/04_Preferred-formats-data-compression.png)\n\n## Discovering existing data\n\n### Where to discover existing data?\n\nWatch the screencast below. In this screencast, you will be guided through different ways to find data.\n\n<iframe src=\"https://www.youtube.com/embed/AZMUKgM8X-A\" allowfullscreen=\"\" allow=\"accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture\" height=\"515px\" style=\"display: inline-block;\" width=\"800px\" title=\"\"></iframe>\n\n> ### {% icon hands_on %} Hands On  \n> \n> You have just learned that there are different places to find data. By actively searching the different places, you will get an understanding of the differences.\n> Look at the different portals below. Some of them have been showcased in the screencast, some of them are additional.\n> \n> [Google](http://www.google.be) - add \"database OR registry OR dataset OR archive OR statistics\" to your search\n>\n> [Registry of Research Data Repositories re3data](https://www.re3data.org/) - find appropriate repositories holding interesting data\n>\n> [ZanRan]() - search engine for tables and graphes within .pdf or .html on the internet\n> \n> [Elsevier Data Search](https://datasearch.elsevier.com/#/) - try out chip-seq drosophila\n> \n> [Google Dataset Search]() - try out chip-seq drosophila. Google Dataset Search indexes [OmicsDI](https://www.omicsdi.org/), an index providing a knowledge discovery framework across heterogeneous omics data (genomics, proteomics, transcriptomics and metabolomics).   
\n\n### Assess the usefulness of existing data\n\nHow useful is a dataset? Follow this short tutorial.\n\n<iframe src=\"https://www.youtube.com/embed/t1SZutbCAxI\" allowfullscreen=\"\" allow=\"accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture\" height=\"515px\" style=\"display: inline-block;\" width=\"800px\" title=\"\"></iframe>\n\n### Assess the usefulness of existing data yourself\n\nIn the previous activity, the lecture described four main points to check if you want to reuse existing data:\n\n* Condition for reuse\n* Context\n* Trustworthiness\n* Persistence\n\nIn the following quizzes, take a closer look at the description or metadata of some datasets and assess the usefulness of these datasets yourself. As the description or metadata of datasets can be lacking in several different areas at the same time, it will be indicated per assignment on which of the four main points your focus should be.\n\n### {% icon hands_on %} Hands On  \n\nCan you re-use [this dataset](https://dataverse.nl/dataset.xhtml?persistentId=hdl:10411/UWAU3K) on Spatial Patterns of Water-dispersed Seed Deposition along Stream Riparian Gradients in DataverseNL?\n\n1. Maybe \n2. Yes \n3. No \n\n<details markdown='1'>\n  <summary>Check your answer!</summary>\n\nYes, the Terms of use indicate that there is a Creative Commons license 'Public Domain Dedication', which means you can copy, modify, distribute and perform the work, even for commercial purposes, all without asking permission. \n\n</details>\n\n### {% icon hands_on %} Hands On  \n\nCan you re-use [this weather dataset](http://www.climatemps.com/)?\n\n1. Maybe\n2. Yes\n3. No\n\n<details markdown='1'>\n  <summary>Check your answer!</summary>\n\nMaybe, although the website states 'We hope that you will enjoy using ClimaTemps as much as we have enjoyed developing it!', there is no clear license or use agreement and directions on how to cite the data are lacking. The use has not been defined nor explained. 
In this case of re-use you should simply contact the creators.\n\n</details>\n\n### {% icon hands_on %} Hands On  \n\nGiven the following description of a dataset: can you assess the usefulness of this dataset to establish cholestasis (an unhealthy condition of the liver) parameters in livers in the age group of puberty through adulthood? Please focus on the context.\n\nDescription: \"We measured livers for several parameters of cholestasis. The subjects were in advanced stages of liver cancer.\"\n\n1. Maybe\n2. Yes\n3. No\n\n<details markdown='1'>\n  <summary>Check your answer!</summary>\n\nNo, the dataset is not useful because the subjects have cancer. This should affect the values of parameters for cholestasis. You would rather have a dataset of healthy subjects.\n\n</details>\n\n### {% icon hands_on %} Hands On  \n\nWould you trust the following dataset on heart rate under severe physical stress?\n\nHeart rate (beats per minute): 124, 160, 240, 0, 120, 400, 198, 156, 167 \n\nPlease focus on the trustworthiness. \n\n1. Maybe\n2. Yes \n3. No\n\n<details markdown='1'> \n  <summary>Check your answer!</summary>\n\nNo, there are weird values in the dataset, a value of zero is unlikely. And overall, the values are on the high side.\n\n</details>\n\n### {% icon hands_on %} Hands On  \n\nIs your research likely to be reproducible when you use the following infrastructure?\n\nThe dataset is created during a PhD. Conditions for use state that it is a dataset stored and shared by the PhD student on his university account.\n\n1. Maybe\n2. Yes \n3. 
No\n\n<details markdown='1'> \n  <summary>Check your answer!</summary>\n\nNo, it is unlikely that the dataset can be reused since you do not have certainty that the files stored on the university file drives are available for at least 10 years, which is the current rule for data availability.\n\n</details>\n\n\n## Describe what kind of data you will generate\n\nHaving a clear view of what data you will generate will enable you to plan its management. You can create an overview of the data you produce or collect by drawing the data in a workflow, or noting down in a table.\n\nPlease watch the video below. Tessa Pronk will explain to you how to describe your data.\n\n<iframe src=\"https://www.youtube.com/embed/KE2UpZY4wYA\" allowfullscreen=\"\" allow=\"accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture\" height=\"515px\" style=\"display: inline-block;\" width=\"800px\" title=\"\"></iframe>\n\n### Order elements in your data flow\n\nTODO: add H5P quiz\n\n### Copyright and Intellectual Property Rights (IPR) issues\n\nCopyright is a form of intellectual property right which arises automatically if an original work is created. Copyright may affect the way data may be stored, shared and reused. You should ask yourself who the copyright holder of your datasets is, especially when you use existing data or when you collaborate with external parties.\n\n**Using someone else’s research data**\nSURF provides a brief guide to determining what consent is necessary to reuse someone else’s data (see \"A brief guide ... someone else's data\" in the resources below)  \n\n**Clarifying the ownership of your research data**\n\nTODO: change accordingly for VIB\n\nOfficially VIB, as your employer, is considered the rights holder to the research data you create. You, as a researcher, have the primary responsibility for taking care of the data. Questions on data exploitation may be even more important than those of ownership. Who can use the data? Who can publish it? 
Who can provide it to third parties?  \n\nWe strongly recommend that you deal with the issues around data exploitation at an early stage of your research project. Write down agreements between yourself, your supervisor, project members and other interested parties in your Data Management Plan.\n\nTODO: change accordingly\n\nRDM Support offers you a Guide to legal instruments and agreements for research data management (see the Guide 'Legal instruments and agreements')\n\n**Confidential or privacy-sensitive data**\nWhen your research project has received data under confidentiality or under legal privacy restrictions, you will have to identify and explain how you will deal with these restrictions in your data management plan (also see ‘Learning Unit: Handle - Data security’).\n\n### Costs involved with managing your data\n\nTODO: https://www.uu.nl/en/research/research-data-management/guides/costs-of-data-management\n\nThe costs of data management and sharing activities must be included into your research, in terms of time and resources needed.\n\n**1. Data Management Cost Guide**\n\nWhen you plan your research you may not be able to oversee all costs involved. Nevertheless, it is useful to have an idea of possible costs at an early stage. You can use the Guide 'Costs of Data Management', which is a practical overview of possible costs per activity within each phase of the research process. Note: The Cost Guide offers cost indications and examples. These are not real prices.\n\n**2. Budget your data management costs**\n\nYou are advised to budget the data management costs as separate data management costs. These costs are eligible for funding with funders like NWO and the European Commission, as long as the costs are invoiced before the end of the project.\n\n**3. Planning can save time and money**\n\nPlanning an early start for certain activities within your research project can lower the costs for data management in the run of your project. 
You can save time by:\n\nProperly describing your data while collecting it, instead of doing it afterwards\nChoosing the right file format so that file conversion afterwards is not necessary\nHiring an experienced data manager\nSpending time to think about data activities beforehand can help prevent unexpected extra efforts and costs later on in your research project.\n\n### Check the current and expected costs for your research data\n\nYou have just learned that in many parts of a research project there are data related costs. These costs depend on the type and volume of data you produce, analyse and store.\n\nTODO: link to file (calculation) https://lll-platform.uu.nl/pluginfile.php/4907/format_elevated/resource/0/Cost%20overview.docx\n\n### Write your data management plan for your data collection\n\nGo to DMPonline and open your draft data management plan created in the Introduction.\n\nYou have now completed the module Data collection. You should be able to complete the following questions in the section Data collection:\n\n* Will you use existing data?\n* What data will you collect or create?\n* How will the data be collected or created?\n* How will you manage rights issues?\n* What are the costs involved in managing and storing your data?\n\n","# Basic Statistics Theory \n{:.no_toc}\n\nThis introductory video has been created during a livestream session in March 2020.\n\n<iframe src=\"https://www.youtube.com/embed/Zd9FkB348zk\" allowfullscreen=\"\" allow=\"accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture\" height=\"515px\" style=\"display: inline-block;\" width=\"800px\" title=\"\"></iframe>\n","<!-- This is a comment. 
-->\n\n## Why manage your research data?\n\nIn this video Katarzyna Biernacka explains what data in a research context is.\n\n<div>\n<iframe src=\"https://www.youtube.com/embed/XCckz_4mlhU\" allowfullscreen=\"\" allow=\"accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture\" height=\"515px\" style=\"display: inline-block;\" width=\"800px\" title=\"\"></iframe>\n</div>\n\nCC-BY-4.0: Katarzyna Biernacka, HU Berlin & [Discipline Workshops 2019](http://www.discipline-workshops.com/)\n\nManaging your data effectively is crucial to the success of your research. This doesn't only apply to the immediate context of your thesis or publications. Managing your data is a practice that will benefit you throughout your research career. The following list gives an overview of what benefits are evident.\n\n1. **Access, Re-use & Recognition**\n   * Facilitating future research by allowing others to build on or add to your research data.\n   * Increased citations of research data and of publications based on that data.\n2. **Efficiency**\n   * Increasing your research efficiency by saving time and resources.\n   * Preventing duplication of effort by enabling others to use your data.\n3. **Quality & Security**\n   * Ensuring the integrity and reproducibility of your research.\n   * Ensuring that research data and records are accurate, complete, authentic and reliable.\n   * Enhancing data security and minimising the risk of data loss.\n4. **Compliance**\n   * Meeting legal obligations, restrictions and codes of conduct.\n   * Meeting the University policy for research data requirements.\n   * Meeting funding body grant requirements.\n   * Meeting publisher requirements for data access.\n\n\n## A case to consider\n\nMarleen is an early career researcher. She completed her PhD about four years ago and is now a postdoctoral research fellow at a different university. 
Since she obtained her PhD, she has published a number of journal articles based on her doctoral research. Her papers have been cited widely in the literature of her field. But just recently a fellow researcher has questioned her findings. He has gone so far as to suggest that the data on which her research was based is inaccurate. One implication is that the data could even have been falsified. Marleen is confident that her research is valid and that her data is accurate.\n\n\n- What steps could Marleen take to verify her research findings?\n- What evidence would she need to demonstrate that she hasn't falsified her data?\n\nThink about your own research. If someone accused you of research misconduct, would you be in a position to defend your research and reputation? List some strategies you could implement right now that would assist you, should you ever find yourself in Marleen’s situation.\n\n## Data disasters – postcards from the edge\n\nThe following are real examples where researchers or data centers have lost crucial data. Could any of these ever happen to you? With good planning you could avoid or reduce the impact of such occurrences.\n\nTODO: add H5P \n\n<iframe src=\"https://lll-platform.uu.nl/mod/hvp/embed.php?id=2295\" width=\"800px\" height=\"664\" frameborder=\"0\" allowfullscreen=\"allowfullscreen\"></iframe><script src=\"https://lll-platform.uu.nl/mod/hvp/library/js/h5p-resizer.js\" charset=\"UTF-8\"></script>\n<script src=\"https://lll-platform.uu.nl/mod/hvp/library/js/h5p-resizer.js\" charset=\"UTF-8\"></script>\n\n## University policy framework for research data\n\nFor the Flemish universities, it is important that all researchers honour scientific standards, including the meticulous and ethical treatment of research data.\nThis policy is intended to set out parameters to safeguard the quality, availability and accessibility of research data within any Flemish university. 
It provides a basis for evaluating compliance with laws, regulations and codes of conduct. The policy also clarifies the various roles and responsibilities of university staff in managing research data.\n\nThe highlights of the policy are:\n* Archive (relevant and valuable) research data for a minimum of ten years;\n* Store data in a structure that is suitable for long-term preservation and later consultation;\n* Provide metadata to describe the data with sufficient clarity to ensure they are findable for further research;\n* Make archived research data available for access and reuse at and outside VIB insofar as is reasonably possible;\n* Each individual researcher / research leader is responsible to draw up a Data Management Plan (DMP) at the start of the research project and to follow up the agreements made in this plan;\n* Scientific directors are responsible for the implementation and monitoring of the University policy framework and for drawing up additional faculty guidelines to this end if needed.\n\nLinks to the Policy Frameworks of the Flemish Universities\n\n* [Policy Framework from Ghent University](https://www.ugent.be/en/research/datamanagement/rdm-policy.pdf/at_download/file)\n\n* [Policy Framework from KU Leuven](https://www.kuleuven.be/english/research/scholcomm/rdm/policy-plan-rdm-ku-leuven-2014)\n\n* [Policy Framework from UHasselt](https://bibliotheek.uhasselt.be/sites/default/files/uploads/RDM/20180517_UH_RDM_PolicyPlan_NL.pdf)\n\n* [Policy Framework from VUB](https://vub.sharepoint.com/sites/ORG-RandD/SitePages/RESEARCH-DATA-MANAGEMENT.aspx?web=1)\n\n## Policy in Practise\n\nIn this short video Prof. dr. Chantal Kemner explains the importance of good data management for Utrecht University. 
Chantal is full professor of Biological Developmental Psychology in Utrecht at the faculty of social sciences and since 2013 also at the UMCU.\n\n<iframe src=\"https://www.youtube.com/embed/f48l4Uca9nA\" allowfullscreen=\"\" allow=\"accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture\" height=\"515px\" style=\"display: inline-block;\" width=\"800px\" title=\"\"></iframe>\n\n## Funder requirements\n\nMore and more research funders explicitly require you to consider the management and publication of your research data, both during and after your research project. The European Commission and the Flemish funder FWO have explicit policies on research data management.\n\n**European Commission - Horizon 2020**\n\nThe European Commission wants “Horizon 2020 beneficiaries to make their research data findable, accessible, interoperable and reusable (FAIR), to ensure it is soundly managed. Good research data management is not a goal in itself, but rather the key conduit leading to knowledge discovery and innovation, and to subsequent data and knowledge integration and reuse.” Horizon 2020 is the biggest research and innovation program of the European Commission.\n\n[![European Commission - Horizon 2020](../../images/O_Funders_Screenshot_H2020guidelines.JPG)](https://www.nwo.nl/en/policies/open+science/data+management)\n\n**FWO**\n\nFWO states that “FWO has made data management a key element of its policy for all support channels provided by the FWO. The FWO expects researchers to pay due attention to this dimension before, during and for at least five years after their research.”\n\n[FWO Overview Data Management Plan](https://www.fwo.be/en/the-fwo/organisation/data-management-plan/)\n\n## Funder guidelines and templates\n\nMost funders require you to write a Data Management Plan. A DMP outlines all key aspects of collecting, storing and managing research data during and after a project. 
For this they provide you with guidelines, forms, templates and examples. For more information you can download the documents under Resources or check out the websites. You can also contact your faculty Research Support Office:\n\n- [EC – Horizon 2020: guidelines](https://ec.europa.eu/research/openscience/index.cfm)\n- [FWO template](https://www.fwo.be/media/1023898/fwo-dmp-templatedocx.docx)\n\n## Writing a data management plan\n\nBy now it should be clear that data needs to be properly managed throughout its lifecycle. The most effective way to do this is to create a Data Management Plan (DMP). This will take into account all the stages of the research data lifecycle. As outlined earlier, each individual researcher or research leader is responsible for drawing up a data management plan. He or she should do this at the start of the research project. And during the research you should actively follow up on the agreements made in this plan.\n\nThink about our early career researcher Marleen (introduced in ‘A case to consider’) who needs to defend herself against accusations of research misconduct. As well as defending against misconduct accusations, some additional benefits of creating a data management plan include:\n\n- Accessing your data more easily;\n- Prioritising and balancing activities relating to research data collection and storage;\n- Mitigating data loss;\n- Reaching agreement between stakeholders about ownership of data;\n- Reducing time and effort in the long term.\n\nThe good news is that this online training will take you through the necessary steps to create a plan during the subsequent modules.\n\n## Getting started with DMPonline\n\nWe offer you DMPonline to create your Data Management Plan. DMPonline is an international online service that guides you in creating a DMP by answering a series of questions about your research project. It allows you to create, share, store, and revise your data management plans online. 
You will be asked to complete different sections of your DMP as we go through the other modules. As a result you will have written your own data management plan at the end of this course.\n\nWith DMPonline you can:\n\n* Write your plan and keep it up-to-date\n  * You can easily update your DMP throughout the lifecycle of a project\n\n* Share plans online\n  * DMPonline allows collaborative access, so you can share your DMP with other researchers, within and outside of your university.\n\n* Create multiple plans\n  * You can store different DMPs for different projects. And you can make a copy of a previous plan as the basis for writing a new one.\n\n* Download plans\n  * You can download your DMP in a variety of formats.\n\nWe recommend that graduate researchers share their data management plans with their supervisor(s).\n\n<iframe src=\"https://player.vimeo.com/video/251506151\" allowfullscreen=\"\" allow=\"accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture\" height=\"515px\" style=\"display: inline-block;\" width=\"800px\" title=\"\"></iframe>\n\n## About RDM Support\n\nRDM Support provides all kinds of research data management assistance to researchers of VIB in all stages of their research. This can range from one-off individual advice to large-scale infrastructure coordination.\n\nYou can find an overview of the contact details of the main host institutions for DMP related questions and guidance are as follows:\n\n* AMS: Bart Cambré (bart.cambre@ams.ac.be)\n* Hogere Zeevaartschool: Marc Vervoort (marc.vervoort@hzs.be)\n* ITG: Ann Verlinden (averlinden@itg.be)\n* KU Leuven: rdm@kuleuven.be\n* UAntwerpen: RDM-support@uantwerpen.be\n* UGent: Myriam Mertens and Annik Leyman (rdm.support@ugent.be)\n* UHasselt: Sadia Vancauwenbergh (rdm@uhasselt.be)\n* Vlerick: Eva Cools (eva.cools@vlerick.com)\n* VUB: dmp@vub.be\n* VIB: bits@vib.be\n","# Installation\n## Windows\n> Requirements to install E-Notebook 2014: \n> 1. Microsoft Windows\n> 2. 
MS Office, Adobe Reader (or similar)\n> 3. ChemBioDraw (optional - see STEP 2)\n> 4. Valid VIB login credentials. Check your login and password on [https://storefront.vib.be/](https://storefront.vib.be/).\n\n**STEP 1: E-Notebook 2014**\n\n1. Browse to [https://eln.vib.be/clickonce/](https://eln.vib.be/clickonce/)\n2. Click “Install” and open the file\n3. After the installation, the software is automatically launched and the login window appears\n4. Log in with your VIB credentials (see requirements)\n5. Close E-Notebook after successful launch: File - Exit or 'X' in the right upper corner\n6. Generate a shortcut on the desktop (right click - Send to - Desktop): All Programs - PerkinElmer - E-Notebook 2014 Client\n7. Install ChemBioDraw (STEP 2)\n\n**STEP 2: ChemBioDraw**\nNote: In case you only reinstall the ELN client, you don't have to reinstall the ChemBioDraw component\n1. Download the ChemBioDraw installation file from the same website as E-Notebook 2014: [https://eln.vib.be/clickonce](https://eln.vib.be/clickonce)\n2. Start the installation\n3. Install ChemBioDraw ActiveX component in suggested destination\n4. Follow the installation wizard instructions\n5. Click on “Install” and subsequently on \"Finish\"\n\n> Why use ELN through Citrix on Windows? \nSome older Windows versions cause problems with the E-Notebook 2014 Client installation.\n\n**STEP 1: Citrix Workspace app**\n1. Browse to [www.citrix.com](https://www.citrix.com) \n2. Click on Download\n3. Select Citrix Workspace app from the list of possible downloads\n4. Download and install Citrix Workspace app\n\n**STEP 2: Launch ELN online**\n1. Browse to [https://storefront.vib.be](https://storefront.vib.be)\n2. Login with your VIB credentials\n3. Launch the ELN application by clicking on the icon\n4. If your browser asks to download and open an .ica file, please agree\n5. Citrix Workspace will open and launch the application\n\n## MacOS, Linux, mobile devices\n**STEP 1: Citrix Workspace app**\n1. 
Browse to [www.citrix.com](https://www.citrix.com) \n2. Click on Download\n3. Select Citrix Workspace app from the list of possible downloads\n4. Download and install Citrix Workspace app\n5. After the installation on Linux execute the following command:\n```\nsudo cp -a /usr/share/ca-certificates/mozilla/DigiCert_Assured_ID_Root_CA.crt /opt/Citrix/ICAClient/keystore/cacerts/\n```\n\n**STEP 2: Launch ELN online**\n1. Browse to [https://storefront.vib.be](https://storefront.vib.be)\n2. Login with your VIB credentials\n3. Launch the ELN application by clicking on the icon\n4. If your browser asks to download and open an .ica file, please agree\n5. Citrix Workspace will open and launch the application\n\n# Support\n- Call us at +32 (0)9 248 16 15\n- Mail us at eln@vib.be","# Login\nWhen launching the application (Windows: double-click the **E-notebook 2014 client** icon – Citrix: click on the ELN 2014 icon and open the .ica file, Citrix Workspace will launch the application), you will see the following login window:\n\nIn order to login on ELN, you need a **valid VIB account**. The VIB username usually has a format like: *firstname lastname*. More information on [https://help.vib.be](https://help.vib.be) or mail eln@vib.be.  \n\nWhen clicking on **Connect** the application will retrieve your data. The **Work Offline** option is only available with the client installation and will allow you to make adjustments to the data in your Offline folder.\n\n> Note: when launching the application for the first time, a download of all collections will start, this usually takes 1 or 2 minutes.\n\n# Layout\nThe layout resembles Microsoft Office. 
It has 3 main parts: the ribbon with options on top, the navigation and history area on the left and the working area on the right.\n\nThe default starting point is the Home location, this gives an overview of all data in the navigation area on the left and any experiments modified in the past month on the right.\nIn the Audit Trail (bottom left) you can find the history of the object selected above. This history allows you to access previous versions of an experiment and retrieve a file in order to bring it back to the present. Every version has a timestamp and operator (= user that pressed the save button). Previous versions of an experiment can't be modified, only the last version is adjustable.\nNavigating to your colleagues or Home can be done with the orange icons in the upper left corner. Next to the navigation buttons you find the Save button. When saving you can add annotations as well.\n# Ribbon\nThe Ribbon is where you can find the options corresponding with your selection (navigation area or section). By default, there are three tabs: Home, View and Data. Sections have specific tabs in the ribbon, e.g. Document, Image, Text, Table, Property List, etc. An example can be found below (Text):\n\n# Project, Notebook, Experiment\nThere are 3 basic levels to organize your data: Project, Notebook and Experiment (see icons below). You can see them as folders with a certain hierarchy. Only an experiment contains files. To add one of the levels click on the icon in the **Home** tab in the ribbon. \n\n# Sections\nAn experiment consists of sections, every section is a file or page. To add a section, select the icon in the **Home** tab in the ribbon. Some sections are hidden behind the **Other** button.\nYou can add sections automatically by dragging and dropping them into your experiment. E-Notebook will recognize Word, Excel and PowerPoint files, PDF documents and images. 
GraphPad Prism files are not native to E-Notebook and will result in an Ancillary data section, this will happen with any other file type that is not native to the program.\n## General Page\nCreating a new experiment will give you a blank experiment with only one section, by default this is the General page. This is an example of a General Page:\n\nEvery lab group has a slightly different version of this General page. The universal parts of this section are the **General Information** and the **Reference to experiment** field. In the first field you have the option to enter general properties of your experiment such as start date, project, etc. Adding extra properties is available in the **Property List** tab in the ribbon.\n\nAdding a reference to your experiment can be very useful to link similar experiment to each other or make a series of experiments. This refence can be any experiment within your group. To add a reference, click on the option in the **Home** tab in the ribbon.\n\nAs last there are 3 or 4 text boxes to add keywords, aim of experiment, results, specifications or a conclusion.\n## Microsoft Office sections\nThree MS Office applications are supported in the E-Notebook software: Word, Excel and PowerPoint. All other MS Office files can be uploaded using the Ancillary Data section.\n\nFor the supported application you can add files using the corresponding section. This will initially display a (print) preview of the file, double-clicking the preview will launch the MS Office application to make adjustments. All other options are displayed in the ribbon:\n\n## Images\nUsing the Image section in E-Notebook will allow you to import one (1) image file. All common image extensions are supported, camera brand specific files (e.g. RAW or DNG) can be uploaded using a non-file-specific section. 
Next to the image file itself you can add a title and notes.\n\n## PDF files and Captured Image\nUsing the PDF section in E-Notebook will allow you to import 1 PDF file. Next to the PDF file itself you can add a description, date and a document name.\n\n## Ancillary Data (a.k.a. Binder)\nThis non-file-specific section will save 1 file. In order to open the file, you must double-click on it, this will launch the corresponding application outside ELN. Closing the external application again (e.g. after making adjustments) will result in this window:\n\nClick **Continue** to save your changes and re-upload the new file in ELN or click **Cancel** to ignore the changes.\n## Supplementary Data Management (SDM)\nFiles imported in this section will be saved on an internal network drive linked to ELN. This means that files in SDM won't be accessible outside of your research center or university network. Files in the SDM section are not subject to the file size limit of 30 MB. \nNext to the default list of sections, there are some lab-specific sections for PCR or Western Blot. To add one of these lab-specific sections, click on the **Other** icon and select your section.\n\n# Sharing data and linking experiments\n## Access rights for others\nTo grant a colleague access to your data, you simply select the object and click on the View tab in the ribbon. In the Properties field you click on Security. A new window will appear (left picture). The inherited privileges are default settings, you’re not able to modify this. The assigned privileges on the other hand can be modified by clicking ‘Grant’.\n\nBy filtering on user group or user you can select the group/person (right picture). The type of privilege can be: read, read and write, full control. You can define this in the next window.\n\nRemoving the privilege can be done by selecting the person or group and clicking on ‘Remove’. 
For both granting or removing access privileges there is no notification system, you have to tell them yourself.\n## Experiment shortcuts\nWhen a colleague granted you access to a project/notebook/experiment you can place a link to this object in your own ELN. This makes navigating to this object easier and allows you to group all your collaborations within your own ELN hierarchy. To create such a shortcut, follow these steps:\n1. Select the object of interest\n2. Right click – Copy\n3. Navigate to your own ELN\n4. Right-click on the location you want the link to appear\n5. Select Paste Reference\n\n> Note: shortcuts can be removed, the original data however is not deleted. \n## Templates\nTemplates can be created by every user and can be shared with your colleagues. To create a template, follow this procedure:\n\n1.\tnavigate to ‘User Configuration’ – ‘Templates’\n2.\tcreate new experiment\n3.\tbuild your new default experiment/template by adding information/sections\n4.\tsave your template\n\nNext time you want to create a new experiment, you will have the option to create a blank or template experiment. \n## Search\nThe collection search can be used for users, projects, notebooks and experiments. No content can be found with the search box in the upper right corner.\nThe Advanced Search option can find experiment content. You can find it in ‘Quick Links’ above the navigation pane.\n\n\n","# Introduction\n{:.no_toc}\n\n<!-- This is a comment. -->\n\n<details>\n  <summary>Click to expand!</summary>\n\n*Heading*\n1. A\n2. 
list\n   * With some\n   * Sub bullets\n\n<figure id=\"figure-1\"><img src=\"../../images/Seqselector.png\" alt=\"seqselector.png\"><figcaption><span class=\"figcaption-prefix\">Figure 1:</span> Seqselector.png</figcaption></figure>\n</details>\n\n### TODO: specific chapter on storage\n\n### Write your data management plan for your data storage\n\nGo to DMPonline and open your draft data management plan created in the Introduction.\n\nYou have now completed the module on data storage. You should be able to complete the following questions in the section ‘Data documentation’:\n\nWhere will you store your data?\nHow will the data be backed up?\nAfter finishing this part in DMPonline, please return to the learning environment and click on [Complete]. This takes you back to the course overview. Continue with the next learning unit.\n\nYou can ask your faculty or project data manager or RDM Support for a review of your DMP once you have finished writing all or parts of your DMP.\n","# Introduction\n{:.no_toc}\n\n<!-- This is a comment. -->\n\n<details>\n  <summary>Click to expand!</summary>\n\n*Heading*\n1. A\n2. list\n   * With some\n   * Sub bullets\n\n<figure id=\"figure-1\"><img src=\"../../images/Seqselector.png\" alt=\"seqselector.png\"><figcaption><span class=\"figcaption-prefix\">Figure 1:</span> Seqselector.png</figcaption></figure>\n</details>\n\n### Introduction to rounding up\n\nYou have almost reached the end of this course on research data management.\n\nYou have learned about data collection, data documentation, data storage and security, selection and preservation and making data available for reuse.\n\nWe are very curious to know if this course has helped you write your Data Management Plan (DMP).\n\nTo round up:\n\n* We want to remind you of the DMP review service of RDM Support;\n* We want to share some good practices of data management with you;\n* We invite you to fill out the evaluation of this online training. 
This will help us to further develop this training and future learners can benefit from this. Thank you very much!\n\n### DMP review service\n\nYou can have your data management plan (DMP) checked by the specialists of Research Data Management Support. You can get in touch if you are unsure about sections in your DMP or when you doubt whether your plan fits the requirements of your research funder.\n\nWhen you are in the process of writing a proposal for a research funder and you want a check on the data section, you can also contact the Research Support Office (RSO) of your faculty.\n\n### Researchers sharing their experiences\n\nTODO: add stories if available or links to resources\n\nTODO: merge with experiences\n\n### More data stories\n\nChallenges in irreproducible research\nspecial issue in Nature, 7 Oct 2015\n\nThere is growing alarm about results that cannot be reproduced.  Explanations include increased levels of scrutiny, complexity of experiments and statistics, and pressures on researchers. Journals, scientists, institutions and funders all have a part in tackling reproducibility. 
Nature has taken substantive steps to improve the transparency and robustness in what they publish, and to promote awareness within the scientific community.\n\nData stories in environmental science\ncollected by DataONE\n\nSuccess stories and cautionary tales from researchers related to their experiences with managing and sharing scientific research data as collected by DataONE.\n\nAdvantages of data sharing\nby John-Alan Pascoe of Delft University of Technology\n\nJohn-Alan Pascoe, researcher at the Faculty of Aerospace Engineering at Delft University of Technology, explains the advantages he experienced after sharing his raw and derived data in the data archive of 4TU.ResearchData.\n\n\n<iframe src=\"https://www.youtube.com/embed/Q7vC0v988R4\" allowfullscreen=\"\" allow=\"accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture\" height=\"515px\" style=\"display: inline-block;\" width=\"800px\" title=\"\"></iframe>\n\n### Evaluation of training\n\nTODO: link to questionnaire\n\n","## Introduction to data security\n\n<!-- This is a comment. -->\n\nBy now you know more about how to manage your data collection, how to organise and document your research data and where and how to store your data.\n\nNow we will take you into the world of keeping data safe and secure.\n\n**Loss of data, loss of academic career**\n\nThe loss of scientific data can have a devastating impact on careers. Imagine that you lose all of the research data you've been diligently collecting for four years. Now imagine the knock-on effect: you won't get the PhD you've been working towards, affecting your future career. This nightmare happened to Billy Hinchen, a biologist at Cambridge University. 
Listen to his story.\n\n<iframe src=\"https://www.youtube.com/embed/3xlax_Iin0Y\" allowfullscreen=\"\" allow=\"accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture\" height=\"515px\" style=\"display: inline-block;\" width=\"800px\" title=\"\"></iframe>\n\n### Data breaches\n\nThere are several examples of (mainly online) data storage going wrong, leading to leaks of sensitive and personal information.\n\nThe picture below shows the biggest cases of data breaches in the past 10 years. They involve some well-known, highly regarded and trusted companies as well as some practices from the academic world.\n[Read about the story](http://www.informationisbeautiful.net/visualizations/worlds-biggest-data-breaches-hacks/)\n\n<figure id=\"figure-1\"><img src=\"../../images/data-breaches.png\" alt=\"examples about data breaches\"><figcaption><span class=\"figcaption-prefix\">Figure 1:</span> Biggest data breaches</figcaption></figure>\n\n## Prevent unauthorised access\n\nData security may be needed to protect intellectual property rights, commercial interests, or to keep personal or sensitive information safe. Data security involves security of data files, computer system security and physical data security. All three need to be considered to ensure the security of your data files and to prevent unauthorised access, changes, disclosure or even destruction. Data security arrangements need to be proportionate to the nature of the data and the risks involved. Attention to security is also needed when data are to be destroyed.  If data destruction is in order, you need to make sure that the destruction process is irreversible.\n\nLearn about different measures depending on the kind of security you need.\n\n**Security of data files**\n\nThe information in data files can be protected by:\n\n* Controlling access to restricted materials with encryption. By coding your data, your files will become unreadable to anyone who does not have the correct encryption key. 
You may code an individual file, but also (part of) a hard disk or USB stick\n* Procedural arrangements like imposing non-disclosure agreements for managers or users of confidential data\n* Not sending personal or confidential data via email or through File Transfer Protocol (FTP), but rather by transmitting it as encrypted data e.g. [FileSender](https://filesender.belnet.be)\n* Destroying data in a consistent and reliable manner when needed\n* Authorisation and authentication: for personal data you have to give very selective access rights to specified individuals.\n\n**Computer security systems**\n\nThe computer you use to consult, process and store your data, must be secured:\n\n* Use a firewall\n* Install anti-virus software\n* Install updates for your operating system and software\n* Only use secured wireless networks\n* Use passwords and do not share them with anyone. Do not use passwords on your UU computer only, but also on your laptop or home computer. If necessary, secure individual files with a password.\n* Encrypt your devices (laptop, smartphone, USB stick/disk).\n\n**Physical data security**\n\nWith a number of simple measures, you can ensure the physical security of your research data:\n\n* Lock your computer when leaving it for just a moment (Windows key + L)\n* Lock your door if you are not in your room\n* Keep an eye on your laptop\n* Transport your USB stick or external hard disk in such a way that you cannot lose it\n* Keep non-digital material which should not be seen by others, in a locked cupboard or drawer.\n\n**Data classification**\n\nTODO: what to do with classified data\n\n**Data that contain personal information**\n\nThese data should be treated with higher levels of security than data which do not. 
You will learn more about privacy-sensitive data in the e-module.\n\n## What is your experience with unauthorised access to your research data?\n\nTODO: implementation form widget\n\nWe are interested to know if you have ever experienced unauthorized access to any of your research data. When you give your reply, we will show you an overview with the responses of other researchers in this course. All responses will be processed anonymously.\n\n[(1)] No, I am sure about that\n[(2)] Not that I am aware of\n[(3)] Yes, without much consequences\n[(0)] Yes, with severe consequences\n\n### Legal agreements and contracts\n\nOften other people are required to handle your data, or you might be the person that handles other people’s data.\n\nTo arrange the security of the research data you work with, in many cases you have to make a (legal) agreement with other people involved. These agreements will make explicit permitted uses, retention time, and agreed upon security measures. Find out what legal contracts you can use by studying the figure below.  TODO: Visit the Guide 'Legal instruments and agreements' for more information\n\nFor tailored advice and templates, contact Legal Affairs via your faculty Research Support Officer (RSO)\n\nTODO: add link\n\n<figure id=\"figure-2\"><img src=\"../../images/AgreementsPicture.png\" alt=\"Legal Agreement contacts -80width\"><figcaption><span class=\"figcaption-prefix\">Figure 2:</span> Agreements types for data</figcaption></figure>\n\n### When to use which legal contract?\n\nYou have been acquainted with the different flavors of legal agreements. Is it clear to you when you need which agreement? 
Please answer the following questions by choosing the right kind of agreement.\n\nTODO: add quiz or H5P quiz\n\n### Privacy-sensitive data\n\n<figure id=\"figure-3\"><img src=\"../../images/01_privacy-sensitive-data-learning-objectives.png\" alt=\"start privacy-sensitive data\"><figcaption><span class=\"figcaption-prefix\">Figure 3:</span> Personal data - learning objectives</figcaption></figure>\n\n---\n\n**Privacy in a nutshell**\n\nPrivacy is a fundamental right. With regards to privacy, we all have two perspectives:\n\n1. How is your privacy protected?\n2. How can we, as a researcher, protect the privacy of the people involved in our research (the data subjects)?\n\nTODO: add link to document and image screenshot\n<figure id=\"figure-4\"><img src=\"../../images/LCRDM-privacy-reference-card-why-Version-02.pdf\" alt=\"privacy reference card\"><figcaption><span class=\"figcaption-prefix\">Figure 4:</span> Privacy reference card</figcaption></figure>\n\n**Six principles from the European General Data Protection Regulation 1/2**\n\nThe European General Data Protection Regulation (GDPR) outlines how we should work with privacy-sensitive data.\n\nTODO: create working infographics with images\nsee http://gdprcoalition.ie/infographics\n\n**Six principles from the European General Data Protection Regulation 2/2**\n\nAccording to the GDPR processing of personal data must be done according to 6 principles.\n\nTODO: create HP5 document\n\nThe GDPR outlines six data protection principles you must comply with when processing personal data. These principles relate to:\n\n- Lawfulness, fairness and transparency - you must process personal data lawfully, fairly and in a transparent manner in relation to the data subject.\n- Purpose limitation - you must only collect personal data for a specific, explicit and legitimate purpose. 
You must clearly state what this purpose is, and only collect data for as long as necessary to complete that purpose.\n- Data minimisation - you must ensure that personal data you process is adequate, relevant and limited to what is necessary in relation to your processing purpose.\n- Accuracy - you must take every reasonable step to update or remove data that is inaccurate or incomplete. Individuals have the right to request that you erase or rectify erroneous data that relates to them, and you must do so within a month.\n- Storage limitation - You must delete personal data when you no longer need it. The timescales in most cases aren't set. They will depend on your business’ circumstances and the reasons why you collect this data.\n- Integrity and confidentiality - You must keep personal data safe and protected against unauthorised or unlawful processing and against accidental loss, destruction or damage, using appropriate technical or organisational measures. \n\n**Privacy by design**\n\nTo comply with the six principles from the GDPR, you can implement privacy by design. 
This means that you design a data management plan with measures on both IT and procedural level.\n\n<iframe src=\"https://www.youtube.com/embed/iZRcePnhS5I\" allowfullscreen=\"\" allow=\"accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture\" height=\"515px\" style=\"display: inline-block;\" width=\"800px\" title=\"\"></iframe>\n\n**Which principle is breached?**\n\nCan you recognise the principles that are breached in the different ways personal data is processed?\n\nTODO: H5P quiz 7 cases\n\n**Storing personal data 1/2**\n\n<figure id=\"figure-5\"><img src=\"../../images/02_privacy-sensitive-data-personal-data-01.png\" alt=\"storing personal data\"><figcaption><span class=\"figcaption-prefix\">Figure 5:</span> Storing personal data</figcaption></figure>\n\n**Storing personal data 2/2**\n\nOnly if the access can unambiguously be restricted to authorised persons, can data be stored without such measures.\n\nShould you want an elaborate visualisation of what is considered identifiable data, check out the information sheet at the Future of Privacy Forum.\n\n[Download the visual guide to practical data de-identification](https://fpf.org/2016/04/25/a-visual-guide-to-practical-data-de-identification/)\n\n**Can you recognize identifiable data?**\n\n\n### {% icon question %} Can you recognize identifiable data?  \n\n1. a collection of GPS data of daily routines\n2.  a list of household sizes associated with number of pets\n3.  MRI scans without identifying metadata.\n4.  audio recordings with no metadata and no names of the recorded persons\n5. transcripts of interviews without any directly identifying information\n6.  a list of gender and grades for a de-identified course\n\n<details markdown='1'>\n  <summary>Check the answers.</summary>\n\nAnswers 1, 3 and 4 are correct!\n\nGPS data holds information on where people go. In a daily routine, the track ends at a particular location which is likely the home of the subject. 
An MRI scan from the profile of the head can be identifiable. Audio recordings can be identifiable from the tone of the voice. A list of surnames in itself is not identifying nor personal information.\n</details>\n{: .question }\n\n**Access to privacy-sensitive data**\n\nIf and how you can make personal data available depends on the level of sensitivity of your data. The more sensitive, the more restrictions and safeguards need to be put in place to make sure the data does not fall into the hands of unauthorised persons both during and after research.\n\nTo determine where the privacy risks lie for your data you will have to do a Data Protection Impact Assessment (DPIA).\n\nFor more information:\n\nTODO: link to: https://www.uu.nl/en/research/research-data-management/guides/handling-personal-data\n\nTowards the data subjects, you need to be transparent regarding the possible reuse, or retaining of the data for verification requirements, and get their prior consent.\n\n**Cases on how to make personal data accessible**\n\nCase 1: YOUth cohort study\n\nYOUTH COHORT STUDY\nYOUth (Youth Of Utrecht) is a large-scale, longitudinal cohort following children in their development from pregnancy until early adulthood.\n\nA total of 6,000 babies and children from Utrecht and its surrounding areas will be included in two different age groups and followed at regular intervals.\n\nThe YOUth data enables researchers to look for answers to all sorts of scientific questions on child development. A few examples of YOUth data: human bodily material, hours of videos, MRI images, questionnaires, ultrasounds and IQ scores. YOUth encourages and facilitates data sharing. It is one of the leading human cohorts in FAIR and open data in the Netherlands.\n\nMore information at: https://www.uu.nl/en/research/youth-cohort-study\n\nCase 2: TODO: other example from Wings?\n\n**An introduction to informed consent**\n\nIn the module 'Legal agreements and contracts' you learned about informed consent. 
Informed consent is very important when working with data which is in any way related to people.\n\nTODO: add graphics on informed consent\n\nOne thing to arrange in your informed consent is the possibility for future use, for verification or reuse. In your informed consent, it is important to be clear on future use of data.\n\n**Informed consent for data sharing**\n\nOne thing to arrange and to be crystal clear about in your informed consent is the possibility for future use of your data, for verification or reuse.\n\n### {% icon question %} Question  \n\nCheck the sentences that do permit data sharing if used as a single statement.\n\n1. Any personal information that reasonably could identify you will be removed or changed before files are shared with other researchers or results are made public.\n2. Other genuine researchers (may) have access to this data only if they agree to preserve the confidentiality of the information as requested in this form.\n3. Any data that could identify you will be accessible only to the researchers responsible for performing this study.\n4. All personally identifying information collected about you will be destroyed after the study.\n\n<details markdown='1'>\n  <summary>Check the answers.</summary>\n\nAnswers 1 and 2 are both correct!\n\nSharing of research data that relates to people can often be achieved using a combination of obtaining consent, anonymizing data and regulating data access. If the statement towards the data only mentions the current study, sharing is not explicitly possible. 
You should add some sentence to make it clear to participants that the data could be used for further research, deidentified where possible, or identifiable with enough safeguards and security measures, if it is not.\n\n</details>\n{: .question }\n\n## Write your data management plan for your data security\n\nGo to DMPonline and open your draft data management plan created in the Introduction.\n\nYou have now completed the module on data security. You should be able to complete the following questions in the section ‘Data security’:\n\n* Will you use or collect any confidential or privacy-sensitive data?\n* What measures will you take to ensure the security of any confidential or privacy-sensitive data?\n* What measures will you take to comply with security requirements and mitigate risks? To whom will access be granted/restricted?\n","# Introduction to documentation\n\n<!-- This is a comment. -->\n\nBy now you understand how to describe your data collection in terms of, for example, type, size, and format. You have identified this for your own research data.\n\nNow we will look into the documentation and metadata which will accompany your data. 
Documentation and metadata are essential to understand what a dataset means and to make it reusable in the future.\n\n<figure id=\"figure-1\"><img src=\"../../images/01_Metadata_Learning_Objective.png\" alt=\"Introduction \"><figcaption><span class=\"figcaption-prefix\">Figure 1:</span> Why document your data: learning objectives</figcaption></figure>\n\n---\n\nTips for data documentation - John MacInnes, professor of Sociology of the University of Edinburgh, explains why it is necessary to document each step of your research and how this will benefit you in the long term.\n\n<iframe src=\"https://www.youtube.com/embed/EIZsxT-fIiQ\" allowfullscreen=\"\" allow=\"accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture\" height=\"515px\" style=\"display: inline-block;\" width=\"800px\" title=\"\"></iframe>\n\n---\n\n**Examples of data documentation**\n\nSince there is a wide variety of types of data and types of research, there are many different ways of documenting data. A few examples of data documentation are:\n\n* Laboratory notebooks and experimental procedures\n* Questionnaires, codebooks, data dictionaries\n* Software syntax and output files\n* Information about equipment settings & instrument calibrations\n* Database schemes\n* Methodology reports\n* Provenance information about sources of derived or digitised data\n\n### {% icon question %} Question  \n\nWhat data documentation will you use and why?\n\n<details markdown='1'>\n  <summary>Feedback on your reflections</summary>\n\nThe goal of data documentation is to help people understand the dataset: for example, the specific conditions in which it was collected, what each column means and which methods were used to collect the data. 
When creating documentation, you need to ask yourself, can others (or I, myself) understand my dataset if I give them this information.\n\n</details>\n{: .question }\n\nThere are many different ways to set up and organise your documentation.\n\n**Project level**\n\nProject level documentation documents what the study sets out to do; how it contributes to new knowledge in the field, what research questions/hypotheses are, what methodologies are used, what samples are used, what instruments and measures are used, etc. A complete academic thesis normally contains this information in detail, but a published article may not. If a dataset is shared, a detailed technical report needs to be included for the user to understand how the data were collected and processed. You should also provide a sample bibliographic citation to indicate how you would like secondary users of your data to cite it in any publication.\n\n**File or database level**\n\nFile or database level documentation documents how all the files (or tables in a database) that make up the dataset relate to each other, what format they are in, whether they supersede or are superseded by previous files, etc. A readme.txt file is the classic way of accounting for all the files and folders in a project.\n\n**Variable or item level**\n\nVariable or item level documentation documents how an object of analysis came about. 
For example, it does not just document a variable name at the top of a spreadsheet file, but also the full label explaining the meaning of that variable in terms of how it was operationalised.\n\nJohn MacInnes, professor of Sociology of the University of Edinburgh, speaks about how data documentation can help to find a way in often voluminous data collections of different copies, routings, syntaxes, samplings, etc.\n\n**On the necessity of data documentation in secondary data analysis**\n\n<iframe src=\"https://www.youtube.com/embed/Ebaiwg08CW8\" allowfullscreen=\"\" allow=\"accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture\" height=\"515px\" style=\"display: inline-block;\" width=\"800px\" title=\"\"></iframe>\n\n### {% icon question %} Question  \n\nLooking back at your previous research project: Did you ever have problems reusing other people's data because of lack of documentation?\n\n- Never tried\n- Successfully reused\n- Had to ask clarification\n- Had to abandon the reuse attempt\n\n<details markdown='1'>\n  <summary>Feedback on your reflections</summary>\n\nData documentation always provides advantages for yourself and for others such as better understandability, sharability and reusability in the future. 
\n\n</details>\n\n{: .question }\n\n<figure id=\"figure-2\"><img src=\"../../images/02_Metadata_Lab-Notebook.png\" alt=\"Lab notebooks\"><figcaption><span class=\"figcaption-prefix\">Figure 2:</span> Laboratory Notebooks for documentation</figcaption></figure>\n\n---\n\nThorough and effective management of laboratory data and the routine documentation of all lab procedures is a highly important responsibility for all researchers.\n\nIf you want to learn more about the electronic lab notebook system at VIB, please see [these tutorials](https://material.bits.vib.be/topics/eln/) \n\n# An introduction to metadata\n\nWatch this web lecture to learn about the different types of metadata and how metadata can help make your research data better findable. You are pointed to useful sources for metadata standards.\n\n<iframe src=\"https://www.youtube.com/embed/h0oZ3swbTJ0\" allowfullscreen=\"\" allow=\"accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture\" height=\"515px\" style=\"display: inline-block;\" width=\"800px\" title=\"\"></iframe>\n\n**identify different types of metadata**\n\nTODO: HP5 quiz or matrix quiz\n\n**Metadata for different disciplines**\n\nDifferent disciplines like biology, earth sciences, physical sciences and social sciences and humanities have their own standards. 
By choosing a well-supported standard, you will maximise the chance that your data can be (re)used and understood by other researchers.\n\n<iframe src=\"https://www.youtube.com/embed/AvL7hEk8RJQ\" allowfullscreen=\"\" allow=\"accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture\" height=\"515px\" style=\"display: inline-block;\" width=\"800px\" title=\"\"></iframe>\n\n**Metadata for different disciplines**\n\nUseful links to metadata standards:\n\n* [Biology](http://www.dcc.ac.uk/resources/subject-areas/biology)\n* [General Sciences](http://www.dcc.ac.uk/resources/subject-areas/general-research-data)\n\nA community-maintained [directory of metadata schemas](http://rd-alliance.github.io/metadata-directory/) which has been set up under the auspices of the Research Data Alliance.\n\nA list of metadata standards and other standards developed by [FairSharing](https://fairsharing.org/).\n\n**Controlled vocabulary**\n\n![Controlled vocabulary](../../images/03_Metadata-controlled-vocabulary.png)\n\n**Improve a record description**\n\n### {% icon question %} Question  \n\nTake a look at the record descriptions in the table below and answer the question below and in the following pages.\n\n| Soil Sample       | Condition     | Length| Classx |\n| ----------------- |:-------------:| -----:|:-------|\n| A1                | low           | $458  | III    |\n| A2                | low           | $391  | II     |\n| A3                | medium        | $422  | IV     |\n\nx according to the classification from last experiment\n\nIs the value in the Soil sample column clear?\n\n<details markdown='1'>\n  <summary>Click your answers!</summary>\n\nYes, it is sufficient to say this is a sample. 
The identifier for the sample needs to be unique, the content of the sample comes from the other metadata fields and their values.\n\n</details>\n{: .question }\n\n### {% icon question %} Question  \n\nTake a look at the record descriptions in the table below and answer the question below and in the following pages.\n\n| Soil Sample       | Condition     | Length| Classx |\n| ----------------- |:-------------:| -----:|:-------|\n| A1                | low           | $458  | III    |\n| A2                | low           | $391  | II     |\n| A3                | medium        | $422  | IV     |\n\nx according to the classification from last experiment\n\nIs the value in the Condition column clear?\n\n<details markdown='1'>\n  <summary>Click your answers!</summary>\n\n  No! It is not clear what low or medium as condition means.\n\n</details>\n{: .question }\n\n### {% icon question %} Question  \n\nTake a look at the record descriptions in the table below and answer the question below and in the following pages.\n\n| Soil Sample       | Condition     | Length| Classx |\n| ----------------- |:-------------:| -----:|:-------|\n| A1                | low           | $458  | III    |\n| A2                | low           | $391  | II     |\n| A3                | medium        | $422  | IV     |\n\nx according to the classification from last experiment\n\nIs the value in the Length column clear?\n\n<details markdown='1'>\n  <summary>Click your answers!</summary>\n\nNo, it is not clear what is meant by length. Also a unit for the values is missing. 
Is it meters, centimeters, or seconds?\n\n</details>\n{: .question }\n\n### {% icon question %} Question  \n\nTake a look at the record descriptions in the table below and answer the question below and in the following pages.\n\n| Soil Sample       | Condition     | Length| Classx |\n| ----------------- |:-------------:| -----:|:-------|\n| A1                | low           | $458  | III    |\n| A2                | low           | $391  | II     |\n| A3                | medium        | $422  | IV     |\n\nx according to the classification from last experiment\n\nIs the value in the Class column clear?\n\n<details markdown='1'>\n  <summary>Click your answers!</summary>\n\nNo! There is a reference that the classes are explained somewhere. But no link to the document is given.\n\n</details>\n{: .question }\n\n## Data standards explained\n\nYour dataset can be standardised in various aspects. Standardisation, in general, makes data comparable and interpretable. In other words, your data becomes interoperable by applying standards. Datasets can be combined, compared or are simply easier to reuse. You have to plan standardisation, as it is for many aspects hard or impossible to apply afterwards.\n\nStandardise as much as possible between you and your collaborators or research group. If there are standards established and used in your field of research you are advised to use these.\n\nHere is a list of things you can standardise in your research.\n\n* Standardise how, what and when you measure things by standardising your protocol, or methods and materials. For instance, is there a standard set of questions for ‘quality of life’? Is there a standard procedure to house mice for your purpose? What aspects do you measure? At what parameter values (age, concentration, etc.)? When do you measure (every two hours, every gram of weight gain, etc.)?\n\n* Standardise your file formats so you can easily exchange results without technical difficulties. 
Check for standard taxonomies or coding systems within your research discipline.\n\n* Standardise the units in which you note down your results. For instance, do you use mm, cm, m? It is extra work to transform units between experiments.\n\n* Standardise the metadata you use to describe your records or study. What fields will you fill in by default, and according to what standard do you define the fields’ names? Will you design a metadata spreadsheet where you specify all things that you will note down?\n\n* Standardise the vocabulary you use. If everyone has the same terminology, it can avoid confusion or misinterpretation. Check for standard taxonomies or coding systems within your research discipline.\n\n### Check your knowledge on standards\n\nFollow the links below for examples of standards. What type of standardisation do the links refer to?\n\n* [Demographic market research](http://www.amplituderesearch.com/market-research-questions.shtml)\n* Find via Google: “general morphology score (GMS)”\n* [Marine Geoscience Data](http://www.marine-geo.org/submit/guidelines.php)\n* [International Union of crystallography](http://www.iucr.org/resources/cif/spec/ancillary/abbreviations)\n* [The Cultural Objects Name Authority](http://www.getty.edu/research/tools/vocabularies/cona/index.html)\n* [SI Units](https://www.nist.gov/pml/weights-and-measures/metric-si/si-units)\n* [UK data service](https://www.ukdataservice.ac.uk/manage-data/format/recommended-formats)\n\nTODO: add H5P exercise\n\n## Folder structure and file naming\n\n<figure id=\"figure-3\"><img src=\"../../images/01_Folder-structure-Learning-Objective.png\" alt=\"Introduction \"><figcaption><span class=\"figcaption-prefix\">Figure 3:</span> Folder structure - learning objectives</figcaption></figure>\n\nCC BY: [https://mantra.edina.ac.uk/](https://mantra.edina.ac.uk/)\n\n---\n\n<figure id=\"figure-4\"><img src=\"../../images/02_Folder-structrue-introduction-file-management.png\" alt=\"Introduction to good file 
management\"><figcaption><span class=\"figcaption-prefix\">Figure 4:</span> Introduction to good file management</figcaption></figure>\n\nTrying to find a data file that you need which has been stored or named incorrectly or inaccurately can be both frustrating and a waste of valuable time. In this short video Jeff Haywood, professor at the University of Edinburgh, explains his experiences with good and bad file management.\n\n<iframe src=\"https://www.youtube.com/embed/i2jcOJOFUZg\" allowfullscreen=\"\" allow=\"accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture\" height=\"515px\" style=\"display: inline-block;\" width=\"800px\" title=\"\"></iframe>\n\n**Project level**\n\nProject level documentation documents what the study sets out to do; how it contributes to new knowledge in the field, what research questions/hypotheses are, what methodologies are used, what samples are used, what instruments and measures are used, etc. A complete academic thesis normally contains this information in detail, but a published article may not. If a dataset is shared, a detailed technical report needs to be included for the user to understand how the data were collected and processed. You should also provide a sample bibliographic citation to indicate how you would like secondary users of your data to cite it in any publication.\n\n**File or database level**\n\nFile or database level documentation documents how all the files (or tables in a database) that make up the dataset relate to each other, what format they are in, whether they supersede or are superseded by previous files, etc. A readme.txt file is the classic way of accounting for all the files and folders in a project.\n\n**Variable or item level**\n\nVariable or item level documentation documents how an object of analysis came about. 
For example, it does not just document a variable name at the top of a spreadsheet file, but also the full label explaining the meaning of that variable in terms of how it was operationalised.\n\n### {% icon question %} **Choose the best chronological file name**  \n\nWhich of the file names below is the most appropriate?\n\n- 2019-03-24_Attachment\n- 24 March 2006 Attachment\n- 240306attach\n\n<details markdown='1'>\n  <summary>Click your answers!</summary>\n\n2019-03-24_Attachment is correct! Using a date in the format Year-Month-Day will maintain the chronological order of your files.\n</details>\n{: .question }\n\n### {% icon question %} **Choose the best descriptive file name**  \n\nWhich of the file names below is the most appropriate?\n\n- labtox_recent_110810_old_version.sps\n- 2010-08-11_bioasssay_tox_V1.sps\n- FFTX_3776438656.sps\n\n<details markdown='1'>\n  <summary>Click your answers!</summary>\n\n2010-08-11_bioasssay_tox_V1.sps is correct! Keep the file names short and relevant while using sufficient characters to capture information. 
Do not name files recent or final or definitive_final; a date or version number will suffice.\n\n</details>\n{: .question }\n\n<figure id=\"figure-5\"><img src=\"../../images/03_Folder-structure-batch-renaming.png\" alt=\"Batch renaming\"><figcaption><span class=\"figcaption-prefix\">Figure 5:</span> Batch renaming</figcaption></figure>\n\n---\n\n<figure id=\"figure-6\"><img src=\"../../images/04_Folder-structure-version-control.png\" alt=\"Suggestions for version control\"><figcaption><span class=\"figcaption-prefix\">Figure 6:</span> Suggestions for version control</figcaption></figure>\n\n**How would you treat your data**\n\n### {% icon question %} **Why discard obsolete versions of data?**  \n\nWhy should you discard or delete obsolete versions of data?\n\n- The most current version is the only relevant version.\n- You have several versions of files in a state between versions\n- You are exceeding the storage space available to you.\n\n<details markdown='1'>\n  <summary>Click your answers!</summary>\n\nCorrect answer: You have several versions of files in a state between versions! Too many similar or related files may be confusing to yourself and to anyone else wanting to access or use your data. You may think that you know which data file is which but that may not always be the case as time passes and the number of different versions increases. It is easier to maintain a manageable number of versions with a clear naming structure. As long as the original raw or definitive copy is retained and processing is well documented, the intermediate working files can and should be discarded.\n\n</details>\n{: .question }\n\n**Fill the blanks**\n\nTODO: add H5P\n\n### Write your data management plan for your data documentation\n\nGo to DMPonline and open your draft data management plan created in the Introduction.\n\nYou have now completed the module Data documentation. 
You should be able to complete the following questions in the section Data documentation:\n\n* How will you structure your data?\n* How will the data be described and documented?\n* What standards will you use?\n\n","## Useful information & training resources on Research Data Management \n\n[UGent RDM webpages in Dutch](https://www.ugent.be/nl/onderzoek/datamanagement)\n\n[UGent RDM webpages in English](https://www.ugent.be/en/research/research-staff/organisation/datamanagement)\n\n[Australian National Data Service (esp. “23 (research data) things”)](http://www.ands.org.au/partners-and-communities/23-research-data-things)\n\n[Coursera Mooc “Research Data Management and Sharing”](https://www.coursera.org/learn/data-management)\n\n[Data Management Training Clearinghouse (registry of RDM learning resources)](http://dmtclearinghouse.esipfed.org/)\n\n[DataOne (esp. education modules)](https://www.dataone.org/education-modules)\n\n[Digital Curation Centre (esp. How-to Guides & Checklists)](http://www.dcc.ac.uk/resources/how-guides)\n\n[Essentials for Data Support](http://datasupport.researchdata.nl/en)\n\n[EUDAT (esp. training materials)](https://eudat.eu/training)\n\n[FOSTER training portal](https://www.fosteropenscience.eu)\n\n[MANTRA – Research Data Management Training](http://datalib.edina.ac.uk/mantra/)\n\n[OpenAIRE webinars](https://www.openaire.eu/webinars/)\n\n[RDM open training materials on Zenodo](https://zenodo.org/communities/dcc-rdm-training-materials/?page=1&size=20)\n\n[UK Data Service (esp. 
“Prepare & Manage Data” pages)](https://www.ukdataservice.ac.uk/manage-data)\n\n[UK Data Service webinars](https://www.ukdataservice.ac.uk/news-and-events/webinars)\n\n[FAIRDOM Knowledge Hub](https://fair-dom.org/knowledgehub/)\n\n[Data4LifeSciences Handbook for Adequate Natural Data Stewardship](http://data4lifesciences.nl/hands/handbook-for-adequate-natural-data-stewardship/) \n\n","# Reading and writing files\n{:.no_toc}\n\n### Reading files\nEntering data in R can be done by typing the values when you create a variable. In most cases, however, you will have a file with data that was created by an instrument in your lab. How to import such a file into R? \n\nThere is a manual available in the R documentation called **R Data Import/Export**. It's accessible using help.start() and covers in detail the functionality R has to import and export data. Reading this is highly recommended. This manual covers importing data from spreadsheets, text files, and networks.\n\n### Reading text files\nMost instruments put out data in text format: tab-delimited text (.txt) or comma-separated value files (.csv). Both  can be easily opened in R. \n\nThe most convenient method to import data into R is to use the read functions, like read.table(). These functions can read data in a text file. In Notepad you can save such a file as a regular text file (extension .txt). Many spreadsheet programs can save data in this format. Reading means opening the file and storing its content into a data frame.\n```\nread.table(file,header=FALSE,sep=\"\",dec=\".\",skip=0,comment.char=\"#\")\n```\n\nThis function has a long list of arguments, the most important ones are:\n- *file*: path on your computer to the file e.g. D:/trainingen/Hormone.csv \n\tIf it is stored in the working directory, you can simply use its name. You can also use *file=file.choose()* to browse to the file and select it. 
File can be replaced by a url to load a file with data from the internet.\n- *header*: does the first line of the file contain column names?\n- *dec*: symbol used as decimal separator\n- *sep* symbol used as column separator, default is a whitespace or tab\n- *skip*: number of lines to skip in the file before starting to read data\n- *comment.char*: symbol to define lines that must be ignored during reading\n\nSee the documentation for an overview of all the arguments. The output of every read function is a data frame.\n\nThere are functions to read specific file formats like .csv or tab-delimited .txt files. In the documentation of read.table() you see that these functions are called read.csv() and read.delim().  Both functions call read.table(), but with a bunch of arguments already set.  Specifically they set up *sep* to be a tab or a comma, and they set *header=TRUE*.  \n\n```\nread.delim(file,header=TRUE,sep=\"\\t\")\n```\nOn the documentation page, you see that these functions each have two variants that have different default settings for the arguments they take:\n```\nread.csv(   file,header=TRUE,sep= \",\",dec=\".\", ...)\nread.csv2(  file,header=TRUE,sep= \";\",dec=\",\", ...)\nread.delim( file,header=TRUE,sep=\"\\t\",dec=\".\", ...)\nread.delim2(file,header=TRUE,sep=\"\\t\",dec=\",\", ...)\n```\nOriginally the CSV format was designed to hold data values separated by commas. In .csv files that are made on American computers this is the case. However, in Europe the comma was already used as a decimal separator. This is why .csv files that are made on a European computer use the semicolon as a separator. \n\nFor instance, the file below contains a header row and three columns, separated by semicolons. 
It uses the comma as decimal separator.\n```\nPatient;Drug;Hormone\n1;A;58,6\n2;A;57,1\n3;B;40,6\n```\nObviously, the file is a European CSV file, to open it use read.csv2()\n\n### Reading Excel files\nTo import Excel files via a command the easiest way is to let Excel save the file in .csv or tab delimited text format and use the read functions. \n\nAn easy way to import Excel files is to use the RStudio interface although I prefer to use commands. To use the interface go to the **Environment** tab and click the **Import Dataset** button. \n\nRStudio can import 3 categories of files: text files, Excel files and files generated by other statistical software. To read .xls or .xlsx files select **From Excel**. \n\nA dialog opens with options on the import. You can import data from your computer (**Browse**) or from the internet (provide a url and click **Update**). Click **Browse**, locate the Excel file and click **Open**.\n\nThe **Data Preview** section shows what the data will look like in R.\n\nThe **Import Options** section allows you to specify the import parameters. \n- *Name*: name of the data frame that will hold the imported data. The default is the name of the file that you are opening.\n- *Skip*: number of rows at the top of the file to skip during import. Some data formats contain a number of header rows with general info like parameter settings, sample names etc. These rows are followed by the actual data. Skip allows you to skip over the header rows and import the actual data. \n- If the first row of the file contains column names, select *First Row as Names*\n- *Open data viewer* shows the data in the script editor upon import\n\nClick **Import**.\n\nBehind the scenes RStudio uses the **readxl** package that comes with the tidyverse package. You can also use the functions of this package directly in commands. 
\n\nCompared to other packages for reading Excel files (gdata, xlsx, xlsReadWrite) readxl has no external dependencies, so it's easy to install and use on all operating systems. It supports the  .xls format and the .xlsx format. The easiest way to install it from CRAN is to install the whole tidyverse package but you have to load readxl explicitly, since it is not a core tidyverse package.\n\nOnce imported into RStudio the data is stored in a data frame and you can use it as input of commands. The data frame appears in the list of **Data in the Environment tab**.\n\n<figure id=\"figure-1\"><img src=\"../../images/Rfile_imported.png\" alt=\"file_imported\"><figcaption><span class=\"figcaption-prefix\">Figure 1:</span> Inspect Variables and Data Frames in the Environment tab</figcaption></figure>\n\nIf you want to view the data frame you can **click its name in the Environment** tab and it will appear in a separate tab in the script editor.\n\n<figure id=\"figure-2\"><img src=\"../../images/Rview_file.png\" alt=\"view_file\"><figcaption><span class=\"figcaption-prefix\">Figure 2:</span> View file content</figcaption></figure>\n\n> ### {% icon hands_on %} Hands-on: Demo\n>\n> From the demo script run the **Reading files** section\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Exercise 17a\n>\n> 1. Import the file [GeneEx.csv](http://data.bits.vib.be/pub/trainingen/RIntro/GeneEx.csv) into a data frame called GeneEx\n> 2. Rename the two last columns Ct1 and Ct2\n> 3. 
Create a new column containing the average Ct: (Ct1+Ct2)/2\n>    > <details markdown=\"1\">\n>    > <summary> {% icon solution %} Solution\n>    > </summary>\n>    >  ```\n>    >  GeneEx <- read.csv2(\"Rdata/GeneEx.csv\")\n>    >  colnames(GeneEx)[c(3,4)] <- c(\"Ct1\",\"Ct2\")\n>    >  GeneEx$Average_Ct <- (GeneEx$Ct1 + GeneEx$Ct2)/2\n>    >  ```\n>    > </details>\n>\n>    > ### {% icon question %} Question\n>    >\n>    >  Which of these 2 commands will work ?\n>    > ```\n>    >  GeneEx <- read.csv2(\"Rdata/GeneEx\")\n>    >  GeneEx <- read.csv2(\"http://data.bits.vib.be/pub/trainingen/RIntro/GeneEx.csv\")\n>    >  ```\n>    {: .question}\n>    > ### {% icon question %} Question\n>    >\n>    > Which of these 2 commands will work ?\n>    >  ```\n>    >  names(GeneEx[c(3,4)]) <- c(\"Ct11\",\"Ct21\")\n>    >  names(GeneEx)[3:4] <- c(\"Ct11\",\"Ct21\")\n>    >  ```\n>    {: .question}\n>    > ### {% icon question %} Question\n>    >\n>    > What's the difference in result between these 2 commands ?\n>    > ```\n>    > GeneEx$Average_Ct2 <- (GeneEx$Ct1+GeneEx[4])/2\n>    > GeneEx[5] <- (GeneEx[3]+GeneEx[4])/2\n>    > ```\n>    {: .question}\n>    > ### {% icon question %} Question\n>    >\n>    > Can you use sum() instead of + ?\n>    > ```\n>    > sum(GeneEx$Ct1,GeneEx$Ct2)\n>    > (GeneEx$Ct1+GeneEx$Ct2)\n>    > ```\n>    {: .question}\n>    > ### {% icon question %} Question\n>    >\n>    > Can you use mean() instead of +/2 ?\n>    > ```\n>    > mean(GeneEx$Ct1,GeneEx$Ct2)\n>    > mean(GeneEx$Ct1)\n>    > ```\n>    {: .question}\n{: .hands_on}\n\n### Reading other files\nAlso of note is an R package called **foreign**. This package contains functionality for importing data into R that is formatted by most other statistical software packages, including SAS, SPSS, Stata and others. 
\n\n### Writing files\nConversely, to write a data frame to a file you can use the generic function:\n```\nwrite.table(x,file=\"name.txt\",quote=TRUE,row.names=TRUE,col.names=TRUE)\n```\nThis function has a long list of arguments, the most important ones are:\n- *x*: data frame to be written to a file\n- *file*: name or full path of the file e.g. D:/trainingen/Hormone.csv\n- *quote*: if TRUE, strings, row and column names will be surrounded by double quotes. If FALSE, nothing is quoted.\n- *sep*: column separator\n- *row.names*: boolean indicating whether the row names of x are to be written or a character vector of row names to be written\n- *col.names*: boolean indicating whether the column names of x are to be written or a character vector of column names to be written\n- *append=FALSE*: if TRUE x is **added** to the file defined by *file*\n- *eol = \"\\n\"*: end-of-line character, default \"\\n\" represents an enter\n- *na=\"NA\"*: string to use for missing values in the data\n- *dec=\".\"*: decimal separator\n\nSee the help file for a full overview of all arguments. \n\nTo specifically write .csv files use write.csv() or write.csv2(). See the help file for a description of the difference between them. \n\nExcel can read .csv files but if you really want to write .xls or .xlsx files use the openxlsx package.  \n\n\n> ### {% icon hands_on %} Hands-on: Exercise 17b\n>\n> 1. Read the file [RNASeqDE.txt](http://data.bits.vib.be/pub/trainingen/RIntro/RNASeqDE.txt) into a data frame called DE. It contains the differentially expressed genes from an RNA-Seq experiment.  \n> 2. Split the table into a table of upregulated genes (log2foldchange > 0) and a table of downregulated genes and store them in data frames called up and down.\n> 3. How many up- and downregulated genes are there?\n> 4. What is the gene with the highest log2 fold change?\n> 5. What is the data of the gene with the lowest adjusted p-value (= padj)?\n> 6. 
Write the Ensembl IDs (= row names) of the upregulated genes to a file called up.txt. You will use this file for functional enrichment analysis using online tools like ToppGene, EnrichR... These tools want a file with only Ensembl IDs as input (one per line, no double quotes, no column headers, no row names).\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >  ```\n>    >  DE <- read.table(\"Rdata/RNASeqDE.txt\",header=TRUE)\n>    >  up <- DE[DE$log2FoldChange > 0,]\n>    >  down <- DE[DE$log2FoldChange < 0,]\n>    >  nrow(up) \n>    >  nrow(down)\n>    >  rownames(up[which.max(up$log2FoldChange),])\n>    >  DE[which.min(DE$padj),]\n>    >  write.table(rownames(up),file=\"up.txt\",quote=FALSE,col.names=FALSE,row.names=FALSE)\n>    >  ```\n>    > </details>\n>\n>    > ### {% icon question %} Question\n>    >\n>    > Which of the following 2 commands will not work properly ?\n>    > ```\n>    >  DE <- read.table(\"Rdata/RNASeqDE.txt\")\n>    >  file <- file.choose()\n>    >  DE <- read.table(file,header=TRUE)\n>    >  ```\n>    {: .question}\n>    > ### {% icon question %} Question\n>    >\n>    > Will the following command work ?\n>    > ```\n>    >  up <- subset(DE,log2FoldChange > 0)\n>    >  \n>    >  ```\n>    {: .question}\n>    > ### {% icon question %} Question\n>    >\n>    > What's the difference between these 2 commands ?\n>    >  ```\n>    >  which.max(up$log2FoldChange)\n>    >  max(up$log2FoldChange)\n>    >  ```\n>    {: .question}\n>    > ### {% icon question %} Question\n>    >\n>    > Will this command write Ensembl IDs and log fold changes ?\n>    > ```\n>    >  toprint <- as.data.frame(up$log2FoldChange)\n>    >  write.table(toprint,file=\"up.txt\",quote=FALSE,col.names=FALSE)\n>    >  ```\n>    {: .question}\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 17c\n>\n> Which type of files are imported by read.delim ? 
\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    > Check the documentation and look at the default for *sep* \n>    > </details>\n>\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 17d\n>\n> 1. Read the file [ALLphenoData.tsv](http://data.bits.vib.be/pub/trainingen/RIntro/ALLphenoData.tsv) into a variable called pdata using one of the read functions\n> 2. What type of data structure is pdata ?\n> 3. What are the names of the columns of pdata ?\n> 4. How many rows and columns are in pdata ?\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >  ```\n>    >  pdata <- read.delim(\"Rdata/ALLphenoData.tsv\")\n>    >  class(pdata)\n>    >  colnames(pdata)\n>    >  dim(pdata)\n>    >  ``` \n>    > </details>\n>\n{: .hands_on}\n","<!-- This is a comment. -->\n\n## Introduction to data availability for reuse\n\nThanks to information and communication technology and globalisation new opportunities arise to exchange results of scientific research - publications and research data - and even of scientific methods and practices. This new way of practising science is called ‘open science’.\n\nOpen data is a part of this movement towards open science. It is the ambition of universities, governments, funders and publishers to make research data optimally suited for reuse.\n\nThere are different reasons why you may not be able to share your research data. Thinking about these issues and challenges when developing your data management plan will help you reflect on such reasons in an early stage.\n\n**How frustrating a data request can be**\n\nNot being prepared to share your data can lead to problems in using the data. In this short video, you see what shouldn't happen when a researcher makes a data sharing request! Topics include storage, documentation, and file formats. 
A made up, yet not unrealistic story.\n\n\n<iframe src=\"https://www.youtube.com/embed/66oNv_DJuPc\" allowfullscreen=\"\" allow=\"accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture\" height=\"515px\" style=\"display: inline-block;\" width=\"800px\" title=\"\"></iframe>\n\n## Introduction to data repositories\n\nIn order to preserve, manage, and provide access to your research data, you can deposit your data in a data repository. Data repositories allow permanent access to datasets in a trustworthy environment and enable search, discovery, and reuse of the data they host.\n\nClick on the topics below to find out more about data repositories.\n\nTODO: add repositories from Elixir\n\n**A wide variety**\n\nThere is a wide variety of data repositories. Most have the option to publish your dataset using a persistent identifier and some provide the service of long-term preservation. Some repositories host data from various disciplines and others are domain- or discipline specific.\n\n**Choosing a data repository**\n\nWhen choosing a repository for your data be sure to check if the repository meets your criteria or the criteria set by your funder or journal editors.\n\nCriteria to select a certain repository can be:\n\n* Is the repository certified with a [CoreTrustSeal](https://www.coretrustseal.org/) or Data Seal of Approval?\nRepositories with a Data Seal of Approval are recognised in the community as a trustworthy source of data.\n* Is long term archiving guaranteed or not?\nSome repositories will guarantee the legibility of the data, even if the hardware and software become obsolete.\n* What are the costs per dataset or gigabyte?\nRepositories differ in their cost model, some allow free deposits up to a certain amount of storage\n* What is the physical storage location of data?\nThe location of your data determines under which data protection law it falls. 
Some repositories store data in the US and others in the EU.\n* What is the default license?\nSome repositories allow for open or restricted access, or you can specify which license for use you want for your data.\n\nYou can use this [repository selection tool](https://www.uu.nl/en/research/research-data-management/tools-services/tools-for-storing-and-managing-data/decision-aid-data-repositories) to help you select a suitable repository.\n\n**Registry of research data repositories**\n\nYou can browse or search for a data repository in re3data.org. This is a global registry of research data repositories covering different academic disciplines. You can search or browse by subject, content type or country. You can filter the search and browse results on criteria for choosing a data repository as described above.\n\n[https://www.re3data.org/](https://www.re3data.org/)\n\n**Some well-known and more generic repositories**\n\n* [Zenodo](https://zenodo.org/) – a repository that enables researchers, scientists, EU projects and institutions to share and showcase multidisciplinary research results (data and publications) that are not part of the existing institutional or subject-based repositories of the research communities;\n* [Dryad](http://www.datadryad.org/) – a curated general-purpose repository that makes the data underlying scientific publications discoverable, freely reusable and citable. Dryad has integrated data submission for a growing list of journals;\n* [Open Science Framework (OSF)](https://osf.io/) - a scholarly commons to connect the entire research cycle. 
It is part network of research materials, part version control system, and part collaboration software;\n* [Figshare](https://figshare.com/) – a repository that allows researchers to publish all of their research outputs in an easily citable, sharable and discoverable manner.\n\n## Explore data repositories\n\nYou have just learned about the existence of a global registry of research data repositories that covers repositories from different academic disciplines.\n\nRe3data.org makes it possible to search for a repository that meets your criteria.\n\nGo to [www.re3data.org/search](http://www.re3data.org/search) and find a repository that meets all three of the following criteria:\n\n* Certificate → CoreTrustSeal\n* Data licenses → CC0 (Creative Commons 0)\n* Persistent identifier (PID systems) → DOI (Digital Object Identifier)\n\nMake use of the filters offered on the left side of the screen, as visualized here:\n\nTODO: quiz with ELIXIR resources\n\n### Give clarity with (Creative Commons) licenses\n\nIn order to publish your data and make it reusable, you require a license. A license creates clarity and certainty for potential users of your data. A license is not an option for all data; some of it may be too confidential or privacy-sensitive to be published.\n\n**Creative Commons licenses**\n\nLicenses such as the [Creative Commons](https://creativecommons.org/share-your-work/licensing-types-examples/) (CC) licenses replace 'all rights reserved' copyright with 'some rights reserved'. There are seven standard CC licenses. CC-BY is the most commonly used license, in which attribution is mandatory when using data. You can also choose restrictions like non-commercial, no derivatives, or share alike. 
Creative Commons offers a [guide](https://creativecommons.org/choose/?lang=en) to help you determine your preferred license.\n\n<figure id=\"figure-1\"><img src=\"../../images/CC.png\" alt=\"Creative Commons\"><figcaption><span class=\"figcaption-prefix\">Figure 1:</span> Creative Commons</figcaption></figure>\n\n**Assigning a license to your data**\n\nAssigning licenses to data can also have disadvantages. Licenses are static and do not change with the quick developments in the field of research data. Therefore, some data repositories work with a CC0 license whereby no rights are reserved. Instructions regarding use are completed with codes of conduct, which may be adapted more easily.\n\nA short movie explaining the different Creative Commons elements is shown below. Remember that sharing without a license can still lead to conflicts.\n\nTODO: add video on CC licenses?\n\n**Question**\n\nWe are very interested to know what license you would choose if you were to share the underlying research data of your most recent publication.  \n\nAn explanation for each license can be found by clicking on the links below.\n\n1. CC BY: [Attribution](https://creativecommons.org/share-your-work/licensing-types-examples/licensing-examples/#by)\n2. CC BY-SA: [Attribution ShareAlike](https://creativecommons.org/share-your-work/licensing-types-examples/licensing-examples/#sa)\n3. CC BY-ND: [Attribution-NoDerivs](https://creativecommons.org/share-your-work/licensing-types-examples/licensing-examples/#nd)\n4. CC BY-NC: [Attribution-NonCommercial](https://creativecommons.org/share-your-work/licensing-types-examples/licensing-examples/#nc)\n5. CC BY-NC-SA: [Attribution-NonCommercial-ShareAlike](https://creativecommons.org/share-your-work/licensing-types-examples/licensing-examples/#by-nc-sa)\n6. CC BY-NC-ND: [Attribution-NonCommercial-NoDerivs](https://creativecommons.org/share-your-work/licensing-types-examples/licensing-examples/#by-nc-nd)\n7. 
CC0: [Public Domain](https://creativecommons.org/share-your-work/public-domain/)\n\n## Publishing in a data journal\n\nData journals are publications whose primary purpose is to publish datasets. They enable you as an author to focus on the data itself, rather than producing an extensive analysis of the data which occurs in the traditional journal model. Fundamentally, data journals seek to:\n\n* Promote scientific accreditation and reuse;\n* Improve transparency of scientific methods and results;\n* Support good data management practices;\n* Provide an accessible and permanent route to the dataset.\n\n**The benefits of publishing in a data journal**\n\nPublishing in a data journal may be of interest to researchers and data producers for whom data is a primary research output. In some cases, the publication cycle may be quicker than that of traditional journals, and where there is a requirement to deposit data in an \"approved repository\", long-term curation and access to the data is assured.\n\nPublishing a data paper may be regarded as best practice in data management as it:\n\n* Includes an element of peer review of the dataset;\n* Maximises opportunities for reuse of the dataset;\n* Provides academic accreditation for data scientists as well as for front-line researchers.\n(source: [ANDS Guide](http://www.ands.org.au/working-with-data/publishing-and-reusing-data/data-journals))\n\n**General and disciplinary data journals**\n\nThere are data journals for various disciplines and also more general data journals exist. A widespread standard PID is the DOI. DOI stands for ‘Digital Object Identifier’. A DOI is an alphanumeric string assigned to an object which allows for an object to be identified over time. Often a DOI will be presented as a link which looks like: https://doi.org/10.1109/5.771073. There are other identifiers available which some repositories may use instead. 
If you are depositing in a reputable repository then you should be given some type of persistent identifier which you can use to cite and link to your data.\n\nExamples of generic data journals:\n\n* [Scientific Data](http://www.nature.com/sdata/about)  \n* [Data in Brief](http://www.journals.elsevier.com/data-in-brief)   \n* [Data Science Journal](http://www.codata.org/publications/data-science-journal)\n\nExamples of disciplinary data journals:\n\nTODO: check for life science additions\n\nOpen archaeology data;\nEarth System Science Data;\nResearch Data Journal for the Humanities and Social Sciences.\n\n## How to cite a dataset\n\nCitations to your data can add to your academic impact.\n\nA citation should include enough information so that the exact version of the data being cited can be located. Including a Persistent Identifier (PID) in the citation ensures that even if the location of the data changes, the PID will always link to the data that were used.\n\nYou can indicate in your (Creative Commons) license or user agreement that you want your data cited when reused.\n\nData citations work just like book or journal article citations and can include the following information:\n\n* Author;\n* Year;\n* Dataset title;\n* Repository;\n* Version;\n* Persistent IDentifier (PID), often works as a functional link/URL.\n\n**Examples**\n\nA widespread standard PID is the DOI. DOI stands for ‘Digital Object Identifier’. A DOI is an alphanumeric string assigned to an object which allows for an object to be identified over time. Often a DOI will be presented as a link which looks like: https://doi.org/10.1109/5.771073. There are other identifiers available which some repositories may use instead. If you are depositing in a reputable repository then you should be given some type of persistent identifier which you can use to cite and link to your data.\n\nIrino, T; Tada, R (2009): Chemical and mineral compositions of sediments from ODP Site 127‐797. 
Geological Institute, University of Tokyo. http://dx.doi.org/10.1594/PANGAEA.726855\n\n\n**Tips**\n\nTip1: Get a PID at the data repository of your choice.\nTip2: Is your PID a DOI and do you want to cite it in the format of a specific journal? Use the [DOI formatter](https://citation.crosscite.org/) from CrossCite.\n\n\nTODO: add short quiz\n\n### FAIR data\n\nFAIR stands for ‘Findable, Accessible, Interoperable, and Reusable’. The FAIR data principles act as an international guideline for the result of high-quality data management.\n\nWith the increase in volume, complexity and creation speed of data, humans are more and more relying on computational support for dealing with data. The principles were defined with the focus on machine-actionability, i.e. the capacity of computational systems to find, access, interoperate and reuse data with none or minimal human intervention.\n\n* F – Findable\n\nBy using correct metadata to describe the data, it will be findable. By using a persistent identifier the data can be found by computer systems automatically.\n\n* A – Accessible\n\nThe data should be accessible for the long term. Even when underlying data is not accessible, the describing metadata should remain available.\n\n* I – Interoperable\n\nThe data can be used and combined with other datasets. To achieve this, the data should be stored in generic file types, not in software specific file types.\n\n* R – Reusable\n\nThe options for reuse should be stated clearly in a license. Without a license there is no certainty about the options for reuse and creator rights are implicit.\n\n**How to achieve FAIR data**\n\nIn general, having a good data management plan will lead to FAIR data. In the case of privacy-sensitive data, it is possible to meet the criteria, but not to share the data openly. 
In this case you can make sure that a well-described dataset can be found online, while preventing the underlying data from being downloaded and used without permission.\n\nIf you anonymise your data, presuming the data is of limited sensitivity and you are very sure the data cannot lead back to the persons involved, you can share your data openly.\n\nThe FAIR Guiding Principles were put together and published in Scientific Data (Mark D. Wilkinson et al., “The FAIR Guiding Principles for Scientific Data Management and Stewardship,” Scientific Data 3 (March 15, 2016): 160018.).\n\nTODO: add question H5P quiz?\n\n### Open science\n\n“Open Science is the practice of science in such a way that others can collaborate and contribute, where research data, lab notes and other research processes are freely available, under terms that enable reuse, redistribution and reproduction of the research and its underlying data and methods.”\n\n(Source: [FOSTER](https://www.fosteropenscience.eu/foster-taxonomy/open-science-definition)).\n\nYou have learned that good data management contributes to the findability, accessibility, interoperability and reusability of your research data. This does not necessarily mean that you should make your data openly available. But to open up data, you do need good data management from the earliest possible stage of your research project.\n\nTODO: add links to ORION course or other relevant elements\nFlemish open science plan?\n\n### Write your data management plan for your data reuse\n\nGo to DMPonline and open your draft data management plan created in the Introduction.\n\nYou have now completed the module on data sharing and availability for reuse. 
You should be able to complete the following questions in the section ‘Data availability for reuse’:\n\n* What secondary use of your data is intended or foreseeable?\n* Where will you make your data available?\n* What access and usage conditions will apply?\n","# What is R ?\n{:.no_toc}\n\nR is many things: a project, a language... \nAs a **project**, R is part of the [GNU free software project](http://www.gnu.org). The development of R is done under the philosophy that software should be free of charge. This is good for the user, although there are some disadvantages: R comes with ABSOLUTELY NO WARRANTY. This statement comes up on the screen every time you start R. There is no company regulating R as a product. The R project is largely an academic endeavor, and most of the contributors are statisticians, hence the sometimes incomprehensible documentation. \nAs a **computer language** it was created to allow manipulation of data, statistical analysis and visualization. It is not easy to learn the language if you haven't done any programming before but it is worth taking the time as it can be a very useful tool.  An enormous variety of statistical analyses are available and R allows you to produce graphs exactly as you want them with publication quality. 
\n\n### Good things about R\n- It's free\n- It works on Windows, Mac and Linux\n- It can deal with very large datasets (compared to Excel)\n- A lot of freedom: graphs can be produced to your own taste\n- Supports all statistical analyses: from basic to very complex\n\n### Bad things about R\n- It can struggle with extremely large datasets\n- Difficult if you don't have any programming experience \n- Open source: many people contribute thus consistency can be low\n- Open source: documentation can be poor or written by/for experts\n- Can contain bugs and errors: packages that are widely used are probably correct, niche packages can contain errors, there is no central team assessing the quality of the code\n\n# Installing R\nR is available on the [CRAN website](https://cran.r-project.org/) (Comprehensive R Archive Network). \nIt can be installed on Linux, Mac and Windows. On the top of the CRAN page is a section with **Precompiled Binary Distribution**: R versions you can download as an .exe file (for Windows users) and are easy to install. What you download is the basic R installation: it contains the base package and other packages considered essential enough to include in the main installation. Exact content may vary with different versions of R.\nAs R is constantly being updated and new versions are constantly released, it is recommended to regularly install the newest version of R. \n\n# Installing RStudio\nAlthough you can work directly in the R editor, most people find it easier to use [RStudio](https://www.rstudio.com/)  on top of R. RStudio is free and available for Windows, Mac and Linux. You need to have R installed to run RStudio. 
\n\n# RStudio user interface\nWatch this [video tutorial](https://www.youtube.com/watch?v=5YmcEYTSN7k) on the different components of the RStudio user interface and this [video tutorial](https://www.youtube.com/watch?v=o0Y478jOjGk) on how to use the RStudio user interface.\n\n### The script editor\nA script is a text file that contains all the commands you want to run. You can write and run scripts and you can also save them so next time you need to do a similar analysis you can change and re-run the script with minimal effort. An R project can contain multiple scripts. \nThe script editor highlights syntax in scripts making it easy to find and prevent errors. It has many features that will help you write scripts e.g. autocompletion, find/replace, commenting. \n\n### Autocompletion\nIt supports the automatic completion of code, e.g. if you have an object named relfreq in your workspace, type rel in the script editor and it will show a list of possibilities to complete the name.\n\n<figure id=\"figure-1\"><img src=\"../../images/Rautocompletion.png\" alt=\"autocompletion\"><figcaption><span class=\"figcaption-prefix\">Figure 1:</span> Example for autocompletion</figcaption></figure>\n\n### Find and replace\nFind and replace can be opened using Ctrl+F.\n\n### Adding comments to scripts\nIn scripts you must include comments to help you remember or tell collaborators what you did. Comments are lines that start with a # symbol. This symbol tells R to ignore  this line. 
Comments are displayed in green.\nYou can comment and uncomment large selections of code using: **Comment/Uncomment Lines**\n\n<figure id=\"figure-2\"><img src=\"../../images/Rcomment_uncomment.png\" alt=\"comment_uncomment\"><figcaption><span class=\"figcaption-prefix\">Figure 2:</span> Menu Comment/Uncomment Lines</figcaption></figure>\n\n### Adding section headings to scripts\nAdd section headings to your scripts using the following format: #Heading Name####\n\n<figure id=\"figure-3\"><img src=\"../../images/Rsection_headings.png\" alt=\"section_headings\"><figcaption><span class=\"figcaption-prefix\">Figure 3:</span> Define section headings</figcaption></figure>\n\nAt the bottom of the script editor you can quickly navigate to sections in your script. Especially in long scripts this is very useful.\n\n### Creating a new script\nClick **File** in the top menu and select **New File > R Script**.\n\n<figure id=\"figure-4\"><img src=\"../../images/Rnew_script.png\" alt=\"new_script\"><figcaption><span class=\"figcaption-prefix\">Figure 4:</span> File Menu / New File</figcaption></figure>\n\nBesides a simple R script, there are many other file types you can create: \n- [R markdown](http://rmarkdown.rstudio.com/) file: incorporate R-code and its results in a report \n- R Notebook: R Markdown file with chunks of code that can be executed interactively, with output visible beneath the code\n- R Sweave file: incorporate R-code and its results in a Latex report\n\n### Opening an existing script\nClick **File** in the top menu and select **Open File**.\n\nScripts are opened as a tab in the script editor. You can open several scripts at the same time in RStudio. \n\n### Running a script\nTo run a script you select the code that you want to execute in the script editor and click the **Run** button at the top right of the script editor. 
\n\n![run_script](../../images/Rrun_script.png)\n\nThe code will be executed in the console.\n\n### Saving a script\n\nIf there are unsaved changes in a script, the name of the script will be red and followed by an asterisk. To save the script click the **Save** button: ![save_script](../../images/Rsave_script.png)\n\nR scripts should have the extension .R \nOnce it is saved the asterisk disappears and the name becomes black.\n\n### The console\nThe  > symbol in the console shows that R is ready to execute code \ne.g. type 10+3 and press return\n```\n> 10 + 3\n[1] 13\n>\n```\nThe result is printed in the console. \n\nIt is recommended to write commands in a script rather than typing them directly into the console. Creating a script makes it easier to reproduce, repeat and describe the analysis. If you select commands in the script editor and press the **Run** button, you will see the commands appearing in the console as they are executed. \n\nIf the > symbol does not reappear upon execution of a command it means that R has crashed or is still calculating. To terminate a command press Esc.\n\nThe console also has many [features that make life easier](https://support.rstudio.com/hc/en-us/articles/200404846-Working-in-the-Console) like autocompletion, retrieving previous commands.\n\n### Environment\nA list of all variables (numbers, vectors, plots, models...) that have been imported or generated. The variables that R creates and manipulates are called *objects*. \nTo remove all variables that have been generated in the RStudio session:\n```\n> rm(list=ls())\n```\nls() lists the objects in the current workspace and rm() removes them.\n\n### History\nAn overview of the last 500 commands that were run in the console: see [how to use the history](https://support.rstudio.com/hc/en-us/articles/200526217-Command-History).\n\n### Connections\nAn interface to easily [connect to databases](http://db.rstudio.com/) in R. 
\n\n### Files\nThe list of files and folders in the working directory. RStudio has a default working directory, typically your home folder.\n\n### Changing the working directory\n Often you want to work in the folder that contains the data. In that case you can change the working directory. \n Check which folder R is using as a working directory:\n```\n> getwd()\n```\nChange the working directory:\n```\n> setwd(\"D:/trainingen/zelfgegeven/R/\")\n```\n\n> ### {% icon comment %} Comment\n>\n> You need to use `/` or `\\\\` in paths. Either will work, but a single `\\` will not, because R treats the backslash as the escape character inside strings. \n{: .comment}\n\nChanging your working directory will make relative file references in your code invalid so you type this in the console **at the start of the analysis**.\n\nAlternatively you can change the working directory in the **Files** tab, expand **More** and select **Set As Working Directory**.\n\n> ### {% icon hands_on %} Hands-on: Demo\n>\n> 1. Download the demo script for this lesson and open it in RStudio\n> [`Demo_1.R`](http://data.bits.vib.be/pub/trainingen/RIntro/Demo_1.R)\n> 2. From the demo script run the **Set working directory** section\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Exercise 1\n>\n> Set the working directory to the folder that contains the demo script that you have downloaded and check if it was changed. \n{: .hands_on}\n\nTo list the files in the working directory:\n```\n> list.files() \n```\n\n### Plots\nPlots that are generated by the code you run will appear here.\nTo save a plot click the **Export** button: ![export_plot](../../images/Rexport_plot.png)\n\n### Packages\nR is popular because of the enormous diversity of packages. R is essentially a modular environment and you install and load the modules (packages) you need. 
Packages are available at the [CRAN](https://cran.r-project.org/web/packages/available_packages_by_name.html) and [Bioconductor](http://www.bioconductor.org/packages/release/BiocViews.html) websites. \nInstalling a package means that a copy of the package is downloaded and unzipped on your computer. If you want to know in what directory R stores the packages, type:\n\n```\n>.libPaths()\n[1] \"D:/R-3.6.0/library\"\n>\n```\nto see the default path where R stores packages. If you want to install a package into a different folder, use the *lib* argument of the install.packages() function:\n\n```\n> install.packages(\"car\",lib=\"C:/Users/Janick/R\")\n```\nYou only need to install a package once, as it is saved on your computer.\n\n### Installing R packages\nWatch this [video tutorial](https://www.youtube.com/watch?v=u1r5XTqrCTQ) on how to install CRAN packages. \nWhen you have made changes to the right side of the RStudio user interface (packages, files tab...), R is sometimes slow to show these changes. In that case hit the refresh button: ![refresh_button](../../images/Rrefresh_button.png)\n\nSome packages are not available on the CRAN site. Download in compressed format (as a .zip or .tar.gz file) from the source site. To install: select **Install from Package Archive File (.zip; .tar.gz)** in the **Install Packages** window and R will put it in the appropriate directory. \n\n<figure id=\"figure-5\"><img src=\"../../images/Rinstall_zip.png\" alt=\"install_zip\"><figcaption><span class=\"figcaption-prefix\">Figure 5:</span> Installing packages downloaded from their source site</figcaption></figure>\n\n### Installing Bioconductor packages\nBioconductor is a set of R packages that provides tools for the analysis of high-throughput data, e.g. 
NGS data.\nMake sure you have the BiocManager package installed:\n```\n> if (!requireNamespace(\"BiocManager\")) \ninstall.packages(\"BiocManager\") \n```\nThe if statement is checking if you already have the BiocManager package installed, if not then install.packages() will install it. BiocManager is a package to install and update Bioconductor packages. Once BiocManager is installed, you can install the Bioconductor core packages:\n```\n> BiocManager::install()\n```\nTo install additional Bioconductor packages e.g. **GenomicFeatures** you type the following command:\n```\n> BiocManager::install(\"GenomicFeatures\")\n```\nOverview of all available Bioconductor [packages](https://www.bioconductor.org/packages/release/BiocViews.html#___Software) and [workflows](https://www.bioconductor.org/packages/release/BiocViews.html#___Workflow).\n\n### Installing packages from GitHub\nGit is a free and open source version control system. Version control helps software developers manage changes to code by keeping track of every change in a special database. If a mistake is made, the developer can turn back the clock and compare earlier versions of the code to fix the mistake. \nThere is an install_github() function in the devtools package to install R packages hosted on GitHub:\n```\n> install.packages(\"devtools\") \n> library(devtools)\n> devtools::install_github(\"statOmics/MSqRob@MSqRob0.7.6\")\n```\n\n### Loading packages\n Each time you want to use a package you have to load it (activate its functions). 
Loading a package is done by selecting it in the list of installed packages or by typing the following command:\n```\n> library(\"name_of_package\")\n```\nIf R responds:\n```\nError in library(car) : there is no package called 'car'\n```\nor similar, it means that the car package needs to be installed first.\n\n> ### {% icon hands_on %} Hands-on: Demo\n>\n> Run commands of the **Installation** section of the demo script\n{: .hands_on}\n\n### Help\nYou can find a lot of documentation online: e.g. the [getting help section](https://www.r-project.org/help.html) of the R website. R documentation is not easily accessible nor well-structured  so it can be a challenge to consult the help files of R packages online. By far the most user-friendly interface for searching the R documentation is the [Rdocumentation website](https://www.rdocumentation.org/).\nAdditional useful links:\n- [Documentation of RStudio](https://support.rstudio.com/hc/en-us/categories/200035113-Documentation) \n- [Quick R by DataCamp](https://www.statmethods.net/about/sitemap.html): loads of basic and advanced tutorials\n- [R-bloggers](https://www.r-bloggers.com/): R-news and tutorials contributed by bloggers\n- [Rseek](https://rseek.org/): Google specifically for R.\n- [Google's R style guide](https://google.github.io/styleguide/Rguide.xml): Programming rules for R designed in collaboration with the entire R user community at Google to make R code easier to read, share, and verify.\n\nAccess the R documentation in RStudio using commands: help() or ?\n\n> ### {% icon hands_on %} Hands-on: Demo\n>\n> From the demo script run the **Get help** section\n{: .hands_on}\n\n### Viewer\nViews HTML files that are located on your computer.\n\n[All RStudio keyboard shortcuts](https://support.rstudio.com/hc/en-us/articles/200711853-Keyboard-Shortcuts)\n\n# Expressions in R\nR can handle any kind of data: numerical, character, logical... 
\n\n### Character data\nCharacter data like \"green\", \"cytoplasm\" must be typed in between **single or double quotes**:\n```\n> x <- \"Hello\"\n```\nTo use quotes in the text escape the quotes:\n```\n> x <- \"say \\\"Hello\\\"\"\n```\nNames of packages, files, paths on your computer, urls are all text data and need to be typed in between quotes. Names of variables do not. \n\n### Booleans\nBoolean values are **TRUE** and **FALSE** without quotes because they are Booleans not text. \n\n> ### {% icon comment %} Comment\n>\n> R is case sensitive: true and false are not recognized as Booleans. They have to be written in capitals.\n{: .comment}\n\n### Missing values\nMissing values are represented by **NA** (Not Available) without quotes. \nImpossible values (e.g., dividing by zero) are represented by the symbol NaN (Not A Number).\n\n### Arithmetic operators\n\n<figure id=\"figure-6\"><img src=\"../../images/Rarithmetic_operators.png\" alt=\"arithmetic_operators\"><figcaption><span class=\"figcaption-prefix\">Figure 6:</span> Overview of arithmetic operators</figcaption></figure>\n\nArithmetic operators follow the standard **order of priority**, with exponentiation the highest and addition and subtraction the lowest priority, but you can control the order with **parentheses**. Do not use brackets as these are for other purposes in R. \n\n### Logical operators\nLogical operators can be used to selectively execute code based on certain conditions. They allow to create logical expressions (comparisons) that return TRUE or FALSE. \n\n<figure id=\"figure-7\"><img src=\"../../images/Rlogic_operators.png\" alt=\"logic_operators\"><figcaption><span class=\"figcaption-prefix\">Figure 7:</span> Overview of logical operators</figcaption></figure>\n\nLogical expressions may be combined using logical operators. The NOT operator (!) can be used to assess whether something is NOT the case. \n\n```\n> x = 1\n> y = 2   \n> z = x > y      \t\tis x larger than y? 
\n> z              \t\t\tFALSE \n> u = TRUE\n> v = FALSE \n> u & v          \t\t\tu AND v: FALSE \n> u | v          \t\t\tu OR v: TRUE \n> !u             \t\t\tNOT u: FALSE\n```\n> ### {% icon hands_on %} Hands-on: Exercise 2a\n>\n>    > ### {% icon question %} Question \n>    > What's the difference between x=2 and x==2 ? \n>    >\n>    > > <details markdown=\"1\">\n>    > > <summary>{% icon solution %} Solution\n>    > > </summary>\n>    > >  The = operator attributes a value to a variable (see next section), x becomes 2. \n>    > >  The == is a logical operator, testing whether the logical expression x equals 2 is TRUE or FALSE.\n>    > > </details>\n>    >\n>    {: .question }\n>\n{: .hands_on }\n\n> ### {% icon hands_on %} Hands-on: Exercise 2b\n>\n> Check if the words UseR and user are equal. \n{: .hands_on}\n\n> ### {% icon comment %} R is case sensitive\n>\n> As exercise 2b showed R is indeed case sensitive.\n{: .comment}\n\n# Assigning variables\nA variable allows you to save a value or an object (a plot, a table, a list of values) in R. \nA value or object is assigned to a variable by the assignment operator **<-**\nIt consists of the two characters < (less than) and - (minus): \n```\n> v <- 4\tnow the value of variable v is 4\n````\nIn most contexts the = operator can be used as an alternative:\n```\n> v <- 4 \n> v = 4 \ngive the same result: a variable called v with value 4\n```\nAfter R has performed the assignment you will not see any output, as the value 4 has been saved to variable v. You can access and use this variable at any time and print its value in the console by running its name:\n```\n> v\n[1] 4\n```\nYou can now use v in expressions instead of 4\n```\n> v * v\n[1] 16\n```\nYou can re-assign a new value to a variable at any time: \n```\n> v <- \"a cool variable\"\n> v\n[1] \"a cool variable\"\n```\n\nR is not very fussy as far as syntax goes. Variable names can be anything, though they cannot begin with a number or symbol. 
Informative names often involve using more than one word. Provided there are **no spaces** between these words you can join them using dots, underscores and capital letters though the Google R style guide recommends that names are joined with a dot. \n\n### Using operators to create variables\nYou can combine variables into a new one using operators (like + or /).\n\n### Using functions to create variables\nA function is a piece of code that performs a specific task. \nFunctions are called by another line of code that sends a request to the function to do something or return a variable. The call may pass *arguments* (inputs) to the function. In other words a function allows you to combine variables (arguments) into a new variable (returned variable).\nThere are lots of built in functions in R and you can also write your own. Even the base package supplies a large number of pre-written functions to use. Other packages are filled with additional functions for related tasks.\nCalling a function in R has a certain syntax:\n**output <- function(list of arguments)** \nFor example: \n```\n> p <-  ggplot(mtcars,aes(wt,mpg))\n```\nIn this example **ggplot()** is the **function**. The brackets () are always needed. Before a function can start the actions and calculations it encodes, it needs prior information: **input** data and parameter settings. These are called the **arguments** of the function. In this example the arguments are:\n- **mtcars**: a table containing the input data\n- **aes(wt,mpg)**: defines the two columns of the table you want to plot: weight (wt) along the X-axis and miles/gallon (mpg) along the Y-axis.\n\nTo see the arguments of a function you can use **?** or **help()**:\n```\n> ? ggplot \n> help(ggplot)\n```\nThis opens the documentation of the function in the **Help** tab including an overview of the arguments of the function. 
At the bottom of the documentation page you find examples on how to use the function.\nThe function generates a plot so the plot **p** is the **output** of the function.  \n\n> ### {% icon hands_on %} Hands-on: Demo\n>\n> From the demo script run the **Assigning variables** section\n{: .hands_on}\n\n\n> ### {% icon hands_on %} Hands-on: Exercise 3a\n>\n> 1. Create a variable called patients with value 42\n> 2. Print the value of patients divided by 2\n> 3. Create a variable called patients_gr2 with value 24\n> 4. Print the total number of patients\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  patients <- 42\n>    >  patients/2\n>    >  patients_gr2 <- 24\n>    >  total_patients <- patients + patients_gr2\n>    >  total_patients\n>    > ```\n>    > </details>\n>\n>    > ### {% icon question %} Question\n>    >\n>    >  What will happen when you run this code ?\n>    > ```\n>    >  \"patients\" <- 42\n>    >  \"patients\"/2\n>    >  ```\n>    {: .question}\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Exercise 3b\n>\n> Check the arguments of the mean() function. \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  ?mean\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\nThe mean() function has many arguments and each argument has a default value. To use the default values simply do not specify these arguments in the function call. 
You only have to specify the arguments for which you want to use a value other than the default.\nTo show the **examples** section instead of the full documentation page:\n```\n> example(min) \n```\n\n> ### {% icon hands_on %} Hands-on: Exercise 3c\n>\n> Calculate and print the sum of patients and patients_gr2 using the sum() function.\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} solution: answer\n>    > </summary>\n>    >  ```\n>    >  sum(patients,patients_gr2)\n>    >  ```\n>    > </details>\n>\n>    > ### {% icon question %} Question\n>    >\n>    >  Replace the sum() function with the mean() function. What happens ?\n>    >    > <details markdown=\"1\">\n>    >    > <summary>{% icon solution %} solution: answer\n>    >    > </summary>\n>    >    >  Look at the help of the sum() function. What's the first argument ? \n>    >    >  Compare with the first argument of the mean() function\n>    >    > </details>\n>    >\n>    {: .question}\n>    > ### {% icon question %} Question\n>    >\n>    >  Will the code below work ?\n>    >  ```\n>    >  sum (patients,patients_gr2)\n>    >  ```\n>    {: .question}\n>    > ### {% icon question %} Question\n>    >\n>    >  Will the code below work ?\n>    >  ```\n>    >  sum ( patients , patients_gr2 )\n>    >  ```\n>    {: .question}\n{: .hands_on}\n\nSometimes functions from different packages have the same name. In that case use **package::function** to specify the package you want to use, e.g. 
`ggplot2::ggplot()` where `ggplot2` is the name of the package and `ggplot()` is the name of the function.\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 3d\n>\n> Create a variable `patients_gr3` with value \"twenty\" and print the total number of patients\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  patients_gr3 <- \"twenty\"\n>    >  patients + patients_gr3\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 3e\n>\n> 1. Create variable `x` with value `5`\n> 2. Create variable `y` with value `2`\n> 3. Create variable `z` as the sum of `x` and `y` and print the value of `z`\n> 4. Print `x - y`\n> 5. Print the product of `x` and `y` and add `2` to it\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  x <- 5\n>    >  y <- 2\n>    >  z <- x+y\n>    >  z\n>    >  x-y\n>    >  x*y+2\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 3f\n>\n> What is the difference between:\n> 1. `correctLogic <- TRUE`\n> 2. `incorrectLogic <- \"TRUE\"`\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 3g\n>\n> Is there a difference between:\n> 1. `name <- \"Janick\"`\n> 2. `name <- 'Janick'`\n> 3. `name <- Janick`\n{: .hands_on}\n","# Data structures in R\n{:.no_toc}\n\nThe power of R lies not in its ability to work with simple numbers but in its ability to work with large datasets. R has a wide variety of data structures including scalars, vectors, matrices, data frames, and lists.\n\n### Vectors\nThe simplest data structure is the *vector*, a single row consisting of data values of the same type, e.g. all numbers, characters, Booleans... \n\n#### Creating a vector\nThe function **c()** (short for \"combine values\" in a vector) is used to create vectors. 
The only arguments that need to be passed to c() are the  values that you want to combine into a vector. \nYou can create a **numeric** (a), **character** (b) or **logical** (c) vector:\n```\na <- c(1,2,5.3,6,-2,4)\nb <- c(\"janick\",\"jasper\",\"niels\")\nc <- c(TRUE,TRUE,TRUE,FALSE,TRUE,FALSE)\n```\nYou can also create a vector by **joining existing vectors with the c () function:**\n```\nx1 <- c(1,2,3)\nx2 <- c(3,4)\nc(x1,x2)\n# [1] 1 2 3 3 4\n```\n> ### {% icon hands_on %} Hands-on: Demo\n>\n> From the demo script run the **Data Creation: vectors** section\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Exercise 4a\n>\n> You count every day how many plants of the initial set of 40 plants developed lesions as a result of a mold infection. \n> \n> 1. Create a vector called Plants_with_lesions containing the results of your counts: 1,3,4,2,6\n> 2. Create a vector days containing the days of the week in the following format: Mon, Tues, Wednes, Thurs, Fri.\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  > Plants_with_lesions <- c(1,3,4,2,6)\n>    >  > days <-  c(\"Mon\",\"Tues\",\"Wednes\",\"Thurs\",\"Fri\")\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 4b\n>\n> Create a vector newVector with the following elements: 2,5,5,3,3,6,2 and print its content.\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  newVector <- c(2,5,5,3,3,6,2)\n>    >  newVector\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\nIf you need a sequence of consecutive integers you can create it with the **start:end** notation, e.g. 
a vector with values from 5 through 9\n```\n5:9\t\n# [1] 5 6 7 8 9\n```\nYou can also define a decreasing sequence of integers:\n```\n9:5\t\n# [1] 9 8 7 6 5\n```\nYou can create the same vector with the seq() function:\n```\nseq(5,9)  \n# [1] 5 6 7 8 9\n```\n\nBut seq (short for sequence) can do a lot more: it allows to take increments other than 1. It takes four arguments:\n- *from*: the first number in the sequence\n- *to*: the last possible number in the sequence. \n- *by=increment*: increment, can be added or subtracted depending on the start and the end of the sequence. If from > to then subtract increment, if from < to then add increment.\n- *length.out*: alternative to end, number of elements in the vector.\n\nAs you can see, some arguments of a function have a name, e.g. the increment argument is called *by*. \n\nThe **rep()** function **repeats** a value a specified number of times.\n```\nrep(\"bla\", 3)\n# [1] \"bla\" \"bla\" \"bla\"\n```\nYou can combine these functions with the c() function to make more complicated vectors:\n```\nc(rep(1,3), rep(2,3), rep(3,3))\n# [1] 1 1 1 2 2 2 3 3 3\n```\n\nTo generate a **random** set of **numbers** drawn from a normal distribution with a given mean and spread use the **rnorm(n, mean = 0, sd = 1)** function where:\n- *n*: how many random numbers do you want ?\n- *mean*: mean of the normal distribution\n- *sd*: standard deviation of the normal distribution\n```\nrnorm(1000, 3, 0.25)\n```\ngenerates 1000 numbers from a normal distribution with mean 3 and sd=0.25\n\nThe normal distribution implies that numbers close to the mean have a higher probability of occurring than numbers far from the mean.\n\nIf you want a set of random numbers from a uniform distribution (every number in the specified range has the same probability of being drawn) you can use the **runif(n, min=0, max=1)** function where:\n- *n*: how many random numbers do you want ?\n- *min*: lowest number of the range of numbers to choose from\n- *max*: 
highest number of the range of numbers to choose from\n\nThe most freedom is given by the **sample(x, size, replace = FALSE)** function: it takes a random sample of a specified size from the elements of x either with or without replacement:\n- *x*: a vector of elements from which to choose\n- *size*: how many random numbers do you want ?\n- *replace*: place sampled numbers back in set or not ?\n```\nsample(c(0,1), 100, replace=TRUE)\n```\t\ngenerates a set of 100 random zeros or ones.\n\nSuppose you want to simulate 10 rolls of a die. Because the outcome of a single roll is a number between 1 and 6, your code looks like this:\n```\nsample(1:6, 10, replace=TRUE)\n# [1] 2 2 5 3 5 3 5 6 3 5\n```\nYou tell sample() to return 10 values, each in the range 1:6. Because every roll of the die is independent, you sample with replacement. This means that you put the element you've drawn back into the list of values to choose from.\n\n> ### {% icon hands_on %} Hands-on: Exercise 4c\n>\n> For a study checking the effect of a drug on a disease, we want to store patient info. \n> \n> 1. Create a vector named ID containing numerical values 1,2,3,4\n> 2. Create a vector named treatment containing values A, placebo, B, and a missing value.\n> 3.  Use the rep() function to create a vector called smoking containing booleans true, true, true, and false. Check the documentation and the examples of usage of rep(). 
\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  ID <- 1:4\n>    >  treatment <- c(\"A\",\"placebo\",\"B\",NA)\n>    >  smoking <- c(rep(TRUE,3),FALSE)\n>    >  ```\n>    > </details>\n>\n>    > ### {% icon question %} Question\n>    >\n>    >  What will happen when you run this code ?\n>    >  ```\n>    >  smoking <- c(rep(true,3),false)\n>    >  ```\n>    {: .question}\n>    > ### {% icon question %} Question\n>    >\n>    >  What will happen when you run this code ?\n>    >  ```\n>    >  smoking <- c(rep(\"true\",3),\"false\")\n>    >  ```\n>    {: .question}\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 4d\n>\n> Create vector threes consisting of 3,3,3,3,3,3,3 and print the content of threes\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  threes<-rep(3,7)\n>    >  threes\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 4e\n>\n> Print ha ha ha ha\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  rep(\"ha\",4) \n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n> ### {% icon comment %} Comment\n>\n> Vectors cannot hold values of different types! R automatically converts all values to the same type so that the vector can hold them. If one of the values is a string all values will be converted to strings or in case of a mix of integers and booleans all values will be converted to integers. \n{: .comment}\n\n> ### {% icon comment %} Comment\n>\n> Words used as values have to be written between quotes, words used as variable names do not! 
If R encounters a word without quotes it will try to find a variable with that name.\n{: .comment}\n\n#### Referring to elements of a vector\nEvery element in a vector is assigned an index (= its position in the vector) in the order in which elements were entered. This index starts with one, not zero. \n\nYou can extract elements from vectors in two ways:\n1. You directly identify specific elements using their indices\n2. You create a logical operation to select certain elements.\n\nTo refer to elements of a vector use indices or a logical operation inside square brackets []\ne.g. to retrieve the 2nd element of vector a use:\n```\na[2]\n```\nto retrieve the 2nd, 3rd and 4th element of vector a use:\n```\na[2:4]\n```\nto retrieve the 2nd and 4th element of vector a use:\n```\na[c(2,4)]\n```\nYou also see [] when you look at output in the console. The number in between the square brackets is the index of the first value on the line. \n```\nv <- c(rep(5,10),rep(10,5))\n#[1] 5 5 5 5 5 5 5 5 5 5 10 10\n#[13] 10 10 10 \n```\nThere are 12 values on the first line, so on the second line of data, the first value (10) is actually on the 13th position in the vector v. So [13] refers to the index of the first element on the line.\n\nRetrieving elements using a logical operation is done as follows:\n```\nx\n#[1] 1 3 11 1 7\nx[x < 4]\n#[1] 1 3 1\n```\nRetrieving data with logical operators is based on the following fact: every logical statement produces the outcome TRUE or FALSE.\n```\nx < 4\n#[1]  TRUE  TRUE  FALSE  TRUE  FALSE\n```\n\nLogical operators applied to vectors will result in a vector of the same length consisting of TRUE or FALSE values depending on whether the statement is true for the particular element. If you use the outcomes of a logical operation to retrieve elements of a vector, only the elements where the outcome is TRUE will be selected. 
\n\n> ### {% icon hands_on %} Hands-on: Demo\n>\n> From the demo script run the **Data extraction: vectors** section\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Exercise 5a\n>\n> Create a vector named x containing the numbers 20 to 2. Retrieve elements that are larger than 5 and smaller than 15.\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  x <- 20:2\n>    >  x[x > 5 & x < 15]\n>    >  ```\n>    > </details>\n>\n>    > ### {% icon question %} Question\n>    >\n>    >  What will happen when you run this code ?\n>    > ```\n>    >  x[15 > x > 5]\n>    >  ```\n>    {: .question}\n>    > ### {% icon question %} Question\n>    >\n>    >  What will happen when you run this code ?\n>    > ```\n>    >  x(x > 5 & x < 15)\n>    >  ```\n>    {: .question}\n>    > ### {% icon question %} Question\n>    >\n>    >  What will happen when you run this code ?\n>    > ```\n>    >  x[x > 5] & x[x < 15]\n>    >  ```\n>    {: .question}\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Exercise 5b\n>\n> 1. Retrieve the 4th and 5th elements from the days vector.\n> 2. 
Retrieve elements from Plants_with_lesions that are larger than 2.\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  days[c(4,5)]\n>    >  Plants_with_lesions[Plants_with_lesions > 2]\n>    >  ```\n>    > </details>\n>\n>    > ### {% icon question %} Question\n>    >\n>    >  What will happen when you run this code ?\n>    > ```\n>    >  days[4,5]\n>    >  \n>    >  ```\n>    {: .question}\n>    > ### {% icon question %} Question\n>    >\n>    >  What will happen when you run this code ?\n>    > ```\n>    >  days[4:5]\n>    >  \n>    >  ```\n>    {: .question}\n>    > ### {% icon question %} Question\n>    >\n>    >  What will happen when you run this code ?\n>    > ```\n>    >  days(4:5)\n>    >   \n>    >  ```\n>    {: .question}\n{: .hands_on}\n\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 5c\n>\n> Create vector y with elements 9,2,4 and retrieve the second element of y.\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  y <-c (9,2,4)\n>    >  y[2] \n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 5d\n>\n> 1. Create vector z with elements 1, 2, 3, 4, 12, 31, 2, 51, 23, 1, 23, 2341, 23, 512, 32, 312, 123, 21, 3\n> 2. Retrieve the 3rd, 4th, 5th, 6th and 7th element\n> 3. Retrieve the 2nd and 4th element\n> 4. 
Retrieve elements from z that are larger than 100\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  z <- c(1,2,3,4,12,31,2,51,23,1,23,2341,23,512,32,312,123,21,3)\n>    >  z[3:7] \n>    >  z[c(2,4)]\n>    >  z[z > 100] \n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n\n> ### {% icon hands_on %} Hands-on: Demo\n>\n> From the demo script run the **Logical and arithmetic operations on variables** section\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 5h\n>\n> Retrieve elements from newVector (exercise 4b) that are larger than the corresponding elements of vector threes (exercise 4d).\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  newVector[newVector > threes]\n>    >   \n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n#### Removing, changing or adding elements in a vector\nTo remove an element from a vector use a negative index: '-' indicates 'NOT' followed by the index of the element you want to remove, e.g. to remove the second element of vector z use:\n```\nz <- z[-2]\n```\n\nChange or add elements by assigning a new value to that element. 
\n\n> ### {% icon hands_on %} Hands-on: Demo\n>\n> From the demo script run the **Data removal vectors** section\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Exercise 6a\n>\n> From vector x (exercise 5a) remove the first 8 elements and store the result in x2.\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  x2 <- x[-(1:8)]\n>    >  x2\n>    >  ```\n>    > </details>\n>\n>    > ### {% icon question %} Question\n>    >\n>    >  What will happen when you run this code ?\n>    > ```\n>    >  x2 <- x[-1:8]\n>    >  \n>    >  ```\n>    {: .question}\n{: .hands_on}\n\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 6b\n>\n> Retrieve the same elements from z as in exercise 5d2 but first replace the 3rd element by 7.\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  z[3] <- 7\n>    >  z[3:7] \n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n### Factors\nYou can tell R that a variable is categorical (= text labels representing categories although sometimes numbers are also used) by making it a factor. \n\nThe difference between a categorical variable and a continuous variable is that a categorical variable represents a limited number of categories. A continuous variable is the result of a measurement and can correspond to an infinite number of values. \n\nIn most cases categorical data is used to **describe** other data, it is not used in calculations e.g. which group does a measurement belong to. Storing data as factors ensures that the graphing and statistical functions in R will treat such data correctly.\n\nThere are two types of categorical data:\n1. unranked categorical data do not have an implied order\n2. ranked categorical data do have a natural ordering\n\nR will treat factors by default as unranked but you can create ordered (ranked) factors. 
\n\nTo create a factor, first create a vector and then convert it to a factor using the factor() function:\n```\nv <- c(1,4,4,4,3,5,4,4,5,3,2,5,4,3,1,3,1,5,3,4)\nv\n#[1] 1 4 4 4 3 5 4 4 5 3 2 5 4 3 1 3 1 5 3 4\nf <- factor(v,ordered=TRUE)\nf\n#[1] 1 4 4 4 3 5 4 4 5 3 2 5 4 3 1 3 1 5 3 4\n#Levels: 1 < 2 < 3 < 4 < 5 \n```\n\n> ### {% icon comment %} Comment\n>\n> The factor() function creates \"Levels\": these are the labels of the categories.\n{: .comment}\n\nThe only **required argument** of the factor() function is a **vector** of values which will be factorized. Both numeric and character vectors can be made into factors but you will use factor() typically for numerical data that represents categories. \n\nWhen you create a vector containing text values in R you have to factorize it but if you store the vector as a column in a data frame, text data is automatically converted to a factor. \n\nWhen you import data into R using read.() functions, the data is automatically stored in a data frame so text will be automatically converted into a factor. \n\nSo in reality (since you mostly import data into R) you use factor() mainly to factorize **numbers** that represent categories.\n\nBy default, factor() transforms a vector into an unordered factor, as does the automated factorization of the read.() functions. Unordered means that the categories are processed in alphabetical order: High will be plotted before Low since H comes first in the alphabet. \n\nIf the categories are ranked, you have to create an ordered factor, you have to add two additional arguments: \n- Set *ordered* to TRUE to indicate that the factor is ordered\n- *levels*: a vector of category labels (as strings) in the correct order\n\n> ### {% icon hands_on %} Hands-on: Demo\n>\n> From the demo script run the **Data creation: factors** section\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 7a\n>\n> 1. Create a vector gender with the following elements: Male, Female, male. \n> 2. 
Convert gender into a factor with levels: Male and Female\n> 3. Print the content of the factor. What happens?\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  gender <- c(\"Male\",\"Female\",\"male\")\n>    >  gender <- factor(gender,levels=c(\"Male\",\"Female\"))\n>    >  gender\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n","## 11.1 Introduction\nSo now that we know how to make functions, how can you re-use them? Imagine that you've started writing code and functions in one file and the project has grown to such an extent that it would be easier to maintain it in different files each containing a specific part of the project. Or you want to re-use some of the functions in other projects as well. \n\nIn Python you can import functions and chunks of code from files. Such a file containing the functions is called a *module*. Generally we say that we import a *definition* from a *module*. A module can have one or multiple functions in it. \nThe file name is the module name with the suffix `.py` appended. \n\nUsing the code from this module is possible by using **import**. In this way you can import your own functions, but also draw on a very extensive library of functions provided by Python (built-in modules). We will first look at the syntax for imports and how to import your own functions, then explore the most commonly used Python libraries.\n\n## 11.2 How imports work\nThe easiest way to import a module looks like this:\n\n```python\nimport module1\n```\n\nImagine that in the module `module1`, there is a function called `getMeanValue()`. 
This way of importing does not make the name of the function available; it only remembers the module name `module1` which you can then use to access the functions within the module:\n\n```python\nimport module1\nmodule1.getMeanValue([1,2,3])\n```\n\n## 11.3 How to create your own module\nThe easiest example is importing a module from within the same working directory. Let's create a Python module called `module1.py` with the code of the function `getMeanValue()` that we have written earlier (and you can find here below). \n\n> ### {% icon hands_on %} Create your own module\n>\n> To create your own module from Jupyter Notebook, follow these steps:\n> 1. In order to create a module in Jupyter Lab, first create a new notebook \n> 2. Rename the notebook (e.g. 'module1.ipynb') and copy paste the code in the notebook \n> 3. Click 'File', 'Download as' and 'Python' \n> 4. Jupyter will now download it to some local folder; copy it to your current working directory (in our case in the same directory as we're in right now). \n>\n{: .hands_on}\n\nUnfortunately, Jupyter Notebook doesn't have a streamlined & straightforward way of creating Python modules and Python scripts. When you export the notebook, it will always export the whole Notebook and not just a part of it, which makes it very messy if you have a very large notebook. \n\nImport the following code in the `module1.py` file. \n\n\n```python\n# When you download this as a Python script, Jupyter will automatically insert the environment shebang here. \n\ndef getMeanValue(valueList):\n    \"\"\"\n    Calculate the mean (average) value from a list of values.\n    Input: list of integers/floats\n    Output: mean value\n    \"\"\"\n    valueTotal = 0.0\n \n    for value in valueList:\n        valueTotal += value\n    numberValues = len(valueList)\n    \n    return (valueTotal/numberValues)\n```\n\n## 11.4 Import syntax \nWe can now use the module we just created by importing it. 
In this case where we import the whole 'module1' file, we can call the function as a method, similar to the methods for lists and strings that we saw earlier:\n\n\n```python\nimport module1\n\nprint(module1.getMeanValue([4,6,77,3,67,54,6,5]))\n```\n\nIf we were to write code for a huge project, long names can get exhaustive. Programmers will intrinsically make shortcut names for functions they use a lot. Renaming a module is therefore a common thing to do (e.g. NumPy as np, pandas as pd, etc.):\n\n\n```python\nimport module1 as m1\n\nprint(m1.getMeanValue([4,6,77,3,67,54,6,5]))\n```\n\nWhen importing a file, Python only searches the current directory, the directory that the entry-point script is running from, and sys.path which includes locations such as the package installation directory (it's actually a little more complex than this, but this covers most cases).\n\nHowever, you can specify the Python path yourself as well. If you're using the materials from [Github](https://github.com/vibbits/gentle-hands-on-python), note that within our folders there is a directory named `modules` and within this folder, there is a module named `module2` (recognizable due to its .py extension). In that module there are two functions: 'getMeanValue' and 'compareMeanValueOfLists'. \n\n\n```python\nfrom modules import module2\n\nprint(module2.getMeanValue([4,6,77,3,67,54,6,5]))\n```\n\n\n```python\nfrom modules import module2 as m2\n\nprint(m2.getMeanValue([4,6,77,3,67,54,6,5]))\n```\n\nAnother way of writing this is with an absolute path to the module. You can explicitly import an attribute from a module.\n\n\n```python\nfrom modules.module2 import compareMeanValueOfLists\n\nprint(compareMeanValueOfLists([1,2,3,4,5,6,7], [4,6,77,3,67,54,6,5]))\n```\n\nSo here we *import* the function compareMeanValueOfLists (without brackets!) 
from the file *module2* (without .py extension!).\n\nIn order to have an overview of all the different functions within a module, use `dir()`:\n\n\n```python\ndir(module2)\n```\n\n## 11.5 Built-in Modules\n\nThere are several built-in modules in Python, which you can import whenever you like.\n\nPython has many ready-to-use functions that can save you a lot of time when writing code. The most common ones are **time**, **sys**, **os/os.path** and **re**.\n\n### 11.5.1 `time`\nWith **time** you can get information on the current time and date, ...:\n\n\n```python\nimport time\ntime.ctime()  # Print current day and time\n```\n\n\n```python\ntime.time()   # Print system clock time\n```\n\n\n```python\ntime.sleep(10)       # Sleep for 10 seconds - the program will wait here\n```\n\nSee the [Python documentation](https://docs.python.org/3/library/time.html) for a full description of time. Also see [datetime](https://docs.python.org/3/library/datetime.html), which is a module to deal with date/time manipulations.\n\n\n### 11.5.2 `sys`\ngives you system-specific parameters and functions:\n\n\n```python\nimport sys\n\n```\n\n\n```python\nsys.argv  # A list of parameters that are given when calling this script \n          # from the command line (e.g. 
''python myScript a b c'')\n```\n\n\n```python\nsys.platform # The platform the code is currently running on\n```\n\n\n```python\nsys.path     # The directories where Python will look for things to import\n```\n\n\n```python\nhelp(sys.exit)          # Exit the code immediately\n```\n\nSee the [Python documentation](https://docs.python.org/3/library/sys.html) for a full description.\n\n### 11.5.3 `os` and `os.path` \nare very useful when dealing with files and directories:\n\n\n\n```python\nimport os\n```\n\n\n```python\n# Get the current working directory (cwd)\ncurrentDir = os.getcwd()\ncurrentDir\n```\n\n\n```python\n# Get a list of the files in the current working directory    \nmyFiles = os.listdir(currentDir)\nmyFiles\n```\n\n\n```python\n# Create a directory, rename it, and remove it\nos.mkdir(\"myTempDir\")\nos.rename(\"myTempDir\",\"myNewTempDir\")\nos.removedirs(\"myNewTempDir\")\n```\n\n\n```python\n# Create a full path name to the `module2` module in the modules folder\nmyFileFullPath = os.path.join(currentDir,'modules','module2.py')\nmyFileFullPath\n```\n\n\n```python\n# Does this file exist?\nos.path.exists(myFileFullPath)\n```\n\n\n```python\n# How big is the file?\nos.path.getsize(myFileFullPath)\n```\n\n\n```python\n# Split the directory path from the file name\n(myDir,myFileName) = os.path.split(myFileFullPath)\nprint(myDir)\nprint(myFileName)\n```\n\nSee the Python documentation for [**os**](https://docs.python.org/3/library/os.html) and [**os.path**](https://docs.python.org/3/library/os.path.html) for a full description.\n\n### 11.5.4 `re`\n\nA library that is very powerful for dealing with strings is **re**. It allows you to use regular expressions to examine text - using these is a course in itself, so just consider this simple example:\n\n\n```python\nimport re\n\nmyText = \"\"\"Call me Ishmael. 
Some years ago - never mind how long precisely -\nhaving little or no money in my purse, and nothing particular to interest me on \nshore, I thought I would sail about a little and see the watery part of the \nworld.\"\"\"\n\n# Compile a regular expression, \nmyPattern = re.compile(\"(w\\w+d)\")    # Look for the first word that starts with a w,\n                                     # is followed by 1 or more characters (\\w+)\n                                     # and ends in a d\n\nmySearch = myPattern.search(myText)\n\n# mySearch will be None if nothing was found\nif mySearch:\n    print(mySearch.groups())\n```\n\nSee the full [Python documentation](https://docs.python.org/3/library/re.html) on regular expressions for more information.\n\n## 11.6 Putting everything together\n\n\n---\n\n> ### {% icon hands_on %} Exercise 11.6.1\n>\n> Make a new directory in which you write out 5 files with a 2 second delay. Each file should contain the date and time when it was originally written out.\n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```python\n>    > # 1\n>    > import time, os\n>    >  \n>    > \n>    > # Create a variable for the directory name\n>    > myDir = \"timeTest\"\n>    > \n>    > # Check whether the directory exists, if not create it\n>    > if not os.path.exists(myDir):\n>    >     os.mkdir(myDir)\n>    > \n>    > \n>    > # Loop from 1 to 5\n>    > for i in range(1,6):\n>    > \n>    >     # Get the current time\n>    >     currentTime = time.ctime()\n>    > \n>    >     # Write out the file - use i to give a different name to each\n>    >     filePath = os.path.join(myDir,\"myFile{}.txt\".format(i))\n>    > \n>    >     outFileHandle = open(filePath,'w')    \n>    >     outFileHandle.write(\"{}\\n\".format(currentTime))\n>    >     outFileHandle.close()\n>    > \n>    >     print(\"Written file {}...\".format(filePath))\n>    > \n>    >     # Sleep for 2 seconds\n>    >     
time.sleep(2)\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n---\n\n\n\n\n---\n> ### {% icon hands_on %} Exercise 11.6.2\n>\n> Write a function to read in a FASTA file with an RNA sequence and return the RNA sequence (in 3 base unit chunks).\n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```python\n>    > # 2 \n>    > import os\n>    >  \n>    > def readRnaFastaFile(fileName):\n>    >  \n>    >     if not os.path.exists(fileName):\n>    >         print(\"Error: File {} not available!\".format(fileName))\n>    >         return (None,None,None)\n>    > \n>    >     fconnect = open(fileName)\n>    >     lines = fconnect.readlines()\n>    >     fconnect.close()\n>    > \n>    >     sequenceInfo = []\n>    >     moleculeName = None\n>    >     description = None\n>    > \n>    >     # Get information from the first line - ignore the >\n>    >     firstLine = lines[0]\n>    >     firstLineCols = firstLine[1:].split()\n>    >     moleculeName = firstLineCols[0]\n>    >     description = firstLine[1:].replace(moleculeName,'').strip()\n>    > \n>    >     # Now get the full sequence out\n>    >     fullSequence = \"\"\n>    >     for line in lines[1:]:\n>    > \n>    >         line = line.strip()\n>    >         fullSequence += line\n>    > \n>    >     # Divide up the sequence depending on type (amino acid or nucleic acid)\n>    >     for seqIndex in range(0,len(fullSequence),3):\n>    >         sequenceInfo.append(fullSequence[seqIndex:seqIndex+3])\n>    > \n>    >     return (moleculeName,description,sequenceInfo)\n>    > \n>    > \n>    > print(readRnaFastaFile(\"data/rnaSeq.txt\"))\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n---\n\n\n\n\n---\n> ### {% icon hands_on %} Exercise 11.6.3\n>\n> Write a program where you ask the user for a one-letter amino acid sequence, and print out the three-letter amino acid codes. 
Download the dictionary from section 8.2 and save it as a module named SequenceDicts.py first.\n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```python\n>    > # 3\n>    > # Note how you can import a function (or variable) with a different name for your program!\n>    > \n>    > from modules.SequenceDicts import proteinOneToThree as oneToThreeLetterCodes\n>    > \n>    > oneLetterSeq = input('Give one letter sequence:')\n>    >  \n>    > if oneLetterSeq:\n>    >     for oneLetterCode in oneLetterSeq:\n>    >         if oneLetterCode in oneToThreeLetterCodes.keys():\n>    >             print(oneToThreeLetterCodes[oneLetterCode])\n>    >         else:\n>    >             print(\"One letter code '{}' is not a valid amino acid code!\".format(oneLetterCode))\n>    > else:\n>    >     print(\"You didn't give me any information!\")\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n---\n\n\n\n---\n> ### {% icon hands_on %} Exercise 11.6.4 \n>\n> Write a program where you translate the RNA sequence `data/rnaSeq.txt` into 3 letter amino acid codes. Use the dictionary from section 8.2 (called myDictionary) and save it as a module named SequenceDicts.py first. You can use the `readFasta.py` module from the modules folder. 
\n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```python\n>    > from modules.SequenceDicts import standardRnaToProtein, proteinOneToThree\n>    > \n>    > from modules.readFasta import readRnaFastaFile\n>    > \n>    > (molName,description,sequenceInfo) = readRnaFastaFile(\"data/rnaSeq.txt\")\n>    > proteinThreeLetterSeq = []\n>    > \n>    > for rnaCodon in sequenceInfo:\n>    > \n>    >     aaOneLetterCode = standardRnaToProtein[rnaCodon]\n>    >     aaThreeLetterCode = proteinOneToThree[aaOneLetterCode]\n>    >     proteinThreeLetterSeq.append(aaThreeLetterCode)\n>    > \n>    > print(proteinThreeLetterSeq)\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n---\n\n\n\n---\n> ### {% icon hands_on %} Exercise 11.6.5 \n>\n> Write a program that:\n> - Has a function `readSampleInformationFile()` to read the information from this sample data file into a dictionary. Also check whether the file exists.\n> - Has a function `getSampleIdsForValueRange()` that can extract sample IDs from this dictionary. Print the sample IDs for pH 6.0-7.0, temperature 280-290 and volume 200-220 using this function.\n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```python\n>    > import os\n>    >  \n>    > def readSampleInformationFile(fileName):\n>    >  \n>    >     # Read in the sample information file in .csv (comma-delimited) format\n>    > \n>    >     # Doublecheck if file exists\n>    >     if not os.path.exists(fileName):\n>    >         print(\"File {} does not exist!\".format(fileName))\n>    >         return None\n>    >  \n>    >     # Open the file and read the information\n>    >     fileHandle = open(fileName)\n>    >     lines = fileHandle.readlines()\n>    >     fileHandle.close()\n>    > \n>    >     # Now read the information. 
The first line has the header information which\n>    >     # we are going to use to create the dictionary!\n>    > \n>    >     fileInfoDict = {}\n>    > \n>    >     headerCols = lines[0].strip().split(',')\n>    > \n>    >     # Now read in the information, use the first column as the key for the dictionary\n>    >     # Note that you could organise this differently by creating a dictionary with\n>    >     # the header names as keys, then a list of the values for each of the columns.\n>    > \n>    >     for line in lines[1:]:\n>    >  \n>    >         line = line.strip()  # Remove newline characters\n>    >         cols = line.split(',')\n>    > \n>    >         sampleId = int(cols[0])\n>    > \n>    >         fileInfoDict[sampleId] = {}\n>    > \n>    >         # Don't use the first column, is already the key!\n>    >         for i in range(1,len(headerCols)):\n>    >             valueName = headerCols[i]\n>    >  \n>    >             value = cols[i]\n>    >             if valueName in ('pH','temperature','volume'):\n>    >                 value = float(value)\n>    > \n>    >             fileInfoDict[sampleId][valueName] = value\n>    > \n>    >     # Return the dictionary with the file information\n>    >     return fileInfoDict\n>    > \n>    > def getSampleIdsForValueRange(fileInfoDict,valueName,lowValue,highValue):\n>    >  \n>    >     # Return the sample IDs that fit within the given value range for a kind of value\n>    >  \n>    >     #sampleIdList = fileInfoDict.keys()\n>    >     #sampleIdList.sort()\n>    >     sampleIdList = sorted(fileInfoDict.keys())\n>    >     sampleIdsFound = []\n>    > \n>    >     for sampleId in sampleIdList:\n>    > \n>    >         currentValue = fileInfoDict[sampleId][valueName]\n>    >  \n>    >         if lowValue <= currentValue <= highValue:\n>    >             sampleIdsFound.append(sampleId)\n>    >  \n>    >     return sampleIdsFound\n>    >  \n>    > if __name__ == '__main__':\n>    >  \n>    >     fileInfoDict 
= readSampleInformationFile(\"../data/SampleInfo.txt\")\n>    > \n>    >     print(getSampleIdsForValueRange(fileInfoDict,'pH',6.0,7.0))\n>    >     print(getSampleIdsForValueRange(fileInfoDict,'temperature',280,290))\n>    >     print(getSampleIdsForValueRange(fileInfoDict,'volume',200,220))\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n---\n\n",""],"documents":[],"related_posts":null,"config":null,"data":{"qbase-plus":{"name":"qbase-plus","type":"basics","category":"software","title":"QPCR analysis using qbase+","summary":"QPCR analysis using qbase+","requirements":null,"maintainers":["chdeb"]},"protein-structure-analysis":{"name":"protein-structure-analysis","type":"basics","category":"basics","title":"Protein Structure Analysis","summary":"Analyzing the protein structure of your protein-of-interest can be advantageous in multiple ways. It can help you discover regions which are good candidates to interact with other proteins. It can help you discover new domains. It can help with identifying differences with homologous proteins and a lot more.","extra":"protein_analysis","requirements":null,"maintainers":["abotzki"],"references":[{"authors":"Switchlab","title":"Home of FoldX plugin","link":"http://foldxyasara.switchlab.org/","summary":"More information about FoldX plugin and troubleshooting"},{"authors":"Wikipedia","title":"Wiki page about PDB","link":"https://en.wikipedia.org/wiki/Protein_Data_Bank","summary":"wiki page explaining information on PDB"},{"authors":"YASARA developers","title":"Working with YASARA","link":"http://www.yasara.org/movies.htm","summary":"Movie tutorials on YASARA"}]},"data-management-plans":{"name":"data-management-plans","type":"basics","category":"rdm","title":"How to write your Data Management Plan","summary":" Welcome to this self-study course about research data management. 
In this course you will learn more about how you can manage your research data.\n\nA lot of material of that version of the **Learn to write your DMP** course has been developed by RDM Support of University of Utrecht.\n\nWith all questions about this course you can contact: info.rdm@vib.be.\n\n__By VIB Bioinformatics Core, ELIXIR Belgium and Helis Academy__\n\n![http://www.vib.be/ -20width](https://corefacilities.vib.be/images/logos/bioinformatics_core_rgb_pos.png)\n\n* LiaScript: [this course as e-learning resource](https://material.bits.vib.be/courses/?https://raw.githubusercontent.com/vibbits/material-liascript/master/README.md#1)\n\nThe course consists of 6 chapters, divided in three categories.\n\n* Prepare Data collection\n* Prepare Data documentation\n- Handle Data storage\n- Handle Data security\n- Share Data selection and preservation\n- Share Data availability for reuse\n\nEach chapter starts with an introduction and ends with an assignment to write that part of your data management plan that corresponds with what you have just learned. You are currently in the introduction chapter. In this chapter you will learn more about the course and the learning environment. The course ends with chapter 7, 'Rounding up'.\n\n**Data Management Plans** The assignment throughout the course is to fill your own data management plan. At the end of each chapter you will be asked to log into DMPonline. With the content in this course, you should be able to apply this to your research project.\n\n**Questions about the course** If you have technical questions, please contact bits@vib.be. If you have content related questions, please contact RDM Support: info.rdm@vib.be. All feedback is welcome, as this is still a beta version. Based on feedback from users, more content may be added or existing content may be changed to a different form.\n\n**Technical requirements** Some activities use HTML5. Make sure your browser has installed the latest updates. 
If an activity doesn't work, we recommend you use another browser.\n\n**Licenses and credits** We wish you a lot of fun with the course and we hope it turns out to be a useful learning experience. The content of the course is adapted from an online course of [University of Utrecht](https://lll-platform-uu.nl/).\n","subtopics":[{"id":"introduction","title":"Introduction","description":"We are giving overviews on why it is important to manage your research data, user stories about data loss, funder requirements, and RDM Support."},{"id":"prepare","title":"Preparatory steps for Data collection and Documentation","description":" * Will you use existing data?\n* What data will you collect or create?\n* How will the data be collected or created?\n* How will you manage rights issues?\n* What are the costs involved in managing and storing your data? "},{"id":"handle","title":"Data handling","description":"Storing your data properly can save you a lot of time (in finding and interpreting) and frustration (in not losing it). Moreover, when properly structured and annotated during research, you’ll have your data preserved and/or shared with minimal effort at the end of your research."},{"id":"share","title":"Data sharing","description":"Research should be transparent and you should always be able to revert back to your data if necessary and be able to show others how you came to your results. Therefore, your research data with all information reasonably necessary for verification needs to be preserved.\nWith well-managed and preserved research data, you can defend yourself against allegations of mistakes. 
You can also prevent wrong conclusions from further spreading into the scientific community if there really are mistakes."}],"maintainers":["abotzki"],"references":[{"authors":"RDM training by the University of Edinburgh","title":"MANTRA","link":"http://mantra.edina.ac.uk/"},{"authors":"RDM training by the University of Melbourne","title":"Managing Data @ Melbourne","link":"http://library.unimelb.edu.au/Digital-Scholarship/training_and_outreach/data"},{"authors":"RDM website of University of Amsterdam","title":"Essentials for Data Support","link":"http://rdm.uva.nl/"},{"authors":"Delft University of Technology","title":"Research Data Services","link":"https://www.tudelft.nl/en/library/current-topics/research-data-management/"},{"authors":"Wageningen University & Research","title":"Essentials for Data Support","link":"https://www.wur.nl/en/Expertise-Services/Data-Management-Support-Hub.htm"},{"authors":"Digital Curation Centre","title":"Essentials for Data Support","link":"http://www.dcc.ac.uk/"},{"authors":"UK Data Archive","title":"Essentials for Data Support","link":"http://data-archive.ac.uk/"},{"authors":"Australian Networked Data Services (ANDS)","title":"Essentials for Data Support","link":"http://www.ands.org.au/working-with-data/data-management"},{"authors":"ORION e-learning course","title":"Essentials for Data Support","link":"https://www.orion-openscience.org/news/201912/new-launch-orion-mooc-open-science-life-sciences"}]},"gimp-inkscape":{"name":"gimp-inkscape","type":"basics","category":"software","title":"Initiation GIMP and Inkscape","summary":"This course aims at introducing GIMP and Inkscape (free alternatives for Adobe Photoshop and Illustrator) to prepare images for publication, annotate plots and images, and create figures (diagrams, infographics, ...). 
It is also a prerequisite for the 'Image Ethics and Poster Design' course.","requirements":null,"maintainers":["chdeb"]},"eln":{"name":"eln","type":"basics","category":"software","title":"Electronic Lab Notebook","summary":"This course aims at introducing ELN or E-notebook 2014 by PerkinElmer.","requirements":null,"maintainers":["chdeb"]},"linux":{"name":"linux","type":"basics","category":"basics","title":"Initiation to Linux Command Line","summary":"This training is a prerequisite for all Linux novices who want to follow a training that requires the use of Linux command line (e.g. metagenomics or GATK training).","requirements":null,"maintainers":["chdeb"]},"basic-statistics":{"name":"basic-statistics","type":"basics","category":"statistics","title":"Basic statistics theory","summary":"Basic statistics theory","requirements":null,"maintainers":["janick-bits"]},"R":{"name":"R","type":"basics","category":"programming","title":"Introduction to R","summary":"Introduction to R","requirements":null,"maintainers":["janick-bits"]},"contributors":{"chdeb":{"name":"Christof De Bo","email":"christof.debo@vib.be"},"tmuylder":{"name":"Tuur Muyldermans","email":"tuur.muyldermans@vib.be"},"janick-bits":{"name":"Janick Mathys","email":"janick.mathys@vib.be"},"abotzki":{"name":"Alexander Botzki","email":"alexander.botzki@vib.be"},"jvdurme":{"name":"Joost Van Durme","email":"bits@vib.be"},"hildebra":{"name":"Falk Hildebrand","email":"bits@vib.be"},"morganeTC":{"name":"Morgane Thomas-Chollier","email":"bits@vib.be"}},"metagenomics":{"name":"metagenomics","type":"basics","category":"omics","title":"Metagenomics","summary":"Metagenomics","maintainers":["janick-bits","abotzki"],"references":[{"authors":"Falk Hildebrand","title":"Lotus pipeline","link":"http://psbweb05.psb.ugent.be/lotus/downloads.html","summary":"More information about the Lotus pipeline"},{"authors":"Robert Edgar","title":"usearch version","link":"http://www.drive5.com/usearch/download.html","summary":"usearch version 
has to be downloaded separately due to licensing."},{"authors":"vegan","title":"R package vegan","link":"https://cran.r-project.org/web/packages/vegan/index.html","summary":"further analysis with R package vegan"}]},"chip-seq":{"name":"chip-seq","type":"basics","category":"omics","title":"ChIP-Seq analysis","summary":" This training gives an introduction to ChIP-seq data analysis, covering the processing steps starting from the reads to the peaks. Among all possible downstream analyses, the practical aspect will focus on motif analyses. A particular emphasis will be put on deciding which downstream analyses to perform depending on the biological question. This training does not cover all methods available today. It does not aim at bringing users to a professional NGS analyst level but provides enough information to allow biologists to understand what DNA sequencing practically is and to communicate with NGS experts for more in-depth needs.\n- Have an understanding of the nature of ChIP-Seq data - Perform a complete analysis workflow including QC, read mapping, visualization in a genome browser and peak-calling - Use the GenePattern platform for each step of the workflow and feel the complexity of the task - Have an overview of possible downstream analyses - Perform a motif analysis with online web programs\nFor this training, we will use a dataset produced by Myers et al [1] involved in the regulation of gene expression under anaerobic conditions in bacteria. We will focus on one factor: FNR. The advantage of this dataset is its small size, allowing real time execution of all steps of the dataset. ","requirements":null,"maintainers":["abotzki","janick-bits","morganeTC"],"references":[{"authors":"Bailey et al. ","title":"Practical Guidelines for the Comprehensive Analysis of ChIP-seq Data. 
PLoS Comput Biol 9, e1003326 (2013)","link":"http://data.bits.vib.be/pub/trainingen/NGSChIPSEQ/articles/2013_Bailey_PLoS%20Comput%20Biol.pdf","summary":""},{"authors":"Thomas-Chollier et al.","title":"A complete workflow for the analysis of full-size ChIP-seq (and similar) data sets using peak-motifs Nature Protocols 7, 1551–1568 (2012)","link":"http://data.bits.vib.be/pub/trainingen/NGSChIPSEQ/articles/2012_Thomas-Chollier_Nature%20Protocols.pdf","summary":""},{"authors":"Kevin S Myers et al.","title":"Genome-scale analysis of escherichia coli FNR reveals complex features of transcription factor binding. PLoS Genet.: 2013, 9(6);e1003565","link":"http://dx.doi.org/10.1371/journal.pgen.1003565","summary":""}]},"git-introduction":{"name":"git-introduction","type":"basics","category":"basics","title":"Introduction to Git & GitHub","summary":"This tutorial will get you started with Git & GitHub for version controlling your project's files & scripts.","requirements":null,"maintainers":["tmuylder"]},"graphpad":{"name":"graphpad","type":"basics","category":"statistics","title":"Statistics with GraphPad Prism","summary":"Statistics with GraphPad Prism","requirements":null,"maintainers":["janick-bits"]},"python-programming":{"name":"python-programming","type":"basics","category":"programming","title":"Gentle hands-on introduction to Python programming","summary":"In this course you'll get to learn all the basics of Python by plenty hands-on exercises related to bioinformatics or Life Sciences data-analyses.","extra":"data_analysis_python","maintainers":["tmuylder"],"references":[{"authors":"Wim Vrancken","title":"Gentle hands-on introduction to Python programming","link":"https://wiki.bits.vib.be/index.php/Python_training_material#Python","summary":"This course is based on the original content from Wim Vrancken"}]}},"pages":["\n","\n","\n","---\nname: galaxy_training_material\nchannels:\n  - conda-forge\n  - bioconda\n  - defaults\ndependencies:\n  - gmp=6.1.2\n  - 
jemalloc=5.0.1\n  - libiconv=1.15\n  - nodejs=9.11.1\n  - openssl=1.0.2o\n  - pandas=0.22.0\n  - pip\n  - pip:\n    - pathspec==0.5.6\n    - oyaml\n  - planemo>=0.55.0\n  - readline=7.0\n  - requests=2.18.4\n  - ruby=2.4.4\n  - yaml=0.1.7\n  - yamllint=1.11.0\n  - zlib=1.2.11\n  - libxml2\n  - ephemeris\n  - pkg-config\n","<ol id=\"markdown-toc\">\n  <li><a href=\"#overview-questions\" id=\"markdown-toc-overview-questions\">Overview Questions</a>    <ol>\n      <li><a href=\"#what-is-this-website\" id=\"markdown-toc-what-is-this-website\">What is this website?</a></li>\n      <li><a href=\"#what-are-the-tutorials-for\" id=\"markdown-toc-what-are-the-tutorials-for\">What are the tutorials for?</a></li>\n      <li><a href=\"#what-audiences-are-the-tutorials-for\" id=\"markdown-toc-what-audiences-are-the-tutorials-for\">What audiences are the tutorials for?</a></li>\n      <li><a href=\"#how-is-the-content-licensed\" id=\"markdown-toc-how-is-the-content-licensed\">How is the content licensed?</a></li>\n      <li><a href=\"#how-can-i-advertise-the-training-materials-on-my-posters\" id=\"markdown-toc-how-can-i-advertise-the-training-materials-on-my-posters\">How can I advertise the training materials on my posters?</a></li>\n      <li><a href=\"#how-do-i-use-this-material\" id=\"markdown-toc-how-do-i-use-this-material\">How do I use this material?</a></li>\n      <li><a href=\"#how-can-i-get-help\" id=\"markdown-toc-how-can-i-get-help\">How can I get help?</a></li>\n    </ol>\n  </li>\n  <li><a href=\"#for-instructors\" id=\"markdown-toc-for-instructors\">For Instructors</a>    <ol>\n      <li><a href=\"#where-do-i-start\" id=\"markdown-toc-where-do-i-start\">Where do I start?</a></li>\n      <li><a href=\"#how-can-i-fix-mistakes-or-expand-an-existing-tutorial-using-the-github-interface\" id=\"markdown-toc-how-can-i-fix-mistakes-or-expand-an-existing-tutorial-using-the-github-interface\">How can I fix mistakes or expand an existing tutorial using the GitHub 
interface?</a></li>\n      <li><a href=\"#sustainability-of-the-training-material-and-metadata\" id=\"markdown-toc-sustainability-of-the-training-material-and-metadata\">Sustainability of the training-material and metadata</a></li>\n    </ol>\n  </li>\n</ol>\n\n<h1 id=\"overview-questions\">Overview Questions</h1>\n\n<h2 id=\"what-is-this-website\">What is this website?</h2>\n\n<p>This website is a collection of hands-on tutorials that are designed to be interactive.</p>\n\n<p>This material is developed and maintained by the <a href=\"https://www.bits.vib.be/\">VIB Bioinformatics Core</a>.</p>\n\n<h2 id=\"what-are-the-tutorials-for\">What are the tutorials for?</h2>\n\n<p>These tutorials can be used for learning and teaching general data analysis, and for learning/teaching specific domains such as metagenomics and differential gene expression analysis with RNA-Seq data.</p>\n\n<h2 id=\"what-audiences-are-the-tutorials-for\">What audiences are the tutorials for?</h2>\n\n<p>There are two distinct audiences for these materials.</p>\n\n<ol>\n  <li><strong>Self-paced individual learners.</strong> These tutorials provide everything you need to learn a topic, from explanations of concepts to detailed hands-on exercises.</li>\n  <li><strong>Instructors.</strong> They are also designed to be used by instructors in teaching/training settings. 
Slides, and detailed tutorials are provided.</li>\n</ol>\n\n<h2 id=\"how-is-the-content-licensed\">How is the content licensed?</h2>\n\n<p>The content of this website is licensed under the <a href=\"https://creativecommons.org/licenses/by/4.0/\">Creative Commons Attribution 4.0 License</a>.</p>\n\n<h2 id=\"how-can-i-advertise-the-training-materials-on-my-posters\">How can I advertise the training materials on my posters?</h2>\n\n<p>We provide some QR codes and logos in the <a href=\"https://github.com/vibbits/training-material/tree/master/assets/images\">images folder</a>.</p>\n\n<h2 id=\"how-do-i-use-this-material\">How do I use this material?</h2>\n\n<p>Many topics include slide decks and if the topic you are interested in has slides then start there.  These will introduce the topic and important concepts.</p>\n\n<h2 id=\"how-can-i-get-help\">How can I get help?</h2>\n\n<p>If you have questions about this training material, you can reach us sending an email to bits@vib.be.</p>\n\n<h1 id=\"for-instructors\">For Instructors</h1>\n\n<p>This material can also be used to teach the content in a group setting to students and researchers.</p>\n\n<h2 id=\"where-do-i-start\">Where do I start?</h2>\n\n<p>Spend some time exploring the different tutorials and the different resources that are available. Become familiar with the structure of the tutorials and think about how you might use them in your teaching.</p>\n\n<h2 id=\"how-can-i-fix-mistakes-or-expand-an-existing-tutorial-using-the-github-interface\">How can I fix mistakes or expand an existing tutorial using the GitHub interface?</h2>\n\n<p>Please submit an issue via github.</p>\n\n<h2 id=\"sustainability-of-the-training-material-and-metadata\">Sustainability of the training-material and metadata</h2>\n\n<p>This repository is hosted on <a href=\"https://github.com/\">GitHub</a> using git as a <a href=\"https://en.wikipedia.org/wiki/Distributed_version_control\">DVCS</a>. 
Therefore the community is hosting backups of this repository in a decentralised way. The repository is self-contained and contains all needed content and all metadata.</p>\n","\n","\n","\n","\n","\n","\n","\n","\n","\n","\n","\n","\n","\n","\n","\n","\n","\n","\n","\n","\n","### ChiP-Seq Analysis ###\n\n[slides](http://data.bits.vib.be/pub/trainingen/NGSChIPSEQ/booklet/thomas-chollier_2020.pdf)\n","### How to fill the slide decks?\n\nPlease follow our\n[tutorial to learn how to fill the slides]({{ site.baseurl }}/topics/contributing/tutorials/create-new-tutorial-slides/slides.html)\n","### Protein Structure Analysis ###\n\n[slides](https://material.bits.vib.be/courses/?https://raw.githubusercontent.com/vibbits/material-liascript/master/slides-PSA.md)\n\n- Sequences, structures and databases\n- Experimental methods (X-rays, electrons and NMR)\n- Finding and visualising structures from the  Protein Data Bank\n- Comparing structures\n- Modelling mutations\n- Creating homology models","","","",".enlarge120[\n\n# ***De novo* Genome Assembly**\n\n]\n\n#### With thanks to T Seemann, D Bulach, I Cooke and Simon Gladman\n---\n.enlarge120[\n\n# ***De novo* assembly**\n\n]\n\n.pull-left[\n\n**The process of reconstructing the original DNA sequence from the fragment reads alone.**\n\n* Instinctively like a jigsaw puzzle\n\n  * Find reads which \"fit together\" (overlap)\n  * Could be missing pieces (sequencing bias)\n  * Some pieces will be dirty (sequencing errors)\n\n]\n\n.pull-right[ ![](../../images/Humpty.jpg) ]\n\n---\n\n# **Another View**\n\n![](../../images/newspaper.png)\n\n---\n\n# **Assembly: An Example**\n\n---\n\n# **A small \"genome\"**\n\n![](../../images/shakespear1.png)\n\n---\n\n# **Shakespearomics**\n\n![](../../images/shakespear2.png)\n\n---\n\n# **Shakespearomics**\n\n![](../../images/shakespear3.png)\n\n---\n\n# **Shakespearomics**\n\n![](../../images/shakespear4.png)\n\n---\n\n# **So far, so good!**\n\n---\n\n# **The Awful 
Truth**\n\n![](../../images/notsimply.png)\n\n## \"Genome assembly is impossible.\" - A/Prof. Mihai Pop\n\n---\n.enlarge120[\n\n# **Why is it so hard?**\n\n]\n\n.pull-left[\n* Millions of pieces\n  * Much, much shorter than the genome\n  * Lots of them look similar\n* Missing pieces\n  * Some parts can't be sequenced easily\n* Dirty Pieces\n  * Lots of errors in reads\n]\n\n.pull-right[ ![](../../images/worlds_hardest.png) ]\n\n---\n\n# **Assembly recipe**\n\n* Find all overlaps between reads\n  * Hmm, sounds like a lot of work..\n* Build a graph\n  * A picture of the read connections\n* Simplify the graph\n  * Sequencing errors will mess it up a lot\n* Traverse the graph\n  * Trace a sensible path to produce a consensus\n\n---\n\n![](../../images/olc_pic.png)\n\n---\n\n# **A more realistic graph**\n\n![](../../images/real_graph.png)\n\n---\n\n# .image-15[![](../../images/nofun.png)] **What ruins the graph?**\n\n* Read errors\n  * Introduces false edges and nodes\n\n* Non haploid organisms\n  * Heterozygosity causes lots of detours\n\n* Repeats\n  * If they are longer than the read length\n  * Causes nodes to be shared, locality confusion.\n\n---\n\n# **Repeats**\n\n---\n.enlarge120[\n# **What is a repeat?**\n]\n\n.pull-left[\n\n#### ***A segment of DNA which occurs more than once in the genome sequence***\n\n* Very common\n  * Transposons (self replicating genes)\n  * Satellites (repetitive adjacent patterns)\n  * Gene duplications (paralogs)\n\n]\n\n.pull-right[\n\n![](../../images/triplets.png)\n\n]\n\n---\n\n# **Effect on Assembly**\n\n![](../../images/repeat_effect.png)\n\n---\n.enlarge120[\n# **The law of repeats** .image-15[![](../../images/repeatafterme.png)]\n]\n\n## **It is impossible to resolve repeats of length S unless you have reads longer than S**\n\n## **It is impossible to resolve repeats of length S unless you have reads longer than S**\n\n---\n\n# **Scaffolding**\n\n---\n.enlarge120[\n# **Beyond contigs**\n]\n\n.pull-left[\n\nContig sizes are 
limited by:\n\n* the length of the repeats in your genome\n  * Can't change this\n\n\n* the length (or \"span\") of the reads\n  * Use long read technology\n  * Use tricks with other technology\n\n]\n\n---\n.enlarge120[\n# **Types of reads**\n]\n\n.pull-left[.enlarge120[**Example fragment**]]\n\n\n.remark-code[.enlarge120[atcgtatgatcttgagattctctcttcccttatagctgctata]]\n\n.pull-left[.enlarge120[**\"Single-end\" read**]]\n\n\n.remark-code[.enlarge120[**atcgtatg**atcttgagattctctcttcccttatagctgctata]]\n\nsequence *one* end of the fragment\n\n.pull-left[.enlarge120[**\"Paired-end\" read**]]\n\n\n.remark-code[.enlarge120[**atcgtatg**atcttgagattctctcttcccttatag**ctgctata**]]\n\nsequence both ends of the same fragment\n\n**We can exploit this information!**\n---\n\n.enlarge120[# **Scaffolding**]\n\n* **Paired end reads**\n  * Known sequences at each end of fragment\n  * Roughly known fragment length\n\n* **Most ends will occur in same contig**\n\n* **Some will occur in different contigs**\n  * ***evidence that these contigs are linked***\n---\n\n.enlarge120[# **Contigs to Scaffolds**]\n\n![](../../images/scaffolding.png)\n\n---\n\n.enlarge120[# **Assessing assemblies**]\n\n* We desire\n  * Total length similar to genome size\n  * Fewer, larger contigs\n  * Correct contigs\n\n* Metrics\n  * No generally useful measure. 
(No real prior information)\n  * Longest contigs, total base pairs in contigs, **N50**, ...\n\n---\n\n.enlarge120[# **The \"N50\"**]\n\n.enlarge120[***The length of that contig from which 50% of the bases are in it and shorter contigs***]\n\n* Imagine we have 7 contigs with lengths:\n  * 1, 1, 3, 5, 8, 12, 20\n\n* Total\n  * 1+1+3+5+8+12+20 = 50\n\n* N50 is the \"halfway sum\" = 25\n  * 1+1+3+5+8+**12** = 30 (>25) so **N50 is 12**\n\n---\n\n.enlarge120[# **2 levels of assembly**]\n\n* Draft assembly\n  * Will contain a number of non-linked scaffolds with gaps of unknown sequence\n  * Fairly easy to get to\n\n* Closed (finished) assembly\n  * One sequence for each chromosome\n  * Takes a **lot** more work\n  * Small genomes are becoming easier with long read tech\n  * Large genomes are the province of big consortia (e.g. Human Genome Consortium)\n\n---\n.enlarge120[# **How do I do it?**]\n---\n.enlarge120[\n# **Example**\n\n* Culture your bacterium\n\n\n* Extract your genomic DNA\n\n\n* Send it to your sequencing centre for Illumina sequencing\n  * 250bp paired end\n\n\n* Get back 2 files\n  * .remark-code[MRSA_R1.fastq.gz]\n  * .remark-code[MRSA_R2.fastq.gz]\n\n\n* ***Now what?***\n]\n\n---\n.enlarge120[# **Assembly tools**\n\n* **Genome**\n  * **Velvet, Velvet Optimizer, Spades,** Abyss, MIRA, Newbler, SGA, AllPaths, Ray, SOAPdenovo, ...\n\n\n* Meta-genome\n  * Meta Velvet, SGA, custom scripts + above\n\n\n* Transcriptome\n  * Trinity, Oases, Trans-abyss\n\n***And many, many others...***\n\n]\n\n---\n.enlarge120[\n# **Assembly Exercise #1**\n\n* We will do a simple assembly using **Velvet** in **Galaxy**\n* We can do a number of different assemblies and compare some assembly metrics.\n\n]\n","As we introduced a lot of new concepts it is important that you practice them.\n\n----\n\n> ### {% icon hands_on %} Exercise 7\n>\n> Write a program that does the following:\n> \n> 1. 
Ask the user for a full DNA sequence\n>     - Make sure the sequence contains only GACT\n> 2. Once you have a valid sequence\n>     - For each DNA fragment the user enters:\n>         - Check if it occurs in the full sequence\n>         - Print out the sequence position if so\n>         - Track each fragment\n>     - Keep on asking the user for DNA fragments, stop if they just press return\n> 3. As a summary, print out all fragments with their position that you tracked\n> \n> **Tips** to complete this exercise in case you get stuck.\n> - Use while loops: you can use the condition to decide when to end the loop depending on the user input\n> - Track the sequence fragment and position data using a list\n> - Use string methods!\n> - To check the full DNA sequence, you can count how many times each GACT letter occurs, add up these counts, and compare this value to the total length of the full DNA sequence\n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    > \n>    > ```python\n>    > # This variable will be used for the while loop\n>    > validSequence = False\n>    >  \n>    > # Keep on going as long as the DNA sequence is not valid\n>    > while not validSequence:\n>    >     # Get a string from the user\n>    >     fullDnaSequence = input(\"Please enter your full DNA sequence:\")\n>    >     fullDnaSequence = fullDnaSequence.upper()\n>    >     \n>    >     # Count the GACT characters in the sequence\n>    >     gactCount = 0\n>    >     for code in 'GACT':\n>    >         gactCount += fullDnaSequence.count(code)\n>    >  \n>    >     # Check if the number of GACT characters matches the full length of the sequence\n>    >     # and set validSequence to True if so - this will stop the while: loop\n>    >     if gactCount == len(fullDnaSequence):\n>    >         validSequence = True\n>    >     else:\n>    >         print(\"\\nInvalid sequence, only GACT allowed, try again!.\\n\")\n>    > \n>    > # Print some 
line breaks\n>    > print(\"\\n\\n\")\n>    >  \n>    > # Prime the list to track the DNA fragments and the variable for the while loop\n>    > dnaFragmentInfo = []\n>    > dnaFragment = input(\"Please give a DNA fragment to check:\")\n>    >  \n>    > while dnaFragment:\n>    >     \n>    >     # Check if present at all\n>    >     dnaFragmentCount = fullDnaSequence.count(dnaFragment)\n>    >     if dnaFragmentCount:\n>    >         currentDnaSequenceIndex = 0\n>    >         for i in range(dnaFragmentCount):        \n>    >             # Equivalent to currentDnaSequenceIndex = currentDnaSequenceIndex + fullDna...\n>    >             currentDnaSequenceIndex += fullDnaSequence[currentDnaSequenceIndex:].index(dnaFragment)\n>    >  \n>    >             print(\"\\n  Fragment {} present at position {}.\\n\".format(dnaFragment,currentDnaSequenceIndex + 1))\n>    >             dnaFragmentInfo.append((currentDnaSequenceIndex + 1,dnaFragment))\n>    >             currentDnaSequenceIndex += 1\n>    >     else:\n>    >         print(\"\\n  Fragment {} not present!\\n\".format(dnaFragment))\n>    >  \n>    >     dnaFragment = input(\"Please give a DNA fragment to check:\")\n>    > \n>    > # Print some line breaks\n>    > print(\"\\n\\n\")\n>    > \n>    > # Print out the fragment information again, first sort it\n>    > dnaFragmentInfo.sort()\n>    > for (dnaFragmentPosition,dnaFragment) in dnaFragmentInfo:\n>    >     print(\"Found {} at position {}\".format(dnaFragment,dnaFragmentPosition))\n>    > ```\n>    > </details>\n>\n{: .hands_on}\n","[Group exercises](http://data.bits.vib.be/pub/trainingen/NGSIntro/GroupExNoSol.pdf)\n\nYou can solve most quality issues found by FASTQC e.g. trimming contaminating adapters, low quality bases at the end of your reads, filtering low quality reads...\nThere's is a lot of debate on whether it is required to do this. 
Reads that are contaminated with adapter sequences will not map but if these reads make up a large fraction of the total number of reads they might slow down the mapping a lot. While it is true that mappers can use noisy info (still containing adapters, low quality bases...), the mapping results will be negatively affected by this noise.\nCleaning is in my opinion worthwhile especially when working with small reads and in case of extensive adapter contamination (almost always).\n\n## Quality control in Galaxy\n\nLinks:\n- [European Galaxy](http://usegalaxy.eu)\n- [Raw Arabidopsis data in European Galaxy](https://usegalaxy.eu/u/janick/h/rawdata)\n- [Groomed Arabidopsis data in European Galaxy](https://usegalaxy.eu/u/janick/h/groomeddata)\n- [Clean Arabidopsis data in European Galaxy](https://usegalaxy.eu/u/janick/h/cleandata)\n- [Raw E. coli data in European Galaxy](https://usegalaxy.eu/u/janick/h/ecoliraw)\n- [Groomed E. coli data in European Galaxy](https://usegalaxy.eu/u/janick/h/ecoligroomed)\n- [Filtered E. coli data in European Galaxy](https://usegalaxy.eu/u/janick/h/ecolifiltered)\n\n- [Main Galaxy](http://usegalaxy.org)\n- [Raw Arabidopsis data in main Galaxy](https://usegalaxy.org/u/janick/h/ngsdata)\n- [Groomed Arabidopsis data in main Galaxy](https://usegalaxy.org/u/janick/h/ngsgroomed-1)\n\nGalaxy is a bioinformatics server that contains many tools, data and analysis results. Before you can upload your data to Galaxy, you have to register or log in to Galaxy (see slides).\n\n### Upload data to Galaxy\n\nIf you want to work on your data in Galaxy, you have to first get the data into Galaxy. To accomplish this you can use the **Upload file** tool in the **Get data** section.\n**Instead I shared the file on Galaxy so you can import it using [this link](https://usegalaxy.eu/u/janick/h/rawdata).** Make sure that you are logged on to Galaxy before you do this. 
When you click this link you are redirected to a web page where you can import the file:\n\n ![](../../images/Galaxy6a.png)\n\n\n### The history\n\nData sets that are uploaded or created by running a tool appear in the history in the right Galaxy pane.\nTo give a history a new name, click the history's current name, type a new one and hit enter.\nClicking the name of a data set unfolds a preview, a short description and tools to manipulate the data.\n\n#### Icons in the History\n\n- Clicking the floppy (**Download**) icon will download the file to your computer\n- To visualize a file in the middle pane, click the eye (**View data**) icon next to the name of the file.\n\n\n#### Colors of files in the History\n\nData sets in the history have different colors representing different states.\n\n- **Grey**: The job is placed in the waiting queue. You can check the status of queued jobs by refreshing the History pane.\n- **Yellow**: The job is running.\n- **Green**: When the job has been run the status will change from yellow to green if completed successfully.\n- **Red**: When the job has been run the status will change from yellow to red if problems were encountered.\n\n\n### Running Groomer in Galaxy\n\nIf you select a tool in Galaxy it will automatically detect all data sets in your history that it can use as input. In the case shown below the tool does not recognize the fastq file in the history:\n\n ![](../../images/Galaxy10b.png)\n\nThe fact that the tool does not recognize the fastq file means that the fastq file is so messy that the tool can't read it. Remember that there is a tool to clean messy fastq files: **FASTQ Groomer** \n\nCheck the quality encoding in your fastq file (e.g. in FASTQC), and click the **Execute** button to start the tool:\n\n ![](../../images/Galaxy12a.png)\n\nGrooming takes long (30 min when Galaxy traffic is low). You can choose to wait but if it takes too long you can click the **Delete** button in the History (see slides) to stop the tool. 
I have provided the groomed file: import it in Galaxy using [this link](https://usegalaxy.eu/u/janick/h/groomeddata).\n\n\n### Using Trimmomatic in Galaxy\n\nTo clean your data use the **Trimmomatic** tool in the **Quality Control** section of tools. Click the name of the tool to display its parameters in the middle pane.\n\nSee [this page](http://wiki.bits.vib.be/index.php/Parameters_of_Trimmomatic) for an overview of the Trimmomatic parameters.\n\nA bit more explanation:\n\n- **The input file with the reads**: Galaxy will automatically suggest a file from your History that has the right format, in this case: a fastq file. If Galaxy doesn't make a suggestion it means it cannot find any files in your History with the right format.\n- **The sequence of the adapter**: provide a custom sequence. If you analyze your own data you know which adapter sequences were used. Since this is public data we don't really know the name of the adapter. However, remember that FASTQC gives you a list of contaminating adapter sequences so you have the sequence of the adapter. Choose **custom adapter sequence** and paste the adapter sequence from FASTQC. You can only enter one sequence.\n\n\n\nClick **Execute** to run the tool.\n\nIn the history you see a new item, colored in yellow as long as the tool is running. Regularly hit the **Refresh** button in the History to check if the tool has finished. Clipping should go fast, after a few minutes you should have the result.\n\n\n### Running FASTQC in Galaxy\n\nSearch for **FASTQC** in the tools pane and click the resulting **FastQC** link to open the parameter settings in the middle pane:\n\n ![](../../images/Galaxy18b.png)\n\nFASTQC automatically recognizes all files it can use as an input. Select the file you want to use.\nThe FASTQC implementation in Galaxy can take an optional file containing a list of contaminants. If you don't specify one, FASTQC will look for standardly used Illumina adapters. 
<!--As another option the tool takes a custom limits .txt file that allows setting the warning thresholds for the different modules and to specify which modules to include in the output.-->\n\nIn most cases you keep the default settings and click **Execute**.\n\n\n## Quality control in GenePattern\n\n[Genepattern](http://www.broadinstitute.org/cancer/software/genepattern/) is very similar to Galaxy. It's as user-friendly as Galaxy, allows analysis of NGS data just like Galaxy... \n\nIt provides easy access to hundreds of tools for different kinds of analyses (e.g. RNA-seq, microarray, proteomics and flow cytometry, sequence variation, copy number and network analysis) via a web browser.\n\n**Links**\n\n- [BITS Genepattern server](https://dev.bits.vib.be:8686/gp/pages/index.jsf)\n- [fasta file containing Arabidopsis adapter sequence](https://data.bits.vib.be/pub/trainingen/NGSIntro/adapter.fa)\n- [fasta file containing E. coli adapter sequence](https://data.bits.vib.be/pub/trainingen/NGSIntro/adapterEcoli.fa)\n- [Overview of Trimmomatic parameters](http://wiki.bits.vib.be/index.php/Parameters_of_Trimmomatic)\n\n\nConsult the [GenePattern tutorial](https://wiki.bits.vib.be/index.php/GenePattern_tutorial) for more info.\n\n### Running Groomer in GenePattern\n\nThe Broad Genepattern server does not contain the Groomer tool, but we have added the tool to our BITS Genepattern server. \n\n- Search for the Groomer tool in GenePattern.\n- Define the parameters: one of the parameters you need to define is **Input format**: the encoding of the fastq file you want to clean. The encoding is important because it determines the offset of the quality scores (ASCII offset 33 or ASCII offset 64). 
If you're not sure you can check the encoding of your file in the FastQC report (take into account that FastQC sporadically makes the wrong guess).\n ![](../../images/GP9.png)\n- Run the Groomer tool.\n\n### Running FastQC in GenePattern\n\n- Search for the FASTQC tool\n- Fill in the parameters\n- Run the FASTQC tool\n\nYou can open the resulting HTML report in your browser: \n\n- Click the name of the output file at the bottom of the page\n- Select **Open Link**\n ![](../../images/GP18.png)\n\n### Running Trimmomatic in GenePattern\n\nIn GenePattern you can improve the quality of your NGS data using the Trimmomatic tool. \n- Search for the Trimmomatic tool\n- Fill in the parameters: See [this page](http://wiki.bits.vib.be/index.php/Parameters_of_Trimmomatic) for an overview of the Trimmomatic parameters.\n- Run Trimmomatic\n\n## Removing adapters using command line tools\n\nSee [exercise on using cutadapt to trim adapter sequences](http://wiki.bits.vib.be/index.php/Linux_command_line#Improving_the_quality_of_the_data)\n","After quality control, the next step is to align the reads to a reference sequence. \nThe reference is in most cases the full genome sequence but sometimes, a library of EST sequences is used. In either way, aligning your reads to a reference sequence is called mapping.\nThe most used mappers are [BWA](http://bio-bwa.sourceforge.net/) and [Bowtie](http://bowtie-bio.sourceforge.net/bowtie2/index.shtml) **for DNA-Seq data** and [Tophat](http://tophat.cbcb.umd.edu/) , [STAR](https://github.com/alexdobin/STAR) , [STAR article](http://bioinformatics.oxfordjournals.org/content/early/2012/10/25/bioinformatics.bts635) , or [HISAT2](http://www.ccb.jhu.edu/software/hisat/index.shtml) **for RNA-Seq data**.\nMappers differ in methodology, parameters, how fast and how accurate they are and whether they tolerate spliced alignments or not (relevant for RNA-Seq). 
Bowtie is faster than BWA, but loses some sensitivity (does not map an equal amount of reads to the correct position in the genome as BWA). BWA and Bowtie cannot align spliced reads while Tophat, STAR and HISAT2 can.\nAt the moment STAR is the most popular RNASeq mapper and HISAT2 is being pushed over TopHat.\n\n## Mapping in Galaxy\n\nLinks:\n - [Mapped data for Arabidopsis in the European Galaxy](https://usegalaxy.eu/u/janick/h/cleandatamapped)\n - [paper on intron sizes in various organisms](https://www.ncbi.nlm.nih.gov/pubmed/10454621)\n - [Sorted and indexed data for *E.coli* in the European Galaxy](https://usegalaxy.eu/u/janick/h/ecolisorted)\n - [fasta file containing the E.coli K12 genome](http://data.bits.vib.be/pub/trainingen/NGSIntro/EcoliK12.fasta)\n - [Bowtie manual](http://bowtie-bio.sourceforge.net/manual.shtml)\n\n### Running RNA STAR in Galaxy\n\nSTAR has a large number of parameters, we'll give an overview of the most important ones:\n - **Single end or paired end data**: the parameters you have to set will adjust accordingly\n - **RNASeq Fastq file**: STAR automatically detects files it can use as input, select the file you want to map.\n - **Custom or built-in reference genome**: many reference genomes are built-in in Galaxy just select the correct organism from the list of reference genomes.\n - **Length of the genomic sequence around annotated junctions**: the default is 100 but the ideal value is **read length-1**.\n - **Count number of reads per gene**: map reads and create a count table (table with counts of how many reads map to each gene).\n - **Would you like to set output parameters (formatting and filtering)?**: in most cases **yes** because the default settings will most likely not be ideal for your data\n - **Would you like to set additional output parameters (formatting and filtering)?**: in most cases **yes** because the default settings will most likely not be ideal for your data\n - **Would you like unmapped reads included in 
the SAM?**: by default STAR does not save the unmapped reads, so if you want to analyze them (BLAST...) you need to change this setting.\n - **Maximum number of alignments to output a read's alignment results, plus 1**: default is 10 meaning that reads that map to more than 10 locations in the genome are excluded from the results.  Multimappers are common when you map short reads. What to do with them is a complicated issue. You could use them to represent expression of whole classes/families of RNAs (e.g. transposons, gene families...). It can be useful to have two separate files: one for unique mappers and one for multimappers.\n - **Maximum number of mismatches to output an alignment, plus 1**: maximum number of mismatches for a read (single-end) or a pair of reads (paired-end). Default is 10. The value you should choose is dependent on the read length. For short quality trimmed reads you typically allow 5% mismatches.\n - **Maximum ratio of mismatches to read length**: how many mismatches you allow in the alignment (number is represented as a fraction of the total read length). Typically you choose 0.05 (= 5%) but this depends on the quality of the reads. In case of reads with many sequencing errors you need to increase the fraction of mismatches you allow.\n - **Other parameters (seed, alignment, limits and chimeric alignment)**: choose **extended parameter list** because the default settings will most likely not be ideal for your data\n - **Alignment parameters: Maximum intron size**: maximum distance between reads from a pair when mapped to the genome.\n - **Two-pass mode: Use two pass mode to better map reads to unknown splice junctions**: for the most accurate mapping, you should run STAR in 2-pass mode. It allows to detect more reads mapping to novel splice junctions. 
The basic idea is to run STAR with standard parameters, then collect the junctions detected in this first pass, and use them as annotated junctions for the second pass mapping.\n - **Parameters related to chimeric reads**: chimeric reads occur when one read aligns to two distinct portions of the genome. In RNA-Seq chimeric reads may indicate the presence of chimeric genes. Many chimeric genes form through errors in DNA replication or DNA repair so that pieces of two different genes are combined. Chimeric genes can also occur when a retrotransposon accidentally copies the transcript of a gene and inserts it into the genome in a new location. Depending on where the new retrogene appears, it can produce a chimeric gene...\n\nClick **Execute** to start the mapping.\n\nSTAR produces 3 result files:\n - **bam** file containing all alignments (multimappers, reads that map to multiple locations, are printed at each location)\n - **tab** file containing all detected splice junctions\n - **log** file containing mapping statistics\n\n### Running Bowtie for Illumina (= Bowtie1) in Galaxy\n\nThis is an overview of the main parameters:\n - **Will you select a reference genome from your history or use a built-in index?** Galaxy has many built-in genomes for Bowtie 1 but you can also use a fasta file from the history when the organism you work is not supported.\n - **Is this library mate-paired?** single end or paired end ?\n - **FASTQ file** Galaxy will automatically detect potential input files, select the file you want to use as input.\n - **Bowtie settings to use** ask for full parameter list since the defaults are most likely not ideal for your data\n - **Trim n bases from high-quality (left) end of each read before alignment (-5)** trim bases from high-quality (left) end of each read before alignment, default is 0.\n - **Trim n bases from low-quality (right) end of each read before alignment (-3)** trim bases from low-quality (right) end of each read before alignment, 
default is 0.\n - **Alignment mode** when the default -n option is used, bowtie determines which alignments are valid according to the following policy: alignments may have no more than n mismatches (where n is a number 0-3, set with **Maximum number of mismatches permitted in the seed (-n)**) in the first l bases (where l is a number 5 or greater, set with **Seed length (-l)**) on the high-quality (left) end of the read. The first l bases are called the \"seed\". The sum of the Phred quality scores at all mismatched positions (not just in the seed) may not exceed e (set with **Maximum permitted total of quality values at all mismatched read positions (-e)**).\nIn -v mode, alignments may have no more than v mismatches, where v may be a number from 0 through 3 set using the **Maximum number of mismatches (-v)** option. Quality values are ignored.\n - **Suppress all alignments for a read if more than n reportable alignments exist (-m)**  default is no limit. Bowtie is designed to be very fast for small -m but can become significantly slower for larger values of -m\n\n### Download mapping results from Galaxy\n\nClick the name of the file containing the sorted alignments in the history.\nClick the **download** button at the bottom of the description. You should download two files: the bam file containing the mapping results and an index file (.bai) for fast access to the bam file. In Galaxy, indexing of bam files is done automatically. You need to download both files into the same folder. 
\n ![](../../images/IGV2.png)\n\n## Mapping in GenePattern\n\n**Links**:\n - [Parameters of STAR](https://wiki.bits.vib.be/index.php/Parameters_of_STAR)\n - [paper on intron sizes in various organisms](https://www.ncbi.nlm.nih.gov/pubmed/10454621)\n - [fasta file containing the E.coli K12 genome](http://data.bits.vib.be/pub/trainingen/NGSIntro/EcoliK12.fasta)\n - [Bowtie manual](http://bowtie-bio.sourceforge.net/manual.shtml)\n\n### Running STAR in GenePattern\n\n - Search for the STAR aligner tool\n - Fill in the parameters of STAR, you can find a detailed description of the parameters on [this page](https://wiki.bits.vib.be/index.php/Parameters_of_STAR)\n - Run STAR\n - Store the resulting bam file in your uploads folder\n - View the …align_summary.txt file in your browser to get an overview of the mapping results.\n\n### Running Bowtie_1 indexer in GenePattern\n\nSearch for the Bowtie_1 indexer tool. Here's a detailed description of the main parameters:\n\n - **fasta files** one or several fasta files containing the DNA sequence of the genome to index.\n - **index name** a name for the bowtie 1 index files.\n\nRun the indexer, it will produce 6 files:\n - <index name>.1.ebwt\n - <index name>.2.ebwt\n - <index name>.3.ebwt\n - <index name>.4.ebwt\n - <index name>.rev.1.ebwt\n - <index name>.rev.2.ebwt\n\nFor easy handling in GenePattern Bowtie_1.indexer puts all these files in a ZIP archive, which can be given as input to Bowtie_1.aligner. Store the resulting zip file in your uploads folder.\n\n### Running Picard SortSam in GenePattern\n\nSome downstream tools cannot handle raw bam files since they are so large and chaotic, they need sorted and indexed bam files. Bam files can be sorted and indexed with samtools or Picard. \n - Search for a tool that can sort sam or bam files\n - Sort the file, keep the results in bam format. 
Sorting will add an index to the bam file (this is the .bai file that is generated)\n - Download the sorted bam and bai files to your computer\n\n## Mapping via command line tools\n\nOn our Linux command line page you can find:\n[an exercise on mapping with Bowtie](http://wiki.bits.vib.be/index.php/Linux_command_line#Mapping_reads_with_Bowtie) via the command line.\n\nWe will handle the mapping in detail in advanced NGS trainings, so we are not going into more detail now.\n\n## Visualisation of mapping results in IGV\n\n - [bam-file for *Arabidopsis thaliana* from GenePattern](http://data.bits.vib.be/pub/trainingen/NGSIntro/GP_Athaliana.bam)\n - [bai-file for *Arabidopsis thaliana* from GenePattern](http://data.bits.vib.be/pub/trainingen/NGSIntro/GP_Athaliana.bai)\n - [bam-file for *Arabidopsis thaliana* from Galaxy](http://data.bits.vib.be/pub/trainingen/NGSIntro/Galaxy_Athaliana.bam)\n - [bai-file for *Arabidopsis thaliana* from Galaxy](http://data.bits.vib.be/pub/trainingen/NGSIntro/Galaxy_Athaliana.bai)\n - [bam-file for *E. coli* from GenePattern](http://data.bits.vib.be/pub/trainingen/NGSIntro/GP_Ecoli.bam)\n - [bai-file for *E. coli* from GenePattern](http://data.bits.vib.be/pub/trainingen/NGSIntro/GP_Ecoli.bai)\n - [bam-file for *E. coli* from Galaxy](http://data.bits.vib.be/pub/trainingen/NGSIntro/Galaxy_Ecoli.bam)\n - [bai-file for *E. coli* from Galaxy](http://data.bits.vib.be/pub/trainingen/NGSIntro/Galaxy_Ecoli.bai)\n\nIGV needs a sorted bam file and an index (.bai) file.\n\n - Open IGV by clicking its icon on the Desktop. Be patient, it might take a few minutes for the program to start.\n - If necessary change the genome in IGV from **Human hg19** to the one you used in the mapping.\n\n ![](../../images/IGV3.png)\n - Load the mapped reads via **File** in the top menu and **Load from File**.\n\n ![](../../images/IGV4.png)\n\nSelect the .bam file to open. 
You don't need to load the .bai file, it's sufficient that it is present in the same folder as the .bam file. \n - This loads the data into the center view. At this point, you can't see the reads, you have to zoom in to view them.\n - To zoom in on a gene, type its accession number in the top toolbar and click **Go**:\n\n ![](../../images/IGV5.png)\n\n - Zooming in can be done using the zoom bar in the top toolbar:\n\n ![](../../images/IGV6.png)\n\nThe reads are represented by grey arrows, the arrow indicating the orientation of the mapping. Hovering your mouse over a read gives additional info on the mapping. The colored nucleotides indicate mismatches between the read and the reference.\n\nBy default IGV calculates and displays the coverage track (red) for an alignment file. When IGV is zoomed to the alignment read visibility threshold (by default 30 KB), the coverage track displays the depth of the reads displayed at each locus as a gray bar chart. If a nucleotide differs from the reference sequence in greater than 20% of quality weighted reads, IGV colors the bar in proportion to the read count of each base (A, C, G, T). You can view count details by hovering the mouse over a coverage bar:\n\n ![](../../images/IGV8.png)\n\n## Quality control of mapping results using Qualimap\n\nQualimap is very similar to FastQC. It has an easy-to-use user interface and works on any platform: Windows, Mac, Linux. 
It's installed on the BITS laptops: you can run it by clicking the icon on the desktop.\nYou can do several analyses in Qualimap: we will focus on the BAM Quality Control and the RNA-Seq Quality Control.\n\n### Starting a BAM QC analysis in Qualimap\n\n - [gtf-file for *Arabidopsis thaliana* from Ensembl Plants](http://data.bits.vib.be/pub/trainingen/NGSIntro/Arabidopsis_thaliana.TAIR10.31.gtf)\n\nIn the top menu, expand **File** and select **New analysis** and **BAM QC**\n\n ![](../../images/GP22.png)\n\nA parameters form is opened.\n\nSelect a .bam file as input file and leave all other parameters at their default setting:\n\n ![](../../images/GP23.png)\n\n - With the default settings the mapping is evaluated over the full reference sequence but you can limit the evaluation to certain regions by selecting the **Analyze regions** option and providing a gtf file containing the regions of interest.\n - There are parameters for specific types of NGS experiments e.g. stranded libraries (**Library strand specificity**) and paired-end reads (**Detect overlapping paired-end reads**).\n\nA BAM Quality Control report is generated, very similar to the report that FastQC produces. Let's take a look at some of the figures in the report:\n\n - **Coverage across reference**: In the top figure you see the coverage (red line; average coverage in a window of a certain size) across the reference sequence. In the bottom figure you see the GC content (black line) across the reference.\n - **Coverage histograms**: What percentage of the genome is not covered, covered at least once....\n\n### Starting a RNA-Seq QC analysis in Qualimap\n\nSpecifically for RNA-Seq data you can  do a RNA-Seq QC in Qualimap. In the top menu, expand **File** and select **New analysis** and **RNA-seq QC**\n\n ![](../../images/GP25.png)\n\nA parameters form is opened. \n\n - You need to provide an annotation file so Qualimap knows where the exons are located on the reference sequence. 
This annotation file is in gtf format and can be downloaded from the Ensembl or [EnsemblGenomes](http://ensemblgenomes.org/info/access/ftp) ftp site. GTF stands for general transfer format, used for linking features (exons, introns, genes, transcripts, repeats, mutations...) to locations in the genome. \n\n ![](../../images/GP26.png)\nSelect the .gtf file as annotation file\n - Select the .bam file as input file \n - Leave all other parameters at their default setting\n\n ![](../../images/GP28.png)\n\nA RNA-seq Quality Control report is generated.\n\n*Coverage Profile (Total)*: The plot shows mean coverage profile of the transcripts. All transcripts with non-zero coverage are used to calculate this plot.\n*Coverage Profile (Low)*: The plot shows mean coverage profile of 500 lowest-expressed genes.\n*Coverage Profile (High)*: The plot shows mean coverage profile of 500 highest-expressed genes.\n*Coverage Histogram (0-50x)*: Coverage of transcripts from 0 to 50X. If certain genes have higher coverage level they are added to the last column (50X).\n*Junction Analysis*: This pie chart shows analysis of junction positions in spliced alignments. Known category represents percentage of alignments where both junction sides are known. Partly known represents alignments where only one junction side is known. All other alignments with junctions are marked as Novel.\n\n[Solutions of Group Exercises](http://data.bits.vib.be/pub/trainingen/NGSIntro/Solutions.pdf)\n\n","[Download the slides for this training session](http://data.bits.vib.be/pub/trainingen/NGSIntro/NGSBroad.pdf).\n\nThe dataset comes from a 2014 publication on *Human Airway Smooth Muscle Transcriptome Changes in Response to Asthma Medications*.\nThe goal of the analysis is to find DE genes (differentially expressed: genes with different expression levels in one group of samples compared to other groups of samples). 
Typically the groups of samples represent different treatments: one consisting of biological replicates that have received a control treatment, others consisting of replicates that received a specific biological treatment.\n\nIn this experiment the data consists of four groups (**treatment**):\n - The **dex** group: samples from 4 cell lines after treatment with the glucocorticoid dexamethasone (dex), used as asthma medication\n - The **alb** group: samples from the same cell lines after treatment with albuterol (alb), another asthma medication\n - The **alb_dex** group: samples from the same cell lines after treatment with both asthma medications\n - The **untreated** group: samples from the same untreated cell lines cultured in parallel.\n\nSo all samples come from the same 4 cell lines (**cells**).\n```\n#   run_accession  read_count  samples            cells    treatment\n1   SRR1039508     22935521    CL1_untreated   CL1   untreated\n2   SRR1039509     21155707    CL1_Dex         CL1   Dex\n3   SRR1039510     22852619    CL1_Alb         CL1   Alb\n4   SRR1039511     21938637    CL1_Alb_Dex     CL1   Alb_Dex\n5   SRR1039512     28136282    CL2_untreated   CL2  untreated\n...\n```\nThe data comes from a paired-end sequencing experiment so we have two files for each sample. \nFor simplicity we will do the analysis on a single sample, SRR1039509, obtained from dexamethasone treated cell line 1.\n\n### Quality checks\n\nBefore you analyze the data, it is crucial to check the quality of the data.\nWe use the standard tool for checking the quality of NGS data generated on the Illumina platform: [FASTQC](http://www.bioinformatics.babraham.ac.uk/projects/fastqc/)\n\nCorrect interpretation of the FASTQC report is very important.\nIf the quality of your data is good, you can proceed with the analysis.\n**!! If the quality of your data is very bad, don't immediately throw the data in the recycle bin but contact an expert and ask for his/her opinion. 
!!**\n\nDouble click the FASTQC icon on the Desktop and open the fastq file (it's in the summer folder of your home folder). FASTQC consists of multiple modules each checking a specific aspect of the quality of the data. On the first page you can select the module you wish to view.\nThe names of the modules are preceded by an icon that reflects the quality of the data. The icon indicates whether the results of the module seem normal (green tick), slightly abnormal (orange triangle) or very unusual (red cross).\n\nHowever, these evaluations must be interpreted in the context of what you expect from your library. A 'normal' sample as far as FastQC is concerned is random and diverse. Some experiments may be expected to produce libraries which are biased. You should treat the icons as pointers to where you should concentrate your attention on and understand why your library may not look normal.\n\n#### General information on the reads\n\n> How long are the reads in this file ?\n>  63 nucleotides\n>  ![](../../images/FASTQCRNASeqB1.png)\n\n#### Checking the quality scores of the reads\n\nPhred scores represent base call quality. The higher the score the more reliable the base call. Often the quality of reads degrades over the length of the read. Therefore, it is common practice to determine the average quality of the first, second, third,...nth base by plotting the distribution of the Phred scores on each position of the reads using box plots.\n\n> Evaluate the quality scores per position\n>  Go to the **Per base sequence quality** module: \n>  ![](../../images/FASTQCRNASeqB2.png)\nThe y-axis on the graph shows the Phred quality scores, the x-axis shows the position in the read. So again you see that the reads are 63 bases long.\n\nThe average Phred score is depicted by the blue line, the median Phred score by the red line. The yellow boxes contain 50% of all Phred scores on a certain position. As expected the quality is steadily declining. 
\n\nThe background of the graph divides the y-axis into very good quality calls (green), calls of reasonable quality (orange), and calls of poor quality (red; Phred score < 20). As you can see the Phred scores of this data set are very high.\n\nRemark: In new Illumina kits the sequence quality goes up a bit first before it steadily declines.\n\nInstead of showing the quality of each position separately, you can calculate the average Phred score of each read and show a cumulative plot of the average qualities of all the reads.\n\n> Evaluate the overall quality\n> Go to the **Per sequence quality scores** module: \n ![](../../images/FASTQCRNASeqB3.png)\nThe y-axis on the graph shows the number of reads, the x-axis shows the Phred score.\n\nMost reads have an average Phred score of 40. This is a very high score (Phred scores of Illumina calls range from -5 to 41).\n\n\nIllumina flow cells are divided into tiles. To see if there is a loss in quality associated with specific parts of the flow cell, FASTQC calculates average quality scores for each tile across all positions in the reads. \n\n> Evaluate the quality per tile\n> Go to the **Per tile sequence quality** module: \n ![](../../images/FASTQCRNASeqB4.png)\nThe y-axis on the graph shows the tile number, the x-axis shows the position in the reads.\n\nThe plot shows the deviation from the average tile quality. The colours are on a cold to hot scale, with blue being the average tile quality and other colours representing tiles where the quality was different from the average. In the example you see that a few tiles show poor quality over a few positions. A good plot should be blue all over. 
Although the plot isn't entirely blue the results of this module are still acceptable.\n \n\nReasons for seeing warnings or failures on this plot could be transient problems such as bubbles going through the flow cell, or they could be more permanent problems such as smudges or debris on/in the flow cell or a very high density of clusters in a tile. The most common cause of warnings in this module is the flow cell being overloaded.\n\nIt is recommended to ignore warnings/failures which mildly affect a small number of tiles for only a few cycles, and to only pursue larger effects which show high deviation in scores, or which persist for a high number of cycles.\n\n#### Checking duplicates\n\nIn a diverse library generated by shearing genomic DNA, most fragments will occur only once. A low level of duplication may indicate a very high level of coverage of some target sequences, but a high level of duplication indicates a bias (eg PCR overamplification, contamination of the library with adapter dimers...).\n\nThe **Sequence duplication levels** module counts the degree of duplication for every read and creates a plot showing the relative number of reads with different degrees of duplication.\n\n> Evaluate the sequence duplication levels\n> Go to the **Sequence duplication levels** module: \n ![](../../images/FASTQCRNASeq5.png)\nThe y-axis on the graph shows the percentage of occurrence, the x-axis shows the duplication level.\n\nThe blue line represents the counts of all duplicated sequences. The percentage is computed relative to the total number of reads.\n\nThe red line represents the number of **distinct** sequences that are duplicated. The percentage is computed relative to the total number of **distinct** sequences in the data (see slides).\n\nSeeing duplication in RNA-Seq data is normal. To sequence lowly expressed genes you must oversequence the genes with high expression levels. 
However, RNA-Seq libraries may be contaminated with adapters.\n\nThe presence of contaminating adapters will produce spikes on the far right of the plot. These peaks will appear in the blue trace as they make up a high proportion of the original library, but usually disappear in the red trace as they make up a small proportion of the deduplicated set (you only use 2 adapters to create a library).\n\nAdditionally, as in every RNA-Seq file you also see a substantial number of oversequenced reads with lower duplication levels.\n\n\nSince the reads are random fragments from the genome sequence, the contribution of A, C, G and T should be identical on each position.\n\n> Evaluate the per base sequence content\n> Go to the **Per base sequence content** module: \n\n ![](../../images/FASTQCRNASeqB6.png)\n\nThe y-axis on the graph shows the percentage of occurrence, the x-axis shows the position in the read.\n\nOn this plot you should see straight lines for the four nucleotides. In reality you often see that this is not the case for the first positions. Libraries produced by priming using random hexamers (nearly all RNA-Seq libraries) and those which were fragmented using transposases inherit an intrinsic bias in the first positions of the reads. This bias does not come from a single sequence, but because of enrichment of a number of different K-mers at the 5' end of the reads. So it isn't something you can correct by trimming (you do not have one specific sequence that you can remove from the reads). In most cases it doesn't adversely affect the downstream analysis but it will produce a warning or failure in this module. \n\n\nDuplicates often arise because libraries are contaminated with adapter sequences. You can check for contaminating sequences using the **Overrepresented sequences** module: it lists all sequences which make up more than 0.1% of the total. 
For each sequence in the list the module will look for matches in a database of common contaminants and will report the best hit it finds.\n\n> Which contaminants are found in this library ?\n> Go to the **Overrepresented sequences** module: \n\n ![](../../images/FASTQCRNASeqB7.png)\n\nAs you can see, a single Illumina adapter was found contaminating the library to a small extent: 0.5% of the library consists of adapter sequences.\n\n\nThe **Overrepresented sequences** module shows contamination with full adapter sequences (= reads that completely correspond to adapters), but often the library also contains reads that have remnants of adapter sequences at their 3' ends. These reads are not detected by the **Overrepresented sequences** module. \n\nThis was the quality check of one file from one of the 16 samples. We do not have the time to do all quality checks. But in the real world, you would have to do this for each of the 32 files of this experiment.\n\n### Improving the quality of the data\nThere are many possible steps to improve the quality of the data. Due to time constraints, we are going to focus on\n\n - removing adapter sequences, both filtering full adapter sequences and trimming remnants of adapters from the 3' ends of the reads\n - filtering reads of low quality\n\n\n\nThere are many tools to remove adapters from reads, but we chose cutadapt because it works on paired-end reads and it can do the two steps at the same time (removing adapters and filtering reads of poor quality).\n\nTo make it feasible to go through the complete RNA-Seq workflow during the training, we have limited the data set to reads that map to chromosome 22. The data come from a paired-end experiment so we have two files with reads. 
You can download these limited data sets: [data_set_1](http://data.bits.vib.be/pub/trainingen/NGSIntro/chr22_SRR1039509_1.fastq.gz) (first reads of a pair that map to chromosome 22) and [data_set_2](http://data.bits.vib.be/pub/trainingen/NGSIntro/chr22_SRR1039509_2.fastq.gz) (second reads of a pair that map to chromosome 22). On the bits laptops, the files are already present in the /home/bits/NGS/RNASeq/ folder.\n\nRemember that the **Overrepresented sequences** module of the FASTQC report showed contamination with the following TruSeq adapter in the first file of sample SRR1039509: \n```\nACACGTCTGAACTCCAGTCACTGACCAATCTCGTATGCCGTCTTCTGCTTGAAAAAAAAAAAA\n```\nWe will remove this adapter from the file containing the reads that map to chromosome 22.\n\nOpen the terminal.\n\n> Make a variable called adapter to hold the sequence of the adapter\n|-\n```\nadapter=ACACGTCTGAACTCCAGTCACTGACCAATCTCGTATGCCGTCTTCTGCTTGAAAAAAAAAAAA\n```\nRemember to use Shift + Insert to paste in the terminal !\n\n\n> Check the cutadapt option for defining the number of mismatches you allow (= error rate)\n> Cutadapt is not a regular bash command but a Python script so it doesn't have a manual. So to open the cutadapt help type:\n```\ncutadapt -h\n```\n\nScrolling down the help file shows that the **-e** option defines the maximum allowed error rate: the default is 0.1 meaning that it allows one mismatch every 10 nucleotides. Adapters are identified by aligning each read to the adapter sequence: if the frequency of mismatches in the alignment is below the allowed error rate then the adapter is trimmed from the read.\n\n\n> Check the option for defining the adapter sequence\n> In the help file you see that you have multiple options:\n\n - **-a** to trim adapter sequences at the 3' end of the reads. In most cases this is the end that is causing the problems: when small RNA fragments are sequenced, the resulting reads can be longer than the RNA fragments. 
As a result they will contain (parts of) the adapter at their 3'end. In long reads the adapter might even lie within the read:\n```\nMYSEQUEN                         (no adapter contamination)\nMYSEQUENCEADAP                   (part of adapter at 3' end)\nMYSEQUENCEADAPTER                (adapter at 3' end)\nMYSEQUENCEADAPTERSOMETHINGELSE   (adapter within the read)\n```\nCutadapt will cut the adapter (part) and all sequence following it resulting in:\n```\nMYSEQUEN\nMYSEQUENCE\nMYSEQUENCE\nMYSEQUENCE\n```\n\n - **-g** to trim adapter sequences at the 5' end of the reads. These adapters are expected to appear at the start of a read (where they can be just partially there) or somewhere within the read:\n```\nADAPTERMYSEQUENCE              (5' end)\nDAPTERMYSEQUENCE               (partial)\nTERMYSEQUENCE                  (partial)\nSOMETHINGADAPTERMYSEQUENCE     (within)\n```\nIn all cases, the adapter itself and the sequence preceding it will be removed, leaving in all examples above:\n```\nMYSEQUENCE\n```\n\n - **-b** to trim adapters at the 3' or 5' end of the read. If there is at least one base before the adapter, then the adapter is trimmed as a 3’ adapter and the adapter itself and everything following it is removed. 
Otherwise, the adapter is trimmed as a 5’ adapter and it is removed from the read, but the sequence after it remains:\n```\nBefore trimming \t        After trimming \t\nMYSEQUENCEADAPTERSOMETHING \tMYSEQUENCE \nMYSEQUENCEADAPTER \t        MYSEQUENCE\nMYSEQUENCEADAP \t                MYSEQUENCE\nMADAPTER \t                M\nADAPTERMYSEQUENCE \t        MYSEQUENCE\nPTERMYSEQUENCE \t                MYSEQUENCE\nTERMYSEQUENCE \t                MYSEQUENCE\n```\n\n\nSince we have contaminating adapter at the 3'end we'll take the -a option\n\n\nWe will use a few other options:\n\n - Discard trimmed reads that are shorter than 20 bases after trimming using the **-m** option \n - Trim low-quality bases at the 3'ends from reads before adapter removal if their Phred score is less than 10 using the **-q** option\n\n\n\n> Check the -q option in the help file\n> Scroll down to the **Additional modifications to the reads:** section to check the usage of the -q option:\n\n ![](../../images/CLcutadapt1.png)\n\n\n> Check the -m option in the help file\n> Scroll up to the **Options for filtering of processed reads:** section to check the usage of the -m option:\n\n ![](../../images/CLcutadapt2.png)\n\n\nRemember that we are working with paired-end reads !\n\n> Check the usage of cutadapt for paired-end reads in the help file\n> Scroll up to the start of the help file to check the usage of cutadapt for paired-end reads:\n\n ![](../../images/CLcutadapt3.png)\n\n\nSince we have to specify the location in the file system of two input and two output files, we are going to create a variable called path for holding the path.\n\n> Create the variable path\n|-\n```\npath=/home/bits/NGS/RNASeq/\n```\n\nRemember to use tab autocompletion.\n\n> Clean up the files using the knowledge you have obtained\n|-\n```\ncutadapt -a ${adapter} -q 10 -m 20 -o ${path}chr22_SRR1039509_1t.fastq -p ${path}chr22_SRR1039509_2t.fastq ${path}chr22_SRR1039509_1.fastq.gz ${path}chr22_SRR1039509_2.fastq.gz\n```\n\n 
- **-a** to specify the sequence of the adapter and to specify that we want to cut adapter sequences from the 3'ends of the reads\n - **-q** to specify the minimal quality score is 10. Bases at the 3'end of the reads with a quality score below 10 will be removed\n - **-m** to specify the minimal length of the read after trimming. Reads smaller than 20 bases will be removed\n - **-o** to specify the location in the file system where you want to write the output files\n - **-p** to specify the location in the file system where you want to write the results for the reads from the other end of the fragments. As such you specify that these are paired-end reads.\n\n\n\nIn the cutadapt stats you see we only trimmed one file (containing sequences from one end of the fragments). \n\n ![](../../images/CLcutadapt4.png)\n\nThat is because the sequences from the other end of the fragments contain another adapter: \n```\nGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATTAAAAAAAAAAAAAAAAAA\n```\n\n> Redefine the adapter variable\n|-\n```\nadapter=GTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATTAAAAAAAAAAAAAAAAAA\n```\nRemember to use Shift + Insert to paste in the terminal.\n\n\n> Run cutadapt again to remove the other adapter\n> Remember to switch the two files now:\n```\ncutadapt -a ${adapter} -q 10 -m 20 -o ${path}chr22_SRR1039509_2trim.fastq -p ${path}chr22_SRR1039509_1trim.fastq ${path}chr22_SRR1039509_2t.fastq ${path}chr22_SRR1039509_1t.fastq\n```\nRemember to use the up arrow to go back in the history.\n\n\nNow you see in the cutadapt stats that you have trimmed adapters from (both files) both ends of the fragments.\n\n ![](../../images/CLcutadapt5.png)\n\n### Check the quality of the cleaned reads\n\nI have done this also for the complete files and rechecked the trimmed reads in FASTQC. 
You can download [the report for the complete trimmed reads from sample SRR1039509](http://data.bits.vib.be/hidden/jhslbjcgnchjdgksqngcvgqdlsjcnv/ngsrnade2015/ex1/fastqc_SRR1039509_1/trimmed_reads/SRR1039509_1.fastq.pdf).\n\n> Are all the reads still 63 nt long after trimming ?\n> In the **Basic statistics** tab you see that the length of the reads varies between 20 (in the cutadapt command we set 20 as the minimal length for a read to be retained) and 63 (reads that were not trimmed):\n ![](../../images/FASTQCRNASeqB9.png)  \n\n> Have the quality scores of the reads significantly changed after trimming ?\n> The **Per base sequence quality** is similar to that of the untrimmed file, as is the **Per sequence quality**.\n\nQuality scores have of course changed a bit since we trimmed low quality bases, but the initial quality of the reads was so good that you don't really see the effect of the trimming.\n\n> Has the per base sequence content improved as a result of the trimming ?\n> The **Per base sequence content** - the tool to detect adapter contamination - plot has improved, it's even more stable now.\n\n ![](../../images/FASTQCRNASeq10.png) \n\n> What is the little bump you see in the Sequence length distribution plot ?\n> \nApparently many reads contain 3 bases that belong to the adapter. These 3 bases have been cut leaving reads of 60 nt long: this is the small peak you see on the plot at length 60. All intermediate lengths of adapter contamination have been detected but in such a small fraction of reads that you cannot see the influence of the trimming on the plot.\n\n> Are there any overrepresented sequences left ?\n> No.\n\n ![](../../images/FASTQCRNASeq11.png) \n\n> Are there any overrepresented heptamers ?\n> FASTQC still detects overrepresented heptamers although at much lower counts than before. \n\n ![](../../images/fastqcTrim5.png)\n\nFastQC confirmed the removal of the two adapters by cutadapt. 
\n\n### Mapping\n\n#### Obtaining the reference genome\n\nBefore we can do any mapping we need a reference sequence first. We will map the reads against the hg19 human genome build. Mapping requires a specially formatted file (hash database). This hash database can be derived from the reference genome using the bowtie2 tools. However, for some organisms like human the hash table can be obtained 'ready-to-use' from the bowtie2 website. If you also need a fasta copy of the hg19 genome, you can obtain it from the hash table using bowtie2. \nWe can download the hash table from the [bowtie2 website](ftp://ftp.ccb.jhu.edu/pub/data/bowtie2_indexes/hg19.zip) using the **wget** command. It takes about 90 minutes to download so we are not going to download it during the training, it is already present in the /home/bits/NGS/RNASeq/reference/ folder.\n\nGo to this folder and look at its contents. As you can see the file is a compressed .zip file\n \n> Unzip the reference genome file\n> To decompress a .zip file you need the **unzip** command:\n```\nunzip hg19.zip\n```\n\nIt will take a few minutes and it will generate 7 files:\n\n ![](../../images/MapRNASeq1.png) \n\nTo transform the hash table into a fasta sequence we use bowtie2. From the [bowtie2 documentation](http://bowtie-bio.sourceforge.net/bowtie2/manual.shtml#the-bowtie2-inspect-index-inspector) we learn that we should use bowtie2-inspect without options to generate the fasta file.\n\n> Generate the human reference genome sequence in fasta format.\n> The required command simply is:\n```\nbowtie2-inspect hg19 > hg19.fa\n```\n\nThe **>** sign indicates that the output of the command (so the fasta file with the reference sequence) needs to be written to a file called **hg19.fa**.\nIt will take a few minutes to generate the file. \n\n ![](../../images/MapRNASeq2.png)\n\n#### Installing TopHat\n\nMapping RNA-Seq reads is done using the TopHat tool. 
So we need to install the [TopHat tool](http://ccb.jhu.edu/software/tophat/tutorial.shtml).\n\n> Go to the TopHat website and fetch the download link.\n> \n - Go to the [TopHat website](http://ccb.jhu.edu/software/tophat/tutorial.shtml)\n - Right click the Linux download link\n - Select **Copy Link Location**\n\n> Download the file.\n> \n - Go to the terminal\n - Navigate to the /usr/bin/NGS/ folder\n - Type **wget **\n - Press the Shift and Insert keys simultaneously to paste the url\n\nTopHat is downloaded as a .tar.gz file \n\n> Decompress the file\n> For decompressing a .tar.gz file you need the following command:\n```\ntar -xzvf tophat-2.1.0.Linux_x86_64.tar.gz\n```\nRemember to use tab autocompletion !\n\nThis creates a new folder:\n\n ![](../../images/MapRNASeq3.png) \n\nGo into the tophat folder and type:\n```\n./tophat\n```\n\nIf this opens the help of tophat, it means the software has been installed correctly. It does not mean that you can use the software now. Well you can but you will always have to type the commands from inside the tophat folder or provide the full path to the tophat folder. To avoid this we can create a symbolic link for tophat2.\n\n> Create a symbolic link for tophat2\n> For creating the link you need the following command:\n```\nln -s /usr/bin/NGS/tophat-2.1.0.Linux_x86_64/tophat2 /usr/local/bin/tophat2\n```\nRemember to use tab autocompletion !\n\nNow go to a different folder and type **tophat2**. If you see the help file, the link works.\n\n#### Installing samtools\n\nWhen you navigate to the **tophat** folder in /usr/bin/NGS/ you see that samtools is automatically installed when TopHat was installed:\n ![](../../images/MapRNASeq5.png)\n\nIf you see the samtools help page when you type\n```\n./samtools_0.1.18\n```\nit means that samtools is indeed installed\n ![](../../images/MapRNASeq6.png)\n\nIf you want to use samtools from anywhere in the file system you can create a soft link. 
\n\n> Create a soft link for samtools.\n> Create a link using the **ln -s** command:\n```\nln -s /usr/bin/NGS/tophat-2.1.0.Linux_x86_64/samtools-0.1.18/samtools /usr/local/bin/samtools\n```\nGo up one directory in the file system and check if the command works. If you type\n```\nsamtools view\n```\n(one of the possible samtools commands) you should see the manual of the command.\n\n#### Mapping the reads\n\nWe are not going to do the actual mapping since it takes almost 25 minutes even with the chromosome22-limited datasets. If we were to map the reads we would use the following command:\n```\nfolder=/home/bits/NGS/RNASeq/\ntophat2 --no-coverage-search ${folder}reference/hg19 ${folder}chr22_SRR1039509_1.fastq.gz ${folder}chr22_SRR1039509_2.fastq.gz \n```\n\n - **--no-coverage-search**: is related to how TopHat finds splice junctions. I'm not going to go into detail here but in the TopHat manual the developers of TopHat say: \"We only suggest users use the **--coverage-search** option for short reads (< 45bp) and with a small number of reads (<= 10 million).\" Since we have the double amount of longer reads (63bp) we have to go for the **--no-coverage-search** option.\n - the first argument is the location of the hash table of the reference genome\n - the second argument is the (cleaned) fastq file containing the reads from one end of the fragments. As you can see TopHat can work directly on the compressed file.\n - the third argument is the (cleaned) fastq file containing the reads from the other end of the fragments\n\nOther useful options for Tophat:\n\n - **-p**: the number of processors (cpu) that TopHat can use for the mapping. The default is 1. This is ok for a laptop since laptops do not contain many cpu but of course the more cpu you give TopHat the faster the mapping. 
So it's better to do the mapping on a strong computer with many cpu\n - **-o**: if you want to store the results of the mapping in another folder\n\nThe mapping generates a new folder **tophat_out** containing 3 .bed files and 2 .bam files containing the resulting alignments:\n\n - **accepted_hits.bam**: a list of read alignments.\n - **unmapped.bam**: a list of reads that could not be mapped. As you can see the size of this file is quite small compared to the accepted_hits.\n - **junctions.bed**: a list of splice junctions between exons in [UCSC BED format](http://genome.ucsc.edu/FAQ/FAQformat.html#format1) (that can be opened as a track in the UCSC genome browser).\nTophat can find novel - not yet annotated - splice junctions based on the alignment of the reads to a reference genome. This is what Tophat is specifically good at, compared to mappers like bwa and bowtie which will only find annotated splice junctions. This is why we use Tophat for mapping RNA-Seq data.\n - **insertions.bed**: a list of insertions.\n - **deletions.bed**: a list of deletions.\n\n ![](../../images/MapRNASeq4.png)\nSince we haven't actually done the mapping, we do not have this folder. However, you can find the bam file with the read alignments in the /home/bits/NGS/RNASeq folder.\n\n#### Quality control of the mapping\n\nIt is vital to check the quality of mapping before proceeding with the RNASeq workflow. The mapping to a reference genome has sorted the reads and it is now possible to identify \n - the regions of the genome the reads originate from\n - duplicate reads\n - RNA degradation...\n\nSeveral programs exist to perform quality control of bam files; e.g. 
RSeQC pubmed: 22743226, [QualiMap](http://qualimap.bioinfo.cipf.es/) pubmed: 22914218 , samtools, [deeptools](https://github.com/fidelram/deepTools/wiki/All-command-line-options) pubmed: 24799436 , [Picard](http://broadinstitute.github.io/picard/) which is part of the very popular GATK platform - pubmed: 20644199...\n\nWe are going to use **samtools** here.\n\n> Get the basic stats of the bam file.  \n> On the [samtools wiki](http://davetang.org/wiki/tiki-index.php?page=SAMTools)\n\n ![](../../images/samtools2b.png)\n\nYou can see that you need the **samtools flagstat** command for this. The bam file is in the /home/bits/NGS/RNASeq/ folder so we are going to reuse the folder variable that we created for the mapping:\n```\nfolder=/home/bits/NGS/RNASeq/\nsamtools_0.1.18 flagstat ${folder}accepted_hits_chr22.bam\n```\n\nor if you have done the mapping yourself: \n\n```\nsamtools_0.1.18 flagstat /usr/bin/NGS/tophat-2.1.0.Linux_x86_64/tophat_out/accepted_hits_chr22.bam\n```\n\nThe samtools flagstat command displays an overview of the alignment results on your screen. You just see that 100% of the reads were mapped. This is extremely high but it is of course because we reverse-engineered our chromosome 22 limited data set. 
From the complete fastq files we took the reads that mapped to chromosome 22 so it's normal that we get an almost perfect mapping.\n\n ![](../../images/MapRNASeq7.png)\n\nThis overview deserves some explanation:\n\n - **nan** means **Not A Number** (e.g: divided by 0 )\n - **paired in sequencing** means reads that belong to a pair regardless of the fact that they are really mapped as a pair\n - **read1** means forward reads\n - **read2** means reverse reads\n - **properly paired** means that both mates of a read pair map to the same chromosome, oriented towards each other, and with a sensible insert size\n - **with itself and mate mapped** means that both reads of a pair map to the genome but they are not necessarily properly paired, they just map somewhere on the genome\n - **singletons** means that one of the reads of a pair is unmapped while its mate is mapped\n - **with mate mapped to a different chr** means reads with a mate mapped on a different chromosome\n - **with mate mapped to a different chr (mapQ >= 5)** means reads with a mate mapped on a different chromosome having a mapping quality of at least 5\n\n> Compare the number of forward and reverse reads in the paired-end experiment.  \n> the counts of forward and reverse reads are to be found on the lines ending with read1 and read2 respectively. As you see the number of forward reads exceeds the number of reverse reads by 55. \n\n> How many reads were mapped as a pair in the paired-end experiment?   \n> 814320 reads were properly mapped as a pair, that's 99.46% of the total number of reads.\n\nTools like Qualimap, RSeQC and Picard will give much more detailed information on the quality of the mapping. Unfortunately we do not have time to use them.\n\n### Calculating a count table\n\nIn order to compute differential expression between groups of samples, we need to convert mapping results to read counts for each gene in each sample. 
The counting can also be done in R using various packages but will be slower as compared to command-line tools.\nWe will use the popular [HTSeq-count tool](http://www-huber.embl.de/users/anders/HTSeq/doc/count.html) to compute gene counts.\n\n#### Prepare the alignment file\n\nWe need to sort the bam file since we have paired-end reads. HTSeq assumes the file is sorted so that reads belonging to the same pair are in adjacent lines. If you don't sort the bam file by read name, HTSeq will think there are a lot of reads with missing mates. \nIn the [samtools manual](http://www.htslib.org/doc/samtools.html) we can look up which command we need to do the sorting.\n\n> Sort the reads in the .bam file by name\n> As you can see in the manual the **samtools sort** command sorts .bam files:\n\n ![](../../images/CountTable7.png)\n\nThe input and output file are located in the /home/bits/NGS/RNASeq/ folder (or the /usr/bin/NGS/tophat-2.1.0.Linux_x86_64/tophat_out/ folder if you have done the mapping yourself). We are going to create a folder variable:\n```\nfolder=/home/bits/NGS/RNASeq/\nsamtools_0.1.18 sort -n ${folder}accepted_hits.bam ${folder}accepted_hits_sort\n```\n\nGo to the folder where input and output file are stored and check if the sorted .bam file was generated:\nAccording to [the HTSeq manual](http://www-huber.embl.de/users/anders/HTSeq/doc/count.html) the input file for HTSeq contains the aligned reads in **SAM** format. In our case the mapping generated a .bam file. 
Fortunately samtools contains scripts to convert BAM format to SAM.\n\nIn the [samtools manual](http://www.htslib.org/doc/samtools.html) we can look up which command we need to do the transformation.\n\n> Transform the .bam into a .sam file  \n> As you can see in the manual the **samtools view** command can transform any alignment format into standard SAM format:\n\n ![](../../images/CountTable8.png)\n\nWe are going to reuse the folder variable:\n```\nsamtools_0.1.18 view ${folder}accepted_hits_sort.bam > ${folder}accepted_hits.sam\n```\n\nGo to the folder where input and output file are stored and check if the .sam file was generated.\n\n#### Obtaining a reference annotation file\n\nTo calculate read counts we need a gtf file containing the annotation of all exons. You can obtain such files from genome annotation databases such as NCBI, Ensembl, and UCSC. The problem is that there are small differences between the formats of annotation files coming from different databases. These differences have implications for counting the reads.\nFor instance, we used pre-built index files from the Bowtie website for the mapping. These files have UCSC format. So it seems obvious to use UCSC annotation files for the counting. However, HTSeq prefers Ensembl gtf files. As stated in the [HTSeq documentation](http://www-huber.embl.de/users/anders/HTSeq/doc/count.html) using gtf file generated by UCSC will result in very low counts. In the UCSC files, the gene_id incorrectly contains the same value as the transcript_id. Hence, if a read maps to an exon shared by several transcripts of the same gene, this will appear to htseq-count as an overlap between different genes since the different transcripts have different gene_ids. The read will be considered ambiguous and not counted. Therefore, the counts will be incorrect.\nAs a solution, HTSeq recommends to use a gtf file from Ensembl. You can find Ensembl gtf files on the [Ensembl ftp server](ftp://ftp.ensembl.org/pub/). 
The version that we need is called grch37 (this corresponds to UCSC genome build hg19). So you can download the gtf file from [this web site](ftp://ftp.ensembl.org/pub/grch37/).\n\nNavigate to the /home/bits/NGS/RNASeq/reference/ folder:\n\n> Decompress the gtf file.  \n> .gz files are decompressed by the **gunzip** command:\n```\ngunzip Homo_sapiens.GRCh37.82.gtf.gz \n``` \nUse Tab autocompletion for the name of the file.\n\n> Look at the first 10 lines of the gtf file.  \n> Use the **head** command to preview the file:\n```\nhead Homo_sapiens.GRCh37.82.gtf\n```\nUse Tab autocompletion for the name of the file.\n\n ![](../../images/CountTable3B.png)\n\nAs you can see the first column of the file contains chromosome numbers. Ensembl uses 1, 2, 3... as chromosome IDs.\n\n> Look at the first 10 lines of the sam file.  \n> Use the **head** command to preview the file:\n```\nhead accepted_hits.sam\n```\nUse Tab autocompletion for the name of the file.\n\n ![](../../images/CountTable3C.png)\n\nAs you can see the third column of the file contains chromosome IDs but they have UCSC format: **chr**22 (remember that all reads come from chromosome 22 in our example). So we need to:\n\n - Filter the annotation for chromosome 22 from the gtf file to limit processing time.\n - Transform Ensembl chromosome IDs into UCSC format.\n\nFirst of all we'll give the gtf file a simple name to simplify processing.\n\n> Use the move command to rename the file to hg19_EnsGene.gtf  \n> Use the **mv** command to rename the file:\n```\nmv Homo_sapiens.GRCh37.82.gtf ./hg19_EnsGene.gtf\n```\nThe **./** defines to move the file to the current folder (the folder that you are in when you type the command). So you will move the file to the same folder but under another name, which corresponds to just renaming it.\n\n> Filter chromosome 22 annotations from the gtf file. Name the resulting file chr22_Ens.gtf\n> Chromosome 22 annotations are lines starting with **22**. 
Use the **grep** command to filter the file:\n```\ngrep \"^22\" hg19_EnsGene.gtf > chr22_Ens.gtf\n```\nThe **^** defines the start of a line. So **^22** means: search for lines that start with **22**.\n\n> Look at the first 10 lines of the filtered gtf file.  \n> Use the **head** command to preview the file:\n```\nhead chr22_Ens.gtf\n```\nUse Tab autocompletion for the name of the file.\n\n ![](../../images/CountTable3D.png)\n\n\nNow, we still need to transform the Ensembl chromosome IDs into UCSC format, meaning that we simply need to add the prefix **chr** to each line in the filtered gtf file. You'll need the **sed** command for this. Look at [the sed documentation](http://www.grymoire.com/Unix/Sed.html) before you try to do the substitution. To add the word **chr** to the start of each line, you essentially need to replace the start of a line by **chr**.\n\n> Add the prefix to each line of the gtf file. Name the resulting file chr22_Ens_corr.gtf\n> To do a replacement or substitution you need to use the **s** command, followed by what you want to replace and what to replace it with, each separated by a **/**. Remember from the filtering exercise that the start of a line is represented by **^**. So use the following command to make the substitution:\n```\nsed 's/^/chr/' chr22_Ens.gtf > chr22_Ens_corr.gtf\n```\nUse Tab autocompletion for the name of the file.\n\n> Look at the first 10 lines of the substituted gtf file.  \n> Use the **head** command to preview the file:\n```\nhead chr22_Ens_corr.gtf\n```\nUse Tab autocompletion for the name of the file.\n\n ![](../../images/CountTable3E.png)\n\n#### Installing HTSeq\n\nHTSeq is a Python script. Python scripts can be installed using the **pip install** command. Remember you need administrator privileges for installing tools.\n\n> Try to install HTSeq. What happens ?  
\n> Use the following command to install the tool:\n```\npip install HTSeq\n```\n\nAs you can see this generates an error and the tool is not installed.\n\n ![](../../images/CountTable4.png)\n\nLooking up the error in Google leads to [this web page](http://www.cyberciti.biz/faq/debian-ubuntu-linux-python-h-file-not-found-error-solution/), where you can find the solution to the problem: some dependencies are missing.\n\n> Install the missing dependencies and try again. What happens ?  \n> Use the following command to install the dependencies:\n```\napt-get install python-dev\n```\n\nThen try to install HTseq again:\n```\npip install HTSeq\n```\n\nAs you can see this generates a new error and the tool is not installed.\n\n ![](../../images/CountTable5.png)\n\nLooking up the error in Google leads to [this web page](http://ubuntuforums.org/archive/index.php/t-345201.html), where you can find the solution to the problem: the C compiler is missing.\n\n> Install the missing compiler and try again. What happens ?  \n> Use the following command to install the dependencies:\n```\napt-get install g++\n```\n\nThen try to install HTseq again:\n```\npip install HTSeq\n```\n\nAs you can see this does not generate an error. 
To check if the tool works type:\n```\nhtseq-count\n```\n\nIf this displays the help file, you know that the tool was correctly installed\n\n ![](../../images/CountTable6.png)\n\n#### Calculating the count table\n\n**HTSeq** counts reads in different **modes**: \n[HTSeq](http://www-huber.embl.de/users/anders/HTSeq/doc/_images/count_modes.png)\n\n{{Tip|We will use the **union** method}}\n\nIn the [HTSeq manual](http://www-huber.embl.de/users/anders/HTSeq/doc/count.html) we get an overview of the options we can use.\n\n> How to define that we want to use the union mode ?\n> The **-m** option allows to define the mode:\n ![](../../images/CountTable10.png)\n\n> How to define that this was not a strand-specific experiment ?\n> The **-s** option allows to define if a strand-specific library was used:\n ![](../../images/CountTable11.png)\n\n> How to define the minimum alignment quality score for a read to be counted ?\n> The **-a** option allows to define the minimum alignment score:\n ![](../../images/CountTable12.png)\n\nWe'll go for the default minimum alignment score of 10 (90% confidence).\n\n> How to define we want to count based on exon annotation ?\n> The **-t** option allows to define the feature to base the count on:\n ![](../../images/CountTable13.png)\n\nFor a .gtf file **exon** is the default. It means HTSeq will count the number of reads that align to each exon and then combine the counts for all exons of a transcript variant. \n\n> How to define the feature we want to use as an ID after the counting ?\n> The **-i** option allows to define the feature to use as ID.\n\nFor a .gtf file gene_id is the default: it means that the output of HTSeq will be a list of gene_ids and for each gene_id you'll see the number of reads that align to all its exons. \n\n> Calculate the count table\n> HTSeq was installed by **pip install** which automatically creates a link. So the HTSeq commands will work from anywhere in the file system. 
We will go to the folder that contains the input sam file: **/home/bits/NGS/RNASeq/** and run the command from there or create a variable folder containing the path.\nIn the [HTSeq manual](http://www-huber.embl.de/users/anders/HTSeq/doc/count.html) we get an overview of the options we can use. Default options should not be defined.\n```\nfolder=/home/bits/NGS/RNASeq/\nhtseq-count -m union -s no ${folder}accepted_hits.sam ${folder}reference/chr22_Ens_corr.gtf > ${folder}chr22_counts.txt\n```\n\n> View the first 10 lines of the resulting count table\n> \n```\nhead chr22_counts.txt\n```\n\n ![](../../images/CountTable14.png)\nYou nicely see the read counts for each gene...\n","### Your first command\n\n> Print the word hello to the screen\n\n```\necho hello\n```\n> Print the sentence “hello my friends to the screen (with open quotation and without end quotation)\nRemember you can use the up arrow to go back to previously typed commands \n```\necho \"hello my friends\n```\nNow the terminal hangs. We typed an incorrect command and the terminal does not know what to do. \n\n> What to type when the terminal hangs ?\n\nCtrl-C\nIf **Ctrl-C** fails, try hitting **ESC**. In most of the cases, this will do the trick.\n\n> Open the manual of the echo command ?\n\n```\nman echo\n```\nThe synopsis of this command is:\n```\necho [-n] [string ...]\n```\n\nThings in between square brackets are optional, so it means that you can use echo without options and arguments.\n\nWhen the manual page is longer than the terminal, you can scroll down the page one line at a time by pressing the down arrow key, or one page at a time by pressing the spacebar. To exit the man page, press **q** (for quit).\n\nThe manual explains that echo prints its argument by default to the screen and then puts the prompt on a new line. The way it does this is by appending a character called a newline (a special character that literally puts the text on a new line). 
Because echo is often used in programs to print out a sequence of strings not separated by newlines, there is an option to prevent the newline from being inserted.\n\n> By reading the man page, find the command to print hello without a newline, and verify that it works as expected.\nAgain remember to use the up arrow to go to previously typed commands.\n```\necho -n hello\n```\n\n> Open the manual of the sleep command, find how to make the terminal “sleep” for 5 seconds, and execute the command.\n \n```\nman sleep\n```\nAccording to the manual sleep has a required argument called number representing the number of seconds to sleep.\n```\nsleep 5\n```\n> Make the terminal sleep for 5000 seconds and rescue the terminal.\n \n```\nsleep 5000\n```\nThat’s more than an hour so use Ctrl-C to break off the command.\n\n### Navigating the Linux file system\n\nType the following command in the terminal:\n```\ncd\n```\ncd stands for change directory and is used for navigating the Linux file system\n\n> Which directory are you in ?\nTo view the name of the current working directory, type\n```\npwd\n```\npwd stands for print working directory.\nYou see that using cd without arguments leads you to your home directory, on the BITS laptops this is /home/bits.\n> Which directories are located in your home directory ?\nTo view a list of the files and directories that are located in the current working directory, type\n```\nls\n```\nls stands for **list** and is used for listing all files and directories in the current directory. 
\nOn the BITS laptops the home directory **/home/bits** contains a set of folders like Desktop, Documents, Downloads...\n\n> List all files and directories in your home directory that start with the letter D\n\n```\nls D*\n```\nD(star) means everything which name starts with a **D**\n\nA common pattern when using the command line is changing directories using **cd** and then immediately typing **ls** to view the contents of the directory.\n> List the detailed content of your home directory ?\n\n```\nls -l\n```\nthe l in -l stands for **long output**. \nAmong others, the detailed list shows a date and time indicating the last time a file was modified. The number before the date is the size of the file in bytes.\n> List the content of the /usr/local/bin directory ?\n\n```\nls /usr/local/bin\n```\n/usr/local/bin corresponds to a directory in the file system (/), with bin a subdirectory of local and local a subdirectory of usr.\n\nIf you have to reuse a variable often then it can be helpful to create a name for a variable, especially when the variable is long. Suppose you want to work in a directory called **Illumina_exp4_20042004_mapping_to_hg19_results**. To avoid repeating this long name over and over you can create a variable for it, give it a short name and use that in your commands. 
\n\n> Name the variable **folder**\n\nUse the following command: ```folder=Illumina_exp4_20042004_mapping_to_hg19_results```\n\nTo create a new directory use the **mkdir** (make directory) command.\n\n> Create the folder using the newly created variable\n\nIf you want to refer to a named variable in a command you have to precede the name by a **$** sign to indicate that what is following is a **reference** to a variable.\nSo use the following command: ```mkdir ${folder}```\nThe curly braces delineate the start and end of the variable name.\nCheck if the folder is created using the **ls** command.\n\n{{Wiki-img|NGS/Intro/CL3.png|500px}}\nTo remove a directory, use the **rm** (remove) command. You could use **rmdir** but this only works on empty folders. To remove a folder with the rm command you need to use the **-r** option. This stands for **recursively** which means it will remove the folder and its complete content. \n\n> Remove the Illumina_exp4_20042004_mapping_to_hg19_results directory.\n\nUse the variable as an argument of the rm command:\n```\nrm -r ${folder}\n```\nCheck if it's removed using the **ls** command.\nNow navigate to the **NGS** folder which is located in the **/home/bits/** folder.\n\n> Navigate to this location.\nSince you want to navigate, you need to use the **cd** command. 
Since the NGS folder is located in the folder that you are currently in, you can simply give the name of the folder (NGS) as an argument:\n```cd NGS```\nIf you want to move to a folder that's located in another location of the file system, you have to give the full path to the folder.\n\n> Go to the **/usr/bin** folder\n```cd /usr/bin```\n> Go back to your home folder\n```cd```\n\n### Manipulating files\n\nEven without a text editor, there are ways to create a file with text using the redirect operator **>**\n\n> Create a file called test1.txt containing the text \"Why do bioinformaticians work on the command line?\" using echo\n\n```\necho \"Why do bioinformaticians work on the command line?\" > test1.txt\n```\nThe redirect operator > takes the text output of echo and redirects its contents to a file called test1.txt\n> Check if it worked by viewing the content of the file on the screen\n```\ncat test1.txt\n```\nThe name cat is short for “concatenate”. The command can be used to combine the contents of multiple files, but we use it here to dump the content of a single file to the screen. Cat is a “quick-and-dirty” way to view the content of a file, less is a neater way. 
\n> Add the line \"Because they don't want to scare you with huge amounts of data!\" to the file and check if it worked\nTo add lines of text to a file, use the append operator **>>**:\n```\necho \"Because they don't want to scare you with huge amounts of data!\" >> test1.txt\ncat test1.txt\n```\nThe append operator >> appends the text output of echo to the file test1.txt\n\n> Create an empty file called test2.txt and check if it exists\nTo create an empty file, use the **touch** command:\n```\ntouch test2.txt\nls\n```\n> List the names of all text files in your current directory\n\n```\nls *.txt\n```\nHere *.txt automatically expands to all filenames that match the pattern “any string followed by .txt”.\n> Rename the file test2.txt to test_partII.txt using mv and check if it worked\nTo rename a file use the mv command, short for **move**:\n```\nmv test2.txt test_partII.txt\nls *.txt\n```\n> Copy the file test_partII.txt to test2.txt and check if it worked\nTo copy a file use the cp command, short for **copy**:\n```\ncp test_partII.txt test2.txt\nls *.txt\n```\nYou don't have to type out test_partII.txt, instead you can type something like test_-Tab thereby making use of tab completion. Tab completion involves automatically completing a word if there’s only one valid match on the system. For example, if the only file starting with the letters “test_” is test_partII.txt, test_-Tab refers to test_partII.txt\nEspecially with longer names, tab completion can save a huge amount of typing.\n\n> Remove the file test_partII.txt and check if it worked\nTo remove a file use the rm command, short for **remove**:\n```\nrm test_partII.txt\nls *.txt\n```\nDownload the file called exams.txt, containing the results of the spelling and maths exams of all 10-year olds of a school, into your home folder. 
Use wget to download the file from http://data.bits.vib.be/pub/trainingen/NGSIntro/exams.txt\n\n> Download the file.\n```\nwget http://data.bits.vib.be/pub/trainingen/NGSIntro/exams.txt\n```\n\n\n> Show the first 10 lines of the file.\n```\nhead exams.txt\n```\nTwo complementary commands for inspecting files are head and tail, which allow to view the beginning (head) and end (tail) of a file. The head command shows the first 10 lines of a file.\n\n> Show the last 10 lines of the file.\nSimilarly, tail shows the last 10 lines of a file.\n```\ntail exams.txt\n```\nOpen the manual of head to check out the options of head. Learn how to look at the first n lines of the file. \n\n> Save the first 30 lines of exams.txt in a file called test.txt\n```\nhead -n 30 exams.txt > test.txt\n```\n> Look at test.txt using the less command\n```\nless test.txt\n```\nThere are many commands to look at the full content of a file. The oldest of these programs is called **more**, and the more recent and powerful variant is called **less**. Less lets you navigate through the file in several ways, such as moving one line up or down with the arrow keys, pressing space bar to move a page down... Perhaps the most powerful aspect of less is the forward slash key /, which lets you search through the file from beginning to end. \n\n> Search for Jasper in test.txt\nThe way to do this in less is to type /Jasper\nThe last three essential less commands are G to move to the end of the file and 1G to move back to the beginning. To quit less, type **q** (for quit).\n\n> Look at the last 10 lines of the first 20 lines of exams.txt\n```\nhead -n 20 exams.txt | tail \n```\nThe command runs head -n 20 exams.txt and then pipes the result through tail using the pipe symbol **|** \n\nThe reason the pipe works is that the tail command, in addition to taking a filename as an argument, can take input from “standard in”, which in this case is the output of the command before the pipe. 
The tail program takes this input and processes it the same way it processes a file.\n\n### Running tools\n\nBioinformatics tools are just commands on the command line. You use them in exactly the same way as all the commands we have run up to now, by defining options and arguments. A list of options and arguments can be found in the help file. \n\n#### Installing and running sl\n\nWe have seen **ls**, the list command, and use it frequently to view the contents of a folder, but when you mistype it you sometimes end up with **sl**. How about getting a little fun in the terminal instead of **command not found**? This is a general linux command, you can install it from a repository. \n\n> Install sl\nFor installing you need superuser privileges !\n```\nsudo apt-get install sl\n```\n> Find out in the manual what sl stands for\n\n```\nman sl\n```\nYou can find the solution in the **description** section of the manual.\n> run the command\n\n```\nsl\n```\n:o)\nTry out some of the options !!\n\n#### Running blastp\nIn the folder /home/bits/Linux/ you find a file called [http://data.bits.vib.be/pub/trainingen/NGSIntro/sprot.fasta sprot.fasta] containing a set of protein sequences. We will use this file as a database for blast. The query sequence is the following: \n```\nMLLFAPCGCNNLIVEIGQRCCRFSCKNTPCPMVHNITAKVTNRTKYPKHLKEVYDVLGGSAAWE\n```\n\n> Create a fasta file containing the query sequence using echo called seq.fasta\n\n```\necho \">query seq\" > seq.fasta\ncat seq.fasta\necho MLLFAPCGCNNLIVEIGQRCCRFSCKNTPCPMVHNITAKVTNRTKYPKHLKEVYDVLGGSAAWE >> seq.fasta\ncat seq.fasta\n```\nBlast can be done via the [https://blast.ncbi.nlm.nih.gov/Blast.cgi blast website], but you can also download the blast tool and run it locally (on your computer) via the command line. For instance if you want to blast against your own database of sequences, you have to do it locally. 
Blast has been installed on the bits laptops.\n\nFirst you have to transform your own database (the sprot.fasta file in our case) into a database that can be searched by blast using the **makeblastdb** command.\n\n> Look at the help file of makeblastdb and find the options to define the input fasta file and the database type\n\n```\nmakeblastdb -help\n```\nYou have to define the input fasta file using the -in option and the type of sequences using the -dbtype option\n> Create the blast database\n\n```\nmakeblastdb -in sprot.fasta -dbtype prot\n```\nNow you can perform a blastp search using the **blastp** command. Write the results to a tabular text file with comments called output.txt\n\n> Look at the help file of blastp and find the options to define input, database, output and output format\n\n```\nblastp -help\n```\nYou need the -query, the -db, the -out and the -outfmt option\n> Perform the blast and open the results with less\n\n```\nblastp -query seq.fasta -db sprot.fasta -out output.txt -outfmt 7\nless output.txt\n```\n\n#### Running cutadapt\n\nIn this exercise we'll do some real NGS analysis on the SRR074262.fastq file that is stored in folder /home/bits/NGS/Intro. \n\n> Go to this folder and look at the first 10 lines of the file.\n\n```\ncd /home/bits/NGS/Intro\nhead SRR074262.fastq\n```\nThis data set contains a high number of adapter sequences. These are reads that consist solely or partly of adapter sequence. You have to remove this adapter contamination using command line tools like [https://code.google.com/p/cutadapt/ cutadapt]. This tool is installed on the bits laptops. It is not a regular bash command (it's a python program) so it doesn't have a manual but it does have a help file.\n\n> Check the help file of cutadapt for the option to define the adapter sequence and trim at the 3'ends of the reads.\nTo open the cutadapt help file type:\n```\ncutadapt -h\n```\nThe **-a** option trims adapter sequences at the 3' end of the reads. 
\nAt the top of the help file you see that the standard usage of the command is:\n```\ncutadapt -a ADAPTER -o output.fastq input.fastq\n```\nThe sequence of the adapter is GATCGGAAGAGCTCGTATGCCGTCTTCTGCTTGAAA\n\n> Trim the adapter sequence and store the trimmed sequences in a file called SRR074262trim.fastq\nGo to the folder where the input file is located and type:\n```\ncutadapt -a GATCGGAAGAGCTCGTATGCCGTCTTCTGCTTGAAA -o SRR074262trim.fastq SRR074262.fastq\n```\n> Look at the first lines of the trimmed file\nGo to the folder where the input file is located and type:\n```\nhead SRR074262trim.fastq\n```\n\n#### Running Picard\n\nThe trimmed fastq file is subsequently mapped resulting in a bam file that you can download from http://data.bits.vib.be/pub/trainingen/NGSIntro/1271011_reads.pair.1.list.accepted_hits.bam \n\n> Download the file via the command line\n\n```\nwget http://data.bits.vib.be/pub/trainingen/NGSIntro/1271011_reads.pair.1.list.accepted_hits.bam\n```\n> Rename the file SRR074262.bam\nRemember to use tab autocompletion !\n```\nmv 1271011_reads.pair.1.list.accepted_hits.bam SRR074262.bam\n```\nThis is a raw unsorted bam file, if we want to visualize the mapping results in IGV, we need to sort and index the file. We can do the sorting using one of [http://broadinstitute.github.io/picard/ the Picard tools], called SortSam. Picard can be downloaded from https://github.com/broadinstitute/picard/releases/download/2.8.2/picard-2.8.2.jar\n\n> Download the file\nRemember to use tab autocompletion !\n```\nwget https://github.com/broadinstitute/picard/releases/download/2.8.2/picard-2.8.2.jar\nll\n```\nFor the tools to run properly, you must have Java 1.8 installed. 
To check your java version run the following command:\n```\njava -version\n```\nRunning Java tools from the command line requires a special syntax: you have to start the command with **java** and then the name of the java tool and its options and arguments.\n\nJava jar-files are archives of multiple java files (similar to tar archives of multiple regular files). They require an even more elaborate syntax. You have to start the command with **java -jar** and then the name of the jar file and its options and arguments. As you can see the picard tools come as a jar-file.\n\n> Test the installation by opening the help file\n\n```\njava -jar picard-2.8.2.jar -h \n```\nBam files are enormous files that are hard to search through. The order of the reads in a bam file is the same as in the original fastq file. However, if you want to visualize the mapping results or if you want to calculate mapping statistics it's much more efficient to sort the reads according to genomic location. This can be achieved with the SortSam tool. Look in [https://broadinstitute.github.io/picard/command-line-overview.html the picard documentation] for the SortSam tool.\n\n> Sort the bam file to SRR074262sorted.bam\nRemember to use tab autocompletion !\n```\njava -jar picard-2.8.2.jar SortSam \\\n      I=SRR074262.bam \\\n      O=SRR074262sorted.bam \\\n      SORT_ORDER=coordinate\n```\nBam files contain duplicate reads unless you removed them during the quality control step. MarkDuplicates locates and tags duplicate reads in a bam or sam file. Duplicate reads originate from the same fragment and were typically introduced during library construction using PCR. Duplicate reads can also result from a single cluster on the flow cell, incorrectly detected as multiple clusters by the optical sensor of the sequencing instrument.\nMarkDuplicates compares sequences of reads and detects duplicates. The tool's output is a new SAM or BAM file, in which duplicates have been identified in the SAM flags field. 
If needed, duplicates can be removed using the REMOVE_DUPLICATES and REMOVE_SEQUENCING_DUPLICATES options. (See  [https://broadinstitute.github.io/picard/command-line-overview.html#MarkDuplicates the Picard documentation] for more details).\n\n> Remove duplicates from the sorted bam file\nRemember to use tab autocompletion !\n```\njava -jar picard-2.8.2.jar MarkDuplicates \\\n      I=SRR074262sorted.bam \\\n      O=SRR074262sortednodup.bam \\\n      M=marked_dup_metrics.txt \\\n      REMOVE_DUPLICATES=true\n```\nFor visualization and easy access you can build an index to the bam file using BuildBamIndex. Look in [https://broadinstitute.github.io/picard/command-line-overview.html the picard documentation] for the BuildBamIndex tool.\n\n> Build the bai file for SRR074262sortednodup.bam\nRemember to use tab autocompletion !\n```\njava -jar picard-2.8.2.jar BuildBamIndex \\\n      I=SRR074262sortednodup.bam \n```\nCheck if the files were generated.\n\n### File compression\n\n> Compress the SRR074262.bam file to .gz format\nRemember to use tab autocompletion !\n```\ngzip SRR074262.bam\nll\n```\n> and unzip it again\nRemember to use tab autocompletion !\n```\ngunzip SRR074262.bam.gz\nll\n```\n\n### Writing scripts\n\n#### Writing and executing bash scripts\n\nWe are going to make additions to the bash script you find below:\n```\n#this program pretends to hack sites\n!Define a variable str equal to \" 0  1  23  45  6 789\"\nclear\n!Print to screen: \"hacking www.bits.vib.be\"\n!Do nothing for 2 seconds\n!Print to screen: \"Server hacking module is loading\"\n!Do nothing for 2 seconds\n!Print to screen: \"Hack module is starting in 2 seconds\"\n!Do nothing for 1 second\n!Print to screen: \"1 second\"\n!Do nothing for 1 second\nping -c 3 www.bits.vib.be\n!Do nothing for 1 second\nnetstat\n!Do nothing for 1 second\nfor i in {1..1000}\ndo\nnumber1=$RANDOM\nlet \"number1 %= ${#str}\"\nnumber2=$RANDOM\nlet \"number2 %=4\"\n!Print to screen without newlines and with backslash 
escapes: \"\\033[1m${str:number1:1}\\033[0m\"\ndone\n!Print to screen: \"453572345763425834756376534\"\n!Do nothing for 3 seconds\n!Print to screen: \"www.bits.vib.be succesfully hacked!\"\n!Print to screen: \"PASSWORD ACCEPTED: token is 453572345763425834756376534\"\n```\n\nOpen gedit and paste the code.\n\n> Replace all lines that start with ! by the appropriate command\n\n```\n#this program pretends to hack sites\nstr=\" 0  1  23  45  6 789\"\nclear\necho \"hacking www.bits.vib.be\"\nsleep 2\necho \"Server hacking module is loading\"\nsleep 2\necho \"Hack module is starting in 2 seconds\"\nsleep 1\necho \"1 second\"\nsleep 1\nping -c 3 www.bits.vib.be\nsleep 2\nnetstat\nsleep 1\nfor i in {1..1000}\ndo\nnumber1=$RANDOM\nlet \"number1 %= ${#str}\"\nnumber2=$RANDOM\nlet \"number2 %=4\"\necho -n -e \"\\033[1m${str:number1:1}\\033[0m\"\ndone\necho \"453572345763425834756376534\"\nsleep 3\necho \"www.bits.vib.be succesfully hacked!\"\necho \"PASSWORD ACCEPTED: token is 453572345763425834756376534\"\n```. \n> Add a shebang line to the top of the script\n\n```\n#!/usr/bin/env bash\n#this program pretends to hack sites\nstr=\" 0  1  23  45  6 789\"\nclear\n...\n``` \nSave the script as HackIt.sh\n\n> If necessary make executable\n\n```\nchmod 755 HackIt.sh\n```\n> Run the script\n\n```\nbash HackIt.sh\n```\nWhat if you want to \"hack\" another website ? 
The easiest way to allow for this is to make it possible to give the url as an argument of the bash command, so that's what we'll do.\n\nReopen the file in gedit\n\n> Replace www.bits.vib.be by $1\n\n```\n#!/usr/bin/env bash\n#this program pretends to hack sites\nstr=\" 0  1  23  45  6 789\"\nclear\necho \"hacking $1\"\nsleep 2\necho \"Server hacking module is loading\"\nsleep 2\necho \"Hack module is starting in 2 seconds\"\nsleep 1\necho \"1 second\"\nsleep 1\nping -c 3 $1\nsleep 2\nnetstat\nsleep 1\nfor i in {1..1000}\ndo\nnumber1=$RANDOM\nlet \"number1 %= ${#str}\"\nnumber2=$RANDOM\nlet \"number2 %=4\"\necho -n -e \"\\033[1m${str:number1:1}\\033[0m\"\ndone\necho \"453572345763425834756376534\"\nsleep 3\necho \"$1 succesfully hacked!\"\necho \"PASSWORD ACCEPTED: token is 453572345763425834756376534\"\n``` \n> Save and run the script again now giving www.kuleuven.be as an argument \n\n```\nbash HackIt.sh www.kuleuven.be\n```\n$1 refers to the first argument of the command. If you have two arguments you use $1 and $2 to represent them.\n\n#### Writing and executing Perl scripts\n\nWe are going to create and run the perl script you find below:\n```\n#This program predicts if a sequence is protein, nucleic acid or rubbish\n$seq = $ARGV[0];\nif ($seq =~ /[JO]/) {\n  print \"is not a sequence, first illegal character is $&\\n\";\n} elsif ($seq =~ /[EFILPQZ]/) {\n  print \"is protein\\n\";\n} else {\n  print \"is nucleic acid\\n\";\n}\n```\n\nOpen gedit and paste the code.\n\n> Add a shebang line to the top of the script\n\n```\n#!/usr/bin/env perl\n#This program predicts if a sequence is protein, nucleic acid or rubbish\n$seq = $ARGV[0];\nif ($seq =~ /[JO]/) {\n...\n``` \nSave the script as SeqIt.pl\n\n> If necessary make executable\n\n```\nchmod 755 SeqIt.pl\n```\n> Run the script using your first name in capitals as an argument\n\n```\nperl SeqIt.pl JANICK\n```\n\n#### Writing and executing Python scripts\n\nWe are going to make additions to the python script you find 
below:\n```\n#This program counts the number of amino acids in a protein sequence\n!Define variable mySequence equal to \"SFTMHGTPVVNQVKVLTESNRISHHKILAIVGTAESNSEHPLGTAITKYCKQELDTETLGTCIDFQVVPGCGI\"\n!Create a set myUniqueAminoAcids out of mySequence\nfor aaCode in myUniqueAminoAcids:\n  !Print to screen, use format to fill in the values: \"Amino acid {} occurs {} times.\"\n```\n\nOpen gedit and paste the code.\n\n> Replace all lines that start with ! by the appropriate command\n\n```\n#This program counts the number of amino acids in a protein sequence\nmySequence = \"SFTMHGTPVVNQVKVLTESNRISHHKILAIVGTAESNSEHPLGTAITKYCKQELDTETLGTCIDFQVVPGCGI\"\nmyUniqueAminoAcids = set(mySequence)\nfor aaCode in myUniqueAminoAcids:\n  print(\"Amino acid {} occurs {} times.\".format(aaCode,mySequence.count(aaCode)))\n```\n> Add a shebang line to the top of the script\n\n```\n#!/usr/bin/env python\n#This program counts the number of amino acids in a protein sequence\nmySequence = \"SFTMHGTPVVNQVKVLTESNRISHHKILAIVGTAESNSEHPLGTAITKYCKQELDTETLGTCIDFQVVPGCGI\"\nmyUniqueAminoAcids = set(mySequence)\n...\n``` \nSave the script as CountIt.py\n\n> If necessary make executable\n\n```\nchmod 755 CountIt.py\n```\n> Run the script\n\n```\npython CountIt.py\n```\nWhat if you want to \"count\" another protein ? 
The easiest way to allow for this is to make it possible to give the sequence as an argument of the python command, so that's what we'll do.\n\nReopen the file in gedit\n\n> Adjust the code to read the first argument of the python command using the sys library\n\n```\n#!/usr/bin/env python\n#This program counts the number of amino acids in a protein sequence\nimport sys\nmySequence = sys.argv[1]\nmyUniqueAminoAcids = set(mySequence)\nfor aaCode in myUniqueAminoAcids:\n  print(\"Amino acid {} occurs {} times.\".format(aaCode,mySequence.count(aaCode)))\n``` \n> Save and run the script again now giving QWEERTIPSDFFFGHKKKKLLLLLLLLLLLLLL as an argument \n\n```\npython CountIt.py QWEERTIPSDFFFGHKKKKLLLLLLLLLLLLLL\n```\n\nsys.argv[1] refers to the first argument of the command. If you have two arguments you use sys.argv[1] and sys.argv[2] to represent them.\n\n#### Installing and using Python tools\n\nInstalling Python-based tools is not done with apt-get, instead the command pip is used. If pip is not yet installed, the terminal will show an error message saying that pip is currently not installed. You can install pip using apt-get.\n\nAs an example we will install Biopython, a Python library for bioinformatics. See [http://biopython.org/wiki/Download the documentation] for more details. \n\n> Install biopython \n\nYou need superuser privileges for this\n```\nsudo pip install biopython\n```\nWe will write a small python script to check if Biopython was successfully installed. In the folder /home/bits/Linux/ you find a file called [http://data.bits.vib.be/pub/trainingen/NGSIntro/sprot.fasta sprot.fasta] containing a set of protein sequences that we will use as input. Move to the folder containing the file.\n\nWe will use the SeqIO module of Biopython to parse the fasta file with the protein sequences. 
Check out [http://biopython.org/wiki/SeqIO the tutorial of the module].\n\n```\n!Import the SeqIO module of the Bio library\n!For every record in the sprot.fasta file do:\n!Print the id of the seq_record to the screen\n!Print the length of the sequence to the screen\n```\n\nOpen gedit and paste the code.\n\n> Replace all lines that start with ! by the appropriate command\n\n```\nfrom Bio import SeqIO\nfor seq_record in SeqIO.parse(\"sprot.fasta\",\"fasta\"):\n     print(seq_record.id)\n     print(len(seq_record))\n```\n> Add a shebang line to the top of the script\n\n```\n#!/usr/bin/env python\nfrom Bio import SeqIO\nfor seq_record in SeqIO.parse(\"sprot.fasta\",\"fasta\"):\n...\n``` \nSave the script as ParseIt.py in the folder that contains the input file.\n\n> If necessary make executable\n\n```\nchmod 755 ParseIt.py\n```\n> Run the script\n\n```\npython ParseIt.py\n```\n\n### Compressing and decompressing files\n\nSome files or tools come in **.zip** format, how to decompress them ? \n\nIn the **/usr/bin/tools** folder you can find the zipped version of the FastQC tool. To unzip it, you have to use the **unzip** command.\n\nThe **/usr/bin/** folder belongs to the root user, not to the bits user. Therefore only root is allowed to do manipulations in this folder. Switch to root using the **su** command or type **sudo** in front of your commands. The system will ask for the password: bitstraining on the BITS laptops. \n\n> Decompress the FastQC tool with unzip.\nFirst look at the unzip manual to get an idea about the working of the command. \n```man unzip```\nTo unzip the file you can use the simple command: ```unzip name_of_the_zip_file```. Remember to use tab autocompletion.\nThis will generate a folder called FastQC in /usr/bin/tools.\nAfter decompression use **ls** and **cd** to take a look at the content of the newly created **FastQC** folder. 
You will see the fastqc command in this folder.\n\n> Make sure that you can read, write and execute the fastqc command and that other people can read and execute it.\nTo see the current permissions of the command:\n```ls -l``` \nThe command that allows you to change the access permissions of files and directories is **chmod** (change mode). chmod has two mandatory arguments:\n\n - A three digit number representing the access permissions you want to set. Each digit refers to a different audience: \n\n - first digit refers to the owner of the file\n - second digit refers to the group the owner belongs to\n - third digit refers to all others\n\nThe numbers themselves represent the permissions:\n\n - 7 full access: read, write and execute\n - 6 read and write\n - 5 read and execute\n - 4 read only\n - 3 write and execute\n - 2 write only\n - 1 execute only\n - 0 no access\n \n\n - The name of the file for which you want to change the access permissions\n\n \n{{Wiki-img|NGS/Intro/LCLExtra2.png|400px}}\n\nAs you can see **root** is the owner of the file. This is why you need to log on as superuser (= root) to be able to change root's files. \n\n### Sorting files\n\nWe want to sort the file exams.txt from highest to lowest score on maths.\n\n> Sort the file based on score on maths. Write results to a file called examssort1.txt\nYou have to **sort** the lines in the file according to the maths score. So you want to sort the file based on the numbers in the second column: it means that you cannot use the default sort command (this will sort the lines based on the content of the first column) but you have to use an option that allows you to specify the column you wish to sort on.\nWhen you look in the manual you see that you can use the -k option for this: \n```\nsort -k2 exams.txt\n```\nThis will sort the lines according to the values in the second column, but it only prints the sorted lines to the screen; the original file is left unchanged. 
To save the sorted list in a new file, examssort1.txt, use the **redirect operator: >**\n```\nsort -k2 exams.txt > examssort1.txt\n```\n> Use the head command to look at the sorted file.\n\n```\nhead examssort1.txt\n```\nYou can see that the sorting was not done correctly: it was done alphabetically, treating the numbers in the second column as characters, instead of numbers. This means that we are still missing an option that allows for numerical sorting.\n\n> Sort the file numerically based on score on maths.\n```\nsort -k2 -n exams.txt > examssort1.txt\nhead examssort1.txt\n```\nThis looks a lot better, but we still have to reverse the order since we want the scores from high to low. \n\n> Sort the file numerically from highest to lowest score on maths.\nFor this we need to add a third option to the **sort** command.\nWhen you look in the manual you see that you can use the -r option for this:\n\n```\nsort -k2 -n -r exams.txt > examssort1.txt\nhead examssort1.txt\n```\n> Show the results of the 10 students with the highest scores on the maths exam using a single line of commands.\n\nThis means that you have to combine the **head** command and the **sort** command from the previous exercise into one single command. Remember that you can combine commands by writing them in the order they have to be performed, so in our case first **sort** then **head**, separated by the **pipe operator: |**\n\n```\nsort -k2 -n -r exams.txt | head\n```\n> Show only the names of the 10 students with the highest scores on the maths exam using a single line of commands.\n\nTo leave out gender and scores you have to use the **cut** command. To specify which columns to cut you can use the -f option. Please note that the -f option specifies the column(s) that you want to retain ! As an argument you have to specify the name of the file you want to cut.\nIn the manual you can see that TAB is the default delimiter for the cut command. 
So if you have a tab-delimited text file, as in our case, you do not need to specify the delimiter. Only if you use another delimiter do you need to specify it.\n\n```\nsort -k2 -n -r exams.txt | head | cut -f3\n```\n\n\n**The case of chromosomes and natural sorting.**\n'sort' will sort chromosomes as text; adding a few more parameters allows you to get the sort you need.\n\n> Write a list of human chromosomes (values: 22 to 1 X Y MT) to the screen. Use {end..begin} to define a numerical range.\n\nRemember that you can use **echo** to print text to the screen, so to generate text. Try\n```\necho {22..1} X Y MT\n```\nand see what happens...\nYou don't want the numbers next to each other in one row, you want them in a column underneath each other. This means you want to replace the blanks by end-of-lines. \n\n> Replace blanks by end-of-lines. Use the sed command for this.\n\nLook up the command for replacing text in the slides. Blanks are represented by **\\ ** (back slash followed by a blank) and end-of-lines are represented by **\\n** (back slash followed by n). To replace all blanks by an end-of-line you need to add the **g** option (see [http://sed.sourceforge.net/sed1line.txt sed tutorial] for more info). So \n```\nsed \"s/\\ /\\n/g\"\n```\nshould do the replacement. Of course you need to combine the two commands using the output of echo as input in sed. Look in the slides or the cheat sheet how to do this.\nHowever, you do not want to print the text to the screen; you want to print the text to a file. 
Look in the slides or the cheat sheet how to do this and try to combine the three parts of the command.\n\n> Write chromosomes as a column to a file called chroms.txt\n\nThe correct solution is: \n```\necho {22..1} X Y MT | sed \"s/\\ /\\n/g\" > chroms.txt\n```\nThe s in the sed argument refers to substitution: you want to substitute blanks by end-of-lines, it is followed by the character you want to replace (a blank or \"\\ \"), then the character you want to replace it with (an end-of-line or \"\\n\"), then you add g to apply the substitution globally, in other words to do the substitution more than once so each time a blank is encountered.\nIt prints the chromosome numbers as a column to the file chroms.txt\n\n> Look at the file using the less command.\n\n```\nless chroms.txt\n```\nRemember to use q to leave a less page. \n\n> Sort the chromosome file by using a simple sort. Write results to chromssort.txt\n\n```\nsort chroms.txt > chromssort.txt\nhead chromssort.txt\n```\nNot good! This is a tricky problem that always comes up when you are working with chromosome numbers e.g. when sorting bam/sam files, annotation files, vcf files...\n\n> Modify the sort command so that the sorting of the chromosomes is done in the correct way.\n\nMost people solve it by specifying that you want sort to do natural sorting using the -V option:\n```\nsort -V chroms.txt > chromssort.txt\nhead chromssort.txt\n```\nNice !\nNow try with chr in front.\n\n> Create a file with values chr22 to chr1 chrX chrY chrMT into one column called chroms2.txt in one single command\n\n```\necho chr{22..1} chrX chrY chrMT | sed \"s/\\ /\\n/g\" > chroms2.txt\nhead chroms2.txt\n```\n> Sort the file into a new file called chromssort2.txt\n\n```\nsort -V chroms2.txt > chromssort2.txt\nhead chromssort2.txt\n```\n\n### Getting files from the internet\n\nTo download data via a link on the internet you can use the **wget** command.\nFor NGS analysis you often need to download genomic sequence data from the internet. 
As an example we are going to download the E.coli genome sequence from the iGenomes website: ftp://igenome:G3nom3s4u@ussd-ftp.illumina.com/Escherichia_coli_K_12_MG1655/NCBI/2001-10-15/Escherichia_coli_K_12_MG1655_NCBI_2001-10-15.tar.gz\n\nDownload this file into the folder NGS/ChIPSeq/ in your home directory. \n\n> Download the data into this folder. \n\nGo to this folder and use the wget command to download the data:\n```\ncd /home/bits/NGS/ChIPSeq/\nwget ftp://igenome:G3nom3s4u@ussd-ftp.illumina.com/Escherichia_coli_K_12_MG1655/NCBI/2001-10-15/Escherichia_coli_K_12_MG1655_NCBI_2001-10-15.tar.gz\nll\n```\nIn the same way you can download NGS data from the internet. We are not going to actually do this because \nNGS data sets are enormous and can take hours to download. Interrupting the download is done with {{key press|Ctrl}} + C\n\n> Decompress the file. \n\n```\ntar -xzvf Escherichia_coli_K_12_MG1655_NCBI_2001-10-15.tar.gz\nll\n```\nThis creates a new folder called Escherichia_coli_K_12_MG1655.\nGo into this folder and look at the whole genome fasta sequence\n\n> Look at the fasta sequence. \nUse **cd** to navigate the folders and **head** to look at the file\n```\ncd Escherichia_coli_K_12_MG1655\nll\ncd NCBI\nll\ncd 2001-10-15\nll\ncd Sequence\nll\ncd WholeGenomeFasta\nll\nhead genome.fa\n```\n\n### Installing tools\n\nThe FastQC tool was installed by unzipping it. Most tools can be installed using the **make** command. There are many ways to install software on Linux:\n\n - via the software manager, an application with a very easy user friendly interface\n - via the **apt-get** command\n - software packages written in Python are installed via the **pip install** command\n\n\nThese methods handle the installation and removal of software on Linux distributions in a simplified way. They fetch the software from software repositories on the internet. 
However, these repositories do not always contain the most up-to-date version of software packages, especially not for niche software like bioinformatics tools.\n\nSo to be on the safe side, it is recommended that you download the latest version of a tool from its website (using wget) and use **make** to install it. In that way, you have full control over the version of the tool that you are installing.\n\nThis is not true for pip. Pip does the difficult steps in the installation for you and accesses an up-to-date package repository, so Python programs can safely be installed using pip.\n\nDownload and install all packages in the **tools** folder of the **/usr/bin/** folder. This is a folder owned by root so it is a good idea to switch to superuser again.\n\n#### Installing TopHat\n\nIn the Introduction training we use RNA-Seq reads. Mapping RNA-Seq reads is done using the TopHat tool. So we need to install the [http://ccb.jhu.edu/software/tophat/tutorial.shtml TopHat tool]. We are going to do this in the /usr/bin/NGS/ folder so we need to be superuser for this.\n\n> Go to the TopHat website and fetch the download link.\n\n - Go to the [http://ccb.jhu.edu/software/tophat/tutorial.shtml TopHat website]\n - Right click the Linux download link\n - Select **Copy Link Location**\n\n> Download the file into the /usr/bin/NGS/ folder.\n\n - Go to the terminal\n - Navigate to the /usr/bin/NGS/ folder\n - Type **wget **\n - Press the Shift and Insert keys simultaneously to paste the url\n\nTopHat is downloaded as a .tar.gz file \n\n> Decompress the file\nFor decompressing a .tar.gz file you need the following command:\n```\ntar -xzvf tophat-2.1.1.Linux_x86_64.tar.gz\n```\nRemember to use tab autocompletion !\n\nThis creates a new folder called tophat-2.1...\nGo into the tophat folder and type:\n```\n./tophat\n```\n\nIf this opens the help of tophat, it means the software has been installed correctly. It does not mean that you can use the software now. 
Well you can but you will always have to type the commands from inside the tophat folder like we do here or provide the full path to the tophat folder. The dot slash (./) in front of the command means use the tophat **that is located in this folder**. It tells the command line where it can find the script (./ = the current directory = /usr/bin/NGS/tophat-2.1.1.Linux_x86_64/). To avoid this we can create a symbolic link for tophat2 (see later).\n\n#### Installing samtools\n\nWhen you navigate to the **tophat** folder in /usr/bin/NGS/ you see that samtools is automatically installed when TopHat was installed:\n{{Wiki-img|NGS/Intro/MapRNASeq5.png|600px}}\n\nIf you see the samtools help page when you type\n```\n./samtools_0.1.18\n```\nit means that samtools is indeed installed\n{{Wiki-img|NGS/Intro/MapRNASeq6.png|600px}}\n\n\n[http://wiki.bits.vib.be/index.php/Introduction_to_ChIP-Seq_analysis Installing tools for the ChIP-Seq training]\n#### Installing cutadapt\n\nCutadapt is a Python program that removes adapter sequences from NGS reads.\nIt has already been installed on the bits laptops but if you need to install it, use [http://wiki.bits.vib.be/index.php/Installing_cutadapt these instructions].\n\n### Quality control of NGS data\n\n#### Checking the quality of the Introduction training data using FASTQC\n\nIn the /home/bits/NGS/Intro directory you can find a file called SRR074262.fastq (the file containing Arabidopsis RNA-Seq reads), that was used in the exercises on FastQC in Windows. FastQC is a tool that checks the quality of fastq files, containing NGS data.\n\nWe will now try to do the same FastQC analysis from command line in Linux. FastQC is a java-based tool that needs java to be able to run. 
\n\n> Check if the correct version of java is installed\nIn command line you can check if java is installed on your laptop using the following command:\n```\njava -version\n```\nYou should see something like:\n```\njava version \"1.8.0_101\"\nJava(TM) SE Runtime Environment (build 1.8.0_101-b13)\nJava HotSpot(TM) 64-Bit Server VM (build 25.101-b13, mixed mode)\n```\nIf you get an error then you don't have java installed. If the version listed on the first line is less than 1.5 then you will have problems running FastQC and you need to update java on your laptop.\n\n> Run FastQC\nTo run FastQC as a GUI just like in Windows type:\n```\nfastqc\n```\nThis opens the FastQC GUI and you could load a fastq file via the GUI to get its quality report. However, you can also use fastqc as a command via the command line. \n\n> Open the file SRR074262.fastq to obtain the sequence of the contaminating adapter.\nCheck [http://wiki.bits.vib.be/index.php/Quality_control_of_NGS_data#Exercise_1:_Quality_control_of_the_data_of_the_introduction_training the exercise on FastQC in Windows] for details on the quality report that is generated\nThe big plus of running FastQC from command line is that command line allows you to combine and run a set of commands as a program by writing a command script.\n\n#### Automating FASTQC analyses\nIf you have many FASTQ files to check you might prefer running FASTQC from command line so you can loop over your files and process the reports automatically. \n\n> View the help files of the fastqc command\nAs for most commands the -h option nicely opens the help file:\n```\nfastqc -h\n```\n\n{{Wiki-img|NGS/Intro/CLFastQC1.png|500px}}\n\nTo run via command line you can simply specify a list of files to process:\n```\nfastqc somefile.fastq someotherfile.fastq\n```\nYou can specify as many files as you like. If you don't specify any files the program will open the GUI.\n\nHowever, there are a few options that might be helpful to use. 
Since FASTQC can process FASTQ, SAM and BAM files, it is always safer to tell it upfront which format to expect.\n\nWe will generate FASTQC reports for the two FASTQ files in the /home/bits/NGS/RNASeq/ folder.\n\n> Decompress the files\nFirst you have to decompress the fastq files. In the cheat sheet look up the command for decompressing a .gz file\n```\ngunzip chr22_SRR1039509_1.fastq.gz\ngunzip chr22_SRR1039509_2.fastq.gz\n```\nDecompression of the files results in two .fastq files that can be used as inputs generating the FASTQC reports.\n\n> Generate the FASTQC reports for the two fastq files.\nAs you can see in the help file of fastqc, the -f option allows you to specify the format of the input file(s). \n```\nfastqc -f fastq chr22_SRR1039509_1.fastq chr22_SRR1039509_2.fastq\n```\nThe two .html files contain the FASTQC reports and can be opened in a browser. \n\n> Open the first report in firefox via command line\n\n```\nfirefox chr22_SRR1039509_1_fastqc.html\n```\nBy default, FastQC will create an HTML report with embedded graphs, but also a zip file containing individual graphs and additional data files containing the raw data from which the plots were drawn.\n\n> Remove the .html and the .zip files \n\n```\nrm *.zip\nrm *.html\n```\nIf you have many files you might want to use a for-loop instead of typing all file names into the command.\n\n> Write a for-loop to process the two FASTQ files.\nFirst go back to the folder that contains the fastqc command and make sure you are operating as superuser.\nTake a close look at the syntax of the for-loop that is described in the slides. We are going to use the syntax for looping over files in a folder. Don't forget the `*` to loop over all fastq files in the specified folder:\n```\nfor file in /home/bits/NGS/RNASeq/*.fastq\ndo\nfastqc -f fastq ${file}\ndone\n```\nDon't forget the **$** since file is just a variable that refers to the actual files in the folder. 
Write every line on a different line in the terminal.\n\nWhen you go to the /home/bits/NGS/RNASeq folder you should see the same html and zip files as in the previous exercise. The two .html files contain the FASTQC reports and can be opened in a browser.\nIf you want to save your reports in a folder other than the folder which contains your fastq files you can specify an alternative location by using the **-o** option.\n\n> Create a new folder called FASTQCresults\n\n```\nmkdir FASTQCresults\n```\n> Create a variable output. Its value is the path to the newly created folder.\n\n```\noutput=/home/bits/NGS/RNASeq/FASTQCresults/\n```\n> Write a for-loop to analyze the quality of the fastq files and write the report to the new folder\n\nAdjust the code of the for-loop to write the results to the newly created folder\n```\nfor file in /home/bits/NGS/RNASeq/*.fastq\ndo\nfastqc -f fastq -o ${output} ${file}\ndone\n```\nDon't forget the **$** since output and file are variables. Write every line on a different line in the terminal.\nWhen you go to the /home/bits/NGS/RNASeq/FASTQCresults folder you should see the same html and zip files as in the previous exercise. The two .html files contain the FASTQC reports and can be opened in a browser.\n\nIn this way you can process hundreds of FASTQ files automatically. You can even write a script to process the reports and create a general overview of the quality of the complete experiment.\n\nIn the **Templates** directory of the /usr/bin/tools/FastQC/ you will find a file called **header_template.html** which you can edit to change the look of the report. This file contains all information concerning the layout of the FASTQC reports like the header for the report, the CSS section... and you can alter this however you see fit.\n\n\n#### Improving the quality of the data\nIn this exercise we go back to the data set of the Intro training in folder /home/bits/NGS/Intro. 
\nAlmost all NGS data sets contain a high number of contaminating adapter sequences. You can remove these adapters using command line tools like [https://code.google.com/p/cutadapt/ cutadapt]. See [http://wiki.bits.vib.be/index.php/Installing_cutadapt installation instructions].\n\n> Check the help file for the option that defines the number of mismatches you allow (= error rate).\nTo open the cutadapt help files (it's not a regular bash command so it doesn't have a manual) type:\n```\ncutadapt -h\n```\n\nScrolling down the help file shows that the **-e** option defines the maximum allowed error rate: the default is 0.1 meaning that it allows one mismatch every 10 nucleotides. Adapter sequences are identified by aligning each read to the adapter sequence: if the frequency of mismatches in the alignment is below the allowed error rate then the adapter sequence is trimmed from the read.\n> Check the option you need for defining the adapter sequence\nIn the help file you see that you have multiple options:\n\n - **-a** to trim adapter sequences at the 3' end of the reads. In most cases this is the adapter that's causing the problems: when small RNA fragments are sequenced, the resulting reads can be longer than the RNA fragments. As a result they will contain (parts of) the 3’ adapter. In longer reads the adapter might even lie within the read:\n```\nMYSEQUEN                         (no adapter contamination)\nMYSEQUENCEADAP                   (part of adapter at 3' end)\nMYSEQUENCEADAPTER                (adapter at 3' end)\nMYSEQUENCEADAPTERSOMETHINGELSE   (adapter within the read)\n```\nCutadapt will cut the adapter (part) and all sequence following it resulting in:\n```\nMYSEQUEN\nMYSEQUENCE\nMYSEQUENCE\nMYSEQUENCE\n```\n\n - **-g** to trim adapter sequences ligated at the 5' end of the reads. 
These adapters are expected to appear at the start of a read (where they can be just partially there) or somewhere within the read:\n```\nADAPTERMYSEQUENCE              (5' end)\nDAPTERMYSEQUENCE               (partial)\nTERMYSEQUENCE                  (partial)\nSOMETHINGADAPTERMYSEQUENCE     (within)\n```\nIn all cases, the adapter itself and the sequence preceding it will be removed, leaving in all examples above:\n```\nMYSEQUENCE\n```\n\n - **-b** to trim adapters at the 3' or 5' end of the read. If there is at least one base before the adapter, then the adapter is trimmed as a 3’ adapter and the adapter itself and everything following it is removed. Otherwise, the adapter is trimmed as a 5’ adapter and it is removed from the read, but the sequence after it it remains:\n```\nBefore trimming \t        After trimming \t\nMYSEQUENCEADAPTERSOMETHING \tMYSEQUENCE \nMYSEQUENCEADAPTER \t        MYSEQUENCE\nMYSEQUENCEADAP \t                MYSEQUENCE\nMADAPTER \t                M\nADAPTERMYSEQUENCE \t        MYSEQUENCE\nPTERMYSEQUENCE \t                MYSEQUENCE\nTERMYSEQUENCE \t                MYSEQUENCE\n```\n\n\nSince we probably have contaminating adapter at the 3' end we'll take the -a option\nAt the top of the help file you see that the standard usage of the command is:\n```\ncutadapt -a ADAPTER -o output.fastq input.fastq\n```\nYou can find the sequence of the adapter in the FastQC report of SRR074262.fastq\n\n> Trim the adapter sequence using the default error rate, store the trimmed sequences in a file SRR074262trim.fastq\nSo in our case the command is: \n```\ncutadapt -a GATCGGAAGAGCTCGTATGCCGTCTTCTGCTTGAAA -o SRR074262trim.fastq SRR074262.fastq\n```\nNote that the default error rate means that you allow max. 10% mismatches in the alignment of adapter and read. 
\n\n> How many reads consisted solely of adapter sequence (and were consequently completely removed) ?\nThe output of the cutadapt command is:\n```\nThis is cutadapt 1.8.1 with Python 2.7.6\nCommand line parameters: -a GATCGGAAGAGCTCGTATGCCGTCTTCTGCTTGAAA -o SRR074262trim.fastq SRR074262.fastq\nTrimming 1 adapter with at most 10.0% errors in single-end mode ...\nFinished in 66.92 s (7 us/read; 8.62 M reads/minute).\n\n### Summary\n\nTotal reads processed:               9,619,406\nReads with adapters:                 2,327,902 (24.2%)\nReads written (passing filters):     9,619,406 (100.0%)\n\nTotal basepairs processed:   346,298,616 bp\nTotal written (filtered):    271,141,022 bp (78.3%)\n\n### Adapter 1\n\nSequence: GATCGGAAGAGCTCGTATGCCGTCTTCTGCTTGAAA; Type: regular 3'; Length: 36; Trimmed: 2327902 times.\n\nNo. of allowed errors:\n0-9 bp: 0; 10-19 bp: 1; 20-29 bp: 2; 30-36 bp: 3\n\nBases preceding removed adapters:\n  A: 6.1%\n  C: 1.5%\n  G: 1.8%\n  T: 3.0%\n  none/other: 87.5%\n\nOverview of removed sequences\nlength    count    expect    max.err    error counts\n3    156030    150303.2    0    156030\n4    48693    37575.8    0    48693\n5    12005    9394.0    0    12005\n6    8702    2348.5    0    8702\n7    6686    587.1    0    6686\n8    5546    146.8    0    5546\n9    5958    36.7    0    5484 474\n10    5479    9.2    1    4539 940\n11    4197    2.3    1    3737 460\n12    4038    0.6    1    3713 325\n13    3392    0.1    1    3158 234\n14    2730    0.0    1    2531 199\n15    2801    0.0    1    2625 176\n16    2384    0.0    1    2221 163\n17    1887    0.0    1    1759 128\n18    1998    0.0    1    1848 150\n19    1572    0.0    1    1447 123 2\n20    1257    0.0    2    1079 107 71\n21    1141    0.0    2    1029 90 22\n22    730    0.0    2    671 46 13\n23    504    0.0    2    471 21 12\n24    549    0.0    2    499 37 13\n25    495    0.0    2    441 39 15\n26    587    0.0    2    538 35 14\n27    657    0.0    2    585 53 19\n28    711  
  0.0    2    633 40 26 12\n29    764    0.0    2    687 49 24 4\n30    889    0.0    3    760 85 33 11\n31    887    0.0    3    739 94 42 12\n32    579    0.0    3    466 65 37 11\n33    438    0.0    3    347 36 38 17\n34    700    0.0    3    541 85 53 21\n35    5390    0.0    3    4652 507 171 60\n36    2037526    0.0    3    1870684 129754 20094 16994\n```\nIn the last line you see the number of reads with 36 bases aligned to the adapter sequence. Since that is the total of the read (the reads are 36bp long) it means that over 2 million reads only consist of adapter sequence, 1.870.684 being completely identical to the adapter, 129.754 containing 1 mismatch with the adapter...\n> Open the trimmed sequences in FastQC\nTo open the FastQC GUI type the fastqc command\n```\nfastqc\n```\nYou can compare the results with these of the original reads on [http://wiki.bits.vib.be/index.php/Quality_control_of_NGS_data the Quality control of NGS data wiki page].\n\n> Are all the reads still 36 nt long after trimming ?\nIn the **Basic statistics** tab you see that the length of the reads varies as was to be expected after trimming\n\n{{Wiki-img|NGS/Intro/fastqcTrim1.png|400px}}  \n> Have the quality scores of the reads significantly changed after trimming ?\nThe **Per base sequence quality** is similar to that of the untrimmed file, as is the **Per sequence quality**. The latter one just shows a lower number of sequences since the 2 million reads that consisted solely of adapter sequence are no longer taken into account.\n\n{{Wiki-img|NGS/Intro/fastqcTrim2.png|400px}} \n\nQuality scores have changed a bit of course since you removed bases and reads from the data set but you did not trim based on quality but based on similarity to an adapter sequence so the scores of the trimmed reads are similar to those of the untrimmed reads. 
If you had trimmed low quality bases, the quality scores would have been higher in the trimmed reads.\n> Has the per base sequence content improved as a result of the trimming ?\nThe **Per base sequence content** - the tool to detect adapter contamination - plot has greatly improved allthough it is still not considered stable enough.\n\n{{Wiki-img|NGS/Intro/fastqcTrim3.png|400px}} \n> What are the bumps you see in the Sequence length distribution plot ?\n\nThis question is related to the results of the trimming:\n```\nOverview of removed sequences\nlength    count    expect    max.err    error counts\n3    156030    150303.2    0    156030\n4    48693    37575.8    0    48693\n5    12005    9394.0    0    12005\n...\n33    438    0.0    3    347 36 38 17\n34    700    0.0    3    541 85 53 21\n35    5390    0.0    3    4652 507 171 60\n36    2037526    0.0    3    1870684 129754 20094 16994\n```\nAs you can see here over 2 million reads corresponded to adapter over their entire length and as a result were trimmed to length zero. This is the large peak at length zero on the plot. Over 150000 reads contain 3 bases that belong to the adapter. These 3 bases have been cut leaving reads of 33 nt long: this is the small peak you see on the plot at length 33. All intermediate lengths of adapter contamination have been detected but in such a small fraction of reads that you cannot see the influence of the trimming on the plot.\n\n{{Wiki-img|NGS/Intro/fastqcTrim4.png|400px}} \nFASTQC calls a failure for this plot because it knows the file contains Illumina data and it expects the reads to have the same lengths. The software does not consider the fact that this is no longer true after trimming.\n\n> Are there any overrepresented sequences left ?\nThe 2 million sequences that were initially detected as contaminating adapters are still in the list but now as sequences with zero length. 
The other contaminating sequences are of course still present but at very low counts.\n\n{{Wiki-img|NGS/Intro/fastqcTrim6.png|400px}} \n> Are there any overrepresented hexamers ?\nFASTQC still detects overrepresented hexamers although at much lower counts than before. These are probably parts of the remaining overrepresented sequences.\n\n{{Wiki-img|NGS/Intro/fastqcTrim5.png|400px}}\n\n### Linking files\n\n#### Linking FastQC\n\nIn the previous exercise you had to specify the path of the fastqc command, otherwise the operating system was not able to find (and thus execute) the command. You can avoid having to specify the path every time you want to execute a command by creating a link to the command using the **ln** command.\nYou can make soft or hard links, for what we want to achieve a soft link is fine. When you place a link to the command in /usr/local/bin you will be able to run the program from any location by just typing\n```\nfastqc\n```\nSo the overall format of the command is as follows:\n```\nln -s (soft link) path_where_fastqc_is (source path) /usr/local/bin/fastqc (destination path)\n```\n\n> What's the command you would need for creating this soft link ? \nWhen you look in the manual of **ln** you see that for creating a soft link you need the **-s** option. So you use the following command: \n```\nln -s /usr/bin/tools/FastQC/fastqc /usr/local/bin/fastqc\n```\nCheck if you can run the fastqc command from any location now.\n\n#### Linking Tophat2\n\nIf you don't create a symbolic link you have to specify the full path of the command when you want to run it, otherwise the operating system is not able to find (and thus execute) the command. You can avoid having to specify the full path every time you want to execute a command by creating a link to the command using the **ln** command. For creating symbolic links you need superuser powers!\nYou can make soft or hard links, for what we want to achieve a soft link is fine. 
When you place a link to the command in /usr/local/bin/ you will be able to run the program from any location by just typing its name.\nSo the overall format of the command is as follows:\n```\nln -s (soft link) path_where_command_is (source path) /usr/local/bin/name (destination path)\n```\n\n> Create a symbolic link for tophat2\nFor creating the link you need the following command:\n```\nsudo ln -s /usr/bin/NGS/tophat-2.1.1.Linux_x86_64/tophat2 /usr/local/bin/tophat2\n```\nRemember to use tab autocompletion !\nNow type **tophat2**. If you see the help file, the link works.\n\nIf you mess up the link you have to remove it before you can try again using the following command:\n```\nsudo unlink /usr/local/bin/tophat2\n```\n\n\n#### Linking samtools\n\nWe will also do the same for samtools to use samtools from anywhere in the file system. \n\n> Create a symbolic link for samtools\nCreate a link using the **ln -s** command:\n```\nsudo ln -s /usr/bin/NGS/tophat-2.1.1.Linux_x86_64/samtools_0.1.18 /usr/local/bin/samtools-0.1.18\n```\nCheck if the command works. If you type\n```\nsamtools-0.1.18 view\n```\n(one of the possible samtools commands) you should see the manual of the command.\nIn many cases you will have several versions of samtools running on your laptop. That's why I don't call the tool samtools but I choose the full name including the version number.\n\n[http://wiki.bits.vib.be/index.php/Introduction_to_ChIP-Seq_analysis#Linking_tools Linking tools for the ChIP-Seq training]\n\n\n### Mapping reads\n\n#### Mapping reads of the ChIP-Seq training with Bowtie\n\n### Mapping reads with Bowtie\n\n*Exercise created by Morgane Thomas Chollier*\n\n#### Obtaining the reference genome\nIf you are going to follow the ChIP-Seq training, skip this part: you are going to do these steps during the ChIP-Seq training. The fasta file containing the reference genome is called Escherichia_coli_K12.fasta and is stored in the /home/bits/NGS/ChIPSeq/ folder on the BITS laptops. 
Alternatively you can use the file that you downloaded via wget in exercise 3.\n\nIf you are not going to follow the ChIP-Seq training, go on and see how to obtain the reference genome. \n\nBack to the ChIP-Seq data of *E. coli*. In this experiment we want to see which genomic regions are bound to transcription factor FNR. However, at this point what we have is a set of reads that are identified by their location of the flow cell. To answer our question we should link the reads to regions in the genome.\nTo obtain their genomic coordinates, we will map each read on the reference genome sequence\nAs said before, for Illumina reads the standard mappers are BWA and Bowtie (version 1 and 2). In this exercise we will use Bowtie version1. \nCheck out the [http://wiki.bits.vib.be/index.php/Linux_command_line#Installing_Bowtie installation instructions for Bowtie].\n\nBowtie1 was installed and a symbolic link was created so the command should work from anywhere in the file system when you type bowtie-1.1.2\n\n> What happens when you type the bowtie command ?  \nThis prints the help of the program. However, the help file is a bit difficult to read ! If you need to know more about the program, it's easier to directly check [http://bowtie-bio.sourceforge.net/manual.shtml the  manual on the website]\nBowtie needs a reference sequence to align each read on it. \n\n> Which *E. coli* strain was used in the experiment ?  \nGo to [http://journals.plos.org/plosgenetics/article?id=10.1371/journal.pgen.1003565 the paper] and check the part **Strains and growth conditions** in the **Materials and methods** section. There you see that the experiment was done using *E. coli* K-12 MG1655.\n\n{{Wiki-img|NGS/Intro/mapping1.png|600px}}\nSo we need the genome sequence of *E. coli* K-12 MG1655 and it needs to be in a specific format (=index) for bowtie to be able to use it. 
Several pre-built indexes are available to download on [http://bowtie-bio.sourceforge.net/manual.shtml the bowtie webpages] or the [http://support.illumina.com/sequencing/sequencing_software/igenome.html iGenomes website].\n\nAlthough the *E. coli* sequence is available we will not use it to show you how you should proceed if you don't find your reference sequence here. In that case you will need to make the index file yourself. \n\nIf you can't find your reference on the iGenomes website you have to download it from:\n\n - [http://genome.ucsc.edu/ UCSC]\n - [http://www.ensembl.org/index.html Ensembl]\n - [http://www.ncbi.nlm.nih.gov/ NCBI]\n\nSince Ensembl focuses on higher eukaryotes, we are going to download the genome from NCBI.\n\n> Which reference sequence was used in the experiment ?  \nGo to [http://journals.plos.org/plosgenetics/article?id=10.1371/journal.pgen.1003565 the paper] and check the part **High-throughput RNA sequencing (RNA-seq) analysis**. There you see that the reads were mapped to an NCBI sequence with version number **U00096.2**.\n\n{{Wiki-img|NGS/Intro/mapping2.png|600px}}\n\n> Search for this sequence on NCBI ?  \nGo to [http://www.ncbi.nlm.nih.gov/ the NCBI website], select the **Nucleotide** database, type **U00096.2** as a search term and click **Search**. In the record of this sequence you see that an updated version is available. Click the **See current version** link. \n\n{{Wiki-img|NGS/Intro/mapping3.png|500px}}\n\nThis sequence is not a RefSeq sequence (the high quality part of NCBI Genbank). You can see that because the version number does not contain an underscore and all RefSeq version numbers contain an underscore.\n\n> Is there a RefSeq sequence available ?  \nIn [http://www.ncbi.nlm.nih.gov/nuccore/U00096.3 the record of the current version], scroll down to the **Related information** section in the right menu. There you see that a RefSeq sequence is available. Click the **Identical RefSeq** link. 
\n\n{{Wiki-img|NGS/Intro/mapping4.png|600px}}\n\nThis brings us to a RefSeq record with version number NC_000913.3. Note that we will not take this latest version but the previous one (NC_000913.2), because the available tools for visualization have not been updated yet to the latest version. This will not affect our results.\n\n> Download the sequence of the previous version of the RefSeq record in FASTA format  \nSearch the **Nucleotide** database for **NC_000913.2**\n\n{{Wiki-img|NGS/Intro/mapping5.png|600px}}\n\n{{Wiki-img|NGS/Intro/mapping6.png|600px}}\n\n\n - In the record expand the **Send to** section (red).\n - Select **File** as destination (green). This means that you download the data on your computer.\n - Select **FASTA** format (blue).\n - Click **Create File**.\n\nThis creates a file called **sequence.fasta** in the **Downloads** folder in your **Home** folder. Copy the downloaded file to the folder where the fastq files are located (/home/bits/NGS/ChIPSeq on the BITS laptops) and rename it as **Escherichia_coli_K12.fasta**.\n\n#### Writing a bash script to map the reads to the reference genome\n\nSuppose that you expect to be doing many NGS experiments on *E. coli*. Each time we analyze a data set, we will have to map the reads against the *E. coli* genome. The best way to ensure that you can reuse commands during the next analysis, is to combine them into a script (= small program). Since the script will consist of command line (= bash) commands, the script is called a bash script.\n\nYou cannot do the mapping directly on the .fasta file, you need to index the file first. Reference genomes from the Bowtie /iGenomes website are already indexed so when you get your reference there you can skip this step. Reference genomes downloaded from NCBI, Ensembl or UCSC need to be indexed using the bowtie-build command.  \n\nIndexing a reference genome is a one-time effort: you do not have to repeat it each time you do a mapping. 
This is why we are not going to include the indexing in the script.  \n\n> Create a variable called folder containing the path to the folder that contains the E. coli fasta file\n\n```\nfolder=/home/bits/NGS/ChIPSeq/\n```\n> Check out the manual of the bowtie-1.1.2-build command to see the arguments it takes\nSince we have created a soft link for the bowtie-1.1.2-build command, the command should work from any location in the Linux file system. To see the help file just type the command:\n```\nbowtie-1.1.2-build \n```\nIn the help file you see that you need to specify the reference genome that you want to index as an input (in our case the E. coli fasta file) and that you have to specify the output file.\n```\nUsage: bowtie-build [options]* <reference_in> <ebwt_outfile_base>\n    reference_in            comma-separated list of files with ref sequences\n    ebwt_outfile_base       write Ebwt data to files with this dir/basename\n```\nWe will give the output files the same name as our input file: Escherichia_coli_K12\n\n> Prepare an indexed reference sequence for E. coli using the bowtie-build command, use the folder variable\nSo as an input the command expects the name of the input and the output file. \n```\nbowtie-1.1.2-build ${folder}Escherichia_coli_K12.fasta ${folder}Escherichia_coli_K12\n```\nbowtie-build will index the Escherichia_coli_K12.fasta generating a whole set of .ebwt files whose name all start with Escherichia_coli_K12. We will write a bash script to do the rest of the mapping.\n\nWriting a script can be done in any text editor. 
On the BITS laptops you can use gedit:\n\n - Click the **Menu** at the bottom left corner of the desktop\n - Type **gedit** in the text search box\n - Click the **Text Editor** button\n\n{{Wiki-img|NGS/Intro/script1.png|300px}}\nThe first thing you do when you write a script is define all the variables you need.\nWe need the following variables:\n\n - The **folder** that contains the reference genome.\n - The name of the **input** fastq file you want to map (if it's in the same folder as the reference as it is in our case). If the fastq file is in another folder you have to specify the full path to the file.\n\n> Create the required variables \n```\nfolder=/home/bits/NGS/ChIPSeq/\ninput=SRR576933\n```\nMake sure that the file containing the indexed reference genome and the fastq files containing the *E. coli* reads are located in the same folder.\n\n> Check the help file for bowtie-1.1.2 \n Go back to the terminal and type\n```\nbowtie-1.1.2\n```\n> What is the first argument bowtie expects ?\nAs first argument bowtie expects the path to the ebwt files (= the genome index files) so in our case that's Escherichia_coli_K12\n```\nUsage: \nbowtie [options]* <ebwt>\n```\n> What is the second argument bowtie expects ?\nAs second argument bowtie expects the information of the input file containing the reads, in our case SRR576933.fastq. Bowtie can be used to map single end reads as we have but also to map paired end reads. In the case of paired end reads you have two fastq files, one with the upstream reads and one with the downstream reads. That's why you can specify two input files m1 and m2. 
In our case it's just one file.\n```\nUsage: \nbowtie [options]* <ebwt> {-1 <m1> -2 <m2> \n\n  <m1>    Comma-separated list of files containing upstream mates (or the\n          sequences themselves, if -c is set) paired with mates in <m2>\n  <m2>    Comma-separated list of files containing downstream mates (or the\n          sequences themselves if -c is set) paired with mates in <m1>\n```\n> What is the final argument bowtie expects ?\nAs final argument bowtie expects the output file which is in our case SRR576933.sam\n```\nUsage: \nbowtie [options]* <ebwt> {-1 <m1> -2 <m2> | --12 <r> | <s>} [<hit>]\n  <hit>   File to write hits to (default: stdout)\n\n```\nYou need to tell bowtie which type of file your input file is.  \n\n> What is the option for doing this ?\nVia the option: -q indicates the input file is in FASTQ format.\n```\nUsage: \nInput:\n  -q                 query input files are FASTQ .fq/.fastq (default)\n```\nFastq is the default, so you don't have to explicitly set this option. If you don't specify it in your command bowtie will automatically assume your input is in fastq format.\n\nYou need to tell bowtie the maximum number of mismatches you allow in the alignments of your reads to the reference.  
\n\n> What is the option for doing this ?\nVia the option: -v \n```\nAlignment:\n  -v <int>           report end-to-end hits w/ <=v mismatches; ignore qualities\n```\nIf you set this option's argument to 2, it means that bowtie will allow two mismatches anywhere in the read, when aligning the read to the genome sequence.\n\nThen we want to set an option that allows to define a number of bases that should be trimmed from the 3' ends of the reads before the alignment is done.\n\n> What is the option for doing this ?\nVia the option: -3 \n```\n  -3/--trim3 <int>   trim <int> bases from 3' (right) end of reads\n```\nWe want to set this option to trim the last base from the 3' ends of the reads before the alignment is done.\n\nWe also want to specify that we only want reads that map specifically to one location in the genome in our output.\n\n> What is the option for doing this ?\nVia the option: -m \n```\n  -m <int>           suppress all alignments if > <int> exist (def: no limit)\n```\nFinally we want to specify that the output should be SAM format.\n\n> What is the option for doing this ?\nVia the option: -S \n```\nSAM:\n  -S/--sam           write hits in SAM format\n```\n> Write the error channel to a file called SRR576933.out\nVia the redirection operator: 2> \n```\n2> SRR576933.out \n```\nIn the script you use the variables you have created instead of the actual file name SRR576933\n\n> Map the reads to the indexed reference sequence ?\n\nSo the full script becomes:\n```\nfolder=/home/bits/NGS/ChIPSeq/\ninput=SRR576933\nbowtie-1.1.2 ${folder}Escherichia_coli_K12 -q ${folder}${input}.fastq  -v 2 -m 1 -3 1 -S 2> ${folder}${input}.out > ${folder}${input}.sam\n```\nWe asked the mapper to create a sam file with mapping results. In the same way we could create a bam file. While SAM files can be inspected using Linux commands (head, less, grep, ...), BAM format is compressed and requires a special parser to read the file. 
Samtools is used to view bam files but it can also be used to analyze sam files.\n\nLook at this [http://davetang.org/wiki/tiki-index.php?page=SAMTools very informative wiki on samtools] and the [http://samtools.sourceforge.net/samtools.shtml official manual of samtools]. The manual does not document some of the commands, so it is better to first look in the wiki to find the command you need and then look in the manual to have an overview of the options it uses.\n\nWe will use samtools to get a rough idea of the quality of the mapping. Look at the samtools wiki to see which command you need for getting the basic statistics of a sam file.\n\n> Command to get the basic stats of the mapping file.  \nOn the [http://davetang.org/wiki/tiki-index.php?page=SAMTools samtools wiki] \n\n{{Wiki-img|NGS/Intro/samtools2b.png|300px}}\n\nyou can see that you need the **samtools flagstat** command for this.\nHowever samtools flagstat expects a bam file as input. So look at the samtools wiki to see which command you need for transforming a sam into a bam file.\n\n> Command to convert sam into bam files.  \nOn the [http://davetang.org/wiki/tiki-index.php?page=SAMTools samtools wiki] you can see that you need the **samtools view** command for this. \nFor the exact command you need to know if the sam file contains a header. Let's assume that the sam file indeed contains a header (it does, I checked). 
The symbolic link for samtools is samtools-0.1.18  Notice that we include the version number of bowtie and samtools in the symbolic link because we have mutiple versions of bowtie and samtools installed on the laptops.\n\n> Add the command for transforming the sam into a bam file to your script\n\n```\nfolder=/home/bits/NGS/ChIPSeq/\ninput=SRR576933\nbowtie-1.1.2 ${folder}Escherichia_coli_K12 -q ${folder}${input}.fastq  -v 2 -m 1 -3 1 -S 2> ${folder}${input}.out > ${folder}${input}.sam\nsamtools-0.1.18 view -bS  ${folder}${input}.sam > ${folder}${input}.bam\n```\n> Add the command for analyzing the bam file to your script\n```\nfolder=/home/bits/NGS/ChIPSeq/\ninput=SRR576933\nbowtie-1.1.2 ${folder}Escherichia_coli_K12 -q ${folder}${input}.fastq  -v 2 -m 1 -3 1 -S 2> ${folder}${input}.out > ${folder}${input}.sam\nsamtools-0.1.18 view -bS  ${folder}${input}.sam > ${folder}${input}.bam\nsamtools-0.1.18 flagstat ${folder}${input}.bam\n```\nBash scripts all have one characteristic: the first line of a bash script is always the following:\n```\n#!/bin/bash\n```\nThis tells the system which program should be used to interpret the script (in this case: /bin/bash)\n \n\n> Add this line to your script\nSo the full script becomes:\n```\n#!/bin/bash\nfolder=/home/bits/NGS/ChIPSeq/\ninput=SRR576933\nbowtie-1.1.2 ${folder}Escherichia_coli_K12 -q ${folder}${input}.fastq  -v 2 -m 1 -3 1 -S 2> ${folder}${input}.out > ${folder}${input}.sam\nsamtools-0.1.18 view -bS  ${folder}${input}.sam > ${folder}${input}.bam\nsamtools-0.1.18 flagstat ${folder}${input}.bam\n```\nSave the script as \"my_mapping\" in the /home/bits/NGS folder.\n{{Wiki-img|NGS/Intro/script2.png|400px}}\n\n> Check permissions of the script and change them if needed.\nGo to the folder where you have saved the script: /home/bits/NGS and type\n```\nll\n```\nThe script is not executable:\n{{Wiki-img|NGS/Intro/script4.png|300px}}\nMake it executable by typing:\n```\nchmod 755 my_mapping\nll\n```\nTo run the script make 
sure you are in the folder containing the script (/home/bits/NGS) and type:\n```\n./my_mapping\n```\nThe mapping should take a few minutes as we work with a small genome. For the human genome, we would need either more time, or a dedicated server.\n\nThe samtools flagstat command displays an overview of the alignment results on your screen. The results are not very informative because the data set comes from a single-end sequencing experiment. You just see that 62% of the reads were mapped. This may seem low but remember that we haven't done any cleaning on the file. According to FASTQC the file contains about 30% of adapter sequences that will not map.\n\nRepeat the analysis for the control sample SRR576938.fastq. These two fastq files come from a ChIP-Seq experiment, the first contains the reads of the ChIP sample, the second of the control sample, which consists of fragmented genomic DNA. You need both to identify regions in the genome that are represented more in the ChIP reads than in the control (these are the regions that bind to the transcription factor).\n\n> Repeat the analysis for sample SRR576938.fastq ?\nRepeating the mapping is easy now: the only thing you need to do is change the value of the input variable in the script:\n\n - Reopen the script in gedit\n - Change the name of the input file\n - Save the changes\n - In the terminal go to the folder containing the script (/home/bits/NGS)\n - Run the script by typing:\n```\n./my_mapping\n```\n\n \n> How many reads of the control sample were mapped ?\nIn the flagstat results, you see that 95% of the reads were mapped. This is of course ok but you expected a high percentage here since the control sample is nothing more than the reference genome cut up into small pieces. \nAt this point, you have two sam and two bam files, one for the treated sample, one for the control sample. 
\n\nFor paired-end data flagstat results are much more informative, see an example below:\n\n{{Wiki-img|NGS/Intro/samtools3.png|500px}}\n\nThis overview deserves some explanation:\n\n - **nan** means **Not A Number** (e.g: divided by 0 )\n - **paired in sequencing** means reads that belong to a pair regardless of the fact that they are really mapped as a pair\n - **read1** means forward reads\n - **read2** means reverse reads\n - **properly paired** means that both mates of a read pair map to the same chromosome, oriented towards each other, and with a sensible insert size\n - **with itself and mate mapped** means that both reads of a pair map to the genome but they are not necessarily properly paired, they just map somewhere on the genome\n - **singletons** means that one of the reads of a pair is unmapped while its mate is mapped\n - **with mate mapped to a different chr** means reads with a mate mapped on a different chromosome\n - **with mate mapped to a different chr (mapQ >= 5)** means reads with a mate mapped on a different chromosome having a mapping quality of at least 5\n\n> Compare the number of forward and reverse reads in the paired-end experiment.  \nThe counts of forward and reverse reads are to be found on the lines ending with read1 and read2 respectively. As you see the number of reverse reads exceeds the number of forward reads by 439. \n> How many reads were mapped as a pair in the paired-end experiment?   \n12.911.388 reads were properly mapped as a pair, that's 85,68% of the total number of reads\n\nYou can find similar info in the SRR576933.out file in the ChIPSeq folder (using the **less** command), which also contains some statistics about the mapping.\n\n> How many reads were mapped according to this file ?\nYou see that 62% of the reads were mapped, which is good considering 30% of the reads contained adapter sequences. Type **q** to leave the less pager. 
This result is in agreement with the result of the samtools flagstat command.\n\n\n#### Visualize mapping in IGV\n\nIGV is installed on the bits laptops and can be run using the **igv** command.\n```\nigv\n```\n\nThis opens the graphical user interface of the tool (similar to what we have with firefox during the class). Be patient, it might take a few minutes for the program to start.\n\nWe open the bam file that was generated by the Picard modules in IGV. The bam file contains Arabidopsis reads. This means we have to visualize them on the Arabidopsis genome. Change the genome in IGV from Human hg19 to A. thaliana (TAIR10).\n\n{{Wiki-img|NGS/Intro/IGV3.png|700px}}\n\nThis should display the Arabidopsis genome in the top and the bottom view.\nNow it's time to load the mapped reads via **File** in the top menu and **Load from File**.\n\n{{Wiki-img|NGS/Intro/IGV4.png|300px}}\n\nSelect the .bam file to open. You don't need to load the .bai file, it's sufficient that it is present in the same folder as the .bam file. \nThis loads the data into the center view. At this point, you can't see the reads, you have to zoom in to view them.\nAccording to the [supplemental material](http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0025279#s3) accompanying the paper describing this data set, AT1G02930 is highly expressed in all samples and differentially expressed during the defense response in *A. thaliana*. So we will zoom in on this gene. You can do this by typing the accession number in the top toolbar and clicking **Go**:\n\n{{Wiki-img|NGS/Intro/IGV5.png|700px}}\n\nThe reads for this gene are now visualized in the center view. You can zoom in even more using the zoom bar in the top toolbar:\n\n{{Wiki-img|NGS/Intro/IGV6.png|700px}}\n\nZoom in until you see the nucleotides of the reference sequence.\n\n{{Wiki-img|NGS/Intro/IGV7.png|700px}}\n\nThe reads are represented by grey arrows, the arrow indicating the orientation of the mapping. 
Hovering your mouse over a read gives additional info on the mapping. The colored nucleotides indicate mismatches between the read and the reference. Alignments that are displayed with light gray borders and white fill, have a mapping quality equal to zero. Interpretation of this mapping quality depends on the mapper as some commonly used mappers use this convention to mark a read with multiple alignments. In such a case, the read also maps to another location with equally good placement. It is also possible the read could not be uniquely placed but the other placements do not necessarily give equally good quality hits. \n\nBy default IGV calculates and displays the coverage track (red) for an alignment file. When IGV is zoomed to the alignment read visibility threshold (by default 30 KB), the coverage track displays the depth of the reads displayed at each locus as a gray bar chart. If a nucleotide differs from the reference sequence in greater than 20% of quality weighted reads, IGV colors the bar in proportion to the read count of each base (A, C, G, T). You can view count details by hovering the mouse over a coverage bar:\n\n{{Wiki-img|NGS/Intro/IGV8.png|700px}}\n","# Bitmap vs Vector images\n## Bitmap \n- Pixels in a grid/map\n- Resolution dependent\n- Restricted to rectangle\n- Resizing reduces visual quality\n- Easily converted\n- Minimal support for transparency\n- Popular file formats: BMP, GIF, JPEG, JPG, PNG, TIFF\n\nBit depth or color depth is the amount of data assigned to every pixel (e.g. 1-bit = black/white, 4-bit = 16 colors/shades of grey, etc.) The more data, the more realistic your image will be. 
More data per pixel also means larger files.\n\n## Vector\n- Scalable\n- Resolution independent\n- No background\n- Inappropriate for photo-realistic images\n- XML based text format\n- Popular file formats: SVG, AI, CGM, DXF, WMF, EMF\n\n# Pixels\nResolution = number of pixels =  how much detail an image holds\nPPI: pixels per inch\n- Screen pixel density (monitor/smartphone)\n- Tells you how large an image is\n\nDPI: dots per inch\n- Print-out dots density (inkjet/laser printer)\n- Printer settings\n\nAn image at 300 PPI will look fine on a monitor, but printing is another matter! Print it on paper and you will notice the difference between 72 DPI and 300 DPI\n\n# File formats and compression\n## JPG/JPEG\n- Supports 16.7 million colours (24 bit)\n- Lossy compression (information is lost from original file)\n- Small file size (compressed)\n- Photographs\n## BMP\n- Supports 8/16/24-bit\n- Uncompressed file format\n- Large file size\n## TIFF\n- Tagged Image File Format\n- All colour and data information is stored\n- Uncompressed (lossy and lossless compression is possible)\n- Very large file size\n## GIF\n- Graphics Interchange Format\n- Only 256 colours possible (8-bit)\n- Replaces multiple occurring patterns with one\n- Small file size\n- Animation\n## PNG\n- Portable Network Graphics\n- 256 / 16M colours\n- 8-bit transparency\n- Lossless compression\n## SVG\n- Scalable Vector Graphics\n- XML-based format\n- Lossless data compression\n- Creatable and editable with a text editor\n- Can contain both bitmap and vector data\n## PDF\n- Portable Document Format\n- Can contain both bitmap and vector data\n## RAW/DNG\n- Digital Negative (DNG) is a universal RAW file format\n- Raw image file (without white balance, color saturation, contrast settings, …)\n- RAW files can be camera brand specific\n- Large file size\n- Multiple options without taking the picture again\n## Publication vs Presentation\nKey features for publications:\n- Raw/uncompressed image file (e.g. 
TIFF)\n- High quality image (300 PPI) and resolution\n- Lossless compression (e.g. PNG)\n- Compression is sometimes allowed (check journal website!)\n\nKey features for presentation:\n- Normal quality image (72 PPI) and smaller resolution (max width: 1920 pixels)\n- Compression is allowed (e.g. JPEG)\n- Smaller file size\n\n# Guidelines on image editing\nScientifically accepted image manipulations are described in guidelines. VIB also has a document to guide you in what is and what isn't acceptable when adjusting your images. Some examples are:\n- No specific feature within an image may be enhanced, obscured, moved, removed or introduced\n- Adjustments of brightness, contrast or color balance are acceptable if they are applied to the whole image and as long as they do not misrepresent information in the original\n- Grouping of images from different parts of the same or different gel, fields or exposures must be made explicit by the arrangement of the figure (dividing lines)\n- The original data must be made available by the author when asked to provide it, otherwise acceptance of the publication may be revoked\n\nYou can find all the VIB guidelines [here](http://data.bits.vib.be/pub/trainingen/GIMP_Inkscape/VIB_guidelines.pdf).","# What is Inkscape?\nInkscape is professional quality vector graphics software which runs on Windows, Mac OS X and GNU/Linux. It is used by design professionals and hobbyists worldwide, for creating a wide variety of graphics such as illustrations, icons, logos, diagrams, maps and web graphics. Inkscape uses the W3C open standard SVG (Scalable Vector Graphics) as its native format, and is free and open-source software.\nDuring this training we will use **Inkscape 0.92** on Windows. To download the most recent version, browse to the [Inkscape Download page](https://inkscape.org/en/download/). 
For Windows 10 S: the Inkscape app is also available in the Microsoft Store.\n## External training material\n- [Online Inkscape tutorials](https://inkscape.org/en/learn/tutorials/).\n- [Nick Saporito Inkscape tutorials for beginners](https://www.youtube.com/playlist?list=PLynG8gQD-n8BMplEVZVsoYlaRgqzG1qc4)\n- [Nick Saporito Inkscape intermediate/advanced tutorials](https://www.youtube.com/playlist?list=PLynG8gQD-n8AFcLFAkvqJYnQUiBweRh1y)\n\n## User Interface\nInkscape is a single-window program. Drawing tools are on the left hand side, option docks are on the right. \nIn the central window, you have the drawing area with an A4 page as the default document layout. To select another format for e.g. posters, go to **File - Document Properties**. Next to the document size, you can adjust the background colour (default: transparent).\n\n## Import Images\nYou can import scalable vector graphic files (.svg) and also GraphPad Prism graphs (.emf or .pdf format).\nInkscape is not used for editing images like GIMP. If you import bitmap images, note that they are not scalable like vector objects!\n\n## Drawing lines and objects\nYou can draw a line with the Draw Bezier tool. You can make your own shape or just draw a line or path. On top of your drawing area you can select the Mode: Regular Bezier curves, Spiro paths, straight line segments and paraxial line segments. When selecting the straight line mode, you can hold the Ctrl button to make your line snap every 15 degrees around your first/previous point.\nYou can draw shapes by using the Rectangle tool, Ellipse tool and the Create Stars and Polygons tool. On top of the drawing area you can specify your polygon and star properties, size and lock aspect ratio. The Ctrl key is useful here as well, for creating squares and circles or specifying the position of your object.\nWhen you have an object (polygon or others) you can select a color for the stroke and inside of the object. 
Selecting an object using the Selection tool will give you more options on top of the view area. You have the option to rotate, flip, change dimensions and XY position (in different units). You can change the position of the selected object compared to others (move up/down). \n\n## Paths\nA path consist of lines and nodes. These lines can be straight or curved and you can make an object using paths ( closed path). When in Path mode you have several options; add or remove a node, joining or breaking nodes apart and changing the node properties. You can also change the segment (line between nodes) properties with the options on top of the screen. \nYou can convert an object into a path to gain more flexibility by selecting the object and go to **Path – Object to path**. Afterwards you can use the object tool or the path tool to manipulate the object. \n\n## Fill and stroke\nPaths, lines and objects can be given a plain color, patterns, gradient color or left blank/transparent. You can also configure the stroke style and color. Click **Object – Fill and Stroke** to see all the options. Paths/lines can be transformed into arrows using the Stroke style option **Markers**.\n\n## Text\nAt the left there is also a Text tool available. With this tool you can create and change text, it's colour, font, style and size. After entering text, you’re able to manipulate it like an object. You can also attach text into a frame by selecting both objects and click on **Text – Flow into Frame**.\nYou can also align text to a path. Select both text and path and click **Text – Put on Path**. Once the text in aligned to the path it stays adaptable and can be removed from the path; **Text - Remove from Path**.\nText is an object at first. 
When you select **Path - Object to path** you can modify your text like any other object that is converted into a path.\n\n## Grouping, aligning and arranging object/paths\nTo group several object you must select them all (hold Shift) and select **Object – Group**. To unite several paths you must select **Path – Combine**. Both options are the same and allow you to manipulate objects/paths as one. Both actions can be reversed (Ungroup / Break Apart).\nSeveral object must be aligned before you group them, think of text inside a box. To display the options, go to **Object - Align and Distribute**. When multiple objects are selected, you can align the top, bottom, left and right edges of the objects. Aligning on the central axes is also possible, this in both horizontal as vertical direction. The aligned objects always need an anchor, this can be changed in the box on top of the toolbox (Relative to:). This anchor can be an object (first, last, smallest or biggest) or the page, a selection or the complete drawing. Distributing objects works in a similar way, but manages the space between objects. For paths you can only align the nodes.\nAligning or distributing objects allows you to manipulate the X and Y position of your objects. There is also a virtual Z axis. When you have multiple objects with different colours, you can move the one above the other. Every new object you draw will be on top of all the rest. To raise an object one step or to the top, you can use the buttons on top of your screen. The same can be done to lower an object one step or to the bottom.\n\n## Path Effects and operations\nWhen you want to distribute/multiply an object along a guideline, there is a tool called Path Effects. First draw and select the object or group of objects and past it in the clipboard (Ctrl + C). Draw or select your path (guideline) and select **Path – Path Effects**. Click on the '+' sign and select the effect **Pattern Along Path**. 
In the new box on the right: select 'Repeated' on the option Pattern copies. Now click on 'Paste path' to paste the object you want to multiply. Note that only the shape is pasted, not the color. When adjusting the color, it will affect the entire path. To copy the colour, use Crtl+C again on your original, select your path of objects and go to **Edit - Paste Style - Paste Style**. There are also standard patterns to distribute along a path. When clicking on the '+' sign to add an effect, select ‘Gears’ or ‘Hatches (rough)’. Each of these effects have their own options to create an effect and to adjust the pattern.\nWhen it comes to paths, you can do much more than combining them. When you want to cut one shape out of another shape, you can use the options in the Path menu; Union, Difference, Intersection, Exclusion, Division and Cut Path.\n\n## Diagrams\nTo make a diagram with objects (circles, rectangles, stars, etc.) connected by lines, there is the Diagram connector tool. First you must draw and align the objects to create your diagram. Then select the Diagram connector tool. Every object can be selected by clicking in the white box in the middle of the object. Once connected the lines will follow the object if you move it to another place. The lines can be used as a path, therefore you can also modify them to e.g. dashed lines, arrows, etc.\n\n# Exercises\n> ### {% icon hands_on %} Hands-on: Exercise 1\n> Image 1 PNG: [Image 1](http://data.bits.vib.be/pub/trainingen/GIMP_Inkscape/Drawing.png)\n> Image 1 SVG: [Image 1 SVG](http://data.bits.vib.be/pub/trainingen/GIMP_Inkscape/Drawing.svg)\n> Task: Reproduce the top strand. 
Afterwards, reproduce the bottom strand using the first one.\n{: .hands_on}\n> ### {% icon hands_on %} Hands-on: Exercise 2\n> Image 2 PNG: [Image 2](http://data.bits.vib.be/pub/trainingen/GIMP_Inkscape/Drawing2.png)\n> Image 2 SVG: [Image 2 SVG ](http://data.bits.vib.be/pub/trainingen/GIMP_Inkscape/Drawing2.svg)\n> Task: Reproduce one of the sets of this image. Afterwards, reproduce the others using the first set.\n{: .hands_on}\n> ### {% icon hands_on %} Hands-on: Exercise 3\n> Image infographic 1: [Image 1](http://data.bits.vib.be/pub/trainingen/GIMP_Inkscape/procent_bars.png)\n> Image infographic 2: [Image 2](http://data.bits.vib.be/pub/trainingen/GIMP_Inkscape/circle_infographic.png)\n> Image infographic 3: [Image 3](http://data.bits.vib.be/pub/trainingen/GIMP_Inkscape/flower_diagram.png)\n> Task: Try to reproduce one of these images using the video tutorial series from Nick (see top of this page).\n{: .hands_on}","Using the mapping results we can define the peaks, the regions with a high density of reads in the ChIP sample, where the transcription factor was bound.\nThere are multiple programs to perform the peak calling. Some are more directed towards histone marks (broad peaks) while others are specific to narrow peaks (transcription factors). Here we will use MACS because it's known to produce generally good results, and it is well-maintained by the developer.\n\nMACS is installed on GenePattern. Check the documentation on GenePattern or read the manual on [the MACS github](https://github.com/taoliu/MACS/). Let's see the parameters of MACS before launching the peak calling.\n\n> How to define the input files?  \n> **treatment** and **control**: the treatment mapped read file (SRR576933.bam) and the control mapped read file (SRR576938.bam)\n\nNote that the bam files need to be sorted according to genomic location. 
At this point they are not, the reads are in the same order as they were in the fastq file, according to position on the flow cell.\n\n> Which Picard tool can we use to sort the files? \n> You can use Picard.SortSam for this.\n\n> Sort the bam files so that they can be used as input for MACS. \n> Use the default parameter settings.\n\nLet's go over the different parameters of MACS: \nThe **effective genome size** is the size of the genome considered \"usable\" for peak calling. This value is given by the MACS developers on their website. It is smaller than the complete genome because many regions are excluded (telomeres, highly repeated regions...). The default value is for human (2700000000), so we need to change it. As the value for <i>E. coli</i> is not provided, we will take the complete genome size **4639675**.\n\nMACS needs the length of the fragments, which are longer than the read length, because the sequencer sequences only parts starting from the end of the fragments. MACS2 does this by making a model of enrichment of reads in the ChIP sample versus the background, searching pairs of peaks within a bandwidth of 300 bases with an enrichment ratio between 5 and 50. If there are not enough pairs of peaks, as is the case in our data, you can fall back on using a preset fragment length by setting the **model** parameter to **no**. The default of **shift 0 extsize 200** is adequate for ChIPSeq. It means that reads are extended to a length of 200 bases before they are counted.\n\nThe **duplicates** specifies how MACS should treat the reads that are mapped to the exact same location (duplicates). The manual specifies that keeping only 1 representative of these \"stacks\" of reads is giving the best results.\n\nThe **make BedGraph** parameter will output a file in BEDGRAPH format to visualize the peak profiles in a genome browser. 
There will be one file for the treatment, and one for the control.\n\n**FDR** and **FDR for broad peaks** indicates that MACS will report peaks if their associated p-value is lower than the value specified here. Use a relaxed threshold as you want to keep a high number of peaks (even if some of them are false positives).\n\n> Perform peak calling on the sorted bam files. \n> Set the parameters as described above: \n\n- Load sorted bam files for treatment and control\n- Set effective genome size to 4639675\n- Don't use a model\n- Make a bedgraph file\n\nLook at the files that were created by MACS.\n\n> Which files contains which information?\n> \n- macs_summits.bed: location of the summit base for each peak (BED format).If you want to find the motifs at the binding sites, this file is recommended. The file can be loaded directly to the UCSC genome browser. Remove the beginning track line if you want to analyze it by other tools.\n- macs_peaks.xls: peak coordinates with more information, to be opened with Excel. Information include:\n\n- chromosome name\n- start position of peak\n- end position of peak\n- length of peak region\n- absolute peak summit position\n- pileup height at peak summit\n- -log10(pvalue) for the peak summit (e.g. pvalue =1e-10, then this value should be 10)\n- fold enrichment for this peak summit against random Poisson distribution with local lambda\n- -log10(qvalue) at peak summit\n\nCoordinates in XLS is 1-based which is different from BED format. \n\n- MACS_peaks.narrowPeak is a BED file which contains the peak locations together with peak summit, p-value, and q-value. You can load it to the UCSC genome browser. Definition of some specific columns are:\n\n- 5th: integer score for display. 
It's calculated as int(-10*log10pvalue) or int(-10*log10qvalue) depending on whether -p (pvalue) or -q (qvalue) is used as score cutoff.\n- 7th: fold-change at peak summit\n- 8th: -log10pvalue at peak summit\n- 9th: -log10qvalue at peak summit\n- 10th: relative summit position to peak start\n\nThe file can be loaded directly to the UCSC genome browser. Remove the beginning track line if you want to analyze it by other tools.\n- The MACS_treat_pileup.bdg and MACS_control_lambda.bdg files are in bedGraph format which can be imported to the UCSC genome browser or be converted into even smaller bigWig files. The MACS_treat_pileup.bdg contains the pileup signals (normalized) from the ChIP sample. The MACS_control_lambda.bdg contains local biases estimated for each genomic location from the control sample.\n","## Introduction to GenePattern\n\n### Access GenePattern\n\nYou can work on our [BITS Genepattern server](https://dev.bits.vib.be:8686/gp/pages/index.jsf). Ask the trainer for login details.\n\n### The GenePattern user interface\n\nLogging in brings you to the GenePattern homepage: \n\n![loggingingp](../../images/GP2b.png)\n\n\n- Click the GenePattern icon at the top of the page (red) to return to this home page at any time.\n- The upper right corner shows your user name (green).\n- The navigation tabs (blue) provide access to other pages.\n\nWe'll zoom in on the navigation tabs: \n\n- The Modules tab gives access to the tools that you can run. Enter the first few characters of a module in the search box to locate a tool. Click the Browse modules button to list the tools.\n\n![modulesgp](../../images/GP3b.png)\n\n- The Jobs tab shows an overview of the analyses that you have done by showing the tools that you have run, together with a list of output files that were generated.\n\n![jobsgp](../../images/GP4.png)\n\n- The **Files** tab shows a list of files you can use as input for the tools. 
These are files that you have uploaded from your hard drive or files that were generated as the output of a tool and that were saved to the **Files** tab. In your case the Files tab contains a folder **uploads**. \n\n### Searching a tool in GenePattern\n\nYou can find a module by typing its name into the search box on the **Modules** tab: \n\n![modulesgp](../../images/GP4a.png)\n\nSearching a tool makes its name appear in the main window.\n\n### Running tools in GenePattern\n\nClicking the name of the tool will open its parameter form in the main window.\n\n![groomer](../../images/GP5.png)\n\nFill in the parameters and click **Run** to start the analysis.\n\nAs long as the tool is running you see an arched arrow in the top right corner: \n\n![groomer](../../images/GP11.png)\n\nWhen the tool has finished the arched arrow is replaced by a checkmark and the file(s) containing the results appear at the bottom: \n\n![groomersrr074262](../../images/GP12.png)\n\nNote that apart from the file containing the results, other files are generated e.g. stdout.txt containing the error log of the tool. You can consult the error log in case of problems.\n\n#### Check the documentation\n\nTo obtain a description of the parameters of a tool and their default values click the Documentation link at the top of the page. \n\n![groomerdoc](../../images/GP16.png)\n\n#### Define input files\n\nMany input files are located in the shared data folder. In the parameter form of a tool, you will find the **Add Paths** or **URLs button** in the **input files** section: \n\n![groomerinput](../../images/GPAddPaths.png)\n\nClick the button and expand **BITS trainingdata Chipseq**: \n\n![groomerinput](../../images/GPSharedData.png)\n\n### Store the output of a tool in GenePattern \n\nCopy the file in the uploads folder on the Files tab to store it permanently and allow to use it as input for other tools. 
Output files that are not saved in the uploads folder are stored 7 days on the server and are visible via the Jobs tab.\n\nWhen a tool has finished output files are generated at the bottom of the page. \n\n- Click the name of the output file. \n\n![groomerinput](../../images/GP14.png)\n\n- Select Copy to Files Tab\n\n![groomerinput](../../images/GP13.png)\n","## Mapping reads with Bowtie\n\nExercise created by Morgane Thomas Chollier\n\n### Obtaining the reference genome\n\nIn the ChIP-Seq experiment of *E. coli* we want to see which genomic regions are bound to transcription factor FNR. However, at this point what we have is a set of reads that are identified by their location on the flow cell. To answer our question we should link the reads to regions in the genome to obtain their genomic coordinates. This process is called mapping.\nFor Illumina reads the standard mappers are BWA and Bowtie (version 1 and 2).\n\n> Which version of Bowtie are we going to use?\n> We will use Bowtie version 1 as this version was designed for mapping short reads (< 50nt) and our reads are short (36nt).\n\nThe Bowtie_1 aligner is installed on GenePattern. Check the documentation on GenePattern or read [the manual on the Bowtie website](http://bowtie-bio.sourceforge.net/manual.shtml).\n\nBowtie needs the complete genome, in FASTA format as a reference sequence to align the reads to.\n\n> Which *E. coli* strain was used in the experiment?\n> Go to [the paper](http://journals.plos.org/plosgenetics/article?id=10.1371/journal.pgen.1003565) and check the part **Strains and growth conditions** in the **Materials and methods** section. There you see that the experiment was done using *E. coli* K-12 MG1655.\n\n![](../../images/mapping1.png)\n\nThe genome sequence of *E. coli* K-12 MG1655 needs to be in a specific format (=index) for bowtie. 
Several pre-built indexes are available to download on [the bowtie webpages](http://bowtie-bio.sourceforge.net/manual.shtml) or the [iGenomes website](http://support.illumina.com/sequencing/sequencing_software/igenome.html).\nAlthough the *E. coli* sequence is available we will not use it to show you how you should proceed if you don't find your reference sequence on this website. In that case you will need to make the index yourself. \n\nIf you can't find your reference on the iGenomes website you have to download it from:\n- [UCSC](http://genome.ucsc.edu/)\n- [Ensembl](http://www.ensembl.org/index.html)\n- [NCBI](http://www.ncbi.nlm.nih.gov/)\n\nSince Ensembl focuses on higher eukaryotes, we are going to download the genome from NCBI.\n\n> Which reference sequence was used in the experiment ?  \n> Go to [the paper](http://journals.plos.org/plosgenetics/article?id=10.1371/journal.pgen.1003565) and check the part **High-throughput RNA sequencing (RNA-seq) analysis**. There you see that the reads were mapped to an NCBI sequence with accession number **U00096**.\n\n![](../../images/mapping2.png)\n\n> Search for this sequence on NCBI ?  \n> Go to [the NCBI website](http://www.ncbi.nlm.nih.gov/), select the **Nucleotide** database, type **U00096** as a search term and click **Search**.\n\nNCBI Nucleotide is notorious for the amount of errors it contains both in sequences and in annotations. Therefore, if available you should always use sequences from RefSeq, the clean subset of NCBI’s Nucleotide database. This sequence is not a RefSeq sequence. You can see that because the accession number does not contain an underscore and all RefSeq accession numbers contain an underscore.\n\n> Is there a RefSeq sequence available ?  \n> In [the Nucleotide record](http://www.ncbi.nlm.nih.gov/nuccore/U00096.3), scroll down to the **Related information** section in the right menu. There you see that a RefSeq sequence is available. Click the **Identical RefSeq** link. 
\n\n![](../../images/mapping4.png)\n\nThis brings us to a RefSeq record with accession number NC_000913.\n\n> Download the sequence of the RefSeq record in FASTA format  \n> \n![](../../images/mapping6.png)\n\n- In the record expand the **Send to** section (red).\n- Select **File** as destination (green). This means that you download the data on your computer.\n- Select **FASTA** format (blue).\n- Click **Create File**.\n\nIf all goes well, this creates a file called **sequence.fasta** in the **Downloads** folder of your computer. \n\n> Upload the downloaded file to your **Uploads** folder in GenePattern. \n> \n- Go to the **Files** tab in GenePattern.\n- Drag and drop the file onto the **Drag Files Here** section.\n- Select the **Uploads** folder and click **Select**\n\n![](../../images/GPUpload.png)\nIf all goes well you should see the following message\n![](../../images/GPUpload2.png)\n\nIf the upload takes too long use the fasta file from the **SHARED_DATA** folder in GenePattern.\n\n### Indexing the reference genome\n\nYou cannot do the mapping directly on the .fasta file, you need to index the file first. Reference genomes from the Bowtie/iGenomes website are already indexed so when you get your reference there you can skip this step. Reference genomes downloaded from NCBI, Ensembl or UCSC need to be indexed using the Bowtie_1 indexer tool.  \nIndexing a reference genome is a one-time effort: you do not have to repeat it each time you do a mapping.  \nCheck the documentation of the Bowtie_1 indexer to see the parameters it takes. The documentation shows that you need to specify: \n- the reference genome that you want to index as an input (in our case the *E. coli* fasta file)\n- the name of the indexed output file\n\nGive the output file the same name as the input file: **Escherichia_coli_K12**. 
The Bowtie indexer will generate a zip file containing a whole set of .ebwt files whose name all start with Escherichia_coli_K12.\nCopy the zip-file to your **Uploads** folder.\n\n### Mapping the reads\n\nOpen the Bowtie_1 aligner parameter form. \n\n> Use the indexed E.coli genome for mapping\n> The first parameter of the Bowtie 1 aligner parameter form are the genome index files (= the zipped ebwt files in your **Uploads** folder). \n- Go to the **Files** tab\n- Click the **Upload your own file** button in the **bowtie1 index** section of the bowtie 1 parameter form\n- Drag and drop the zip file to the **Drag your files here** section\n\n![](../../images/GPBowtie1.png)\n\n> How to define the input file(s) ?\n> Bowtie needs an input file containing the reads (in our case SRR576933.fastq). Bowtie can map single end reads like we have but also paired end reads. In the case of paired end reads you have two fastq files, one with the upstream reads and one with the downstream reads. That's why you can specify two input files: **reads pair 1** and **reads pair 2**. We just select **SRR576933.fastq** from the **SHARED_DATA** folder as input for **reads pair 1**.\n\n![](../../images/GPBowtie2.png)\n\nYou need to tell bowtie what type of file your input file is.\n\n> What is the parameter for doing this ?\n> Via the parameter called **input format** you can specify that the input file is in fastQ format.\nFastQ is the default, so you don't have to explicitly set this option.\n\nBowtie has two modes of mapping. The simplest strategy is called v-mode alignment: you align complete reads (from the first to the last base aka end-to-end) to the reference and you count the number of mismatches in this alignment. In this mode quality values are ignored and you need to tell bowtie the maximum number of mismatches you allow.  
\n\n> Do a v-mode mapping allowing 2 mismatches in the alignments.\n> - Expand the **advanced customization of run** parameters\n> - Set **alignment mode** to **v-mode**\n> - Set **max mismatches for vmode alignment** to **2**: this means that bowtie will allow two mismatches anywhere in the alignments. The value for this parameter must be a number from 0 through 3.\n\n![](../../images/GPBowtie3.png)\n\nRemember that because the base quality at the 3'end of the reads is lower, base calls at the 3'ends are often incorrect. This will inevitably lead to mismatches in the alignments. Reads with more than 2 mismatches will not be reported. To avoid losing too many reads during the mapping we can either trim low quality bases from the 3' ends of the reads before the alignment is done or use a mapping strategy that takes into account the quality scores of the bases.\n\nThis strategy is called n-mode alignment. It's the default mode. It aligns seeds, the first N bases of the reads at the high quality 5'end, to the reference. You have to set the length of the seeds and the maximum number of mismatches allowed in the seed alignment. Additionally the sum of the quality scores at all mismatched positions (not just in the seed) is calculated and you can set a maximum for this parameter. In this way, reads with mismatches with high quality scores will not be reported whereas mismatches with low scores are more or less ignored.\n\nThe FASTQC report showed that the last base is of low quality. Since the reads are 36 bases long we could use seeds of 35 bases for the mapping.\n\n> Do an n-mode mapping with seeds of 35 bases allowing 2 mismatches in the seeds.\n> - Expand the **advanced customization of run** parameters\n> - Set **alignment mode** to **n-mode**\n> - Set **seed length for nmode alignment** to **35**\n> - Set **max mismatches in seed** to **2**: this means that bowtie will allow two mismatches in the alignments of the seeds (the first 35 bases of the reads) to the reference. 
The value for this parameter must be a number from 0 through 3.\n\n![](../../images/GPBowtie4.png)\n\nWe also need to specify that we only want to report reads that map specifically to one location in the reference.\n\n> What is the parameter for doing this ?\n> Via the parameter called **report alignments** you can specify that the output file should contain reads **only mapping at unique location**.\n\nBy default, bowtie will include unmapped reads in the output file. That's unnecessary since no one uses these unmapped reads.\n\n> How to exclude unmapped reads from the output file?\n> Via the parameter called **include unaligned** in the **output** section you can specify not to include unmapped reads in the output file.\n\nWe want to get a rough idea of the quality of the mapping. Look at the stdout.txt file that was generated by bowtie to get the basic statistics of the mapping.\n\n![](../../images/GPBowtie5.png)\n\nYou see that 62% of the reads were mapped. This may seem low but remember that we haven't done any cleaning on the file. According to FASTQC the file contains about 30% of adapter sequences that will not map.\n\n> How many multi-mapped reads were originally present in the sample?\n> Multimappers are reported as **reads with alignments suppressed due to -m**. Behind the scenes the **report alignments** parameter in the form is translated into a bowtie -m option that is run at command line. This option is used to guarantee that reported alignments are unique. Specifying -m 1 instructs bowtie to refrain from reporting any alignments for reads having more than 1 reportable alignment. \n\nThe output of Bowtie is a sam file. The SAM format corresponds to large text files, that can be compressed (\"zipped\") into .bam files that take up to 4 times less disk space and are usually sorted and indexed for fast access to the data they contain. The index of a .bam file is named .bai and some tools require these index files to process the .bam files. 
So we need to transform the .sam file with our mapping results to a .bam file. You can use one of the tools from the Picard toolbox for this. \n\n> Convert the sam to a bam file.\n> You can use the tool Picard.SamToBam for this.\n\nRepeat the analysis for the control sample SRR576938.fastq. These two fastq files come from a ChIP-Seq experiment, the first contains the reads of the ChIP sample, the second of the control sample, which consists of fragmented genomic DNA. You need both to identify regions in the genome that are represented more in the ChIP reads than in the control (these are the regions that bind to the transcription factor).\nSuppose that you have many fastq files that you need to map to the *E. coli* genome. The best way to ensure that you can reuse tools and parameter settings during the next analysis, is to combine them into a pipeline.\n\n> Create a pipeline to map ChIPSeq data sets.\n> - In the top menu select **Modules & Pipelines**\n> - Click **New Pipeline**\n\n> ![](../../images/GPPL.png)\n\n> - In the **Search Modules** section search for the modules you need: first **Bowtie_1_aligner** and then **Picard.SamToBam**\n> - Click a tool to open its parameter form in the right pane of the pipeline editor.\n> - You can set values for the parameters or you can allow users to give values for the parameters when they use the pipeline. For the Bowtie_1_aligner allow users to define the index and the input fastq file by checking the boxes in front of these parameters. 
After you have done this you should see a user icon appearing in front of these parameters in the middle pane of the pipeline editor.\n\n![](../../images/GPPL2.png)\n\n> - Use the same settings for the remaining parameters as you used for mapping SRR576933.fastq\n> - Connect the sam output of bowtie as input file in Picard.\n> - Click the **Properties** button at the top to open the **Editing pipeline** parameters in the right pane.\n> - Type a name for the pipeline and hit Enter\n> - Click the **Save** button at the top.\n> - The pipeline has now become a module that you can search for and run in GenePattern. Exit the pipeline editor by clicking the **GenePattern** button at the top.\n\n![](../../images/GPPL3.png)\n\nNow you can use the pipeline as a regular module.\n\n> Repeat the analysis for sample SRR576938.fastq using the ChIPSeqMapping pipeline.\n> Repeating the mapping is easy, the only thing you need to do is define the index and the input file:\n> - Open the parameter form of the ChIPSeqMapping pipeline\n> - Drag and drop the zip file with the indexed genome to the **Drag your files here** section\n> - Use **SRR576938.fastq** from the **SHARED_DATA** folder as input file\n> - Run the pipeline\n\n> How many reads of the control sample were mapped ?\n> In the stdout.txt file generated by bowtie, you see that 95% of the reads were mapped. This is of course ok but you expected a high percentage here since the control sample is nothing more than the reference genome cut up into small pieces. \n\nAt this point, you have two sam and two bam files, one for the treated sample, one for the control sample.\n","For visualization with deepTools we need a bam file in which the order of the reads is determined by genomic location. We have created such a bam file in the peak calling step using the SortSam tool from the Picard suite.\nThe bam file still contains duplicate reads (=reads that map to exactly the same position in the genome). 
Such reads represent technical duplicates often caused by biased PCR amplification during the library prep or by fragments coming from repetitive elements in the genome... Since we are going to quantify the reads (we look for regions that are enriched in the ChIP sample) these technical duplicates will distort the quantifications. So they should be removed from the .bam file\nAdditionally an index file should be created to allow for fast and easy access to the sorted and processed .bam file.\n\n> Which tool from the Picard suite can be used to mark/remove duplicates?\n> Picard MarkDuplicates can be used to remove duplicates. \n\n> Remove duplicates and index the .bam files?  \n> - Use the sorted .bam files as input files\n> - Indicate that the files are sorted according to coordinates\n> - Remove the sequencing duplicates, duplicates generated by PCR\n> - Create an index file\n\nMarkDuplicates generates an error but you can ignore the error. Open the metrics.txt file that is generated by MarkDuplicates.\n\n> How many duplicates were found in the ChIP sample?  \n> \n ![](../../images/DuplicationMatrics.png)\n\nNow we will plot a Lorenz curve with DeepTools to assess the quality of the ChIP. It answers the question: “Did my ChIP work?” Did the antibody-treatment enrich sufficiently so that the ChIP signal can be differentiated from the background signal in the control sample? This is a valid question since around 90% of all fragments in a ChIP experiment will represent the genomic background. \n\nFor factors that enrich well-defined, narrow regions, the plot can be used to assess the strength of the ChIP, but the broader the enrichments, the less clear the plot will be. 
Vice versa, if you do not know what kind of signal to expect, the plot will give you an indication of how careful you have to be during downstream analyses to separate biological noise from meaningful signal.\n\nThe tool randomly samples genome regions (bins) of a specific length in indexed BAM files and calculates the sum of all reads that map in a bin. These sums are sorted according to their rank and a profile of cumulative sums is plotted.\n\n> Which tool from the DeepTools toolset are you going to use for this? \n> Run DeepTools [plotFingerprint](http://deeptools.readthedocs.io/en/latest/content/tools/plotFingerprint.html) to draw the Lorenz curve.\n\n> Create a Lorenz curve for the ChIP sample \n> - You have to provide both the .bam and the .bai file as input! \n> - The **nsamples** parameter represents the number of bins that are sampled from the genome. It has to be smaller than the genome size divided by the size of the bins (default 500nt). The size of the *E. coli* genome is 4639675 nt. So we will set this parameter to 9000.\n> - Other parameters can be kept at default settings\n\nAn experiment with perfect uniform distribution of reads along the genome (without enrichment) and infinite sequencing coverage should generate a straight diagonal line. A very specific and strong ChIP enrichment will be indicated by a prominent and steep rise of the curve towards the highest rank. This means that a big chunk of reads from the ChIP sample is located in few bins.\nBelow you see a few examples on how to interpret this curve:\n\n ![](../../images/DTLorenz.png)\n\n**What do you think about the fingerprint plot that was generated on the *E. coli* data?**\n","## Downloading a data set for the ChIP-Seq training\n\n### Download the data from GEO\n\nFor the ChIP-Seq training, we are going to use the data set that is described in the article of Myers et al., 2013 [6]. 
The data consists of reads from ChIP enriched genomic DNA fragments that interact with FNR, a well-studied global transcription regulator of anaerobiosis. As a control, reads from fragmented genomic DNA were used.\n\nNGS datasets are (usually) made freely accessible, by depositing them into specialized databases. Sequence Read Archive (SRA) located in USA and hosted by NCBI, and its European equivalent European Nucleotide Archive (ENA) located in England hosted by EBI both contains raw, unprocessed reads.\n\nProcessed reads from functional genomics datasets (transcriptomics, genome-wide binding such as ChIPSeq,...) are deposited in Gene Expression Omnibus (GEO) or its European equivalent ArrayExpress. <p>The article contains the following sentence at the end of the Materials and Methods section:\n\"All genome-wide data from this publication have been deposited in NCBI’s Gene Expression Omnibus (GSE41195).\"\nIn this case GSE41195 is the identifier that allows you to retrieve the dataset from the NCBI GEO (Gene Expression Omnibus) database.\n\nGEO hosts processed data files from experiments related to gene expression studies, based on NGS or microarrays. The files of NGS experiments can include alignments, peaks and/or counts.\n\nGo to the [GEO page](http://www.ncbi.nlm.nih.gov/geo/)\n\n> ### {% icon hands_on %} Download the data of the experiment with GEO ID GSE41195 \n>\n> - Type the ID in the search box on the GEO home page\n> - Click Search\n>   ![searchGEO](../../images/GEO1.png)\n> - This redirects you to the GEO record of the full experiment consisting of microarrays, tiling arrays and a ChIP-Seq experiment.\n>  ![searchGEO](../../images/GEO2.png)\n> - In the Experiment type section you can see that this GEO record indeed reports a mixture of expression analysis and ChIP-Seq experiments.\nScroll to the bottom of the page:\n     ![searchGEO](../../images/GEO3.png)\n> 4. You can see that the ChIP-Seq data have their own GEO ID: GSE41187\n> 5. 
Click the ChIP-Seq data ID: GSE41187.\nThis brings us on the GEO record of the ChIP-Seq experiment.\nIn the GEO record scroll down to the Samples section:\n>   ![searchGEO](../../images/GEO4.png)\n> For time's sake, we will focus in the training on a single sample: FNR IP ChIP-seq Anaerobic A\n> 6. Click the GEO ID GSM1010219 of the sample that we will use in the training\n> This brings us to the GEO record of the sample.\n> 7. Scroll to the bottom of GEO record of the sample to the Relations section:\n>  ![searchGEO](../../images/GEO5.png)\n>  GEO only contains processed data, no raw data. The raw data is stored in the SRA database. In the Relations section you can find the SRA identifier of this data set. For the training we would like to have a fastq file containing the raw data.\n> 8. Copy the SRA identifier\n{: .hands_on }\n\n### Download the data from ENA at EBI\n\nAlthough direct access to the SRA database at the NCBI is doable, SRA does not store sequences in a FASTQ format. So, in practice, it's simpler (and quicker!!) to download datasets from the ENA database (European Nucleotide Archive) hosted by EBI (European Bioinformatics Institute) in UK. 
ENA encompasses the data from SRA.\n\nSRA identifiers are also recognized by ENA so we can download the file from EBI.\n\nGo to the ENA website at [EBI](http://www.ebi.ac.uk/)\n\n> ### {% icon hands_on %} Download the data with SRA ID SRX189773\n> \n> - Type the ID in the search box on the EBI home page\n> - Click the **search icon**\n    This returns two results: a link to the record of the experiment and a link to the record of the run:\n>\n>   ![resultssearchENA](../../images/ENA2.png)\n> - Click the first result (red)\n>   ![resultssearchENA3 -80width](../../images/ENA3.png)\n>   The table at the bottom of the page contains a column called Fastq files (ftp)\n> - Click the link in this column to download the data in fastq format\n{: .hands_on }\n\n\n\nFor the training you do not have to download the data, it's already on the GenePattern server.\n\nTo download the replicate and the control data set, we should redo the same steps starting from the GEO web page of the ChIP-Seq experiment (click the sample ID of the FNR IP ChIP-seq Anaerobic B and the anaerobic INPUT DNA sample). The fastq file of the control sample is also available on the GenePattern server.\n","## Choosing a genome browser\n\nThere are several options for genome browsers, divided between the local browsers (need to install the program, eg. IGV) and the online web browsers (eg. UCSC genome browser, Ensembl). We often use both types, depending on the aim and the localisation of the data.\nNote that if you're working on a non-model organism, the local viewer will be the only choice. If the aim is to share the results with your collaborators, view many tracks in the context of many existing annotations, then the online genome browsers are more suitable.\n\n## Viewing the aligned reads in IGV\n\nOpen IGV. Be patient, it might take a few minutes for the program to start.\nChange the genome in IGV from '''Human hg19''' to the one you used in the mapping.\n\n> Load the desired genomed. \n> Load the *E. 
coli* genome as reference (from the file Escherichia_coli_K_12_MG1655.fasta, downloaded to build the bowtie index).\n\n- Top menu: **Genome** -> **Load Genome from File**\n ![](../../images/IGVLoadGenome.png)\n- The loaded genome appears in the top left panel:\n ![](../../images/IGVLoadGenome2.png)\n\nYou can also visualize the annotation (genes) in IGV. You can obtain a file with annotations from the Refseq record.\n\n> Download the annotations from RefSeq in GFF3 format. \n> Go to the [RefSeq record](https://www.ncbi.nlm.nih.gov/nuccore/NC_000913.3) of the *E. coli* genome.\n> - Expand the **Send to** section at the top of the page.\n> - Choose **File** as destination.\n> - Select **GFF3** format.\n ![](../../images/NCBIGFF3.png)\n\nYou can also [download the GFF3 file](http://data.bits.vib.be/pub/trainingen/NGSIntro/Ecoli_annotations.gff3) from our website.\n\nIf you want to load the .gff3 file and visualize the annotation properly in IGV, it’s necessary to comment (or remove) the third line:\n```\n##sequence-region NC_000913.3 1 4641652\n##species https://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=511145\n## NC_000913.3\tRefSeq\tregion\t1\t4641652\t.\t+\t.\tID=NC_000913.3:1..4641652;Dbxref=taxon:511145;Is_circular=...\nNC_000913.3\tRefSeq\tgene\t190\t255\t.\t+\t.\tID=gene-b0001;Dbxref=ASAP:ABE-0000006,ECOCYC:EG11277...\n```\n\nYou can visualize reads in IGV as long as they are sorted according to genomic location. Download the two sorted and indexed bam files (for SRR576933 and SRR576938) from GenePattern to your computer and load them in IGV.\n\n> Load the annotation and the bam files of the ChIP and the control sample. \n> - Top menu: **File** -> **Load from File**:\n>  ![](../../images/IGVLoadFile.png)\n> - You should see the track now.\n> - Do the same for the .bam files.\n> Note that you have to download the .bai files too and store them in the same folder as the .bam files. 
You do not have to explicitly open the .bai files in IGV but they have to be in the same folder as the .bam files or IGV will throw an error.\n> - Zoom in until you see the reads\n\n ![](../../images/IGVbams.png)\n\n**Browse around in the genome. Do you see peaks?**\n\n> Go to the following gene: pepT.\n> Type **pepT** in the box at the top (red) and click **Go**:\n ![](../../images/IGVpepT.png)\n\nDo the same for gene ycfP.\nLooking at .bam files does not allow you to directly compare the two samples as data are not normalized. To generate normalized data for visualization you can use bamCoverage from deepTools (it's available in GenePattern). It generates BigWig files out of .bam files.\n\n> Create a BigWig file from the sorted and indexed .bam file of the ChIP sample\n> The bamCoverage tool has the following parameters:\n> - **input file** is the sorted and indexed .bam file to process\n> - **index** is the accompanying .bai file\n> - **output format** is the output file type, we want to generate a **BigWig** file\n> - **genomeSize** **4639675** nt for *E. coli*\n> - **normalize**: different overall normalization methods; we will use the **RPGC** method corresponding to 1x average coverage\n> - **skip noncovered**: skip non-covered regions (without mapped reads) in the genome? Set to **yes**.\n> - **extend reads**: extend reads to fragment size, in our case **200** nt.\n> - **ignore duplicates**: reads that map to the same location in the genome will be considered only once. Set this to **yes**.\n\nRepeat for the control (again you see the benefit of creating a pipeline for repeating the same steps on multiple samples).\nDownload the BigWig files, start a new session in IGV and load the BigWig files in IGV.\n\n> View the BigWig files of the ChIP and the control sample in IGV\n> - Top menu: **File** -> **New session**\n> - Top menu: **File** -> **Load from File**. 
Load the two BigWigs and the .gff3 with the annotation.\n> - Right click the names of the BigWig tracks and select **Autoscale**.\n\n**Go back to the genes we looked at earlier: pepT, ycfP. Look at the shape of the signal.**\n\n## Viewing the peaks in IGV\n\nDownload the bdg files generated by MACS from GenePattern to your computer and rename them with the extension .bedgraph.\n\n> Download the bdg files. \n> Click the names of the bdg files in the **Files** tab and select **Save File**\n ![](../../images/GPDownloadBdg.png)\n\nReplace .bdg by .bedgraph otherwise the file will not be recognized by IGV.\nOpen a **new session** in IGV. Reload the .gff3 file with the annotation.\n\n> View the bedgraph files. \n> Load the control bedgraph file:\n\n- Top menu: **File** -> **Load from File**:\n ![](../../images/IGVLoadFile.png)\nYou might get a warning that the file is big. Simply click the **Continue** button.\n\n- You should see the track (in blue):\n ![](../../images/IGVLoadFile2.png)\n\nRepeat this step to load the treatment bedgraph file. You should now see the 2 tracks (in blue):\n ![](../../images/IGVLoadFile3.png)\n\nDownload and view the BED file containing the peak locations.\n\n> View the bed file with the peak locations. \n> Save the file from GenePattern to your computer and load the bed file into IGV.\nA new track with discrete positions appears at the bottom:\n ![](../../images/IGVLoadFile4.png)\n\nThe end result should look like this: 3 tracks with data (the bedgraph files of the 2 samples and the peaks file) and 1 track with annotation:\n ![](../../images/IGVLoadFile5.png) \n\n**Go back again to the genes we looked at earlier: pepT, ycfP. Do you see peaks?**\n","## Motif analysis\n\nFor the motif analysis, you first need to extract the sequences corresponding to the peaks. There are several ways to do this (as usual...). If you work on a UCSC-supported organism, the easiest is to use RSAT fetch-sequences. 
Here, we will use Bedtools, as we have the genome of interest at our disposal (Escherichia_coli_K12.fasta). However, we have to index the fasta file first to make it easy to access.\n\n> Which tool can be used to index the fasta file ?  \n> When you search for modules containing the word *fasta* you find a tool called SAMtools.FastaIndex that can index a reference sequence in fasta format and this is exactly what we need. \n\nUse this tool to index the *E. coli* genome and copy the resulting .fai file to the **Files** tab (in the same folder as the fasta file).\n\n> How to extract sequences corresponding to the peaks ?  \n> Use the BEDTools.fastaFromBed module for this.\n> - The **input file** is the fasta file of the *E. coli* genome that you uploaded to the server.\n> - The **bed file** is the bed file with the peaks that was generated by MACS (narrowPeak)\n\nSave the resulting .fa file to your computer.\n\nTo detect transcription factor motifs, you will use the **Regulatory Sequence Analysis Tools**. It has a specific teaching server recommended for trainings: [http://pedagogix-tagc.univ-mrs.fr/rsat/](http://pedagogix-tagc.univ-mrs.fr/rsat/)\nYou will use the program **peak-motifs**.\n\n> How to find the peak-motifs program \n> In the left menu, click on NGS ChIP-seq and then click on peak-motifs. A new page opens, with a form\n\nThe default peak-motifs web form only displays the essential options. There are only two mandatory parameters.\n\n> Fill the mandatory options \n> - The **title box**, which you will set as **FNR Anaerobic** . 
\n> - The **sequences**, that you will upload from your computer, by clicking on the button Choose file, and select the file FNR_anaerobic_combined_peaks.fa from your computer.\n\nWe will now modify some of the advanced options in order to fine-tune the analysis according to your data set.\n\n> Fill the advanced options \n> \n- Open the \"Reduce peak sequences\" title, and make sure the **Cut peak sequences: +/- ** option is set to **0** (we wish to analyze our full dataset) \n- Open the “Motif Discovery parameters” title, and check the oligomer sizes 6 and 7 (but not 8). Check \"Discover over-represented spaced word pairs [dyad-analysis]\"\n- Under “Compare discovered motifs with databases”, remove \"JASPAR core vertebrates\" and add RegulonDB prokaryotes (2015_08) as the studied organism is the bacteria E. coli.\n\n> Launch the analysis \n> - You can indicate your email address in order to receive notification of the task submission and completion. This is particularly useful because the full analysis may take some time for very large datasets. \n> - Click on the button “GO”. As soon as the query has been launched, you should receive an email indicating confirming the task submission, and providing a link to the future result page.\n\nThe Web page also displays a link, You can already click on this link. The report will be progressively updated during the processing of the workflow.\n","## Quality control of the data of the ChIP-Seq training\n\nUse FASTQC inside GenePattern to get basic information on the data (read length, number of reads, global quality of the datasets).\n\nRead the GenePattern tutorial for more details on how to use GenePattern.\nThe data is already present on the GenePattern server. 
When you open a tool in GenePattern, you will find the **Add Paths or URLs button** in the **input files** section: \n\n![inputfiles](../../images/GPAddPaths.png)\n\nClick the button and expand BITS trainingdata Chipseq: \n\n![inputfiles](../../images/GPSharedData.png)\n\nThe fastq file of the control data set is also available in the shared data folder (SRR576938.2.fastq)\n\n> ### {% icon hands_on %} Generate and view the FASTQC report of SRR576933.2.fastq in GenePattern \n>\n> - Search for **FASTQC** in the **Modules** section and open the parameter form.\n> - Use the fastq file from the Shared Data folder as input file.\n> - Leave the other parameters at their default values.\n> - Run FASTQC\n> FASTQC will generate a zip file and a html file. You can open the HTML report in your browser: \n> - Click the name of the output file at the bottom of the page.\n> - Select **Open Link**\n>   ![searchGEO](../../images/GP18.png)\n{: .hands_on }\n\nThe only parameter you might want to change in if you work on your own data is the contaminants file. It contains a long list of known adapter sequences (see the Documentation in GenePattern). If for some reason the adapters you used are not in the list, you have to provide them as a fasta file. \n\n### {% icon question %} How many reads does the file contain?  \n\n<details markdown='1'>\n  <summary>Check the answer.</summary>\n\nThis is one of the results of the Basic statistics module in FASTQC (red): \n\n![fastqc9b](../../images/FASTQC9b.png)\n\nKnowing that it is recommended for ChIPSeq to have around 30 million reads, the number of reads in this fastq file seems very low. \n</details>\n{: .question }\n\n### {% icon question %} Should we be concerned about the low number of reads in the sample?  \n\n<details markdown='1'>\n  <summary>Check the answer.</summary>\n\nNo it's not a problem because the sample comes from E. coli. This bacterium has a very small genome so 3 million reads will still generate high coverage. 
However, if this were a human or mouse sample the number of reads would be way too low and we would indeed be concerned. \n\n</details>\n{: .question }\n\n### {% icon question %} What is the length of the reads?  \n\n<details markdown='1'>\n  <summary>Check the answer.</summary>\n\nThis is one of the results of the Basic statistics module in FASTQC (green): \n\n![fastqc9b](../../images/FASTQC9b.png)\n\nAgain, you see that the data set consists of very short reads although this data set is very recent. This is because it has been shown that elongating the reads does not improve your results in ChIP-Seq analysis. It will just cost you more money.\n\n</details>\n{: .question }\n\n### {% icon question %} Are there any positions with low sequence quality?  \n\n<details markdown='1'>\n  <summary>Check the answer.</summary>\n\nThis is shown in the Per base sequence quality module in FASTQC:\n\n![fastqcpositions](../../images/FASTQC11b.png)\n\nThe overall sequence quality is good, although it drops sharply at the last position, but this is normal in Illumina data, so this feature is not raising hard concerns.\n\n</details>\n{: .question }\n\n### {% icon question %} What could be the cause of the failure in the per base sequence content plot?  \n\n<details markdown='1'>\n  <summary>Check the answer.</summary>\n\nThe content of the 4 nucleotides is far from constant over all positions: \n\n![fastqcadapters](../../images/FASTQC12b.png)\n\nThis typically points to the presence of adapter or other contaminating sequences in your reads. \n\n</details>\n{: .question }\n\n### {% icon question %} Which FASTQC module allows you to confirm this suspicion?  \n\n<details markdown='1'>\n  <summary>Check the answer.</summary>\n\nThe **Overrepresented sequences** module will show if your read file is enriched in known contaminants.\n\n</details>\n{: .question }\n\n### {% icon question %} What does this module tell you?  
\n\n<details markdown='1'>\n  <summary>Check the answer.</summary>\n\nThe **Overrepresented sequences** module shows a high percentage of adapter sequences (29%!).\n\n![fastqcadapters](../../images/FASTQC13b.png)\n\nAgain you see that adapter contamination is a frequently occurring problem of Illumina NGS data.\n\n</details>\n{: .question }\n\n### {% icon question %} What about sequence duplication levels?  \n\n<details markdown='1'>\n  <summary>Check the answer.</summary>\n\nThere is sequence duplication. Adapter contamination will be partly responsible for the high duplication levels (the blue peaks at the far right of the plot) but the main cause lies in the technique itself. Typically, after ChIP, you end up with a very small initial amount of DNA (antibodies are not that effective, many cleanup steps in the protocol,...) and you have to do PCR to get your library up to a proper size for sequencing. So naturally, you expect many clones of the same DNA fragment due to the small initial size of the library. \n\n![fastqcadapters](../../images/FASTQC17b.png)\n\n</details>\n{: .question }\n\nNow do the same for the control data set: **SRR576938.2.fastq**.\n\nIn theory one expects that regions with high read count in the ChIP sample represent the regions that were enriched by the immunoprecipitation, i.e. the regions that were bound to the protein. However, many studies have shown that the read count is affected by many factors, including GC content, mappability, chromatin structure, copy number variations... To account for these biases, a control sample is used consisting of fragmented genomic DNA that was not subjected to immunoprecipitation or that was precipitated using a non-specific antibody.\n\n### {% icon question %} How many reads does the control data set contain?  \n\n<details markdown='1'>\n  <summary>Check the answer.</summary>\n\nThis is one of the results of the **Basic statistics** module in FASTQC. 
You see that the control data set contains the double amount of reads as the ChIP data set.\n\nThe ChIP and control samples are usually sequenced at different depths, generating files with different total number of reads. This means that these two samples have to be made comparable later on in the analysis by normalization (see ChIP-Seq training).\n\n</details>\n{: .question }\n\n### {% icon question %} What is the length of the reads in the control data set?  \n\n<details markdown='1'>\n  <summary>Check the answer.</summary>\n\nThis is one of the results of the **Basic statistics** module in FASTQC. You see that the control data set contains reads of 36 bases just like the ChIP data set.\n</details>\n{: .question }\n\n### {% icon question %} Are there any positions with low sequence quality?  \n\n<details markdown='1'>\n  <summary>Check the answer.</summary>\n\nThis is shown in the Per base sequence quality module in FASTQC: \n\n![fastqc9b](../../images/FASTQC14b.png)\n\nThe overall sequence quality is good, although it drops sharply at the last position, but this is normal in Illumina data, so this feature is not raising hard concerns.\n\n</details>\n{: .question }\n\n### {% icon question %} Why did the per base sequence quality plot raise a failure in the ChIP sample and not in the control?  \n\n<details markdown='1'>\n  <summary>Check the answer.</summary>\n\nIn the slides you can see that the thresholds for a warning are: \n\n- end of box < 10\n- median < 25\n\n![fastqcpositions](../../images/FASTQC14c.png)\n\nOn the figure you see that the culprit is the median:\n\n- In the ChIP sample the median Phred score of the last position is 21 (so below 25) raising a failure\n- In the control sample the median Phred score of the last position is 26 (so above 25)\n\n</details>\n{: .question }\n\n### {% icon question %} Which FASTQC module gives a failure?  \n\n<details markdown='1'>\n  <summary>Check the answer.</summary>\n\nThe **Per tile sequence quality** module. 
The quality of one of the tiles is consistently different from the rest of the tiles.\n\n![fastqcadapters](../../images/FASTQC15b.png)\n\n</details>\n{: .question }\n\n### {% icon question %} Is this also the case in the ChIP sample?  \n\n<details markdown='1'>\n  <summary>Check the answer.</summary>\n\nYes, you see exactly the same problem in the ChIP sample. Since both samples were probably loaded on the same lane, it seems normal that you see the same problem in the ChIP sample.\n\n![fastqcadapters](../../images/FASTQC15c.png)\n\n</details>\n{: .question }\n\n### {% icon question %} Why does the Sequence duplication levels module give a failure in the control sample?  \n\n<details markdown='1'>\n  <summary>Check the answer.</summary>\n\nThe duplication levels in the control data set are high. \n\n![fastqcadapters](../../images/FASTQC15d.png)\n\nThere is a high number of sequences with low duplication levels which could be due to high coverage. Remember that you are working with E. coli which has a small genome.\n\n</details>\n{: .question }\n\n### Estimation of coverage\n\nKnowing your organism size is important to evaluate if your data set has sufficient coverage to continue your analyses, e.g. for the human genome (3 Gb), 10 million reads are considered sufficient.\n\n### {% icon question %} What is the size of the genome of the E. coli K-12 strain substr. MG1655?  \n\n<details markdown='1'>\n  <summary>Check the answer.</summary>\n\n- Go to the [NCBI website](http://www.ncbi.nlm.nih.gov/)\n- Select the **Genome** database to search in\n- Type **Escherichia coli** in the search term box\n- Click **Search**\n\n![fastqcadapters](../../images/Genome2.png)\n\nThe genome is 4.64 Mbase. \n\n</details>\n{: .question }\n\nThe FASTQC report has shown that the fastq files of the ChIP and control samples contain 3.6 and 6.7 million reads respectively. 
As you aim for 10 million reads for 3 Gb in human, we can assume that these data sets contain enough reads for proper analysis. ","# 1. Routine usage\nAs mentioned in the first chapter, there are three conceptual areas in Git: the development area, the staging area and the commit repository. The routine usage is depicted in the figure below. When we want to save a file from the development area on our computer to the commit repository, we'll always have to add it to the staging area first, before we can commit it. The usual routine looks like this: \n\n\n---\n\n<center><img src=\"../../images/conceptual_areas_push.png\" width=\"1000\" /></center>\n\n---\n\n\nThese commands will subsequently add the file `<file>` to the **staging area** and then commit it to the **commit repository**. If we didn't pass along the `-m` message parameter, Git would open an editor asking us to write the commit message there. It's good practice to write a short, but powerful commit message that helps your future self to determine what has changed in this commit. \n\nThe last step is to take these commits, essentially representing the folder with all the committed files, and push them to GitHub. Up until now we kept track of our code locally on our computer. Why do we want to store this project and all of its files on GitHub? Imagine that you lose your computer now: you've also lost your project (and all the files in it). A bit less drastically, if you would just like to share your project with your colleagues or with the whole world, you need to publish it somewhere on the internet. And that is exactly what GitHub does for us. Here's what it looks like (once everything is set) when we would use the appropriate commands on GitHub. \n\n```\ngit add <file>\ngit commit -m \"some text that explains what has changed\"\ngit push\n```\n\nThat's all you need to know: `add-commit-push` x repeat. This repetition represents 90% of how we interact with Git & GitHub. 
\n\n\nBefore we can start adding, committing and pushing, we have to start a version controlled project/repository. There are two ways of **initializing a new Git repository** which only has to be performed once right at the start:\n- Clone a GitHub repository (from GitHub): see Section 2\n- Initialize Git on a folder on your computer: see Section 4   \nBoth options will work just fine and it depends on your preferences or maybe the situation of the project which one is preferable. The first option can be used if you're about to start a new project, the second option can be used when you already have some files in a project which you now want to start version controlling. \n\n\n\n# 2. Create a new repository from GitHub\n\nGo to your GitHub homepage and click on the '+' icon in the upper right corner and select 'New repository'. The following screen will pop up.\n\n\n---\n\n<center><img src=\"../../images/02-2-create-repository.PNG\" /></center>\n\n---\n\n\nWe already filled in a repository name and an optional description. You can choose to already publish your repository, however as this is a meaningless repository, we will choose not to. When you're about to start a new project, there are three things to consider:\n- For a new repository, it's a good practice to initialize the repository with a [README file](https://docs.github.com/en/github/creating-cloning-and-archiving-repositories/about-readmes). This file will eventually include a (general) description about the project, what others can expect to find in the project and how they can use it. \n- Adding a `.gitignore` file is something we will cover later, however for now it suffices to know that the `.gitignore` file will contain some code which tells git to exclude certain files from tracking and avoids uploading them to GitHub.\n- Adding a license makes sense when your project becomes public. It defines under which license the content is made available. 
More information on licenses is available [here](https://elearning.bits.vib.be/courses/writing-a-data-management-plan/lessons/licences/).\n\nIn our case, we will initialize the repository with a README file and click 'Create repository', which will then look like this:\n\n---\n\n<center><img src=\"../../images/02-3-create-readme-repository.PNG\" /></center>\n\n---\n\nThis is the home page of our GitHub repository. From here we can already do a lot, like changing or uploading files. We initialized a GitHub repository with a README file and we can see that we have only one file in this repository: a `README.md` file. By default the text in this README file is the title of the repository and the description that we created here above. Notice that it's a Markdown-file as we can see by the `.md` extension, similar to an ordinary text file on your computer with a `.txt` extension. Markdown is enriched text allowing us to create formatted text using plain-text. More information related to markdown can be accessed from the Github guides [here](https://guides.github.com/features/mastering-markdown/).  \n\n\nNow that we created the repository in GitHub, we want to work on it on our computer. Therefore we need to download it, i.e. we have to **clone** it to our computer. Click on the green button 'Clone' and choose any of the options:\n- Clone: with https link or with **SSH**. This will download the repository and all its contents, keeping the link to the GitHub repository. \n- Open with GitHub Desktop (this might be interesting for you at a later stage).\n- Download: will download all of the contents in a zipped file, however loses the connection to the repository.  \n\nWith the Git Bash (or Terminal), navigate with `cd` to the folder where you want to keep your project folder and type the following:\n```\ngit clone <link>\n```\nwith `<link>` being the link from GitHub that will look something like this for SSH: `git@github.com:username/repository-name.git`. 
This command is only used once in the beginning and creates a new folder on your computer with all the contents from GitHub (the README file). \n\n---\n\n> ### {% icon hands_on %} Exercise 1\n>\n>  Create a new GitHub repository, give it a name and initialize it with a `README`-file. Upload [this file](../../../../assets/files/git-introduction/plot1.R) to the repository on GitHub. What is GitHub asking you to do? Which stage is omitted when uploading a file directly to GitHub?  \n> \n> Clone the repository to your computer. How many files are there in your local repository?\n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    > Click on upload files and drag the file into the screen. GitHub is asking to add a commit message which defines the changes that you'll make to your repository. In this case we'll add the very brief *Upload R script* message. Notice that there is no staging area when you upload a file directly on GitHub. \n>    > \n>    > <center><img src=\"../../images/solution1.PNG\"/></center>\n>    > \n>    > Click on 'Commit changes' and find the two files: `README.md` and `plot1.R` in your repository. Now, we can find the clone link via the green 'Clone' button. In our Terminal we type the following command to start using the repository locally on our computer: \n>    > ```\n>    > git clone <link>\n>    > ```\n>    > in which you change `<link>` to the link that you copied from GitHub. There should be two files in your local repository as well.   \n>    > On a Windows computer we have a folder that contains the following files:\n>    > <center><img src=\"../../images/folder1.PNG\"/></center>\n>    >  \n>    > </details>\n>\n{: .hands_on}\n---\n\n# 3. Our first commit\nOur local copy (clone) of the GitHub repository is now able to communicate with the GitHub repository. Every change within this repository is traceable, whether it is a new file or changes to a file. 
When we make changes in our local repository (e.g. create a new file), you have to add the file to the staging area first (`git add`) and then commit it (`git commit`) before pushing it (`git push`) to GitHub. \n\n\n## 3.1 Staging\nLet's add a new file to our folder on our computer locally. Download [this file](../../../../assets/files/git-introduction/plot2.R) and add it in the folder where also the `plot1.R` file is located. It contains some R code for plotting a new figure.  \n\nThe first thing we will have to do now, is to stage the file into the staging area.  Remember that this is an intermediate area before committing the file to the repository. In a next section we will learn why this staging area can be useful. \n\nNow we have two options, depending on the situation:\n  1. `git add <file>` : will add a **specific** file to the staging area\n  2. `git add .` : will add **all** the changed or new files to the staging area\n\nIn this case, we can choose either of both options as we have only added one file. As this is a new file, `git add` will not only add it to the staging area, but it will also tell Git that it needs to keep track of changes that happen in this file. \n\n## 3.2 Committing\nOur new file is now in the staging area, ready to be committed. For this, we have to use the following command:\n```\ngit commit -m \"some descriptive yet short message\"\n```\nWe added a parameter `-m` (message) to the command followed by a descriptive text. This text informs our future selves or our colleagues of what changes were done. In this case it could be: \"added plot2.R script\". We make this message as explanatory as possible, yet as short as possible. Some tips and general best practices in writing commit messages are described in [this link](https://chris.beams.io/posts/git-commit/). 
\n\n---\n> ### {% icon question %} Question\n> \n>  Which of the following commit messages would be most appropriate for a hypothetical commit made to our `README.md` file?\n>   - “Update README file”\n>   - “Added line ‘We use this repository as an example’ to README.md”\n>   - “Added purpose description to the README file”\n>\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    > One can argue on the appropriateness of commit messages as it is subjective. In this case, however, the third option seems most ideal. It's neither too generic nor too specific. \n>    >\n>    > </details>\n>\n{: .question}\n\n---\n\n\n\n> ### {% icon question %} Question\n> \n> What has happened after committing?\n>   - We saved a version of the file which is now visible on GitHub.com\n>   - We saved a version of the file which is now stored in our commit repository\n>\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    > We've been working locally up until now and didn't push the commits to the GitHub repository, hence it's still in our commit repository. \n>    >\n>    > </details>\n>\n{: .question}\n\n---\n\n\n> ### {% icon question %} Question\n> \n> What would have happened if we forgot about the message argument when committing a file (`-m`)?\n>\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    > If the `-m` parameter is not added, Git will launch a text editor and ask to write a message. We cannot make a commit without providing a message. \n>    >\n>    > </details>\n>\n{: .question}\n\n---\n\n\n\n## 3.3 Push commits to GitHub\nRecall that when we added the first file on GitHub (exercise 1), it was immediately committed and showed up right away in the GitHub repository. When we change or add files on our computer and commit them, GitHub doesn't know this yet. 
Hence, we have to do one final step: \n```\ngit push\n```\nHave a look in the GitHub repository and verify that the new file is now in our repository. \n\n\n## 3.4 Stage-commit-push\nWe've learned how to make a GitHub repository, clone it to our computer, add a file, commit it and push it back to GitHub. This is everything you need to know for a routine usage of Git(Hub) on one of your projects. In order to grasp this concept a bit better, we'll repeat it by making changes on both files in the next exercise. \n\n\n---\n\n> ### {% icon hands_on %} Exercise 2\n>\n>  Add a title to both files (\"# Title plot 1\" and \"# Title plot 2\"). You can choose how you do this: e.g. open the files in a text editor and add the line on top of the file. Follow the routine steps to push your changes to our GitHub repository, however to make it a bit more difficult, you need to store the changes of both files in separate commits. \n> \n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    > After adding the titles, use the following commands \n>    > ```\n>    > git add plot1.R\n>    > git commit -m \"Added a title to plot1.R files\"\n>    > git add plot2.R\n>    > git commit -m \"Added a title to plot2.R files\"\n>    > git push\n>    > ```\n>    > We first added the changes of `plot1.R` in the staging area, then we commit those changes in a given commit. Afterwards, we add the changes of `plot2.R` in the staging area and subsequently commit them. Finally, we use push to push all the latest commits together towards GitHub. \n>    > </details>\n>\n{: .hands_on}\n---\n\n## 3.5 Commit all tracked files at once\nOne thing we haven't really said until now is that Git actually keeps track of the changes that you make to files as soon as you have told Git to do so. The first thing you have to do when you add a new file, is to tell Git to keep track of changes made in this file. 
If you do not do this, Git will know that there is a new file, but it will classify it as *untracked*. After adding it to the staging area a first time, it will always keep track of the changes in this file. \n\nOn the premise that Git is already keeping track of the files, you can simply do `git commit -a -m \"some informative text\"` in which `-a` stands for add all changes in all files to the staging area and commit them at once. \n\n\n\n# 4. Create a new repository from your computer\nAs discussed here above, you can also create a Git repository from your computer. This is especially useful when we already have a project with a bunch of files which we now want to start version controlling. The first thing that we will do is **initialize Git** on this folder. Alternatively, make a new folder which will contain the files of an imaginary project in case you don't have one yet. In Git Bash (Windows) or in your Terminal (Mac, Linux), move to the project folder with `cd` and use the following command: \n\n```\ngit init\n```\n\nUnfortunately, it is not possible to create a GitHub repository from our computer. Hence, we need to open GitHub and create a new repository and DO NOT initialize it with a `README.md`, `.gitignore` or a license. It is important that it is empty in the beginning. We can add those files later.\n\nOnce created, GitHub will suggest commands that you might want to use on the Terminal to push our first changes to this GitHub repository. \n\nWe already initialized Git in our folder, so we can skip this step:\n```\ngit init\n```\n\nThe following steps basically ask us to commit our first changes. Given that we edited the README file:\n```\ngit add README.md\ngit commit -m \"first commit\"\n```\n\nHere comes the tricky part. 
We will learn about branches in [Chapter 5](https://material.bits.vib.be/topics/git-introduction/tutorials/5_branches/tutorial.html), however it suffices for now to understand that each branch carries a name and the default one is now called `main` where it earlier was called `master`. The following command will rename the current branch to `main`. \n```\ngit branch -M main\n```\n\nThen, we need to link the repository on our computer to the one on GitHub with: \n```\ngit remote add origin git@github.com:tmuylder/testtt.git\n```\n\nAnd finally push our commit to GitHub. The argument `-u` or `--set-upstream` will set the remote as upstream (see later):\n```\ngit push -u origin main\n```\n\n\n> ### {% icon question %} Question\n> \n> What if we want to create a new folder inside the folder which we are using for version controlling? Do we need to initialize Git inside this subfolder as well? \n>\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    > It is important to note that `git init` will keep track of all the subdirectories and their files that are in the folder. Thus, you don't need to create a git repository for each folder or subdirectory. Git repositories can interfere with each other if they are “nested”: the outer repository will try to version-control the inner repository. This will lead to errors.\n>    >\n>    > </details>\n>\n{: .question}\n\n\n> ### {% icon question %} Question\n> \n> How can we know whether a folder is already initialized with Git, meaning that we are already version controlling the project? \n>\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    > If we use  `ls -al` we get a list of all files and directories, including the hidden ones. A `.git` folder is present when the project is being version controlled. Git uses this special directory to store all the information about the project like the history of all commits. 
If we ever delete the `.git` sub-directory, we will lose the project’s history. \n>    >\n>    > Another possibility is to use the `git status` command which results in *fatal: not a git repository...* if the project is not being version controlled. \n>    >\n>    > </details>\n>\n{: .question}\n\n\nBefore starting with the next exercise we also want to stress the importance of not uploading data to GitHub. It's good practice to have links to data, however not the data itself. GitHub is not your next cloud storage instance. \n\n---\n\n> ### {% icon hands_on %} Exercise 3\n>\n>  Find a folder on your computer with some files that you want to version control, initialize Git on that folder and make it (privately) available on GitHub. \n> \n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    > See the steps in Section 4.  \n>    > </details>\n>\n{: .hands_on}\n\n---\n\n\n# 5. The strength of the staging area\nNow you're probably wondering why it's useful to have that many steps to save a file (add, commit, push). We will give a practical example based on the figure below: \n\n---\n\n<center><img src=\"../../images/staging_area.png\" /></center>\n\n---\n\nImagine that you're working on a project with multiple Python scripts and you're working on all of them. In this case your folder in your development area contains the files  `scriptA.py`, `scriptB.py` and `scriptC.py`. The changes that you made in script A and script C are somehow related, but script B is not. It's good practice to make commits in which changes that are related to each other are bundled. Hence, in this case we want to make one commit with the changes from file A and C. Now we can simply add scripts A and C to the staging area and commit it. The changes in script B will remain unsaved until we commit the changes in a separate commit. 
\n\nIt's always better to have more commits; in case you want to remove part of your work in a later stage, or you want to start your work again from a specific commit. \n\n\n# 6. Pull\nImagine that you change something in a file on GitHub, or upload a new file online via GitHub. We would want to include these changes or that file in the folder on our computer as well. For this we need to use the `pull` command to pull in the changes from GitHub. \n\nLet's go back to our repository on GitHub. We will make a change in the repository on GitHub and then pull these changes back into the repository on our computer (i.e. the project folder on our computer). \n\nClick on the `README.md` file in the list of files and click the pencil icon on the upper right. The file will open in an editor mode and we can change the title from *introduction-github* to *Introduction GitHub* or we can add some more descriptive text. Note that a README file is by default a markdown-file. Markdown is a text file with lay-outing options. If you haven't heard of it before, it's worth some [further reading](https://guides.github.com/features/mastering-markdown/).\n\nSave the changes by committing them as depicted here below:\n\n---\n\n<center><img src=\"../../images/commitReadme.PNG\"/></center>\n\n---\n\nGitHub is now one commit ahead of our local repository. Hence, we have to pull this commit into our local repository. We can do this by using the following command:\n```\ngit pull\n```\n\nOpen the file `README.md` and check whether the changes have merged in.  \n\n---\n\nLet's go to the [next session](https://material.bits.vib.be/topics/git-introduction/tutorials/4_history_status/tutorial.html)!","# What is GIMP?\nGIMP is short for **GNU Image Manipulation Program**. It is a free and Open-source, cross-platform image editor available for GNU/Linux, MacOS and Windows operating systems. During this training we will use **GIMP 2.10** on Windows. 
To download the most recent version for your OS, browse to the [GIMP Download page](https://www.gimp.org/downloads/).\n## External training material\n- [GIMP Manual page](https://www.gimp.org/docs/).\n- [GIMP 2.10 Basics on YouTube](https://www.youtube.com/watch?v=2EPIUyFJ4ag)\n- [Nick Saporito GIMP Tutorials](https://www.youtube.com/playlist?list=PLynG8gQD-n8Dl23X0o1HFu_5PmBl79niz)\n\n## User Interface\nGIMP has a 'Single-window' mode, this allows you to switch from multiple windows (for e.g. multiple monitors) to a single window. When the 'Single-window' mode is disabled, you have separate windows for toolboxes, view area and dockable dialogs. When enabled you have one window with all tools, options and dockable dialogs attached to the central view area. For beginners, we would advise the 'Single-window' enabled.\nOn the left panel you have the 'Toolbox' (if not present: **Windows - Toolbox** or press **Ctrl + B**) and underneath the 'Tool Options' dialog. Selecting a tool will result in a different Tool Option bar. Every tool has his own set of parameters and functions, best to keep them close to each other. \nOn the right-hand panel you can find other 'dockable dialogs'. These are easy to move, remove and re-introduce if necessary. To get a list of all 'dockable dialog' go to **Windows – Dockable Dialogs - ...** . If you want a full screen view of your image select **Windows – Hide Docks**. \n\n## Import data and image properties\nTo import an image: **File – Open**\nWhen you select an image (any file type) in the import window, you get a preview and information on the right side. Click **Open** and the image(s) will be displayed in the middle box at zoom level 100% (1 pixel image = 1 pixel screen) or fitted to your windows. To zoom use Ctrl + mouse scroll up or down. Multiple images in GIMP are displayed in different tabs on top of the View Area.\nBefore you export your image, make sure it has the right resolution and pixel density. 
**Image - Image Properties** will give you all the information your image holds. This information can be very useful when you open an image from an unknown source.\n\n## Selection\nRectangular selection has several options and shortcut keys. The first icons in the tool options are the selection modes: add to selection (Shift), subtract from selection (Ctrl) and intersect with selection (Shift+Ctrl). More options are: feathering edges, rounding of the corners, expand from center, lock aspect ratio, size and position and if necessary to highlight the selection). The Ellipse selection tool has more or less the same options.\nThere are other selection tools available: Free Selection, Select by Color, Fuzzy Selection, Scissor Selection, Foreground Selection. Those tools have different tool options and are only used in specific cases.\n\n## Transforming\nThere are several ways to transform your image or selection; rotating, scaling, shearing and flipping. You can transform a selection, a layer or the image. When using the rotation tool, you have several options in the dockable dialog below. An important option is “Clipping” this will change the aspect ratio of your image after rotating. \nAnother way of rotating an entire image is: **Image – Transform – ...** then you have the option to flip (horizontal/vertical) or rotate (90°/180°). The entire image will be rotated including the selection and image orientation. \n\n## Layers\nMake sure you have the dockable dialog ‘Layers’ in your window. All options for layers can be found in the menu bar “Layer”. You can make a new blank layer or duplicate the current layer (e.g. copy of original image to compare or as back-up). In the dockable dialog you can hide or show a layer (eye button), rename them or move them up and down in the layer stack. 
If you want to link/connect two or more layers, you can use the chain button (next to the eye button).\nTo copy a selection to a new layer, perform a regular copy/paste action of that selection (Ctrl+C and then Ctrl+V) and select **Layer - To New Layer**\nIf you want to merge all layers into one layer you can select **Image – Merge Visible Layers**.\n\n## Brightness and contrast\nIn the menu bar you can find **Colors** . This menu has multiple options to manipulate your image; \n- Color Balance will change the cyan, magenta and yellow color levels of your image\n- Brightness and Contrast will change brightness and contrast and you can save these settings as a favorite \n- Threshold will reduce your image to two colors by using a threshold value\n- Adjust color curve will change the gamma setting of your image\n- Posterize will change the number of colors (2-256)\n\n## Guides and cropping\nYou can split your image in different sub-images. This can be done by using 'Guides'. To create such a break-line, go to **Image - Guides - New Guide... or (by Percent)...**. You can create a horizontal or vertical guide at the value/percentage you enter. A guide will be displayed as a blue dashed line. To chop your image in multiple parts, go to **Filters- Web- Slice** (Older versions: Image - Transform - Guillotine). The sub-images will be generated in the folder you selected.\nIf you only want a selection of your image without all the rest you can crop by clicking **Image – Crop to Selection** or use the Crop tool from the Toolbox.\n\n## Scaling and print size\nWhen you want to scale your image to a smaller resolution you can select **Image – Scale Image**. There you can scale in pixels (or another unit) and you can lock the aspect ratio (chain symbols).\nIf you want to change the print size to make your image suitable for publication you can select **Image - Print Size...**. 
There you can change the dimension/resolution and pixel density of your image.\n\n## Remove background color\nIf you download an image of a company or university logo, it might have a white (or any other color) background. This can be very annoying when the destination background is different. In order to remove the background color, we first have to add an alpha channel to this image: **Layer - Transparency - Add Alpha Channel** - If the Alpha channel is already present, skip this step. Now you're able to get a transparent background using the option: **Image - Color to Alpha**. In the new window you can select the color which you would like to convert to transparent pixels. You can either select by clicking the color bar or use the color picker icon.\n\n## Exporting\nSelect **File – Export as…**\nIf you click on the '+' next to Select File Type, you have a list of all possible extensions in which you can export your image. Each of those file formats has different compression options.\n\n# Exercises on image manipulations in GIMP\n\n> ### {% icon hands_on %} Hands-on: Exercise 1\n> Source file: [Image 1](http://data.bits.vib.be/pub/trainingen/GIMP_Inkscape/original_file.tif)\n> Task: Split this image in 2 parts, one for each gel. Make sure the bands are horizontal and export the 2 new images in the same file format as the original. You can adjust brightness and contrast to make all the bands more visible.\n{: .hands_on}\n> ### {% icon hands_on %} Hands-on: Exercise 2\n> Source file: [Image 2](http://data.bits.vib.be/pub/trainingen/GIMP_Inkscape/Exercise1.1.jpg)\n> Task: Rotate this image 45 degrees and crop an image of 500x500 pixels out of the original. Make sure the printing resolution is set to 300 ppi and export this image as a PNG file. 
Adjust brightness and contrast to make this image look better.\n{: .hands_on}\n> ### {% icon hands_on %} Hands-on: Exercise 3\n> Source file: [Image 3](http://data.bits.vib.be/pub/trainingen/GIMP_Inkscape/Exercise1.2.jpg)\n> Task: Cut this image in 4 equal parts. Know that the printing width is 150 mm and the journal demands a minimum of 300 ppi for all 4 images. Also export each of them in a different file format without losing image quality. Adjust brightness and contrast to your own opinion.\n{: .hands_on}\n> ### {% icon hands_on %} Hands-on: Exercise 4\n> Source file: [Image 4](http://data.bits.vib.be/pub/trainingen/GIMP_Inkscape/Exercise1.3.jpg)\n> Task: Adjust brightness and contrast of this image and export it in a way to make the file as small as possible. Use preferably lossless compression (try lossy compression to compare file size), there is no restriction on file formats. Be sure your image is exported with at least 300 ppi.\n{: .hands_on}\n> ### {% icon hands_on %} Hands-on: Exercise 5\n> Source file: select from the internet\n> Task: Download an image from your most favorite brand and remove the white (or other color) background. Export this new image in a format that supports transparent pixels.\n{: .hands_on}","# Introduction\n{:.no_toc}\n\n<!-- This is a comment. -->\n\nThe goal of this exercise is to appreciate how protein interactions can be studied through visual inspection and other software tools. Protein interactions can be classified into different groups regarding the molecular properties and functions of the interacting partners. (These groups are intertwined in several cases.) 
Some examples include:\n\n- the interactions of proteins with other proteins, small molecules, carbohydrates, lipids or nucleic acids;\n- Receptor-ligand interactions;\n- Antigen-antibody interactions;\n- Enzymatic interactions, enzyme-inhibitor interactions.\n\n## Exploring the structure of a nanobody-stabilized active state of the β2 adrenoceptor - the ligand \n\nWe will start with exploring one crystal structure of the β2 adrenoceptor. Together with the Steyaert lab from VIB, Kobilka published several crystal structures of the β2 adrenoceptor in its various activation states (Rasmussen et al. Nature 2011, 477)\n\n\n> ### {% icon hands_on %} Get the structure\n>\n> 1. Download the crystal structure 3P0G from the PDB into YASARA. \n>\n>    ```\n>    File - Load - PDB file from internet    \n>    ```\n>    As you can immediately appreciate, it is a bigger crystal structure with more than one molecule. \n>\n{: .hands_on}\n\n> ### {% icon question %} Questions\n>\n> 1. How many molecules are present in the crystallized structures? \n> 2. And how many chain identifiers are used? \n>\n> <details markdown=\"1\">\n> <summary>{% icon solution %} Solution\n> </summary>\n> \n> 1. There are three molecules, chain A Beta-2 adrenergic receptor; Endolysin, chain B Camelid Antibody Fragment, and a small molecule ligand. \n     Also have a look at PDBe [3P0G](https://www.ebi.ac.uk/pdbe/entry/pdb/3p0g) which gives a very nice overview of the structure and its composition.\n> 2. Only two chain identifiers A and B. Sometimes, this leads to issues depending on the software you might want to use for downstream processing.\n> \n> </details>\n>\n>\n{: .question}\n\nSome software routines need seperate chain identifiers for molecular entities to work correctly, so we suggest to rename the small molecule to chain L.\n\n\n> ### {% icon hands_on %}  \n>\n> 1. Activate the Head-up display\n> 2. Select Rename\n> 3. Enter 'L' to proceed with the renaming. 
\n>\n{: .hands_on}\n\nWe first have a look whether we can find out if there are specific interactions of the small molecule ligand with the adrenoreceptor.\n\nIn order to do so, we first have to add Hydrogens to all present molecules.\n\n> ### {% icon hands_on %}  \n>\n> 1. Edit - Add - hydrogens to : All \n> 2. Change the display of the ligand to Sticks\n> 3. Select the amino acids of the binding pocket i.e. a sphere of 10 Angstrom around the ligand\n>    ```\n>    Select – in sphere around – Residue and drag with the mouse until the display says 10 Å\n>    ``` \n> 4. ```\n>    View – show interactions – hydrogen bonds of - Residues\n>    ```\n>    select 'Selected' in the panel Belongs to or has\n>    and press OK in the subsequent window\n>\n{: .hands_on}\n\nGiven that hydrogen bonding is dependent on the definition of a hydrogen bond in the program, it is not a bad idea to use other tools to compare the analysis. There are many options to do this online if you look at published crystal structures. Next to the tools which are directly linked out from the web site of the crystal structure at the PDB database you can use the [ProteinPlus server](http://proteinsplus.zbh.uni-hamburg.de/)\n\nGo to the web site of ProteinPlus and enter the PDB code 3P0G into the search box. After clicking on Go, you should be presented with on overview of tools the ProteinPlus server provides.\n\nWe do not go into great detail on all the tools but only mention PoseView. 
With this tool, you can prepare an automatic sketch of the small molecule-protein interactions.\n\n<figure id=\"figure-1\"><img src=\"../../images/ProteinPlusPoseView.png\" alt=\"Protein Plus Server\"><figcaption><span class=\"figcaption-prefix\">Figure 1:</span> Overview of 3P0G</figcaption></figure>\n<figure id=\"figure-2\"><img src=\"../../images/3P0G_A_PoseView_Input.png\" alt=\"Zoom on ligand of 3P0G\"><figcaption><span class=\"figcaption-prefix\">Figure 2:</span> Zoom on ligand co-crystallized with 3P0G</figcaption></figure>\n\n\n> ### {% icon question %} Questions\n>\n> 1. Between which amino acids and the ligand do you see hydrogen bonds using YASARA? \n> 2. According to PoseView, between which amino acids and the ligand do you see hydrogen bonds?\n> 3. What other interactions are presented in the sketch?\n> 4. Inspect the visualisation in Yasara: Do you see the interactions in Yasara as well?\n>\n> <details markdown=\"1\">\n> <summary>{% icon solution %} Solution\n> </summary>\n> \n> 1. In YASARA, you observe hydrogen bonds between Asp113A as well as the carbonyl function of Asn312A and the charged amine function.\n>    \n> 2. PoseView indicates hydrogen bonds between Asp113A as well as the carbonyl function of Asn312A and the charged amine function. Furthermore, hydrogen bonds are indicated between the phenolic OH and Ser207A and Ser203A as well as the amine function and Ser203A.\n> \n> 3. Furthermore, hydrophobic interactions are indicated for the methylbenzyl moiety and pi-pi interactions of Phe290A and the phenolic moiety.\n>\n> 4. With YASARA Structure license, those hydrophobic interactions can also be visualised. \n> </details>\n{: .question}\n\n\n# Exploring the structure of a nanobody-stabilized active state of the β2 adrenoceptor - the nanobody \n\nIn order to estimate the binding energy between the nanobody and the β2 adrenoceptor, we can use the FoldX tool AnalyseComplex. 
It is recommended to calculate these binding energies on energy-minimized structures. To illustrate the effect of the energy minimization, we compare the interaction energy of the current crystal structure and its minimized structure.\n\n\n## Use the tool FoldX tool AnalyseComplex \n\n> ### {% icon hands_on %} \n>\n> 1. Given that energy-minimization takes a while for this rather large complex,\n>     please download the Yasara scene [here](http://data.bits.vib.be/pub/trainingen/PSA/3P0G_1.sce)  \n>    \n>    Calculate the interaction energies between the chain A and B of the object 3P0G \n>    and the RepairObj1, respectively. \n>\n>    ```\n>    Analyze - FoldX - Interaction energy of molecules\n>    ```\n{: .hands_on}\n\n> ### {% icon question %} Questions\n>\n> 1. What is the dG in the two cases? \n> 2. Any idea why the difference is rather huge?\n>\n> <details markdown=\"1\">\n> <summary>{% icon solution %} Solution\n> </summary>\n> \n> 1. first case (X-ray structure): Interaction energy between molecule(s) A and B in object 1 = -9.86 (kcal/mol)\n>    second case: \n>    Interaction energy between molecule(s) A and B in object 2 = -20.19 (kcal/mol)\n> 2. Through the energy minimisation of the Repair Object function, the interactions of the amino acids are optimised.  \n> </details>\n{: .question}\n\nThis command also creates a list of residues forming the interface of the two proteins. 
Hit space to see the list of residues in the interface.\n\nTip: This list can also be useful if you want to make visualisations of the interaction site.\n\n```\nPlugin>interface residues between A and B\nPlugin>TA66 TA68 IA72 IA127 RA131 AA134 IA135 TA136 SA137 PA138 FA139 KA140 QA142 YA219 VA222 EA225 AA226 LA266 KA267 EA268 AA271 LA272 TA274 LA275 IA278 IA325 YA326 RA328 SA329 PA330 SB27 IB28 FB29 SB30 IB31 TB33 AB50 IB51 eB52 SB56 TB57 NB58 YB100 AB102 VB103 LB104 YB105 EB106 YB107\n```\n\n# Comparing the active and the inactive conformation of the β2 adrenoceptor \n\nIn case there is still time, I would recommend trying to use some of the skills you learned today to create a superposition of the inactive and active conformation of the β2 adrenoceptor. We take one of the crystal structures which are available: 3SN6\n\n```\nFile - Load - PDB file from Internet\n```\n\nYou will be kind of overwhelmed once the structure is loaded into YASARA. In order to get a first quick overview, click on the 'Center' button in the menu of YASARA (5th button from the left). Then, it is time to look at the PDB entry of 3SN6 in the PDB database to have a first idea on what molecules are in the PDB file.\n\nAs you see on the website [3SN6](http://www.rcsb.org/pdb/explore/explore.do?structureId=3SN6), the chain R consists of 2 molecules, the β2 adrenoceptor and lysozyme. \nIn the corresponding article, it is stated that 'the unstructured amino terminus of the β2AR is replaced with T4 lysozyme (T4L)'.\n\nSince this is an extra molecule in the crystal structure which disturbs our view, we will delete it.\n\nAfter the manipulation, the overall picture should look roughly like this.\n\n<figure id=\"figure-3\"><img src=\"../../images/3SN6_withoutLysozyme.png\" alt=\"Superposition\"><figcaption><span class=\"figcaption-prefix\">Figure 3:</span> Overview of 3SN6 without lysozyme</figcaption></figure>\n\nIn the following step, we structurally align only the receptors. 
The rest of the structures will move along.\nIt is suggested to use the first chain A from 3P0G as target. In order to do a structural alignment, it is suggested to use the first chain A from 3P0G as target.\n\n```\nAnalyze - Align - Pairwise, based on structure - Molecules with MUSTANG\n```\n\nInvestigate the differences in TM helices and the binding of the nanobody compared to the subunit of the G protein.\n\nTip: Color the secondary structures to better identify the individual chains/units of G protein. \n\n# Conclusion\n{:.no_toc}\n\nSum up the tutorial and the key takeaways here. We encourage adding an overview image of the\npipeline used.\n","## Search for a structure\n\n### Via [UniProt](http://www.uniprot.org/)\nThe way of searching for a specific protein structure depends on the data you already have. You might already have the PDB ID (a unique identifier), that's an easy one. But mostly you have the protein name or you just have a sequence. In the last cases I recommend to start from the UniProt website at <http://www.uniprot.org>, which is the best annotated protein database in the world. Our first model protein will be the molecular chaperone DnaK from *E. coli*. Below is an image of the UniProt search box where you can start your search for proteins.\n\n<figure id=\"figure-1\"><img src=\"../../images/uniprotsearchbox.png\" alt=\"uniprotsearchbox.png\"><figcaption><span class=\"figcaption-prefix\">Figure 1:</span> Search box</figcaption></figure>\n\n> ### {% icon hands_on %} Explore a PDB structure on the Uniprot web site\n>\n> 1. Go to the UniProt website and search for the DnaK protein\n> - The UniProt search engine returns a list of DnaK protein sequences from a variety of organisms. An entry with accession code **P0A6Y8** and entry name **DNAK_ECOLI** should be near the top of this list.\n> 2. Click on the *accession code* (column Entry) to view the protein page of this DnaK from the model organism *Escherichia coli*.\n> 3. 
Click on *Structure* in the left-side menu and then look at the *3D structure databases* table.\n{: .hands_on }\n\n### {% icon question %} Guidelines which PDB structures to select\n\nWhich structures (give the 4-character PDB ID) of the C-terminal domain of DnaK should preferentially be used for analysis and why?\n> <details markdown=\"1\">\n> <summary>{% icon solution %} Solution\n> </summary>\n>\n> Usually, the recommended selection criteria are using an X-ray structure with a low resolution value in Å (i.e. a high-resolution structure) and a low $R_{free}$ factor. Furthermore, the PDB database has pre-calculated a validation report for all of the structures.\n>\n> As an example, have a look at https://www.ebi.ac.uk/pdbe/entry/pdb/4EZX under the section 'Experiments and Validation'. For many PDB structures, there is also a re-done structure available with a vast amount of information on the quality of the X-ray structure and suggested 'better' models (e.g. https://pdb-redo.eu/db/4ezx). In our case, we could opt for the structures 1DKX and 4EZX.\n>\n> This is a difficult example since there are so many high resolution structures available. So, it is recommended to study the articles and compare the available structures to find your favorite structure for further analysis.\n> </details>\n{: .solution}\n{: .question }\n\n\n### Via the Protein Data Bank by PDB ID\n\nYou can find structural information directly at the PDB database. The web site of the PDB consortium is located at <http://www.wwpdb.org>. This web site provides links to all members of the PDB (left side). It is a question of taste which resource you start off with. For X-ray structures, it is currently PDBe, RCSB PDB, PDBj. For NMR structures, you find the BMRB. 
In today's course, we focus on the PDB resources only.\n\nBelow is an image of the RCSB search box <http://www.rcsb.org/pdb/home/home.do> where you can start your search for structures.\n\n<figure id=\"figure-2\"><img src=\"../../images/pdbsearchbox_RCSB.png\" alt=\"Pdbsearchbox_RCSB.png\"><figcaption><span class=\"figcaption-prefix\">Figure 2:</span> PDB Search Box</figcaption></figure>\n\nThe PDB file with ID **1DKX** contains the atomic coordinates of the molecular chaperone (DnaK) from *E. coli*.\n\n### {% icon hands_on %} Search a structure on the RCSB web site\n\n1. Go to the PDB website and type 1DKX in the search box\n{: .hands_on }\n\n\nThis will lead you to the same page we got earlier through UniProt.\n\n### Via the Protein Data Bank by sequence\n\nIn lots of cases we only have a sequence of which we would like to find out if there is structural information. The PDB can be searched using a sequence as input. Here is the sequence of the C-terminal substrate binding domain of DnaK:\n```\n    DVKDVLLLDVTPLSLGIETMGGVMTTLIAKNTTIPTKHSQVFSTAEDNQSAVTIHVLQGE\n    RKRAADNKSLGQFNLDGINPAPRGMPQIEVTFDIDADGILHVSAKDKNSGKEQKITIKAS\n    SGLNEDEIQKMVRDAEANAEADRKFEELVQTRNQGDHLLHSTRKQVEEAGDKLPADDKTA\n    IESALTALETALKGEDKAAIEAKMQELAQVSQKLMEIAQQQHAQQQTAGADASANNAKDD\n    DVVDAEFEEVKDKK\n```\nThe PDB allows sequence searches through the same search box we used before.\n\n<figure id=\"figure-3\"><img src=\"../../images/pdbsearchbox_RCSB.png\" alt=\"Pdbsearchbox_RCSB.png\"><figcaption><span class=\"figcaption-prefix\">Figure 3:</span> PDB Search Box</figcaption></figure>\n\nThere is also an Advanced Search section, with a Blast/Fasta option in the Sequence Features section.\n\n<figure id=\"figure-4\"><img src=\"../../images/blastpdb.png\" alt=\"Blastpdb.png\"><figcaption><span class=\"figcaption-prefix\">Figure 4:</span> BLAST</figcaption></figure>\n\n> ### {% icon hands_on %} Hands-on: BLAST search for PDB structure\n>\n> 1. Go to the Advanced Search section\n> 2. 
Please select 'Sequence BLAST/PSI-BLAST' in the Query type drop down.\n>    This method allows you to change some parameters for the search.\n> 3. Copy and paste the sequence in the ''Sequence'' field\n> 4. Press ''Submit query''.\n> 5. You should see the same structures popping up as you saw in the UniProt page of DnaK.\n{: .hands_on}\n\n# The PDB file\n\n## Introduction\n\nA PDB (Protein Data Bank) file is a plain text file that contains the\natom coordinates of a solved 3D structure of a protein or even DNA. Such\ncoordinate files can be obtained at the Protein Data Bank at\n<http://www.rcsb.org/pdb>. Each PDB file has a unique identifier (ID)\nconsisting of 4 characters, the first one is always a number. Note: It\nhas been announced that the 4 character code will change in the future\n<https://www.wwpdb.org/news/news?year=2017\\#5910c8d8d3b1d333029d4ea8>.\n\nThe PDB file with ID **1DKX** contains the atomic coordinates of the\nmolecular chaperone (DnaK) from *E coli*.\n\n> ### {% icon hands_on %} Hands-on: BLAST search for PDB structure\n>\n> 1. Go to the PDB website at <http://www.rcsb.org/pdb>\n> 2. Type 1DKX in the search and try to answer the following questions.\n{: .hands_on}\n\n### {% icon question %} Questions\n\n1. How many molecules were solved in this PDB file? What kind of molecules are these (proteins, peptides, DNA, ...)?\n2. Does the structure represent the full protein? If not, how many residues are missing? Hint: Click on the UniProt KB link in the Sequence tab to see the full sequence.\n3. Was this structure solved by X-Ray or NMR?\n4. What is the atomic resolution and R-factor?\n\n> <details markdown=\"1\">\n> <summary>{% icon solution %} Solution\n> </summary>\n> 1. Two, called polymers or chains: they are polypeptides ![Type](../../images/Mol_desc_1DKX.png)\n> 2. 
To answer this question you can go to the sequence tab at the top:\n>    - ![Uniprot view](../../images/Pdb_firstresiduesmissing_1dkx.png)\n>    - Summary: a large chunk of the N-terminus is missing from the structure, the C-terminus is virtually complete.\n> 3. X-RAY diffraction, as shown by Experimental Details\n> 4. Atomic resolution: 2.00 Ångstrom and R-factor of 0.206\n> </details>\n{: .question}\n\n\n## Downloading the structure\n\nThe file that holds the 3D coordinates can be downloaded by clicking on\n*Download files* in the top right corner and then choosing *PDB file (text)*.\nFor convenience, save this file on your desktop. The filename is the\n4-character unique PDB ID.\n\n![Pdbdownloadfile1.png](../../images/pdbdownloadfile1.png)\n\n> ### {% icon hands_on %} Hands-on: Open downloaded PDB file in an editor\n> 1.   Open this file with a text editor, e.g. WordPad is an excellent tool for that.\n> 2. Do you see the different sections in the PDB file? Analyse some ATOM lines and try to explain what kind of data is in each column.\n{: .hands_on}\n\nAdditional exercises on searching PDB can be found on [the basic bioinformatics exercises page](http://wiki.bits.vib.be/index.php/Exercises_on_Protein_Structure).\n","## Install Python and PovRay\n\nPython and PovRay should be installed already, so you can skip this part.\n\nThe programming language Python must be installed to use some very useful YASARA features. Simply start YASARA as administrator. Right click the YASARA icon on the desktop and choose \"Run as administrator\". 
Once the program is opened, click\n\n```\nHelp > Install program > Python\n```\n\nPovRay is used to make high quality publication-ready images and should be downloaded first with:\n\n```\nHelp > Install program > PovRay\n```\n\n## Tutorial movie\n\nPlay the movie \"Working with YASARA\":\n\n```\nHelp > Play help movie > General: Working with YASARA\n\n```\n\n## Scene styles\n\nOpen the PDB with code 1TRZ in YASARA.\n```\nFile > Load > PDB file from Internet\n```\nIf this option is not there, it means you haven't installed Python yet. Please check above.\n\nThe molecule will be loaded and presented in the ball style. Different scene styles exist to rapidly change the view:\n\n* F1: Ball\n* F2: Ball & Stick\n* F3: Stick\n* F4: C-alpha\n* F5: Tube\n* F6: Ribbon\n* F7: Cartoon\n* F8: Toggle sidechains on/off (press multiple times and see what happens)\n\n**Be careful!** If you have just made a nice close-up of e.g. an active site where you show some residues and hide others, and put some atoms in balls while others are in sticks, you will lose everything when you press one of the F-keys!!! The F-keys change the viewing style without asking.\n\nTry all the different scene styles!\n\n## Showing and hiding residues\n\nThe function keys F1-F3 show all atoms and residues by default. The keys F4-F7 do not explicitly show atoms and residues but are merely a impressionistic representation of the structure. The F8 keys does, to a certain extent, show atoms, but only of side chains, not main chain atoms.\nMostly to do structure analysis, we want to show only the most interesting residues, the ones we want to analyze, and hide all the others.\n\nThe structure of insulin was crystallized together with some water molecules. In many cases, it is no problem to permanently delete those waters. To visualize the waters, select an atom view such as F1, F2 or F3. 
See the red water (oxygen) atoms floating around the surface?\n```\nEdit > Delete > Waters\n```\n\nThen select the base scene style without any explicit atoms, e.g. tube style (F5). Press F5. This is our representation of the backbone.\n\nThere are several ways to show the residues of interest:\n\n1. From the menu\n```\n   View > Show atoms in > Residue\n```\n   Select Cys7 from Molecule **A** and press OK\n2. From the sequence selector ![seqselector.png](../../images/Seqselector.png)\n   Hover the mouse on the bottom of the screen, you will see the sequence selector opening. Open it permanently by pressing the blue nailpin on the left side of it. Search for Cys7 from Molecule **B**, right-click and select:  \n```\n   Show > Residue\n```\n\nNow show the atoms of His5 in Molecule B using a method of choice.\n\nAnd now that we're on it, what is special about the two cysteines we just visualized?\n\n**Hiding** individual atoms or residues works in the same way as showing them, only now you should go to **Hide atoms** in the menus.\n\n## Showing and hiding secondary structure\n\nMost published molecular images show a detailed active site and all the\nrest is hidden for clarity. From the previous exercise we show the atoms\nof 3 residues (let's assume this is our active site). Now secondary\nstructure of the rest of the molecule is also still visible. To hide all\nthat, we do not have to hide atoms, but hide the secondary structure\n(the F5 tube view) from the rest of the structure. Atoms and residues in\nYASARA are not the same as the term 'secondary structure'. Atoms and\nresidues are balls and sticks, 'secondary structure' is an artistic\nimpression of the structure (beta sheet arrows, helix ribbons, ...). If\nyou get this concept, you are a YASARA master.\n\nSo let's hide many of the secondary structure, but keep just a few\nstretches around our active site. Our active site is Cys7 (A), Cys7 (B)\nand His 5 (B). This can be done in several ways. 
Since we would have to\nhide almost everything, I propose to hide first everything and then show\nagain those stretches that we want. But if you have a better idea, I\nwould like to hear it.\n\nHide all secondary structure:\n```\n   View > Hide secondary structure of > All\n```\n\nThen show stretches of residues 2-10 in Mol B and residues 4-10 in Mol A\nin tube view as:\n```\n    View > Show secondary structure > Tube through > Residue\n```\nThen select the correct stretches of residues by keeping the CTRL key\npressed to select multiple residues.\n\nThere are still some metal-bound histidines flying around that weren't\nhidden because they are metal bound (a YASARA specific thing). Hide\nthose histidines by clicking on one of the sidechain atoms, then\nright-click and select:\n\n```\n   Hide atoms > Residue\n```\n\nThe nasty dative bonds and metals can be removed simply by deleting all\nof them:\n\n```\n   Edit > Delete > Residue > Name\n```\n\nIn the name column select all the metals and ions you can find.\n\nEt voilà, a publication ready image!\n\n<figure id=\"figure-1\"><img src=\"../../images/Insulin_hires.jpg\" alt=\"Insuline\"><figcaption><span class=\"figcaption-prefix\">Figure 1:</span> Insuline</figcaption></figure>\n\n## Labels\n\nYou can put labels on the residues you want to highlight by going to the\nmain menu or selecting an atom from a residue and right-click. In the\nlatter case you select:\n\n```\n   Label > Residue\n```\n\nNote that *residue name* and *residue number* is automatically selected.\nChange the height to 0.5 or so and select a nice color for the label.\nPresto\\!\n\n## Colors\n\nYou can color on all levels: atoms, residues, molecules and objects. So\nbe careful, if you color a residue, all of its atoms will get that\ncolor. If you color a molecule, all atoms in that molecule will get that\ncolor.\n\nLet's color the secondary structure (the backbone in our case) of our\nactive site in orange. 
But the sidechains should keep their Element\ncolors. So we shouldn't color entire residues, but only a selected atom\nset. Therefore our selection will be at the atom level, not the residue\nlevel. Go to:\n\n```\n    View > Color > Atom > Belongs to or has > Backbone\n```\n\nThen select the orange color (color code 150) and select 'Apply unique\ncolor'. Hopefully, it is a satisfying result.\n\n## Saving all the beautiful work\n\nIt would be a pity if you spent hours creating fancy molecular\ngraphics for that next Nature paper but couldn't continue the work\nthe next day. That's why YASARA can save the entire Scene including\norientations, colors, views, everything. To save the current scene, go\nto:\n\n```\n    File > Save as > YASARA Scene\n```\n\nChoose a filename such as MyInsulin.sce\n\nTo load the work again in YASARA go to:\n\n```\n    File > Load > YASARA Scene\n```\n    \nCareful: loading a Scene will erase everything else!\n\n## Creating high quality images\n\nTo save the current view to a high quality publication ready image file,\ngo to:\n\n```\nFile > Save as > Ray-traced hires screenshot\n```\n\nThis requires that the PovRay program has been installed. See the first\nitem on this page.\n\nUsually, you prefer to have a transparent background, so check the\nrespective box.\n\n## Distances\n\n**Distances** between atoms are calculated as follows:\n\n  - select the first atom\n  - keep CTRL pressed and select the second atom.\n  - left of the screen indicates the 'Marked Distance' in Angstrom.\n\n<!-- end list -->\n\n### {% icon question %} Questions\n\nWhat is the distance between the C-alpha (CA) atoms of Tyr19 and Leu16?\n\n> <details markdown=\"1\">\n> <summary>{% icon solution %} Solution\n> </summary>\n> To solve the question you need to select a view that shows you atoms\nincluding C-alphas. Possible views or scene styles that show these atoms\ncan be F1 (ball), F2 (ball\&stick), F3 (stick) and F4 (C-alpha). 
The\nviews F5-F8 won't show you any CA's explicitly. Try it.\n> So you've probably noticed that pressing the CTRL button allows you to select multiple atoms. This is important for the next exercise.\n> The distance is 5.8 Ångstrom.\n> </details>\n{: .question}\n\n## Hydrogen bonds\n\nTo show hydrogen bonds, YASARA needs the actual hydrogens to be present.\nIn NMR structures these are normally there. But in X-Ray structures\nhydrogens are missing. Luckily YASARA can add the hydrogens for you.\n\nSelect tube view (F5) and toggle on the sidechains with F8.\n\nAdd hydrogens with:\n\n```\nEdit > Add > Hydrogens to all\n```\n\nThen show the hydrogen-bonds:\n\n```\nView > Show interactions > Hydrogen bonds of> All > OK\n```\n\nIf the view is too chaotic for you, toggle off the sidechains with F8\n(press until the sidechains are hidden).\n\n### {% icon question %} Questions\n\nDo you see the typical helix and beta sheet pattern?\n\nArg22 from Molecule/Chain B is making a hydrogen-bonded electrostatic interaction (salt bridge) with another residue. Which residue?\n\n> <details markdown=\"1\">\n> <summary>{% icon solution %} Solution\n> </summary>\n> The interaction partner is Glu17 from chain A.\n> </details>\n{: .question}\n\n\n\nTo remove the hydrogen bonds, you have multiple choices:\n\n```\nView > Hide hydrogen bonds of > All\n```\n\nor just delete all hydrogens (this will also delete all hydrogen bonds):\n\n```\nEdit > Delete > Hydrogens\n```\n\n## Surfaces\n\nIt can be very useful and informative to show the molecular surface of a\nprotein. You can visualize cavities, ligand binding sites, etc ... To\nshow the molecular surface of one monomer of dimeric insulin, go to:\n\n```\nView > Show surface of > Molecule\n```\n\nSelect in the *Name* column A and B (these are the two chains in 1\nsubunit). Press *Continue with surface color* and make sure Alpha is\n100. 
Any number lower than 100 will create transparency in the surface\n(could be nice as well).\n\n## Molecular graphics exercise\n\nTry to reproduce the following image of the 1TRZ insulin structure\n(hints below):\n\n<figure id=\"figure-2\"><img src=\"../../images/Insulin.png\" alt=\"insulin.png\"><figcaption><span class=\"figcaption-prefix\">Figure 2:</span> Insuline</figcaption></figure>\n\nHints:\n\n  - choose the proper secondary structure scene style (F6 was used here)\n  - find the correct orientation first\n  - color all backbone atoms in gray\n  - find the residue numbers of the 2 colored helices\n  - color those residues magenta\n  - show the sidechain atoms and the CA of the two histidines and the\n    glutamate\n  - color the sidechain atoms of all residues in the Element color\n  - label the histidines and the glutamate\n  - if you need some help how to change the parameters for the label,\n    please have a look at Help -\\> Show user manual and search in\n    Commands / Index\n\n## More coloring\n\nDownload GroEL via PDB code 1WE3 in YASARA.\n\nTry to reproduce (approximately) the following image (hints below):\n\n<figure id=\"figure-3\"><img src=\"../../images/Groel.png\" alt=\"groel.png\"><figcaption><span class=\"figcaption-prefix\">Figure 3:</span> GroEL</figcaption></figure>\n\nHints:\n\n  - load the PDB as File \\> Load \\> PDB file from internet\n  - zoom out and find the correct orientation\n  - delete the ADP, DMS and Mg molecules (are treated as residues in\n    YASARA). So Edit \\> Delete \\> Residue \\> Adp ...\n  - color by molecule (every molecule will get another color) and color\n    by gradient (now you need to specify 2 colors, the begin and end\n    color).\n  - choose a first color (eg. color with code 0)\n  - choose a second color (eg. 
color with code 300, so you go over the\n    entire color wheel spectrum)\n\nMore exercises can be found on the [basic bioinformatics exercises\npage](http://wiki.bits.vib.be/index.php/Exercises_on_Protein_Structure).\n\n# Conclusion\n{:.no_toc}\n\nNow, you have explored the YASARA interface and acquainted with basic visualisations. You have identified how you can visualise secondary structure elements, surfaces, and hydrogen bonds. And most importantly, you can create publication-ready figures using Yasara.\n","## Introduction\n{:.no_toc}\n\n<!-- This is a comment. -->\n\nThe goal of homology modeling is to predict the 3D structure of a protein that comes close to what would be achieved experimentally with X-Ray experiments.\n\nMain principles of homology modeling\n\n- We predict the structure of a protein sequence on the basis of the structure of another protein with a similar sequence (the template)\n- If the sequences are similar, the structures will have a similar fold\n- Structure is more conserved than sequence\n\n## Main ingredients for homology modelling \n\n### The sequence\n\nLast week my colleague sequenced a plant protein. He is not a bioinformatician. Yet, he would like to know what the structure might look like to do some rounds of rational mutagenesis. Let's try to address the problem for him.\n \nHe came up with this sequence:\n\n```\nSVCCPSLVARTNYNVCRLPGTEAALCATFTGCIIIPGATCGGDYAN\n```\n\n### Searching for the template structure\n\nActually, the first step is to check whether the PDB already contains the structure of this sequence. That would be easy so we don't have to model anything. We will use Blast again to search with the sequence.\n\n> ### {% icon hands_on %} Hands-on: BLAST search for PDB structure\n>\n> 1. Go to the Advanced Search section\n> 2. Please select 'Sequence BLAST/PSI-BLAST' in the Query type drop down.\n>    This method allows you to change some parameters for the search.\n> 3. 
Copy and paste the sequence in the ''Sequence'' field\n> 4. Press ''Submit query''.\n> 5. You should see the same structures popping up as you saw in the UniProt page of DnaK.\n{: .hands_on}\n\nA suitable template structure to make a high quality model should have following properties:\n\n- The highest possible sequence identity from all structures in the PDB when aligned to the target sequence\n- A good resolution (and R-factor): if many identical template structures exist with the same sequence, filter by resolution\n- Is solved by X-RAY, not NMR.\n\n> ### {% icon question %} Questions\n>\n> 1. Is there a difference in the number of identities, positives and gaps between the two remaining x-ray structures? \n> 2. What is the PDB ID with the highest resolution, does not have insertions or deletions and should thus be the better template structure? \n>\n> <details markdown=\"1\">\n> <summary>{% icon solution %} Solution\n> </summary>\n>\n> 1. **TODO** \n> 2. **TODO** \n>\n> </details>\n>\n>\n{: .question}\n\n\n## Aligning target and template sequence and template selection\n\nThe alignment is the most crucial part of homology modeling. We will not explain what an alignment is and how you make it, this should be known. In an alignment, we put homologous sequences on top of each other in a text file. The point is that amino acids that are on top of each other in the same column are assumed to have the equivalent position in the 3D structure. So if the template sequence has an Ala at position 3, where the target sequence has a Phe, homology modelling tools will use the backbone of the template structure and replace the sidechain at position 3 from Ala to Phe.\n\nHomology modelling evolved over the years and many online tools for homology modelling are available. 
In my experience, homology modelling can be rather difficult and needs expert knowledge depending on the actual situation (sequence conservation, available templates, etc.).\n\nCan you imagine what could be the reasons?\n\n# Building the homology model with Swiss Model \n\nOur current request for homology modelling is a rather safe one, so we can use an automatic server for homology modelling. There are many automatic tools available and many of them compete in regular competitions like lastly, the 12th Community Wide Experiment on the Critical Assessment of Techniques for Protein Structure Prediction (CASP12) - [1].\n\nIn our example, we take the [Swiss Model server](https://swissmodel.expasy.org/interactive). SWISS-MODEL is a fully automated protein structure homology-modelling server, accessible via the ExPASy web server, or from the program DeepView (Swiss Pdb-Viewer). The purpose of this server is to make Protein Modelling accessible to all biochemists and molecular biologists worldwide.\n\n> ### {% icon hands_on %} Hands-on: Template selection step with Swiss Model \n>\n> 1. Browse to the [Swiss Model server](https://swissmodel.expasy.org/interactive) \n> 2. On the first page, paste the sequence of our unknown protein in the field 'Target Sequence' and give the project a name. \n>    <figure id=\"figure-1\"><img src=\"../../images/Modelling_sequence_template_step1.png\" alt=\"Swiss Model Start page -80width\"><figcaption><span class=\"figcaption-prefix\">Figure 1:</span> Start page of Swiss Model</figcaption></figure>\n> 3. Click 'Search templates' to initiate the first step. \n>    Thereafter, the server identifies structural template(s) and gives an overview list of hits \n>    which you can select the templates from.\n>\n{: .hands_on}\n\n> ### {% icon question %} Question\n>\n> Which of the 10 (at the time of writing) possible template structures would you select as template for the model building process? 
 \n>\n> <details markdown=\"1\">\n> <summary>{% icon solution %} Solution\n> </summary>\n> \n> We suggest as template **1jxx.1.A** given that it is an X-ray structure with high resolution and a very high \n> sequence identity (X-ray, 0.9 Å, 78.26 %).\n> </details>\n{: .question}\n\n\n> ### {% icon hands_on %} Hands-on: Model Building Step and Visualisation \n>\n> 1. Once you have selected the template, hit 'Build Model' to start the homology modelling procedure. \n>    The server will align the target sequence and template structure(s), build a model and evaluate it. \n>    These steps require specialized software and integrate up-to-date protein sequence and structure databases. \n>    Each of the above steps can be repeated interactively until a satisfying modelling result is achieved. \n>    ![download model -80width](../../images/Modelling_template_selection_step2.png)\n> 2. Once the model has been built, you can download it.\n>    ![download model -80width](../../images/Modelling_results_step3.png)\n> 3. If the Swiss Model server is too busy at the moment you execute the request, you can download the model from\n>    [here](https://zenodo.org/record/3551850#.Xdqs4ehKiUk).\n> 4. Load the created model into YASARA. \n>    Perform a structural alignment with your reference e.g. 1CRN and try to detect the differences through manipulating the visualisations.\n>    ![structural alignment](../../images/1214.png)\n{: .hands_on}\n\n\n# Conclusion\n{:.no_toc}\n\nHomology modelling evolved over the years and many online tools for homology modelling are available. You have used the Swiss Model service with a reasonably simple modelling request. Often, in research projects, homology modelling can be rather difficult and needs expert knowledge depending on the actual situation (sequence conservation, available templates, etc.).\n","## Introduction\n{:.no_toc}\n\n<!-- This is a comment. -->\n\nMutations in proteins can have various origins. 
Naturally occurring mutations are random and can have any kind of effect on the protein structure and/or function. Mutations can have no effect at all, be stabilizing or destabilizing. In the last two cases, these can lead to diseases.\n\nBut we can also make mutations in the wet lab to study the effect of a single residue position on protein stability, interaction with a peptide ligand etc ... Such site-directed mutagenesis in the wet lab is hard labour and costs money, I don't have to explain that to you. So wouldn't it be easier, cheaper and more rational if you could predict the effect of some mutations first with bioinformatics and then test the really interesting ones in the lab?\n\nFoldX is a molecular modeling tool that can quantitatively predict the change in free energy (kcal/mol) upon mutation. These values approach experimentally determined values. FoldX is a non-interactive command line program. In other words, not user friendly. But the good news is that I recently developed a YASARA plugin for FoldX, so that all predictions are just a few clicks away. And the nice thing is, it's all free!\n\n## P53 as example protein \n\nIn this section we will let the FoldX plugin loose on some real world examples and give you step-by-step instructions on how to proceed and analyze the results. We will use the P53 tumor suppressor protein as our example molecule. In a first exercise you will make a point mutation with FoldX and determine if the mutation is stabilizing or destabilizing for the P53 structure. 
In a second exercise you will design a mutation in the P53 structure at the DNA binding interface and determine how the mutation affects the interaction energy of P53 with the DNA strand.\n\n## Get data\n\n> ### {% icon hands_on %} Hands-on: Data download\n>\n> Download the file [2AC0.sce](https://zenodo.org/record/3551686/files/2AC0.sce?download=1).\n>\n{: .hands_on}\n\n## What do FoldX energies mean?\n\n\nBefore we start, some basic information about FoldX energies is necessary.\n\nFirst of all, FoldX energies are expressed in kcal/mol.\n\nThe main focus of FoldX is the prediction of free energy changes, e.g. what happens to the free energy of the protein when we mutate an Asp to a Tyr? FoldX will then calculate the free energy of the wild type (WT) and the mutant (MT) and make the difference:\n\n```\nddG(change) = dG(MT) - dG(WT)\n```\n\nFoldX is trained using experimental values to predict ddG(change). It is important to realize that dG(WT) and dG(MT) are meaningless numbers as such. These do not correlate with experimental values. Only ddG(change) does.\n\nAs a rule of thumb we use:\n\n\n**ddG(change) > 0 : the mutation is destabilizing**\n\n**ddG(change) < 0 : the mutation is stabilizing**\n\n\nThe error margin of FoldX is approximately 0.5 kcal/mol, so changes in that range are insignificant. \n\n## How to minimize the structure with FoldX\n\nFoldX assumes that the starting structure has been energy minimized. Although crystal structures with high resolution represent the form with a low energy, FoldX performs best when we minimize it just before we do the predictions. This FoldX procedure is called RepairPDB and should be done on each structure you want to perform calculations on.\n\n> ### {% icon hands_on %} Energetically minimise the structure of P53 bound to DNA\n> \n> Open the YASARA scene 2AC0.sce in YASARA. This is a part of a tetrameric complex of the transcription factor P53 bound to DNA. 
I removed 3 of the 4 P53 structures for simplicity and visualized some nice features.\n> \n> Load the scene with:\n> \n> ```\n> File > Load > YASARA Scene\n> ```\n> <figure id=\"figure-1\"><img src=\"../../images/Training_1.png\" alt=\"monomer bound to DNA -80width\"><figcaption><span class=\"figcaption-prefix\">Figure 1:</span> P53 monomer bound to DNA</figcaption></figure>\n> \n> To Repair (or minimize) the structure with FoldX go to:\n> ```\n> Analyse > FoldX > Repair object \n> ```\n> \n> <figure id=\"figure-2\"><img src=\"../../images/Training_2.png\" alt=\"Select the object for repairing -80width\"><figcaption><span class=\"figcaption-prefix\">Figure 2:</span> Select the object for repairing</figcaption></figure>\n> \n> And select the only object in the list.\n{: .hands_on}\n\nWhen the Repair is finished, the Repaired Object is placed in Object 2 (see top right corner) and superposed with the original Object 1. Take a look at the sidechains and see what FoldX has done while Repairing.\n\nIf you feel the repair takes too long (more than 10 minutes) due to a slow computer, download and open this YASARA Scene with the [Repaired Object](https://zenodo.org/record/3551686/files/2AC0_Repaired.sce?download=1).\n\nBecause we will continue working with this Repaired Object, we can now hide the entire Object 1 by toggling the Visibility column in the top right corner head-up display (HUD).\n\n# How to analyze a mutation \n\nFoldX has mutated the Ala to Trp and the structure with the Trp mutation has been loaded in the next Object (3) and is superposed with the wild type (WT, Object 2). We selected an option to show the VdW clashes in WT and mutant. The atoms that give rise to steric clashes are colored in red. 
Toggle the Visibility of Object 2 (WT) and Object 3 (mutant) and see how many clashes we introduced by mutating the Ala to Trp.\n\n\n<figure id=\"figure-3\"><img src=\"../../images/Training_7.png\" alt=\"Zoomed-in-view on the original Ala159, no Vander Waals clashes here -80width\"><figcaption><span class=\"figcaption-prefix\">Figure 3:</span> Zoomed-in-view on the original Ala159, no Vander Waals clashes here</figcaption></figure>\n\n<figure id=\"figure-4\"><img src=\"../../images/Training_8.png\" alt=\"Zoomed-in-view on the mutated Ala159Trp, lots of red Vander Waals clashes here -80width\"><figcaption><span class=\"figcaption-prefix\">Figure 4:</span> Zoomed-in-view on the mutated Ala159Trp, lots of red Vander Waals clashes here</figcaption></figure>\n\n\n> ### {% icon question %} Questions\n>\n> Van der Waals clashes are red colored atoms. \n> Do you see a difference around the mutation site between WT and mutant? \n>\n> <details markdown=\"1\">\n> <summary>{% icon solution %} Solution\n> </summary>\n> \n> Toggle the Visibility of WT and mutant to see the differences. \n> Open the Console by pressing the spacebar twice and see the free energy change of the mutation. \n> Anything above a change of +0.5kcal/mol is already assumed to be destabilizing.\n> In the console - to open press spacebar twice - we see an energy change of +29 kcal/mol.\n> <figure id=\"figure-5\"><img src=\"../../images/Training_9.png\" alt=\"In the console - to open press spacebar twice - we see an energy change of +29 kcal/mol. 
 -80width\"><figcaption><span class=\"figcaption-prefix\">Figure 5:</span> Open the console to explore the situation.</figcaption></figure>\n> This is clearly a destabilizing mutation.\n> </details>\n{: .question}\n\n\n# Study the effect of a second mutation \n\nHide Object 3 by toggling its Visibility so that only Object 2 (the repaired WT) is visible.\nFirst turn on all atoms in the molecules G and H (DNA) again as you did previously, because the FoldX run has hidden them (it rearranged the view to show the VdW clashes).\n\nShow the sidechain of Arg273 of Object 2 by searching for it in the sequence selector, then right-click on it and go to:\n\n\n```\nShow atoms > Sidechain and CA and zoom in on Arg273\n```\n\nNotice how the positively charged Arginine is making an electrostatic interaction with the negative phosphate from the DNA backbone.\n\n<figure id=\"figure-6\"><img src=\"../../images/Training_10.png\" alt=\"R273 makes an electrostatic interaction with the DNA phosphate groups. -80width\"><figcaption><span class=\"figcaption-prefix\">Figure 6:</span> R273 makes an electrostatic interaction with the DNA phosphate groups.</figcaption></figure>\n\nLet's see what would happen to the interaction energy between the DNA and P53 when we mutate this Arginine to Alanine.\n\nRight-click on this Arg273 in the sequence selector and go to:\n\n```\nFoldX > Mutate residue\n```\n\nA number of menus is now presented and here is what you need to do in each menu:\n\n1. Select Calculate interaction energy change\n2. Select Ala\n3. Select 'Move neighbours' and 'Show disrupted and new hydrogen bonds'\n4. Don't change any numerical options in the last menu\n\n<figure id=\"figure-7\"><img src=\"../../images/Training_11.png\" alt=\"View of the first options menu with 'Show new and disrupted hydrogen bonds' selected. 
 -80width\"><figcaption><span class=\"figcaption-prefix\">Figure 7:</span> View of the first options menu with 'Show new and disrupted hydrogen bonds' selected.</figcaption></figure>\n\n> ### {% icon question %} Questions\n> \n> 1. What is the change in interaction energy between P53 and DNA chain G upon mutation?\n>    And what is the reason?\n> 2. Why doesn't the mutation affect the interaction with DNA chain H?\n>\n>\n> <details markdown=\"1\">\n> <summary>{% icon solution %} Solution\n> </summary>\n>\n> 1. Toggle the Visibility between this mutant and the WT structure and see how the hydrogen bonding changes and check the output in the Console. \n>     <figure id=\"figure-8\"><img src=\"../../images/Training_12.png\" alt=\"Mutation\"><figcaption><span class=\"figcaption-prefix\">Figure 8:</span> Change in interaction energy</figcaption></figure>\n>     We see that the mutation decreases the interaction with DNA strand G by approximately 1 kcal/mol\n>     since we lost 1 hydrogen bond.\n> \n> 2. ***TODO***  \n> \n> </details>\n>\n>\n{: .question}\n\n# Conclusion\n{:.no_toc}\n\nInstead of DNA-protein, FoldX can of course also calculate interaction energy changes in protein-protein or peptide-protein complexes.\n\n","## Structural comparison and RMSD \nWe compare structures by structurally aligning them on top of each other. That is, we\nalign structurally equivalent atoms. For now, we will only use CA atoms as a representation of the backbones. \nBut YASARA can also align on any type of atom you want. You always need to specify:\n\n-  source object(s): the structure(s) that need to be rotated and translated to superpose on another structure\n-  target object: the structure to superpose on\n\nAn optimal alignment is found when the root-mean-square deviation (RMSD) is at a minimum. 
 \nThe RMSD is given as:\n<figure id=\"figure-1\"><img src=\"../../images/RMSD.gif\" alt=\"RMSD\"><figcaption><span class=\"figcaption-prefix\">Figure 1:</span> calculation of RMSD</figcaption></figure>\nwhere R is the distance between two structurally equivalent atom pairs (CA in our case) and n is the total number of atom pairs.\n\n> ### {% icon hands_on %} Hands-on: Data download\n>\n> 1. Download the following adapted PDB files from [Zenodo](https://zenodo.org/record/3550492#.XdeNL1dKiUk) \n>\n>    ```\n>     1DKX_1.pdb 1DKY_1.pdb 1DKZ_1.pdb 3DPO_1.pdb 3DPP_1.pdb \n>    ```\n>\n{: .hands_on}\n\n## Aligning multiple structures using YASARA\n\nNow load all of them in YASARA:\n\n```\nFile > Load > PDB File\n```\n\nand select the CA (C-alpha) view (F4) and superpose with the MUSTANG algorithm:\n\n\n```\nAnalyze > Align > Objects with MUSTANG\n```\n\n\nIn the first window you have to select the source objects that will be repositioned. Select Objects 2 till 5. In the second window you select the target Object to superpose on. That would then be the first object.\n\nNotice that YASARA prints the RMSD of every structural alignment in the lower Console. Open the Console by pressing the spacebar once or twice to extend it.\n\nColor the atoms by their B-factor:\n\n```\nView > Color > Atom > Belongs to or has > All\nThen choose BFactor in the next window and press 'Apply unique color'.\n```\n\nHigh BFactors are yellow, low BFactors are blue.\n\n### {% icon question %} Questions\n\nDo you see a correlation between the BFactors and the variability in the structure?\n\n> <details markdown=\"1\">\n> <summary>{% icon solution %} Solution\n> </summary>\n> \n> ![Structural alignment](../../images/aligned-structures.png) \n>\n> \n> </details>\n\n\n# Conclusion\n{:.no_toc}\n\nStructural alignment of related structures is a very efficient approach to spot similarities and differences of structurally related proteins.\n","# 1. 
Introduction\nWhat if we have files that we do not want Git to track for us, like backup files or intermediate files created during data analysis? Remember that GitHub is not your next cloud storage infrastructure. Hence, (big) data should not be uploaded on GitHub. In fact, there's a strict file size limit of 100MB so you won't even be able to do so. \n\nRegardless of the above, it is often useful if your data is in the same projectfolder. And you can't help the fact that Jupyter Notebooks makes intermediate checkpoints (.ipynb_checkpoints) in the same folder of the notebook. \n\nGit has a file, the `.gitignore` file in which we can write expressions that define the files it should ignore. This chapter will briefly discuss the `.gitignore` file with a few simple examples. \n\n# 2. Expressions\nImagine the following project folder structure:\n\n```\n project-folder/\n    |\n    |- .git/\n    |- .ipynb_checkpoints/\n    |- .Rhistory/\n    |\n    |- data/\n    |   |- R1.fastq\n    |   |- dataset.csv\n    |\n    ...\n```\n\nLet's discuss how to ignore a specific file and how we can use symbols to generalize the ignoring behaviour.     \n\n- **Ignore a file**:\n\nThe easiest would be to define the file or the path to the file. E.g. the fastq file can be ignored by adding `data/R1.fastq` to the `.gitignore` file. \n\nSimilar to a file, a folder can also be ignored. The folders `data/` and `.ipynb_checkpoints/` can be ignored by adding the following lines:\n```\ndata/\n.ipynb_checkpoints/\n``` \n\n- **`*`, `!` and `#`**:\n\nThe asterisk is often used in `.gitignore` files and represents a wildcard. E.g. `*.csv` will ignore any csv file in your folder and subfolders. The asterisk can precede a file format in which case it will ignore all the files with that format (e.g. ignore all csv, fastq, sam, bam, xlsx, pdf, etc. files). \n\nAn exclamation mark is used for exceptions. 
The following lines of code will ignore all files in the data folder, except for the `dataset.csv` file:\n```\ndata/\n!data/dataset.csv\n```\n\nDocumentation lines are preceded by a `#`. \n\n# 3. Standard files\n\nIt's always good to think this through and manually add the files or folders that need to be ignored. However, it's also useful to know that there are standardized `.gitignore` files. These files have been created based on a specific programming environment. They are all accessible in [this repository](https://github.com/github/gitignore) and contain `.gitignore` files for Python, R, Ruby, Java, Perl, C++, amongst many others. These files can also be added on the fly to a new repository by initializing the repository with one of these files (see figure below). \n\n--- \n\n<center><img src=\"../../images/gitignore.PNG\" /></center>\n\n---\n\nLet's continue with the [next session](https://material.bits.vib.be/topics/git-introduction/tutorials/8_github_rstudio/tutorial.html)!\n","# Basic statistics with GraphPad Prism \n{:.no_toc}\n\nThis introductory video has been created during a livestream session in March 2020. We cover basic statistics, advanced statistics, graphs, curve fitting and survival analysis.\n\n<iframe src=\"https://www.youtube.com/embed/7KqYZ9P9eIk\" allowfullscreen=\"\" allow=\"accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture\" height=\"515px\" style=\"display: inline-block;\" width=\"800px\" title=\"\"></iframe>\n","# 1. Getting started\nAs mentioned in the first chapter, we will first introduce Git in a terminal session. Linux and Mac users can open a terminal directly, Windows users have to open the 'Git Bash' program which will act like a normal Linux terminal. \n\nIf we want to use git from the command line, we always start with typing  `git` followed by a verb defining a more specific command. These commands can be anything like staging, committing, pushing, etc. 
If we want to have an overview of the most common Git commands we can enter `git --help`.\n\nBefore we can get started, we have to personalize a couple of configurations (e.g. we need to tell git who we are). Git comes with a configuration file that allows us to control all aspects of how Git looks and operates. There are three different levels on which we can do configurations, for example we could set configurations on a specific project (in one folder) or we could set them on a more global level where the configurations are applicable for all our projects. We will only edit the global configurations file here which is fine for pretty much all cases. \n\nWe can have a look at our global config file with the following command:\n```\ngit config --global --list\n```\n\nHowever, if this is the first time it will result in a failure, telling us that this file does not exist. If we just run the following commands, Git will create the configuration file automatically and add our GitHub username and the email address of our account, respectively.\n\n```\ngit config --global user.name \"yourgithubusername\"\ngit config --global user.email \"your_email@domain.com\"\n```\n\nWith these settings we can already get started, however passing on information from and to GitHub in this manner is not really secure. Using the SSH protocol, we can connect and authenticate to remote servers and services in a secure way. With SSH keys, we can connect to GitHub without supplying our username or password at each visit. If you want to create one, follow the brief instructions below or find them at [GitHub](https://help.github.com/en/github/authenticating-to-github/checking-for-existing-ssh-keys). SSH keys consist of a private key and a public key. The private key must stay secured on your computer at all times, the public key can be shared with third-party software to connect to them. \n\n1. List all the files (using Git Bash) to see if existing SSH keys are present. 
 \n\n```\nls -al ~/.ssh\n```\nIf there is a public SSH key present (file ending in `.pub`) we're all set and can continue to step 3, otherwise we will now generate a new key pair.\n\n2. The following will create a new ssh key, using the provided email as a label.\n```\nssh-keygen -t ed25519 -C \"your_email@domain.com\"\n```\nWhen you're prompted to \"Enter a file in which to save the key,\" press Enter. This accepts the default file location.\nThen it will ask us to type a secure passphrase, press Enter to skip this step. As long as no-one other than you has access to the key, you do not require a passphrase. \n\nNow we have our SSH keys, we can let them be managed by the `ssh-agent`. Ensure the ssh-agent is running with:\n```\neval `ssh-agent -s`\n```\n\nAdd your SSH private key to the ssh-agent.\n```\nssh-add ~/.ssh/id_ed25519\n```\n\n3. Lastly, we need to add the public key to GitHub. Copy the content of the public key file manually or with: \n```\nclip < ~/.ssh/id_ed25519.pub\n```\nThen, go to GitHub, click on your profile picture and go to settings. In the user settings sidebar, click **SSH and GPG keys** and select **New SSH key**. Choose a descriptive title (e.g. personal computer) and paste your key into the \"Key\" field and save your changes by clicking on **Add SSH key** and confirming with your password. \n\n\nAnother thing we can edit in the configuration file is the editor. An editor is the program we want Git to use when we have to add a message or solve conflicts. During this course we will avoid using editors as much as possible, though something might still go wrong and require us to interact with our editor. \n```\ngit config --global core.editor <editor>\n```\nin which we replace `<editor>` with `vim`, `notepad`, `emacs`, `atom`, `sublime` or any other editor you prefer.  
\n\nThe next chapter is considered further reading material and will be discussed later in the course, however since it is related to the configurations file, we have mentioned it here. \n\n# 2. Aliases\n\nThe configuration file is also a place where we can make our own aliases. An alias is a new command tailored to your wishes. It often consists of an existing Git command (e.g. `git log`) followed by a bunch of variables. This omits that we have to type a long command the whole time. Here are some useful aliases for a structured history overview:\n\n```\ngit config --global alias.hist \"log --pretty=format:'%h %ad | %s%d [%an]' --graph --date=short\"\ngit config --global alias.oneline \"log --graph --decorate --pretty=oneline --abbrev-commit --all\"\ngit config --global alias.mylog1 \"log --pretty=format:'%h %s [%an]' --graph\"\ngit config --global alias.mylog2 \"log --pretty=format:'%Cgreen%h%Creset %ai | %s %Cblue[%an] %Cred%d' --date=short -n 10 --color\"\ngit config --global alias.mylog3 \"log --decorate --pretty='format:%C(auto) %h %d %s %Cgreen(%cr by %cn)%Creset' --graph --all\"\n```\nOnce they are set, you can use them whenever you like. E.g.: running `git hist` gives us the same result as `git log --pretty=format:'%h %ad | %s%d [%an]' --graph --date=short`. \n\n\nIf at some point we are not happy any more about an alias, we can delete it with the following command:\n```\ngit config --global --unset alias.<command>\n```\n\n---\n\nLet's go to the [next session](https://material.bits.vib.be/topics/git-introduction/tutorials/3_first_commit/tutorial.html)!","qbase+ is software to visualize and analyze qPCR data. 
It allows you to perform various types of analyses:\n  - statistical analysis of gene expression\n  - advanced copy number analysis\n  - miRNA profiling\n  - ChIP-qPCR analysis\n# Installation and licensing\nYou can find the installation instructions on [VIB qbase+ support page](https://www.bits.vib.be/index.php/software-overview/qbaseplus)\nVIB only offers qbase+ to VIB scientists, you need a valid VIB email address to run the software. Biogazelle (the company who has developed the software) have written a manual with instructions on how to use the software. Download [Biogazelle's user manual](https://www.biogazelle.com/system/files/manuals/qbaseplus_manual_0.pdf). Before you can download the manual you have to log on to [the qbase+ website](https://www.qbaseplus.com/) using your qbase+ account. Use your VIB email address for setting up this account.\n# Training material\n  - [slides](http://data.bits.vib.be/pub/trainingen/qbasePLUS/qbase_2018.pdf)\n  \n  **Extra**\n  - [clean log10 transformed CNRQs](http://data.bits.vib.be/pub/trainingen/qbasePLUS/Log10CNRQsClean.xlsx) for checking normality in Prism\n  - [clean untransformed CNRQs](http://data.bits.vib.be/pub/trainingen/qbasePLUS/CNRQsClean.xlsx) for visualization in Prism\n  - [R script](http://data.bits.vib.be/pub/trainingen/qbasePLUS/qPCR.R) for analysis and visualization\n  - [log10 transformed CNRQs of control samples](http://data.bits.vib.be/pub/trainingen/qbasePLUS/resultslog.csv) for analysis and visualization in R\n  - [log10 transformed CNRQs of treated samples](http://data.bits.vib.be/pub/trainingen/qbasePLUS/resultslogTreated.csv) for analysis and visualization in R\n","Since normalization of qPCR data is based on the assumption that the reference targets have the same expression level in all samples it is crucial that the expression of the chosen reference genes is stable.\nHowever, none of the so-called **housekeeping** genes is universally stably 
expressed.\n\n[Genevestigator](https://www.genevestigator.com/gv/), both the commercial and the free version, contains a tool, called RefGenes, that allows to identify candidate reference genes that display very stable expression in the context that you are working in, typically a certain tissue of a certain organism.\n\nGenevestigator is a platform that contains curated public microarray data from thousands of experiments/conditions.\n\nRefGenes allows you to select the conditions that are relevant for you, e.g. mouse liver, human fibroblasts, or Arabidopsis thaliana leaves. In a next step, RefGenes identifies the genes with the most stable expression in the selected conditions.\n\n## Starting the RefGenes tool\n\n| How to start the RefGenes tool ? |\n| :-------------------------------- |\n| - Open the [RefGenes page](http://www.refgenes.org/rg/).\n - Click **start GENEVESTIGATOR**\n - Click the **Install/Start** button\n - This will automatically open a Genevestigator startup page. Keep this page open during the analysis. Closing this page will close Genevestigator.\n - Login. Also for the free version you need to create an account (use your academic email for this since you will need your vib email to get access to the commercial version).\n - Genevestigator is opened automatically\n\n## The Genevestigator user interface\n\nThe Genevestigator consists of the following components:\n  - **Sample Selection** panel: to choose the experimental conditions you're interested in (green)\n  - **Gene Selection** panel: to choose the genes you're interested in (blue)\n  - Center panel shows an overview of all available tools (purple). Once you have selected a tool, the panel will show the results of the analysis that is done by the tool.\n  - **Home** button (red) allows to return to the overview of the tools at any time. 
The text next to the home button indicates the toolset that you have selected.\n\nClick the **RefGenes** tool at the bottom.\n\n## Using the RefGenes tool to find reference genes\n\n### STEP 1: Choose samples from a biological context similar to those in your qPCR expriment\n\n| How to choose the samples you want to analyze ? |\n| :-------------------------------- |\n|\n - Click the **New** button in the **Sample Selection** panel. The selection of samples defines which data are used for the analysis.\n - Select the organism you're interested in (in this example: human)\n - Select the array type you want to analyze (in this example: human 133_2).\nFor most organisms Genevestigator contains expression data from multiple types of microarrays, e.g. different generations of Affymetrix GeneChips®. On these arrays, genes are sometimes represented by different sets of probes. To keep the analysis results easily interpretable, data from different array types are not mixed.\n - Click the **Select particular conditions** button to select all samples with a certain annotation, e.g. all data from a certain tissue type.\n - Select the type of conditions (red) you want to base your selection on (in this example: Anatomy). For each type (anatomy, neoplasms, perturbations, development...) you can browse the corresponding ontologies and select the desired condition(s) (green) (in this example: cardiac muscle).\n - Click **OK**\n\nNote that you can select multiple tissues.\nWhen you select samples for use in the RefGenes tool, you have to focus on microarrays from samples that were collected in conditions similar to those in your qPCR experiment. Don't make a too general selection, e.g. all human samples: you might end up with genes that are stable in most conditions but not in yours. Don't make a very specific selection either, e.g. human heart samples from patients taking the same medication as yours. 
If you want to broaden your study later on with samples from other patients, your reference genes might not be valid anymore. It is recommended to select reference genes in the same organism and the same / a similar tissue type as the one that you used in your experiments.\n\n### STEP 2: Select the gene(s) you want to measure in your qPCR experiment\n\nThis step is not essential, but it helps you to see whether your target gene(s) is (are) strongly or weakly expressed in the conditions of interest selected in STEP1. This allows you to search for candidate reference genes in a similar range of expression.\n\n| How to choose the genes you want to analyze ? |\n| :-------------------------------- |\n|\n - Click the **New** button in the **Gene Selection** panel.\n - Enter the name of your target gene in the text area (in this example: GOT1) and click **OK**\n - Open the RefGenes tool (if you haven't done that already). A red box plot representing the distribution of the expression levels of GOT1 in the 68 selected human heart samples appears in the center panel. As you can see, this gene is highly expressed in heart.\n\n\n\n\n### STEP 3: Find candidate reference genes\n\nThe reference genes that are suggested by GeneVestigator have the\nfollowing characteristics:\n\n  - They have the most stable expression levels across all selected samples (a small boxplot)\n  - Their overall expression level is similar to that of the target gene(s) of your qPCR experiment\n| How to find the candidate reference genes ? |\n| :-------------------------------- |\n|Click the **Run** button in the RefGenes tool. RefGenes will show the top 20 most stable genes with similar expression levels:\n\n\n\n## Exercises\n\n### Finding candidate reference genes in the free version of Genevestigator\n\nNow we will make a more elaborate exercise on finding candidate reference genes. 
We will do the analysis in the free version of RefGenes but the analysis in the commercial version is very similar.\nSuppose we want to compare the expression stability of the 4 commonly used reference genes for qPCR on mouse liver samples (ACTB, GAPDH, HPRT and TUBB4B) to that of 4 reference genes that are suggested by Genevestigator.\nTo this end we open the RefGenes tool and select the liver samples of the mouse 430_2 arrays.\n\n| Check the expression stability of the 4 commonly used reference genes ? |\n| :-------------------------------- |\n|\n - Click the **New** button in the **Gene Selection** panel to create a new selection. The selection of samples defines which data are used for the analysis.\n - Enter the name of your target gene in the text area (for example: ACTB) and click **OK**\n\nWhen you are using the commercial version, you may enter multiple genes at the same time, in the free version you have to enter them one by one. This means that you have to add the first gene as described above and then add the next gene by clicking the **Add** button and so on...\n\nFinally you end up with an expandable list of the genes you asked for and you can tick or untick them to control the display of their expression data in the main window. When you tick the 4 commonly used reference genes you can see how stable they are expressed in the 651 mouse liver samples that are stored in Genevestigator:\n\nAs you can see, the expression levels of the commonly used reference genes in the selected mouse liver samples is pretty variable which is also confirmed by their relatively high SD values.\nOften there are multiple probe sets for the same gene. When you use the free version you may only choose one probe set per gene so you have to make a choice. 
How to make that choice ?\nAffymetrix probe set IDs have a certain meaning: what comes after the underscore tells you something about the quality of the probes:\n\n  - **_at** means that all the probes of the probe set hit one known transcript. This is what you want: probes specifically targeting one transcript of one gene\n  - **_a_at** means that all the probes in the probe set hit alternate transcripts from the same gene. This is still ok: the probes bind to multiple transcripts but at least the transcripts come from the same gene (splice variants)\n  - **_x_at** means that some of the probes hit transcripts from different genes. This is still not what you want: the expression level is based on a combination of signals of all the probes in a probe set so also probes that cross-hybridize\n  - **_s_at** means that all the probes in the probe set hit transcripts from different genes. This is definitely not what you want: if the probes bind to multiple genes you have no idea whose expression you have measured on the array\n\nSo I always ignore probe sets with s or x. If you have two specific probe sets for a gene, they should more or less give similar signals. If this is not the case, I base my choice upon the expression level that I expect for that gene based on previous qPCR results.\n\nAs you can see, each of these 4 commonly used reference genes has a high expression level. Most genes do not have such high expression levels. In most qPCR experiments your genes of interest will have low or medium expression levels, so these reference genes will not be representative for the genes of interest.\n\nReference genes should ideally have similar expression levels as the genes of interest. Therefore, we will select the four most stably expressed genes with a medium expression level (between 8 and 12) according to the RefGenes tool.\n\n| Select the 4 most stably expressed candidate reference genes with medium expression levels. 
|\n| :-------------------------------- |\n|\n - Untick all target genes.\n - Click the **Run** button at the top of the main window and check if the range is set correctly\n\nSelect the 4 candidates with the lowest SD: Then, we performed qPCR on a representative set of 16 of our liver samples to measure the expression of these 8 candidate reference genes and analyzed the data ([See how to select the best reference genes using geNorm in qbase+](http://wiki.bits.vib.be/index.php/Analyzing_data_from_a_geNorm_pilot_experiment_in_qbase%2B)).\n\n\n### Finding candidate reference genes in the commercial version of Genevestigator\n\nWe will do the same exercise as above in the commercial version of Genevestigator. The difference between the free and commercial version of RefGenes is the number of target genes you can select. In the free version you have to select one gene and then gradually add all other genes one at a time. The commercial version allows you to load as many target genes as you want simultaneously. As a consequence, you can select multiple probe sets for the same gene.\nAll VIB scientists have free access to the commercial version of Genevestigator via their VIB email address. If you don't know your VIB email address, check [the Who's Who of VIB](http://www.vib.be/en/whoiswho/Pages/default.aspx).\n\n  - Open a browser and go to the [Genevestigator website](https://www.genevestigator.com/)\n  - If it's your **first time to access Genevestigator**, create an account by clicking **join now** button. You will be redirected to a new window in which you will give some personal information including a valid VIB email address. Click **Register** and check your email to activate your new account. 
Go back to the [GeneVestigator website](https://www.genevestigator.com/)\n  - Choose the research field you want to investigate: **pharma/biomedical** or **plant biology** by clicking the corresponding button\n  - Click **Start**\n  - Use your VIB email address and password to login to Genevestigator.\n  - This will automatically open a Genevestigator startup page in your browser. Keep this page open during the analysis. Closing this page will close Genevestigator.\n  - Genevestigator is opened automatically\n\nOpen the RefGenes tool by clicking its icon in the **Further tools** section and select the liver samples of the mouse 430_2 arrays [as explained in the previous exercise](http://wiki.bits.vib.be/index.php/Using_GeneVestigator_to_select_candidate_reference_genes#STEP_1:_Choose_samples_from_a_biological_context_similar_to_those_in_your_qPCR_expriment).\n| Check the expression stability of the 4 commonly used reference genes ? |\n| :-------------------------------- |\n| - Click the **New** button in the **Gene Selection** panel to create a new selection. The selection of samples defines which data are used for the analysis.\n - Enter the names of the 4 commonly used reference genes in the text area and click **OK**\n\nI still remove probe sets with an _s or _x since they do not specifically bind to one single gene:\nFinally you end up with an expandable list of the genes you asked for and you can tick or untick them to control the display of their expression data in the main window. 
By default all probe sets are ticked so you can see how stable the commonly used reference genes are expressed in the 651 mouse liver samples that are stored in Genevestigator:\nAs you can see, the expression levels of the commonly used reference genes in the selected mouse liver samples is pretty variable which is also confirmed by their relatively high SD values.\n\nThe next step of selecting the 4 most stable candidate reference genes with medium expression levels is exactly the same as described above for the free version of RefGenes.\n\n| Create a new gene selection with 20 found candidate reference genes and call it mouse_references. |\n| :-------------------------------- |\n|Click the **New** button at the top of the main window to create a new selection.\n\nTo change the name of the selection right click the name in the **Gene selection** panel and select **Rename**\n\n| Identify perturbations where the mouse_references genes show more than 1,5 fold differential expression using the Condition perturbations tool. |\n| :-------------------------------- |\n|Click the **Home** button at the top to go back to the tools overview page.\n\nClick the **Perturbations** tool in the **Condition Search tools** section\n\n\nMake a **New Sample selection** including all mouse 430_2 arrays.\nUntick all genes except for the first one and filter the long heatmap for at least 1.5 fold change differential expression:\n\n\nYou now get a list of mouse samples in which the gene is not stably expressed so you can check if any of these samples is related to the samples in your study. 
Hover your mouse over the name of a sample to see more details about the sample.\nYou can do this for each of the candidate reference genes and select the ones that best fit your needs\n\n[Exercise on selecting reference genes for metacaspases in Arabidopsis thaliana](http://wiki.bits.vib.be/index.php/GV_Exercise.1).\n\n\nIn a geNorm pilot experiment you analyze a set of candidate reference genes in a representative set of samples that you want to test in your final experiment. Based on the M-values and CVs that are calculated by qbase+, you can choose the genes that most satisfy the criteria for a good reference gene.\n\n### Exercise 1: reference genes for mouse liver\n\nWe come back to the 8 candidate reference genes that we selected for mouse liver:\n\n  - 4 commonly used reference genes: ACTB, TUBB4B, GAPDH and HPRT\n  - 4 candidate reference genes with very stable medium expression levels selected based on expression data coming from more than 600 microarrays of mouse liver samples using Genevestigator: Gm16845, MUSK, OTOP3, EDN3\n\nWe have measured their expression in a representative set of 16 of our mouse liver samples, each in triplicate. We will now analyze the stability of these candidate reference genes in our samples.\n\n#### Creating a new Experiment\n\n| Create a new Experiment called GeNormMouse in Project1 |\n| :------------------------------------------- |\n| Open qbase+ or, if the software is already open, click the Launch Wizard button.\n\nYou can find the details on how to create a new experiment in Creating a project and an experiment\n\n#### Loading the data into qbase+\n\nThe data is stored in [the RefGenes folder](http://data.bits.vib.be/pub/trainingen/qbasePLUS/DataTraining/RefGenes.zip). It consists of 8 Excel files, one file for each candidate reference gene. If you are not working on a BITS laptop, download and unzip the folder.\n\n| Import the data. These files are in qBase format. 
|\n| :------------------------------------------- |\n| You can find the details on how to start the data import in Loading data into qbase+\n\nUnlike the previous exercise, qbase+ does not allow you to do a quick import this time. In the Import Run window Manual import is selected:\nMake sure that Upload file to Biogazelle support for further analysis is NOT selected and click Next\nMake sure the correct File type is selected (qBase) and click Finish.\nThis file contains the data of the geNorm pilot experiment. In the pilot experiment, 8 candidate reference genes were measured in 16 representative mouse liver samples.\n#### Analyzing the geNorm pilot data\n\n| Specify the aim of the experiment. |\n| :------------------------------------------- |\n| In this experiment we want to select the ideal reference genes for our next experiments so we choose selection of reference genes (geNorm)\n\n| Check the quality of the replicates (use default parameter settings). |\n| :------------------------------------------- |\n| You can find the details on how to check the quality of the replicates in the Checking the quality of technical replicates and controls section of Analyzing gene expression data in qbase+\n\nWe haven't included any positive or negative controls so you don't need to show their details.\n\n| Select the Amplification efficiencies strategy you want to use. |\n| :------------------------------------------- |\n| You can find the details on how to select the Amplification effciencies strategy in the Taking into account amplification efficiencies section of Analyzing gene expression data in qbase+\n\nWe haven't included dilution series nor do we have data from previous qPCR experiments regarding the amplification efficiencies so we choose to use the same efficiency for all genes.\nIt is of course better to include a dilution series for each gene to have an idea of the amplification efficiencies of each primer pair.\n\n| Convert all genes to Reference genes. 
|\n| :------------------------------------------- |\n| You can convert all the genes simultaneously by selecting Use all targets as candidate reference genes\n\nClick Finish.\n\n| Which genes are you going to use as reference targets in further experiments ? |\n| :------------------------------------------- |\n| Upon clicking Finish, the geNorm window containing the analysis results is automatically opened. The geNorm window consists of three tabs. The tabs are located at the bottom of the window: geNorm M, geNorm V and Interpretation.\nThe first tab, geNorm M, shows a ranking of candidate genes according to their stability, expressed in M values, from the most unstable genes at the left (highest M value) to the best reference genes at the right (lowest M value):\nThe second tab, geNorm V, shows a bar chart that helps to determine the optimal number of reference genes to be used in subsequent analyses:\n\nThe number of reference genes is a trade-off between practical considerations and accuracy. It is a waste of resources to quantify more genes than necessary if all candidate reference genes are relatively stably expressed and if normalization factors do not significantly change when more genes are included. However, Biogazelle recommends the minimal use of 3 reference genes and stepwise inclusion of more reference genes until the next gene has no significant contribution to the normalization factors.\nTo determine the need of including more than 3 genes for normalization, pairwise variations Vn/n+1 are calculated between two sequential normalization factors. Simply stated: V is a measure of the added value of adding a next reference gene to the analysis. A large variation means that the added gene has a significant effect and should be included.\nIn normal experiments like the Gene expression experiment (see Analyzing gene expression data in qbase+), we only have 3 reference genes so we will see only 1 bar here. 
But in this geNorm pilot experiment, we analyzed 8 candidate reference genes, so we see 6 bars.\nAll pairwise variations are very low, so even the inclusion of a third gene has no significant effect. Based on a preliminary experiment that was done by Biogazelle, 0.15 is taken as a cut-off value for V, below which the inclusion of an additional reference gene is not required. Normally this threshold is indicated by a green line on the geNorm V bar chart. However since all V-values fall below the threshold in this geNorm pilot experiment, you don’t see this line on the bar chart.\nSo, these results mean that for all subsequent experiments on these samples, two reference genes, EDN3 and MUSK, would be sufficient. However, as stated before, Biogazelle recommends to always include at least three reference genes in case something goes wrong with one of the reference genes (so also include Gm16845). |\nThese are artificial data. But when you read [the paper by Hruz et al., 2011](http://www.biomedcentral.com/1471-2164/12/156/abstract) you see that the genes that are selected by Genevestigator are often outperforming the commonly used reference genes.\n\n### Exercise 2: reference genes for human heart\n\n#### Creating a new Experiment\n\n| Create a new Experiment called GeNormHuman in Project1        |\n| :------------------------------------------------------------ |\n| You can find the details on how to create a new experiment in Creating a project and an experiment |\n\n#### Loading the data into qbase+\n| Import [Run6](http://data.bits.vib.be/pub/trainingen/qbasePLUS/DataTraining/Run6.xls) . This file is in qBase format. |\n| :------------------------------------------- |\n| You can find the details on how to start the data import in Loading data into qbase+. Unlike the previous exercise, qbase+ does not allow you to do a quick import this time. 
In the Import Run window Manual import is selected:\n\nMake sure that Upload file to Biogazelle support for further analysis is NOT selected and click Next. Select the correct File type (qBase) and click Finish. This file contains the data of the geNorm pilot experiment. In the pilot experiment, 10 candidate reference genes were measured in 20 representative samples.\n\n#### Analyzing the geNorm pilot data\n\n| Specify the aim of the experiment.        |\n| :---------------------------------------- |\n| In this experiment we want to select the ideal reference genes for our next experiments so we choose selection of reference genes (geNorm) |\n\n| Check the quality of the replicates and the controls (use default parameter settings). |\n| :------------------------------------------- |\n| You can find the details on how to check the quality of the replicates in the Checking the quality of technical replicates and controls section of Analyzing gene expression data in qbase+\n\nAll replicates and controls have met the quality criteria so there's no need to inspect them further. |\n| Select the Amplification efficiencies strategy you want to use. |\n| :------------------------------------------- |\n| You can find the details on how to select the Amplification effciencies strategy in the Taking into account amplification efficiencies section of Analyzing gene expression data in qbase+. We haven't included dilution series nor do we have data from previous qPCR experiments regarding the amplification efficiencies so we choose to use the same efficiency (E=2) for all genes. |\n\nIt is of course better to include a dilution series for each gene to have an idea of the amplification efficiencies of each primer pair.\n| Convert all genes to Reference genes.                                                                         
|\n| :------------------------------------------------------------------------------------------------------------ |\n| You can convert all the genes simultaneously by selecting Use all targets as candidate reference genes |\n\nClick Finish.\n\n| Which genes are you going to use as reference targets in further experiments ? |\n| :------------------------------------------- |\n| Upon clicking Finish, the geNorm window containing the analysis results is automatically opened. The geNorm window consists of three tabs. The tabs are located at the bottom of the window: geNorm M, geNorm V and Interpretation.\nThe first tab, geNorm M, shows a ranking of candidate genes according to their stability, expressed in M values, from the most unstable genes at the left (highest M value) to the best reference genes at the right (lowest M value):\nThe second tab, geNorm V, shows a bar chart that helps determining the optimal number of reference genes to be used in subsequent analyses:\n\nThe number of reference genes is a trade-off between practical considerations and accuracy. It is a waste of resources to quantify more genes than necessary if all candidate reference genes are relatively stably expressed and if normalization factors do not significantly change when more genes are included. However, Biogazelle recommends the minimal use of the 3 most stable candidate reference genes and stepwise inclusion of more reference genes until the next gene has no significant contribution to the normalization factors.\nTo determine the need of including more than 3 genes for normalization, pairwise variations Vn/n+1 are calculated between two sequential normalization factors. Simply stated: V is measure of the added value of adding a next reference gene to the analysis. 
A large variation means that the added gene has a significant effect and should be included.\nIn normal experiments like the Gene expression experiment (see Analyzing gene expression data in qbase+), we only have 3 reference genes so we will see only 1 bar here. But in this geNorm pilot experiment, we analyzed 10 candidate reference genes, so we see 8 bars.\nAll pairwise variations are very low, so even the inclusion of a third gene has no significant effect. Based on a preliminary experiment that was done by Biogazelle, 0.15 is taken as a cut-off value for V, below which the inclusion of an additional reference gene is not required. Normally this threshold is indicated by a green line on the geNorm V bar chart. However since all V-values fall below the threshold in this geNorm pilot experiment, you don’t see this line on the bar chart.\nSo, these results mean that for all subsequent experiments on these samples, two reference genes, HPRT1 and GAPDH, would be sufficient. However, as stated before, Biogazelle recommends to always include at least three reference genes in case something goes wrong with one of the reference genes (so also include YWHAZ). \n\n\n\nIn this example we will analyze data from an artificial expression study containing the following samples:\n  - 6 treated samples: treated1, treated2, ... treated6\n  - 6 control samples: control1, control2, ... control6\n\nIn this study, the expression of the following genes was measured:\n  - 4 commonly used reference genes: ACTB, HPRT, GAPDH, and TUBB4B. 
We have seen in [the previous exercise](http://wiki.bits.vib.be/index.php/Analyzing_data_from_a_geNorm_pilot_experiment_in_qbase%2B#Exercise_1:_reference_genes_for_mouse_liver) that the expression of these reference genes in mouse liver samples is not as stable as generally thought.\n  - 3 genes of interest:\n      - Low: a gene with low expression levels\n      - Medium: a gene with moderate expression levels\n      - HighVar: a gene with low and very noisy expression\n\nIn general, the lower the expression level, the more noisy the qPCR results will become. For each of the genes of interest we have included a run in which a 2-fold difference in expression between control and treated samples was created (Low1, Medium1 and HighVar1) and a run with a 4-fold difference in expression (Low2, Medium2 and HighVar2).\nThere are three technical replicates per reaction. In a second experiment we used [the reference genes that were obtained via Genevestigator](http://wiki.bits.vib.be/index.php/Using_GeneVestigator_to_select_candidate_reference_genes#Finding_candidate_reference_genes_in_the_free_version_of_Genevestigator) and that proved to be [more stably expressed in mouse liver samples than the commonly used references](http://wiki.bits.vib.be/index.php/Analyzing_data_from_a_geNorm_pilot_experiment_in_qbase%2B#Exercise_1:_reference_genes_for_mouse_liver).\nThe data can be found in the NormGenes folder on the BITS laptops or can be downloaded: [from our website](http://data.bits.vib.be/pub/trainingen/qbasePLUS/DataTraining/NormGenes.zip).\n\n#### Creating a new experiment\n\n| Create a new Experiment called NormGenes1 in Project1 |\n| :---------------------------------------------------- |\n| You can find the details on how to create a new experiment in Creating a project and an experiment |\n\n#### Loading the data\n\n| Import Run1 to Run5. These files are in qBase format. 
|\n| :---------------------------------------------------- |\n| You can find the details on how to import the data file in the Loading the data into qbase+ section of Analyzing data from a geNorm pilot experiment in qbase+ |\n\nWe are going to compare expression in treated versus untreated samples so we need to tell qbase+ which samples are treated and which not. To this end, we have constructed [a sample properties file](http://data.bits.vib.be/pub/trainingen/qbasePLUS/DataTraining/Sample_Properties_Norm.xlsx) in Excel containing the grouping annotation as a custom property called Treatment.\n\n| Import the Sample Properties file. |\n| :---------------------------------------------------- |\n| You can find the details on how to import the data file in the Adding annotation to the data section of Loading data into qbase+.\n\n| Select to import the custom property. |\n| :---------------------------------------------------- |\n| So as you can see we have 6 treated and 6 untreated samples and we have measured the expression of the 4 commonly used reference genes and 6 genes of interest:\n\n#### Analyzing the data\n\n| Which amplification efficiencies strategy are you going to use ?     |\n| :------------------------------------------------------------------- |\n| You don't have data of serial dilutions of representative template to build standard curves so the only choice you have is to use the default amplification efficiency (E = 2) for all the genes. |\n\n| Appoint the reference genes. |ACTB, GAPDH, HPRT and TUBB4B are the reference genes:\nYou can find the details on how to appoint reference targets in the Normalization section of Analyzing gene expression data in qbase+ |\n\n| Is the stability of the reference genes ok ?                                                                 
|\n| :----------------------------------------------------------------------------------------------------------- |\n| The M and CV values of the reference genes are shown in green so the stability of the reference genes is ok. |\n\n| Which scaling strategy are you going to use ? |Since you have a treated and a control group, it seems logical to use the average of the control group for scaling.\n\nYou can find the details on how to specify the scaling strategy in the Scaling section of Analyzing gene expression data in qbase+\nLook at the target bar charts.\n\n| In the target bar charts plot the average expression level of each group. |In the Grouping section at the bottom of the chart you can select Plot group average: Now do exactly the same for the second experiment with the same genes of interest but with other reference genes. This means that you have to return to the Analysis wizard. To this end, click the Launch wizard button at the top of the page:\n\n| Create a new Experiment called NormGenes2 in Project1 |\n| :---------------------------------------------------- |\n| You can find the details on how to create a new experiment in Creating a project and an experiment |\n\n| Import Run5 to Run9. These files are in qBase format. |\n| :---------------------------------------------------- |\n| You can find the details on how to import the data file in the Loading the data into qbase+ section of Analyzing data from a geNorm pilot experiment in qbase+ |\n\n| Import the Sample Properties file.                    |\n| :---------------------------------------------------- |\n| You can find the details on how to import the data file in the Adding annotation to the data section of Loading data into qbase+. Select to import the custom property. |\n\nSo as you can see we have 6 treated and 6 untreated samples and we have measured the expression of the 4 new reference genes and 6 genes of interest:\n| Appoint the reference genes. 
|EDN3, Gm16835, MUSK and OTOP3 are the reference genes:\n| :---------------------------------------------------- |\n| You can find the details on how to appoint reference targets in the Normalization section of Analyzing gene expression data in qbase+ |\n\n| Is the stability of the reference genes ok ?                                                                 |\n| :----------------------------------------------------------------------------------------------------------- |\n| The M and CV values of the reference genes are shown in green so the stability of the reference genes is ok. |\n\nAs you can see the M and CV values of these reference genes is much lower than these of the 4 commonly used reference genes pointing to the fact that genes are more stably expressed. It's not that the commonly used reference genes are bad references. Then qbase+ would not display them in green. It's just that the other reference genes are more stable. But this can have a big impact on the results of your analysis...\n\n| Use the average of the control group for scaling |You can find the details on how to specify the scaling strategy in the Scaling section of Analyzing gene expression data in qbase+\n\nPlot the average expression level of each group. Now we will compare the target bar charts of the second and the first experiment to assess the influence of the stability of the reference targets on the analysis results.\n\n| How to display the target bar charts of the second and the first experiment next to each other ? |You can display the bar charts next to each other by clicking the tab of the bar chart of the second experiment. Drag the tab to the right while you hold down the mouse button until you see and arrow at the right side of the qbase+ window and a dark grey box in the right half of qbase+ window. Release the mouse button when you see the arrow and the box. Now the two bar charts should be next to each other. 
Some laptop screens are too small to nicely display the two bar charts next to each other. If this is the case switch to full screen mode by double clicking the tab of the first experiment. |\n\nNow you can compare the expression of each gene in the first and in the second experiment.\n\nWhen we do this for HighVar1 for instance, you see that the average expression levels of both groups are the same in the first and the second experiment (check the scales of the Y-axis\\!). Both experiments detect the two-fold difference in expression level between the groups. However, the error bars are much larger in the first experiment than in the second. The variability of the reference genes does have a strong influence on the errors and the size of the error bars will influence the outcome of the statistical test to determine if a gene is differentially expressed or not. The larger the error bars the less likely it is that the test will say that the groups differ.\n\nRemember that the error bars represent 95% confidence intervals:\n  - if the error bars of the two groups do not overlap: you are certain that the difference between the means of the two groups is significant\n  - if they do overlap: you know nothing with certainty: the means can be different or they can be the same. Of course the more they overlap the smaller the chance that there is a significant difference between the groups.\n\nCheck out the results of HighVar2. Here, you clearly see the influence of the reference genes. Again, the fourfold difference in expression is detected by both experiments but:\n\n  - the least stable reference genes (experiment 1) give large overlapping error bars\n  - the most stable reference genes (experiment 2) give smaller, barely overlapping error bars\n\nThis means that in experiment 2, a statistical test will probably declare that HighVar2 is differentially expressed while in experiment 1 this will not be the case. 
We will test this assumption by performing a statistical test.\n\n#### Statistical analysis of differential expression\n\n| Use a non-parametric test to identify DE genes in experiment 1 ? |\n| :---------------------------------------------------- |\n| You can find full details on statistical analyses in qbase+ in the statistical analysis section of analyzing gene expression data in qbase+. In brief, you need to perform the following steps:\n\nOpen the Statistical wizard\n\nThe goal of this analysis is to compare the mean expression levels of our genes of interest in treated and untreated samples\n\nUse the Treatment property to identify treated and untreated samples\n\nAnalyze all genes of interest\n\nUse the default settings to perform the non-parametric Mann-Whitney test\n\nAs you can see, none of the genes is considered DE by the very conservative non-parametric test. Additionally most genes have the same p-value. That's normal when you don't have many replicates. In our case, we have 6 replicates. Non-parametric tests are based on a ranking of the data values and there are not so many ways to rank 6 data points. This is why you see the same p-values for many genes.\nAs said before, the non-parametric test is very stringent. If the data do come from a normal distribution, the test will generate false negatives. Some of the genes might have been labeled not DE while in fact they are DE so you might have missed some differential expression. The choice of statistical test with 6 biological replicates depends on what you prefer: false negatives or false positives. Most people will choose false negatives since they don't want to invest time and money in research on a gene that was labeled DE while in fact it is not DE.\n\nSuppose I don't mind false positives but I don't want to miss any potential DE genes. In that case, it's better to go for a t-test. 
Let's repeat the test now choosing a parametric t-test.\n| Use a t-test to identify DE genes in experiment 1 ? |\n| :---------------------------------------------------- |\n| You can find full details on statistical analyses in qbase+ in the statistical analysis section of analyzing gene expression data in qbase+.\nIn brief, you need to perform the following steps:\nOpen the Statistical wizard\nThe goal of this analysis is to compare the mean expression levels of our genes of interest in treated and untreated samples\nUse the Treatment property to identify treated and untreated samples\nAnalyze all genes of interest\nDescribe the data set as log-normally distributed\n\nStill none of the genes is considered DE but you do see that the p-values of the t-test are lower than those of the Mann-Whitney test.\n\n| Use a non-parametric test to identify DE genes in experiment 2 ? |\n| :---------------------------------------------------- |\n| You can find full details on statistical analyses in qbase+ in the statistical analysis section of analyzing gene expression data in qbase+.\nIn brief, you need to perform the following steps:\n\nOpen the Statistical wizard\nThe goal of this analysis is to compare the mean expression levels of our genes of interest in treated and untreated samples\nUse the Treatment property to identify treated and untreated samples\nAnalyze all genes of interest\nUse default settings\n\nNow you see that 4 out of the 6 genes are considered DE. This is also what we expected since 3 of our genes of interest have a 4-fold difference in expression level between the two groups. It's understandable that it's hard to detect 2-fold differences in expression especially when the expression of the gene is somewhat variable as is the case for Low1 and HighVar1 but a 4-fold difference is a difference that you would like to detect.\n| Use a t-test to identify DE genes in experiment 2 ? 
|\n| :---------------------------------------------------- |\n| You can find full details on statistical analyses in qbase+ in the statistical analysis section of analyzing gene expression data in qbase+.\nIn brief, you need to perform the following steps:\n\nOpen the Statistical wizard\nThe goal of this analysis is to compare the mean expression levels of our genes of interest in treated and untreated samples\nUse the Treatment property to identify treated and untreated samples\nAnalyze all genes of interest\nDescribe the data as log-normally distributed\n\nAgain the t-test generates lower p-values than the Mann-Whitney test but realize that choosing the t-test when the data is not normally distributed will generate false positives \\!","#### Create a project\n\nWhen you use qbase+ for the first time, you can't do anything unless you\ncreate a project to store your experiments in.\n\n| Create a new Project |\n| :----------------------------------- |\n| When you double click the qbase+ icon, the software starts up and automatically opens the Start page where you can create a new project by clicking the Create new project button : This will create a new project with a default name like Project 1 . |\n\n#### Create an experiment\n\nTo open actual data (one/more runs) in qbase+, creating a project is not sufficient. You need to create an experiment in this project to hold the run data.\n\n| Create a new Experiment called GeneExpression in the new project. |\n| :----------------------------------- |\n| Select the Create a new qbase+ experiment option  in the Start page. Type a name for the new experiment . Click the Next button at the bottom of the page . This will create the experiment.\n\nWhen you leave the **Start page**, the **Import run** page is automatically opened allowing you to import the actual qPCR data into qbase+.\n\n#### Loading the data\n\nFirst a few quick words about the data set. 
We’ll be working with data coming from 3 runs (plates in the qPCR instrument): [Run1](http://data.bits.vib.be/pub/trainingen/qbasePLUS/DataTraining/Run1.xls), [Run2](http://data.bits.vib.be/pub/trainingen/qbasePLUS/DataTraining/Run2.xls) and [Run3](http://data.bits.vib.be/pub/trainingen/qbasePLUS/DataTraining/Run3.xls)\n\nThe data consist of Cq values for:\n\n  - 3 reference target genes: Stable, Nonregulated, and Flexible\n  - 3 target genes of interest: Duvel, Leffe, and Palm\n\neach measured twice (= technical replicates) in 16 different samples. Half of the samples have undergone a treatment, half of them are untreated control samples.\n\nThe data set also contains a series of standard samples consisting of a four-fold dilution series of cDNA for each target gene. These measurements allow to generate a standard curve from which target-specific amplification efficiencies can be calculated. Finally, negative controls (No Template Controls) have been measured. The goal of the analysis is to identify target genes of interest that have different expression levels in the treated samples compared to the untreated control samples.\n\n| In GeneExpression load CFX run files [Run1](http://data.bits.vib.be/pub/trainingen/qbasePLUS/DataTraining/Run1.xls), [Run2](http://data.bits.vib.be/pub/trainingen/qbasePLUS/DataTraining/Run2.xls) and [Run3](http://data.bits.vib.be/pub/trainingen/qbasePLUS/DataTraining/Run3.xls).\n| :---------------------------- |\n|  \n* Click the **Import runs** button  to open the **Import Run** window\n* Click the **Browse** button  to go to the directory that stores the files containing the qPCR data\n\nSelect the 3 run files simultaneously by holding the **Ctrl** key on your keyboard during the selection in Windows or the command button in MacOSX.\n\nClick the **Open** button \n\nNow you go back to the **Import Run** window, click the **Next** button (purple)\n\n* \nqbase+ tries to recognize the format of the selected import files. 
If only one format matches the files (as in our case CFX), it is selected and the quick import option is enabled. Click the **Finish** button.\n\nIn the Imported run names area on the **Import run** page you should now see the names of the 3 run files. If these are the correct files, click the **Next** button at the bottom of the page.\n\n#### Adding annotation to the data\n\nWhen you leave the **Import run** page, you are redirected to the **Sample target list** page, which gives you an overview of the targets (= genes) and samples qbase+ detected when reading in the datafiles.\nTake a look at the data. You see that the list of samples and targets matches the description of the qPCR experiment at the top of this page. The samples in this experiment are divided into two groups: samples that received some kind of treatment and untreated control samples. This information was not included in the run files so qbase+ does not know which sample belongs to which group. However, this is relevant information: in our analysis we are going to compare the expression of our genes of interest between treated and untreated samples. This means that qbase+ needs the grouping annotation to be able to perform the analysis we want to do. So we have to give qbase+ this annotation: we can do this by adding a custom sample property. To do this we need to create a sample properties file with a specific format that is described in [the tutorial](http://data.bits.vib.be/pub/trainingen/qbasePLUS/TutorialIII.pdf). 
You can find the file in the qbase+ folder on the BITS laptops or you can [download the file here](http://data.bits.vib.be/pub/trainingen/qbasePLUS/Sample_Properties_file.xlsx).\n\n| How to add the grouping annotation ?\n| :---------------------------- |\n|  To import the file containing the grouping annotation:\n\n* select **Add samples and targets** \n* click **Import sample list** \n* browse to the folder that contains the samples file\n* select the file and click **Open**\n* click **Next**\n\nIn the **Importing samples** window, you have to tell qbase+ which sample annotation you want to import from the sample properties file\n\nIn our case we could import Quantities (this annotation is available in the sample properties file) but the quantities of the standard samples were included in the run files so qbase+ has already imported this annotation from the run files during data import.\nWe definitely need to import the Custom properties since they were not a part of the run files. The Treatment property will tell qbase+ which samples belong to the group of control samples and which samples belong to the group of treated samples.\nClick the **Next** button at the bottom of the page to finish the import.\n\nAt this point you don't see the custom annotation that you have imported, you will see it later in the analysis during scaling\nLeaving the **Sample target list** page takes you to the **Run annotation** page, where you have to confirm again that the sample and gene names are ok. If this is not the case you can adjust the annotation here.\n\nClick the **Next** button at the bottom of the page\n\nOur data file contains all required annotation:\n\n  - Cq values\n  - sample and target names\n  - sample types\n  - quantities for the standard samples\n  - grouping of the samples\n\n\n\nOnce runs are imported, you can start analyzing the data. 
Data consist\nof Cq values for all the wells.\n\n#### Specifying the aim of the experiment\n\nOn the **Aim** page you tell the software what type of analysis you want to do. Different types of analyses require different parameters, parameter settings and different calculations. By selecting the proper analysis type, qbase+ will only show the relevant parameters and parameter settings.\n\nSince we are doing a **gene expression analysis** in this exercise, this is the option we should select. Click the **Next** button on the bottom of the page to go to the **Technical quality control** page.\n\n#### Checking the quality of technical replicates and controls\n\nThe **Technical quality control** page handles the settings of the requirements that the data have to meet to be considered high quality. For instance the maximum difference between technical replicates is defined on this page. If there are technical replicates in the data set, qbase+ will detect them automatically (they have the same sample and target name) and calculate the average Cq value. In theory, technical replicates should generate more or less identical signals.\n\n| How to set the maximum difference in Cq values for technical replicates ?\n| :---------------------------- |\n|  The quality criterion that the replicates must meet to be included for further analysis is one of the parameters in qbase+. 
You can set it on the **Technical quality control** page:\n\nThe default maximum allowed difference in Cq values between technical replicates is 0.5\n\nAdditionally, you can do quality checks based on the data of the positive and negative controls.\n| How to set quality requirements for the control samples ?\n| :---------------------------- |\n|  On the same **Technical quality control** page you can define the minimum requirements for a well to be included in the calculations:\n\n* **Negative control threshold** : minimum allowed difference in Cq value between the sample with the highest Cq value and the negative control with the lowest Cq value: the default is 5 which means that negative controls should be more than 5 cycles away from the sample of interest.\n* **Lower and upper boundary** : allowed range of Cq values for positive controls.\n\nExcluded means that the data are ignored in the calculations.\n\n| How to check if there are wells that do not meet these criteria ?\n| :---------------------------- |\n|  You can see flagged and excluded data by ticking the **Show details…** options  on the **Technical quality control** page and clicking the **Next** button (purple) at the bottom of the page.\n\nQbase+ will open the results of the quality checks for the replicates  and the controls  on two different tabs. These tabs show lists of samples that failed the quality control criteria. When you open the replicates tab  you can get an overview of the flagged  or the excluded (purple) wells. Select the **failing**  wells.\n\nWhen the difference in Cq between technical replicates exceeds 0.5, the wells end up in the flagged or failing list. They are included in calculations unless you exclude them by unticking them. You see that the two replicates of Palm in Sample05 have very different Cq values. 
All other bad replicates are coming from standard samples.\nIf you are finished checking the data quality, click **Next** to go to the **Amplification efficiencies** page.\n\n#### Taking into account amplification efficiencies\n\nQbase+ calculates an amplification efficiency (E) for each primer pair (= gene). Genes have different amplification efficiencies because:\n\n  - some primer pairs anneal better than others\n  - the presence of inhibitors in the reaction mix (salts, detergents…) decreases the amplification efficiency\n  - inaccurate pipetting\n\nQbase+ has a parameter that allows you to specify how you want to handle amplification efficiencies on the **Amplification efficiencies** page.\n\n| How to specify the amplification efficiencies strategy you want to use ?\n| :---------------------------- |\n|  Since we have included a dilution series for creating a standard curve in our qPCR experiment, we will select\n\n* **Use assay specific amplification efficiencies**\n* **Calculate efficiencies from included standard curves**\n\nAmplification efficiencies are calculated based on the Cq values of a serial dilution of representative template, preferably a mixture of cDNAs from all your samples. Since you know the quantity of the template in each dilution, you can plot Cq values against template quantities for each primer pair. 
Linear regression will fit a standard curve to the data of each gene, and the slope of this curve is used to calculate the amplification efficiency.\n\n| How to check the amplification efficiencies of the genes ?\n| :---------------------------- |\n|  Once you have made this selection, qbase+ starts calculating the efficiencies and the results are immediately shown in the **calculation efficiencies** table.\n\nIn this way, one amplification efficiency (E) for each gene is calculated and used to calculate **Relative Quantities (RQ)**:\n∆Cq is calculated for each well by subtracting the Cq of that well from the average Cq across all samples for the gene that is measured in the well. So ∆Cq is the difference between the Cq value of a gene in a given sample and the average Cq value of that gene across all samples. Cq is subtracted from the average because in this way high expression will result in a positive ∆Cq and low expression in a negative ∆Cq. \n**So at this point the data set contains one RQ value for each gene in each sample.**\n\nClick **Next** to go to the **Normalization** page.\n\n#### Normalization\n\nDifferences in amplification efficiency are not the only source of variability in a qPCR experiment. Several factors are responsible for noise in qPCR experiments e.g. differences in:\n\n  - amount of template cDNA between wells\n  - RNA integrity of samples\n  - efficiency of enzymes used in the PCR or in the reverse\n    transcription\n\nNormalization will eliminate this noise as much as possible. In this way it is possible to make a distinction between genes that are really upregulated and genes with high expression levels in one group of samples simply because higher cDNA concentrations were used in these samples.\nIn qPCR analysis, normalization is done based on housekeeping genes.\n\nHousekeeping genes are measured in all samples along with the genes of interest. In theory, a housekeeping gene should have identical RQ values in all samples. 
In reality, noise generates variation in the expression levels of the housekeeping genes. This variation is a direct measure of the noise and is used to calculate a normalization factor for each sample.\nThese normalization factors are used to adjust the RQ values of the genes of interest accordingly so that the variability is eliminated.\n\nThese adjusted RQ values are called **Normalized Relative Quantities (NRQs)**. In qbase+ housekeeping genes are called reference genes. In our data set there are three reference genes: Stable, Non-regulated and Flexible. On the **Normalization page** we can define the normalization strategy we are going to use, appoint the reference genes and check their stability of expression.\n\n| How to specify the normalization strategy you want to use ?\n| :---------------------------- |\n|  You can specify the normalization strategy you want to use on the Normalization method page:\n\n* **Reference genes** normalization is based on the RQ values of the housekeeping genes\n* **Global mean** normalization calculates normalization factors based on the RQ values of all genes instead of only using the reference genes. This strategy is recommended for experiments with more than 50 random genes. Random means that the genes are randomly distributed over all biological pathways.\n* **Custom value** normalization is used for specific study types. This strategy allows users to provide custom normalization factors such as for example the cell count.\n* **None** means that you choose to do no normalization at all. 
This option should only be used for single cell qPCR.\n\nWe have incorporated 3 housekeeping genes in our experiment so we select the **Reference genes** strategy.\n\n| How to appoint reference targets ?\n| :---------------------------- |\n|  You have to indicate which targets should be used as reference genes since qbase+ treats all genes as targets of interest unless you explicitly mark them as reference genes on the Normalization method page:\n\nWe have measured 3 housekeeping genes: Stable, Flexible and Non-regulated so we tick the boxes in front of their names.\n\nIt's not because you have appointed genes as reference genes that they necessarily are **good** reference genes. They should have stable expression values over all samples in your study. Fortunately, qbase+ checks the quality of the reference genes. For each appointed reference gene, qbase+ calculates two indicators of expression stability\n\n  - **M** (geNorm expression stability value): calculated based on the pairwise variations of the reference genes.\n  - **CV** (coefficient of variation): the ratio of the standard deviation of the NRQs of a reference gene over all samples to the mean NRQ of that reference gene.\n\nIt is considered that the higher these indicators the less stable the reference gene.\n\n| Are Flexible, Stable and Nonregulated good reference targets ?\n| :---------------------------- |\n|  M and CV values of the appointed reference genes are automatically calculated by qbase+ and shown on the Normalization method page:\n\nThe default limits for M and CV were determined by checking M-values and CVs for established reference genes in a pilot experiment that was done by Biogazelle. Based on the results of this pilot experiment, the threshold for CV and M was set to 0.2 and 0.5 respectively.\nIf a reference gene does not meet these criteria it is displayed in red. 
As you can see the M and CV values of all our reference exceed the limits and are displayed in red.\n\nIf the quality of the reference genes is not good enough, it is advised to remove the reference gene with the worst M and CV values and re-evaluate the remaining reference genes.\n\n| Which reference target are you going to remove ?                                                                                                                                    |\n| :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| Both the M-value and the CV are measures of variability. The higher these values the more variable the expression values are. So we will remove the gene with the highest M and CV. |\n\nYou can remove a reference gene simply by unticking the box in front of its name.\n\n| Are the two remaining reference genes good references ?\n| :---------------------------- |\n|  After removing Flexible as a reference gene the M and CV values of the two remaining reference genes decrease drastically to values that do meet the quality criteria. M and CV values that meet the criteria are displayed in green.\n\nThis exercise shows the importance of using a minimum of three reference genes. 
If one of the reference genes does not produce stable expression values as is the case for Flexible, you always have two remaining reference genes to do the normalization.\n\n[See how to select reference genes for your qPCR experiment](http://wiki.bits.vib.be/index.php/Using_GeneVestigator_to_select_candidate_reference_genes).\n\n**So after normalization you have one NRQ value for each gene in each sample.**\n\nClick **Next** to go to the **Scaling** page.\n\n#### Scaling\n\nRescaling means that you calculate NRQ values relative to a specified reference level.\n\nQbase+ allows you to rescale the NRQ values using one of the following as a reference:\n\n  - the sample with the minimal expression\n  - the average expression level of a gene across all samples\n  - the sample with the maximal expression\n  - a specific sample (e.g. untreated control)\n  - the average of a certain group (e.g. all control samples): this is\n    often how people want to visualize their results\n  - positive control: only to be used for copy number analysis\n\nAfter scaling, the expression values of the choice you make here will be set to 1 e.g. when you choose **average** the average expression level across all samples will be set to 1 and the expression levels of the individual samples will be scaled accordingly.\n\n| How to scale to the average of the untreated samples ?\n| :---------------------------- |\n|  You can specify the scaling strategy on the **Scaling** page. Select **Scale to group** and set the **Scaling group** to the **untreated** samples . This is one of the reasons why you need the grouping annotation.\n\nRescaling to the average of a group is typically used to compare results between 2 groups, e.g. treated samples against untreated controls. 
After rescaling, the average of the NRQs across all untreated samples is 1 and the NRQs of the treated samples are scaled accordingly.\n\nClick **Next** to go to the **Analysis** page.\n\n#### Visualization of the results\n\nOne of the things you can select to do on the **Analysis** page is viewing the relative expression levels (= scaled NRQs) of each of the genes in a bar chart per gene. It is recommended to visualize your results like this.\n\nIt is possible to view the relative expression levels of all genes of interest on the same bar chart. You can use this view to see if these genes show the same expression pattern but you cannot directly compare the heights of the different genes because each gene is independently rescaled\\!\n\n| How to visualize single gene expression bar charts ?\n| :---------------------------- |\n|  Select **Visually inspect results For individual targets** on the **Analysis** page and click **Finish**\n\n| How to visualize the expression levels of Palm in each sample ?\n| :---------------------------- |\n|  Select **Visually inspect results For individual targets** on the **Analysis** page and click **Finish**\n\nThe **Target** select box allows you to select the gene you want to view the expression levels of. Relative expression levels are shown for each sample. Error bars are shown and represent the technical variation in your experiment (variation generated by differences in amounts pipetted, efficiency of enzymes, purity of the samples...).\n\nYou see that Palm has a low expression level and a very large error bar in Sample05 because the two replicates of this sample had very different Cq values. 
You can group and colour the bars according to a property.\n\n| How to group the bars of Palm according to treatment (so treated at one side and untreated at the other side)\n| :---------------------------- |\n|  In the **Grouping** section you can specify the property you want to group by.\n\n| How to view average expression levels in each group ?\n| :---------------------------- |\n|  In the **Grouping** section you can choose to plot individual samples as shown above but you can also choose to **plot group average** expression levels.\n\nThe error bars that you see here represent biological variation and will be used later on in the statistical analysis. The error bars are 95% confidence intervals which means that they represent the range that will contain with 95% certainty the real average expression level in that group of samples.\nThe nice characteristic of 95% confidence intervals is the following:\n\n  - if they do not overlap you are sure that the expression levels in the two groups are significantly different, in other words the gene is differentially expressed\n  - if they do overlap you cannot say that you are sure that the expression levels are the same. You simply don’t know if the gene is differentially expressed or not.\n\n| Assess the effect of switching the Y-axis to a logarithmic scale for Palm.\n| :---------------------------- |\n|  In the **Y axis** section you can specify if you want a linear or logarithmic axis.\nAs you can see you do not change the expression values, you just change the scale of the Y axis. Switching the Y-axis to a logarithmic scale can be helpful if you have large differences in NRQs between different samples\n\n| Assess the effect of switching the Y-axis to a logarithmic scale for Flexible.\n| :---------------------------- |\n|  Switch to the bar charts of Flexible. 
By switching the Y-axis to logarithmic you can now see more clearly the differences between samples with small NRQs.\n\n#### Statistical analysis\n\nOnce you generate target bar charts you leave the **Analysis wizard** and you go to the regular qbase+ interface. Suppose that you want to perform a statistical test to prove that the difference in expression that you see in the target chart is significant.\nAt some point, qbase+ will ask you if your data is coming from a normal distribution. If you don't know, you can select **I don't know** and qbase+ will assume the data are not coming from a normal distribution and perform a stringent non-parametric test.\nHowever, when you have **7 or more replicates per group**, you can check if the data is normally distributed using a statistical test. If it is, qbase+ will perform a regular t-test. The upside is that the t-test is less stringent than the non-parametric tests and will find more DE genes. However, you may only perform it on normally distributed data. If you perform the t-test on data that is not normally distributed you will generate false positives i.e. qbase+ will say that genes are DE while in fact they are not. Performing a non-parametric test on normally distributed data will generate false negatives i.e. you will miss DE genes.\n\nChecking if the data is normally distributed can be easily done in GraphPad Prism. To this end you have to export the data.\n| How to export the data ?\n| :---------------------------- |\n|  To export the results click **the upward pointing arrow** in the qbase+ toolbar:\nYou want to export the normalized data so select **Export Result Table (CNRQ)**:\nYou will be given the choice to export results only (CNRQs) or to include the errors (standard error of the mean) as well . We don't need the errors in Prism so we do not select this option.\nThe scale of the Result table can be linear or logarithmic (base 10) . 
Without user intervention, qbase+ will automatically log10 transform the CNRQs prior to doing statistics. So we need to check in Prism if the log transformed data are normally distributed.\nAdditionally, you need to tell qbase+ where to store the file containing the exported data. Click the **Browse** button for this .\n\nExporting will generate an Excel file in the location that you specified. However, the file contains the results for all samples and we need to check the two groups (treated and untreated) separately. The sample properties show that the even samples belong to the treated group and the odd samples to the untreated group.\nThis means we have to generate two files:\n\n  - [a file containing the data of the untreated samples](http://data.bits.vib.be/pub/trainingen/qbasePLUS/resultslog.csv)\n  - [a file containing the data of the treated samples](http://data.bits.vib.be/pub/trainingen/qbasePLUS/resultslogTreated.csv)\n\nNow we can open these files in Prism to check if the data is normally distributed.\n\n| How to import the data of the untreated samples in Prism ?\n| :---------------------------- |\n|  \n* Open Prism\n* Expand **File** in the top menu\n* Select **New**\n* Click **New Project File**\n* In the left menu select to create a **Column** table. Data representing different groups (in our case measurements for different genes) should always be loaded into a column table.\n* Select **Enter replicate values, stacked into columns** (this is normally the default selection) since the replicates (measurements for the same gene) are stacked in the columns.\n* Click **Create**\n\nPrism has now created a table to hold the data of the untreated samples but at this point the table is still empty. 
To load the data:\n\n* Expand **File** in the top menu\n* Select **Import**\n* Browse to the resultslog.csv file, select it and click **Open**\n* In the **Source** tab select **Insert data only**\n* Since this is a European csv file commas are used as decimal separators so in contrast to what its name might imply, semicolons and not commas are used to separate the columns in the csv file (you can open the file in a text editor to take a look). In American csv files dots are used as decimal separators and the comma is used to separate the columns. Prism doesn't know the format of your csv file so you have to tell it the role of the comma in your file. Select **Separate decimals**\n* Go to the **Filter** tab and specify the rows you want to import (the last rows are those of the standard and the water samples, you don't want to include them)\n* Click **Import**\n\nAs the file is opened in Prism you see that the first column containing the sample names is treated as a data column. Right click the header of the first column and select **Delete**\n\n| How to check if the data of the untreated samples comes from a normal distribution ?\n| :---------------------------- |\n|  \n* Click the **Analyze** button in the top menu\n* Select to do the **Column statistics** analysis in the **Column analyses** section of the left menu\n* In the right menu, deselect **Flexible**. It's a bad reference gene so you will not include it in the qbase+ analysis so there's no point checking its normality (it is probably not normally distributed). In that respect you could also deselect the other two reference genes since you will do the DE test on the target genes and not on the reference genes.\n* Click **OK**\n* In the **Descriptive statistics** and the **Confidence intervals** section deselect everything except **Mean, SD, SEM**. These statistics are not what we are interested in: we want to know if the data comes from a normal distribution. 
The only reason we select Mean, SD, SEM is because if we make no selection here Prism throws an error.\n* In the **Test if the values come from a Gaussian distribution** section select the **D'agostino-Pearson omnibus test** to test if the data are drawn from a normal distribution. Although Prism offers three tests for this, the D'Agostino-Pearson test is the safest option.\n* Click **OK**\n\nPrism now generates a table to hold the results of the statistical analysis: As you can see, the data for Palm are not normally distributed.\n\nSince we found that there's one group of data that does not follow a normal distribution, it's no longer necessary to check if the treated data are normally distributed but you can do it if you want to. We will now proceed with the statistical analysis in qbase+. Statistical analyses can be performed via the **Statistics wizard**.\n\n| How to open the Statistics wizard ?\n| :---------------------------- |\n|  You can open it in the **Project Explorer** (window at the left):\n\n* expand **Project1** if it's not yet expanded\n* expand the **Experiments** folder in the project if it's not yet expanded\n* expand the **GeneExpression** experiment if it's not yet expanded\n* expand the **Analysis** section if it's not yet expanded\n* expand the **Statistics** section\n* double click **Stat wizard**\n\nThis opens the **Statistics wizard** that allows you to perform various kinds of statistical analyses.\n\n| Which kind of analysis are you going to do ?                                                                                                                                                                                                                                                      
|\n| :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |\n| On the **Goal** page: Select **Mean comparison** since you want to compare expression between two groups of samples so what you want to do is comparing the mean expression of each gene in the treated samples with its mean expression level in the untreated samples. Click **Next**. |\n\n| How to define the groups that you are going to compare ?\n| :---------------------------- |\n|  On the **Groups** page: specify how to define the two groups of samples that you want to compare. Select **Treatment** as the grouping variable to compare treated and untreated samples. Click **Next**.\n\n| How to define the genes that you want to analyze ?\n| :---------------------------- |\n|  On the **Targets** page: specify for which targets of interest you want to do the test. Deselect **Flexible** since you do not want to include it in the analysis. It's just a bad reference gene. Click **Next**.\n\nOn the **Settings** page you have to describe the characteristics of your data set, allowing qbase+ to choose the appropriate test for your data. \n\nThe first thing you need to tell qbase+ is whether the data was drawn from a normal or a non-normal distribution. Since we have 8 biological replicates per group we can do a test in Prism to check if the data are normally distributed.\n\n| Which gene(s) is/are differentially expressed ?\n| :---------------------------- |\n|  On the **Settings** page you describe the characteristics of your data set so that qbase+ can choose the ideal test for your data. For our data set we can use the default settings. Click **Next**. 
In the results **Table** you can see that the p-value for Palm is below 0.05 so Palm is differentially expressed.\n\n\n\nIn this example we will analyze data from another expression study with the following characteristics:\n\nAll samples fit in a single run: [Run7](http://data.bits.vib.be/pub/trainingen/qbasePLUS/DataTraining/Run7.xls)\nWe have the following samples:\n\n  - 5 control samples: control1, control2…\n  - 5 treated samples: treated1, treated2…\n  - 1 no template control: NTC\n\nThe expression of the following genes was measured:\n\n  - 2 reference genes: refgene1 and refgene2\n  - 2 genes of interest: gene1 and gene2\n\nThere are two technical replicates per reaction\n\n#### Creating a new experiment\n\n| Create a new Experiment called GeneExpression2 in Project1\n| :---------------------------- |\n| You can find the details on how to create a new experiment in Creating a project and an experiment\n\n#### Loading the data\n\n| Import [Run7](http://data.bits.vib.be/pub/trainingen/qbasePLUS/DataTraining/Run7.xls). This file is in qBase format.                    
|\n| :-------------------------------------------------------------------------------------------------------------------------------------- |\n| You can find the details on how to import the data file in the **Loading the data into qbase+** section of Analyzing data from a geNorm pilot experiment in qbase+ |\n\n#### Adding sample annotation\n\nDownload [the sample properties file](http://data.bits.vib.be/pub/trainingen/qbasePLUS/DataTraining/Sample_Properties_GE2.xlsx).\n| Add a custom sample property called Treatment.\n| :---------------------------- |\n| You can find the details on how to add a custom sample property in the **Adding annotation to the data** section of Loading data into qbase+\n\n#### Analyzing the data\n\n| Choose the type of analysis you want to perform.\n| :---------------------------- |\n| \n\n| Check controls and replicates.\n| :---------------------------- |\n| First set the minimum requirements for controls and replicates. You see that 6 replicates do not meet these requirements. Select to **Show details and manually exclude bad replicates**\nAll negative controls pass the test. Positive controls were not included in this analysis. Qbase+ will now open the results for the failing replicates: as you can see the difference in Cq values between these replicates is not that big. 
They fail to meet the requirement just slightly.\n\n| Which amplification efficiency strategy are you going to use ?\n| :---------------------------- |\n| You don't have data of serial dilutions of representative template to build standard curves so the only choice you have is to use the default amplification efficiency (E = 2) for all the genes.\n\n| Appoint the reference genes as reference targets.\n| :---------------------------- |\n| You can find the details on how to appoint reference targets in the **Normalization** section of Analyzing gene expression data in qbase+\n\n| Is the stability of the reference genes ok ?\n| :---------------------------- |\n| In the **Reference target stability window** the M and CV values of the reference genes are shown in green so the stability of the reference genes is ok. You can find the details on how to check reference target stability in the **Normalization** section of Analyzing gene expression data in qbase+\n\n| Which scaling strategy are you going to use ?\n| :---------------------------- |\n| Since you have a treated and a **control** group, it seems logical to use the average of the control group for scaling. 
You can find the details on how to specify the scaling strategy in the **Scaling** section of Analyzing gene expression data in qbase+\n\nLook at the target bar charts.\n\n| In the target bar charts group the samples according to treatment.\n| :---------------------------- |\n| You can find the details on how to group the samples in the **Visualization of the results** section of Analyzing gene expression data in qbase+\n\nThe samples of each group are biological replicates so you might want to generate a plot that compares the average expression of the treated samples with the average expression of the untreated samples.\n\n| In the target bar charts plot the group averages instead of the individual samples.\n| :---------------------------- |\n| In the **Grouping** section at the bottom of the chart you can select **Plot group average**:\n\n| Are there any genes for which you see a clear difference in expression between the two groups ?\n| :---------------------------- |\n| For gene 1, the mean expression levels in the two groups are almost the same and the error bars completely overlap.\n\nWhen you look at the title of the Y-axis, you see that 95% confidence intervals are used as error bars. In case of 95% confidence intervals you can use the following rules:\n\n* if they do not overlap: you are certain that the difference between the means of the two groups is significant\n* if they do overlap: you know nothing with certainty: the means can be different or they can be the same\n\nSo for gene 1 the means are very close but just based on the plot we cannot draw any conclusions with certainty. For gene 2, the mean expression levels in the two groups are very different and the error bars do not overlap. 
So the 95% confidence intervals do not overlap meaning that we can be certain that the difference between the means of the two groups is significant.\n\n| Use a statistical test to compare the expression levels between the two groups of samples ?\n| :---------------------------- |\n| You only have 5 replicates per group so you cannot test if the data comes from a normal distribution. Qbase+ will assume they're not normally distributed and perform a non-parametric Mann-Whitney test.\n\nThe p-value of gene2 is smaller than 0.05 so it has a statistically significant difference in expression levels in treated samples compared to untreated samples. For gene1 the p-value is 1 so we have no evidence to conclude that the expression of gene1 is different in treated compared to untreated samples. You can find the details on how to compare the means of the two groups in the **Statistical analysis** section of Analyzing gene expression data in qbase+\n","### Exercise 1: simple gene expression study\n\nIn my qPCR experiment I want to study the expression of 12 genes of interest in 8 samples of interest. I want to use 2 PCR replicates for each reaction.\n\n> How many 96 well plates do I need for this experiment ?\n> > I have 12 genes in 8 samples which gives a total of 96 reactions (one plate). I want to perform each reaction twice (2 PCR replicates) so I need two plates. However, I need to include reference genes in my experiment, preferably more than one. I can put these reference genes on a separate plate, I do not have to include them on each plate.\nIdeally, you need to include 3 reference genes so having 8 samples and 2 replicates this gives an additional 48 reactions. Thus, I need three 96 well plates to perform this experiment.\n\n| Do I need to include IRCs (inter-run calibrators) ?                      |\n| :--------------------------------------------------- |\n| No, I can easily fit all samples of the same gene on the same plate so I don't need to include IRCs. 
|\n\n### Exercise 2: a large study\n\nIn my qPCR experiment I want to study the pattern of expression of 96 genes (genes of interest and reference genes) in 96 samples of interest, divided into a few groups. I want to use 2 PCR replicates for each reaction.\n\n| Do I need to include IRCs (inter-run calibrators) ?                    |\n| :------------------------------------------------------- |\n| No, I can fit all samples of the same gene on the same plate so I don't need to include IRCs. |\n\nI want to include PCR replicates.\n\n| Do I need to include IRCs when I work on a 96 well plate ?                                                                                    |\n| :-------------------------------------------------------------------------------------------------------------------------------------------- |\n| Yes, I have 192 reactions per gene so I cannot place them on the same plate. Remember that replicates have to be located on the same plate \\! |\n\n| Do I need to include IRCs when I work on a 384 well plate ?                        |\n| :--------------------------------------------------------------------------------- |\n| No, I have 192 reactions per gene so I can even place two genes on the same plate. |\n\nI want to include no template controls but I don't want to increase the\nnumber of plates.\n\n| What is the most elegant strategy to make room for including negative controls ?      |                                                      \n| :------------------------------------------------------------------------------------ |\n| This kind of study screen for expression patterns and requires statistical analysis. Since you have many samples divided over a few groups it means you have many biological replicates so you could easily do without the PCR replicates. By doing so you preserve the biological variability which is often far greater than the technical variation. 
|\n\n### Exercise 3: how to fill plates ?\n\nIn my qPCR experiment I want to study the pattern of expression of 5 genes (genes of interest and reference genes) in 38 samples (samples of interest and control samples). I want to use 2 PCR replicates for each reaction.\n\n| What is the minimum number of 96 well plates I need for this experiment ? |\n| :------------------------------ |\n| 5 genes * 38 samples * 2 replicates = 380 reactions.\nI need a minimum of 4 plates for this experiment.\n\n| If I use the minimum number of 96 well plates do I need to include IRCs ?                                                      |\n| :----------------------------------------------------------------------------------------------------------------------------- |\n| Yes, 5 genes spread over 4 plates with 76 reactions per gene means that at least one gene will be spread over multiple plates. |\n\n| What can I do to avoid inter-run variability ?                                                                                                                                                                                         |\n| :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| I can use 5 plates and fill them with one gene each. They will not be completely filled (76 reactions) but at least I do not have to use IRCs (which are additional reactions that also cost money) and I have no inter-run variation. |\n\nSuppose there's only one 96-well plate left in your lab. You have 10 samples (samples of interest + control samples) and you want to make the most of what you have.\n\n| How many genes of interest would you measure ? 
|\n| :------------------------------ |\n| Since you want to make most of what you have, let's assume you are omitting PCR replicates.\nTheoretically, you could fit 9 genes on your 96-well plate. However, to avoid pipetting mistakes I would measure only 8 genes so I can work with one row / gene. This is very handy for multichannel pipets.\n\n### Exercise 4: a growing study\n\nIn my qPCR experiment I want to study the pattern of expression of 24 genes (genes of interest and reference genes) in 48 samples (samples of interest and control samples). I want to use 2 PCR replicates for each reaction.\n\n| How many genes can I analyze on one 384 well plate ? |\n| :------------------------------ |\n| 48 samples * 2 replicates = 96 reactions per gene.\nI can analyze 4 genes on each 384 well plate.\n\nEach week I receive 2 additional samples to analyze.\n\n| Do I analyze them immediately after I get them ? |\n| :------------------------------ |\n| No. Since the samples are placed on different plates as in the previous experiment, you have to use IRCs. You typically need 3 IRCs and a no template control sample. It means that if you want to analyze these 2 samples you have to include 4 additional samples for each gene. This is a lot of overhead for just 2 samples !\nTry to avoid this: it's better to wait a few weeks until you have 6 or 8 or even more samples.\n\n### Exercise 5: a diagnostic copy number screen\n\nIn diagnostic screens all samples are important: you cannot leave out samples and all measurements need to be of the highest quality possible. In my qPCR experiment I want to study copy number variation of 16 genes\n(genes of interest and reference genes) and 2 calibrator samples (samples with known copy number). Since we need high quality data we will use 4 technical replicates.\n\n| Are we going to use sample maximization ?                                   |                   \n| :------------------------------------------------------------------------- |\n| No. 
In contrast to gene expression studies, where we want to compare expression levels of a gene between different groups of samples, copy number analyses do compare genes. It means that in this case the sample maximization approach (placing all samples of the same gene on the same plate) is not valid. Instead we use a gene maximization approach here (placing same sample for different genes on the same plate). |\n\n| How many samples can I fit on a 384 well plate ? |\n| :------------------------------ |\n| We have 16 (genes) * 4 (replicates) = 64 reactions per sample.\nThis means that we can fit 6 samples on a 384 well plate: 4 unknowns and 2 calibrators.\n\n### Exercise 6: fix experiments with bad or missing data\n\nIn my qPCR experiment I want to study gene expression of 6 genes (3 genes of interest and 3 reference genes) in 20 samples (samples of interest and control samples). I want to use 2 technical replicates. One of my genes of interest failed completely and I want to repeat the measurements for this gene in a new run.\n\n| Do I need to include IRCs ?                                                                                |\n| :--------------------------------------------------------------------------------------------------------- |\n| No. We can put the 20 samples of the gene that failed on a single plate so we do not have to include IRCs. |\n\n| Do I need to include reference genes ?                                                                 |\n| :----------------------------------------------------------------------------------------------------- |\n| No. We just repeat all samples for the gene that failed and replace the old data with the new results. |\n\nOne of the reference genes failed completely.\n\n| What should I do ?                                                                                                                                                                                                                                 
                                                    |\n| :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| Depending on the quality of the two remaining reference genes, you should either do nothing or do the same as in the previous example where one of your genes of interest failed. If the two remaining reference genes are stable you can do the normalization with the two remaining reference genes. |\n\nThree samples failed completely.\n\n| What's the first thing I need to do ?                                                                                                                             |\n| :---------------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| Since they failed completely, they are probably of low quality. Therefore, you have to prepare the samples again, check their quality and then use them for qPCR. |\n\n| Do I need to include IRCs ?                                                                                             |\n| :---------------------------------------------------------------------------------------------------------------------- |\n| Yes. If you want to compare these samples with the samples that didn't fail, you have to perform inter-run calibration. |\n\nThree samples failed for one of the genes of interest\n\n| What is the first question I need to ask ? |\n| :----------------------------------------- |\n| Is the gene expressed in these samples ?   |\n\n| Is it possible the RNA of these three samples was of low quality ?        
|\n| :------------------------------------------------------------------------ |\n| Not likely, the measurements for the other genes in these samples are ok. |\n\nThree samples failed for one of the reference genes\n\n| Can I use the measurements of that reference gene in the non-failing samples for normalization ?                                         |\n| :--------------------------------------------------------------------------------------------------------------------------------------- |\n| No, qbasePLUS requires that you use the same reference genes for all samples so you have to discard all samples for that reference gene. |\n\n### Exercise 7: dilution series for calculating amplification efficiencies\n\nIn my qPCR experiment I want to study 8 new genes for which I had to design new primer pairs in 12 samples (samples of interest and control samples). I want to use 2 technical replicates and 96 well plates.\n\n| What is the first thing I need to do ?                                                        |\n| :-------------------------------------------------------------------------------------------- |\n| Perform a pilot experiment to determine the amplification efficiencies of these primer pairs. |\n\nFor this I need a dilution series of representative cDNA template.\n\n| How many dilutions would you include ?                                           |\n| :------------------------------------------------------------------------------- |\n| A dilution series with 6 dilutions for 8 genes nicely fits into a 96 well plate. |\n\nA few weeks after my initial qPCR experiment I want to test these 8 genes in a new set of samples.\n\n| Do I have to repeat the pilot experiment ?      |\n| :---------------------------------------------- |\n| No, dilution series do not need to be repeated. 
|","You need to do inter-run calibration if you want to compare samples from different runs e.g.:\n\n  - when it is not possible to get all samples for the same gene on the same plate\n  - when you do additional runs weeks or months after your initial experiment\n\nOf course there is a lot of variability between runs on a qPCR instrument:\n\n  - the thermal block does not always heat uniformly\n  - quality of the lamp, the filters and the detector decreases over time\n  - data analysis settings on the qPCR instrument (baseline correction and threshold) can be slightly different\n  - efficiency of reagents (polymerase, fluorophores) is variable\n  - optical properties of the plastic plates vary\n\nFortunately, inter-run calibration allows you to eliminate most of this variability.\n\nIn this experiment we will analyze the data from the gene expression experiment (see Analyzing gene expression data in qbase+) together with data from 2 runs (Run4 and Run5) that were done weeks after the initial gene expression experiment.\n\nBecause the data comes from two different experiments spread over time, we have included three inter-run calibrators on the plates: Sample01, Sample02 and Sample03.\n\nThe principle of the IRCs is very similar to that of the reference genes:\nIn theory, the IRCs should have the same NRQ in each run. 
In practice, the difference in NRQ between two runs is a measure of the inter-run variation and can be used to adjust the NRQs to remove the inter-run variation.\n\n#### Creating a new Experiment\n| Import [Run1](http://data.bits.vib.be/pub/trainingen/qbasePLUS/DataTraining/Run1.xls), [Run2](http://data.bits.vib.be/pub/trainingen/qbasePLUS/DataTraining/Run2.xls), [Run3](http://data.bits.vib.be/pub/trainingen/qbasePLUS/DataTraining/Run3.xls) (all three in CFX format), [Run4](http://data.bits.vib.be/pub/trainingen/qbasePLUS/DataTraining/Run4.xls) and [Run5](http://data.bits.vib.be/pub/trainingen/qbasePLUS/DataTraining/Run5.xls) (the latter two are in qBase format).\n| :----------------------------- |\n| Since the data is in files of two different formats, you have to do a separate import for each format. So first import Run1, Run2 and Run3, then import Run4 and Run5. You can find the details on how to import CFX files in Loading data into qbase+.\nThe details of importing qBase files are in Analyzing data from a geNorm pilot experiment in qbase+\n\n#### Analyzing the data\n\n| Use assay specific amplification efficiencies.\n| :----------------------------- |\n| You can find the details on how to convert the targets in the **Taking into account amplification efficiencies** section of Analyzing gene expression data in qbase+\n\nIn Analyzing gene expression data in qbase+ we have already checked the stability of the reference genes (see **Normalization** section). 
We determined that Flexible did not show stable expression.\n\n| Convert Stable and Nonregulated to Reference targets.\n| :----------------------------- |\n| You can find the details on how to convert the targets in the **Normalization** section of Analyzing gene expression data in qbase+\n\n| Appoint Sample01, Sample02 and Sample03 as IRCs.\n| :----------------------------- |\n| Leave the Analysis wizard by clicking the **Close wizard** button in the top menu.\n\n - Expand **Intermediate results** (red) in the **Project Explorer**\n - Double click **Interrun calibration** (green)\n\nThis opens the **Interrun calibration window**:\n\n - Click the **New** button (blue) to create an IRC\n - Once the IRC is created you have to appoint samples to it: select **Sample01** in the list of **Other samples**\n - Click the **Add Sample** button (purple)\n - Remember that you cannot give IRCs the same name in different runs: the software would think that they are technical replicates spread over different plates (which is not allowed). Therefore, in Run4 and Run5 we have given Sample01 another name: Sample01_2. Select **Sample01_2** in the list of **Other samples**\n - Click the **Add Sample** button (purple)\n\nYou have appointed the first IRC (grey), now do the same for the other two IRCs.\n\nRemember that for each target the variability of the normalized\nexpression levels of the IRCs between different runs will be used to\nadjust the other normalized expression levels of that target gene. 
The\nadjustment is done by scaling the normalized expression levels with a\ncalibration factor that is calculated based on the normalized expression\nlevels of the IRCs.\nSince variability between runs is the same for each IRC, you expect that\nall IRCs measure the variability between the runs to the same extent,\nhence leading to similar calibration factors.\n\n| Do these IRCs generate similar calibration factors ?\n| :----------------------------- |\n| Open the **Calibration Factors** tab (red) of the **Interrun calibration window** and look at the result for Duvel:\n\nYou see that IRC2 returns a substantially different calibration factor in Run5 (green) so the validity of this IRC should be interpreted with care.\nFor Leffe the IRCs also give inconsistent results in Run5. Switch to the results for Leffe by selecting **Leffe** in the **Targets** list (blue)\n\n| Do you still see the same expression pattern for Palm as you did in the first three runs ?\n| :----------------------------- |\n| Open the target bar chart for Palm.\n\nYou see that the pattern Palm showed in the first three runs (sample01 to sample16): high expression in the odd and low expression in the even samples is reversed in the samples from Run4 and Run5 (sample17 to sample25). In the latter runs you see high expression in the even and low expression in the odd samples. However, without annotation for Run4 and Run5 (which samples are treated and which not) it's impossible to interpret the bar chart.\n\n1. [Link](http://youtu.be/OJFsuZqNUHs)","The following exercise will make you familiar with the Primer3Plus software for designing primers for PCR. 
Primer3Plus is the user-friendly version of Primer3, the standard software for primer design.\n\n### Criteria for qPCR primers\n\nPrimers for qPCR have to follow all the guidelines for regular primers and an additional set of rules specific for qPCR primers:\n\n  - qPCR products are small: 80-160 bp\n  - use intron or exon-exon junction spanning primers to detect genomic DNA contamination in the RNA samples. Primers of intron spanning primer pairs are located at both sides of an intron and will therefore generate a larger product on genomic DNA (containing the intron). Primer pairs containing an exon-exon junction spanning primer will not generate a PCR product on genomic DNA since the exon-exon junction only exists in the cDNA.\n  - primer length between 9 and 30 bp with an optimum at 20 bp\n  - melting temperature (Tm) of the primers between 58 and 60°C with an optimum at 59°C\n  - maximum Tm difference between the primers of a pair: 2°C\n  - GC content of the primers between 30 and 80% with an optimum at 50%\n  - the 5 nucleotides at the 3' end of the primers should have no more than 2 G or C bases\n  - avoid runs of 4 or more identical nucleotides (especially Gs)\n  - primers must specifically target the region you want to amplify\n\nThere are many programs for designing primers, the most important ones:\n\n  - [Primer3](http://frodo.wi.mit.edu/) \\[1\\] or use its user-friendly version: [Primer3Plus](http://primer3plus.com/cgi-bin/dev/primer3plus.cgi)\\[2\\]\n  - [PrimerBLAST](http://www.ncbi.nlm.nih.gov/tools/primer-blast/index.cgi?LINK_LOC=BlastHome)\\[3\\]\n\nThe major downside of Primer3 and Primer3Plus is the fact that you have to check the specificity of the primers yourself. Primer3 will suggest a number of primer pairs that fulfill all of the above requirements, but Primer3 will not check the specificity of the primers. So you have to use BLAST to check the specificity of the suggested primer pairs. 
Very often, the selected primers are not specific and you have to repeat the entire Primer3 analysis.\nIf you use Primer3 and do the BLAST yourself, BLAST against Refseq sequences unless they are not available for the organism you work with or you have reasons to believe that they are not complete (i.e. they do not represent the full genome). For model organisms, you can BLAST against the Refseq database. Limit the database to sequences from the organism you work with.\nAdditionally, it is especially important to check that the primers are specific at the 3' end because that's the site where the polymerase will attach nucleotides. So it is recommended to not use primers that contain long identical stretches (\\> 15nt for primers of 20nt long) to other regions in the genome, and certainly not if these stretches comprise the last nucleotide at the 3' end of the primer.\nFor these exercises we will use PrimerBLAST since [it uses the same algorithm to pick primers as Primer3](http://www.ncbi.nlm.nih.gov/tools/primer-blast/primerinfo.html) \\[4\\] and does the specificity check for you\\!\n\n## Designing qPCR primers for the fruit fly tap gene\n\n### Designing qPCR primers using PrimerBLAST\n\nThe RefSeq entry NM_079400 contains the sequence of the D. melanogaster mRNA coding for tap, the target of Poxn. Tap encodes a bHLH protein expressed in larval chemosensory organs and involved in the response to sugar and salt. We wish to amplify the region encoding the Helix-loop-helix domain. In the sequence of the RefSeq record, the domain is located between position +577 and +745.\nWe want to design qPCR primers for measuring the expression level of the hlh domain using SYBR green. Remember that it is advised to design intron/exon-exon junction spanning primers for qPCR experiments that are based on fluorescent labels to detect/avoid amplification of contaminating genomic DNA.\n\n| Check in NCBI's Gene database if the hlh domain contains any introns ? 
|\n| :------------------------------ |\n|To know the location of the introns, you need the genomic sequence instead of the mRNA sequence.\n\n - Go to [the NCBI RefSeq record](https://www.ncbi.nlm.nih.gov/nuccore/NM_079400).\n - In the right menu click the link to the **Gene** record\n - In the **Genomic regions, transcripts and products** section you can see that the gene contains no introns: the transcript is not chopped up into pieces when aligned to the genome. Click [here](https://www.ncbi.nlm.nih.gov/gene/39934) for an example of a gene with introns.\n\nNext, we will design primers to measure the expression of the hlh domain.\n\n| Go to Primer BLAST by using the link in the Refseq record |\n| :------------------------------ |\n|Go back to the RefSeq mRNA record. There, you can go directly to PrimerBLAST by clicking the **Pick Primers** link in the **Analyze this sequence** section of the right menu.\n\nSince you want to measure the expression of the hlh domain you want\nprimers that are located inside the domain.\n\n| Define the range of the sequence in which you want to design primers. |\n| :------------------------------ |\n|You have to specify the range as follows:\n\n| Define the primer parameters to comply with the rules of qPCR primer design: product size and Tm. |\n| :------------------------------ |\n|To comply with the rules for qPCR primer design, you have to change the settings for PCR product size and melting temperature:\n\n| The PrimerBLAST automatically decides to check primer specificity in the Drosophila (organism ID: 7227) RefSeq mRNA database which is exactly what you want. For the qPCR you are going to use RNA samples from fruitfly. This means that the primers will only come into contact with Drosophila mRNAs so you only have to check their specificity in this database. Make sure the last 2 nucleotides are completely specific. 
|\n| :------------------------------ |\n|You want to ensure that the 3' end of the primers really is specific:\n\nThe PrimerBLAST gives you a set of 9 primer pairs that are specific (according to the criteria that you have specified) and that fulfill all other requirements that you have defined. Look at the detailed report of the first primer pair:\nAll parameters are quite self-explanatory except for the Self complementary and Self 3'complementarity scores.\n\n  - The first score represents the local alignment score when aligning a primer to itself. The scoring system gives 1.00 for a match, -1.00 for a mismatch. This means that the lower the score (the more mismatches), the less likely that the primer binds to itself.\n  - The second score represents the global alignment score when aligning a primer to itself. Here again, the lower the score, the better.\n\nThe scores are followed by information on the specificity of the primer: alignments of the two primers to all target sequences from the database that match the criteria that you specified. In these alignments dots represent matching nucleotides while letters represent mismatches. A specific primer pair will have two alignments (one for each primer): both perfect alignments (all dots) to the sequence you want to amplify.\n\n### Analyzing primer characteristics using OligoAnalyzer\n\n[OligoAnalyzer](https://eu.idtdna.com/calc/analyzer) is a tool implemented by ID\\&T (who sell primers) to check the characteristics of your primers. Take the first primer that is suggested by Primer-BLAST, the pair resulting in a product of 100bp.\n\n| What's the Tm of the first primer ? |\n| :------------------------------ |\n|Copy the sequence of the first primer in the **Sequence** box, adjust the concentrations to these that are typically used in PCR (see slides) and click **Analyze**:\nAs you can see the predicted melting temperature is 63.9 ºC, which is slightly different from the prediction made by BLAST. 
There are many different methods to predict Tm and each method will give a different result. Assumed concentrations of primers and ions have an enormous impact on the Tm prediction. So don't worry about these differences: these are theoretical calculations anyway, the only way to determine Tm values is by doing actual PCR. As long as the difference in Tm between the two primers is not too large, everything is fine.\n\n| What's the Tm of the second primer ?                                                                                                                                             |\n| :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |\n| Copy the sequence of the second primer in the **Sequence** box and click **Analyze**. The predicted melting temperature is also 63.9 ºC , the same Tm as the first primer. |\n\nRemember that the second primer had a large Self complementarity score according to PrimerBLAST.\n\n| Check the self-complementarity of the second primer in OligoAnalyzer ? |\n| :------------------------------ |\n|Click **Self-Dimer**:\n\nYou see that the highest scoring alignment indeed has 6 matches, giving a score of 6 as predicted by PrimerBLAST.\n\n| Do you expect this self-complementarity will give problems in the PCR ? |\n| :------------------------------ |\n|No, the complementarity is concentrated at the center of the primer, not at the 3' end. Since polymerases add bases at the 3’ end of the primer, the primer duplex cannot be extended so it will not give rise to aspecific products. 
[ID&amp;T](https://eu.idtdna.com/pages/docs/default-source/default-document-library/idt_self-dimer_and_hetero-dimer_results_guide.pdf) recommends to avoid complementary stretches of more than 2 bp at the 3’ end.\nHowever, even if the primer dimer cannot be extended, it could interfere when its formation competes with the annealing of primer and target. This is only the case when the stability of the dimer is similar to the stability of a perfectly matched primer-target duplex. The stability of the perfectly matched duplex is shown as a Maximum Delta G at the top of results. So non-extendable dimer structures that are much shorter than the intended duplex, as we have here, are not going to disrupt the PCR reaction.\nIt is advised to review all possible interactions between primers so both Self-Dimer (primers binding to themselves) and Hetero-Dimer (primers binding to each other) interactions between primers are examined.\n\n| Is it likely that the primers bind to each other ? |\n| :------------------------------ |\n|Click **Hetero-Dimer**:\n\nThis opens a text box to enter the second primer. Click **Analyze**. There is one structure (the fourth one) that looks problematic because there is a stretch of 3 matching nucleotides at the 3'end of one of the primers.\n\nSo you might consider taking a look at the second pair of primers that PrimerBLAST suggests. On the other hand, this structure is has relatively high free energy (delta G). The structure with the lowest total free energy, the target-primer duplex, is most important because it will dominate in solution. Structures with higher free energy are less stable and will be present in smaller amounts in the reaction mixture.\n\nTake a look at the second primer pair that was suggested by PrimerBLAST.\n\n| Is it likely that these primers bind to each other ?                                 
|\n| :----------------------------------------------------------------------------------- |\n| No these primers do not form duplex structures that could pose a problem during PCR. |\n\n## Designing qPCR primers for the human F9 gene\n\n### Designing qPCR primers using PrimerBLAST\n\nThe RefSeq entry NM_000133.3 contains the sequence of the human mRNA coding for coagulation factor F9. The gene contains 8 coding exons and gives rise to a transcript of 2780 bp encoding a protein of 461 amino acids.\nNext, we want to design primers to measure the expression of the F9 gene.\n\nGo to [the RefSeq record of this transcript](http://www.ncbi.nlm.nih.gov/nuccore/NM_000133.3) to study its structure. When you scroll down to the **features** section you see that the CDS is located from position 40 to position 1415. Since RNA degradation starts at the 5'end of transcripts, we don't want to pick primers at the 5'end. On the other hand, we don't want to pick primers in the long 3'UTR either because it doesn't contain any introns (the exons are all coding) and we want to design exon-exon junction or intron spanning primers.\nLet's try to find exon-exon junction spanning primers between position 400 and 1600, with optimal anneal temperature = 60.\n\n| Find primers that fulfill the above defined criteria |\n| :------------------------------ |\n|Go to [PrimerBLAST](http://www.ncbi.nlm.nih.gov/tools/primer-blast/index.cgi?LINK_LOC=BlastHome) and fill in the form as follows:\n\nExclude predicted sequences in the database to search in .\n\n| Find primers that fulfill the above defined criteria |\n| :------------------------------ |\n|Go to [PrimerBLAST](http://www.ncbi.nlm.nih.gov/tools/primer-blast/index.cgi?LINK_LOC=BlastHome) and fill in the remainder of the form as follows:\n\nThe PrimerBLAST gives you a set of 10 primer pairs. 
Look at the detailed\nreport of the first primer pair:\n\nAs you can see the primers are not specific: they can bind to various other targets albeit with lower affinity because of the mismatches. The best option seems to be primer pair 7, which binds to both F9 transcript variants and potentially to one unintended target, but as you can see the last nucleotide at the 3' end of both primers is specific.\n\n### In silico PCR in the UCSC Browser\n\nWe will proceed using the third primer pair Primer-BLAST suggests. You can visualize the PCR product (and additional annotation) in the UCSC Genome Browser using [UCSC's In Silico PCR tool](http://genome.ucsc.edu/cgi-bin/hgPcr).\nSelect the most recent version of the human genome and paste the sequences of forward and reverse primers in their respective boxes. Click **submit**\nNormally, this returns the location and the sequence of the PCR product but our primer pair doesn't return a match. When you think about it, this was to be expected since we are working with exon-exon junction spanning primers that are not able to match the genome sequence. So checking SNPs is not so straight-forward in the case of exon-exon junction spanning primers.\nWe will repeat the primer search now searching for intron-spanning primers to show you how to use the in silico PCR tool. Taking into account the fact that the results for the exon-exon junction spanning primers were so messy we will make the search more stringent this time:\n\n  - We will set the minimum number of mismatches to 4\n  - and at least 3 mismatches in the last 3 bps at the 3'end\n\n| Find intron spanning primers that fulfill the above defined criteria |\n| :------------------------------ |\n|Go back to the Primer-BLAST and fill in the form like in the previous exercise except that they should span an intron:\n\nPrimer-BLAST returns 10 primer pairs. 
Again the seventh primer pair is\nthe specific one.\n\n| Take the seventh suggested primer pair and check for SNPs in the UCSC Browser |\n| :------------------------------ |\n|Go to [UCSC's In Silico PCR tool](http://genome.ucsc.edu/cgi-bin/hgPcr) and paste the sequences of forward and reverse primers in their respective boxes.\nThis time the search finds a PCR product:\n\nClicking the location visualizes the PCR product in the UCSC genome browser. Remove unnecessary tracks by right clicking the box in front of them and selecting **hide**\n\nAdd tracks showing relevant annotation like position of SNPs...\n\nSetting the SNPs track from **hide** to **full** shows the SNPs in the browser. Center the forward primer by grabbing and dragging it to the center.\n\nZoom in to **base** display to see if the forward primer is matching any SNPs.\n\nAs you can see the forward primer does match two SNPs but none of them are located near the 3'end of the primer.\n\n1.  <http://frodo.wi.mit.edu/>\n2.  <http://primer3plus.com/cgi-bin/dev/primer3plus.cgi>\n3.  <http://www.ncbi.nlm.nih.gov/tools/primer-blast/index.cgi?LINK_LOC=BlastHome>\n4.  <http://www.ncbi.nlm.nih.gov/tools/primer-blast/primerinfo.html>","# 1. Introduction\n\nRstudio is a popular platform for downstream data-analysis, statistics, machine learning and more scientific related analysis using the R language. If you're unfamiliar with R and Rstudio, some materials on this website that will get you started are accessible via [this link](https://material.bits.vib.be/topics/R/). Until now we have focused on the core principles of Git & GitHub, which gives us enough knowledge to start integrating in other platforms. \n\nThere are three plausible scenarios:\n1. You have a version controlled project on your computer which you want to integrate in Rstudio\n2. You have a version controlled project on GitHub which you want to integrate in Rstudio locally \n3. 
You have an Rstudio project that you now want to start version controlling\n\nCreating a version controlled project in Rstudio from each of these scenarios is discussed in section 2: *Starting a project*. Exploiting Git's features in Rstudio is further exploited in section 3: *Exploring Git's integration in Rstudio*.\n\nWe will exploit the repository that we created in the previous chapters of this tutorial. A sample repository is also downloadable [here](https://github.com/vibbits/introduction-github). Download the repository as a ZIP-file and extract it.  \n\n# 2. Starting a project \n\n## 2.1 Integrating a version controlled project in Rstudio (scenario 1 & 2)\nLet's start by making a new project (File > New project...). The following screen pops up:\n\n---\n\n<center><img src=\"../../images/rstudio-1.PNG\" /></center>\n\n---\n\nThere are two options relevant for us to create a project in RStudio initialized with GitHub:\n- **Existing Directory**: The preferred choice when a project folder already exists and which has previously been initialized with Git. \n- **Version Control**: Ideally for creating a new R project based on a repository in GitHub. \n\nGiven the situation that there is a folder on our computer, created during this tutorial and initialized with Git, we will go for the first option. Select **Existing Directory**, browse to the location of the project folder and create the project. (If you've downloaded the sample repository mentioned above, this option does not hold as it only downloads the files)\n\n**Alternatively**, if we were to choose to create a new R project based on a GitHub repository, you would need to select **Version Control**, followed by *Git* and then copy the link of the GitHub repository from the green *Clone or Download* button and add it as the repository URL, and finally create the project. 
Using the sample repository for this option would mean that we need to fill in the following link as repository URL: *https://github.com/vibbits/introduction-github.git*.\n\n---\n\n<center><img src=\"../../images/rstudio-2.PNG\" /></center>\n\n---\n\nNotice that after creating the repository, a `.gitignore` file is added on the fly containing the following 4 lines. These lines will make sure that irrelevant information related to Rstudio is neglected.   \n```\n.Rproj.user\n.Rhistory\n.RData\n.Ruserdata\n``` \n\n## 2.2. Initiating version controlling on an existing Rstudio project (scenario 3)\nA third option assumes that you already have an R/Rstudio project. Click on *Tools > Version control > Project Setup...*. In the new screen, select Git as the version control system as depicted below and select yes when asked \"Do you want to initialize a new git repository for this project?\". Rstudio will need to restart for the changes to take place.\n\n---\n\n<center><img src=\"../../images/rstudio-7.PNG\" /></center>\n\n---\n\nThis approach will initialize Git on the project. As discussed in chapter 3, this local repository does not exist on GitHub yet, hence we can't push our commits to GitHub. In order to do so, we'll have to make a repository on GitHub first (see chapter 3.2.). This repository should be initialized without(!!) a README file, `.gitignore` file or license. Copy the link that GitHub created for the new repository (e.g. https://github.com/vibbits/rstudio-project.git). In Rstudio, find a *Git* tab in the upper right corner and click on *New Branch* (or the icon next to it).  \n\n---\n\n<center><img src=\"../../images/rstudio-8-1.PNG\" /></center>\n\n---\n\nClick on *add remote* in the new screen, paste the GitHub link and add the name of the project. \n\n---\n\n<center><img src=\"../../images/rstudio-9.PNG\" /></center>\n\n---\n\nFinally, add the name of the new branch *main* and hit create. Select *overwrite* when asked.  
\n\n---\n\n<center><img src=\"../../images/rstudio-8-1.PNG\" /></center>\n\n---\n\n# 3. Git's features in Rstudio\n\nBy initializing Git on an Rstudio project, there appears a *Git* tab in the upper right corner as depicted below. The tab consists of the main actions that can be performed with Git (the window might be too small to contain the keywords related to the symbol). Neglecting the *diff* keyword which is out of scope for this tutorial, we can find the following actions: *Commit, Pull, Push, History* and *More* followed by *New Branch*, the name of the branch (*main*) and a refresh button.\n\n- **Stage**: The only action we're missing is the *staging*. Rstudio & Git actually continuously process the files within the project searching for new changes. If there is a new change it will appear in the list in the screen as depicted here for the `.gitignore` file. \n- **Commit**: Opens a new screen that controls the staging area and committing. \n- **Pull**: Pulls upstream changes from the GitHub repository into our, this local repository.\n- **Push**: Pushes previous commits to the GitHub repository.\n- **History**: Neatly visualizes the history log of the repository. Each commit, branch, contributor is reviewed in this screen. \n- **More**: Allows us to revert (undo) changes to a previous commit or ignore selected files (discussed below).\n- **New Branch**: Creates a new branch. \n\n---\n\n<center><img src=\"../../images/rstudio-3.PNG\" /></center>\n\n---\n\n\n# 4. Routine usage\n\nRecall the routine usage: *stage-commit-push*. Staging changes in Rstudio is done by simply checking the tickmarks in the list. This approach makes it very user-friendly to stage changes that are related with each other and that should be contained within the same commit. 
\n\n--- \n\n<center><img src=\"../../images/rstudio-4.PNG\" /></center>\n\n---\n\nSubsequently, click on commit and find a similar screen:\n\n--- \n\n<center><img src=\"../../images/rstudio-5.PNG\" /></center>\n\n---\n\nLet's explore this screen for a while: \n- We can find a *history* tab summarizing all the previous commits in this repository. As this project already existed before, it also contains the commits from before the integration in RStudio. \n- Next to that tab we can switch the branch, generally we leave this untouched as we're already in the preferred branch. \n- The *staging* tab allows us to stage and unstage specific files, even after they were staged in a previous step.\n- The *revert* tab is neglected in this tutorial\n- *Ignore* allows us to edit the `.gitignore` file by simply selecting the file that we want to ignore and clicking on *Ignore*. \n\nIf you're happy with the changes and the staging area, a commit message is written in the right tab and finalized by hitting the *Commit* button. A message will pop up summarizing the commit in a technical way. \n\nIf the commit has to appear on GitHub we need one more step. Click on *Push* and find your new status of the project in the GitHub repository.\n\n\n---\n\n> ### {% icon hands_on %} Exercise \n>\n> Add the `.gitignore` file to the staging area and exploit the *Ignore* button to add the *Rproj* file to the `.gitignore` file. Write a commit message, and commit and push your changes to GitHub. If the *Rproj* file already is in the `.gitignore` file, make a new example R-script which you can ignore. \n>\n>\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    > \n>    > Select *File > New File > R Script*, write something like `# test` and save the file. When they are saved, they will appear in the Git-tab. Select the files in the Git-tab and click on *More > Gitignore*. 
When you do this, the explicit name of the file will appear in the gitignore file. *Click* on Save. Now the gitignore file will appear in the Git-tab, ready to be staged, and the new file (or *Rproj* file) has disappeared from it. \n>    > The rest of the workflow remains the same. Click on the tickmarks to stage the files, click on commit, write a message in the designated textbox and push your changes to the repository on GitHub. \n>    > \n>    > \n>    > </details>\n> \n{: .hands_on}\n\n---\n \n\n","## What is Linux?\nLinux is a very popular operating system in bioinformatics. In this training you will learn why that is and how it can help you with your bioinformatics analysis. After this training you will be able to:\n- install software on Linux\n- use command line to run tools\n- use command line to handle files\n- write small scripts to automate your analysis\n\n## Linux installation\n### Live modus\nWant to test a Linux distribution? Follow this procedure: \n- Grab a USB key and put your Linux distribution (e.g. Ubuntu) on it. \n- Boot your computer from that bootable USB key, and you have a full linux OS to play around with. This 'live modus' is an easy way to test the new stuff linux has to offer. \n- Before you test anything else, check if your hardware works (printer, sound,...) and check internet connection. \n- Secondly, do you like the desktop environment? Does it suit your needs? Play around and test. \nDone testing? Just reboot your computer, remove the USB key, and the original operating system will start up again as if nothing has happened...\n\n### Virtual machine\nGo to <https://www.virtualbox.org> and choose Downloads. Download the correct installer for your platform and install VirtualBox on your computer.\nSometimes VirtualBox displays errors when starting. Or trying VirtualBox for the first time, a virtual machine might not start. 
These problems might be related to not having virtualization enabled on your CPU.\nAll the latest processors and motherboards support virtualization technology (vt-x/amd-v). In many cases, VirtualBox requires this to be enabled. To do so, you have to reboot your computer, and get into the BIOS menu. In the BIOS menu, you should enable virtualization. Where this setting is located is different between computers, so check your hardware vendor for the BIOS options, or browse around in your BIOS menu until you find it. Most of the time it is named in a decent way. Enable the option, and boot your computer.\n\nWe need to download an .iso file, which is a (binary) copy of an installation DVD containing your distribution of choice. You can find it in the downloads section of the distribution's web page. You can download it using a direct download, depending on your preference and the options offered by the distribution's web page.\nYou can run Linux in 'live modus' (see instructions above) and install it directly on your virtual machine. Afterwards you have to reboot your virtual machine to get out of the live modus.\n\n### Dual boot\nMulti-booting allows more than one operating system to reside on one computer, for example if you have a primary operating system and an alternate system that you use less frequently. Another reason for multi-booting can be to investigate or test a new operating system without switching completely. Multi-booting allows a new operating system to configure all applications needed, and migrate data before removing the old operating system.\n\n## Training material\n[slides](http://data.bits.vib.be/pub/trainingen/Linux/Command_line_2019.pdf)\n\nOn the training there is a Linux Ubuntu installation available on a cloud environment. 
To access Linux we use Google Chrome and the 'VNC Viewer for Google Chrome' application.\nWhen you launch the application, you have to enter an IP address, this will be mentioned on the training.\n\n### Additional information\n- [Linux Beginner's Cheat page](https://wiki.bits.vib.be/index.php/Linux_Beginner%27s_Cheat_page)\n- [The practical command line cheat sheet](https://wiki.bits.vib.be/index.php/The_practical_command_line_cheat_sheet)\n- [AWK](https://wiki.bits.vib.be/index.php/AWK)\n- [Terminal keyboard shortcuts](http://data.bits.vib.be/pub/trainingen/cheat-sheets/Bash_Keyboard_Shortcuts.pdf)","## A script\nA script is just a plain text file. I will show this below. It contains written instructions, that can be understood by a programming language, in our case **bash** .\n\n### An example script \n\n> Create a text file named 'buddy' in your home with following content:\n```\nbadday=\"Cheer up\"\ngoodday=\"Doing great\"\necho \"$badday, $USER !\"\necho \"$goodday, $USER !\"\n```\n\n> One way of doing this is:\n```\nnano buddy\n```\n\n> and copy of the contents of the header above. Save the contents by pressing <ctrl>+O. Close nano with <ctrl>+x\n> What type of file did you create?\n```\nfile buddy\nbuddy: ASCII text\n```\n\n> That file contains plain text. To execute the commands in that file, feed it as an argument to the program 'bash'.\n```\nbash buddy\nCheer up, bits !\nDoing great, bits !\n```\n\nFew things to notice:\n- in the script, we have defined 2 **variables** 'badday' and 'goodday'\n- their values can be displayed by the program **echo** which takes as an argument the name of the variable preceded by a **$** sign.\n- the $USER variable, is an **environment variable**. They can be used in scripts. Env variables are typically written in capitals.\n\n### Getting more professional\nWe can make this easier. 
If you start your script with the symbols '#!' and next specify the path to the interpreter, the terminal will feed this script automatically to the right interpreter for you! To see what this means, follow these steps.\n> Find out the path to the program bash\n```\nwhich bash\n/bin/bash\n```\n\nNow that we know the path to bash, we have to provide this path, on the very first line, preceded by **#!** (shebang or crunchbang). If you have another type of script, let's say perl, you find out the path to perl, and add this path behind a #! on the very first line.\n> Open the text file 'buddy', and add at the start of the file '#!' followed by the path to bash:\n```\nnano buddy\n```\n\n... edit the text\n```\ncat buddy\n#!/bin/bash\nbadday=\"Cheer up\"\ngoodday=\"Doing great\"\necho \"$badday, $USER !\"\necho \"$goodday, $USER !\"\n```\n\n> Before turning the text file into a script, set the execute permission (to allow execution) with chmod\n```\nchmod +x buddy\n```\n\n> What type of file is your script?\n```\nfile buddy \nbuddy: Bourne-Again shell script, ASCII text executable\n```\n\nBy setting the **shebang**, the interpreter on the command line knows that this is a bash script! \n> Now run your script as if it were a program (./<script_name>)\n```\n./buddy\nCheer up, bits !\nDoing great, bits !\n```\n\nTo make it more readable, often the extension **.sh** is given to the text file. Note that this is not necessary! Linux does not define file types by extensions.\n> Rename your script to 'buddy.sh'\n```\n$ mv buddy buddy.sh\n```\n\n> **Alternative (less typing!)**\n```\n$ mv buddy{,.sh}\n```\n\n### A good habit\n\n>The last line of your script should be 'exit 0'. If bash reaches this line, it means that the script was successfully executed. Add it by opening the file with 'nano' and modifying its contents.\n```\n$ cat buddy.sh \n#!/bin/bash\nbadday=\"Cheer up\"\ngoodday=\"Doing great\"\necho \"$badday, $USER !\"\necho \"$goodday, $USER !\"\nexit 0\n```\n\n> Alternative. 
Less typing! \n```\necho \"exit 0\" >> buddy.sh\n```\n\nThis was our first bash script! I hope it was a painless experience.\n\n## Download a Perl script\nMany bioinformatics programs are written in python or perl. It's quick to type some python or perl code in a text file, and get your job done. Those scripts are **text files**. You can download and store scripts on your computer. Usually these files have .py or .pl extension. As long as you have python or perl on your system (by default in Linux!), you can run the scripts. \n\n### Run perl code\nLet's try a small script below.\n\n- Download a simple perl script [here](http://data.bits.vib.be/pub/trainingmaterial/introduction_to_linux_for_bioinformatics/motifs_new.pl)\n- Download the dna file [here](http://data.bits.vib.be/pub/trainingmaterial/introduction_to_linux_for_bioinformatics/dna.txt)\n- Save the file, under ~/Downloads for now.\n- Open Geany on your computer, and copy the script code to Geany.\n- Execute the script by clicking the little 'gear' box. For this script, you will need to download the dna.txt file as input.\n- The results of the script appear in a small window. It will ask for an input (depending on your script). Enter the required details.\n\n### Extract some lines ## \n\n> Download the bed file [here](http://data.bits.vib.be/pub/trainingen/Linux/TAIR9_mRNA.bed) via command line \n```\nwget http://data.bits.vib.be/pub/trainingen/Linux/TAIR9_mRNA.bed\n```\n\n> Look at the first 10 lines of this file. 
\n```\n$ head TAIR9_mRNA.bed \nchr1\t2025600\t2027271\tAT1G06620.1\t0\t+\t2025617\t2027094\t0\t3\t541,322,429,\t0,833,1242,\nchr5\t2625558\t2628110\tAT5G08160.1\t0\t-\t2625902\t2627942\t0\t6\t385,143,144,186,125,573,\t2167,1523,1269,928,659,0,\nchr5\t2625558\t2628110\tAT5G08160.2\t0\t-\t2625902\t2627942\t0\t7\t258,19,143,144,186,125,573,\t2294,2167,1523,1269,928,659,0,\nchr4\t12006985\t12009520\tAT4G22890.5\t0\t+\t12007156\t12009175\t0\t10\t370,107,97,101,57,77,163,98,80,263,\t0,802,1007,1196,1392,1533,1703,1945,2120,2272,\nchr4\t12007040\t12009206\tAT4G22890.2\t0\t+\t12007156\t12009175\t0\t9\t315,113,97,101,57,77,163,98,101,\t0,741,952,1141,1337,1478,1648,1890,2065,\nchr4\t12006985\t12009518\tAT4G22890.3\t0\t+\t12007156\t12009175\t0\t10\t370,113,97,101,57,77,163,98,80,257,\t0,796,1007,1196,1392,1533,1703,1945,2120,2276,\nchr4\t12006985\t12009520\tAT4G22890.4\t0\t+\t12007156\t12009175\t0\t10\t370,104,97,101,57,77,163,98,80,263,\t0,805,1007,1196,1392,1533,1703,1945,2120,2272,\nchr4\t12006985\t12009520\tAT4G22890.1\t0\t+\t12007156\t12009175\t0\t10\t370,113,97,101,57,77,163,98,80,263,\t0,796,1007,1196,1392,1533,1703,1945,2120,2272,\nchr2\t14578539\t14581727\tAT2G34630.2\t0\t+\t14578688\t14581632\t0\t11\t293,93,81,72,132,87,72,86,133,189,275,\t0,797,1120,1320,1488,1711,1898,2165,2435,2649,2913,\nchr2\t14578629\t14581727\tAT2G34630.1\t0\t+\t14579725\t14581632\t0\t11\t203,96,81,72,132,87,72,86,133,189,275,\t0,704,1030,1230,1398,1621,1808,2075,2345,2559,2823,\n```\n\nThis is a typical bioinformatics text file, with every row divided in field by tabs. 
\n> Extract all lines that start with chr1 from the TAIR9_mRNA.bed and put them in a new text file “chr1_TAIR9_mRNA.bed”.\n```\n $ grep \"^chr1\" TAIR9_mRNA.bed > chr1_TAIR9_mRNA.bed\n```\n\n### Checking the data ##\n\n> Download human chromosome 21 from [this link](https://data.bits.vib.be/pub/trainingen/Linux/Homo_sapiens.dna.chromosome21.zip) and unzip the file.\n```\nwget https://data.bits.vib.be/pub/trainingen/Linux/Homo_sapiens.dna.chromosome21.zip\n```\n\n```\nunzip Homo_sapiens.dna.chromosome21.zip\n```\n\nEntries in a fasta file start with > \n> How many entries are in that fasta file? Remember you can combine commands with a |.\n```\ngrep \"^>\" Homo_sapiens.GRCh37.73.dna.chromosome.21.fa | wc -l\n```\n\n### How many?\n\nUse the TAIR9_mRNA.bed file used in the first exercise. Remember it looks like this\n```\nchr1\t2025600\t2027271\tAT1G06620.1\t0\t+\t2025617\t2027094\t0\t3\t\nchr5\t2625558\t2628110\tAT5G08160.1\t0\t-\t2625902\t2627942\t0\t6\t\nchr5\t2625558\t2628110\tAT5G08160.2\t0\t-\t2625902\t2627942\t0\t7\t\nchr4\t12006985\t12009520\tAT4G22890.5\t0\t+\t12007156\t12009175\t0\t10\t\nchr4\t12007040\t12009206\tAT4G22890.2\t0\t+\t12007156\t12009175\t0\t9\t\n```\n\nIf you want to find entries that lie on the + strand of a certain chromosome, you need to find lines that start with the chromosome number and that contain a + sign. The number of characters between the chromosome number and the + sign is variable.\n> How many genes are lying on the + strand of the first chromosome ?\n> Since you need to use the + sign to represent a set of characters of variable length you need to use egrep for this:\n```\negrep \"^chr1.+\\+\" TAIR9_mRNA.bed | wc -l\n```\n\n### More complex extraction\n\nGet the last exon size for all mRNA records in Arabidopsis. Use TAIR9_mRNA.bed for this: this file contains the exon sizes. See the [.BED page](https://wiki.bits.vib.be/index.php/.bed) to check that the field we need is field 11. 
This contains a comma separated list of the sizes of all the exons of an mRNA\n> Get the exon sizes for all mRNA records in Arabidopsis. Write them to a file called exons.txt\n```\nawk '{ print $11 }' TAIR9_mRNA.bed > exons.txt\n```\n\n> Take a look at the first 10 lines of exons.txt\n```\nhead exons.txt\n```\n\nIf we try to print the last field with awk, using ',' as a delimiter, things go wrong:\n```\nawk -F',' '{ print $NF }' exons.txt > lastexons.txt\n```\n\nThe reason is that the last field is empty, because the lines end with a ','. We need to remove the last ',' and can use sed for this.\n> Remove the last comma from the lines and save in a file called exonsclean.txt. You want to substitute the comma at the end of the line by nothing:\n```\nsed 's/,$//' exons.txt > exonsclean.txt\nhead exonsclean.txt\n```\n\n> Fetch the last field from exonsclean.txt and save in a file called lastexons.txt\n```\nawk -F',' '{ print $NF }' exonsclean.txt > lastexons.txt\nhead lastexons.txt\n```\n\n> Sort exonsizes from largest to smallest into a file called lastexonssort.txt\n```\nsort -nr lastexons.txt > lastexonssort.txt\nhead lastexonssort.txt\n```\n\nYou can use uniq to summarize the results\n```\nuniq -c lastexonssort.txt | head\n      2 6885\n      1 5616\n      1 5601\n      1 5361\n      1 5239\n      1 4688\n      2 4470\n      1 4446\n      1 4443\n      1 4275\n```\n\n### Analyzing a short read alignment\n\nSAM ('sequence alignment map') file format is the format which summarizes the alignment of reads to a reference genome. It is one of the key files in NGS analysis, and you can learn a lot from it. 
See the [SAM page](https://wiki.bits.vib.be/index.php/.sam) for a description of this format.\n> Download the sam file from [here](http://data.bits.vib.be/pub/trainingen/Linux/sample.sam)\n```\nwget http://data.bits.vib.be/pub/trainingen/Linux/sample.sam \n```\n\n> How many lines has the SAM file?\n```\nwc -l sample.sam\n```\n\n100015 lines\n\n> How many lines start with '@', which is the comment symbol in the SAM format.\n```\ngrep '^@' sample.sam | wc -l\n```\n\n15 lines\n\nYou can use grep to skip the lines starting with '@', since they are comment lines.\n```\ngrep -v '^@' sample.sam | head\n```\n\n> Write the FLAG field (second field) to a file called flags.txt: pipe the grep results to awk to print the second field.\n```\ngrep -v '^@' sample.sam | awk '{ print $2 }' > flags.txt\nhead flags.txt\n```\n\n> Sort and summarize (using uniq) flags.txt.\n```\nsort -nr flags.txt | uniq -c\n```\n\n> Sort the results on number of times observed (the first field). We build on the previous command, and just pipe the output to sort -nr. We do not have to use the option -k, since sort always takes the first field.\n```\nsort -nr flags.txt | uniq -c | sort -nr \n```\n\n### Advanced\nWe use the TAIR9_mRNA.bed to answer this.\nFirst we check how many different genes are in the file. A gene has the code ATG. Splice variants have the same AT number but different version number (the numbers after the . are different). We are not interested in splice variants so we want to remove the .1, .2... before counting. You can do this by using the . as a field delimiter\n> Remove everything after the . and save in a file called TAIRpart.txt\n```\nawk -F'.' 
'{ print $1 }' TAIR9_mRNA.bed > TAIRpart.txt\nhead TAIRpart.txt\n```\n\nNow you need to summarize the fourth column of this file and count the lines of the result\n> How many different genes are in the file?\n```\ncut -f4  TAIRpart.txt | sort | uniq | wc -l\n```\n\n27379\n\nWhen you look at TAIR9_mRNA.bed you see that the fifth column contains 0.\n> Check if there is any entry that contains another number in that column ? (summarize will give you the answer)\n```\ncut -f5 TAIR9_mRNA.bed | sort -nr | uniq -c\n```\n\nNo\nAnother example: Show all Arabidopsis mRNA with more than 50 exons\n```\nawk '{ if ($10>50) print $4 }' TAIR9_mRNA.bed\n```\n\n> Print the number of exons (field number 10) of mRNAs from the first chromosome.\n```\ngrep '^chr1'  TAIR9_mRNA.bed | awk '{ print $10 }' \n```\n\n> Obtain AT numbers (field 4) and exon info (field 11)\n```\nawk '{ print $4,\",\",$11 }'  TAIR9_mRNA.bed \n```\n\n## Bash Aliases to enhance your productivity\n\nYou specify aliases in the **.bashrc file** in your home directory. \n```\nalias myalias=\"<my fancy command>\"\n```\n\nChange 'my fancy command' to a real command!!\nBefore you can use your new aliases, you have to reload the .bashrc file. You do this by \n```\n$ source ~/.bashrc\n```\n\nor \n```\n$ . ~/.bashrc\n```\n\nNow, let's do this exercise.\nSometimes you might want to open a big text file from the end on, and start scrolling towards the top. We will create an **alias** for this in this exercise.\n> Create an alias that starts scrolling from the bottom. Tip: you need less with the appropriate option. Read through the man page of less. To help you: you can search for the string \"at the end\". Open the man page of less \n```\n$ man less\n```\n\n> Type \"/at the end\" and <ENTER>. Less will search in the content for \"at the end\". 
Examine the entries with the string. Go to the next result by typing \"/\" followed by ENTER.\n> The option is +G: add the alias by opening .bashrc with an editor, and adding the line:\n```\nalias sell=\"less +G\"\n```\n\n> When you have changed the content of .bashrc, it needs to be reloaded. Close your terminal and fire it up again. OR execute:\n```\n$ . ~/.bashrc\n```\n\n```\n$ source ~/.bashrc\n```\n\n> We now have **sell** at our disposal, which starts scrolling large text files from the end of the file.\n```\n$ sell /var/log/syslog\n```\n\n### Show all aliases on your system\n\nForgot an alias? To see all your aliases, run the command \n```$ alias```.\n\n## Writing loops\n\n**For** loops are used to repeat commands a number of times. We will start with two simple examples.\n> Write a for loop to create 3 files: test1.txt, test2.txt, test3.txt\n```\nfor i in 1 2 3\ndo\ntouch test$i.txt\ndone\nls -l\n```\n\n> Write a for loop to create 3 folders: folder1, folder2, folder3\n```\nfor i in 1 2 3\ndo\nmkdir folder$i\ndone\nls -l\n```\n\n","## Tutorial on the linux file system\n\n### Which protocol achieves highest compression ratio?\nLet's do a little test. Download [this compressed file](http://data.bits.vib.be/pub/trainingmaterial/introduction_to_linux_for_bioinformatics/data_linux_training.tar.gz).\n\n> Create a folder named 'Compression_exercise' in your home. Copy the downloaded tar.gz to it.\n```\n$ cd\n$ mkdir Compression_exercise\n$ cp Downloads/data_linux_training.tar.gz Compression_exercise/\n```\n\n> Unpack the data_linux_training.tar.gz  file.\n```\n$ tar -xvzf data_linux_training.tar.gz \n```\n\nAlternative: you can specify the options without the '-' sign.\n```\n$ tar xvfz data_linux_training.tar.gz \n```\n\n> Decompress the file DRR000542_1.fastq.subset.gz\n```\n$ gunzip DRR000542_1.fastq.subset.gz\n```\n\n> Copy the DRR000542_1.fastq.subset file to a new file called 'bzip2_test.fastq'. 
Compress this file with bzip2.\n```\n$ bzip2 bzip2_test.fastq\n```\n\n**Tip!** If you would like to know how long the command took to finish, use \"time\"\n```\n$ time bzip2 bzip2_test.fastq\nreal\t0m5.878s\nuser\t0m5.728s\nsys\t0m0.112s\n```\n\nThree different times are given. What matters to you is the line 'real', also called the wall-clock time.\n> Copy DRR000542_1.fastq.subset file to a new file called gzip_test.fastq and compress with gzip.\n```\n$ time gzip gzip_test.fastq\nreal\t0m5.878s\nuser\t0m5.728s\nsys\t0m0.112s\n```\n\nA relatively unknown package is lrzip, 'long range zip', which achieves very good results on big files. Let's try that one also!\n> Copy DRR000542_1.fastq.subset file to a new file called lrzip_test.fastq and compress with lrzip.\n```\n$ lrzip lrzip_test.fastq\nThe program 'lrzip' is currently not installed.  You can install it by typing:\nsudo apt-get install lrzip\n```\n\n**apt-get** is the command line tool to install software on Debian distro's. Equivalent to the software center.\n```\n$ sudo apt-get install lrzip\n[sudo] password for joachim: \nReading package lists... Done\nBuilding dependency tree       \nReading state information... Done\nThe following packages were automatically installed and are no longer required:\nlibnet-ip-perl diffstat libnet-dns-perl libparse-debianchangelog-perl\ngir1.2-unique-3.0 kde-l10n-engb python-webpy libnet-domain-tld-perl\nlibemail-valid-perl libapt-pkg-perl python-flup kde-l10n-zhcn\nUse 'apt-get autoremove' to remove them.\nThe following NEW packages will be installed:\nlrzip\n0 upgraded, 1 newly installed, 0 to remove and 0 not upgraded.\nNeed to get 159 kB of archives.\nAfter this operation, 313 kB of additional disk space will be used.\nGet:1 http://be.archive.ubuntu.com/ubuntu/ precise/universe lrzip amd64 0.608-1 [159 kB]\nFetched 159 kB in 0s (780 kB/s) \nSelecting previously unselected package lrzip.\n(Reading database ... 
662617 files and directories currently installed.)\nUnpacking lrzip (from .../lrzip_0.608-1_amd64.deb) ...\nProcessing triggers for man-db ...\nSetting up lrzip (0.608-1) ...\n```\n\nNow we can compress:\n```\nOutput filename is: lrzip_test.fastq.lrz\nlrzip_test.fastq - Compression Ratio: 6.724. Average Compression Speed:  0.563MB/s.\nTotal time: 00:03:02.97\nreal\t3m3.026s\nuser\t3m1.947s\nsys\t0m0.804s\n```\n\n> Compare the sizes of the different resulting compressed files.\n```\n$ ls -lh *zip*\n-rw------- 1 bits bits 17M Oct 22 14:06 bzip2_test.fastq.bz2\n-rw------- 1 bits bits 21M Oct 22 14:06 gzip_test.fastq.gz\n-rw------- 1 bits bits 104M Oct 22 14:06 lrzip_test.fastq\n-rw------- 1 bits bits 16M Oct 22 14:10 lrzip_test.fastq.lrz\n```\n\nDecide for yourself whether the extra time needed for higher compression is worth the gain in compression.\n> Put the three files in a newly created folder 'results', and make an archive of it.\n```\n$ mkdir results\n$ mv  *{bz2,q.gz,lrz} results/\n$ ls results/\nbzip2_test.fastq.bz2  gzip_test.fastq.gz  lrzip_test.fastq.lrz\n$ tar cvf results.tar results/\n$ rm -rf results/\n$ ls -lh\ntotal 281M\n-rw------- 1 bits bits 104M May  4  2011 ERX000016.test.fastq\n-rw-r--r-- 1 bits bits 21M Oct 22 14:02 ERX000016.test.fastq.tar.gz\n-rw------- 1 bits bits 104M Oct 22 14:06 lrzip_test.fastq\n-rw-r--r-- 1 bits bits 53M Oct 22 14:28 results.tar\n```\n\n### Symbolic links\nSymbolic links (symlinks) point to a file, making the file accessible in another directory than where the file is. So you can avoid copying! When the original file is deleted, the symlink is dead. When you remove the symlink, the original file is still present. \nThe syntax for symbolic links is:\n```\n$ ln -s /home/bits/data/large.fastq /home/bits/Projects/ProjectA/\n```\n\nTip: when using **ln**, preferably provide absolute paths. 
If you want to use relative paths, make sure to first go to the directory you want the link to be in, and create the link using a relative path (using '.' and '..' to make the path).\nRemove symbolic links as follows:\n```\n$ unlink /home/bits/Projects/ProjectA/large.fastq\n```\n\nIn contrast, there is also something called a \"hard link\" (ln without the -s option). A hard link is an additional name for the same file data: the data is only removed when the last hard link to it is deleted, and hard links cannot span file systems. So 'ln -s' is mostly used.\n\n### Linking data instead of copying\nIn the Rice Example directory (should be available under your home): download [this annotation file](http://rice.plantbiology.msu.edu/pub/data/Eukaryotic_Projects/o_sativa/annotation_dbs/pseudomolecules/version_7.0/all.dir/all.gff3) into the 'Genome data'/'Annotation' directory. Make a symbolic link to this file in the 'Genome data'/'Sequence' directory. Read the first 10 lines from the symbolic link file.\n> When you have tried yourself, see the solution.\n```\n$ cd Rice\\ Example/\n~/Rice Example $ ls\nbin  Genome data\n~/Rice Example $ cd Genome\\ data/Annotation/\n~/Rice Example/Genome data/Annotation $ ls\n~/Rice Example/Genome data/Annotation $ wget http://rice.plantbiology.msu.edu/pub/data/Eukaryotic_Projects/o_sativa/annotation_dbs/pseudomolecules/version_7.0/all.dir/all.gff3\n--2013-10-28 11:45:26--  http://rice.plantbiology.msu.edu/pub/data/Eukaryotic_Projects/o_sativa/annotation_dbs/pseudomolecules/version_7.0/all.dir/all.gff3\n           => `all.gff3'\nResolving http://rice.plantbiology.msu.edu (http://rice.plantbiology.msu.edu)... 35.8.196.190\nConnecting to http://rice.plantbiology.msu.edu (http://rice.plantbiology.msu.edu)|35.8.196.190|:21... connected.\nLogging in as anonymous ... Logged in!\n==> SYST ... done.    ==> PWD ... done.\n==> TYPE I ... done.  ==> CWD (1) /pub/data/Eukaryotic_Projects/o_sativa/annotation_dbs/pseudomolecules/version_7.0/all.dir ... done.\n==> SIZE all.gff3 ... 81498659\n==> PASV ... done.    ==> RETR all.gff3 ... 
done.\nLength: 81498659 (78M) (unauthoritative)\n100%[======================================>] 81,498,659  1.34M/s   in 65s     \n2013-10-28 11:46:33 (1.20 MB/s) - `all.gff3' saved [81498659]\n~/Rice Example/Genome data/Annotation $ ls ..\nAnnotation  Sequence\n~/Rice Example/Genome data/Annotation $ cd ../Sequence/\n~/Rice Example/Genome data/Sequence $ ln -s ../Annotation/all.gff3 .\n~/Rice Example/Genome data/Sequence $ ls -l\ntotal 381300\nlrwxrwxrwx 1 bits bits 22 Oct 28 11:49 all.gff3 -> ../Annotation/all.gff3\n-rw-r--r-- 1 bits bits 390444160 Mar  8  2013 IRGSPb5.fa.masked\n-rw-r--r-- 1 bits bits 55 Mar  8  2013 IRGSPb5.fa.masked.gz.md5\n~/Rice Example/Genome data/Sequence $ head all.gff3 \n##gff-version 3\nChr1\tMSU_osa1r7\tgene\t2903\t10817\t.\t+\t.\tID=LOC_Os01g01010;Name=LOC_Os01g01010;Note=TBC%20domain%20containing%20protein%2C%20expressed\nChr1\tMSU_osa1r7\tmRNA\t2903\t10817\t.\t+\t.\tID=LOC_Os01g01010.1;Name=LOC_Os01g01010.1;Parent=LOC_Os01g01010\nChr1\tMSU_osa1r7\texon\t2903\t3268\t.\t+\t.\tID=LOC_Os01g01010.1:exon_1;Parent=LOC_Os01g01010.1\nChr1\tMSU_osa1r7\texon\t3354\t3616\t.\t+\t.\tID=LOC_Os01g01010.1:exon_2;Parent=LOC_Os01g01010.1\nChr1\tMSU_osa1r7\texon\t4357\t4455\t.\t+\t.\tID=LOC_Os01g01010.1:exon_3;Parent=LOC_Os01g01010.1\nChr1\tMSU_osa1r7\texon\t5457\t5560\t.\t+\t.\tID=LOC_Os01g01010.1:exon_4;Parent=LOC_Os01g01010.1\nChr1\tMSU_osa1r7\texon\t7136\t7944\t.\t+\t.\tID=LOC_Os01g01010.1:exon_5;Parent=LOC_Os01g01010.1\nChr1\tMSU_osa1r7\texon\t8028\t8150\t.\t+\t.\tID=LOC_Os01g01010.1:exon_6;Parent=LOC_Os01g01010.1\nChr1\tMSU_osa1r7\texon\t8232\t8320\t.\t+\t.\tID=LOC_Os01g01010.1:exon_7;Parent=LOC_Os01g01010.1\n```\n\n### Introduction: symbolic links to easily install manually applications\nIf a package is not available via a package manager, manual installation might be an option. I put manually applications in '''/opt'''. Next, I link them to a correct location on our system, usually '''/usr/local/bin'''. 
Below you have some examples of this, which you can try out yourself.\n\nIf you want to manually install apps, **/opt** is the advised directory. However, only the administrator ('root') can access /opt. You can check that the /opt directory belongs to root with <pre>ls -l /opt</pre>\nTo be able to copy and write stuff into /opt, we need root permissions. To do so, precede your commands with **sudo**, as exemplified in the next exercise below. When we do that, our password will first be asked. Next, the command is executed with root permissions. In this way, we can edit contents in root-owned directories! You are a sudoer!\n\n### Transpose, a tool to transpose\nTranspose is an extremely convenient text tool to transpose tabular data. We will use it later. The code is hosted on [SourceForge](http://sourceforge.net/projects/transpose/).\n> Download transpose installation file (zip) via the browser. Copy it to /opt using **sudo cp**.\nGo to the SourceForge website with the browser, and click on the Download button.\n```\nDownloads $ sudo cp transpose-2.0.zip /opt\n[sudo] password for joachim: \n Downloads $\n```\n\nWe need to precede the *cp* command with the *sudo* command, since only the root user can copy into */opt*.\n> Unpack the installation in /opt, compile the binary and test it with 'transpose --help'. Use sudo to do so.\n```\n$ pwd\n/opt\n$ ls trans*\ntranspose-2.0.zip\n$ sudo unzip transpose-2.0.zip \nArchive:  transpose-2.0.zip\n   creating: transpose-2.0/\n   creating: transpose-2.0/win32-bin/\n  inflating: transpose-2.0/win32-bin/transpose.exe  \n   creating: transpose-2.0/src/\n  inflating: transpose-2.0/src/transpose.c  \n  inflating: transpose-2.0/README \n```\n\nThe zip file is now unpacked. Let us now compile the code. ALWAYS have a look at the README file for this.\n```\n$ cd transpose-2.0\n$ head README\nTo Compile:\n\tgcc transpose.c -o transpose\nTo Install - Just copy into your path. 
e.g.:\n\tcp transpose /usr/local/bin/\n$ cd src/\n$ sudo gcc transpose.c -o transpose\n```\n\nThe program **gcc** compiles the human readable code in the file **transpose.c** and produces a binary file out of it, called **transpose**.\n> We can now run the binary file from within the directory.\n```\n$ ./transpose --help\n       Description:     \n\tThis software is released under the GPL license\n\tReshapes delimited text data - amongst other things, it can transpose a matrix of plain text data.\n```\n\n> Create a symbolic link to the newly created binary to /usr/local/bin. This directory collects binaries/commands to be used on the command line.\n```\n$ sudo ln -s /opt/transpose-2.0/src/transpose /usr/local/bin\n$ which transpose \n/usr/local/bin/transpose\n```\n\n","## Tutorial on the linux command line\nWe will first hold your hand: type over these commands below step by step, and watch what they do.\nUse **cd** to change the current working directory (user bits). To create your own directories use the **mkdir** (make directory) command.\n\n```\n$ cd ~\n$ mkdir sequences\n$ cd sequences\n$ mkdir proteins\n$ cd proteins\n$ pwd\n/home/bits/sequences/proteins\n$ cd ../..\n$ pwd\n/home/bits\n```\n\nTo create a new file, use the **touch** command:\n```\n$ cd ~/sequences/proteins/\n$ touch my_sequence.txt\n$ ls -l\n-rw-r--r-- 1 bits users 0 Sep 19 15:56 my_sequence.txt\n```\n\nIn the last command above, the **-l** (a lowercase “L”, not a “1” (one)) option was used with the ls command. The -l indicates that you want the directory contents shown in the “long listing” format.\nMost commands accept options. But which options can you use? The command **man** helps you. Type **man** followed by the command name. E.g. **man ls** to see what options are available for the ls command. You get a the list of options. Keep pressing Space until the page stops scrolling, then enter “q” to return to the command prompt.\nLuckily, most tools have the **--help** option. 
(ls --help for example). These 2 methods should help you further. To see what options can be used with ls, enter **man ls**.\n\n```\n$ man ls\n```\n\nTo delete a file, use the **rm** (remove) command:\n\n```\n$ cd ~/sequences/proteins/\n$ ls\nmy_sequence.txt\n$ rm my_sequence.txt\n$ ls\n$\n```\n\nTo remove a directory, use the **rmdir** (remove directory) command. The directory needs to be empty to do this.\n```\n$ cd ~/sequences/\n$ ls\nproteins\n$ rmdir proteins\n$ ls\n$\n```\n\nTo copy a file, use the **cp** (copy) command:\n\n```\n$ cd ~/sequences\n$ touch testfile1\n$ ls\ntestfile1\n$ cp testfile1 testfile2\n$ ls\ntestfile1 testfile2\n```\n\nTo rename a file, or to move it to another directory, use the **mv** (move) command:\n```\n$ cd \n$ touch testfile3\n$ mv testfile3 junk\n$ mkdir testdir\n$ mv junk testdir\n$ ls testdir\njunk\n```\n\nTo download a file, use the **wget** command:\n```\n$ cd ~/Downloads\n$ wget http://data.bits.vib.be/pub/trainingen/Linux/sample.sam\n$ ls sample.sam\nsample.sam\n$\n```\n\nThe commands covered so far represent a small but useful subset of the many commands available on a typical Linux system.\n\n### Make a project folder structure\nWe assume that you start from your home folder.\n> Create the following directory structure\n> <figure id=\"figure-1\"><img src=\"https://wiki.bits.vib.be/images/c/c4/Dirstructureex1.png\" alt=\"tree-structure\"><figcaption><span class=\"figcaption-prefix\">Figure 1:</span> Tree</figcaption></figure>\n```\n$ mkdir -p docs/{pdf/{man,readme},html/{man,readme}}\n```\n\nThe '{' and '}' can group arguments but you can also create the structure step by step.\n> The little tree figure above is created with the 'tree' command. 
Display such a tree.\n```\ntree /home/bits/docs/\n```\n\n## Downloading and storing bioinformatics data\n\n### Create a project folder \nThe first thing to do when you start a bioinformatics project, is to create a structure of folders to put your data in an organised fashion.\n\n### Downloading\nAs an example, we will download the rice genome from the Rice Annotation Project database. But first create the folder structure.\n> Create following folder structure.\n```\n $ mkdir \"Rice Example\"\n $ cd Rice\\ Example\n $ mkdir Genome\\ data\n $ cd Genome\\ data\n $ mkdir Sequence\n $ mkdir Annotation\n $ cd\n```\n\n** Be aware of white spaces on the command line!**\nOn the command line, programs, options and arguments are separated by **white spaces**. If you choose to use a folder name containing a white space, it will interpret every word as an option or argument. So you have to tell Bash to **ignore the white space**. This can be done by: putting **strings between quotes** like ' or \" **escape** a white space with \\. See the examples above.\nHence, you might save yourself some trouble (and typing!) by putting _ instead of white spaces in names. Also make sure to use tab expansion, wherever possible!\n\n### Download the genome data directly on the command line\nYou can fetch the rice genome [from this link](http://rapdb.dna.affrc.go.jp/download/archive/build5/IRGSPb5.fa.masked.gz).\n> Download the genome data to the \"Rice example\"/\"Genome data\"/Sequence folder. Use **wget** to download from the link.\n> Right-click on the download link, and copy the download link. 
The download link is: http://rapdb.dna.affrc.go.jp/download/archive/build5/IRGSPb5.fa.masked.gz\n> Go to the directory and execute wget\n```\n$ cd      ## to go back to the home directory\n$ cd Ric<tab>\n$ cd Gen<tab>/Seq<tab>\n$ wget http://rapdb.dna.affrc.go.jp/download/archive/build5/IRGSPb5.fa.masked.gz\n--2013-10-15 09:36:01--  http://rapdb.dna.affrc.go.jp/download/archive/build5/IRGSPb5.fa.masked.gz\nResolving rapdb.dna.affrc.go.jp (rapdb.dna.affrc.go.jp)... 150.26.230.179\nConnecting to rapdb.dna.affrc.go.jp (rapdb.dna.affrc.go.jp)|150.26.230.179|:80... connected.\nHTTP request sent, awaiting response... 200 OK\nLength: 122168025 (117M) [application/x-gzip]\nSaving to: `IRGSPb5.fa.masked.gz'\n100%[======================================>] 122,168,025  973K/s   in 2m 40s  \n2013-10-15 09:38:42 (747 KB/s) - `IRGSPb5.fa.masked.gz' saved [122168025/122168025]\n$ ls\nIRGSPb5.fa.masked.gz\n```\n\nAll right. We have fetched our first genome sequence!\n\n### Did your data get through correctly?\nLarge downloads or slow downloads like this can take a long time. Plenty of opportunity for the transfer to go wrong. Therefore, large downloads should always have a **checksum** mentioned. You can find the md5 checksum on the downloads page. The md5 checksum is a unique string identifying (and calculated from) this data. Once downloaded, you should calculate this string yourself with **md5sum**.\n```\n$ md5sum IRGSPb5.fa.masked.gz\n7af391c32450de873f80806bbfaedf05  IRGSPb5.fa.masked.gz\n```\n\nYou should go to the rice genome download page, and compare this string with the MD5 checksum mentioned over there. You can do this manually. Now that you know the concept of checksums, there is an easier way to verify the data using **md5sum**. Can you find the easier way?\n> Search how to use md5sum to check the downloaded files with the .md5 file from the website. 
Check the man page\n```\n$ man md5sum\n```\n\nIt does not say much: in the end it refers to \n```\n$ info coreutils 'md5sum invocation'\n```\n\nReading the options, there is one option sounding promising:\n```\n`-c'\n`--check'\n     Read file names and checksum information (not data) from each FILE\n     (or from stdin if no FILE was specified) and report whether the\n     checksums match the contents of the named files. \n```\n\nThis way we can check the download:\n```\n$ wget http://rapdb.dna.affrc.go.jp/download/archive/build5/IRGSPb5.fa.masked.gz.md5\n--2013-10-15 09:47:02--  http://rapdb.dna.affrc.go.jp/download/archive/build5/IRGSPb5.fa.masked.gz.md5\nResolving rapdb.dna.affrc.go.jp (rapdb.dna.affrc.go.jp)... 150.26.230.179\nConnecting to rapdb.dna.affrc.go.jp (rapdb.dna.affrc.go.jp)|150.26.230.179|:80... connected.\nHTTP request sent, awaiting response... 200 OK\nLength: 55 [application/x-gzip]\nSaving to: `IRGSPb5.fa.masked.gz.md5'\n100%[======================================>] 55          --.-K/s   in 0s      \n2013-10-15 09:47:03 (757 KB/s) - `IRGSPb5.fa.masked.gz.md5' saved [55/55]\n$ ls\nIRGSPb5.fa.masked.gz  IRGSPb5.fa.masked.gz.md5\n$ md5sum -c IRGSPb5.fa.masked.gz.md5 \nIRGSPb5.fa.masked.gz: OK\n```\n\n## Ensuring integrity of downloads\nA handy tool to use is the [DownThemAll](https://addons.mozilla.org/nl/firefox/addon/downthemall/) addon for Firefox, in which you have to provide the checksum at the time of download. It will automatically check whether the download is finished.\nThe Short Read Archive (SRA), storing NGS data sets, makes use of [Aspera](http://asperasoft.com/technology/transport/fasp/) to download data a great speeds, ensuring integrity. 
To download from SRA using aspera in linux, follow the [this guide from EBI](http://www.ebi.ac.uk/ena/about/read_download).\n\n### Extracting the data\n> What type of file have you downloaded?\n```\n$ file IRGSPb5.fa.masked.gz\nIRGSPb5.fa.masked.gz: gzip compressed data, was \"IRGSPb5.fa.masked\", from Unix, last modified: Wed Aug 18 03:45:47 2010\n```\n\nIt is a compressed file. Files are compressed to save storage space. Before using these files, you have to decompress them. What can you do with this type of file? Check the command apropos.\n```\n$ apropos gzip\ngzip (1)             - compress or expand files\nlz (1)               - gunzips and shows a listing of a gzip'd tar'd archive\ntgz (1)              - makes a gzip'd tar archive\nuz (1)               - gunzips and extracts a gzip'd tar'd archive\nzforce (1)           - force a '.gz' extension on all gzip files\n```\n\n**apropos** is a command that helps you discover new commands. In case you have a type of file that you don't know about, use apropos to search for corresponding programs.\n> Decompress the file. Check the man page of gzip. From the man page:<pre>gunzip [ -acfhlLnNrtvV ] [-S suffix] [ name ...  ]</pre>\n```\n$ gunzip IRGSPb5.fa.masked.gz \n$ ls\nIRGSPb5.fa.masked  IRGSPb5.fa.masked.gz.md5\n```\n\n","# 1. Status\nGit can display the state of your working directory and staging area. The command that we'll use for this is `git status` and depending on the situation the output will look differently, but it will always give you some informative status description.\n\n```\n$ git status\nOn branch main\nYour branch is up to date with 'origin/main'.\n\nnothing to commit, working tree clean\n```\nThe first sentence tells us that we're on the `main` branch, which is the default branch name in Git. More on branches later. The second sentence tells us that our local branch is exactly the same as our origin. 
This means that all of the files and folders within our local project are identical to the ones in the remote GitHub repo. Lastly, git tells us that there is nothing to commit, which makes sense as we don't have any changes at the moment. \n\n\nLet's make some changes to one of our files again.  Check the status again with `git status`.\n```\n$ git status\nOn branch main\nYour branch is up to date with 'origin/main'.\n\nChanges not staged for commit:\n  (use \"git add <file>...\" to update what will be committed)\n  (use \"git restore <file>...\" to discard changes in working directory)\n        modified:   plot1.R\n\nno changes added to commit (use \"git add\" and/or \"git commit -a\")\n```\nThis time, git tells us that there are changes in the file `plot1.R` and they are not in the staging area. There are two options here:\n- Use `git add plot1.R` to add the changes to the staging area \n- Use `git restore plot1.R` to remove the changes from your working directory. This will undo the changes that you made since the last time you committed it. \n\nAdd the file to the staging area and check the status again with `git status`\n\n```\n$ git status\nOn branch main\nYour branch is up to date with 'origin/main'.\n\nChanges to be committed:\n  (use \"git restore --staged <file>...\" to unstage)\n        modified:   plot1.R\n```\nThe file is now in the staging area and we have two options:\n- Use `git commit -m \"some informative text\"` to commit the changes to the commit repository\n- Use `git restore --staged plot1.R` to remove the file from the staging area.\n\nLet's do the latter, check the status again and then remove the changes from your working directory. \n\n\n\n\n\n# 5. The history (log)\nBesides checking the current state of your project with `git status`, there is also a possibility to have a look in your commit history. In order to list all your previous commits, enter `git log`. 
The output is a long list containing several blocks like this:\n```\ncommit e2d7e9a0b4614a6bee6b3ffd7583237125671dc1\nAuthor: username <user@xyz.com>\nDate:   Wed Jan 01 01:23:45 2020 +0200\n\n    The informative commit message\n```\n`git log` lists all commits made to a repository in reverse chronological order. Each commit starts with an identifier which is a unique code for each commit (hash). Besides the identifier, the commit’s author and date are given, and the commit message is given.\n\nIf we have pushed the commits to our GitHub repository (online) we will see the last commit ID somewhere in the upper right corner. This is a verification for us so we know that the remote repository is up to date with the local repository. Can you also find an overview of all commits in GitHub? \n\n---\n\n> ### {% icon question %} Question\n> \n> Why is it useful to have the author's name and e-mail address in the history log?\n>\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    > It's obvious that in this local project we've been doing all the changes & commits. However at a certain point you might collaborate with someone else on the same project. In this case it's useful to know who did what changes. \n>    >\n>    > </details>\n>\n{: .question}\n\n---\n\nGit log can be extended with many other parameters, e.g. combine it with the `--oneline` argument, or add `--graph` to display the commit history as a text-based graph and `--decorate` to indicate which commits are associated with the current HEAD, the current branch main, or other Git references. Git's aliases are very useful in this case as the way how the history is displayed is very personal. With this information you should understand the last section of [Chapter 2](https://material.bits.vib.be/topics/git-introduction/tutorials/2_configurations/tutorial.html) better and create your own alias. 
\n\n\n\n---\n\n**Intermezzo / extra reading**:\n\nWhen the output of git log is too long to fit in your screen, git uses a program to split it into pages of the size of your screen. When this “pager” is called, you will notice that the last line in your screen is a :, instead of your usual prompt.\n- To get out of the pager, press `Q`.\n- To move to the next page, press `Spacebar`.\n- To search for `some_word` in all pages, press `/` and type `some_word`. Navigate through matches pressing `N` (next).\n\n---\n\nLet's continue with the [next session](https://material.bits.vib.be/topics/git-introduction/tutorials/5_branches/tutorial.html)!\n\n\n\n\n\n\n","# 1. Introduction\nHave you also been in a similar and recognizable situation as depicted below? Saving different versions of your files and scripts is essential to keep track of changes, though it can become chaotic very quickly if we do not use the excellent tools we have available to us. Git is one of these excellent tools. It works similar to [Google Docs'](https://support.google.com/drive/answer/2409045?co=GENIE.Platform%3DDesktop&hl=en) history feature in which Google automatically saves your document and the changes that happened at a particular moment in time. However, Git allows you to control and decide yourself when changes are worth saving, hence making it much more powerful and flexible. Each change is saved together with a message that enables you or your collaborators to keep an overview of the history of the project.  \n\n\n---\n\n<center><img src=\"../../images/version-control-meme.png\"/></center>\n\n---\n\nGit is an open-source tool that keeps track of the changes made to your project files throughout their history. \n\nWhy should you version control? \n- **Keeping track of changes** to your files done by yourself or your collaborators. At any moment you can exploit the history of the project to see who wrote what on a particular day. 
It even allows you to go back to a specific version or undo specific edits. \n- **Synchronizes files between different people or infrastructures** (i.e. laptops, servers, ...), making it a powerful collaborating system. \n- **Testing new code/changes**. Git can control multiple alternative versions of the same project in which you can make some changes and only when you or your collaborators are happy with them, you can include them in the main version.\n\n\nThere is a major difference between Git and GitHub though. Git is software that works on your computer, whereas GitHub is a service for connecting and uploading/downloading files much like saving files in the cloud. There are some alternatives for Git ([link](https://www.g2.com/products/git/competitors/alternatives)) which will not be discussed in this course, and there are some for GitHub with GitLab and Bitbucket as main competitors. These alternatives essentially share the same concepts and therefore we chose the tools that enjoy the most traction in the community, namely Git and GitHub. In this course we will learn how Git works on your computer, giving us a proper understanding of its functionalities. Grasping these concepts is important if we want to use Git in other apps (e.g. in [Chapter 8](https://material.bits.vib.be/topics/git-introduction/tutorials/8_github_rstudio/tutorial.html) we will learn how GitHub and RStudio interact).\n\n# 2. Installations \nFor this course we will explore version controlling in a mixture of [Git](https://git-scm.com/) via the command-line and [GitHub](https://github.com/). The former requires some basic understanding of the Linux command line. If you're not familiar with the Linux command line, you can have a look at the materials [here](https://material.bits.vib.be/topics/linux/). After discussing Git's essential features, we'll introduce how you can set up a collaboration with externals or colleagues, how to integrate version controlling in RStudio, etc. 
\n\n- Git can be installed for any OS (Windows, Mac or Linux) from [this link](https://git-scm.com/downloads). Please keep the recommended and default settings as is. \n- Make an account on [GitHub](https://github.com/). \n\nWe will address further configurations in the next chapter. \n\n# 3. Three conceptual areas\nBefore diving in, let's have a look at how Git works. It's important to understand the three conceptual areas that exist locally when using Git on your computer: the development area, the staging area and the repository containing the commits. We already know that we want to use Git for keeping track of changes in our files. To keep track of those changes we need to run through these conceptual areas: first we edit a file on our computer (development area), then we tell Git about it (add it to the staging area) and lastly we commit those changes (commits repository). Let's have a closer look: \n\n![Three conceptual areas](../../images/conceptual_areas.png)\n\n1. The **development area** is where your coding happens. Usually this is a folder with multiple files on your computer. Git will never change anything at this level, actually it won't really do anything. The only thing Git does is remembering that it needs to keep track of changes made in this folder or its files. However, for this we first need to initialize Git on this folder (only once in the beginning).  \n2. The **staging area** is an intermediate stage which assembles the files that contain changes. We can select one or multiple files with changes and stage them for a commit. This means that we're telling Git that we will want to save those changes. Hence, imagine that we want to save a file, we first have to add it to the staging area before we can commit it.  \n3. Files that are in the staging area are then committed to what we'll call the **commit repository**. Once we have done that, we stored a specific version of the committed files. 
Committing is a synonym for saving the files in the Git terminology. The repository with commits contains a list of all the commits that we have done in a project. It's neatly structured in a history log which we can call at any point. Notice that all of this is still happening on our computer. \n\n\nHere's an example. Let's assume that we're starting a new project. Usually that also means that you make a new folder on your computer where you will keep all the files related to the project. The first thing you have to do is to tell Git that it has to keep track of this folder. In this step, we're initializing Git on this folder. Now, you just made your first file. Even though it is stored on your computer, it's not automatically saved in Git. First, you'll have to add it to the staging area and afterwards you need to commit it to the repository. When we initialized Git on the folder, a new folder `.git/` was created which will store the different versions. That allows us to only have the latest version of the files visible on our computer and all of its histories in the `.git/` folder.   \nIf we make a second file, the only thing we have to do is add it to the staging area and then commit it. \n\nNotice that the repository is not yet visible on [github.com](https://github.com/). For this we would still need a fourth and last step, namely pushing the commits repository from your computer to GitHub. By pushing your commits repository, you will push the files within the project to GitHub. After this last step, your project and all of the files are accessible in a GitHub repository.\n\nDuring our adventure through Git & GitHub we'll use some specific glossary. Confused about what all these new words mean? 
Check out the [GitHub glossary](https://help.github.com/en/github/getting-started-with-github/github-glossary).\n\n\n---\n\nLet's go to the [next session](https://material.bits.vib.be/topics/git-introduction/tutorials/2_configurations/tutorial.html)!\n","# Data structures in R\n{:.no_toc}\n\nThe power of R lies not in its ability to work with simple numbers but in its ability to work with large datasets.  R has a wide variety of data structures including scalars, vectors, matrices, data frames, and lists.\n\n### Matrices\nA matrix is a table, the columns are vectors of equal length. \nAll columns in a matrix must contain the same type of data. The top row, called the header, contains column labels. Rows can also have labels. Data values are called elements. Indices are often used as column and row labels.\n\n### Creating a matrix\nTo create a matrix M use the matrix() function\n```\nM <- matrix(data,nrow=r,ncol=c,byrow=FALSE)\n```\n\nIt takes a long list of arguments:\n- *data* usually is a vector of elements that will fill the matrix\n- *nrow* and *ncol*: dimensions (number of rows and columns). Only one dimension argument is needed. If there are 20 elements in the *data* vector and *ncol=4* then R will automatically calculate that there should be 5 rows. \n- *byrow*: how the matrix is filled, *byrow=TRUE* fills the matrix row by row whereas *byrow=FALSE* fills the matrix column by column\n\n> ### {% icon hands_on %} Hands-on: Demo\n>\n> From the demo script run the **Data creation: matrices** section\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 8a\n>\n> 1. Create a 2x2 matrix named mat containing numbers 2,3,1,5\n> 2. Print the matrix\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  mat<-matrix(c(2,3,1,5),nrow=2,ncol=2)\n>    >  mat\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 8b\n>\n> 1. 
Create a 2x3 matrix named onemat consisting of all ones\n> 2. Print the matrix\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  onemat<-matrix(1,nrow=2,ncol=3)\n>    >  onemat\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 8c\n>\n> 1. Create a 3x3 matrix containing numbers 1,2,3,4,5,6,7 \n> 2. Retrieve all elements that are larger than 3\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  m <- matrix(c(1,2,3,4,5,6,7),ncol=3) \n>    >  m[m > 3]\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n### Data frames\nJust like a matrix, a data frame is a table where each column is a vector. But a data frame is more general than a matrix: they are used when columns contain different data types, while matrices are used when all data is of the same type. \n\n> ### {% icon comment %} Comment\n>\n> R has a number of built-in data frames like mtcars. \n{: .comment}\n\n### Creating a data frame\nTo create a data frame D use the function data.frame() with the vectors we want to use as columns:\n```\nD <- data.frame(column1,column2,column3)\n```\n\n> ### {% icon comment %} Comment\n>\n> The columns of a data frame are all of equal length\n{: .comment}\n\nYou can provide names (labels) for the columns:\n```\nD <- data.frame(label1=column1,label2=column2,label3=column3)\n```\n\n> ### {% icon comment %} Comment\n>\n> As an argument of data.frame() you use label=vector_to_add: the equals (and not the assignment) operator is used because you are naming columns not creating new variables. \nIf you don't define labels (as in the first example), the names of the vector names are used as column names. 
\n{: .comment}\n\n> ### {% icon hands_on %} Hands-on: Demo\n>\n> From the demo script run the **Data creation: data frames** section\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Exercise 9a\n>\n> Create a data frame called Plant_study containing days and Plants_with_lesions. Name the columns Days and Plants.\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  Plant_study <- data.frame(Days=days,Plants=Plants_with_lesions)\n>    >  Plant_study\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Exercise 9b\n>\n> Create a data frame called Drug_study consisting of three columns: ID, treatment and smoking\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  Drug_study <- data.frame(ID,treatment,smoking)\n>    >  Drug_study\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Exercise 9c\n>\n> Create a data frame genomeSize containing genome sizes and print it. \n> - The first column is called organism and contains Human,Mouse,Fruit Fly, Roundworm,Yeast \n> - The second column size contains 3000000000,3000000000,135600000,97000000,12100000\n> - The third column geneCount contains 30000,30000,13061,19099,6034\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  organism <- c(\"Human\",\"Mouse\",\"Fruit Fly\", \"Roundworm\",\"Yeast\")\n>    >  size <- c(3000000000,3000000000,135600000,97000000,12100000)\n>    >  geneCount <- c(30000,30000,13061,19099,6034) \n>    >  genomeSize <- data.frame(organism,size,geneCount)\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 9d\n>\n> Create a data frame ab and print it. 
\n> - The first column is called a and contains 1,3,2,1\n> - The second column is called b and contains 2,3,4,1\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  a <- c(1,3,2,1)\n>    >  b <- c(2,3,4,1)\n>    >  ab <- data.frame(a,b)\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n##### Referring to the elements of a data frame\nReferring to elements of a data frame can be done in the same way as for matrices, using row and column **indices** in between square brackets. The only difference is that in data frames you can also use the **labels** of the columns to retrieve them.\n\nTo retrieve the element on the second row, first column:\n```\nD[2,1]\n```\n\nTo select all values from one dimension leave the index blank, e.g. all elements of the first column:\n```\nD[,1]\n```\n\n> ### {% icon comment %} Comment\n>\n> If you want to retrieve **all** the rows you don't write any index before the comma inside the square brackets.\n{: .comment}\n\nYou can also use column labels for retrieving elements. Column names have to be written between quotes:\n```\nD[,\"label1\"]\n```\n\nYou can also use the range function to select elements:\n```\nD[2:4,1]\n```\n\nThe **$** symbol can be used to retrieve a column based on its label e.g. 
to retrieve column label1 from D:\n```\nD$label1\n```\n\n> ### {% icon comment %} Comment\n> With $ you do not have to put quotes around the column name\n{: .comment}\n\nSince the result of $ is a vector, you can address a specific element of a column using its index:\n```\nD$label1[2]\n```\nretrieves the second element of the column called label1\n\nSpecific for data frames is the **subset()** function that can be used to select columns that satisfy a logical operation:\n```\nsubset(D,select=columns to extract)\nsubset(D,logical expression,columns to extract)\n```\n\n> ### {% icon hands_on %} Hands-on: Demo\n>\n> From the demo script run the **Data extraction: data frames** section\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Exercise 10a\n>\n> 1. Retrieve the data for the Volvo 142E from mtcars \n> 2. Retrieve the gas usage (mpg column) for the Volvo 142E \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  mtcars[\"Volvo 142E\",]\n>    >  mtcars[\"Volvo 142E\",\"mpg\"]\n>    >  ```\n>    > </details>\n>\n>    > ### {% icon question %} Question\n>    >\n>    >  What will happen when you run this code ?\n>    > ```\n>    >  mtcars[\"Volvo 142E\"]\n>    >  \n>    >  ```\n>    {: .question}\n>    > ### {% icon question %} Question\n>    >\n>    >  What will happen when you run this code ?\n>    > ```\n>    >  mtcars[Volvo 142E,]\n>    >   \n>    >  ```\n>    {: .question}\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Exercise 10b\n>\n> 1. Retrieve the IDs of the smoking patients in Drug_study\n> 2. Retrieve ID and treatment of the smoking patients \n> 3. Retrieve the smoking behavior of all the patients\n> 4. Change the treatment of the fourth patient to A\n> 5. Add a column called activity with values: 4, NA, 12.1, 2.5\n> 6. 
Use subset() to retrieve the full ID and treatment columns\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  subset(Drug_study,smoking==TRUE,ID)\n>    >  subset(Drug_study,smoking==TRUE,c(ID,treatment))\n>    >  Drug_study$smoking\n>    >  Drug_study$treatment[4] <- \"A\"\n>    >  Drug_study$activity <- c(4,NA,12.1,2.5)\n>    >  subset(Drug_study,select=c(ID,treatment))\n>    >  ```\n>    > </details>\n>\n>    > ### {% icon question %} Question\n>    >\n>    >  What will happen when you run this code ?\n>    > ```\n>    >  Drug_study[Drug_study$smoking==TRUE,\"ID\"]\n>    >   \n>    >  ```\n>    {: .question}\n>    > ### {% icon question %} Question\n>    >\n>    >  What will happen when you run this code ?\n>    > ```\n>    >  Drug_study[Drug_study$smoking==TRUE,ID]\n>    >   \n>    >  ```\n>    {: .question}\n>    > ### {% icon question %} Question\n>    >\n>    >  What will happen when you run this code ?\n>    > ```\n>    >  Drug_study[,\"smoking\"]\n>    >  \n>    >  ```\n>    {: .question}\n>    > ### {% icon question %} Question\n>    >\n>    >  What will happen when you run this code ?\n>    >  ```\n>    >  Drug_study[4,\"treatment\"] <- \"B\"\n>    >  ```\n>    {: .question}\n>    > ### {% icon question %} Question\n>    >\n>    >  What will happen when you run this code ?\n>    > ```\n>    >  Drug_study[\"activity\"] <- c(4,NA,12.1,2.5)\n>    >   \n>    >  ```\n>    {: .question}\n>    > ### {% icon question %} Question\n>    >\n>    >  What will happen when you run this code ?\n>    > ```\n>    >  subset(Drug_study,c(ID,treatment))\n>    >   \n>    >  ```\n>    {: .question}\n>    > ### {% icon question %} Question\n>    >\n>    >  What will happen when you run this code ?\n>    > ```\n>    >  subset(Drug_study,,c(ID,treatment))\n>    >   \n>    >  ```\n>    {: .question}\n{: .hands_on}\n\n\n> ### {% icon comment %} Comment\n>\n> The order of the arguments is important except 
when you specify their names. \n{: .comment}\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 10c\n>\n> On which days did we observe more than 2 infected plants in the plant experiment? Answer this question with and without using the subset() function.\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  > Plant_study[Plant_study$Plants > 2,\"Days\"]\n>    >  > subset(Plant_study,Plants > 2,Days)\n>    >  ```\n>    > </details>\n>\n>    > ### {% icon question %} Question\n>    >\n>    >  What will happen when you run this code ?\n>    > ```\n>    >  Plant_study[Plant_study[\"Plants\"] > 2,\"Days\"]\n>    >   \n>    >  ```\n>    {: .question}\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 10d\n>\n> 1. Create vector q by extracting the a column of data frame ab (exercise 9) with and without subset().\n> 2. Retrieve the second element of column a of data frame ab\n> 3. Add column c with elements 2,1,4,7 to data frame ab\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  q <- ab$a\n>    >  subset(q,select=a)\n>    >  ab$a[2]\n>    >  ab$c <- c(2,1,4,7)\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n##### Removing elements from a data frame\nTo remove elements from a data frame use negative indices just as in a vector e.g. to remove the second row from data frame D use:\n```\nD <- D[-2,]\n```\n\n> ### {% icon comment %} Comment\n>\n> The minus sign only works with numbers not with column labels. \n{: .comment}\n\nTo remove columns based on labels assign them to NULL:\n```\nD$genome <- NULL\n```\n\n> ### {% icon comment %} Comment\n>\n> Setting a column to NULL is done via an assignment so the removal is permanent. 
\n{: .comment}\n\n> ### {% icon comment %} Comment\n>\n> Instead of removing elements you can also define the elements you want to keep.\n{: .comment}\n\n> ### {% icon hands_on %} Hands-on: Demo\n>\n> From the demo script run the **Data removal: data frames** section\n{: .hands_on}\n\n##### Reordering columns in a data frame\nReordering columns is a special case of retrieving columns, e.g. for a data frame that has 4 columns you can switch the position of the second and third column as follows:\n```\nD2 <- D[ ,c(1,3,2,4)]\n```\n\n> ### {% icon comment %} Comment\n>\n> The first comma means keep all the rows, and the 1,3,2,4 refer to column indices. \n> You can use indices or labels to refer to the columns. \n{: .comment}\n\nYou can also use subset():\n```\nD2 <- subset(D,select=c(1,3,2,4))\n```\n\n> ### {% icon hands_on %} Hands-on: Demo\n>\n> From the demo script run the **Column reordering: data frames** section\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Exercise 11a\n>\n> Switch the position of the second and the third column of Drug_study\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  Drug_study[,c(1,3,2)]\n>    >   \n>    >  ```\n>    > </details>\n>\n>    > ### {% icon question %} Question\n>    >\n>    >  What will happen when you run this code ?\n>    > ```\n>    >  subset(Drug_study,select=c(1,3,2))\n>    >   \n>    >  ```\n>    {: .question}\n{: .hands_on}\n\n### Lists\nA list is an ordered collection of objects (of any data type: string, numbers, vectors, matrices, data frames). Lists can even contain other lists as objects! A list allows you to gather a variety of objects under one name. 
It is not mandatory but very useful to give each object in a list a label.\n\n##### Creating a list\nTo create a list L use the list() function:\n```\nL <- list(label1=object1,label2=object2,label3=object3)\n```\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 12a\n>\n> 1. Create a list called myList with the following objects: 5, 6, the word seven, the matrix mat.\n> 2. Print the list.\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  myList<-list(5,6,\"seven\",mat)\n>    >   \n>    >  ```\n>    > </details>\n>\n>    > ### {% icon question %} Question\n>    >\n>    >  What will happen when you run this code ?\n>    > ```\n>    >  subset(Drug_study,select=c(1,3,2))\n>    >   \n>    >  ```\n>    {: .question}\n{: .hands_on}\n\n##### Referring to the elements of a list\nReferring to the elements of a list can be done in exactly the same way as for data frames, using row and column **indices or labels** in between square brackets. However, since a list can contain other lists or data frames you have to use **double square brackets** [[ ]] to retrieve elements. \n\n> ### {% icon comment %} Comment\n>\n> The $ operator also works to access the objects of a list.\n{: .comment}\n","# Genome assembly with Velvet: Background\n{:.no_toc}\n\nVelvet is one of a number of *de novo* assemblers that use short read sets as input (*e.g.* Illumina Reads). The assembly method is based on the manipulation of de Bruijn graphs, via the removal of errors and the simplication of repeated regions.\n\n> ### {% icon comment %} Comment\n>\n> For information about Velvet, you can check its (nice) [Wikipedia page](https://en.wikipedia.org/wiki/Velvet_assembler).\n{: .comment}\n\nFor this tutorial, we have a set of reads from an imaginary *Staphylococcus aureus* bacterium with a miniature genome (197,394 bp). 
Our mutant strain read set was sequenced with the whole genome shotgun method, using an Illumina DNA sequencing instrument. From these reads, we would like to rebuild our imaginary *Staphylococcus aureus* bacterium via a *de novo* assembly of a short read set using the Velvet assembler.\n\n> ### Agenda\n>\n> In this tutorial, we will deal with:\n>\n> 1. TOC\n> {:toc}\n>\n{: .agenda}\n\n# Get the data\n\nWe will now import the data that we will use for the tutorial.\n\n> ### {% icon hands_on %} Hands-on: Getting the data\n>\n> 1. Create and name a new history for this tutorial.\n> 2. Import from [Zenodo](https://doi.org/10.5281/zenodo.582600) or from the data library the files:\n>    - [`mutant_R1.fastq`](https://zenodo.org/record/582600/files/mutant_R1.fastq)\n>    - [`mutant_R2.fastq`](https://zenodo.org/record/582600/files/mutant_R2.fastq)\n>\n>    > ### {% icon tip %} Tip: Importing data via links\n>    >\n>    > * Copy the link location (Right-click on the filename then \"Copy Link Address\")\n>    > * Open the Galaxy Upload Manager\n>    > * Select **Paste/Fetch Data**\n>    > * Paste the link into the text field\n>    > * Change the data-type to **fastqsanger**\n>    > * Press **Start**\n>    {: .tip}\n>\n> 3. Change the name of the files to `mutant_R1` and `mutant_R2`.\n>\n>    As a default, Galaxy uses the link as the name of the new dataset. It also does not link the dataset to a database or a reference genome.\n>\n>    {% include snippets/rename_dataset.md %}\n>\n> 4. Inspect the content of a file.\n>\n>    > ### {% icon tip %} Tip: Inspecting the content of a dataset\n>    >\n>    > * Click on the {% icon galaxy-eye %} (eye) icon next to the relevant history entry\n>    > * View the content of the file in the central panel\n>    {: .tip}\n>\n>    > ### {% icon question %} Questions\n>    >\n>    > 1. What are four key features of a FASTQ file?\n>    > 2. 
What is the main difference between a FASTQ and a FASTA file?\n>    >\n>    > > <details markdown=\"1\">\n>    > > <summary>{% icon solution %} Solution\n>    > > </summary>\n>    > > 1. Each sequence in a FASTQ file is represented by 4 lines: 1st line is the id, 2nd line is the sequence, 3rd line is not used, and 4th line is the quality of sequencing per nucleotide\n>    > > 2. In a FASTQ file, not only are the sequences present, but information about the quality of sequencing is also included.\n>    > > </details>\n>    >\n>    {: .question}\n>\n{: .hands_on}\n\nThe reads have been sequenced from an imaginary *Staphylococcus aureus* bacterium using an Illumina DNA sequencing instrument. We obtained the 2 files we imported (`mutant_R1` and `mutant_R2`)\n\n> ### {% icon question %} Question\n>\n> Why do we have 2 files here if we only sequenced the bacteria once?\n>\n> > <details markdown=\"1\">\n> > <summary>{% icon solution %} Solution\n> > </summary>\n> > 1. The bacteria has been sequenced using paired-end sequencing. The first file corresponds to forward reads and the second file to reverse reads.\n> > </details>\n>\n{: .question}\n\n# Evaluate the input reads\n\nBefore doing any assembly, the first questions you should ask about your input reads include:\n\n- What is the coverage of my genome?\n- How good is my read set?\n- Do I need to ask for a new sequencing run?\n- Is it suitable for the analysis I need to do?\n\nWe will evaluate the input reads using the FastQC tool. This tool runs a standard series of tests on your read set and returns a relatively easy-to-interpret report. We will use it to evaluate the quality of our FASTQ files and combine the results with MultiQC.\n\n> ### {% icon hands_on %} Hands-on: FastQC on a fastq file\n>\n> 1. **FastQC** {% icon tool %} with the following parameters\n>    - \"Short read data from your current history\" to (**Multiple datasets**) `mutant_R1.fastq` and `mutant_R2.fastq`\n>\n> 2. 
**MultiQC** {% icon tool %} with the following parameters\n>    - \"Software name\" to `FastQC`\n>    - \"Result file\" to the raw data files generated by FastQC\n>\n{: .hands_on}\n\nMultiQC generates a webpage combining reports for FastQC on both datasets. It includes these graphs and tables:\n\n- General statistics\n\n    This is important in setting maximum k-mer size for an assembly.\n\n    > ### {% icon comment %} Getting the length of sequences\n    >\n    > * Click on **Configure Columns**\n    > * Check **Length**\n    > * Close the window\n    {: .comment}\n\n    > ### {% icon question %} Questions\n    >\n    > 1. How long are the sequences?\n    > 2. What is the average coverage of the genome, given our imaginary *Staphylococcus aureus* bacterium has a genome of 197,394 bp?\n    >\n    > > <details markdown=\"1\">\n    > > <summary>{% icon solution %} Solution\n    > > </summary>\n    > > 1. The sequences are 150 bp long\n    > > 2. We have 2 x 12,480 sequences of 150 bp, so the average genome coverage is: 2 * 12480 * 150 / 197394, or approximately 19 X coverage.\n    > > </details>\n    >\n    {: .question}\n\n- Sequence Quality Histograms\n\n    Dips in quality near the beginning, middle or end of the reads may determine the trimming/cleanup methods and parameters to be used, or may indicate technical problems with the sequencing process/machine run.\n\n    <figure id=\"figure-1\"><img src=\"../../images/fastqc_per_base_sequence_quality_plot.png\" alt=\"Sequence Quality Histograms with the mean quality value across each base position in the read\"><figcaption><span class=\"figcaption-prefix\">Figure 1:</span> The mean quality value across each base position in the read</figcaption></figure>\n\n    > ### {% icon question %} Questions\n    >\n    > 1. What does the y-axis represent?\n    > 2. 
Why is the quality score decreasing across the length of the reads?\n    >\n    > > <details markdown=\"1\">\n    > > <summary>{% icon solution %} Solution\n    > > </summary>\n    > > 1. The y-axis represents the quality score for each base (an estimate of the error during sequencing).\n    > > 2. The quality score is decreasing across the length of the reads because the sequencing becomes less and less reliable at the end of the reads.\n    > > </details>\n    >\n    {: .question}\n\n- Per Sequence GC Content\n\n    High GC organisms tend not to assemble well and may have an uneven read coverage distribution.\n\n- Per Base N Content\n\n    The presence of large numbers of Ns in reads may point to a poor quality sequencing run. You will need to trim these reads to remove Ns.\n\n- k-mer content\n\n    The presence of highly recurring k-mers may point to contamination of reads with barcodes or adapter sequences.\n\n\n> ### {% icon comment %} Comment\n>\n> For a fuller discussion of FastQC outputs and warnings, see the [FastQC website link](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/), including the section on each of the output [reports](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/Help/3%20Analysis%20Modules/), and examples of [\"good\"](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/good_sequence_short_fastqc.html) and [\"bad\"](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/bad_sequence_fastqc.html) Illumina data.\n{: .comment}\n\nWe won't be doing anything to these data to clean them up as there isn't much need. Therefore we will get on with the assembly!\n\n\n# Assemble reads with Velvet\n\nNow, we want to assemble our reads to find the sequence of our imaginary *Staphylococcus aureus* bacterium. We will perform a *de novo* assembly of the reads into long contiguous sequences using the Velvet short read assembler.\n\nThe first step of the assembler is to build a de Bruijn graph. 
For that, it will break our reads into k-mers, *i.e.* fragments of length *k*. Velvet requires the user to input a value of *k* (k-mer size) for the assembly process. Small k-mers will give greater connectivity, but large k-mers will give better specificity.\n\n> ### {% icon hands_on %} Hands-on: Assemble the reads\n>\n> 1. **FASTQ interlacer** {% icon tool %} with the following parameters\n>    - \"Type of paired-end datasets\" to `2 separate datasets`\n>    - \"Left-hand mates\" to `mutant_R1.fastq`\n>    - \"Right-hand mates\" to `mutant_R2.fastq`\n>\n>    Currently our paired-end reads are in 2 files (one with the forward reads and one with the reverse reads), but Velvet requires only one file, where each read is next to its mate read. In other words, if the reads are indexed from 0, then reads 0 and 1 are paired, 2 and 3, 4 and 5, etc. Before doing the assembly *per se*, we need to prepare the files by combining them.\n>\n> 2. **velveth** {% icon tool %} with the following parameters\n>    - \"Hash Length\" to `29`\n>    - \"Input Files\": click on `Insert Input Files`\n>    - \"file format\" to `fastq`\n>    - \"read type\" to `shortPaired reads`\n>    - \"Dataset\" to the pairs output of **FASTQ interlacer**\n>\n>    The tool takes our reads and break them into k-mers.\n>\n> 3. **velvetg** {% icon tool %} with the following parameters\n>    - \"Velvet Dataset\" to the output of **velveth**\n>    - \"Using Paired Reads\" to `Yes`\n>\n>    This last tool actually does the assembly.\n{: .hands_on}\n\nTwo files are generated:\n\n- A \"Contigs\" file\n\n    This file contains the sequences of the contigs longer than 2k. 
In the header of each contig, a bit of information is added:\n    - the k-mer length (called \"length\"): For the value of k chosen in the assembly, a measure of how many k-mers overlap (by 1 bp each overlap) to give this length\n    - the k-mer coverage (called \"coverage\"): For the value of k chosen in the assembly, a measure of how many k-mers overlap each base position (in the assembly).\n\n    ![Contigs output](../../images/image10.png)\n\n- A \"Stats\" file\n\n    This is a tabular file giving for each contig the k-mer lengths, k-mer coverages and other measures.\n\n    ![Contigs stats output](../../images/image11.png)\n\n# Collect some statistics on the contigs\n\n> ### {% icon question %} Question\n>\n> 1. How many contigs have been built?\n> 2. What is the mean, min and max length of the contigs?\n>\n> > <details markdown=\"1\">\n> > <summary>{% icon solution %} Solution\n> > </summary>\n> > 1. 190\n> > 2. To compute this information, we can use the Datamash tool on the 2nd column (length). Be careful with the first line, the header. As a result, we obtain: 597.82 as mean, 1 as min and 12904 as max. It would mean that the smallest contig has a length of 1 bp, even smaller than k. The length on the 2nd column corresponds to length of the contig in k-mers. This means that the smallest contig has a length of 1k = 29. So to obtain the real length, we need to add k-1 to the length. We then obtain a mean contig length of 625.82 bp, a min contig of 29 bp and a max contig of 12,932 bp.\n> > </details>\n>\n{: .question}\n\nThis table is limited, but we will now collect more basic statistics on our assembly.\n\n> ### {% icon hands_on %} Hands-on: Collect fasta statistics on our contigs\n>\n> 1. 
**Quast** {% icon tool %} with\n>    - \"Contigs/scaffolds output file\" to the output of **velvetg**\n>    - \"Type of data\" to `contig`\n>    - \"Reference File\" to `wildtype.fna`\n>    - \"Type of organism\" to `Prokaryotes`\n>    - \"Lower Threshold\" to `500`\n>    - \"Thresholds\" to `0,1000`\n{: .hands_on}\n\nThis tool generates 5 output files, but we will focus on the HTML report and the Icarus viewer.\n\n> ### {% icon question %} Question\n>\n> 1. What is represented in the Icarus viewer?\n>\n> > <details markdown=\"1\">\n> > <summary>{% icon solution %} Solution\n> > </summary>\n> > 1. Icarus is a novel genome visualizer for accurate assessment and analysis of genomic draft assemblies. It draws contigs ordered from longest to shortest, highlights N50, N75 (NG50, NG75) and long contigs larger than a user-specified threshold\n> > </details>\n>\n{: .question}\n\nThe HTML report reports many statistics computed by QUAST to assess the quality of the assembly:\n\n- Statistics about the quality of the assembly when compared to the reference (fraction of the genome, duplication ratio, etc)\n- Misassembly statistics, including the number of misassemblies\n\n    A misassembly is a position in the contigs (breakpoints) that satisfy one of the following criteria:\n    - the left flanking sequence aligns over 1 kbp away from the right flanking sequence on the reference;\n    - flanking sequences overlap on more than 1 kbp\n    - flanking sequences align to different strands or different chromosomes\n\n- Unaligned regions in the assembly\n- Mismatches compared to the reference genomes\n- Statistics about the assembly *per se*, such as the number of contigs and the length of the largest contig\n\n> ### {% icon question %} Question\n>\n> 1. How many contigs have been constructed?\n> 2. Which proportion of the reference genome do they represent?\n> 3. How many misassemblies have been found?\n> 4. Has the assembly introduced mismatches and indels?\n> 5. 
What are N50 and L50?\n> 6. Is there a bias in GC percentage induced by the assembly?\n>\n> > <details markdown=\"1\">\n> > <summary>{% icon solution %} Solution\n> > </summary>\n> > 1. 190 contigs have been constructed, but only 47 have a length > 500 bp.\n> > 2. The contigs represents 87.965% of the reference genome.\n> > 3. 1 misassembly has been found: it corresponds to a relocation, *i.e.* a misassembly event (breakpoint) where the left flanking sequence aligns over 1 kbp away from the right flanking sequence on the reference genome.\n> > 4. 8.06 mismatches per 100 kbp and 4.03 indels per 100 kbp are found.\n> > 5. N50 is the length for which the collection of all contigs of that length or longer covers at least half an assembly. In other words, if contigs were ordered from small to large, half of all the nucleotides will be in contigs this size or larger. And L50 is the number of contigs equal to or longer than N50: L50 is the minimal number of contigs that cover half the assembly.\n> > 6. The GC % in the assembly is 33.64%, really similar to the one of the reference genome (33.43%).\n> > </details>\n>\n{: .question}\n\n# Discussion\n\n> ### {% icon hands_on %} (Optional) Hands-on: Rerun for values *k* ranging from 31 to 101\n>\n> 1. **velveth** {% icon tool %} with the same parameters as before except\n>    - \"Hash Length\" to a value between 31 and 101\n> 2. **velvetg** {% icon tool %} with the same parameters as before\n> 3. **Quast** {% icon tool %} with the same parameters as before\n{: .hands_on}\n\nWe have completed an assembly on this data set for a number of k values ranging from 29 to 101. 
A few of the assembly metrics appear below.\n\n<figure id=\"figure-2\"><img src=\"../../images/number_of_contigs.png\" alt=\"contigs\"><figcaption><span class=\"figcaption-prefix\">Figure 2:</span> Number of contigs in the assembly for various k-mer sizes</figcaption></figure>\n\n<figure id=\"figure-3\"><img src=\"../../images/largest_contig.png\" alt=\"largest_contig\"><figcaption><span class=\"figcaption-prefix\">Figure 3:</span> Largest contig in each of the assemblies by k-mer size</figcaption></figure>\n\n<figure id=\"figure-4\"><img src=\"../../images/total_bp.png\" alt=\"total_bp\"><figcaption><span class=\"figcaption-prefix\">Figure 4:</span> Total number of base pairs in all the contigs for each assembly by k-mer size</figcaption></figure>\n\n<figure id=\"figure-5\"><img src=\"../../images/n50.png\" alt=\"n50\"><figcaption><span class=\"figcaption-prefix\">Figure 5:</span> N50 metric for each of the assemblies by k-mer size</figcaption></figure>\n\n> ### {% icon question %} Questions\n>\n> 1. Are there any distinct features in the charts?\n> 2. Does it look like one assembly might be better than some of the others?\n>\n{: .question}\n\nThe reasons for these patterns will be discussed in detail in the De Bruijn graph assembly slides and tutorial.\n","# Manipulation of variables \n{:.no_toc}\n\n### General functions\nThe big difference between R and other programming languages is that functions in R are designed to be applied to variables rather than to individual values to avoid loops e.g. if we want to log transform a whole dataset we can do this using a single operation:\n```\n> v <- c(1,10,100,1000,10000)\n> log10(v)\n[1] 0 1 2 3 4\n```\nThe log10() function is written in such a way that it can be applied on a vector. 
This is true for all functions and operators in R:\n```\n> v - 1\n[1] 0     9    99   999  9999\n```\nR has built-in functions for virtually any standard mathematical task.\n \n<figure id=\"figure-1\"><img src=\"../../images/Rgeneral_functions.png\" alt=\"general_functions\"><figcaption><span class=\"figcaption-prefix\">Figure 1:</span> Overview of built-in functions</figcaption></figure>\n\nArithmetic operators can be used on variables. Provided that the variables have the same dimensions, you can do element-wise addition, subtraction, multiplication and division of two vectors or tables. Element-wise means that the calculation is performed on the equivalent positions between the two variables: first element + first element, second element + second element etc.\n\n```\n> v1<-c(1,2,3)\n> v2<-c(4,5,6)\n> z<-v1+v2\n> z\n[1] 5 7 9\n```\n\nIf you perform operations on vectors with different lengths (not recommended) then the vector with the shorter length is recycled to the length of the longer vector so that the first element of the shorter vector is appended to the end of that vector (a way of faking that it is of equal length to the longer vector) and so forth. You will get a warning, but R does let you perform the operation:  \n\n```\n> x1 <- c(1,2,3)\n> x2 <- c(3,4)\n> x3 <- x1 + x2\nWarning message: \nIn x1 + x2:\n  longer object length is not a multiple of shorter object length\n> x3\n[1] 4 6 6\n```\n\n> ### {% icon hands_on %} Hands-on: Demo\n>\n> From the demo script run the **Operations on variables** section\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Exercise 13a\n>\n> 1. Calculate log base2 of the activity in Drug_study\n> 2. 
Round the result to the nearest integer\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >  ```\n>    >  log.act <- (log2(Drug_study$activity))\n>    >  round(log.act)\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 13b\n>\n> 1. Create vector v as the sum of newVector and threes using an arithmetic operator \n> 2. Print the content of v\n> 3. Do the same for newVector and vector x2 with elements 3,1\n> 4. Join the elements of newVector and threes into 1 vector q\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >  ```\n>    >  v <- newVector + threes\n>    >  v\n>    >  x2 <- c(3,1)\n>    >  newVector + x2 \n>    >   q <- c(newVector,threes)\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Exercise 13c\n>\n> 1. Add a column called geneDensity to genomeSize containing the number of bp per gene for every organism \n> 2. Round the numbers to the nearest integer\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  dens.fl <- genomeSize$size / genomeSize$geneCount\n>    >  genomeSize$geneDensity <- round(dens.fl)\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\nSome functions only work on vectors. 
For instance sort() will sort data from smallest to largest (arguments allow other ordering) and order() returns the indices of the sorted elements:\n```\nx\n[1] 1 3 11 1 7\nsort(x)\n[1] 1 1 3 7 11\norder(x)\n[1] 1 4 2 5 3\n```\nIn the sorted vector the first element is also the first element of the original vector, the second element of the sorted vector has index 4 in the original vector etc.\nTo sort a data frame use order() inside square brackets:\n```\nmtcars[order(mtcars$mpg),]\n```\nTo sort on two columns (first on mpg, then on wt): \n```\nmtcars[order(mtcars$mpg,mtcars$wt),]\n```\nTo sort in descending order place a minus sign in front of the variable:\n```\nmtcars[order(mtcars$mpg,-mtcars$wt),]\n```\n\nSelect the **labels** of a vector or table using names(). For tables rownames() and colnames() can access or set either the row or the column labels. Both functions will not work on vectors. \n\nThe length() function retrieves the number of elements of a vector. Used on data frames it doesn't throw an error but returns the number of columns instead. \n\nThe same is true for match(x,y). It compares x and y and returns a vector with the same length as x containing: \n-  NA for elements of x that are not in y  \n- the index in y for elements in x that are in y\n\nOn data frames it will not do an element-wise comparison but a column-wise comparison: \n```\nmatch(D1,D2) \n```\nwill return a vector with length equal to the number of columns in D1 containing:\n- NA for columns of D1 that are not in D2\n- the index in D2 for columns in D1 that are in D2 (so the complete column has to match, not the individual elements)\n\nIt is important to see the difference between the + operator and sum(). The former works element-wise on two variables, the latter calculates the sum of all elements of one vector.\n\nThere are also functions to be used only on tables, e.g. 
\n- dim() returns how many rows and columns a table has, nrow() and ncol() will get these values individually\n- t() transposes matrices (exchanges rows and columns), the output is a transposed matrix: the columns are the rows of the original matrix and vice versa\n\nUse merge() to join two data frames. Let's say D1 has a column A with values. Data frame D2 has the same values stored in column A. Merge the two data frames on the basis of this common column:\n```\nnewD <- merge(D1,D2)\n```\nIf (some of) the values of the common column differ, merge() will ignore these values. Use argument *all.x* to add an extra row for every different value to the resulting data frame. All rows where the values of the two data frames don't correspond, will be filled up with NA values.\n\nMost functions operate on numbers but there are also functions for manipulating text, e.g. \n```\npaste(x,y,sep=\" \") \n```\t\nconcatenates two strings x and y (glues them together into one string) separating them by the character defined by *sep*. Arguments *x* and *y* can be strings but they can also be vectors. If they are vectors, they are concatenated element-wise to give a character vector result.\n\nFurthermore there are also functions specific for factors. For instance to select the names of the categories (levels) of a factor use levels() and table() to create a contingency table. \n```\n table(cell_phone_data$own, cell_phone_data$grade)\n```\n\n<figure id=\"figure-2\"><img src=\"../../images/Rtable_function.png\" alt=\"table_function\"><figcaption><span class=\"figcaption-prefix\">Figure 2:</span> Example of a contingency table</figcaption></figure>\n\n> ### {% icon hands_on %} Hands-on: Exercise 13d\n>\n> You repeat the plant study experiment this time having the following numbers of plants developing lesions: 1, 6, 6, 5, 4\n> 1. Add these data as a third column to the data frame \n> 2. Relabel columns to Day, Infected and Repeat\n> 3. Use paste() to add the word 'day' 
to the elements of the Day column. Look at the documentation first !\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  Plant_study$repeated <- c(1,6,6,5,4)\n>    >  names(Plant_study) <- c(\"Day\",\"Infected\",\"Repeat\")\n>    >  ?paste\n>    >  Plant_study$Day <- paste(Plant_study$Day,\"day\",sep=\"\")\n>    >  ```\n>    > </details>\n>\n>    > ### {% icon question %} Question\n>    >\n>    >  What will happen when you run this code ?\n>    >  ```\n>    >  paste(Plant_study[,\"Day\"],\"day\",sep=\"\")\n>    >  ```\n>    {: .question}\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Exercise 13e\n>\n> 1. Change the label of the second column of Drug_study to drug\n> 2. How many rows does Drug_study contain?\n> 3. Order the rows according to decreasing activity\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  colnames(Drug_study)[2] <- \"drug\"\n>    >  nrow(Drug_study)\n>    >  Drug_study[order(Drug_study$activity,decreasing=TRUE),]\n>    >  ```\n>    > </details>\n>\n>    > ### {% icon question %} Question\n>    >\n>    >  What happens when you run this code ?\n>    >  ```\n>    >  colnames(Drug_study$ID) <- \"id\"\n>    >  ```\n>    {: .question}\n>    > ### {% icon question %} Question\n>    >\n>    >  What happens when you run this code ?\n>    >  ```\n>    >  colnames(Drug_study[2]) <- \"blabla\"\n>    >  ```\n>    {: .question}\n>    > ### {% icon question %} Question\n>    >\n>    >  What will happen when you run this code ?\n>    >  ```\n>    >  Drug_study[order(Drug_study$activity),\"ID\"]\n>    >  ```\n>    {: .question}\n>    > ### {% icon question %} Question\n>    >\n>    >  What will happen when you run this code ?\n>    >  ```\n>    >  n <- order(Drug_study$activity,decreasing=TRUE)\n>    >  Drug_study[n,]\n>    >  ```\n>    {: .question}\n{: .hands_on}\n\n> ### {% icon hands_on %} 
Hands-on: Extra exercise 13f\n>\n> 1. Sort the elements of z from smallest to largest\n> 2. Now use order(z). What's the difference with the previous exercise?\n> 3. How many elements does z contain?\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  sort(z)\n>    >  order(z)\n>    >  length(z) \n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 13g\n>\n> Add a new row to data frame ab containing values: 3,4,7\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  d <- c(3,4,7)\n>    >  ab <- rbind(ab,d)\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 13h\n>\n> 1. How many rows and columns are in the built-in data frame CO2 (data on CO2 uptake by plants)\n> 2. Use levels() to retrieve the names of the Treatment categories\n> 3. Create a contingency table with counts (number of plants) in every category of CO2 that is defined by Type and Treatment\n> 4. Use unique() to count how many plants were studied\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  dim(CO2)\n>    >  levels(CO2$Treatment)\n>    >  table(CO2$Type,CO2$Treatment)\n>    >  length(unique(CO2$Plant))\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n### Functions helpful for working with large data sets\nResearch in biology/medicine often generates very large data sets. When you work with very large data sets, it is often useful to show only a small part of the data set;\n- head() shows the first 6 elements (vector) or rows (table) of a variable \n- tail() prints the last 6 elements or rows\n\n> ### {% icon hands_on %} Hands-on: Exercise 14a\n>\n> 1. View the first 6 rows of the mtcars data frame\n> 2. 
Return TRUE if mtcars contains cars with 6 gears and FALSE if not\n> 3. How many cars with 3 gears are in mtcars?\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >  ```\n>    >  head(mtcars)\n>    >  nrow(subset(mtcars,gear==6))!=0\n>    >  nrow(subset(mtcars,gear==3))\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n### Functions for finding indices of specific elements\nThere are functions that help you locate specific values, the which functions:\n```\nwhich.min(x)\nwhich.max(x)\n```\nreturn the location (index) of the minimum, maximum or a specific value of a vector x. So max() will return the highest value in the data, which.max() will return the index of the highest value in the data.\n\nThe argument of which() is a logical expression and which() will return the indices of the elements for which the logical expression is TRUE. \n```\nx <- c(1,5,8,4,6)\nx\n# [1] 1 5 8 4 6\nwhich(x == 5)\n# [1] 2\nwhich(x != 5)\n# [1] 1 3 4 5\n```\n\n> ### {% icon hands_on %} Hands-on: Exercise 15a\n>\n> Get the data of the patient with the highest activity in Drug_study\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  Drug_study[which.max(Drug_study$activity),]\n>    >  \n>    >  ```\n>    > </details>\n>\n>    >    {: .question}\n>    > ### {% icon question %} Question\n>    >\n>    >  What will happen when you run this code ?\n>    > ```\n>    >  n <- which.max(Drug_study$activity)\n>    >  Drug_study[n,]\n>    >  ```\n>    {: .question}\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Exercise 15b\n>\n> 1. Get the index of the column called cyl in mtcars\n> 2. 
Create a data frame that contains the car with the lowest mpg for each category of cyl\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  which(names(mtcars)==\"cyl\")\n>    >  C4m <- mtcars[order(mtcars$cyl,mtcars$mpg),][1,]\n>    >  C6 <- subset(mtcars,cyl==6)\n>    >  C6m <- C6[which.min(C6$mpg),]\n>    >  C8m <- mtcars[order(-mtcars$cyl,mtcars$mpg),][1,]\n>    >  rbind(C4m,C6m,C8m)\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n### Checking and converting types of variables\nTo check the data structure of an object you can use str() and the generic class() function:\n```\nclass(c(10,12,30))\n# [1] \"numeric\"\nclass(c(\"alana\",\"britt\",\"chris\"))\n# [1] \"character\"\nclass(c(TRUE,TRUE,FALSE))\n# [1] \"logical\"\n```\n\nYou can also use the specific is. functions e.g. is.numeric(), is.character(), is.Date(), is.vector(), is.matrix(), is.data.frame() etc.\n\nThe is.na(x) function returns TRUE when an element of x is missing:\n```\nx <- c(1,2,3,NA)\nis.na(x)\n# [1] FALSE FALSE FALSE TRUE\n```\nTo recode values to missing values you don't need is.na(). Select the rows that contain the value you want to recode, e.g. 99, and change the value using an assignment:\n```\ndata$v1[data$v1==99] <- NA\n```\nTo exclude missing values you can use is.na() but there are alternatives. The problem with missing values is that when you apply arithmetic functions on variables that contain missing values they will return missing values and you will have no result. To circumvent this problem many functions have the *na.rm* argument. If you set *na.rm=TRUE* missing values are deleted before calculations are done.\n```\nmean(x) \t\t\t\n# NA\nmean(x,na.rm=TRUE) \t\n# 2\n```\nThe function na.omit() allows you to create a new vector without missing values. 
If you apply this function on a data frame it will remove complete rows that contain one or more NA-values.\n```\nnewdata <- na.omit(x)\n```\nYou can convert the data type of an object by using the as. functions e.g. as.numeric(), as.character(), as.Date(), as.vector(), as.matrix(),\nas.data.frame() etc.\n\n> ### {% icon hands_on %} Hands-on: Demo\n>\n> From the demo script run the **Checking and converting data types** section\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Exercise 16a\n>\n> We created a vector containing the days of the week and loaded this into a data frame called Plant_study. If we want to replace the days of the week by real dates, how should we proceed?\n> \n> To create a Date object in R:\n> - define the date as a string in the following format: 1970-01-01\n> - transform the string into a date by using as.Date()\n> 1. Replace the days of the week by the dates of this week\n> 2. What type of data is Plant_study ?\n> 3. Convert Plant_study into a matrix called PS\n> 4. Did the conversion work? Look at the matrix to see if there is a problem. \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  Plant_study$Days <- as.Date(c(\"2019-01-09\",\"2019-01-10\",\"2019-01-11\",\"2019-01-12\",\"2019-01-13\"))\n>    >  class(Plant_study)\n>    >  PS <- as.matrix(Plant_study)\n>    >  PS\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 16b\n>\n> 1. Check the data type of the second column of Drug_study. Retrieve the column using a comma.\n> 2. Convert the second column into a vector. \n> 3. What is different now? 
Look at the vector.\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  class(Drug_study[,2])\n>    >  v <- as.vector(Drug_study[,2])\n>    >  v\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Exercise 16c\n>\n> Instead of deleting missing values with na.omit() you can select the non-missing values.\n> 1. Create a vector with a missing value \n> 2. Multiply all elements with 2. What happens?\n> 3. Check if the 2nd element is missing\n> 4. Delete the missing value using is.na() and the strategy above\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  x <- c(1,2,3,NA)\n>    >  x*2\n>    >  is.na(x[2])\n>    >  x[!is.na(x)]\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 16d\n>\n> 1. Check if z is a vector or a data frame \n> 2. Check if z contains numbers or characters\n> 3. Convert z into a matrix\n> 4. Convert the elements of z into characters\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  is.vector(z)\n>    >  is.data.frame(z) \n>    >  is.character(z)\n>    >  is.numeric(z)\n>    >  as.matrix(z) \n>    >  as.character(z)\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 16e\n>\n> 1. Create a vector called words containing Hello, Hi \n> 2. Convert the words into numbers. What happens?\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  words <- c(\"Hello\",\"Hi\")\n>    >  as.numeric(words) \n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\nR is smart enough to catch you if you try to do an illogical conversion, such as convert characters to numbers. 
It does the conversion but the data is converted to NA values.\n","# 1. Introduction\nIn this chapter we will discuss a strategy for collaborating on projects. These strategies are especially useful when we do not have authorisation to change the content of someone else's project, even though we still have a useful edit/suggestion in mind.  \n\nImagine that you're starting a project with some colleagues and you want to version control the project. If it were to be a document where each of you needs to write part of it, you could simply start a Google Doc. For coding purposes the situation is a bit more complex. There might be a base version of the code already to which you need to add separate parts, however you always need to test whether your part is working together with the rest of the code. \n\nFor this purpose, GitHub encourages the Fork & Pull workflow. Basically one **forks** a central repository, making it a personal forked repository. This repository can constantly be up to date with the central repository by merging those upstream changes in your personal forked repository.  \n\nAfter you forked a repository, it will appear as a new repository in your GitHub account. The next step would be to **clone** the repository locally so you can work on the project from your computer. It's always a good idea to make changes in a **new branch** and keep the *main* branch clean. Hence, after cloning the repository, you could make a new branch. Editing the files, staging, committing and pushing your changes remains the same and they will appear in your new personal forked repository. \n\nWhen you are happy about your changes, when all the commits are pushed to your forked repository, these changes can be merged back into the central repository by creating a **pull request**. The main author can now decide whether he/she is happy about your suggestions and can include (part of) them. 
This workflow leaves the central repository untouched until the moment you want to incorporate changes.\n\n---\n\n![Representation of forking & pulling](../../images/fork_pull.png)\n\n---\n\n\nTwo important terms in this fork & pull workflow are:\n- `upstream`: generally refers to the original repository that you have forked\n- `origin`: is your fork: your own repository on GitHub  \n\nAs mentioned in section 4.4, the \"origin\" is used to refer to the GitHub original repository's URL. This still holds here. The remote `origin` refers to your fork on GitHub, not the original repository it was forked from. \n\nTo summarize the above, the Fork & Pull workflow consists of the following steps:\n1. Fork\n2. Clone\n3. Branch\n4. Stage-commit-push\n5. Pull request\n\n# 2. Fork\nLet's explore GitHub first. GitHub is like the Facebook of programmers. You can see someone's account, what that person has been working on, find new projects (relatable to a Facebook page), etc. Exploring new repositories is possible by clicking on the 'Explore' button in the navigation bar. Searching a specific repository or searching for an account, on the other hand, is possible by simply typing it in the search bar in the navigation bar. \n\n---\n<center><img src=\"../../images/nav-bar.PNG\" /></center>\n\n---\n\nSearch for the VIB Bioinformatics Core account 'vibbits' and find the repository 'fork-repository'. This repository was made specifically for learning the concept of forking. Fork it by clicking the fork button in the upper right corner.\n\n---\n<center><img src=\"../../images/fork-button.PNG\" /></center>\n---\n\n\nThe repository has been successfully forked if you see something similar to the figure below. The icon represents a fork, followed by your GitHub account name and the name of the repository. The second line tells us that the upstream repository is the 'vibbits/fork-repository'. \n\n---\n\n<center><img src=\"../../images/forked-repository.PNG\" /></center>\n---\n\n\n# 3. 
Changes\nClone this repository locally, make a branch (e.g. name it *yourname*) and do some edits in that branch. Add your name, accountname or initials and the date to the `participants.txt` file. For this exercise we will only edit the `participants.txt` file. The flow here remains the same: stage-commit-push. After these changes took place, we will have a similar situation \n\n---\n\n<center><img src=\"../../images/edited-forked-repository.PNG\" /></center>\n---\n\n\nIn normal circumstances it is possible that the upstream repository has changed in the meantime. The indicator would then note that there are new commits in the upstream (`1 commit behind vibbits:main`), while the branch/repository itself is one commit ahead.  \n\n---\n\n<center><img src=\"../../images/forked-repository-ahead.PNG\" /></center>\n---\n\nThis does not (really) affect the pull request. In any case, the following step is to create a pull request.\n\n# 4. Pull request\nThe two repositories have diverged during the previous steps. Now it's time to create a pull request between these repositories. After clicking the **Pull request** a new screen pops up that looks very similar to the one seen in Chapter 5 (Branching & merging). Note that most developers do not really appreciate it if you try to merge your changes straight into the *main* branch. Usually, they would write some suggestions as to how we can collaborate on a project preferably. Let's assume that the developers of this repository expect you to merge changes into the *dev* branch, then it would look something like this:\n\n---\n\n<center><img src=\"../../images/forked-pull-request.PNG\" /></center>\n---\n\nGitHub tells us:\n- It compared the main branch of the forked repository (in my case *tmuylder/fork-repository*) with the upstream (base) repository *vibbits/fork-repository*. 
\n- It's able to merge these two branches without any conflicting errors\n- It summarizes the changes that have been done in the branch that will be merged into the upstream.  \n\nIf all seems good, we can create the pull request. In the case that there are any conflicting errors, they will need to be solved first. Afterwards we only need to add a message that accompanies the pull request. \n\nA brief overview of the pull request is given in the following screen which either allows you to merge the pull request into the upstream repository yourself or which requests the maintainer of the upstream repository to review and merge the pull request. In the latter case, the maintainer will thereafter receive a notification showing the pull request. An overview of all pending pull requests in which you are involved can be found on the [pull requests](https://github.com/pulls) tab of the navigation bar.   \n\n\n# 5. Overview\n\nTo briefly summarize, the steps that we took were: *fork > clone > branch > edit-stage-commit-push > pull request (> merge)* and represent a strategy for collaborating on projects. These strategies are especially useful when we do not have authorisation to change the content of someone else's project, even though we still have a useful edit/suggestion in mind.    \nWhat if the upstream repository changed while you were working on your local repository? In this case a pull request should be done in which the receiving branch is your forked repository. Hence, the order of the branches as depicted in the figure above would be swapped.    \n\n\n\n---\n\n> ### {% icon hands_on %} Exercise \n>\n> Merge upstream changes in your forked repository. This approach is useful if you are working on a project that is prone to lots of changes and you need to keep up to date. \n> Note: This exercise can only be performed if the repository `vibbits/fork-repository` has changed after you forked it.  
\n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    > You need to merge any upstream changes into your version, and you can do this with a pull request on GitHub too. This time though you will need to switch the bases of the  comparison around, because the changes will be coming from the upstream version to yours. First find the following notification in your repository and click on pull request:  \n>    > <center><img src=\"../../images/Exercise-fork-1.PNG\" /></center>\n>    > In my case, the order is not how it's supposed to be and the message reads: \"There isn't anything to compare. vibbits:main is up to date with all commits from tmuylder:main.\". Click on *switching the base* in order to insert the changes from the upstream in your forked repository.  \n>    > \n>    > A message similar to the following will allow to create a pull request and subsequently merge the changes into your forked repository. \n>    > \n>    > \n>    > <center><img src=\"../../images/Exercise-fork-2.PNG\" /></center>\n>    > \n>    > \n>    > </details>\n> \n{: .hands_on}\n\n---\n \n\nLet's continue with the [next session](https://material.bits.vib.be/topics/git-introduction/tutorials/7_gitignore/tutorial.html)!\n","## 5.1 Introduction\n\nSo far we've seen variables where you essentially assign a value to a name that you can use in the program. It is also possible to assign groups of values to a name, in Python these are called *lists* and *tuples* - variables that contain multiple values in a fixed order. Python also has *sets*, which are also variables that contain multiple values, but in no particular order. In [section 8](8_Dictionaries.ipynb) we will also discuss dictionaries. By means of a brief summary, already in this stage; there are four collection data types in Python:\n- `List` is a collection which is ordered and changeable. Allows duplicate members. 
Use square brackets [] for lists.\n- `Tuple` is a collection which is ordered and unchangeable. Allows duplicate members. Use normal brackets () for tuples.\n- `Set` is a collection which is unordered and unindexed. No duplicate members. Use curly brackets {} for sets. \n- `Dictionary` is a collection which is unordered, changeable and indexed. No duplicate members. Use curly brackets {} for dictionaries (see [section 8](8_Dictionaries.ipynb)).\n\nThey are useful in different circumstances and each data-type has its own advantage. On a small-scale example this might not be noticeable, however on a larger scale using the right data-type can save you a lot of time. \n\n\n\n\n\n## 5.2 Lists and range\n\nYou can make your own Python list from scratch:\n\n\n```python\nmyList = [5,3,56,13,33]\nmyList\n```\n\nYou can also use the `range()` function. Try this:\n\n\n\n```python\nmyList = list(range(10))\nmyList\n```\n\nYou should get the following output: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]. This is a list of integers - you can recognize a list by the square [ ] brackets. **Note** that Python always starts counting from 0. The command above will give you a series of integers starting from 0 and stopping at the number you defined, however with this number **not** included in the list. Hence, it stops at 9. You can start from a different number as well:\n\n\n```python\nmyList = list(range(3,12))\nmyList\n```\n\nor increase the step size (the default step size is 1):\n\n\n\n\n```python\nmyList = list(range(1,12,2))\nmyList\n```\n\nAn important feature of lists is that they are flexible - you can add and remove values, change the order, ... . You can do such modifications by calling a *method* from the list itself. 
Some examples of methods are:\n- Add elements\n    - `append()` to append an item to the end of the list\n    - `insert()` to add an item at the specified index\n    - `extend()` to extend an item\n- Delete elements\n    - `remove()` to remove the specified item\n    - `pop()` to remove the specified index (or the last item if index is not specified)\n    - `del` keyword removes the specified index\n    - `clear()` method empties the list\n- Sorting:\n    - `sort()` will sort the list in an ordered way\n    - `reverse()` will reverse the order of the list\n- Copy of a list with the `copy()` method\n\n\n\n```python\nmyList = []             # Create an empty list\nmyList.append(5)        # Add a single value to the back of the list\nmyList\n```\n\n\n```python\nmyList.insert(0,9)      # Insert a value in the list at index (element position) 0\nmyList\n```\n\n\n```python\nmyList.extend([99,3,5]) # Extend the list with another list\nmyList\n```\n\n\n```python\nmyList[0]               # Return the first element in the list (counting starts at zero) \n```\n\n\n```python\nmyList[2]               # Return the third element in the list\n```\n\n\n```python\nmyRemovedElement = myList.pop(3)  # Remove the fourth element in the list and return it\nprint(\"I removed {}\".format(myRemovedElement))\nmyList\n```\n\n\n```python\nmyList.sort()           # You can sort the elements in a list - this will change their order\nmyList\n```\n\n\n```python\nmyList.reverse()        # Or reverse the order of the list\nmyList\n```\n\nYou can also select a slice from a list - this will give you a new list:\n\n\n```python\nmyList = list(range(15))\n \nmyListSlice = myList[3:6]\nmyListSlice\n```\n\n\n```python\nmyListCopy = myList[:]\nprint(myListCopy)\n```\n\n\n```python\nprint(myList[-4:])     # This will select the fourth-last to the last element in the list\n```\n\nThere are two other methods you can use on lists:\n- `index()` returns the index of the first element with the specified value\n- 
`count()` returns the number of elements with the specified value\n\n\n```python\nmyList = list(range(1,15))\nmyList\n```\n\n\n```python\nmyList.count(10)   # Will count the amount of times the value 10 occurs in this list\n```\n\n\n```python\nmyList.count(\"A\")  # This always works, and will return 0 if nothing is found\n```\n\n\n```python\nmyList.index(10)   # Will give the index of the element with value 10 - in this case 9 because the list index starts at 0.\n```\n\n\n```python\n#print(myList.index(\"A\"))  # This will crash the program - the value to look for has to be present in the list!!!\n```\n\n\n\n---\n\n> ### {% icon hands_on %} Exercise 5.2.1\n>\n> Take the list [54,56,2,1,5223,6,23,57,3,7,3344], sort it in reverse order (largest value first) and print out the third value.\n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```python\n>    >  # Take the list [54,56,2,1,5223,6,23,57,3,7,3344], sort it in reverse order (largest value first) and print out the third value.\n>    >  myList = [54,56,2,1,5223,6,23,57,3,7,3344]\n>    >  \n>    >  myList.sort()\n>    >  myList.reverse()\n>    >  \n>    >  print(myList[2])\n>    >  #The first element is at index 0, the third at index 2!\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n---\n\n\n## 5.3 Tuples  \nSimilar to *lists* are *tuples* - essentially they are the same, except that a tuple cannot be modified once created. This can be useful for values that don't change, like (part of) the alphabet for example:\n\n\n```python\nmyTuple = (\"A\",\"B\",\"C\",\"D\",\"E\",\"F\")\nmyTuple\n```\n\nImportant to remember is that if you create a tuple with one value you have to use a comma:\n\n\n```python\nmyTuple = (\"My string\",)\nmyTuple\n```\n\n\n```python\nmyWrongTuple = (\"My string\")  # The brackets here don't do anything.\nmyWrongTuple\n```\n\nA tuple is indicated by round brackets **( )**. 
You can interconvert between lists and tuples by using `list()` and `tuple()`:\n\n\n\n\n```python\nmyTuple = (\"A\",\"B\",\"C\",\"D\",\"E\",\"F\")\nmyList = list(range(10))\n \nmyNewTuple = tuple(myList)\nmyNewList = list(myTuple)\n \nprint(\"{} and {}\".format(myList, myNewTuple))\nprint(\"{} and {}\".format(myTuple, myNewList))\n```\n\nYou can find out the length (number of elements) in a list or tuple with `len()`:\n\n\n```python\nmyTuple = (\"A\",\"B\",\"C\",\"D\",\"E\",\"F\")\nmyTupleLength = len(myTuple)\nmyTupleLength\n```\n\nTuples are faster during iteration procedures due to their immutability. \n\n\n\n---\n\n> ### {% icon hands_on %} Exercise 5.3.1\n>\n> Start with the tuple `('a','B','c','D','e','F')`, sort it, take the fourth value out, and print the result.\n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```python\n>    >  # Start with the tuple ('a','B','c','D','e','F'), sort it, take the fourth value out, and print the result.\n>    >  myTuple = ('a','B','c','D','e','F')\n>    >  myList = list(myTuple)\n>    >  myList.sort()\n>    >  #print(myList)\n>    >  \n>    >  print (\"Removing {}\".format(myList.pop(3)))\n>    >  print (\"Result is {}\".format(str(tuple(myList))))\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n---\n\n\n\n\n## 5.4 Strings\n**Strings are a bit like lists and tuples** \n\nStrings are really a sequence of characters, and they behave similar to lists:\n\n\n```python\nmyString = \"This is a sentence.\"\n \nmyString[0:5]          # Take the first five characters\n```\n\n\n```python\nmyString.count(\"e\")    # Count the number of 'e' characters\n```\n\n\n```python\nmyString.index(\"i\")    # Give the index of the first 'i' character\n```\n\nYou cannot re-assign strings as you do with lists though, the following example does not work:\n\n\n```python\nmyString = \"   This is a sentence.  
\"\n```\n\n\n```python\nprint(myString.upper())       # Upper-case all characters\n```\n\n\n```python\nprint(myString.lower())       # Lower-case all characters\n```\n\n\n```python\nprint(myString.strip())       # Strip leading and trailing spaces/tabs/newlines\n```\n\n\n```python\nprint(myString.split())       # Split the line into elements - default is splitting by whitespace characters\n```\n\n\n```python\nprint(myString.replace(' is ',' was '))  # Replace ' is ' by ' was '. Spaces are necessary, otherwise the 'is' in 'This' will be replaced!\n```\n\nA list with all string methods and a full description can be found in the [Python documentation](https://docs.python.org/3/library/stdtypes.html#string-methods), or simply type `dir(myString)`\n\n\n```python\ndir(myString)\n```\n\n\n---\n> ### {% icon hands_on %} Exercise 5.4.1\n>\n> Ask the user for two words, then check whether they are the same (upper or lower case should not matter),if not check whether they have the same first letter (again case should not matter). If not, then print their length. \n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```python\n>    >  # Ask the user for two words, then check whether they are the same (upper or lower case should not matter),if not check whether they have the same first letter (again case >    >  should not matter). If not, then print their length. 
\n>    >  firstWord = input(\"Give first word:\")\n>    >  secondWord = input(\"Give second word:\")\n>    >  \n>    >  print(len(firstWord))\n>    >  \n>    >  if firstWord.upper() == secondWord.upper():\n>    >      print(\"Words are the same (ignoring case).\")\n>    >  elif firstWord[0].upper() == secondWord[0].upper():\n>    >      print(\"Words share the same first letter (ignoring case).\")\n>    >  else:\n>    >      print(\"Word lengths are {} and {}\".format(int((len(firstWord))),int(len(secondWord))))\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n---\n\n## 5.5 Sets  \nVery useful as well are sets. These are unordered and unindexed (so the order in which you put in elements doesn't matter), and it is much easier to compare them to each other. Because sets cannot have multiple occurrences of the same element, it makes sets highly useful to efficiently remove duplicate values from a list or tuple and to perform common math operations like unions and intersections.\n\n![sets](../../images/Python-Set-Operatioons.png)  \n\nSource: https://www.learnbyexample.org/python-set/\n\nYou initialise them by using **set()** on a list or tuple:\n\n\n```python\nmySet1 = set(range(10))\nmySet2 = set(range(5,20))\n \nprint(mySet1)\nprint(mySet2)\n \nmySet.add(5)  # Elements in a set are unique - the set will not change because it already has a 5\n \nprint(mySet1.intersection(mySet2))\nprint(mySet1.union(mySet2))\n```\n\n\n```python\ndir(mySet1)\n```\n\nThe principle of using intersection and union is the same as the Venn diagrams you probably saw in school... 
You can also make a set out of a string:\n\n\n```python\nmyString = \"This is a sentence.\"\n \nmyLetters = set(myString)\nmyLetters    # Note that an upper case T and lower case t are not the same!\n```\n\nThere are more things you can do with sets which we will not go into here, see the [Python sets](https://docs.python.org/3/library/stdtypes.html#types-set) documentation for more information.\n\n---\n> ### {% icon hands_on %} Exercise 5.5.1\n>\n> Which letters are shared between the words \"perspicacious\" and \"circumlocution\"?\n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```python\n>    >  # Which letters are shared between the words \"perspicacious\" and \"circumlocution\"?\n>    >  firstWord = \"perspicacious\"\n>    >  secondWord = \"circumlocution\"\n>    >  \n>    >  firstLetterSet = set(firstWord)\n>    >  secondLetterSet = set(secondWord)\n>    >  \n>    >  print(firstLetterSet.intersection(secondLetterSet))\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n---\n\n","## 3.1 Introduction  \nThere are several ways to present the output of a program, data can be printed in a human-readable form, or written to a file for future use. Sometimes users want more control over the formatting of output, rather than simply printing space-separated values. There are several ways to format output which we will cover in this section.\n\nThe following figure (which I shamelessly copied from [here](https://www.python-course.eu/python3_formatted_output.php)) helps to visualize the `.format()` argument. If you don't understand it completely, don't worry, we'll cover it in this section:\n\n<center><img src=\"../../images/format_method_positional_parameters.png\" /></center>\n\nEverything between the double quotation marks is what will be printed (thus the `print()` statement is missing). 
Between curly brackets you can find lay-out options for the arguments, the arguments themselves are given within the `.format()` statement. The first number defines the argument that will be printed (Python starts counting at 0), the number behind the colon (`:`) defines the number of characters that is foreseen for the argument, and lastly the number behind the point (`.`) is only applicable for floats and defines the amount of decimals that will be printed. E.g.: `1:8.2f` will print the argument at index 1 (the second argument) using 8 characters, two of which are decimals, and formats it as a float. If the argument has fewer than 8 characters/numbers then whitespace will be used as padding. \n\n## 3.2 Using .format()\nThe following example gives the most basic use form of the `.format()` statement. \n\n\n\n```python\nprint(\"My name is {}.\".format(\"Jane\"))\n```\n\nThe above doesn't do anything interesting; you can however put a number in between the curly brackets `{}` to force the output to take up a number of characters. Try this:\n\n\n```python\nprint(\"My name is {:>10}.\".format(\"Jane\"))\n```\n\nYou'll now see that you force an area of 10 characters to put the name. If the name is shorter, the remaining empty characters will be whitespaces. If the name would be longer, the number will be overruled. Note that the > character in the .format() form can be used to determine the alignment (use < for left align, > for right align and ^ for centered). \n\nThere are a number of differences between the old Python (version <2.5) and the version you're using now (Python 3.7). In older scripts you might see different print statements. Instead of using the `.format()` statement, it used to be a `%`-symbol to position information in the right place. E.g.:\n```print(\"My name is %s.\" % \"Jane\")```  \n\n\n----\n\n> ### {% icon hands_on %} Hands-on: Exercise 3.2.1\n>\n> Try to print the text: *Percent of alignment: 100%* using a formatting character for the number 100. 
\n> If this worked out successfully, try to align it explicitly to the right with five whitespaces. \n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```python\n>    >  # part 1\n>    >  print(\"Percent of alignment: {}%\".format(100))\n>    >  \n>    >  # part 2\n>    >  print(\"Percent of alignment: {:>8}%\".format(100))\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n----\n\n\n\n\n## 3.3 Formatting numbers  \nHere are some examples of formatting integers (digits):\n\n\n```python\nprint(\"This is {:d}.\".format(252))\nprint(\"This is {:d} and {:d}.\".format(25,30))\n```\n\nHere are some examples of formatting decimal numbers (floating point):\n\n```python\nmyFloat = 4545.4542244\n \nprint(\"Print the full float {},\\ncut off decimals {:5.2f},\\nor determine the characters before the decimal {:10.1f}.\".format(myFloat,myFloat,myFloat))\n \n# Or in old style\n# print(\"Print the full float %f,\\ncut off decimals %.2f,\\nor determine the characters before the decimal %10.1f.\" % (myFloat,myFloat,myFloat))\n```\n\n## 3.4 Special characters  \nFor some characters it is necessary to use what are called 'escape' codes because you cannot type the character normally from the keyboard. Try this:\n\n```python\nprint(\"The \\ sign\\ncan\\talso\\tbe\\tprinted.\")\n```\n\nHere the \\\\ will print a backslash (however Python might think you are trying to insert a special code and in order to be safe it's better to type a double \\\\\\\\), the \\n will print a new line, \\t a tab character.\n\n\nEscape codes are necessary if you are trying to print a single or double quote:\n\n```python\nprint(\"He said: \\\"Hello\\\".\")\n```\n","## 9.1 Introduction\n\nMore often than not the data you need for your program will come from somewhere else - either from user input or a file. 
Especially for more complex data, it becomes essential to be able to read in data files, do something with the data, and write out a new file with modified information or a set of analysis results.\n\n## 9.2 Reading files\n \nTo read in a file you have to create a *file handle*. This is a sort of connection to the file that you can use to pull data from it. You create a connection to a file by using the **open()** function. Whenever you're done using the file, it's good practice to close the file handle. \n\n\n```python\n# Open the file\nfileHandle = open(\"data/readfile.txt\")  \n# Close the file\nfileHandle.close()\n# Nothing happened...\n```\n\nAll this does is create the connection; the file has not been read yet. In order to read in a file, there are a couple of possibilities:\n- `readline()` - read the next line of the file as one string (the first line on the first call). \n- `readlines()` - read all of the lines in the file. Each line is one string. The lines are combined as a list of lines (strings). \n- `read()` - read the whole file as one string. \nEach method has its advantage. E.g. if you're searching for the presence of a word or string in a file, given that the file is not too big, you can use *read*. If you want to process an enormously big file and from each line you need to extract, process and save the information, then it's better to read line by line with *readline* within a for-loop. Try to understand the difference between these methods while you go through this section. \n\nGiven the file `readfile.txt` in a folder named data:\n\n``` \nThis is the first line.\nHere is a second one. \nAnd there is also a third line. \n```\n\n1. Using `read`:\nNote that the three different lines are read in one long string. This is how the `read` function works. \n\n```python\nfileHandle = open(\"data/readfile.txt\")  \nfileHandle.read()\n```\n\n\n```python\nfileHandle.close()\n```\n\n\n2. Using `readline`:\nReadline reads in the following line. It starts with the first one. 
When you call the method again, it will print the second line. It's important to understand this as you can exploit this method in a for-loop to access each line separately.\n\n```python\nfileHandle = open(\"data/readfile.txt\")   \nfileHandle.readline()\n```\n\n```python\nfileHandle.readline()\n```\n\n```python\nfileHandle.close()\n```\n\n\n3. Using `readlines`:\nInstead of reading the lines of a file one by one, you can also do it in one go. As explained above, each line is one string and all of the lines/strings are stored in a list. \n```python\nfileHandle = open(\"data/readfile.txt\")   \nfileHandle.readlines()\n```\n\n\n```python\nfileHandle.close()\n```\n\nKnowing this we can move on to more complex examples. First make sure to find the PDB file *TestFile.PDB* in your data folder or download [this fake PDB coordinate file for a 5 residue peptide](http://wiki.bits.vib.be/images/3/3a/TestFile.pdb) and save it in the data directory. \n\nIn the example below we will read all the lines in the file (as separated by a newline character), and store them in the variable *lines*. Each element in this list corresponds to one line of the file! When this is done, we close the file. \n\n\n```python\n# Read in the file per line\nfileHandle = open(\"data/TestFile.pdb\")\nlines = fileHandle.readlines()\n \n# Close the file\nfileHandle.close()\n \n# Print number of lines in the file\nprint(\"There are:\", len(lines), \"lines in the file\")\n\n# Loop over the lines, and do some basic string manipulations\nfor line in lines:\n    line = line.strip()  # Remove starting and trailing spaces/tabs/newlines\n    print(line)\n```\n\n\n```python\nline = lines[10]\nline = line.strip().split()\nline[-1]\n```\n\nNow you can do many other things with the data in the file. E.g. if you want to count the number of times a carbon element appears in the file. 
\n\n\n```python\n# Open the file\nfileHandle = open(\"data/TestFile.pdb\")\n \n# Read all the lines in the file (as separated by a newline character), and store them in the lines list\n# Each element in this list corresponds to one line of the file!\nlines = fileHandle.readlines()\n \n# Close the file\nfileHandle.close()\n \n# Initialise the line counter\nlineCount = 0\n \n# Loop over the lines\nfor line in lines:\n    columns = line.strip().split()\n    if columns[-1] == 'C':       # Alternatively, use \"if ' C ' in line:\"\n        print(line, end='')     # Using the 'end' argument in the print because the line already contains a newline at the end\n                                # otherwise will get double spacing.\n        lineCount += 1\n\nprint(\"Number of lines with ' C ': {}\".format(lineCount))\n```\n\nYou should find 75 lines - note that in this case, for those who know the PDB format a bit, you're finding all carbon atoms.\n\nAlternatively, you can use the `with` statement to open files. The example here above would then become:\n```python\nwith open(\"data/readfile.txt\") as fileHandle:\n    for line in fileHandle:\n        print(line)\n```\nThis method is often used because the file is closed automatically, so you do not have to keep track of the open file yourself, and because its syntax is clearer.\n\n## 9.3 Writing a file\nWriting a file is very similar, except that you have to let Python know you are writing this time by adding the `'w'` parameter in the `open()` function. Actually Python needs two arguments, however it assumes that if you only give one parameter (the file that it has to read), the other one is `'r'` which stands for *reading* mode. 
\n\nFor the sake of the example, we're writing a new file and call it `writefile.txt`:\n\n```python\nf = open('data/writefile.txt','w')\nf.write('Now we have a new file \\n')\nf.write('Because Python automatically makes this file and writes some text to it.')\nf.write('Btw, if you don\\'t specify the newline characters, it will append the string at the end of the last line')\nf.close()\nf = open('data/writefile.txt')\ntext = f.read()\nprint(text)\nf.close()\n```\n\n**Be careful** - if the file exists already it will be overwritten without warning!\n\nThe file is written to the directory you're executing the program in - have a look!\n\n\n----\n\n> ### {% icon hands_on %} Exercise 9.3.1\n>\n> Read in the file `TestFile.pdb`, and write out all lines that contain 'VAL' to a new file.\n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution 1\n>    > </summary>\n>    >\n>    >  ```python\n>    > # Read the file\n>    > f = open(\"data/TestFile.pdb\",\"r\")\n>    > g = open('data/withval.pdb','w')\n>    > \n>    > # Loop over the lines\n>    > for line in f:\n>    >     if 'VAL' in line:      # Alternatively, use \"if ' C ' in line:\"\n>    >         if 'ATOM' in line:\n>    >             g.write(line)\n>    > f.close()\n>    > g.close()\n>    >  ```\n>    > </details>\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution 2\n>    > </summary>\n>    >\n>    >  ```python\n>    > # Open the file\n>    > fileHandle = open(\"data/TestFile.pdb\")\n>    > \n>    > # Read all the lines in the file (as separated by a newline character), and store them in the lines list\n>    > # Each element in this list corresponds to one line of the file!\n>    > lines = fileHandle.readlines()\n>    >  \n>    > # Close the file\n>    > fileHandle.close()\n>    >  \n>    > # Track the lines with VAL\n>    > linesToWrite = []\n>    >  \n>    > # Loop over the lines\n>    > for line in lines:\n>    >     if line.count(\"VAL\"):      # 
Alternatively, use \"if ' C ' in line:\"\n>    >         linesToWrite.append(line)\n>    > \n>    > # Write out the lines\n>    > fileHandle = open(\"data/fileWithVAL.pdb\",'w')\n>    > for line in linesToWrite:\n>    >     fileHandle.write(line)\n>    > \n>    > # Close the file\n>    > fileHandle.close()\n>    >  ```\n>    > </details>\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution 3\n>    > </summary>\n>    >\n>    >  ```python\n>    > # Read the file\n>    > f = open(\"data/TestFile.pdb\",\"r\")\n>    > \n>    > # Track the lines with VAL\n>    > linesToWrite = []\n>    > \n>    > # Loop over the lines\n>    > for line in f.readlines():\n>    >     if line.count(\"VAL\"):      # Alternatively, use \"if ' C ' in line:\"\n>    >         linesToWrite.append(line)\n>    > \n>    > # Write out the lines\n>    > fileHandle = open(\"data/fileWithVAL.pdb\",'w')\n>    > for line in linesToWrite:\n>    >     fileHandle.write(line)\n>    > \n>    > # Close the file\n>    > fileHandle.close()\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n\n\n## 9.4 Advanced file reading and interpretation \n\n> ### {% icon hands_on %} Exercise 9.4.1\n>\n> Read in the TestFile.pdb file, print out the title of the file, and find all atoms that have coordinates closer than 2 angstrom to the (x,y,z) coordinate (-8.7,-7.7,4.7). 
Print out the model number, residue number, atom name and atom serial for each; the model is indicated by:\n> ```\n> MODEL     1\n> ```\n> lines, the atom coordinate information is in:\n> ```\n> ATOM      1  N   ASP A   1     -10.341  -9.922   9.398  1.00  0.00           N\n> ```\n> lines, where column 1 is always ATOM, column 2 is the atom serial,  column 3 the atom name, column 4 the residue name, column 5 the chain code, column 6 the residue number, followed by the x, y and z coordinates in angstrom in columns 7, 8 and 9.\n> \n> note that the distance between two coordinates is calculated as the square root of (x1-x2)²+(y1-y2)²+(z1-z2)².\n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution \n>    > </summary>\n>    >\n>    >  ```python\n>    > # Open the file\n>    > fileHandle = open(\"data/TestFile.pdb\")\n>    >  \n>    > # Read all the lines in the file (as separated by a newline character), and store them in the lines list\n>    > # Each element in this list corresponds to one line of the file!\n>    > lines = fileHandle.readlines()\n>    >  \n>    > # Close the file\n>    > fileHandle.close()\n>    >  \n>    > # Initialise some information\n>    > searchCoordinate = (-8.7,-7.7,4.7)\n>    > modelNumber = None\n>    >  \n>    > # Loop over the lines, and do some basic string manipulations\n>    > for line in lines:\n>    >     line = line.strip()  # Remove starting and trailing spaces/tabs/newlines\n>    >     \n>    >     # Only do something if it's not an empty line\n>    >     if line:\n>    >         cols = line.split()   # Split the line by white spaces; depending on the format this could be commas, ...\n>    >  \n>    >     # Print the title\n>    >     if cols[0] == 'TITLE':\n>    >         title = line.replace(cols[0],'')\n>    >         title = title.strip()\n>    >         print(\"The title is '{}'\".format(title))\n>    >  \n>    >     # Track the model number\n>    >     elif cols[0] == 'MODEL':\n>    >         
modelNumber = int(cols[1])\n>    >  \n>    >     # For atom lines, calculate the distance\n>    >     elif cols[0] == 'ATOM':\n>    >  \n>    >         # Set some clear variable names and convert to the right type\n>    >         atomSerial = int(cols[1])\n>    >         atomName = cols[2]\n>    >         residueNumber = int(cols[5])\n>    >         x = float(cols[6])\n>    >         y = float(cols[7])\n>    >         z = float(cols[8])\n>    >  \n>    >         # Calculate the distance\n>    >         distance = ((x - searchCoordinate[0]) ** 2 + (y - searchCoordinate[1]) ** 2 + (z - searchCoordinate[2]) ** 2 ) ** 0.5\n>    >         if distance < 2.0:\n>    >             print(\"Model {}, residue {}, atom {} (serial {}) is {:.2f} away from reference.\".format(modelNumber,residueNumber,atomName,atomSerial,distance))\n>    > \n>    >  ```\n>    > </details>\n>\n{: .hands_on} \n\n\n\n## 9.5 Next session\nConclusion\n","## 4.1 Introduction\nPrograms start to become more interesting if you can do different things depending on the input. For this, you have to use *conditions*, which we will discuss in this section. \n\nDecisions will be taken based on a condition. In this perspective, we highlight the importance of understanding booleans **True** and **False**, as well as the **None**-keyword once more.\n\n## 4.2 If statement\n\nThe **if** condition allows you to only execute a bit of code if a (set of) condition(s) is satisfied. Python syntax requires that you put a colon : after the **if**, and that the *block* of code that is conditional is *indented* with the same amount of spaces (or tabs). Python doesn't really care about the number of spaces or tabs, as long as you're consistent. Jupyter notebook uses tabs, hence it is best to follow along. Now try this:\n\n\n```python\nx = 5\n \nif x == 5:\n    print(\"x is five!\")\n\nif x!=5:\n    print(\"x is not five!\")\n```\n\nyou will see that only the block of code under x == 5 is printed out. 
You can of course make the conditions more complex and combine them with **and** and **or**:\n\n\n\n```python\nx = 5\ny = 10\n \nif (y / x) == 2:\n    print(\"y divided by x is 2!\")\n\nif y == 10 or x == 2:\n    print(\"x is two or y is ten\")\n    \nif y == 10 and x == 2:\n    print(\"x is two and y is ten\")\n\nprint(\"The end\")\n```\n\nHere you see that the blocks for the first two conditions (which are True) are executed, but not the third. The last line of code is always printed off - it's on the same level as the start of the code, and not conditional.\n\n## 4.3 Indentation\nPython relies on indentation (whitespace at the beginning of a line) to define scope in the code. Other programming languages often use (curly) brackets for this purpose. The level of indentation is crucial, and Python will immediately give an error if there are inconsistent levels of indentation in the code. Try this:\n\n\n```python\nx = 5\ny = 10\n \nif (y / x) == 2:\n  print(\"y divided by x is 2!\")\n   print (\"And x is {}!\".format(x))\n```\n\nNote that this can also happen if you start mixing space and tab characters!\n\n\n\n---\n\n> ### {% icon hands_on %} Exercise 4.3.1\n>\n> Write a program where you ask the user for x and y, make sure that y is not zero, and print out x/y. \n> \n> ```python\n> # Modify the code below on the ... locations:\n> xString = input(...)\n> yString = input(...)\n> \n> x = ...(xString)\n> y = ...(yString)\n> \n> if ... :\n>     print(\"Error, your y-number is 0\")\n> if ... : \n>     print(\"x divided by y = {:.2f}\".format(...))\n> ```\n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```python\n>    >  # Write a program where you ask the user for x and y, make sure that y is not zero, and print out x/y. 
\n>    >  \n>    >  xString = input(\"Give a number: \")\n>    >  yString = input(\"Give another number that is not zero: \")\n>    >  \n>    >  x = float(xString)\n>    >  y = float(yString)\n>    >  \n>    >  if y == 0:\n>    >      print(\"Error, you're y-number is 0\")\n>    >  if y != 0:\n>    >      result = x/y\n>    >      print(\"x divided by y = {:.2f}\".format(result))\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n---\n\n## 4.4 Elif statement \n\nOnce you have an **if**-condition, you can directly follow it up with an **elif** (else if) condition. This is not the same as another **if**-statement. An **elif** is only executed if the previous if (and other preceding elifs) are not True. In the example below the code in section 4.3 is adapted. Now all if-statements are changed by elifs.\n\n\n```python\nx = 5\ny = 10\n \nif (y / x) == 2:\n    print(\"y divided by x is 2!\")\nelif y == 10 or x == 2:\n    print(\"x is two or y is ten\")\nelif y == 10 and x == 2:\n    print(\"x is two and y is ten\")\n\nprint(\"The end\")\n```\n\nNow only the code under the first condition is executed, not the second (the third is not True and is in any case irrelevant). If we switch the conditions around a bit:\n\n\n\n\n```python\nx = 5\ny = 10\n \nif y == 10 and x == 2:\n    print(\"x is two and y is ten\")\nelif y == 10 or x == 2:\n    print(\"x is two or y is ten\")\nelif (y / x) == 2:\n    print(\"y divided by x is 2!\")\n\nprint(\"The end\")\n```\n\nThe first condition is not True, so the second is evaluated. This one is True, so it is executed, and the text 'x is two or y is ten' is printed. For clarity it is often useful to leave some space before and after the (set of) condition(s) - it makes the code easier to 'read' afterwards.\n\n\n\n\n\n---\n> ### {% icon hands_on %} Exercise 4.4.1\n>\n> Write a program where you ask the user for two words. 
Compare the words; if they are the same, print a message, if the first or second word is 'Stop', then also print a message. \n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```python\n>    >  # Write a program where you ask the user for two words. Compare the words; if they are the same, print a message, if the first or second word is 'Stop', then also print a message.\n>    >  print(\"Give two words.\")\n>    >  firstWord = input(\"Write a word: \")\n>    >  secondWord = input(\"Write another word: \")\n>    >  \n>    >  if firstWord == secondWord:\n>    >      print(\"These words are the same\")\n>    >  elif firstWord ==\"Stop\" or secondWord == \"Stop\":\n>    >      print(\"You're word was Stop, hence we stopped here\")\n>    >  \n>    >  print(\"The end\")\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n---\n\n\n\n## 4.5 Else statement\nYou can also end an **if** (with or without **elif**s) with an **else** condition. The block of code following else is only executed if the previous (set of) conditions are all False. Try this:\n\n\n```python\nx = 7\n \nif not (x % 2):\n    print(\"x is divisible by two!\")\nelif not (x % 3):\n    print(\"x is divisible by three!\")\nelse:\n    print(\"x is not divisible by two...\")\n\nprint (\"x is {}\".format(x))\n```\n\nYou can modify the value of x a bit to see what else can happen. Can you spot a problem with this example? What will happen if x can be divided by both two and three? 
What can you do to solve this problem?\n\n\n\n> ### {% icon hands_on %} Exercise 4.5.1\n>\n> Modify the code above so it prints that it is divisible by two and three when this is the case.\n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```python\n>    >  # If a value can be divided by two and three, only the block of code under the first condition will be executed, so you will not find out whether your value can be divided by three! There are several solutions to this, for example:\n>    >  x = 12\n>    >   \n>    >  if not (x % 2):\n>    >      print(\"x is divisible by two!\")\n>    >      if not (x % 3):\n>    >          print(\"x is divisible by three!\")\n>    >  elif not (x % 3):\n>    >      print(\"x is divisible by three!\")\n>    >  else:\n>    >      print(\"x is not divisible by two or three...\")\n>    >  \n>    >  print (\"x is {}\".format(x))\n>    >  \n>    >  # This is not a very elegant solution however, as you are repeating the same bit of code twice to find out whether the value can be divided by three. This one might be   slightly better:\n>    >  x = 12\n>    >  \n>    >  if not (x % 2):\n>    >      print(\"x is divisible by two!\")\n>    >  \n>    >  if not (x % 3):\n>    >      print(\"x is divisible by three!\")\n>    >  \n>    >  if (x % 2) and (x % 3):\n>    >      print(\"x is not divisible by two or three...\")\n>    >  \n>    >  print (\"x is {}\".format(x))\n>    >  \n>    >  # However you still have to repeat the conditions, which would become very tedious (and error-prone) if you were to try division by many values. 
The next example is a bit more verbose but cleaner and more 'extendable' for other values:\n>    >  x = 12\n>    >  xDivisible = False\n>    >   \n>    >  if not (x % 2):\n>    >      print(\"x is divisible by two!\")\n>    >      xDivisible = True\n>    >  \n>    >  if not (x % 3):\n>    >      print(\"x is divisible by three!\")\n>    >      xDivisible = True\n>    >  \n>    >  if not xDivisible:\n>    >      print(\"x is not divisible by two or three...\")\n>    >  \n>    >  print (\"x is {}\".format(x))\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n","# 1. What's a branch?\nThe idea of branching is that we can create a copy of the project in which we can add a new feature. This branch is a completely separate version of your project and lives next to your original version. If the new feature is working properly we can merge it back into the project. It's a great way of testing new changes in some code when you're not sure whether it will work, and in the meanwhile not messing up the code that you already have. \n\n---\n\n<center><img src=\"../../images/branching.png\" /></center>\n\n---\n\nThe original repository is now called the *main* branch, however historically it was called the *master* branch.   \n\nA new GitHub repository is initialized by default with one branch: the *main* branch. All the changes in our project that we did so far, have hence always been in this main branch. Remember that when we did `git status` we read a line saying that we were on the main branch. \n\nIf we would make a new branch, we can name it however we like (e.g. *new-feature*). There are two ways of doing this: locally or on the GitHub website. We will first show you the latter (section 2) and afterwards how to do it locally via Git Bash or the Terminal (section 4). \n\nA repository can have numerous branches. 
Branches are ways of organising work on a project: you can have a branch for a new feature, for trying out something new, for exploring an issue - anything at all.\n\nIt’s a good practice to create a new branch for every new bit of work you start doing, even if it’s a very small one. It’s especially useful to create a new branch for every new feature you start working on. Branches are of course disposable, you can always remove them. \n\n# 2. Branching on GitHub\nWe can make a new branch on GitHub. \n1. Click the button: 'Main'\n2. In 'Find or create a branch...' type `new-feature` (or any other name)\n3. Click 'Create branch': new-feature\n\n---\n\n<center><img src=\"../../images/newbranch-github.PNG\" /></center>\n\n---\n\nGitHub will now display `new-feature`. It's very important to understand that any changes that happen in this branch, will not be influencing the main branch. \n\n---\n\n> ### {% icon hands_on %} Exercise 5\n>\n>  Edit the `plot2.R` file again, however make sure you're in the *new-feature* branch. Add the following lines that will make a new plot. These lines will allow us to investigate the relation between the weight, horsepower and miles per gallon variables of `mtcars` dataset in R. 
\n> \n> ```R\n> # Install requirements & plotting of 3D scatterplot\n> install.packages(\"scatterplot3d\")\n> library(scatterplot3d)\n> attach(mtcars)\n> scatterplot3d(wt,hp,mpg, pch=16, highlight.3d=TRUE,\n>               type=\"h\", main=\"3D Scatterplot\")\n> ```\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    > Edit the file `plot2.R` by clicking on the pencil icon and add the following lines: \n>    > \n>    > <center><img src=\"../../images/solution5.PNG\" /></center>\n>    > Commit your changes with a useful commit message and save by clicking the green 'Commit changes'-button.\n>    > \n>    > </details>\n> \n{: .hands_on}\n\n---\n\nSwitch back to your *main* branch and have a look at the `plot2.R`-file. It shouldn't contain these changes. \n\n\n# 3. Merging branches on GitHub\nBefore exploring how we make branches on our computer locally, we'll merge the changes in the *new-feature* branch into the *main* branch. Branches are merged by making a **pull request**. In this section we will explain how to do a pull request, often shortened to PR. \n\nWhether you are on the *main* or *new-feature* branch, doesn't matter. In both cases you should see the following yellow screen. Alternatively, go to 'Pull requests' and find it there. \n\n---\n\n<center><img src=\"../../images/pull-request-pre.PNG\" /></center>\n\n---\n\nClick on **compare & pull requests** or go to the section **Pull requests** and create a **New pull request** (select the branches you want to incorporate). A new screen pops up with the following information.\n\n---\n\n<center><img src=\"../../images/pull-request-1.PNG\" /></center>\n\n---\n\n- The pull request should be interpreted as a request to pull the new branch and all of its changes into the main branch.   \n- The base where it would be pulled towards is `base: main`. The branch where the changes are deriving from is `compare: new-feature`.   
\n- Note that GitHub checks the compatibility of the branches: in this case there are no conflicting edits and the branches can be merged together.   \n- Give a descriptive title text and if appropriate some additional comment. \n\nUnderneath the pull request related information, GitHub also gives you a summary of the changes that were done. \n- Each commit from the branch *new-feature* (i.e. only added these 7 lines in this case)\n- Display of the file and a visual representation of what changed in that commit. \n\n---\n\n<center><img src=\"../../images/pull-request-2.PNG\" /></center>\n\n---\n\n\n\nClick on **Create pull request** to finalize the creation of the PR. Note that the branches are not merged yet; one more comment before we do that! We know that GitHub allows us to collaborate on projects. Here we can find some of the features that GitHub is providing us to start collaborating. We could for example start a conversation here and discuss the PR, select a (couple of) reviewer(s), add assignees who authored, add labels representing what type of edits were done in the branch, etc. Essentially these are valuable for organizing bigger projects; keep track of who's working on what and who needs to review specific changes, etc.     \n\nFinally, we verify the merge pull request commit and you give your consent to GitHub to merge both branches by clicking 'Merge pull request'.\n\n---\n\n<center><img src=\"../../images/pull-request-3.PNG\" /></center>\n\n---\n\nIt might be possible that in a project with several people, you are not authorized to make changes to the *main* branch. In this case you will always have to work in a separate branch and someone else will get this last message. He or she will then decide whether this pull request should be merged. \n\n# 4. Branching locally\nBesides the possibility of making branches on GitHub, we can also do it locally on our computer. 
As we've made changes to the repository on GitHub, we'll start with pulling the changes into our local repository. Use `git pull` in your project folder. \n \nThere is always an indication in the Terminal or Git Bash of which branch we are in (i.e. *main*). Here are the most important commands related to making branches and switching between different branches:\n1. Listing all the existing branches is possible with `git branch -a`\n2. `git checkout -b <new-branch>`: will create a new branch and move into this branch. \n3. `git branch <new-branch>`: will create a new branch, but will remain in the current branch (i.e. the *main* branch in this case)\n4. With `git checkout <branch>` we will switch from one branch to the other. \n\nLet's start with listing all the existing branches (1). \n```\n* main\nremotes/origin/HEAD -> origin/main\nremotes/origin/main\nremotes/origin/new-feature\n```\nThe first branch is our local *main* branch in which we are currently working (as denoted by the asterisk \\*). The three others relate to the branches that exist **remotely** on GitHub. If we want to work on the branch *new-feature* we will have to import it first with: `git checkout new-feature`. Git will understand that there is a remote branch with the same name and you want to work on this one. \n\n**Note** that if you use `git checkout -b new-feature`, you would have created a new branch with the same name as the remote branch. This is error prone and will lead to problems! Hence, it is really important that you switch branches and do not create a new one!\n\n## 4.1. Example workflow\nAn example workflow is depicted in the figure below and is discussed in the following points. \n\n---\n\n<center><img src=\"../../images/conceptual_areas_branching.png\" width=\"1000\" /></center>\n\n---\n\n**1. Make a new branch:**\n```\ngit checkout -b <new-branch>\n```\nGit will make a new branch with the name `<new-branch>` and tell you it switched to the new branch. 
If you want to change branches, just type `git checkout` followed by the name of the branch, e.g. `git checkout main`.\n\n**2. Make some changes:**\n  - Add a new file\n  - Edit an existing file\n\n**3. Stage changes:**  \nUse the following command to simply add all the new or changed files. \n```\ngit add -A\n```\n \n**4. Commit staging area:**  \nCommit all the staged files with:\n```\ngit commit -m \"some useful commit message\"\n```\n\n**5. Push commits to GitHub:**\n\n```\ngit push origin <new-branch>\n```  \nor alternatively:\n```\ngit push --set-upstream origin <new-branch>\n```\nThe `git push` command is now a bit longer. The first time we want to publish a new local branch on a remote repository (GitHub), we need to be explicit and tell Git to add the `<new-branch>` to the origin. In Git, the \"origin\" is used to refer to the GitHub original repository's URL and makes it much easier to talk about. \n\nNext time you want to push your commits from *new-branch*, you won’t need to be explicit - you can simply do `git push`, because now *new-branch* exists on GitHub and both branches know how to communicate with each other. \n\n\n---\n\n> ### {% icon hands_on %} Exercise 6\n>\n>  Make a new branch and make sure you're in the branch. Rewrite the README.md file so it contains the following text. Once the changes have been committed and pushed to GitHub, create a pull request and merge the changes into the main branch.  
\n> \n> ```\n> # Downstream data-analysis R\n> This repository contains all the scripts for the downstream data analysis of my project.\n> ```\n> \n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  \n>    >  ```\n>    >  git checkout -b readme\n>    >  ```\n>    >  Do the necessary changes\n>    >  ```\n>    >  git add README.md\n>    >  git commit -m \"changed README file completely\"\n>    >  git push origin readme\n>    >  ```\n>    >  Find the new branch in your GitHub repository. From there the solution is identical as discussed here above. \n>    > </details>\n>\n{: .hands_on}\n---\n\nAs a final note on merging branches, we mention here that it is obviously also possible to merge branches on our computer locally. For this, we refer to some further reading materials linked [here](https://git-scm.com/book/en/v2/Git-Branching-Basic-Branching-and-Merging).\n\n\n# 5. Deleting branches \n\n## 5.1. Via GitHub\n\nIf a branch is of no more use, we can delete it. To find all the existing branches in GitHub, click on *branches* in the top left corner of the repository.\n\n---\n\n<center><img src=\"../../images/deleting-branch-1-update.PNG\" /></center>\n\n---\n\n\nAfter successfully merging our changes in the *main* branch, the old one(s) can be deleted. Click on the waste bin:\n\n---\n\n<center><img src=\"../../images/deleting-branch-2.PNG\" /></center>\n\n---\n\nGo back to the main tab of the repository and find that the branch has been deleted. \n\n\n## 5.2. Locally\n\nDeleting a branch is as simple as typing the following command:\n\n```\ngit branch -d <name-of-the-branch>\n```\n\nIf git refuses to do so, there is a forced way to do it as well by using the capital `-D` parameter. \n\n\n---\n\n\nLet's continue with the [next session](https://material.bits.vib.be/topics/git-introduction/tutorials/6_forks/tutorial.html)!","# 12. 
Plotting figures\n*This chapter is based on the materials from [this book](https://www.packtpub.com/eu/big-data-and-business-intelligence/become-python-data-analyst) and [this website](https://python-graph-gallery.com/8-add-confidence-interval-on-barplot/)*\n\nMatplotlib is a Python 2D plotting library which produces publication quality figures. Although Matplotlib is written primarily in pure Python, it makes heavy use of NumPy and other extension code to provide good performance even for large arrays.\n\nWe will start with the basic concepts: figures, subplots (axes) and axis. The following line of code allows the figures to be plotted in the notebook results:\n\n\n```python\n%matplotlib inline\n```\n\n`matplotlib.pyplot` is a collection of command style functions that make matplotlib work like MATLAB. Each pyplot function makes some change to a figure: e.g., creates a figure, creates a plotting area in a figure, plots some lines in a plotting area, decorates the plot with labels, etc. In matplotlib.pyplot various states are preserved across function calls, so that it keeps track of things like the current figure and plotting area, and the plotting functions are directed to the current subplot.\n\nWhat we first have to do is import the library, of course. \n\n\n```python\nimport matplotlib.pyplot as plt\n```\n\n\n```python\nplt.plot([1, 2, 3, 2.5])\nplt.ylabel('some numbers')\n```\n\n<center><img src=\"../../images/plotting1.png\" /></center>\n\n`plot()` is a versatile command, and will take an arbitrary number of arguments. 
For example, to plot x versus y, you can issue the command:\n\n\n\n```python\nx_list = list(range(1,10))\ny_list = [pow(i, 2) for i in x_list]\nprint(x_list)\nprint(y_list)\n```\n\n\n```python\nplt.plot(x_list, y_list)\nplt.title(\"Title of the plot\")\n```\n\n<center><img src=\"../../images/plotting2.png\" /></center>\n\nUsing the pyplot interface, you build a graph by calling a sequence of functions and all of them are applied to the *current subplot*, like so:\n\n\n```python\nplt.plot([1, 2, 3, 4], [10, 20, 25, 30], color='lightblue', linewidth=3)\nplt.scatter([0.3, 3.8, 1.2, 2.5], [11, 25, 9, 26], color='darkgreen', marker='^')\nplt.xlim(0.5, 4.5)\nplt.title(\"Title of the plot\")\nplt.xlabel(\"This is the x-label\")\nplt.ylabel(\"This is the y-label\")\n# Uncomment the line below to save the figure in your current directory\n# plt.savefig('examplefigure.png')\n```\n<center><img src=\"../../images/plotting3.png\" /></center>\n\nWhen working with just one subplot in the figure, it is generally OK to work with the pyplot interface; however, when doing more complicated plots, or working within larger scripts, you will want to explicitly pass around the *Subplot (Axes)* and/or *Figure* object to operate upon.\n\n\n\n```python\ndef gc_content(file):\n    \"\"\"Calculate GC content of a fasta file (with one sequence)\"\"\"\n    sequence=\"\"\n    with open(file, 'r') as f:\n        for line in f:\n            if line.startswith('>'):\n                seq_id = line.rstrip()[1:]\n            else:\n                sequence += line.rstrip()\n    \n    A_count = sequence.count('A')\n    C_count = sequence.count('C')\n    G_count = sequence.count('G')\n    T_count = sequence.count('T')\n    N_count = sequence.count('N')\n    GC_content = (sequence.count('G') + sequence.count('C')) / len(sequence) * 100\n    AT_content = (sequence.count('A') + sequence.count('T')) / len(sequence) * 100\n    print(\"The GC content of {} is\\t {:.2f}%\".format(seq_id, GC_content))    \n    
return GC_content, AT_content, A_count, C_count, G_count, T_count, N_count\n    \n\nGC_content, AT_content, A_count, C_count, G_count, T_count, N_count = gc_content('../data/gene.fa')\nprint(GC_content)\nprint(AT_content)\nprint(A_count)\nprint(C_count)\nprint(T_count)\nprint(G_count)\n\n```\n\n\n```python\ntotal_count = A_count + C_count + G_count + T_count\nA_perc = A_count/total_count*100\nC_perc = C_count/total_count*100\nG_perc = G_count/total_count*100\nT_perc = T_count/total_count*100\nheight = [A_perc, C_perc, G_perc, T_perc]\nbars = ('A','C','G','T')\nplt.bar(bars, height)\n\nplt.xlabel('Nucleotide')\nplt.ylabel('Percentage of occurence (%)')\nplt.title('Distribution of nucleotides in fasta sequence')\n\nplt.show()\n```\n<center><img src=\"../../images/plotting4.png\" /></center>\n\n```python\ntotal_count = A_count + C_count + G_count + T_count\nA_perc = A_count/total_count*100\nC_perc = C_count/total_count*100\nG_perc = G_count/total_count*100\nT_perc = T_count/total_count*100\nheight = [A_perc, C_perc, G_perc, T_perc]\nbars = ('A','C','G','T')\n#plt.bar(bars, height, color=('green','red','yellow','blue'))\nplt.bar(bars, height, color=('#1f77b4','#ff7f0e','#2ca02c','#d62728'))\n\nplt.xlabel('Nucleotide')\nplt.ylabel('Percentage of occurence (%)')\nplt.title('Distribution of nucleotides in fasta sequence')\n\nplt.show()\n```\n<center><img src=\"../../images/plotting5.png\" /></center>\n\n```python\n# libraries\n#import numpy as np\nimport matplotlib.pyplot as plt\n \n# width of the bars\nbarWidth = 0.3\n \n# Choose the height of the blue bars\nexperimentA = [10, 9, 2]\n \n# Choose the height of the cyan bars\nexperimentB = [10.8, 9.5, 4.5]\n \n# Choose the height of the error bars (bars1)\nyer1 = [0.5, 0.4, 0.5]\n \n# Choose the height of the error bars (bars2)\nyer2 = [1, 0.7, 1]\n \n# The x position of bars\nr1 = list(range(len(experimentA)))\nr2 = [x + barWidth for x in r1]\n \n# Create blue bars\nplt.bar(r1, experimentA, width = 0.3, color = 'blue', 
edgecolor = 'black', yerr=yer1, capsize=5, label='Experiment A') # Capsize is the width of errorbars\n \n# Create cyan bars\nplt.bar(r2, experimentB, width = 0.3, color = 'cyan', edgecolor = 'black', yerr=yer2, capsize=7, label='Experiment B')\n \n# general layout\nplt.xticks([x + barWidth/2 for x in r1], ['cond_A', 'cond_B', 'cond_C'])\nplt.ylabel('effect')\nplt.legend()\n \n# Show graphic\nplt.show()\n\n```\n\n<center><img src=\"../../images/plotting6.png\" /></center>\n","## 1.1 Why Jupyter\nJupyter is an interactive code environment that allows you to write code and get immediate feedback from it. It's one of the most popular environments for Python programming, especially for training purposes, as it interactively gives you your code and some informative text together. \n\n## 1.2 Installation\nThe easiest way to install Python and Jupyter is to install [Anaconda](https://docs.anaconda.com/anaconda/install/) (Navigator) on your computer. Anaconda Navigator contains several (GUI) applications like Jupyter in which you can run your Python code. As a side note, Anaconda is also a package manager which makes it ideal for reproducibility purposes as well. \n\nNowadays, Jupyter comes in two versions. More often you will hear about Jupyter Notebooks which is the precursor of Jupyter Lab. The latter has a couple of advantages, however for stability reasons we'll be using Jupyter Notebooks for now. \n\n> ### {% icon hands_on %} Installation instructions\n>\n> To install all prerequisites for this course \n> 1. Go to [Anaconda](https://www.anaconda.com/distribution/), scroll a bit down and select the right distribution system (Windows, MacOS or Linux), and download the Python 3.7 version. Follow the installation instructions.  \n> 2. You should be able to find Jupyter Notebooks within the installed apps now. Otherwise, open the Anaconda Navigator & launch a Jupyter Notebook\n> 3. Jupyter Notebooks opens a tab with a list of your folders. 
Make and/or select a folder in which you want to keep the training materials.\n> 4. Find the training materials on our Github repository: [Gentle hands on python](https://github.com/vibbits/gentle-hands-on-python)\n> 5. Click the button 'Clone or Download' and select 'Download ZIP'. Finally, extract the zipped file within the folder you just selected or created. \n> 6. In Jupyter Notebook you should see the materials now. \n>\n{: .hands_on}\n\n\n## 1.3 Getting familiar with Jupyter Notebooks\n\n**a. Make a new notebook**  \n\nNavigate to a folder and click on the right New --> Python 3. A new Notebook now pops up with an empty cell. In this cell you can directly input some Python code. Try out the following: \n\n```python\n1+1\n```\n\nClick on the triangle symbol on the top of the notebook or type 'Shift+Enter' to run the code. The output will immediately appear on the screen and should look like this. \n<center><img src=\"../../images/cells.PNG\" /></center>\n\nAlso, a new cell will have appeared in the notebook. A notebook is actually a set of cells in which you can input code. \n\nIf you want another cell, you can click the '+' symbol on top of the notebook. Other interesting symbols up there are the stop symbol and the reload symbol. Whenever your code is stuck, you can stop it right there, or whenever you want to restart in a clean and fresh environment, you hit that restart button. \n\n**b. Code or Markdown**\n\nThere are two modes that a cell can have. \n- A cell is by default in **Code** modus. This means that the environment expects a Python code as input and it will interpret it and give you some output upon running that cell.\n- The **Markdown** mode is a kind of text modus. In here you can type any kinds of text and edit it so headers, bold or italic texts, quotes, images are possible to integrate. It's called rich text. E.g. If you double click this text, you will see the Markdown code of this text. \n\n**c. 
Command or Edit mode**\n\nTo switch between these modes, hit 'Esc' or 'Enter'. When you hit 'Enter', you'll get into the Edit mode, the cell will have a blue border around it and you're free to edit the content of that cell (either Python code or Markdown code). If you hit 'Esc', your cell will be in the Command mode and you can use shortcuts to edit your notebook:\n- a (above): add a new cell above\n- b (below): add a new cell below\n- dd: remove the cell\n- z: undo the previous action\n\nThese are just a few of them. \n\nThe blue bar on the left of your cell indicates which cell is selected. In command mode, you can move through your cells with the up and down arrow keys. \n\nLastly, within the command mode, type 'y' to change the cell to a Python code cell and type 'm' to change the cell to a Markdown code cell. \n\n**d. Running a cell**\n\nTo stress the importance of the 'stop' button on top of this notebook, run the following code below. While it is running, the code has an asterisk which means it's still being executed and your notebook won't be able to process any other code in another cell. In order to stop it, because it's an infinite loop, hit the stop button or type 'ii' in command mode. \n\n\n```python\nimport time\nwhile True:\n    print(\"Hello\")\n    time.sleep(3)\n```\n\n## 1.4 Examples\nThe above will suffice for the Jupyter environment introduction. We will dive into our first examples before diving into the first chapter of our Python adventure. \n\nA program needs information (input) to run, and then needs to export its results so that you know what happened (output). The easiest way to do this is to send a 'text message' to the screen; this is possible with the print command which we will introduce here.\n\nIn this section we also discuss some basics of Python syntax, and the errors that occur if you don't get it right.\n\n**a. 
Let's do some math**\n\nPython is very intuitive and flexible in a way that there is no need of special colons, nor do you have to take spaces into account. Just note that Python is indent-sensitive, but we will get back to this. \n\n\n```python\n1+1\n```\n\n\n```python\n2 - 5\n```\n\n\n```python\n3  * 4\n```\n\n\n```python\n10/2\n```\n\n**b. Writing a message**  \n\nThe print command allows you to write data to the console screen. Try the following example:\n\n\n```python\n# Print 'Hello world' to the screen\nprint(\"Hello world\")\n```\n\nNotice that lines starting with a `#` symbol are not displayed, nor evaluated by Python. They usually contain extra information concerning the code. \n\n\n```python\n# What happens if you leave out the quotation marks? \nprint(Hello world)\n```\n\nYou should get the following error: **SyntaxError: invalid syntax**. This is because Python doesn't understand what Hello and world mean.\n\n**c. Writing numbers**\n\nYou can also print out numbers as text messages to the screen. You do not need quotation marks in this case; just the number is enough. If your number does not have a decimal point (.) in it, it's called an **integer**, if it does have a decimal point, it is a **float**. \n\n\n```python\n# Print an integer and a floating point \nprint(5)\nprint(3.1415)\n```\n\n**Note**  \nIn Python, programs often start with:\n```\n#!/usr/bin/python\n```\nThis line is called the 'Shebang' and tells the operating system where it can find the Python language interpreter so it can run the program without you having to specify where Python is. With Jupyter Lab/Notebooks we already have a Python environment so we do not need to redefine it every time. \n\n## 1.5 JupyterLab\n\nWith recent improvements, the environment grew a little bit more powerful to a full interface, called JupyterLab. 
You can see all of the files that are within a folder within a file explorer, and you can open a Terminal window which is a Linux machine where you can install any packages that you would need. You can also make a text file or edit text files that are in your folder. However, the simplest option is still to open a Python console where you directly insert python code. \n\n","## 8.1 Introduction\n\nSo far we've seen variables that store one value or a series of values (see section 5: lists, tuples and sets). There is another way of storing information where you associate one value with another value; in Python this is called a dictionary. Dictionaries provide a very useful way of quickly connecting different values to each other.\n\n\n## 8.2 Dictionary creation & usage\n\nIt is best to think of a dictionary as a set of *key:value* pairs, with the requirement that the keys are unique (within one dictionary). Dictionaries are initialised by using curly brackets {}, and each pair of *key:value*s is separated with a comma. This is what a dictionary looks like:\n\n\n![Gentle-hands-on-introduction-to-Python-Programming Python Dictionary](../../images/myDictionary-cropped.png)\n\n\n\n\n\n```python\nmyDictionary = {'A': 'Ala', 'C': 'Cys', 'D': 'Asp'}\nmyDictionary\n```\n\nYou can recall values by using square brackets [ ] with the name of the key, or use the `get()`-method. \n\n\n```python\nmyDictionary['A']\n```\n\n\n```python\nmyDictionary.get('C')\n```\n\nIf you would like to add a new pair of key-value: \n\n\n```python\nmyDictionary['E'] = 'Glu'\nmyDictionary\n```\n\nNote however that keys are unique and if you try to add a *key:value*-pair with a key that already exists in the dictionary and a different value, it will overwrite the value. \n\n\n```python\nmyDictionary['A'] = 'Glu'\nmyDictionary\n```\n\nSo keys are unique, values are not!\n\nDictionaries, like lists, have several useful built-in methods. 
The most frequently used are listed here below:\n- `keys()`\tto list the dictionary's keys\n- `values()` to list the values in the dictionary\n- `get()`\tcall the value of a specified key\n- `pop()`\tto remove the specified key and its values\n\nListing the keys within a dictionary: \n```python\nmyDictionary = {'A': 'Ala', 'C': 'Cys', 'D': 'Asp', 'E': 'Glu'}\nmyDictionary.keys()\n```\n\nPython tells us that the list is still in a dictionary-keys data structure type. If you would like to extract the keys for further processing, it's probably better to transform them into a list:\n```python\nlist(myDictionary.keys())\n```\n\nSimilarly for the values of a dictionary: \n```python\nlist(myDictionary.values())\n```\n\nWe've already exploited the `get` method, with `pop` we can remove a *key-value* pair:\n\n```python\nmyDictionary.pop('E')\nmyDictionary\n```\n\nIf you try to access a key that doesn't exist, Python will give an error:\n\n\n```python\nmyDictionary = {'A': 'Ala', 'C': 'Cys', 'D': 'Asp', 'E': 'Glu'}\n \nmyDictionary['B']\n```\n\nYou should therefore always check whether a key exists:\n\n\n\n```python\n# Newlines don't matter when initialising a dictionary...\nmyDictionary = {\n     'A': 'Ala',\n     'C': 'Cys',\n     'D': 'Asp',\n     'E': 'Glu',\n     'F': 'Phe',\n     'G': 'Gly',\n     'H': 'His',\n     'I': 'Ile',\n     'K': 'Lys',\n     'L': 'Leu',\n     'M': 'Met',\n     'N': 'Asn',\n     'P': 'Pro',\n     'Q': 'Gln',\n     'R': 'Arg',\n     'S': 'Ser',\n     'T': 'Thr',\n     'V': 'Val',\n     'W': 'Trp',\n     'Y': 'Tyr'}\n\nif 'B' in myDictionary.keys():\n    print(myDictionary['B'])\nelse:\n    print(\"myDictionary doesn't have key 'B'!\")\n```\n\nHowever, it's much cleaner if you use the `get()` method as it doesn't return an explicit error if a key doesn't exist in your dictionary. Instead it will return a `None`-value. 
\n```python\ntype(myDictionary.get('B'))\n```\n\n---\n\n> ### {% icon hands_on %} Exercise 8.2.1 \n>\n> Use a dictionary to track how many times each amino acid code appears in the following sequence:\n> ```\n> SFTMHGTPVVNQVKVLTESNRISHHKILAIVGTAESNSEHPLGTAITKYCKQELDTETLGTCIDFQVVPGCGISCKVTNIEGLLHKNNWNIED  \n> NNIKNASLVQIDASNEQSSTSSSMIIDAQISNALNAQQYKVLIGNREWMIRNGLVINNDVNDFMTEHERKGRTAVLVAVDDELCGLIAIADT\n> ```\n> Tip: use the one-letter code as key in the dictionary, and the count as value.\n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    > ```python\n>    > # Use a dictionary to track how many times each amino acid code appears in the following sequence:\n>    > # SFTMHGTPVVNQVKVLTESNRISHHKILAIVGTAESNSEHPLGTAITKYCKQELDTETLGTCIDFQVVPGCGISCKVTNIEGLLHKNNWNIEDNNIKNASLVQIDASNEQSSTSSSMIIDAQISNALNAQQYKVLIGNREWMIRNGLVINNDVNDFMTEHERKGRTAVLVAVDDELCGLIAIADT\n>    > # Tip: use the one-letter code as key in the dictionary, and the count as value. 
\n>    > mySequence = \"SFTMHGTPVVNQVKVLTESNRISHHKILAIVGTAESNSEHPLGTAITKYCKQELDTETLGTCIDFQVVPGCGISCKVTNIEGLLHKNNWNIEDNNIKNASLVQIDASNEQSSTSSSMIIDAQISNALNAQQYKVLIGNREWMIRNGLVINNDVNDFMTEHERKGRTAVLVAVDDELCGLIAIADT\"\n>    >  \n>    > # First way to do this, using sets (condensed)\n>    > aminoAcidCount = {}\n>    > myUniqueAminoAcids = set(mySequence)\n>    > for aaCode in myUniqueAminoAcids:\n>    >     print(\"Amino acid {} occurs {} times.\".format(aaCode,mySequence.count(aaCode)))\n>    >     aminoAcidCount[aaCode] = mySequence.count(aaCode)\n>    > ```\n>    > \n>    > </details>\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    > ```python\n>    > # Another way to do this, a little bit more elaborate and using the myDictionary as a reference for iteration\n>    > mySequence = \"SFTMHGTPVVNQVKVLTESNRISHHKILAIVGTAESNSEHPLGTAITKYCKQELDTETLGTCIDFQVVPGCGISCKVTNIEGLLHKNNWNIEDNNIKNASLVQIDASNEQSSTSSSMIIDAQISNALNAQQYKVLIGNREWMIRNGLVINNDVNDFMTEHERKGRTAVLVAVDDELCGLIAIADT\"\n>    > \n>    > myDictionary = {\n>    >      'A': 'Ala',\n>    >      'C': 'Cys',\n>    >      'D': 'Asp',\n>    >      'E': 'Glu',\n>    >      'F': 'Phe',\n>    >      'G': 'Gly',\n>    >      'H': 'His',\n>    >      'I': 'Ile',\n>    >      'K': 'Lys',\n>    >      'L': 'Leu',\n>    >      'M': 'Met',\n>    >      'N': 'Asn',\n>    >      'P': 'Pro',\n>    >      'Q': 'Gln',\n>    >      'R': 'Arg',\n>    >      'S': 'Ser',\n>    >      'T': 'Thr',\n>    >      'V': 'Val',\n>    >      'W': 'Trp',\n>    >      'Y': 'Tyr'}\n>    > \n>    > lengthDict = len(myDictionary.keys())\n>    > for aa in range(lengthDict):\n>    >     aaCode = list(myDictionary.keys())[aa]\n>    >     aaCount = mySequence.count(aaCode)\n>    >     print(\"Amino acid {} occurs {} times.\".format(aaCode,aaCount))\n>    > ```\n>    > \n>    > </details>\n>\n{: .hands_on}\n\n\n## 8.3 A practical example of dictionaries\nAn practical example of dictionaries can be found 
in Biopython. Imagine that we want to extract some information from a GenBank file ([NC_005816](https://www.ncbi.nlm.nih.gov/nuccore/NC_005816/))   \n\n\n```python\n# Imports the SeqIO object from Biopython\nfrom Bio import SeqIO\n\n# Reads in (just one record of) the GenBank file\nrecord = SeqIO.read(\"data/NC_005816.gb\",\"genbank\")\nprint(record)\n```\n\nThe SeqRecord object (which we see here) has an id, name and description as well as a sequence. For other (miscellaneous) annotations, the SeqRecord object has a dictionary attribute *annotations*. Most of the annotations information gets recorded in the annotations dictionary.\n\n\n```python\nprint(record.id)\nprint(record.name)\nprint(record.description)\n#print(record.seq)\n```\n\n\n```python\nrecord.annotations\n```\n\n\n```python\nrecord.annotations['organism']\n```\n\n\n```python\nrecord.annotations['source']\n```\n\n(In general, `organism` is used for the scientific name (in Latin, e.g. *Arabidopsis thaliana*), while `source`\nwill often be the common name (e.g. thale cress). In this example, as is often the case, the two fields are\nidentical.)\n\n\n```python\nrecord.annotations['accessions'] # This could be a list of values, hence the list. \n```\n\n## 8.4 More with dictionaries\nAs mentioned here above, the value associated with a key can consist of a list with values (instead of one single value). In the example below we save the information of an experiment in a dictionary. The key that saves the *date* information contains a `list` of fictive dates (01-01-2020 and 02-01-2020):\n\n\n```python\nTriplicateExp1 = {'name': 'experiment 1', 'pH': 5.6, 'temperature': 288.0, 'volume': 200, 'calibration':'cal1', 'date':['01-01-2020','02-01-2020']}\nTriplicateExp1\n```\n\nFor the keys, however, the data structures should be immutable (so tuples are OK, lists are not). 
Recall that keys have to be unique; if you add a key that already exists, the old entry will be overwritten:\n\n\n```python\ndates = ('date1','date2') # tuple\n\nTriplicateExp1[dates] = ['01-01-2020','02-01-2020']\nTriplicateExp1\n```\n\nIt is also possible to have a so-called nested dictionary, in which there is a dictionary within a dictionary. Here we make two more dictionaries with information about the triplicate experiment. The information of each experiment is thus assembled in a separate dictionary. Then, the three dictionaries are combined into one dictionary. \n\n\n```python\nTriplicateExp2 = {'name': 'experiment 2', 'pH': 5.8, 'temperature': 286.0, 'volume': 200, 'calibration':'cal1', 'date':'03-01-2020'}\nTriplicateExp3 = {'name': 'experiment 3', 'pH': 5.4, 'temperature': 287.0, 'volume': 200, 'calibration':'cal1', 'date':'04-01-2020'}\nTriplicate = {\n    'exp1':TriplicateExp1,\n    'exp2':TriplicateExp2,\n    'exp3':TriplicateExp3\n}\nTriplicate\n```\n","## 2.1 Introduction\n\nJust printing things is not that interesting, what you really want to do with a computer program is manipulate data. This is why variables are so important - they allow you to assign information to a name that you can re-use later on.\n\nIn this section we will introduce the basic types of variables and how you can manipulate them. Just to get started, we give an overview of the different **built-in data types** that are present in Python and which you can assign to a variable. 
Although this variety of data types exists, not all of them will be discussed in this course.\n\n- Text type:       `str`\n- Numeric types:   `int`, `float`, `complex`\n- Sequence types:  `list`, `tuple`, `range`\n- Mapping types:   `dict`\n- Set types:       `set`, `frozenset`\n- Boolean types:   `bool`\n- Binary types:    `bytes`, `bytearray`, `memoryview`\n\nIn this section, we will cover the text type, numeric types (complex are out of scope) and booleans.\n\n**Operators** can be anything from:\n- Arithmetic: additions, subtractions, multiplications, divisions, remainders and powers\n- Comparison: equal to, not equal to, greater than, less than, etc. \n- Logical: AND, OR and NOT used for conditional statements\n- Identity: `is`, `is not`\n\n**Note**:  \nThis section doesn't really include any exercises. Try to follow and code along while we scroll through the examples so you start to get a feel for it.\n\n## 2.2 Strings\nWe already saw strings in the previous section. You can assign a string to a variable like this:\n\n\n```python\n# Assign the sequence AGAATCGATACGA to a variable and print the variable.  \nmySequence = \"AGAATCGATACGA\"\nprint(mySequence)\n```\n\nWhat happens here is that you assign a **value**: \"*AGAATCGATACGA*\" to a **variable**: `mySequence` and then print it out. You can now keep on using `mySequence` throughout your program. 
Note that `mySequence` is not quoted because it is now part of the program, try this for example:\n\n\n```python\n# Repeat the above, but this time put the variable in quotation marks when you put in the print statement and see what happens\nmySequence = \"AGAATCGATACGA\"\nprint(\"mySequence\")\n```\n\nYou will now still assign the value \"*AGAATCGATACGA*\" to the variable `mySequence`, but because of the quotes you then print off the string \"mySequence\", not the variable.\n\nYou can assign strings in the following ways:\n\n\n```python\nmyString1 = \"Hello world!\"\nmyString2 = 'Hello sun!'\nmyString3 = \"\"\"Hello\nuniverse.\"\"\"\nprint(myString1)\nprint(myString2)\nprint(myString3)\n```\n\nThe single and double quotes are essentially the same. If you use triple double quotes - \"\"\" - you can assign a string over multiple lines.\n\n\n```python\n# Try assigning a string over multiple lines without using the triple double quotes and see what happens.\nmyString = \"Hello\nuniverse.\"\n```\n\nThis will give a SyntaxError, as Python 'reads' each line separately, and it doesn't find the ending (on the first line) and starting (on the second line) quote. Using the escape codes, you can however do the following:\n\n\n\n```python\n# Try to print two words in two different lines without using three \"\" marks. \nmyString = \"Hello\\nuniverse.\"\nmyString\n```\n\n## 2.3 Strings from user input\n\nPython provides a very simple way to get user input. This input is always returned as a string, so try the following:\n\n\n```python\n# Use input to ask for a sequence string, then print the input sequence\nmySequence = input(\"Give me a sequence:\")\nprint(mySequence)\n```\n\n## 2.4 Integers\nIntegers are non-decimal numbers. 
Python will recognize numbers in the code automatically, so you can do:\n\n\n```python\n# Assign integer 5 to a variable myInteger\nmyInteger = 5\nprint(myInteger)\n```\n\nAs described in the introduction, you can also do standard mathematical operations on integers. Mathematical operations are even possible within a print statement.\n\n\n```python\n5 + 5  # Addition\n```\n\n\n```python\n5 - 8  # Subtraction\n```\n\n\n```python\n2 * 5  # Multiplication\n```\n\n\n```python\n4 / 2  # Division\n```\n\n\n```python\n5 % 2  # Modulus, remainder of division\n```\n\n\n```python\n2 ** 3 # Power\n```\n\nIt doesn't matter if you use variables or integers for this:\n\n\n```python\nx = 5\ny = 2\n```\n\n\n```python\nx + 5  # Addition\n```\n\n\n```python\nx - 8  # Subtraction\n```\n\n\n```python\ny * x  # Multiplication\n```\n\n\n```python\n4 / y  # Division\n```\n\n\n```python\n5 % y  # Modulus, remainder of division\n```\n\n\n```python\ny ** 3 # Power\n```\n\nIn order to print an integer inside a string, you could simply use the following expression in which the string is separated from the integer with a comma.\n\n\n```python\nfirstResult = 5 * 4\nprint(\"The result is\", firstResult,\".\")\n```\n\nHowever, there is another way using the `.format()` method. The format method allows you to change the layout of the output that it prints. We will use it a lot during this course; here you see it in its simplest form. The variable that you want to print is given within the round brackets of the format method, and the location in the string where it is printed is marked with curly brackets:\n\n\n```python\nfirstResult = (5 * 4)\nprint(firstResult)\nprint(\"The result of the first calculation is {}.\".format(firstResult))\n\nsecondResult = (5 * (4 + 3) - 2)\nprint(secondResult)\nprint(\"The result of the second calculation is {}.\".format(secondResult))\n```\n\nNote here the precedence of operations; * and / take precedence over + and -. 
You can use () to change the results.\n\n## 2.5 Floats\n\nFloats (floating point numbers) are decimal numbers that behave in the same way as integers, except that they are more accurate\n\n\n```python\n# Assign float 5.5 to the myFloat variable\nmyFloat = 5.5 \nmyFloat\n```\n\n\n```python\ntype(myFloat)\n```\n\nMathematical operations are the same:\n\n\n```python\n5.2 + 4.8  # Addition\n```\n\n\n```python\n5.2 - 8.3  # Subtraction\n```\n\n\n```python\n2.0 * 5.11212  # Multiplication\n```\n\n\n```python\n4.2 / 2.7  # Division\n```\n\n\n```python\n5.4 % 2.0  # Modulus, remainder of division\n```\n\n\n```python\n4 ** 0.5 # Power\n```\n\nAlso floats can be incorporated in a string with the `.format()` statement. You can determine the number of characters before and after the decimal point as well, however we will cover this in the next section. \n\n\n```python\nmyFloat = 4545.4542244\nprint(\"Print the full float {},\\ncut off decimals {:.2f},\\nor determine the characters before the decimal {:10.1f}.\".format(myFloat,myFloat,myFloat))\n```\n\nNote here that we put three formatting characters in the string; we then also need three values to print out. \n\n## 2.6 Floats, integers and strings  \nYou can also force a conversion between the different value types float, integers and strings with the `str()`, `int()` and `float()` conversions:\n\n\n```python\n# Use the int() and float() statements to switch the value types and print out the values. 
Do you notice any differences?\nmyFloat = 4.5\nmyFloat\n```\n\n\n```python\nint(myFloat) # Note that it will print the result of the operation; myFloat remains a float!\n```\n\n\n```python\nmyInteger = 5\nmyInteger\n```\n\n\n```python\nmyOtherFloat = float(myInteger)\nmyOtherFloat\n```\n\nIn the same way you can convert values to strings with `str()`; you can also convert strings back to integers and floats but only if the content of the string is an integer or float:\n\n\n```python\n# Convert a float and an integer to a string with the str() statement \nmyFloat = 4.5\nmyFloatString = str(myFloat)\nmyInteger = 5\nmyIntegerString = str(myInteger)\nprint(\"My strings are {} and {}\".format(myFloatString,myIntegerString))\nprint(\"My string converted to integer is {}\".format(int(myIntegerString)))\nprint(\"My string converted to float is {}\".format(float(myFloatString)))\n```\n\n\n---\n> ### {% icon hands_on %} Exercise 2.6.1\n>\n> Write a program where you ask for a number, convert it to an integer, and print out in a formatted string what your number is.\n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```python\n>    >  myFloatString = input(\"Give me a number:\")\n>    >  myInteger = int(float(myFloatString))\n>    >  print(\"My number in integer form is {}\".format(myInteger))\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n--- \n\nYou can also add, subtract, divide and multiply a variable by a number or other variable directly. 
These are the so-called assignment operators.\n\n\n```python\nmyFloat = 6\nmyString = \"ABC\"\n \nmyFloat += 5   # Same as myFloat = myFloat + 5\nprint(myFloat)\n \nmyString += \"DE\"  # Addition works for strings as well\nprint(myString)\n \nmyFloat -= 5   # Same as myFloat = myFloat - 5\nprint(myFloat)\n \nmyFloat /= 2   # Same as myFloat = myFloat / 2\nprint(myFloat)\n \nmyFloat *= 2   # Same as myFloat = myFloat * 2\nprint(myFloat)\n```\n\nFinally, you can check what data type a variable is by using `type()`:\n\n\n```python\nmyInteger = -6\nmyFloat = 5.22\nmyString = \"Text!\"\n \nprint(myInteger, type(myInteger))\nprint(myFloat, type(myFloat))\nprint(myString, type(myString))\n```\n\nNote here that you can print multiple values by using a comma in between the values.\n\n\n\n---\n> ### {% icon hands_on %} Exercise 2.6.2\n>\n> See what happens if you try to print a float as an integer, and an integer as a string. \n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```python\n>    >  myFloat = 11.4\n>    >  myIntFloat = int(myFloat)\n>    >  print(\"My float as integer {}\".format(myIntFloat)) \n>    >  #This works\n>    >  myInt  = 12\n>    >  print(\"My integer as string {}\".format(str(myInt)))\n>    >  #This works as well... but:\n>    >  myString = \"Hello\"\n>    >  print(\"My string as float {}\".format(float(myString)))\n>    >  #will fail and give a ValueError - Python cannot convert \"Hello\" into a float.\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n--- \n\n\n## 2.7 Booleans \nFinally, there are the boolean variables `True` and `False`. \nPython returns booleans when comparing values. In the code below Python checks whether the comparison holds; when this is the case it will print out the boolean `True`. 
In order to do a comparison, we use **comparison operators** like `==, >, <, <=, >=, !=`\n\n\n```python\nmyBoolean = True\nmyBoolean\n```\n\n\n```python\ntype(myBoolean)\n```\n\n\n```python\nmyInteger = 5\nmyInteger == 6   # This means 'is myInteger equal to 6?'\n```\n\n\n```python\nmyInteger < 6    # This means 'is myInteger smaller than 6?'\n```\n\n\n```python\nmyInteger > 6    # This means 'is myInteger greater than 6?'\n```\n\n\n```python\nmyInteger <= 6   # This means 'is myInteger smaller or equal to 6?'\n```\n\n\n```python\nmyInteger >= 6   # This means 'is myInteger greater or equal to 6?'\n```\n\n\n```python\nmyInteger != 6   # This means 'is myInteger not equal to 6?'\n```\n\nSimilarly to comparison operators, you can also use the **identity operators** `is` and `is not`, as well as the logical operator `not`:\n\n\n```python\nmyInteger = 5\n```\n\n\n```python\nmyInteger is 6    # Same result as == here, but 'is' compares identity, not value\n```\n\n\n```python\nmyInteger is not 6   # Same result as != here, but 'is not' compares identity, not value\n```\n\n\n```python\nnot myInteger > 6    # Same as <=\n```\n\nIf you want to combine multiple comparisons, it is possible to use the logical operators `and` and `or`. With the `and` operator both comparisons have to be True for the result to be True. With the `or` operator, only one has to be True for the result to be True.\n\n\n```python\nx = 5\ny = 6\n```\n\n\n```python\nx == 5 and y > 2    # Both have to be True for the result to be True\n```\n\n\n```python\nx != 5 or y > 2     # Only one has to be True for the result to be True\n```\n\n## 2.8 Nothing\n\nFinally, we highlight the `None` value which is comparable to the `null` value of other programming languages. In the code below we show that None, which you could interpret as nothing, is still something else than the value 0 or e.g. an empty string. \n\n\n```python\nmyNothing = None\nmyNothing\n```\n\n\n```python\ntype(myNothing)\n```\n\n\n```python\ntype(None)\n```\n\n\n```python\n0 == None\n```\n\n\n```python\n\"\" == None\n```\n\nHowever, the opposite of None is still True. 
\n\n\n```python\nnot None\n```\n\nReally 0 is still an integer, \"\" a string, so `None` is really nothing:\n\n","## 10.1 Introduction\n\nSo far we've been writing 'sequential' code, basically following the flow of the code from the top to the bottom of the program. Sometimes, however, you want to re-use code elsewhere without copy/pasting a bit of code. You can do this with functions; a function holds a block of code that can be called from other places. Functions are essential for larger projects and code maintenance - if there's a problem with that piece of code, for example, you only have to fix it in one place.\n\n## 10.2 Functions\n\nWe've already been using built-in Python functions, for example **abs()** or **len()**. However, in this section we will build our own functions. Generally, the syntax when calling a function is the name of the function followed by round brackets **( )**. When you're writing your own function, in essence it would look like this:\n\n```python\ndef name_function():\n    \"Some information about the function\"\n    \n    print(\"This is a very simple function\")\n```\n\nInformation is given to a function by means of an argument and this is passed on in the rounded brackets. In the example above an argument is not defined, hence whenever you call the function it will print the same text. Arguments are defined within the parenthesis and are separated by commas in case there are multiple arguments. Before exploiting functions with arguments, let's have a look to an example with no arguments that prints the same text always when you call the function. \n\n```python\ndef silly_function():\n    \"This is some information about the silly function that will print out some silly text\"\n    text = \"Some silly text\"\n    print(text)\n```\nNotice that nothing happened now. This is because we're not calling the function, we just defined it. 
In order to call the function, we use the following expression:\n\n```python\nsilly_function()\n```\n\n\nInformation about the function can be retrieved by using the `help()` function. \n\n\n```python\nhelp(silly_function)\n```\n\nThe following code is an example of a function that will take some value as an argument and return the absolute value:\n```python\ndef myAbsFunc(someValue):\n    \"myAbsFunc takes a number as input and will return the absolute value\"\n    if someValue < 0:\n        someValue = -someValue\n    return someValue\n```\n\nSo here we've emulated the Python built-in abs() function with myAbsFunc(). Within a function you can use **return** to 'send back' a value, which can then be used somewhere else in the code. \n\n\n```python\nmyAbsFunc(-10)\n```\n\nIt works exactly the same as a built-in Python function. \n\n\n```python\nabs(-10)\n```\n\nFunctions can also make code more 'readable', as you can give them a name that is easy to understand so that it's clear what is happening without having to examine the code. \n\n\n```python\ndef getMeanValue(valueList):\n    \"\"\"\n    Calculate the mean (average) value from a list of values.\n    Input: list of integers/floats\n    Output: mean value\n    \"\"\"\n    meanValue = sum(valueList)/len(valueList)\n    \n    return meanValue\n\ngetMeanValue([4,6,77,3,67,54,6,5])\n```\n\n\n```python\ngetMeanValue([3443,434,34343456,32434,34,34341,23])\n```\n\nNote that it's a good practice to add a comment (in this case a multi-line one) to the top of the function that describes what it does, what it takes as input and what it produces as output. This is especially important for more complex functions. 
You can invoke the information with `help(function_name)`\n\n\n```python\ndef compareMeanValueOfLists(valueList1,valueList2):\n \n    \"\"\"\n    Compare the mean values of two lists of values.\n    Input: valueList1, valueList2\n    Output: Text describing which of the valueLists has the highest average value\n    \"\"\"\n \n    meanValueList1 = getMeanValue(valueList1)\n    meanValueList2 = getMeanValue(valueList2)\n \n    if meanValueList1 == meanValueList2:\n        outputText = \"The mean values are the same ({:.2f}).\".format(meanValueList1)\n    elif meanValueList1 > meanValueList2:\n        outputText = \"List1 has a higher average ({:.2f}) than list2 ({:.2f}).\".format(meanValueList1,meanValueList2)\n    else:\n        # No need to compare again, only possibility left\n        outputText = \"List2 has a higher average ({:.2f}) than list1 ({:.2f}).\".format(meanValueList2,meanValueList1)\n \n    return outputText\n```\n\n\n```python\nvalueList1 = [4,6,77,3,67,54,6,5]\nvalueList2 = [5,5,76,5,65,56,4,5]\ncompareMeanValueOfLists(valueList1,valueList2)\n\n```\n\nYou can call functions within functions, or basically anywhere in your code, even in conditions, ...:\n\n```python\nif getMeanValue(valueList1) > 26 :\n    print(\"The mean value of list 1 is greater than 26.\")\n```\n\n\n---\n\nThere are several ways to solve this problem, however it might be easier to do it with `zip()` ;). \n> ### {% icon hands_on %} Exercise 10.2.1\n>\n> The Hamming distance between two strings of equal length is the number of positions at which the corresponding characters are different. In a more general context, the Hamming distance is one of several string metrics for measuring the edit distance between two sequences. \n> \n> The Hamming distance between:\n> \n> \"karolin\" and \"kathrin\" is 3.\n> \n> Write a function called \"hamming_distance\":\n> - which accepts two strings, and \n> - raises an error if the lengths are unequal. 
\n> - Furthermore the function will return an integer that represents the number of mismatches between the two sequences. \n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution 1\n>    > </summary>\n>    >\n>    >  ```python\n>    > # string1 and string2 should be the same length.\n>    > def hamming_distance(string1, string2): \n>    >     \"\"\"Return the Hamming distance between equal-length sequences.\"\"\"\n>    >     \n>    >     if len(string1) != len(string2):\n>    >         raise ValueError(\"Undefined for sequences of unequal length.\")\n>    >     \n>    >     # Start with a distance of zero, and count up\n>    >     distance = 0\n>    >     # Loop over the indices of the string\n>    >     L = len(string1)\n>    >     for i in range(L):\n>    >         # Add 1 to the distance if these two characters are not equal\n>    >         if string1[i] != string2[i]:\n>    >             distance += 1\n>    >     # Return the final count of differences\n>    >     return distance\n>    > \n>    > seq1 = \"GATCATAGA\"\n>    > seq2 = \"CATCATACA\"\n>    > print(hamming_distance(seq1,seq2))\n>    >  ```\n>    > </details>\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution 2\n>    > </summary>\n>    >\n>    >  ```python\n>    > # string1 and string2 should be the same length.\n>    > def hamming_distance(string1, string2): \n>    >     \"\"\"Return the Hamming distance between equal-length sequences.\"\"\"\n>    >     \n>    >     assert len(string1) == len(string2), \"Undefined for sequences of unequal length.\"\n>    >     \n>    >     # Start with a distance of zero, and count up\n>    >     distance = 0\n>    >     # Loop over the indices of the string\n>    >     for s1,s2 in zip(string1,string2):\n>    >         if s1 != s2:\n>    >              distance +=1\n>    >         return distance\n>    >     # Return the final count of differences\n>    >     return distance\n>    > \n>    > seq1 = 
\"GATCATAGA\"\n>    > seq2 = \"CATCATACA\"\n>    > print(hamming_distance(seq1,seq2))\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n---\n\nThere are several ways to solve this problem, however it might be easier to do it with the `zip()` function.\n\n\n\n---\n\n> ### {% icon hands_on %} Exercise 10.2.2 \n>\n> Write a function that calculates the GC content of the sequence in a fasta file. For this example you can use [this fasta file](data/gene.fa) which contains the genetic sequence of a bone gla protein. The function must accept a fasta file as input file and will print the following:\n> \n> ```\n> The GC content of HSBGPG Human gene for bone gla protein (BGP) is\t 63.53%\n> ```\n> \n> The method [.startswith()](https://www.tutorialspoint.com/python/string_startswith.htm) might help. The function should read the lines of the fasta file and if it starts with a '>' define the text that comes afterwards as the sequence ID. The other lines are part of the sequence. After reading through the lines, you can easily define the GC content by counting the bases and taking the average. 
\n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution 1\n>    > </summary>\n>    >\n>    >  ```python\n>    > # solution one\n>    > def gc_content(file):\n>    >     \"\"\"Calculate GC content of a fasta file (with one sequence)\"\"\"\n>    >     sequence=\"\"\n>    >     with open(file, 'r') as f:\n>    >         for line in f:\n>    >             if line.startswith('>'):\n>    >                 seq_id = line.rstrip()[1:]\n>    >             else:\n>    >                 sequence += line.rstrip()\n>    >     GC_content = (sequence.count('G') + sequence.count('C')) / len(sequence) * 100\n>    >     print(\"The GC content of {} is\\t {:.2f}%\".format(seq_id, GC_content))    \n>    > \n>    >     \n>    > gc_content('data/gene.fa')\n>    >  ```\n>    > </details>\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution 2\n>    > </summary>\n>    >\n>    >  ```python\n>    > # solution two - very similar to one. \n>    > def gc_content(file):\n>    >     f = open(file, 'r')\n>    >     sequence=\"\"\n>    >     for line in f.readlines():  \n>    >         if line.startswith('>'):\n>    >             seq_id = line.rstrip()[1:]\n>    >         else:\n>    >             sequence += line.rstrip()\n>    >     GC_content = (sequence.count('G') + sequence.count('C')) / len(sequence) * 100\n>    >     print(\"The GC content of {} is\\t {:.2f}%\".format(seq_id, GC_content))    \n>    > \n>    >     \n>    > gc_content('data/gene.fa')\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n---\n\n## 10.3 Flexibility in functions\n\nIn the functions so far we've been using values (arguments) that are passed in and are required for the function to work. If you're not sure how many arguments the user will give, you can use an asterisk `*`. However, make sure that your code is flexible to access the number of arguments that the user is giving as input. 
In the example below we use the * asterisk to define a flexible number of arguments, and we use a for-loop to access each argument:\n\n\n```python\ndef MeanValue(*valueList):\n    \"\"\"\n    Calculate the mean (average) value from a list of values.\n    Input: list of integers/floats\n    Output: mean value\n    \"\"\"\n    meanValues = []\n    \n    for eachList in valueList:\n        meanOfList = sum(eachList)/len(eachList)\n        meanValues.append(meanOfList)\n        \n    return meanValues\n```\n\n\n```python\nMeanValue([1, 2, 3], [4,5,6])\n```\n\n\n```python\nMeanValue([1, 2, 3], [4,5,6], [7, 8, 9])\n```\n\nA second way of making flexible functions is by using *keywords* in a function; these are not required for the function to work because they are given a default value in the function definition. You can then set these keywords if necessary; consider the following example.\n\n\nBy default the parameter sortedList is `False` which means that Python will not make a sorted list in the function below, unless you explicitly ask it by setting the parameter to `True`. 
\n\n\n```python\ndef MeanValue(*valueList, sortedList = False):\n    \"\"\"\n    Calculate the mean (average) value from a list of values.\n    Input: list of integers/floats\n    Output: mean value\n    \"\"\"\n    meanValues = []\n\n    for eachList in valueList:\n        meanOfList = sum(eachList)/len(eachList)\n        meanValues.append(meanOfList)\n        \n    if sortedList == False:\n        print('I calculated all the mean values of your lists, however did not sort them')\n    else:\n        meanValues.sort()\n        print('I calculated the mean values and also sorted them')\n    return meanValues\n```\n\n\n```python\nvalueList1 = [4,6,77,3,67,54,6,5]\nvalueList2 = [5,5,76,5,65,56,4,5]\nvalueList3 = [5,9,75,8,65,34,4,4]\n```\n\n\n```python\nMeanValue(valueList1, valueList2, valueList3)\n```\n\n\n```python\nMeanValue(valueList1, valueList2, valueList3, sortedList = True)\n```\n\nUsing these keywords makes the function a lot more flexible - you can make the function do things (or not) depending on them.\n\n","## Extra exercises on functions\n\nThis chapter contains some extra exercises on functions. In the end, practice makes perfect...\n\n\n\n---\n\n> ### {% icon hands_on %} Exercise 10.2.1\n>\n> Download [this matrix file](http://wiki.bits.vib.be/images/4/4e/Matrix.txt) (`Matrix.txt`) and save it in your directory. Then write a function to read a matrix file in this format, reorder the rows by the values in the given column, and printing out the result. The function should take as argument a file name and a column number. 
\n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```python\n>    > def sortMatrixByColumn(fileName,columnNumber):\n>    >     #\n>    >     # Read the tab-delimited file and store the values\n>    >     #\n>    >  \n>    >     fin = open(fileName)\n>    >     lines = fin.readlines()\n>    >     fin.close()\n>    >  \n>    >     #\n>    >     # Convert the data from the file into a Python list\n>    >     #\n>    >  \n>    >     matrix = []\n>    > \n>    >     for matrixRow in lines:\n>    >         # Tab-delimited, so split line by \\t - this will give a list of strings\n>    >         matrixColumns = matrixRow.rstrip().split(\"\\t\") \n>    >  \n>    >         # Add a row to the matrix\n>    >         matrix.append([])\n>    >  \n>    >         # Add the columns, but convert the strings from the file into a float\n>    >         for matrixValue in matrixColumns:\n>    >             matrix[-1].append(float(matrixValue))\n>    >  \n>    >     #\n>    >     # Now sort by column - but have to track the row number as well!\n>    >     #\n>    >  \n>    >     selectedColumnValues = []\n>    >  \n>    >     for rowNumber in range(len(matrix)):\n>    >  \n>    >         selectedColumnValues.append((matrix[rowNumber][columnNumber],rowNumber))\n>    >  \n>    >         selectedColumnValues.sort()\n>    >  \n>    >     #\n>    >     # Now print out the new matrix - the column value is now not interesting\n>    >     # we want the row number!!\n>    >     #\n>    >  \n>    >     for (columnValue,rowNumber) in selectedColumnValues:  \n>    >         columnValueStrings = []\n>    >         for value in matrix[rowNumber]:\n>    >             columnValueStrings.append(\"{:.3f}\".format(value))\n>    >         print(\"\\t\".join(columnValueStrings))\n>    >  \n>    >  \n>    > sortMatrixByColumn(\"data/matrix.txt\",3)\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n---\n\n\n\n---\n\n> ### {% icon hands_on 
%} Exercise 10.2.2\n>\n> \n> Modify the program to read in the TestFile.pdb file by using separate functions to \n> 1. get the title, \n> 2. dissect the information from the ATOM line and \n> 3. to calculate the distance to the reference distance\n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```python\n>    > def getTitle(line,cols):\n>    >  \n>    >     # Gets the title\n>    >  \n>    >     title = line.replace(cols[0],'')\n>    >     title = title.strip()\n>    >  \n>    >     return (\"The title is '%s'\" % title)\n>    >  \n>    > def getAtomInfo(cols):\n>    >  \n>    >     # Get relevant information from an ATOM line and convert to the right type\n>    >  \n>    >     atomSerial = int(cols[1])\n>    >     atomName = cols[2]\n>    >     residueNumber = int(cols[5])\n>    >     x = float(cols[6])\n>    >     y = float(cols[7])\n>    >     z = float(cols[8])\n>    >  \n>    >     return (atomSerial,atomName,residueNumber,x,y,z)\n>    >  \n>    > def calculateDistance(coordinate1,coordinate2):\n>    >  \n>    >     # Calculate the distance between two 3 dimensional coordinates\n>    >  \n>    >     return ((coordinate1[0] - coordinate2[0]) ** 2 + (coordinate1[1] - coordinate2[1]) ** 2 + (coordinate1[2] - coordinate2[2]) ** 2 ) ** 0.5\n>    >  \n>    > \n>    > # Open the file\n>    > fileHandle = open(\"data/TestFile.pdb\")\n>    >  \n>    > # Read all the lines in the file (as separated by a newline character), and store them in the lines list\n>    > # Each element in this list corresponds to one line of the file!\n>    > lines = fileHandle.readlines()\n>    >  \n>    > # Close the file\n>    > fileHandle.close()\n>    >  \n>    > # Initialise some information\n>    > searchCoordinate = (-8.7,-7.7,4.7)\n>    > modelNumber = None\n>    >  \n>    > # Loop over the lines, and do some basic string manipulations\n>    > for line in lines:\n>    >  \n>    >     line = line.strip()  # Remove 
starting and trailing spaces/tabs/newlines\n>    >  \n>    >     # Only do something if it's not an empty line\n>    >     if line:\n>    >         cols = line.split()   # Split the line by white spaces; depending on the format this could be commas, ...\n>    >  \n>    >         # Print off the title\n>    >         if cols[0] == 'TITLE':\n>    >             print(getTitle(line,cols))\n>    >  \n>    >         # Track the model number\n>    >         elif cols[0] == 'MODEL':\n>    >             modelNumber = int(cols[1])\n>    >  \n>    >         # For atom lines, calculate the distance\n>    >         elif cols[0] == 'ATOM':\n>    >             (atomSerial,atomName,residueNumber,x,y,z) = getAtomInfo(cols)\n>    >  \n>    >             # Calculate the distance\n>    >             distance = calculateDistance((x,y,z),searchCoordinate)\n>    >  \n>    >             if distance < 2.0:\n>    >                 print(\"Model {}, residue {}, atom {} (serial {}) is {:.2f} away from reference.\".format(modelNumber,residueNumber,atomName,atomSerial,distance))\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n---\n\n","### Introduction\n\n#### NGS data repositories\n\nFirst of all, you need data to analyze. 
You can generate your own data but there's a lot of NGS data available on the internet.\n\nThe main repositories for NGS data:\n\n{|class=\"wikitable\"\n| align=\"center\" style=\"background:#f0f0f0;\"|''''''\n| align=\"center\" style=\"background:#f0f0f0;\"|'''NCBI - US'''\n| align=\"center\" style=\"background:#f0f0f0;\"|'''EBI - Europe'''\n|-\n| ||||Close-by so faster downloads\n|-\n|'''Gene expression database'''||[http://www.ncbi.nlm.nih.gov/geo/ GEO]||[http://www.ebi.ac.uk/arrayexpress ArrayExpress]\n|-\n|Contain processed NGS data, no raw data||ID starts with G||ID starts with E-\n|-\n|'''NGS sequence database'''||[http://www.ncbi.nlm.nih.gov/sra SRA]||[http://www.ebi.ac.uk/ena ENA]\n|-\n|Contain raw NGS data||ID starts with SR||ID starts with ER\n|-\n| ||ENA IDs also used by SRA||SRA IDs also used by ENA \n|-\n| ||stores reads in sra format||stores reads in fastq format\n|}\n\n\nBoth GEO and SRA use multiple types of IDs, ordered according to a certain hierarchy:\n\n{|class=\"wikitable\"\n| align=\"center\" style=\"background:#f0f0f0;\"|'''GEO ID'''\n| align=\"center\" style=\"background:#f0f0f0;\"|'''points to'''\n| align=\"center\" style=\"background:#f0f0f0;\"|'''definition'''\n|-\n|ID starts with GSE||experiment||Data of a full NGS experiment consisting of multiple samples The samples belong to different groups that are to be compared e.g. treated and control samples\n|-\n|ID starts with GSM||sample||Data of one single sample \n|-\n| align=\"center\" style=\"background:#f0f0f0;\"|'''SRA ID'''\n| align=\"center\" style=\"background:#f0f0f0;\"|'''points to'''\n| align=\"center\" style=\"background:#f0f0f0;\"|'''definition'''\n|-\n|ID starts with SRP||study||Studies have an overall goal and may comprise several experiments. \n|-\n|ID starts with SRX||experiment||An Experiment describes what was sequenced and the method used.\nInfo on the source of the DNA, samples, sequencing platform and the processing of the data. 
\n|-\n|ID starts with SRR||run||Data of a particular sequencing experiment.\nExperiments may contain many runs depending on the number of instrument runs that were needed.\n|}\n\n\nThere are two other resources of NGS data:\n\n- [https://insilicodb.org/ In Silico DB] from the ULB <ref name=\"insilicoDB\">https://insilicodb.org/</ref>\n- [http://www.illumina.com/science/data_library.ilmn Illumina's NGS data library] <ref name=\"Illumina Sequence Data Library\">http://www.illumina.com/science/data_library.ilmn</ref>\n\n\n\nIf you have an article describing an NGS dataset that is of interest to you, you should search in the article for a sentence mentioning the ID of the data in one of these databases.\n\n\n#### Metadata of NGS data sets\n\nYou do not only need the data, you also need extra information to be able to do the analysis. For instance, you need to know where each sample comes from: in clinical datasets it is important to know if the reads are coming from a patient or from someone in the control group...\nThis kind of information is called metadata and is stored together with the actual data.\n\n### Exercise 1: Downloading a data set for the introduction training\n\nFor the introduction training we will use a data set containing short Illumina reads from *Arabidopsis thaliana* infected with a pathogen, *Pseudomonas syringae*, versus mock treated controls. 
The data set is described in [the article of Cumbie et al., 2011](http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3188579/).\n\nThe authors provide an ArrayExpress ID (**E-GEOD-25818**) in the section **Analysis of a pilot RNA-Seq experiment**, but this ID points to Affymetrix microarray data and not to NGS data:\nGo to [the ArrayExpress home page](https://www.ebi.ac.uk/arrayexpress/)\n\n> Find the description of the experiment with ArrayExpress ID E-GEOD-25818 ?\n|-\n|\n- Type the ID in the search box on the ArrayExpress home page\n- Click **Search**\n\n{{Wiki-img|NGS/Intro/AE1.png|500px}}\n\nYou see that the experiment is stored as a **Transcription profiling by array** experiment (red) and that **Affymetrix GeneChip Arabidopsis Genome [ATH1-121501]** is the platform that was used (green).\n- Click the **Click for detailed sample information and links to data** link (blue)\n\n{{Wiki-img|NGS/Intro/AE2.png|500px}}\n\nYou see that you will download .CEL files, the file type for storing raw Affymetrix **microarray** data.\n\n|}\n\n{{Warning | So you see that IDs that are provided in articles are not always accurate !}}\n\nFortunately I could find the data in NCBI's SRA database, so we know the SRA ID. 
Since the connection with NCBI is too slow, we will do the download from ENA using the SRA ID.\n\nGo to [the EBI website](http://www.ebi.ac.uk/).\n\n> Download the data set with SRA ID SRR074262 from ENA ?\n\n- Type **SRR074262** in the search box\n- Click **Search**\n\n{{Wiki-img|NGS/Intro/SRA1.png|500px}}\n\nSince we are using an SRA run ID as a search term, we do a very specific search so the search returns a single SRA record:\n\n{{Wiki-img|NGS/Intro/SRA2.png|500px}}\n\n- Click the SRA run ID on the results page to go to [http://www.ebi.ac.uk/ena/data/view/SRR074262&display=html the ENA record containing the actual data of the run]\n- Scroll to the table at the bottom of the page\n- Click the link in the **Fastq files (ftp)** column (red): \n\n{{Wiki-img|NGS/Intro/SRA6A.png|700px}}\n\n|}\n\nIt can take some time to download the file since it's very big. Firefox will give you an estimate on how long it's going to take. If it takes too long, cancel the download and use the file that is already present on the BITS laptops in the **/Documents/NGSdata** folder as **SRR074262.fastq**.\n\nIn a normal analysis we would of course download all 6 data files of this study. It's only because of time limits that we will only use a single sample during the training. 
If you are analyzing the 6 samples you need to take a look at the metadata to know which samples represent controls and which samples represent the treatment (in this case treatment with a plant pathogen).\n\nIn ENA and SRA, annotation is found in the record of the NGS study.\n\n> Go to the ENA record of the study the downloaded sample belongs to and look at the grouping of the samples.\n|-\n|\n\n- Click the SRA ID of the study the downloaded sample belongs to (green) to access the record of the study:\n\n{{Wiki-img|NGS/Intro/SRA6A.png|700px}}\n\n- Click the **Select columns** link on the **Read files** tab to visualize all the fields with metadata that you can visualize.\n{{Wiki-img|NGS/Intro/SRA6B.png|200px}}\n\n- Deselect the fields that you are not interested in and select the fields you want to view; If you are interested in the grouping of the samples you need to select **Library name**(red): \n\n{{Wiki-img|NGS/Intro/SRA6C.png|700px}}\n\nThis adds a column called **Library name** in the table containing the grouping annotation of the samples.\n- If you want to know exactly what the names mean, you have to consult [http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0025279 the paper that describes the analysis of the data set]. In the **RNA preparation and sequencing** section you see that hrcC means infected with the pathogen, while MgCL2 represent the control treatment\n{{Wiki-img|NGS/Intro/SRA6D.png|500px}}\n\n\nThe sample that we have downloaded for the introduction training thus comes from the group of infected samples. \n|}\n\n\n### Exercise 2: Downloading a data set for the ChIP-Seq training\n*Exercise created by Morgane Thomas-Chollier*\n\nFor the ChIP-Seq training, we are going to use the data set that is described in [http://www.ncbi.nlm.nih.gov/pubmed/23818864 the article of Myers et al., 2013] <ref>http://www.ncbi.nlm.nih.gov/pubmed/23818864</ref>. 
The article contains the following sentence at the end of the Materials and Methods section:\n\"All genome-wide data from this publication have been deposited in NCBI’s Gene Expression Omnibus (GSE41195).\"\nIn this case **GSE41195** is the ID of the experiment in the GEO database.\n\nGo to [http://www.ncbi.nlm.nih.gov/geo/ the GEO home page]\n\n> Download the data of the experiment with GEO ID GSE41195 ?\n|-\n|\n- Type the ID in the search box on the GEO home page\n- Click **Search**\n\n{{Wiki-img|NGS/Intro/GEO1.png|200px}}\n\nThis redirects you to the GEO record of the experiment.\n\n{{Wiki-img|NGS/Intro/GEO2.png|400px}}\n\nIn the **Experiment type** section you can see that this GEO record is a mixture of expression analysis and ChIP-Seq.\n- Scroll to the bottom of the page:\n\n{{Wiki-img|NGS/Intro/GEO3.png|400px}}\n\nYou can see that the data of the ChIP-Seq experiment have their own identifier: GSE41187\n\n- Click the ChIP-Seq data identifier.\nThis brings us on the GEO record of the ChIP-Seq experiment.\n- Scroll down to the **Samples** section:\n\n{{Wiki-img|NGS/Intro/GEO4.png|400px}}\n\nNote that GEO contains the grouping annotation here in the **Samples** section.\nFor time's sake, we will focus on a single sample: FNR IP ChIP-seq Anaerobic A\n- Click the ID **GSM1010219** of that sample to go to the GEO record of the sample\n- Scroll to the bottom of the page to the **Relations** section:\n\n{{Wiki-img|NGS/Intro/GEO5.png|400px}}\n\nGEO only contains processed NGS, no raw data. The corresponding raw data is stored in the SRA database. In the **Relations** section you can find the SRA ID of this data set. For the training we need a fastq file containing raw data.\n- Copy the SRA ID of the ChIP-Seq experiment (SRX189773)\n\n|}\n\nAgain, it will take too long to download the data from NCBI. 
So we will do the download from EBI.\n\nGo to [http://www.ebi.ac.uk/ the EBI website].\n\n> Download the data with SRA ID SRX189773 ?\n|-\n|\n- Type the ID in the search box on the EBI home page\n- Click **Search**\n\n{{Wiki-img|NGS/Intro/ENA1.png|300px}}\n\nThis returns two results: a link to the record of the experiment and a link to the record of the run:\n\n{{Wiki-img|NGS/Intro/ENA2.png|400px}}\n\n- Click the record of the full experiment (red)\n\n{{Wiki-img|NGS/Intro/ENA3.png|600px}}\n\nThe table at the bottom of the page contains a column called **Fastq files (ftp)**\n- Click the link in this column to download the data in fastq format\n\n|}\n\nIt took only a few minutes to download the data on my laptop at work, but the internet connection at work will be faster than the one in the training room. Firefox will give you an estimate of the time it takes for the download. If it is too long, cancel the download and use the file that has already been downloaded and is available on the BITS laptops:\n\n- on Windows: in the **/Documents/NGSdata** folder as **SRR576933.fastq**\n- In Linux: in the **/home/bits/NGS/ChIPSeq** folder as **SRR576933.fastq**\n\n\nChIP-Seq always compares the ChIP sample to a control sample, consisting of genomic DNA isolated from cells that were cross-linked and fragmented under the same conditions as the ChIP sample or of DNA fragments isolated in a “mock” ChIP reaction using an antibody that reacts with an irrelevant, non-nuclear protein.\n\nIn this data set, control samples consist of full genomic DNA. To download a control sample, we should redo the same steps starting from the GEO record of the ChIP-Seq experiment and click the GEO sample ID of the **anaerobic INPUT DNA** sample... 
However, the fastq file is available in the same data folders (SRR576938.fastq)\n\n\n### Downloading data sets via Linux command line\n\nSee Linux command line training pages\n\n### Downloading data sets via R\n\n*Exercise created by Stephane Plaisance*\n\nOnce you know the SRA or ENA ID of the data set you can download the data and the metadata automatically via an R script.\nSee [http://wiki.bits.vib.be/index.php/NGS_RNASeq_DE_Exercise.1#Obtain_data_and_metadata_from_ENA_using_R the exercises of the RNA-Seq training] to learn how to do this.\n","## 6.1 Introduction\n\nAnother important feature of computer programs is that they can do the same thing over and over again with different information. This is possible by using loops in your code; essentially a loop is executed until it runs out of data or the code decides to break out of it.\n\n\n\n## 6.2 For loop\nNow that we have these variables that can hold multiple elements (previous exercise), it would be useful to be able to loop over them one by one. This is possible with the **for** loop:\n\n\n\n\n```python\n# Make a list of integers from 0 to 9 with steps of 1 (0, 1, 2, ..., 9)\nmyList = range(10) \n \n# for each value (myElement) in this list (myList); do the following:\nfor myElement in myList: \n    # Print that value\n    print(\"Counting {}\".format(myElement))  \n```\n\nIn the first iteration myElement will take up the first value of myList and perform the code that is indented (in this case it will print *Counting 0*), then it will go back to the start of the loop and take up the second value of myList (which is 1) and perform again the code that is indented (*Counting 1*), etc.  \n\nNote that again we have to use indentation, as there is a block of code that is only relevant within the for loop. \n\nPython will always need a list, tuple, set or dictionary to iterate through and it will work exactly the same with tuples (see example below). 
The iterator will always take up the value of the list/tuple/set/dict! \n\n\n```python\nmyTuple = (\"A\",\"B\",\"C\",\"D\",\"E\",\"F\")\n \nfor myElement in myTuple:\n    print(\"Letter {}\".format(myElement))\n```\n\nBecause you can access individual elements within a list or tuple, you can also count the element index in the list or tuple, so that you know both index and value. If you want to iterate over a list of letters, in this case it's in a tuple type, you'll first have to find the length of the list and then use range to make a list of integers that can be used as an index. \n\n\n```python\nmyTuple = (\"A\",\"B\",\"C\",\"D\",\"E\",\"F\")\nmyTupleLength = len(myTuple)\n \nfor tupleIndex in range(myTupleLength):\n    myElement = myTuple[tupleIndex]\n    print(\"Letter {} is at position {}\".format(myElement,tupleIndex + 1))  # We have to add 1 to the index here because Python starts at zero...\n```\n\nPython has a built-in function `enumerate()` which eases this task for you as a programmer. For the tuple which we defined above, you could make the following table with indices and accompanying values:\n\n<center>\n\n| index | value |\n|---|---|\n| 0 | A |\n| 1 | B |\n| 2 | C |\n| 3 | D |\n| 4 | E |\n| 5 | F |\n</center>\n\n`enumerate()` mimics this table and you can use it in this way which immediately gives you the indices:\n\n\n```python\nmyTuple = (\"A\",\"B\",\"C\",\"D\",\"E\",\"F\")\nfor line in enumerate(myTuple):\n    print(line)\n```\n\nThe enumerate function has some similarities with dictionaries, especially in how to access a value. Don't worry if you're confused with the square brackets, we'll cover this in Chapter 8. \n\n\n```python\nmyTuple = (\"A\",\"B\",\"C\",\"D\",\"E\",\"F\")\nfor line in enumerate(myTuple):\n    print(\"Letter {1} is at position {0}\".format(line[0]+1, line[1])) # For the sake of exercising I switched the format positions for once. 
\n```\n\n----\n\n**Intermezzo:**\n\n\nBefore starting with exercises, we want to highlight the if-conditions from chapter 4 again, especially the fact that Python interprets the integer 0 to be `False`, the integer 1 is interpreted as `True` and any other integer different than 0 and 1 is considered to be not `False` (it's also not `True` though)\n\n\n```python\na = 0\na == False\n```\n\n\n```python\nb = 1\nb == True\n```\n\n\n```python\nc = 2\nc == True\n```\n\n\n```python\nc == False\n```\n\n\n```python\nc != True\n```\n\n\n```python\nc != False\n```\n\nWhy is this important to know? We sometimes see code similar to the one below in which an arithmetical operation is evaluated in an `if` statement. If the result of this arithmetical operation is an integer like 2, 3, etc. we know now how we can deploy this knowledge to evaluate the statement.  \n\n\n\n```python\nc = 2 \nif c != False:  # \n    print(\"C is equal to\", c)\n```\n\n\n```python\na = 0\nif a:\n    print(\"A is equal to 0\")\n```\n\n\n```python\nb = 1\nif b:   \n    print(\"B is equal to\", b)\n```\n\n\n```python\nif not a:\n    print(\"A is still equal to 0\")\n```\n\n----\n\nNow we want to find out if a number is divisible by another number. In the code below, we will iterate over each value in the list of numbers. If the remainder after division is 0 (comparison is True), we print the number out. \n\n\n\n```python\nmyNumbers = range(1,50)\nmyDivider = 17\n \nfor myNumber in myNumbers:\n    if not (myNumber % myDivider):  # Nothing left after division, so number is divisible.\n        print(\"Number {} can be divided by {}!\".format(myNumber,myDivider))\n```\n\nHere we now have two levels of code besides the main one; the **if** is checked for every value, but the print is only executed for numbers divisible by myDivider.\n\n\nYou can also control the loop by using **continue** and **break**. 
They alter the flow of a normal loop:\n\n\n```python\nmyNumbers = range(1,100)\n \nfor myNumber in myNumbers:\n    if myNumber == 5:   \n        continue     # This means that the code within the for loop will be ignored if myNumber is equal to 5, we 'jump back' to the start and use the next number (6)\n    print(myNumber)\n\n    if myNumber == 8:\n        break        # This means we will exit the loop alltogether, all other values after this one will not be dealt with.\n\n```\n\n---\n> ### {% icon hands_on %} Exercise 6.2.1\n>\n> Write a program where you print out all positive numbers up to 1000 that can be divided by 13, or 17, or both. The output should be printed as : `Number 13 is divisible by [13]`. If you want a little more challenge, the output should be printed as `Number 884 is divisible by 13, 17`\n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution 1\n>    > </summary>\n>    >\n>    >  ```python\n>    >  myNumbers = range(1,100) # should be 1001\n>    >  myDividers = (13,17)    # We will loop over these in the loop itself, so it's easy to add new numbers to this\n>    >   \n>    >  for myNumber in myNumbers:\n>    >      validDividers = []     # In this list we will put all the valid dividers\n>    >      for myDivider in myDividers:\n>    >          if not (myNumber % myDivider):\n>    >                validDividers.append(myDivider)\n>    >      if validDividers:      # This means that the list has to have values in it\n>    >          print(\"Number {} is divisible by {}\".format(myNumber,validDividers))       \n>    >  ```\n>    > </details>\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution 2\n>    > </summary>\n>    >  ```python\n>    >  # Extra: The output is not very nice here as you print off the list with the square brackets, you could try the following bit of code under the if validDividers: condition:\n>    >  myNumbers = range(1,100) #should be 1001\n>    >  myDividers = (13,17)    
# We will loop over these in the loop itself, so it's easy to add new numbers to this\n>    >   \n>    >  for myNumber in myNumbers:\n>    >      validDividers = []     # In this list we will put all the valid dividers\n>    >      for myDivider in myDividers:\n>    >          if not (myNumber % myDivider):\n>    >                validDividers.append(myDivider)\n>    >      if validDividers:      # This means that the list has to have values in it\n>    >          # First make strings out of the integers; this is valid Python syntax where you make a list out of a list    \n>    >          validDividerStrings = [\"{}\".format(validDivider) for validDivider in validDividers]\n>    >  \n>    >          # Now you can join the elements of a list (if they are strings) together using the .join() method for a string:\n>    >          validDividerString = ', '.join(validDividerStrings)\n>    >   \n>    >          print(\"Number {} is divisible by {}\".format(myNumber,validDividerString))\n>    >      \n>    >      ######### Or as an alternative for the nice printing:\n>    >          #if len(validDividers) == 1:\n>    >          #    print(\"number is div by {}\".format(validDividers[0]))\n>    >          #elif len(validDividers) == 2:\n>    >          #    print(\"number x is div by {}, {}\".format(validDividers[0],validDividers[1]))\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n---\n\n\n\n\n---\n\n> ### {% icon hands_on %} Exercise 6.2.2\n>\n> Write a program where you find, for each positive number up to 50, all numbers that can divide each number. E.g. 16 can be divided by 1, 2, 4, 8 and 16. 17 can be divided by... 
\n> \n> It's fine if you print the output like this: \n> ```\n> Number 1 can be divided by 1!\n> Number 2 can be divided by 1!\n> Number 2 can be divided by 2!\n> Number 3 can be divided by 1!\n> ```\n> However, you can also try to print the output like this:\n> ```\n> Number 4 can be divided by 1, 2, 4!\n> ```\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution 1\n>    > </summary>\n>    >\n>    >  ```python\n>    >  # Write a program where you find, for each positive number up to 50, all numbers that can divide each number. E.g. 16 can be divided by 1, 2, 4, 8 and 16. 17 can be divided by...\n>    >  myNumbers = range(1,5) #should be 51\n>    >   \n>    >  for x in myNumbers:\n>    >      dividers = []\n>    >      for y in range(1,x+1):\n>    >          if not (x % y):\n>    >              dividers.append(y)\n>    >   \n>    >      for divider in dividers:\n>    >          print (\"Number {} can be divided by {}!\".format(x,divider))\n>    >  ```\n>    > </details>\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution 2\n>    > </summary>\n>    >\n>    >  ```python\n>    >  # The output is again not very nice here, you can replace the last two lines by this for nicer output:\n>    >  myNumbers = range(1,5)\n>    >   \n>    >  for x in myNumbers:\n>    >      dividers = []\n>    >      for y in range(1,x+1):\n>    >          if not (x % y):\n>    >              dividers.append(y)\n>    >   \n>    >      #for divider in dividers:\n>    >      dividerList = \", \".join([str(divider) for divider in dividers])\n>    >          \n>    >      print (\"Number {} can be divided by {}!\".format(x,dividerList))\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n\n## 6.3 While loop\nA **while** loop is dependent on a condition, as long as this condition is evaluated as `True` the loop will continue. 
Its structure is very similar to the for-loop we saw here above.\n\n```python\nresult = 0\nwhile result < 10:\n    # add 1 to the result\n    result += 1\n    print(result)\n```\n\n\nThis is an endless loop:\nFYI, if you execute this, you'll end up in an eternal loop. To break the loop, press the stop button.\n\n\n```python\nwhile True:\n    print(\"Endless...\")\n```\n\nWhile loops are more flexible than for loops, as you can make them end whenever necessary depending on code within the loop itself:\n\n\n\n\n```python\nbaseValue = 2\npowerValue = 1\npowerResult = 0\nwhile powerResult < 1000:\n    powerResult = baseValue ** powerValue\n    print(\"{} to the power {} is {}\".format(baseValue,powerValue,powerResult))\n    powerValue += 1 # Add one to itself - this kind of step is crucial in a while loop, or it will be endless!\n```\n\nNote that the last value printed is greater than 1000, because the while condition is only checked at the start of the loop. You should check where the first result is calculated as this may impact the result! Here we changed the order of calculating the value. 
We *initialized* the loop and put the calculation at the very end:\n\n\n\n```python\nbaseValue = 2\npowerValue = 1\npowerResult = 0\npowerResult = baseValue ** powerValue\n\nwhile powerResult < 1000:\n    print(\"{} to the power {} is {}\".format(baseValue,powerValue,powerResult))\n    powerValue += 1 # Add one to itself - this kind of step is crucial in a while loop, or it will be endless!\n    powerResult = baseValue ** powerValue\n```\n\n---\n\n> ### {% icon hands_on %} Exercise 6.3.1 \n>\n> Try to reproduce a for-loop (the example of numbers divisible by 17) by using a while-loop.\n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    > \n>    >  ```python\n>    >  # Try to reproduce a for-loop (the example of numbers divisible by 17) by using a while-loop.\n>    >  myNumber = 1\n>    >  myDivider = 17\n>    >  \n>    >  while myNumber <= 50:\n>    >      if not (myNumber % myDivider): # Nothing left after division, so number is divisible.\n>    >          print(\"{} is divisible by {}\".format(str(myNumber),str(myDivider)))\n>    >      myNumber += 1\n>    >  \n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n---\n\n\n---\n\n> ### {% icon hands_on %} Exercise 6.3.2\n>\n> Write a program where you start with a list of numbers from 1 to 100, and you then remove every number from this list that can be divided by 3 or by 5. Print the result.  \n> Tip: you have to make a copy of the original list here, otherwise Python will get 'confused' when you remove values from the list while it's looping over it. Use `[:]` for this purpose.  \n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    > ```python\n>    > # Write a program where you start with a list of numbers from 1 to 100, and you then remove every number from this list that can be divided by 3 or by 5. 
Print the result.\n>    > # Tip: you have to make a copy of the original list here, otherwise Python will get 'confused'\n>    > # when you remove values from the list while it's looping over it\n>    > \n>    > myNumberList = list(range(1,101))\n>    > \n>    > for number in myNumberList[:]:  \n>    >     if not (number % 3) or not (number % 5):\n>    >         myNumberList.pop(myNumberList.index(number))\n>    > \n>    > print(myNumberList)\n>    > ```\n>    > </details>\n>\n{: .hands_on}\n\n---\n\n\n---\n> ### {% icon hands_on %} Exercise 6.3.3\n>\n> Write a program where you ask the user for an integer (whole number), and keep on asking if they give the wrong input. Check whether the number can be divided by 7, and print the result.\n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```python\n>    > # Write a program where you ask the user for an integer (whole number), and keep on asking if they give the wrong input. Check whether the number can be divided by 7, and print the result.\n>    > myNumberList = range(1,101)\n>    >  \n>    > # Keep on checking until you have a number, prime the while loop as well\n>    > isNumber = False\n>    > while not (isNumber):\n>    >     inputString = input(\"Give a number:\")\n>    >     if inputString.isdigit():\n>    >         isNumber = True\n>    >         number = int(inputString)\n>    >     else:\n>    >         print(\"Incorrect, not a whole number, try again.\")\n>    >     if not (number % 7):\n>    >         print(\"{} can be divided by 7!\".format(number))\n>    >     else: \n>    >         print(\"Number not divisible by 7\")\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n---\n\n## 6.4 Iterating through two files at the same time\nPython has a built-in function which allows you to iterate through multiple e.g. lists or strings at the same time. 
For two strings, it would look like this:\n\n\n```python\nx = 'abcde'\ny = 'fghij'\n\ncount = 0\nfor i,j in zip(x,y):\n    count += 1\n    print(\"Iteration: {}. The value i is {}, and the value j is {}\".format(count, i, j))\n```\n\nAnd the principle is practically the same for three (or more) strings. \n\n\n```python\nx = 'abcde'\ny = 'fghij'\nz = 'klmno'\n\ncount = 0\nfor i,j,k in zip(x,y,z):\n    count += 1\n    print(\"Iteration: {}. The value i is {}, the value j is {} and the value k is {}\".format(count, i, j, k))\n```\n\n\n","# Introduction\n{:.no_toc}\n\n<!-- This is a comment. -->\n\n## Introduction to data selection and preservation\n\nResearch should be transparent and you should always be able to revert back to your data if necessary and be able to show others how you came to your results. Therefore, your research data with all information reasonably necessary for verification needs to be preserved.\n\nWith well-managed and preserved research data, you can defend yourself against allegations of mistakes. You can also prevent wrong conclusions from further spreading into the scientific community if there really are mistakes.\n\n## Long term data preservation\n\nResearch data can be preserved for different reasons such as verification and/or possible reuse. It can be your own wish or that of your university, funder or journal.\n\n**Verification**\nTODO: adapt this part\n\nThe Netherlands Code of Conduct for Academic Practice (VSNU) states that raw data from research must be kept available for a minimum of ten years. This statement is also included in the Utrecht University Policy framework for research data: “Archived research data are to be retained for a minimum of ten years, commencing from the date that the research results are published.”\n\n**Reuse**\nIt may be worthwhile to make (part of) your data available for a longer period of time and/or for a wider audience. 
Data which are suitable to keep for reuse are interpretable data on which new research can be based, independent of the publication.\n\nOn the one hand, making research data reusable will need extra effort. On the other hand, possible reuse, even by your future self, might bring you lots of benefits and credits. Consider if your data is worth the effort of making it reusable or if preserving and archiving for verification is enough.\n\nReuse is explained more in depth in the next part of this course: ‘Availability for reuse’. In this part we will focus on selection and preservation of research data for verification purposes.\n\n## Data package\n\nKeeping data for verification serves the specific goal of having transparent, reproducible research.\n\n**Alternatives to preserving raw data**\nIf preserving your raw data poses problems, alternatives can also ensure verification. For instance, transcripts of recorded interviews could hold all important information and may be less privacy-sensitive, so it is reasonable to preserve those instead of the recordings themselves. Also, if raw data is very large, preserving your data only in some processed form could be an alternative, combined with, for instance, a demonstrable quality check on the processing.\n\n**The contents of your data package**\n\nTODO: add image for illustration/zenodo?\n\nOthers should be able to understand what you did. It is not enough to just provide data. Instead you should preserve a package with everything included that is necessary to reproduce your results. 
Think of including the following:\n\n* Primary (raw) data;\n* Secondary (processed) data;\n* Protocols;\n* Computer code/scripts;\n* Lab journals;\n* Metadata and/or codebooks describing the data;\n* An overview of what the contents of the data package stating what file contains what information, and how these are related.\n\nThe data should contain a reference to any publication which is based on the data.\n\nTo make understanding your data less dependent on information in the publication, you can also add information on:\n\n* Collection methods;\n* Procedures;\n* Experimental protocol;\n* Your research question;\n* Stimuli used;\n* Sample descriptions.\n\nThis is especially practical if the data package can be found and used on its own account. This is the case if it is published in a data repository or data journal as a data package for reuse.\n\nDo not forget to explicitly state who is responsible for the content of the data package, who is to be contacted in case of a request for access, and under what conditions access is granted.\n\n## Where to preserve what type of data?\n\nDuring your research, you generate research results that can be made available for others.\n\nA paper or publication is the most traditional way of making results available, but it is by no means the only way. A relatively new way of making results available is using a public data repository.\n\nAs you have just learned, preserving your data may serve the purpose of verification or  reuse. Public data repositories cater to both needs. In addition, they handle requests to view or use your data which means you do not have to take care of such requests yourself.\n\nIn the example below, you find a workflow for experimental research. What information can be made available in what place? Drag the items on the right to the correct place in the figure. 
Please note that some items can be used more than once.\n\nTODO: add H5P quiz and PDF solution?\n\n### Accounting for data of others\n\nIf you are permitted to use data from other parties, you will have to account for those as well if your research is to be verifiable and reproducible by others. You may recognise this from chapter 1 of this course: Data collection: Discover existing data, weblecture ‘Assessing usefulness of research data of others’ (5 of 10).\n\nYou have the following options:\n\nIf the used data is preserved correctly somewhere for the coming ten years, refer to the data repository in question.\nIf it is not taken care of, contact the responsible persons, negotiate correct preservation in a data repository for ten years, and refer to that repository.\nIf this isn’t possible, try to arrange a local copy that you preserve yourself;\nIf this isn’t allowed, you will not be able to present the data in case of questions. Therefore, you should question yourself whether you can actually use the data.\n\n<figure id=\"figure-1\"><img src=\"../../images/Cont_5_Share_SelectPreserve_Chart10years.png\" alt=\"alt-t\"><figcaption><span class=\"figcaption-prefix\">Figure 1:</span> Preserve for 10 years</figcaption></figure>\n\n**Accounting for data of others on websites**\n\nIf you find interesting information on a website that you want to refer to, it is possible that this information will not be future proof.\n\nThe link or web address might change over time (link rot). Or the information on a website is updated, changed or replaced with other content (content drift).\n\nIt is possible to archive web pages on a web archive like the [Internet Archive](https://archive.org/web/). You can capture a web page as it appears now for use as a trusted citation in the future (save a page). You will get an alternative link, pointing to the archived, static version of the page. 
Use this alternative link as a reference to the online information.\n\n## How to preserve your data correctly\n\nIn order for the data to survive for the long term, an active preservation regime has to be applied. The bad news is, data automatically gets lost over time.\n\nThere are five main ways your data can be lost:\n\n* Digital sources degrade over time ('bit rot');\n* File formats and software become outdated;\n* The media on which your data is stored becomes outdated or defective;\n* Disaster strikes the storage location;\n* The person that understands the data finds another job or data simply becomes forgotten.\n\nIn this video below you will learn how to minimise the risk of losing data. You are also given good preservation practices.\n\n<iframe src=\"https://www.youtube.com/embed/qENaO0Lk6eo\" allowfullscreen=\"\" allow=\"accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture\" height=\"515px\" style=\"display: inline-block;\" width=\"800px\" title=\"\"></iframe>\n\n## Match the solutions to the data loss\n\nFrom the weblecture you learned how to prevent data loss. Can you recall all applicable active regimes, as explained in the weblecture?\n\nBelow you see a list of solutions to prevent data loss. Underneath that list you see a list of risks for data loss. Please add the number of each solution to the correct risk.\n\n**Solutions to prevent data loss**\n\n1. Have multiple copies. Use a checksum to identify faulty copies\n2. Use preferred file formats that can be opened by a wide range of software. Update the file format to a current one.\n3. Move data to fresh media well before the media’s expiration date.\n4. Have multiple copies. Move data to fresh media well before the media’s expiration date.\n5. Document your data well.\n6. 
Advertise the content in a data catalogue.\n\nTODO: add quiz text solution\n\n### Write your data management plan for your data preservation\n\nGo to DMPonline and open your draft data management plan created in the Introduction.\n\nYou have now completed the module on data selection and preservation. You should be able to complete the following questions in the section ‘Data selection and preservation’:\n\n* Which data should be preserved and/or shared?\n* How and where will you keep your data for the long term?\n","# OTU creation using LotuS \n{:.no_toc}\n\nIn this tutorial we will create a genus abundance table from two 454 sequencer runs using a pipeline called LotuS. A genus abundance table contains counts of different genera in several samples: rows are the different genera and columns are the samples. As a simple example, take a look at this table:\n\n|                     |        |       |        |       |      |\n|:--------------------|:-------|:------|:-------|:------|:-----|\n|Genus\t              | bl10   |bl11   |bl12\t|bl128\t|bl13  |\n|Bacteria             |24      |52     |39\t|63\t|181   |\n|Bacteroides\t      |169     |27     |7\t|42\t|6     |\n|Porphyromonadacea    |370     |346    |621\t|565\t|224   |\n\nThis table tells us how often we observe unclassified Bacteria, Bacteroides and unclassified Porphyromonadaceae in the 5 samples bl10, bl11, bl12, bl128 and bl13. A matrix like this will be used for the next tutorial on numerical ecology and created from raw sequence data within this tutorial.\n\n## The data\n\nIn a recent experiment, we sequenced 73 samples in two 454 runs; the raw fasta and quality files are in `/home/VIBTrainingX/metagenomics/` on the bits server. For each run we have a fasta (.fna) and quality (.qual) file. Go to this directory using the command `cd` and become aware of the files required from the experimenter (command `ls`). 
You can always take a look at files and their contents using viewing commands like `less`.\n\nThe sequence files were multiplexed before the experiment, that is a small nucleotide sequence – the barcode - was attached to each read, specific for each experiment. A mapping file is typically used, containing the link between a sequence barcode and the name of the experiment and is essential to demultiplex the fasta files. \n\n## The tools\n\nLotuS is actually a set of tools that were installed in the `/opt/` folder. First go to [the lotus website](http://psbweb05.psb.ugent.be/lotus/) and familiarize yourself with the basic documentation.\n\nTo start the exercises, go to the directory where Lotus is installed. \n```bash\ncd /opt/lotus-1.62/lotus_pipeline/\n```\n\nFrom this directory you can run all the tools. To reach all data files (e.g. input files) you have to provide the path to the files: `~/metagenomics/`\n\n## The analysis\n\n### Creation of Mapping file. \n\n[An Excel file](http://data.bits.vib.be/pub/trainingen/metagenomics/Mice_experiment.xlsx) is provided, with some basic experiment annotation. The fwd primer is given as `ACTYAAAKGAATTGACGG`, but if you search for the primer sequence in the reads (in one of the .fna files) you will not find it because you need to reverse complement the primer sequence first using [this tool](http://www.bioinformatics.org/sms/rev_comp.html). So you see annotation provided by the provider is not always correct.\n \nLotus needs experiment annotation to map input files to barcodes. Based on the documentation on [the Lotus website](http://psbweb05.psb.ugent.be/lotus/documentation.html#MapFile), create a mapping file for this experiment. This means that you need to replace the column headers of the Excel file with terms that are accepted by Lotus and that you have to indicate that there is a .fna and a .qual file for each run. The header line should be preceded by a `#`. 
The mapping file should at least contain four columns with the following headers:\n\n* SampleID\n* BarcodeSequence\n* fnaFile\n* qualFile\n\nSave the file as a tab-delimited text file.\n\nYou can always test the validity of your mapping file with the command \n```bash\n./lotus.pl -check_map [your mapping file]\n```\n\nIf you have fastq files as input the fnaFile and qualFile columns would be replaced by one fastqFile column.\n\n### Changing  the data format of the input files.\n\nSometimes you need to transcribe data from one format to another. For instance we could transform the fasta files (.fna) to fastq files (.fq). This can be done with the program `sdm`, that is part of the LotuS pipeline. Take a look at the sdm help by typing `./sdm` and exploring the options, e.g.\n \n```bash\n./sdm -help_commands\n```\n\nThen, using command line arguments, transcribe the fasta + qual files of the Anh experiment into fastq files. Take a look at output and log files created by sdm.\n\n> ### {% icon hands_on %} Hands-on: Exercise 1 \n>\n>  > ### {% icon question %} Question\n>  > \n>  > How to transform fasta + qual files into fastq files ?\n>  >\n>  > > <details markdown=\"1\">\n>  > > <summary>{% icon solution %} Solution\n>  > > </summary>\n>  > > ```bash\n>  > > sudo ./sdm -i_fna ~/metagenomics/Anh.1.fna -i_qual ~/metagenomics/Anh.1.qual -o_fastq t1.fq\n>  > > ```\n>  > > </details>\n>  >\n>  {: .question }\n{: .hands_on }\n\nIn the lotus_pipeline folder the fastq file t1.fq was generated, to take a look at the file use\n```bash\nhead t1.fq\n```\n\nDo the same for the t1.log file: you see that sdm is not only used to transform fasta into fastq files but it is also capable of doing quality filtering on the raw reads files.\n\n### Setting up a quality filter of the input sequence files.\n\nSince we want to make sure the quality filtering of the input file is strict, LotuS offers several quality filtering options. 
Quality settings are different for different data formats, that's why Lotus offers a file with specific settings for each platform. Since we have 454 data we will take a look at the file sdm_454.txt.\n```bash\nless sdm_454.txt\n``` \n\nRead the comments (lines starting with “#”) to each option and think which quality filtering options might be important in order to create OTUs from the raw sequences. (Hint: an OTU is a clustering of similar sequences with the aim to have one cluster of sequences for each species that was originally present in the samples. Take into account that sequencing machines make errors and that PCR amplification of the 16S rDNA is similarly error-prone). Think about a set of parameters, including the statistical information from step 2, and save these in your copy of sdm_options.txt for later use.\n\nCheck the sdm [quality filter settings](http://psbweb05.psb.ugent.be/lotus/documentation.html#SDMconfig). Some of the default filter settings are:\n\n* MinSeqLength=250 : Only use reads at least 250 nt long after processing (remember we are working with long reads from 454 sequencing)\n* TruncateSequenceLength = 250 : Cut all reads after 250 nt\n* QualWindowWidth = 50 and QualWindowThreshold = 25 : Remove all reads where average quality is <= 25 over a 50bp window\n* maxAccumulatedError = 0.5 : Remove all remaining bases when accumulated error score >= 0.5\n* maxHomonucleotide = 8 : Remove all reads with a homonucleotide run (repeat of same nt) >= 8\n* RejectSeqWithoutFwdPrim = TRUE : Remove all reads that do not contain the forward primer\n\n### Demultiplexing  and quality filter the input files.\n\nFor this step you will need the mapping file from Step 1 and the file with the quality filtering settings for 454 data. Use the sdm command to demultiplex and filter all input files at the same time into a local folder `demultDir`. 
First create the folder where the demultiplexed files will be written in ~/metagenomics/:\n```bash\nmkdir ~/metagenomics/demultDir\n```\n\nSince the mapping file contains information on all files, you have to provide an input path to the folder that contains the input files (.fna + .qual) to sdm.\n\n> ### {% icon hands_on %} Hands-on: Exercise 2 \n>\n>  > ### {% icon question %} Question\n>  > How to demultiplex and quality filter files ? \n>  >\n>  > > <details markdown=\"1\">\n>  > > <summary>{% icon solution %} Solution\n>  > > </summary>\n>  > > ```bash\n>  > > ./sdm -i_path ~/metagenomics/ -o_fastq t1.fq -o_demultiplex ~/metagenomics/demultDir/ -map ~/metagenomics/map.txt -options sdm_454.txt \n>  > > ```\n>  > > Input is the folder containing the .fna and .qual files. The demultiplexing will fill the demultDir folder with fastq files.\n>  > > </details>\n>  >\n>  {: .question }\n{: .hands_on }\n\nDiscuss the output files and what each of these represents. In this experiment multiple samples were sequenced in the same lane. Two lanes were used, each containing 37 samples. After sequencing, this results in two files with reads. To know which sample a read comes from, unique bar codes are incorporated into the adapter sequences. One specific bar code for each sample. In this step reads from different samples (but from the same lane thus in the same fasta file) are split over separate fastq files, one for each sample. \n\n### Mapping file creation when sequence provider provides demultiplexed files.\n\nNow that you have demultiplexed the files into a single folder, you might be aware that sequence providers often deliver files in this format: already demultiplexed into single files. In this case slight modifications to the mapping file are enough to change the input from non-demultiplexed large file(s) to demultiplexed-many-small-files.\n\nNote that lotus has a special script that creates the mapping file for you in this case. The script is autoMap.pl. 
It is used to link SampleIDs to demultiplexed files. Run autoMap.\n\n```bash\n./autoMap.pl ~/metagenomics/demultDir/ ~/metagenomics/automap.txt 1,1\n```\n\n### Running Lotus.\n\nWe will run Lotus on the demultiplexed files. Use the mapping file you generated in Step 5 and the settings file sdm_454.txt. Use the utax taxonomy to assign a phylogeny to the derived OTUs. Run lotus from the /opt/lotus-1.62/lotus_pipeline/ directory and save the output in the folder `testR`.\n\n> ### {% icon hands_on %} Hands-on: Exercise 3 \n>\n> > ### {% icon question %} Question\n> > How to run lotus?\n> >\n> > > <details markdown=\"1\">\n> > > <summary>{% icon solution %} Solution\n> > > </summary>\n> > > ```bash\n> > > sudo ./lotus.pl -i ~/metagenomics/demultDir/ -o ~/metagenomics/testR/ -m ~/metagenomics/automap.txt \n> > > ```\n> > > Input is the demultDir folder containing the demultiplexed fastq files; the results are written to the testR folder.\n> > > </details>\n> >\n> {: .question }\n{: .hands_on }\n\nIn case you haven't done any quality filtering yet, you can still do it now. The command would then be:\n```bash\nsudo ./lotus.pl -i ~/metagenomics/demultDir/ -o ~/metagenomics/testR/ -m ~/metagenomics/automap.txt -s sdm_454.txt\n```\n\n* Peek at the file hiera_RDP (using `head`). The file maps each OTU to a genus.\n* Peek at the file OTU.txt (using `head`). The first line contains the number of reads that represent OTU_1 in each sample.\n* Peek at the file otus.fa (using `head`). It contains the reads representing each OTU. You can use this file to blast the sequences to check if they are really from the OTU they were assigned to.\n* Go to the folder higherLvl. This contains the data that we are going to use in the Ecology analysis.\n* Go to the folder LotuSLogs. This contains the settings of the analysis. For instance, peek at the file demulti.log: it shows how many reads were rejected... The file citations.txt contains the references for reporting your LotuS results. 
\n\n### Using a different taxonomy assignment on a finished run.\n\nIn this step we want to reassign the taxonomy to a LotuS run, but keep exactly the same OTUs. In this exercise, assign the OTUs to the Silva taxonomy. \n\nThis option is useful, if e.g. you want to keep your work on a given OTU set (as well as the phylogenetic tree), but want to try out what would have happened if you had used e.g. Silva as reference database instead of utax.\n\n> ### {% icon hands_on %} Hands-on: Exercise 4 \n>\n> > ### {% icon question %} Question\n> > How to reassign the taxonomy with Silva as reference database? \n> >\n> > > <details markdown=\"1\">\n> > > <summary>{% icon solution %} Solution\n> > > </summary>\n> > > ```bash\n> > > sudo ./lotus.pl -i ~/metagenomics/demultDir/ -o ~/metagenomics/testR2/ -m ~/metagenomics/automap.txt -s sdm_454.txt -refDB SLV -redoTaxOnly 1\n> > > ```\n> > > Input is the folder containing the .fna and .qual files. The demultiplexing will fill the demultDir folder with fastq files.\n> > > </details>\n> >\n> {: .question }\n{: .hands_on }\n\n### Using  a custom database.\n\nResearch on honey bee gut communities uses very specific taxonomic names for already known bacteria. In order to accommodate this naming scheme, we will use a very specific database that contains 16S sequences of bacteria mostly found in the honey bee gut. Download the [bee taxonomy in tax format](http://psbweb05.psb.ugent.be/lotus/packs/DB/beeTax/beeTax.tax) and the [bee taxonomy in fna format](http://psbweb05.psb.ugent.be/lotus/packs/DB/beeTax/beeTax.fna).\n\nUse the two provided files (fna, tax) to again redo the taxonomy, but this time assigning first using the honey bee DB and secondly everything with a low-scoring hit should be assigned with the SILVA database. \n\n> ### {% icon hands_on %} Hands-on: Exercise 5 \n>\n> > ### {% icon question %} Question\n> > Use honey bee taxonomy database ? 
\n> >\n> > > <details markdown=\"1\">\n> > > <summary>{% icon solution %} Solution\n> > > </summary>\n> > > ```bash\n> > > sudo ./lotus.pl -i XX -o ~/metagenomics/testR3/ -redoTaxOnly 1 \\\n> > > -m ~/metagenomics/LASI_Spring_2_bees_barn_3_map_lts_5.txt \\\n> > > -refDB ~/metagenomics/beeTax.fna,SLV -tax4refDB ~/metagenomics/beeTax.tax \n> > > ```\n> > > Input is the folder containing the .fna and .qual files. The demultiplexing will fill the demultDir folder with fastq files.\n> > > </details>\n> >\n> {: .question }\n{: .hands_on }\n\n### Get  everything assigned!\n\nIn this step we want to assign every OTU sequence to a database target – and we don’t care about false positive assignments! Of course this is per se wrong, but in some cases you just want to know what the best hit would be, even if it is only 90% similar to your OTU sequence. LotuS provides several options that allow tweaking towards more lenient assignments. Find all options related to this and try to create the most extreme case with these options, by reassigning the taxonomy again as in the previous step.\n\n### Try a different sequence clustering algorithm.\n\nNow rerun lotus, but try to optimize for a lot of small, hard defined OTUs (that might correspond to something like strain level). Which clustering algorithm might be suitable? Which clustering cutoffs make sense? For this specific run, use the first mapping file you created (step 1) and the non-demultiplexed input files. Save this output in the folder ''testR4''\n\n### Your own best run.\n\nNow that you have run LotuS with various databases and options, go back and look at the output folder of the different runs, look at the statistics provided in the ''LotuSLogS'' subfolder. Based on this, tune the sdm filtering parameter file from step 3 (again), choose the database you think best suited/most interesting, and choose a clustering algorithm. With this create run the sample set again, saving the output in folder ''testrun1.3''. 
This output folder you can use in the following session on numerical ecology.\n\nOnce the LotuS run has finished, go to the specified output folder and copy the genus.txt from the output folder to your home folder: \n```\ncp testrun1.3/higherLvl/genus.txt ~\n```\n\n### Using Illumina data as input.\n\nIn all the analysis before we were using 2 x 454 runs from an outdated next generation sequencing technology. For the next exercise we will look at the output of an Illumina MiSeq sequencing platform, that is still being used a lot nowadays.\n\nSet up the mapping file, using [the provided Miseq.xlsx file](http://data.bits.vib.be/pub/trainingen/metagenomics/Miseq.xlsx). Run LotuS, after you set up a custom sdm configuration file and using a combination of parameters you learned about in previous steps.\n\nThis run might take some time longer to finish. Be sure you set it to use all the cores of your computer and let it run over the lunch break.\n\nCongratulations, now you know how to process raw sequence files to meaningful summary tables, that can be directly analyzed in R or even Excel! In the next tutorial this matrix will be analyzed with the help of R, after the lunch break.\n\n","# Tools  \n{:.no_toc}\n\n## Lotus pipeline \nLotuS offers a lightweight complete 16S/18S/ITS pipeline to\n- Demultiplex and filter fasta or fastq sequences\n- Denoise, remove chimeric sequences and cluster sequences into very high quality OTUs that perform at a similar level to mothur / dada2\n- Determine taxonomic origin of each OTU using >5 specialized and general purpose databases or statistical algorithms\n- Construct OTU, genus, family, class, order and phylum abundance tables in .txt or .biom format\n- Reconstruct OTU phylogenetic tree\n\nMore information at [LotuS home page](http://psbweb05.psb.ugent.be/lotus/downloads.html)\n\n## usearch \n\nDownload [usearch version 8](http://www.drive5.com/usearch/download.html) and copy the executable into a folder e.g. 
/usr/bin/tools/ which you can reach (you might need to be superuser for this)\n\nMake executable:\n```\nsudo chmod +x /usr/bin/tools/usearch8.1.1861_i86linux32\n```\n\nCreate a symbolic link into the folder where Lotus will search for it:\n\n```\nsudo ln -s /usr/bin/tools/usearch8.1.1861_i86linux32 /usr/bin/tools/lotus_pipeline/bin/usearch_bin\n```\n\n## R package \n\nYou also need R with the vegan package installed.\n","# Ecology Analysis using vegan \n{:.no_toc}\n\nIn this exercise we will look at a data matrix of 16S rRNA counts in 74 samples.\n\nThis dataset is the microbiota composition of 74 mice from 5 different mice strains. The original research aim was to define the effect that the mouse genome has on the microbiota and what the effect of living in the same cage would be. However, we found much stronger trends in the data, and these we will look at in this exercise.\n\nThe 454 data was already compiled into a matrix with genus abundance per sample in a previous step. This matrix is called a feature abundance matrix, or abundance matrix for short. We will do an ecology-oriented analysis of the data, in later steps also taking metadata (experimental, environmental or clinical data that was collected for each sample, independent of the DNA) into account. The aim of this tutorial is to get an idea of the very basic steps of ecological data analysis using the programming language R.\n\nThe genus abundance table (Genus.txt) can be found in the folder /home/VIBTrainingX/NGS/metagenomics/higherLvl on the server. Those who are working on their own laptop can download it [from the lotus website](http://psbweb05.psb.ugent.be/lotus/data/LotuStutorial.tar.gz).\n\n1. Set the folder with the provided files as your working directory in R using `setwd`. This way required files can be easily loaded. To find out how to use this command, you can type ?setwd() to open the help. 
If there are other R-commands that you want to know more about, you can open the R-help for that command by entering in the R-prompt `?command`. This will be very useful when working with R, make sure to use this a lot as you can only learn more :o). \n\n\n> ### {% icon hands_on %} Hands-on: Exercise 1 \n>\n>  > ### {% icon question %} Question\n>  >\n>  > How to set the working directory in R \n>  >    > <details markdown=\"1\">\n>  >    > <summary>{% icon solution %} Solution\n>  >    > </summary>\n>  >    > ```R\n>  >    > setwd(\"dir_to_data\")\n>  >    > ```\n>  >    > </details>\n>  >\n>  {: .question }\n{: .hands_on }\n\n2. Load the provided data into the matrix M (Genus.txt, actual genus abundance data), using the read.delim command, saving the loaded table as `M`. Make sure, the row names are correctly read in. As R reads the matrix as an object of class data.frame, we convert M from a data.frame to a matrix `M=as.matrix(M)`. This is important for some of the following calculations, where we need a `matrix` class object. \n\n> ### {% icon hands_on %} Hands-on: Exercise 1 \n>\n>  > ### {% icon question %} Question\n>  >\n>  > How to read in data as matrix ? \n>  >\n>  > > <details markdown=\"1\">\n>  > > <summary>{% icon solution %} Solution\n>  > > </summary>\n>  > > ```R\n>  > > # read in data as matrix\n>  > > M = read.delim(file=\"Genus.txt\",row.names=1)\n>  > > M = as.matrix(M) \n>  > > ```\n>  > > </details>\n>  >\n>  {: .question }\n{: .hands_on }\n\nThe matrix you loaded represents the number of 16S sequences assignable to each genus, which we could find in the samples. Also note that not all genera are real genera, but partly assigned unknown sequences. With these groups we do not know if this is a single genus or in fact several genera or in extreme cases even several classes, that just all fall under the same phylum tag. 
What are the advantages and disadvantages of keeping such undefined groups in the data?\nUse the function `edit(M)` to better view the abundance matrix.\n\n3. Let’s look at some basic features of the abundance matrix. The `summary(M)` command is a good start, but also look at total row and column counts (`colSums`, `rowSums` commands). To see how the genera are distributed within each sample, we will plot a sample-wise density plot. We will be using a combination of the `density`, `lines` and `lapply` functions, to draw the densities of values found in each sample. Let’s start with looking at the density of the first sample. In R you can access specific rows and columns by writing the matrix coordinates in square brackets. For example `M[1,]` shows the first row of a matrix, `M[,7]` shows the 7th column etc:\n\n> ### {% icon hands_on %} Hands-on: Exercise 3 \n>\n>  > ### {% icon question %} Question\n>  >\n>  > How to estimate density of first sample ? \n>  >\n>  > > <details markdown=\"1\">\n>  > > <summary>{% icon solution %} Solution\n>  > > </summary>\n>  > > ```R\n>  > > # estimate density of first sample\n>  > > densityOfSample1 = density(M[,1])\n>  > > ```\n>  > > </details>\n>  >\n>  {: .question }\n{: .hands_on }\n\n\nLook at the object densityOfSample1 by simply entering the object name into the command prompt. Next try to visualize it with `plot(densityOfSample1)`. In this plot you see that most genera are at 0 abundance, some genera have an abundance <10 and some rare genera actually occur with a higher frequency, one genus even having ~1100 16S reads assigned to it. Which genus is this?\n\nAlternatively you can also use the function `hist`, to plot a histogram of the abundances. 
Try to do this now.\n\n> ### {% icon hands_on %} Hands-on: Exercise 4 \n>\n>  > ### {% icon question %} Question\n>  >\n>  > How to plot histogram of abundances ?\n>  >\n>  > > <details markdown=\"1\">\n>  > > <summary>{% icon solution %} Solution\n>  > > </summary>\n>  > > ```R\n>  > > # plot histogram of abundances\n>  > > hist(M[,1], nclass = 50)\n>  > > ```\n>  > > </details>\n>  >\n>  {: .question }\n{: .hands_on }\n\nWe can use the `apply` command, to apply the density command to every column of M, which will return a list of density objects. The second argument to the `apply` function is the `margin` and is set to 2, which tells the `apply` function that we want to work on columns (margin = 2) and not on rows (margin = 1). Save this into object  `S_densities`.\n\n> ### {% icon hands_on %} Hands-on: Exercise 5 \n>\n>  > ### {% icon question %} Question\n>  >\n>  > How to estimate densities of all samples ? \n>  >\n>  > > <details markdown=\"1\">\n>  > > <summary>{% icon solution %} Solution\n>  > > </summary>\n>  > > ```R\n>  > > # estimate densities of all samples\n>  > > S_densities = apply(M,2,density)\n>  > > ```\n>  > > </details>\n>  >\n>  {: .question }\n{: .hands_on }\n\n\n\nTo plot this start with:\n```R\n# open a new plot window and set range of x and y axis\nplot(1,1,type=\"n\",ylim=c(0,3),xlim=c(0,5000)) \n```\n\nThis will open a new plotting window, already set to the range of x and y coordinates (xlim, ylim) we will need in this example. In this case we just want to plot a blank space, this is done with the `type=n` argument. Try to replace the argument by `type=p`, to actually see that point! S_densities is a list, so we use `lapply` (list apply), in combination with the `lines` function, try this now to plot all the density lines into the open plot window.\n\n> ### {% icon hands_on %} Hands-on: Exercise 6 \n>\n>  > ### {% icon question %} Question\n>  >\n>  > How to plot density distributions of all samples ? 
\n>  >\n>  > > <details markdown=\"1\">\n>  > > <summary>{% icon solution %} Solution\n>  > > </summary>\n>  > > ```R\n>  > > # plot density distributions of all samples\n>  > > lapply(S_densities,lines)\n> > > ```\n>  > > </details>\n>  >\n>  {: .question }\n{: .hands_on }\n\n\nWhat you should see now in the plot window is the density distribution of all samples. The lines function is adding new lines, while a plot function makes a completely new plot. Try to replace the `lines` with `plot` to see this (it’s very fast, so keep a close eye on your plot). How are these lines already telling us something about the differences between the communities of each sample?\n\n4. Maybe you noticed that the `colSums` command showed that the totals are not equal. What does this mean? In this state the data is actually not comparable among each other. One way to `correct` the data for this shortcoming is to normalize the matrix. In this step we will normalize the abundance matrix into variable M1: \n\n```R\n# normalize matrix: divide each column by the total of that column\nM1 = sweep(M,2,colSums(M),\"/\")\n```\n\nThe `sweep` command is extremely useful, as it will apply a simple arithmetic operation (like divide) in a matrix column- or row-wise with a vector of your choice. So it is very similar to `apply`, but takes more basic functions. In this case we will divide each column by the sum of the column, this is called normalization.\n\nNow we will compare these matrices using the `barplot` function. For this we need to open another graphical window, using the `X11` function:\n```R\n# create barplot of original and normalized data\nbarplot(M)\nX11()\nbarplot(M1)\n```\n\nWhat do you notice about the sample composition? What does the graph mean? Discuss where you would want to normalize the data (and where not).\n\nClose all open plots.\n\nNow replot the sample-wise density plot (as you did in step 3), but start the plot with these adapted x and y ranges. 
Additionally we will this time label the x- and y-axis:\n\n```R\n# open a new plot and define ranges and titles of x and y axis\nplot(1,1,type=\"n\",ylim=c(0,80),xlim=c(0,1),xlab=\"relative genus abundance\", ylab=\"Frequency of genera\") \n``` \n\nYou will notice that the graph looks different from your previous plot. What changed due to the normalization? Are the samples more similar to each other using M or M1? \n\nIf you spot a difference in species abundance between two samples using matrix M, is this difference real, does it have scientific value?\n\nFor the next step the R-library vegan is required. It is a set of functions specifically designed for ecological data analysis. The package has been installed on the bits laptops. If you were to install the package, you could do so using the command: `install.packages(\"vegan\")`. More details on the [vegan web site](http://cc.oulu.fi/~jarioksa/softhelp/vegan.html). Load vegan, using the `library` command.\n\n\n> ### {% icon hands_on %} Hands-on: Exercise 7 \n>\n>  > ### {% icon question %} Question\n>  >\n>  > How to load the vegan package ?\n>  >\n>  > > <details markdown=\"1\">\n>  > > <summary>{% icon solution %} Solution\n>  > > </summary>\n>  > > ```R\n>  > > # load vegan package\n>  > > library(vegan)\n>  > > ```\n>  > > </details>\n>  >\n>  {: .question }\n{: .hands_on }\n\n\nLet’s try to put the differences we observed in sample density into numbers. To do this, ecologists rely on the concept of diversity. Diversity describes the evenness of species distributions as well as the richness of species that are observed in a given ecological system. We will first calculate the Shannon diversity, using vegan’s `diversity` command. Try to do this per sample, using the `apply` function again. Save the result in object `div`.\n\n> ### {% icon hands_on %} Hands-on: Exercise 8 \n>\n>  > ### {% icon question %} Question\n>  >\n>  > How to calculate Shannon diversity index for each sample using the normalized data ? 
\n>  >\n>  > > <details markdown=\"1\">\n>  > > <summary>{% icon solution %} Solution\n>  > > </summary>\n>  > > ```R\n>  > > # calculate Shannon diversity index for each sample using the normalized data \n>  > > div = apply(M1,2,diversity,index=\"shannon\")\n>  > > ```\n>  > > </details>\n>  >\n>  {: .question }\n{: .hands_on }\n\nNow we can see in action what these indices are actually doing for us. Plot the density of the sample with the lowest and highest diversity in blue and red on your previous density plot of M1, this you do by first finding out which diversity indexes are the maximum and minimum values using the `which.max` and `which.min` functions on the object `div`. Don’t forget to have the last density plot still open (or replot it from step 4 on M1), then add the lowest sample as a blue line and the highest sample as a red line, using the `lines` command. \n\n> ### {% icon hands_on %} Hands-on: Exercise 9 \n>\n>  > ### {% icon question %} Question\n>  >\n>  > Find samples with lowest and highest Shannon diversity index and add them to the density plot ? \n>  >\n>  > > <details markdown=\"1\">\n>  > > <summary>{% icon solution %} Solution\n>  > > </summary>\n>  > > ```R\n>  > > # find samples with lowest and highest Shannon diversity index and add them to the density plot\n>  > > which.min(div) #should be bl16\n>  > > which.max(div) #should be bl48\n>  > > lines(density(M1[,\"bl16\"],adjust =0.5),col=\"blue\")\n>  > > lines(density(M1[,\"bl48\"],adjust =0.5),col=\"red\")\n>  > > ```\n>  > > </details>\n>  >\n>  {: .question }\n{: .hands_on }\n\nYou can now readjust the window by changing the `ylim` and `xlim` attribute in the plot function, if necessary (tip, try to rerun using `ylim=c(0,180)`). Try to explain why the colored samples have the highest & lowest diversity. 
What does this tell us about an ecosystem (remember that these are genus abundances)?\nRaise your hand if you reached this step.\n\nA different way to normalize the data is to sample exactly equal amounts of 16S rDNA for each sample in this experiment. Of course in practice this is impossible to do, but we can simulate this, by randomly selecting a subset of 16S rDNA. This is called rarefaction. Rarefy your original abundance matrix (M) into M2, using 1000 reads per sample, using the `rrarefy` function of vegan. Note that you need to transpose (command `t()`) the matrix, before giving it to `rrarefy`. Transpose the matrix back and save it as `M2`.\n\n> ### {% icon hands_on %} Hands-on: Exercise 10 \n>\n>  > ### {% icon question %} Question\n>  >\n>  > How to normalize via rarefaction ?\n>  >\n>  > > <details markdown=\"1\">\n>  > > <summary>{% icon solution %} Solution\n>  > > </summary>\n>  > > ```R\n>  > > # Alternative way of normalization\n>  > > M2 = t(rrarefy(t(M),sample=1000))  #vegan needs the transposed matrix, and we need it transposed back\n>  > > ```\n>  > > </details>\n>  >\n>  {: .question }\n{: .hands_on }\n\nUse `colSums(M2)` to check if the rarefaction worked. The main use of rarefaction is in calculating diversity and richness correctly, for this we will look in the following step at observed richness.\n\nThe concept of observed richness within a sample is pretty simple (but useful): richness describes the number of different species that occur at least once in a sample. 
This can be calculated in two steps:\n\n```R\n# Species present in sample: TRUE or 1 if species is present, FALSE or 0 if species is absent\nOnceOrMoreOftenPresent = M1>0\n``` \n\nThe sum of each column in this matrix will tell us how many species were detected in total within the respective sample, use the `apply` and `sum` functions , saving the result in `rich1`.\n\n> ### {% icon hands_on %} Hands-on: Exercise 11 \n>\n>  > ### {% icon question %} Question\n>  >\n>  > How to calculate the sum of each column ?\n>  >\n>  > > <details markdown=\"1\">\n>  > > <summary>{% icon solution %} Solution\n>  > > </summary>\n>  > > ```R\n>  > > # Calculate sum of each column\n>  > > rich1 = apply(OnceOrMoreOftenPresent,2,sum)\n>  > > ```\n>  > > </details>\n>  >\n>  {: .question }\n{: .hands_on }\n\n\nCompare the richness values in `rich1` to the richness obtained on the rarefied matrix `M2`, calculated with a shortened command:\n\n```R\n# Calculate number of present species in each sample using the rarefied data\nrich2 = apply(M2>0,2,sum)\n``` \n\nCompare rich1 and rich2 in a matrix value by value. We use the `cbind` command to bind two vectors column wise together, so we get a matrix with 2 columns. Order this matrix by the richness values in rich1, using the `order` command and accessing the vector representation with `[]` square brackets.\n\n```R\n# Create new matrix with two columns: rich1 and rich2 and order rows according to rich1 values\ncbind(rich1,rich2)[order(rich1),]\n```\n\nWhat does the second part of the formula do? What happens if you change that to order(rich2)?\n\nDiscuss which richness values have the highest value to the researcher and why the order is very different between these two richness estimates. Is one way clearly wrong?\n\nWhy did we choose 1000 as cutoff for the sequences per sample? What is the maximum value we could choose? \n\nFirst samples are clustered to see underlying data structures. 
For this tutorial we will choose a hierarchical clustering, based on a Bray-Curtis distance between samples, using the function `vegdist`. Make sure the distances are calculated between Samples and not Genera.\n\nNext, use the function `hclust` on the distance matrix, saving the output in variable `cluster`, and subsequently plot the clustering of the samples (using `plot`).\nCan you guess how many groups there might be in this clustering?\n\n> ### {% icon hands_on %} Hands-on: Exercise 11 \n>\n>  > ### {% icon question %} Question\n>  >\n>  > How to cluster samples and plot results ?\n>  >\n>  > > <details markdown=\"1\">\n>  > > <summary>{% icon solution %} Solution\n>  > > </summary>\n>  > > ```R\n>  > > # cluster samples and plot results\n>  > > BCD = vegdist(t(M1), method=\"bray\")\n>  > > cluster = hclust(BCD)\n>  > > plot(cluster)\n>  > > ```\n>  > > </details>\n>  >\n>  {: .question }\n{: .hands_on }\n\nTo visualize the samples and their relatedness to each other in a two-dimensional space, we can use an ordination to visualize the data in a low dimensional space. The dimensionality of the original matrix (73 genera=73 dimensions) is reduced to two dimensions. If you know what a PCA (Principal component analysis) is, this step will use a conceptually similar, but methodologically quite different technique to perform an ordination of the data, NMDS (non-metric multidimensional scaling).\n\nStart by calculating a 2-dimensional NMDS of the data using M1, using the Bray-Curtis distance in the function `metaMDS`, saving the result to `nmds`. 
Again, make sure that samples are being ordinated and not Genera.\n\n> ### {% icon hands_on %} Hands-on: Exercise 11 \n>\n>  > ### {% icon question %} Question\n>  >\n>  > How to calculate the NMDS ?\n>  >\n>  > > <details markdown=\"1\">\n>  > > <summary>{% icon solution %} Solution\n>  > > </summary>\n>  > > ```R\n>  > > # calculate NMDS\n>  > > nmds = metaMDS(t(M1),distance = \"bray\") #actual NMDS command, matrix needs to be transformed to conform with vegan’s standards\n>  > > ```\n>  > > </details>\n>  >\n>  {: .question }\n{: .hands_on }\n\nTake a look at the `nmds` object and explore some of its features (e.g. type `str(nmds)` to see what variables are stored within the NMDS object). Try to find out what the `stress` of your ordination is. What does stress stand for (tip: go to the R help on metaMDS)? Next we can visualize the NMDS, similar to what you get out of PCA’s, displaying samples only:\n```R\n# plot NMDS\nplot(nmds,display =\"sites\")\n```\n\nThe important difference of NMDS compared to PCA is, that NMDS works with any kind of distance metric, while PCA can only use Euclidean distances between samples. A second important feature of NMDS is, that this method finds non-parametric, monotonic relationships between objects; in short: it doesn’t assume a specific data distribution. Why might these two features be important for ecologists? \n\nYou might have noticed that you see two clusters, similar to the hierarchical clustering of the data. We can get for each sample the identity within the two clusters using the `cutree` commands, specifying k=2 (2 clusters). This can be plotted into the NMDS with the following command:\n\n```R\n# identify clusters\nmemb = cutree(cluster, k = 2)\nordispider(nmds,memb)\n```\n\nCongratulations, you have just visualized the mouse enterotypes. Next we are going to look closer at these. 
If you want to know the exact methods to detect enterotypes in your data visit [http://enterotype.embl.de/enterotypes.html](http://enterotype.embl.de/enterotypes.html)\n\nIn the last step, we will test for all the genera in the matrix whether they show significant differences between two clusters. The scientific question we are posing here is: what are the significant differences in the gut microbiota between enterotypes? We will use a non-parametric test (Kruskal-Wallis) to do the tests, as ecological data is in most cases not normally distributed. This test is very similar to the Student's t-test, and the interpretation works just the same way. Use the function `kruskal.test` to test the first genus (`M1[1,]`) for significant differences between the two cluster groups (in object `memb`). Save the output of this command in variable `Kt`.\n\n> ### {% icon hands_on %} Hands-on: Exercise 12 \n>\n>  > ### {% icon question %} Question\n>  >\n>  > How to test if there is a difference between the two clusters for the first genus ? \n>  >\n>  > > <details markdown=\"1\">\n>  > > <summary>{% icon solution %} Solution\n>  > > </summary>\n>  > > ```R\n>  > > # Test if there is a difference between the two clusters for the first genus\n>  > > Kt = kruskal.test(M1[1,],memb)\n>  > > ```\n>  > > </details>\n>  >\n>  {: .question }\n{: .hands_on }\n\n\nLook at the output of this function. This will show you a human readable summary of the test and the result. You can access elements of a list (`Kt` is a list in this case) using the `$` operator. Try to extract the p-value from the `Kt` object.\n\nOnce you know how, we can start to calculate the significance for every genus in the M1 matrix. These p-values we will store in a newly created vector `pvals`. Let’s add the first 2 p-values to the vector:\n\n```R\n# Test if there is a difference between the two clusters for the first and second genera. 
Store p-values in a vector.\npvals = c()\npvals[1] = kruskal.test(M1[1,], memb)$p.value\npvals[2] = kruskal.test(M1[2,], memb)$p.value\n```\n\nSince doing this 73 times takes a long time, we will be using a for-loop to `loop` over the matrix and do this for us. We could as well use the apply function, but the syntax would get a little more complicated, since we are only interested in a subpart of the result, the $p.value part. Try to write a for-loop, to calculate the p-value 73 times.\n\n> ### {% icon hands_on %} Hands-on: Exercise 13 \n>\n>  > ### {% icon question %} Question\n>  >\n>  > How to test if there is a difference between the two clusters for all genera ? \n>  >\n>  > > <details markdown=\"1\">\n>  > > <summary>{% icon solution %} Solution\n>  > > </summary>\n>  > > ```R\n>  > > # Test if there is a difference between the two clusters for all genera\n>  > > for (i in 1:dim(M1)[1])\n>  > > {\n>  > >         pvals[i] = kruskal.test(M1[i,], memb)$p.value\n>  > > }\n>  > > ```\n>  > > </details>\n>  >\n>  {: .question }\n{: .hands_on }\n\nAs an additional help, you can add the name of the taxa to the pvals vector using the names command (that will name a vector):\n\n```R\n# Add names to the vector\nnames(pvals) = dimnames(M1)[[1]] \n```\n\nWhich taxa are significantly different?\n\nIn this case we used the normalized M1 matrix, can you explain why we do not use the M or M2 matrix? Would either be wrong to use?\n\nIn total we tested for 73 genera whether their p-value was below a threshold of 0.05. What is the chance of observing data with a p-value <0.05 by random chance? How many genera do you expect to be below this threshold by random chance? \n\nTo avoid statistical errors of this kind, we will use a Benjamini-Hochberg multiple testing correction, implemented in the R function `p.adjust`. 
Save the result as `qvals`.\n\n> ### {% icon hands_on %} Hands-on: Exercise 14 \n>\n>  > ### {% icon question %} Question\n>  >\n>  > How to perform multiple testing correction of p-values using Benjamini-Hochberg method ? \n>  >\n>  > > <details markdown=\"1\">\n>  > > <summary>{% icon solution %} Solution\n>  > > </summary>\n>  > > ```R\n>  > > # Multiple testing correction of p-values using Benjamini-Hochberg method\n>  > > qvals = p.adjust(pvals,method = \"BH\")\n>  > > ```\n>  > > </details>\n>  >\n>  {: .question }\n{: .hands_on }\n\nWhat do you see in this test? What would you report on this dataset, based on these values?\n\nTry sorting the q-values to see the most significant differences first:\n```R\n# Sorting q-values\nsort(qvals)\n```\n\nNow that you have finished the tutorials, you should be able to analyze any new dataset of amplicon data, using the LotuS pipeline and performing a basic analysis with R, including\n\n* Data normalization\n* Clustering analysis\n* Ordination\n* Univariate statistics\n\nYou can always expand upon these concepts, using this tutorial as starting point. Just remember that R is a very flexible language, and all these commands can be expanded for new purposes and visualizations.\n\n### Data sources\nAll the material provided in this tutorial is from a metagenomic study on mouse knockouts. Further analysis of the data can be found in the reference below.\n\n### Reference \n\nHildebrand, F., Nguyen, A. T. L., Brinkman, B., Yunta, R. G., Cauwe, B., Vandenabeele, P., … Raes, J. (2013). Inflammation-associated enterotypes, host genotype, cage and inter-individual effects drive gut microbiota variation in common laboratory mice. Genome Biology, 14(1), R4. doi:10.1186/gb-2013-14-1-r4\n\n","# Introduction\n{:.no_toc}\n\n<!-- This is a comment. -->\n\n## Introduction to data collection\n\nBy now you will have obtained some idea of what research data management is all about. 
Now we will have a more in-depth look into the different phases of your research by starting with data collection.\n\nData collection involves understanding the different types of data you collect. Depending on the nature of your research, there are different methods of collecting data and thus different types of data.\n\nYour data may be physical (paper records or archival forms) or digital (database contents or Excel data). The source of your data may be external, you collect it yourself or you generate it from a machine.\n\nWhen you write your data management plan you will need to take into account the type of data you collect, the source of the data, and how you will process and analyse your data.\n\nYou can watch the video below, provided by TU Delft, about data collection. The video stops at 1:12.\n\n<iframe src=\"https://www.youtube.com/embed/AqnVrnVdv2Y\" allowfullscreen=\"\" allow=\"accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture\" height=\"515px\" style=\"display: inline-block;\" width=\"800px\" title=\"\"></iframe>\n\n## Preferred formats for your research data\n\nThis part is based on the online Research Data Management training 'MANTRA' of The University of Edinburgh (CC BY: [https://mantra.edina.ac.uk/](https://mantra.edina.ac.uk/)) and Managing Data @ Melbourne.\n\n<figure id=\"figure-1\"><img src=\"../../images/01_Preferred-formats_Learning_Objective.png\" alt=\"Introduction \"><figcaption><span class=\"figcaption-prefix\">Figure 1:</span> Learning objectives</figcaption></figure>\n\n---\nThe file formats you use to generate your research data will influence how you can manage them over time, i.e. a program or application must be able to recognise the file format in order to access your data within the file.\nFor example, a web browser is able to process and display a file in the HTML file format so that it appears as a web page. If the browser encounters another file type, it may need to call on a special plug-in to view it. 
Or it may simply let you download the file to view if it can recognise it in another program.\n\nTo identify the file format, files usually have a file name extension, or suffix that follows a full stop in the file name and contains three or four letters, like for example:\n\nTODO: add PDF with links to preferred file formats\n\n* .txt    text\n* .pdf    portable document format\n* .jpg    joint photographic experts group\n* .csv    comma separated values\n* .html   hypertext markup language\n* .xml  extensible markup language  \n* .rtf  rich text format\n\n<figure id=\"figure-2\"><img src=\"../../images/02_Preferred-formats_proprietary-formats-01.png\" alt=\"Proprietary formats\"><figcaption><span class=\"figcaption-prefix\">Figure 2:</span> Background on proprietary and open formats</figcaption></figure>\n\n---\n\n<figure id=\"figure-3\"><img src=\"../../images/02_Preferred-formats_proprietary-formats-02.png\" alt=\"Proprietary formats\"><figcaption><span class=\"figcaption-prefix\">Figure 3:</span> Background on proprietary and open formats</figcaption></figure>\n\n### {% icon question %} Question\n\nDetermine which format is proprietary and which is an open format\n\n.xml .pdf .psd .odf .ppt .docx .csv .xls\n\n<details markdown='1'>\n  <summary>Check your answers!</summary>\n\n1. Proprietary: .psd, .docx, .xls, .ppt \n2. 
Open format: .csv, .xml, .odf, .pdf\n\n</details>\n\nTODO: list of preferred formats\n\n<figure id=\"figure-4\"><img src=\"../../images/03_Preferred-formats-file-conversion.png\" alt=\"Risks of file conversion\"><figcaption><span class=\"figcaption-prefix\">Figure 4:</span> Risks of file conversion</figcaption></figure>\n\n### {% icon question %} Question\n\nWhile file conversion or migration sometimes has to be done, there are also risks.\n\nWhich ones can you think of?\n\n<details markdown='1'>\n  <summary>Check your answers!</summary>\n\n* file size may change and even become surprisingly large\n* blanks used as missing data code\n* special characters and end of line returns may change\n* relation among items in a table and among tables may be lost\n* layers, color fidelity and resolution may be lost or changed in image files\n* fonts, footnotes and links to other documents may change\n* frame rate, sound quality, codecs and wrappers may be altered in multimedia files\n* last characters in rows (due to row size limitations) may be altered\n\n</details>\n\n### {% icon hands_on %} Hands On \n\nDownload the following .docx file and convert it to the preferred format .txt: [PreferredFormatsExcersizePenguinDOC.docx](../../images/PreferredFormatsExcersizePenguinDOC.docx)\n\n1. Convert this docx file to the preferred format .txt\n2. Open the text file in an editor\n3. Is all formatting preserved OK?\n\n<details markdown='1'>\n  <summary>Check your answers!</summary>\n\nNo, the format Microsoft Word creates saves the layout together with the textual and other elements. The .txt format created by Word contains only the textual information in your file.\n\n</details>\n\n### {% icon hands_on %} Hands On  \n\nDownload the following .docx file and convert it to the preferred format .odt: [PreferredFormatsExcersizePenguinDOC.docx](../../images/PreferredFormatsExcersizePenguinDOC.docx)\n\n1. Convert this docx file to the preferred format .odt\n2. Open the .odt file\n3. 
Is all formatting preserved OK?\n \n<details markdown='1'>\n  <summary>Check your answers!</summary>\n \nNo, ODT files are formatted using the OASIS OpenDocument XML-based standard. When you open an OpenDocument Text file in Word, it might not have the same formatting as it did in the original application it was created in. This is because of the differences between applications that use the OpenDocument Format.  \n \n</details>\n\n![Data compression](../../images/04_Preferred-formats-data-compression.png)\n\n## Discovering existing data\n\n### Where to discover existing data?\n\nWatch the screencast below. In this screencast, you will be guided through different ways to find data.\n\n<iframe src=\"https://www.youtube.com/embed/AZMUKgM8X-A\" allowfullscreen=\"\" allow=\"accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture\" height=\"515px\" style=\"display: inline-block;\" width=\"800px\" title=\"\"></iframe>\n\n> ### {% icon hands_on %} Hands On  \n> \n> You have just learned that there are different places to find data. By actively searching the different places, you will get an understanding of the differences.\n> Look at the different portals below. Some of them have been showcased in the screencast, some of them are additional.\n> \n> [Google](http://www.google.be) - add \"database OR registry OR dataset OR archive OR statistics\" to your search\n>\n> [Registry of Research Data Repositories re3data](https://www.re3data.org/) - find appropriate repositories holding interesting data\n>\n> [ZanRan]() - search engine for tables and graphes within .pdf or .html on the internet\n> \n> [Elsevier Data Search](https://datasearch.elsevier.com/#/) - try out chip-seq drosophila\n> \n> [Google Dataset Search]() - try out chip-seq drosophila. Google Dataset Search indexes [OmicsDI](https://www.omicsdi.org/), an index providing a knowledge discovery framework across heterogeneous omics data (genomics, proteomics, transcriptomics and metabolomics).   
\n\n### Assess the usefulness of existing data\n\nHow useful is a dataset? Follow this short tutorial.\n\n<iframe src=\"https://www.youtube.com/embed/t1SZutbCAxI\" allowfullscreen=\"\" allow=\"accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture\" height=\"515px\" style=\"display: inline-block;\" width=\"800px\" title=\"\"></iframe>\n\n### Assess the usefulness of existing data yourself\n\nIn the previous activity, the lecture described four main points to check if you want to reuse existing data:\n\n* Condition for reuse\n* Context\n* Trustworthiness\n* Persistence\n\nIn the following quizzes, take a closer look at the description or metadata of some datasets and assess the usefulness of these datasets yourself. As the description or metadata of datasets can be lacking in several different areas at the same time, it will be indicated per assignment on which of the four main points your focus should be.\n\n### {% icon hands_on %} Hands On  \n\nCan you re-use [this dataset](https://dataverse.nl/dataset.xhtml?persistentId=hdl:10411/UWAU3K) on Spatial Patterns of Water-dispersed Seed Deposition along Stream Riparian Gradients in DataverseNL?\n\n1. Maybe \n2. Yes \n3. No \n\n<details markdown='1'>\n  <summary>Check your answer!</summary>\n\nYes, the Terms of use indicate that there is a Creative Commons license 'Public Domain Dedication', which means you can copy, modify, distribute and perform the work, even for commercial purposes, all without asking permission. \n\n</details>\n\n### {% icon hands_on %} Hands On  \n\nCan you re-use [this weather dataset](http://www.climatemps.com/)?\n\n1. Maybe\n2. Yes\n3. No\n\n<details markdown='1'>\n  <summary>Check your answer!</summary>\n\nMaybe, although the website states 'We hope that you will enjoy using ClimaTemps as much as we have enjoyed developing it!', there is no clear license or use agreement and directions on how to cite the data are lacking. The use has not been defined nor explained. 
In this case of re-use you should simply contact the creators.\n\n</details>\n\n### {% icon hands_on %} Hands On  \n\nGiven the following description of a dataset: can you assess the usefulness of this dataset to establish cholestasis (an unhealthy condition of the liver) parameters in livers in the age group of puberty through adulthood? Please focus on the context.\n\nDescription: \"We measured livers for several parameters of cholestasis. The subjects were in advanced stages of liver cancer.\"\n\n1. Maybe\n2. Yes\n3. No\n\n<details markdown='1'>\n  <summary>Check your answer!</summary>\n\nNo, the dataset is not useful because the subjects have cancer. This should affect the values of parameters for cholestasis. You would rather have a dataset of healthy subjects.\n\n</details>\n\n### {% icon hands_on %} Hands On  \n\nWould you trust the following dataset on heart rate under severe physical stress?\n\nHeart rate (beats per minute): 124, 160, 240, 0, 120, 400, 198, 156, 167 \n\nPlease focus on the trustworthiness. \n\n1. Maybe\n2. Yes \n3. No\n\n<details markdown='1'> \n  <summary>Check your answer!</summary>\n\nNo, there are weird values in the dataset, a value of zero is unlikely. And overall, the values are on the high side.\n\n</details>\n\n### {% icon hands_on %} Hands On  \n\nIs your research likely to be reproducible when you use the following infrastructure?\n\nThe dataset is created during a PhD. Conditions for use state that it is a dataset stored and shared by the PhD student on his university account.\n\n1. Maybe\n2. Yes \n3. 
No\n\n<details markdown='1'> \n  <summary>Check your answer!</summary>\n\nNo, it is unlikely that the dataset can be reused since you do not have certainty that the files stored on the university file drives are available for at least 10 years which is the current rule for data availability.\n\n</details>\n\n\n## Describe what kind of data you will generate\n\nHaving a clear view of what data you will generate will enable you to plan its management. You can create an overview of the data you produce or collect by drawing the data in a workflow, or noting down in a table.\n\nPlease watch the video below. Tessa Pronk will explain to you how to describe your data.\n\n<iframe src=\"https://www.youtube.com/embed/KE2UpZY4wYA\" allowfullscreen=\"\" allow=\"accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture\" height=\"515px\" style=\"display: inline-block;\" width=\"800px\" title=\"\"></iframe>\n\n### Order elements in your data flow\n\nTODO: add H5P quiz\n\n### Copyright and Intellectual Property Rights (IPR) issues\n\nCopyright is a form of intellectual property right which arises automatically if an original work is created. Copyright may affect the way data may be stored, shared and reused. You should ask yourself who the copyright holder of your datasets is, especially when you use existing data or when you collaborate with external parties.\n\n**Using someone else’s research data**\nSURF provides a brief guide to determining what consent is necessary to reuse someone else’s data (see \"A brief guide ... someone else's data\" in the resources below)  \n\n**Clarifying the ownership of your research data**\n\nTODO: change accordingly for VIB\n\nOfficially VIB, as your employer, is considered the rights holder to the research data you create. You, as a researcher, have the primary responsibility for taking care of the data. Questions on data exploitation may be even more important than those of ownership. Who can use the data? Who can publish it? 
Who can provide it to third parties?  \n\nWe strongly recommend that you deal with the issues around data exploitation at an early stage of your research project. Write down agreements between yourself, your supervisor, project members and other interested parties in your Data Management Plan.\n\nTODO: change accordingly\n\nRDM Support offers you a Guide to legal instruments and agreements for research data management (see the Guide 'Legal instruments and agreements')\n\n**Confidential or privacy-sensitive data**\nWhen your research project has received data under confidentiality or under legal privacy restrictions, you will have to identify and explain how you will deal with these restrictions in your data management plan (also see ‘Learning Unit: Handle - Data security’).\n\n### Costs involved with managing your data\n\nTODO: https://www.uu.nl/en/research/research-data-management/guides/costs-of-data-management\n\nThe costs of data management and sharing activities must be included into your research, in terms of time and resources needed.\n\n**1. Data Management Cost Guide**\n\nWhen you plan your research you may not be able to oversee all costs involved. Nevertheless, it is useful to have an idea of possible costs at an early stage. You can use the Guide 'Costs of Data Management', which is a practical overview of possible costs per activity within each phase of the research process. Note: The Cost Guide offers cost indications and examples. These are not real prices.\n\n**2. Budget your data management costs**\n\nYou are advised to budget the data management costs as separate data management costs. These costs are eligible for funding with funders like NWO and the European Commission, as long as the costs are invoiced before the end of the project.\n\n**3. Planning can save time and money**\n\nPlanning an early start for certain activities within your research project can lower the costs for data management in the run of your project. 
You can save time by:\n\nProperly describing your data while collecting it, instead of doing it afterwards\nChoosing the right file format so that file conversion afterwards is not necessary\nHiring an experienced data manager\nSpending time to think about data activities beforehand can help prevent unexpected extra efforts and costs later on in your research project.\n\n### Check the current and expected costs for your research data\n\nYou have just learned that in many parts of a research project there are data related costs. These costs depend on the type and volume of data you produce, analyse and store.\n\nTODO: link to file (calculation) https://lll-platform.uu.nl/pluginfile.php/4907/format_elevated/resource/0/Cost%20overview.docx\n\n### Write your data management plan for your data collection\n\nGo to DMPonline and open your draft data management plan created in the Introduction.\n\nYou have now completed the module Data collection. You should be able to complete the following questions in the section Data collection:\n\n* Will you use existing data?\n* What data will you collect or create?\n* How will the data be collected or created?\n* How will you manage rights issues?\n* What are the costs involved in managing and storing your data?\n\n","# Basic Statistics Theory \n{:.no_toc}\n\nThis introductory video has been created during a livestream session in March 2020.\n\n<iframe src=\"https://www.youtube.com/embed/Zd9FkB348zk\" allowfullscreen=\"\" allow=\"accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture\" height=\"515px\" style=\"display: inline-block;\" width=\"800px\" title=\"\"></iframe>\n","<!-- This is a comment. 
-->\n\n## Why manage your research data?\n\nIn this video Katarzyna Biernacka explains what data in a research context is.\n\n<div>\n<iframe src=\"https://www.youtube.com/embed/XCckz_4mlhU\" allowfullscreen=\"\" allow=\"accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture\" height=\"515px\" style=\"display: inline-block;\" width=\"800px\" title=\"\"></iframe>\n</div>\n\nCC-BY-4.0: Katarzyna Biernacka, HU Berlin & [Discipline Workshops 2019](http://www.discipline-workshops.com/)\n\nManaging your data effectively is crucial to the success of your research. This doesn't only apply to the immediate context of your thesis or publications. Managing your data is a practice that will benefit you throughout your research career. The following list gives an overview of what benefits are evident.\n\n1. **Access, Re-use & Recognition**\n   * Facilitating future research by allowing others to build on or add to your research data.\n   * Increased citations of research data and of publications based on that data.\n2. **Efficiency**\n   * Increasing your research efficiency by saving time and resources.\n   * Preventing duplication of effort by enabling others to use your data.\n3. **Quality & Security**\n   * Ensuring the integrity and reproducibility of your research.\n   * Ensuring that research data and records are accurate, complete, authentic and reliable.\n   * Enhancing data security and minimising the risk of data loss.\n4. **Compliance**\n   * Meeting legal obligations, restrictions and codes of conduct.\n   * Meeting the University policy for research data requirements.\n   * Meeting funding body grant requirements.\n   * Meeting publisher requirements for data access.\n\n\n## A case to consider\n\nMarleen is an early career researcher. She completed her PhD about four years ago and is now a postdoctoral research fellow at a different university. 
Since she obtained her PhD, she has published a number of journal articles based on her doctoral research. Her papers have been cited widely in the literature of her field. But just recently a fellow researcher has questioned her findings. He has gone so far as to suggest that the data on which her research was based is inaccurate. One implication is that the data could even have been falsified. Marleen is confident that her research is valid and that her data is accurate.\n\n\n- What steps could Marleen take to verify her research findings?\n- What evidence would she need to demonstrate that she hasn't falsified her data?\n\nThink about your own research. If someone accused you of research misconduct, would you be in a position to defend your research and reputation? List some strategies you could implement right now that would assist you, should you ever find yourself in Marleen’s situation.\n\n## Data disasters – postcards from the edge\n\nThe following are real examples where researchers or data centers have lost crucial data. Could any of these ever happen to you? With good planning you could avoid or reduce the impact of such occurrences.\n\nTODO: add H5P \n\n<iframe src=\"https://lll-platform.uu.nl/mod/hvp/embed.php?id=2295\" width=\"800px\" height=\"664\" frameborder=\"0\" allowfullscreen=\"allowfullscreen\"></iframe><script src=\"https://lll-platform.uu.nl/mod/hvp/library/js/h5p-resizer.js\" charset=\"UTF-8\"></script>\n<script src=\"https://lll-platform.uu.nl/mod/hvp/library/js/h5p-resizer.js\" charset=\"UTF-8\"></script>\n\n## University policy framework for research data\n\nFor the Flemish universities, it is important that all researchers honour scientific standards, including the meticulous and ethical treatment of research data.\nThis policy is intended to set out parameters to safeguard the quality, availability and accessibility of research data within any Flemish university. 
It provides a basis for evaluating compliance with laws, regulations and codes of conduct. The policy also clarifies the various roles and responsibilities of university staff in managing research data.\n\nThe highlights of the policy are:\n* Archive (relevant and valuable) research data for a minimum of ten years;\n* Store data in a structure that is suitable for long-term preservation and later consultation;\n* Provide metadata to describe the data with sufficient clarity to ensure they are findable for further research;\n* Make archived research data available for access and reuse at and outside VIB insofar as is reasonably possible;\n* Each individual researcher / research leader is responsible to draw up a Data Management Plan (DMP) at the start of the research project and to follow up the agreements made in this plan;\n* Scientific directors are responsible for the implementation and monitoring of the University policy framework and for drawing up additional faculty guidelines to this end if needed.\n\nLinks to the Policy Frameworks of the Flemish Universities\n\n* [Policy Framework from Ghent University](https://www.ugent.be/en/research/datamanagement/rdm-policy.pdf/at_download/file)\n\n* [Policy Framework from KU Leuven](https://www.kuleuven.be/english/research/scholcomm/rdm/policy-plan-rdm-ku-leuven-2014)\n\n* [Policy Framework from UHasselt](https://bibliotheek.uhasselt.be/sites/default/files/uploads/RDM/20180517_UH_RDM_PolicyPlan_NL.pdf)\n\n* [Policy Framework from VUB](https://vub.sharepoint.com/sites/ORG-RandD/SitePages/RESEARCH-DATA-MANAGEMENT.aspx?web=1)\n\n## Policy in Practise\n\nIn this short video Prof. dr. Chantal Kemner explains the importance of good data management for Utrecht University. 
Chantal is full professor of Biological Developmental Psychology in Utrecht at the faculty of social sciences and since 2013 also at the UMCU.\n\n<iframe src=\"https://www.youtube.com/embed/f48l4Uca9nA\" allowfullscreen=\"\" allow=\"accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture\" height=\"515px\" style=\"display: inline-block;\" width=\"800px\" title=\"\"></iframe>\n\n## Funder requirements\n\nMore and more research funders explicitly require you to consider the management and publication of your research data, both during and after your research project. The European Commission and the Flemish funders FWO have explicit policies on research data management.\n\n**European Commission - Horizon 2020**\n\nThe European Commission wants “Horizon 2020 beneficiaries to make their research data findable, accessible, interoperable and reusable (FAIR), to ensure it is soundly managed. Good research data management is not a goal in itself, but rather the key conduit leading to knowledge discovery and innovation, and to subsequent data and knowledge integration and reuse.” Horizon 2020 is the biggest research and innovation program of the European Commission.\n\n[![European Commission - Horizon 2020](../../images/O_Funders_Screenshot_H2020guidelines.JPG)](https://www.nwo.nl/en/policies/open+science/data+management)\n\n**FWO**\n\nFWO states that “FWO has made data management a key element of its policy for all support channels provided by the FWO. The FWO expects researchers to pay due attention to this dimension before, during and for at least five years after their research.”\n\n[FWO Overview Data Management Plan](https://www.fwo.be/en/the-fwo/organisation/data-management-plan/)\n\n## Funder guidelines and templates\n\nMost funders require you to write a Data Management Plan. A DMP outlines all key aspects of collecting, storing and managing research data during and after a project. 
For this they provide you with guidelines, forms, templates and examples. For more information you can download the documents under Resources or check out the websites. You can also contact your faculty Research Support Office:\n\n- [EC – Horizon 2020: guidelines](https://ec.europa.eu/research/openscience/index.cfm)\n- [FWO template](https://www.fwo.be/media/1023898/fwo-dmp-templatedocx.docx)\n\n## Writing a data management plan\n\nBy now it should be clear that data needs to be properly managed throughout its lifecycle. The most effective way to do this is to create a Data Management Plan (DMP). This will take into account all the stages of the research data lifecycle. As outlined earlier, each individual researcher or research leader is responsible to draw up a data management plan. He or she should do this at the start of the research project. And during the research you should actively follow up on the agreements made in this plan.\n\nThink about our early career researcher Marleen (introduced in ‘Why manage your research materials and data?’) who needs to defend herself against accusations of researcher misconduct. As well as defending against misconduct accusations, some additional benefits of creating a data management plan include:\n\n- Accessing your data more easily;\n- Prioritising and balancing activities relating to research data collection and storage;\n- Mitigating data loss;\n- Reaching agreement between stakeholders about ownership of data;\n- Reducing time and effort in the long term.\nThe good news is that this online training will take you through the necessary steps to create a plan during the subsequent modules.\n\n## Getting started with DMPonline\n\nWe offer you DMPonline to create your Data Management Plan. DMPonline is an international online service that guides you in creating a DMP by answering a series of questions about your research project. It allows you to create, share, store, and revise your data management plans online. 
You will be asked to complete different sections of your DMP as we go through the other modules. As a result you will have written your own data management plan at the end of this course.\n\nWith DMPonline you can:\n\n* Write your plan and keep it up-to-date\n  * You can easily update your DMP throughout the lifecycle of a project\n\n* Share plans online\n  * DMPonline allows collaborative access, so you can share your DMP with other researchers, within and outside of your university.\n\n* Create multiple plans\n  * You can store different DMPs for different projects. And you can make a copy of a previous plan as the basis for writing a new one.\n\n* Download plans\n  * You can download your DMP in a variety of formats.\n\nWe recommend that graduate researchers share their data management plans with their supervisor(s).\n\n<iframe src=\"https://player.vimeo.com/video/251506151\" allowfullscreen=\"\" allow=\"accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture\" height=\"515px\" style=\"display: inline-block;\" width=\"800px\" title=\"\"></iframe>\n\n## About RDM Support\n\nRDM Support provides all kinds of research data management assistance to researchers of VIB in all stages of their research. This can range from one-off individual advice to large-scale infrastructure coordination.\n\nThe contact details of the main host institutions for DMP-related questions and guidance are as follows:\n\n* AMS: Bart Cambré (bart.cambre@ams.ac.be)\n* Hogere Zeevaartschool: Marc Vervoort (marc.vervoort@hzs.be)\n* ITG: Ann Verlinden (averlinden@itg.be)\n* KU Leuven: rdm@kuleuven.be\n* UAntwerpen: RDM-support@uantwerpen.be\n* UGent: Myriam Mertens and Annik Leyman (rdm.support@ugent.be)\n* UHasselt: Sadia Vancauwenbergh (rdm@uhasselt.be)\n* Vlerick: Eva Cools (eva.cools@vlerick.com)\n* VUB: dmp@vub.be\n* VIB: bits@vib.be\n","# Installation\n## Windows\n> Requirements to install E-Notebook 2014: \n> 1. Microsoft Windows\n> 2. 
MS Office, Adobe Reader (or similar)\n> 3. ChemBioDraw (optional - see STEP 2)\n> 4. Valid VIB login credentials. Check your login and password on [https://storefront.vib.be/](https://storefront.vib.be/).\n\n**STEP 1: E-Notebook 2014**\n\n1. Browse to [https://eln.vib.be/clickonce/](https://eln.vib.be/clickonce/)\n2. Click “Install” and open the file\n3. After the installation, the software is automatically launched and the login window appears\n4. Log in with your VIB credentials (see requirements)\n5. Close E-Notebook after successful launch: File - Exit or 'X' in the right upper corner\n6. Generate a shortcut on the desktop (right click - Send to - Desktop): All Programs - PerkinElmer - E-Notebook 2014 Client\n7. Install ChemBioDraw (STEP 2)\n\n**STEP 2: ChemBioDraw**\nNote: In case you only reinstall the ELN client, you don't have to reinstall the ChemBioDraw component\n1. Download the ChemBioDraw installation file from the same website as E-Notebook 2014: [https://eln.vib.be/clickonce](https://eln.vib.be/clickonce)\n2. Start the installation\n3. Install ChemBioDraw ActiveX component in suggested destination\n4. Follow the installation wizard instructions\n5. Click on “Install” and subsequently on \"Finish\"\n\n> Why use ELN through Citrix on Windows? \nSome older Windows versions cause problems with the E-Notebook 2014 Client installation.\n\n**STEP 1: Citrix Workspace app**\n1. Browse to [www.citrix.com](http://www.citrix.com) \n2. Click on Download\n3. Select Citrix Workspace app from the list of possible downloads\n4. Download and install Citrix Workspace app\n\n**STEP 2: Launch ELN online**\n1. Browse to [https://storefront.vib.be](https://storefront.vib.be)\n2. Login with your VIB credentials\n3. Launch the ELN application by clicking on the icon\n4. If your browser asks to download and open an .ica file, please agree\n5. Citrix Workspace will open and launch the application\n\n## MacOS, Linux, mobile devices\n**STEP 1: Citrix Workspace app**\n1. 
Browse to [www.citrix.com](https://www.citrix.com) \n2. Click on Download\n3. Select Citrix Workspace app from the list of possible downloads\n4. Download and install Citrix Workspace app\n5. After the installation on Linux execute the following command:\n```\nsudo cp -a /usr/share/ca-certificates/mozilla/DigiCert_Assured_ID_Root_CA.crt /opt/Citrix/ICAClient/keystore/cacerts/\n```\n\n**STEP 2: Launch ELN online**\n1. Browse to [https://storefront.vib.be](https://storefront.vib.be)\n2. Login with your VIB credentials\n3. Launch the ELN application by clicking on the icon\n4. If your browser asks to download and open an .ica file, please agree\n5. Citrix Workspace will open and launch the application\n\n# Support\n- Call us at +32 (0)9 248 16 15\n- Mail us at eln@vib.be","# Login\nWhen launching the application (Windows: double-click the **E-notebook 2014 client** icon – Citrix: click on the ELN 2014 icon and open the .ica file, Citrix Workspace will launch the application), you will see the following login window:\n\nIn order to login on ELN, you need a **valid VIB account**. The VIB username usually has a format like: *firstname lastname*. More information on [https://help.vib.be](https://help.vib.be) or mail eln@vib.be.  \n\nWhen clicking on **Connect** the application will retrieve your data. The **Work Offline** option is only available with the client installation and will allow you to make adjustments to the data in your Offline folder.\n\n> Note: when launching the application for the first time, a download of all collections will start, this usually takes 1 or 2 minutes.\n\n# Layout\nThe layout resembles Microsoft Office. 
It has 3 main parts; the ribbon with options on top, the navigation and history area on the left and the working area on the right.\n\nThe default starting point is the Home location, this gives an overview of all data in the navigation area on the left and any modified experiments since one month on the right.\nIn the Audit Trail (bottom left) you can find the history of the object selected above. This history allows you to access previous versions of an experiment and retrieve a file in order to bring it back to the present. Every version has a timestamp and operator (= user that pressed the save button). Previous versions of an experiment can't be modified, only the last version is adjustable.\nNavigating to your colleagues or Home can be done with the orange icons in the upper left corner. Next to the navigation buttons you find the Save button. When saving you can add annotations as well.\n# Ribbon\nThe Ribbon is where you can find the options corresponding with your selection (navigation area or section). By default, there are three tabs: Home, View and Data. Sections have specific tabs in the ribbon, e.g. Document, Image, Text, Table, Property List, etc. An example can be found below (Text):\n\n# Project, Notebook, Experiment\nThere are 3 basic levels to organize your data: Project, Notebook and Experiment (see icons below). You can see them as folders with a certain hierarchy. Only an experiment contains files. To add one of the levels click on the icon in the **Home** tab in the ribbon. \n\n# Sections\nAn experiment consists of sections, every section is a file or page. To add a section, select the icon in the **Home** tab in the ribbon. Some sections are hidden behind the **Other** button.\nYou can add sections automatically by drag and dropping them into your experiment. E-Notebook will recognize Word, Excel and PowerPoint files, PDF documents and images. 
GraphPad Prism files are not native to E-Notebook and will result in an Ancillary data section, this will happen with any other file type that is not native to the program.\n## General Page\nCreating a new experiment will give you a blank experiment with only one section, by default this is the General page. This is an example of a General Page:\n\nEvery lab group has a slightly different version of this General page. The universal parts of this section are the **General Information** and the **Reference to experiment** field. In the first field you have the option to enter general properties of your experiment such as start date, project, etc. Adding extra properties is available in the **Property List** tab in the ribbon.\n\nAdding a reference to your experiment can be very useful to link similar experiments to each other or make a series of experiments. This reference can be any experiment within your group. To add a reference, click on the option in the **Home** tab in the ribbon.\n\nLastly, there are 3 or 4 text boxes to add keywords, aim of experiment, results, specifications or a conclusion.\n## Microsoft Office sections\nThree MS Office applications are supported in the E-Notebook software: Word, Excel and PowerPoint. All other MS Office files can be uploaded using the Ancillary Data section.\n\nFor the supported application you can add files using the corresponding section. This will initially display a (print) preview of the file, double-clicking the preview will launch the MS Office application to make adjustments. All other options are displayed in the ribbon:\n\n## Images\nUsing the Image section in E-Notebook will allow you to import one (1) image file. All common image extensions are supported, camera brand specific files (e.g. RAW or DNG) can be uploaded using a non-file-specific section. 
Next to the image file itself you can add a title and notes.\n\n## PDF files and Captured Image\nUsing the PDF section in E-Notebook will allow you to import 1 PDF file. Next to the PDF file itself you can add a description, date and a document name.\n\n## Ancillary Data (a.k.a. Binder)\nThis non-file-specific section will save 1 file. In order to open the file, you must double-click on it; this will launch the corresponding application outside ELN. Closing the external application again (e.g. after making adjustments) will result in this window:\n\nClick **Continue** to save your changes and re-upload the new file in ELN or click **Cancel** to ignore the changes.\n## Supplementary Data Management (SDM)\nFiles imported in this section will be saved on an internal network drive linked to ELN. This means that files in SDM won't be accessible outside of your research center or university network. Files in the SDM section are not limited to the file size limit of 30 MB. \nNext to the default list of sections, there are some lab-specific sections for PCR or Western Blot. To add one of these lab-specific sections, click on the **Other** icon and select your section.\n\n# Sharing data and linking experiments\n## Access rights for others\nTo grant a colleague access to your data, you simply select the object and click on the View tab in the ribbon. In the Properties field you click on Security. A new window will appear (left picture). The inherited privileges are default settings, you’re not able to modify this. The assigned privileges on the other hand can be modified by clicking ‘Grant’.\n\nBy filtering on user group or user you can select the group/person (right picture). The type of privilege can be: read, read and write, full control. You can define this in the next window.\n\nRemoving the privilege can be done by selecting the person or group and clicking on ‘Remove’. 
For both granting or removing access privileges there is no notification system, you have to tell them yourself.\n## Experiment shortcuts\nWhen a colleague granted you access to a project/notebook/experiment you can place a link to this object in your own ELN. This makes navigating to this object easier and allows you to group all your collaborations within your own ELN hierarchy. To create such a shortcut, follow these steps:\n1. Select the object of interest\n2. Right click – Copy\n3. Navigate to your own ELN\n4. Right-click on the location you want the link to appear\n5. Select Paste Reference\n\n> Note: shortcuts can be removed, the original data however is not deleted. \n## Templates\nTemplates can be created by every user and can be shared with your colleagues. To create a template, follow this procedure:\n\n1.\tnavigate to ‘User Configuration’ – ‘Templates’\n2.\tcreate new experiment\n3.\tbuild your new default experiment/template by adding information/sections\n4.\tsave your template\n\nNext time you want to create a new experiment, you will have the option to create a blank or template experiment. \n## Search\nThe collection search can be used for users, projects, notebooks and experiments. No content can be found with the search box in the upper right corner.\nThe Advanced Search option can find experiment content. You can find it in ‘Quick Links’ above the navigation pane.\n\n\n","# Introduction\n{:.no_toc}\n\n<!-- This is a comment. -->\n\n<details>\n  <summary>Click to expand!</summary>\n\n*Heading*\n1. A\n2. 
list\n   * With some\n   * Sub bullets\n\n<figure id=\"figure-1\"><img src=\"../../images/Seqselector.png\" alt=\"seqselector.png\"><figcaption><span class=\"figcaption-prefix\">Figure 1:</span> Seqselector.png</figcaption></figure>\n</details>\n\n### TODO: specific chapter on storage\n\n### Write your data management plan for your data storage\n\nGo to DMPonline and open your draft data management plan created in the Introduction.\n\nYou have now completed the module on data storage. You should be able to complete the following questions in the section ‘Data documentation’:\n\nWhere will you store your data?\nHow will the data be backed up?\nAfter finishing this part in DMPonline, please return to the learning environment and click on [Complete]. This takes you back to the course overview. Continue with the next learning unit.\n\nYou can ask your faculty or project data manager or RDM Support for a review of your DMP once you have finished writing all or parts of your DMP.\n","# Introduction\n{:.no_toc}\n\n<!-- This is a comment. -->\n\n<details>\n  <summary>Click to expand!</summary>\n\n*Heading*\n1. A\n2. list\n   * With some\n   * Sub bullets\n\n<figure id=\"figure-1\"><img src=\"../../images/Seqselector.png\" alt=\"seqselector.png\"><figcaption><span class=\"figcaption-prefix\">Figure 1:</span> Seqselector.png</figcaption></figure>\n</details>\n\n### Introduction to rounding up\n\nYou have almost reached the end of this course on research data management.\n\nYou have learned about data collection, data documentation, data storage and security, selection and preservation and making data available for reuse.\n\nWe are very curious to know if this course has helped you write your Data Management Plan (DMP).\n\nTo round up:\n\n* We want to remind you of the DMP review service of RDM Support;\n* We want to share some good practices of data management with you;\n* We invite you to fill out the evaluation of this online training. 
This will help us to further develop this training and future learners can benefit from this. Thank you very much!\n\n### DMP review service\n\nYou can have your data management plan (DMP) checked by the specialists of Research Data Management Support. You can get in touch if you are unsure about sections in your DMP or when you doubt whether your plan fits the requirements of your research funder.\n\nWhen you are in the process of writing a proposal for a research funder and you want a check on the data section, you can also contact the Research Support Office (RSO) of your faculty.\n\n### Researchers sharing their experiences\n\nTODO: add stories if available or links to resources\n\nTODO: merge with experiences\n\n### More data stories\n\nChallenges in irreproducible research\nspecial issue in Nature, 7 Oct 2015\n\nThere is growing alarm about results that cannot be reproduced.  Explanations include increased levels of scrutiny, complexity of experiments and statistics, and pressures on researchers. Journals, scientists, institutions and funders all have a part in tackling reproducibility. 
Nature has taken substantive steps to improve the transparency and robustness in what they publish, and to promote awareness within the scientific community.\n\nData stories in environmental science\ncollected by DataONE\n\nSuccess stories and cautionary tales from researchers related to their experiences with managing and sharing scientific research data as collected by DataONE.\n\nAdvantages of data sharing\nby John-Alan Pascoe of Delft University of Technology\n\nJohn-Alan Pascoe, researcher at the Faculty of Aerospace Engineering at Delft University of Technology, explains the advantages he experienced after sharing his raw and derived data in the data archive of 4TU.ResearchData.\n\n\n<iframe src=\"https://www.youtube.com/embed/Q7vC0v988R4\" allowfullscreen=\"\" allow=\"accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture\" height=\"515px\" style=\"display: inline-block;\" width=\"800px\" title=\"\"></iframe>\n\n### Evaluation of training\n\nTODO: link to questionnaire\n\n","## Introduction to data security\n\n<!-- This is a comment. -->\n\nBy now you know more about how to manage your data collection, how to organise and document your research data and where and how to store your data.\n\nNow we will take you into the world of keeping data safe and secure.\n\n**Loss of data, loss of academic career**\n\nThe loss of scientific data can have a devastating impact on careers. Imagine that you lose all of the research data you've been diligently collecting for four years. Now imagine the knock-on effect: you won't get the PhD you've been working towards, affecting your future career. This nightmare happened to Billy Hinchen, a biologist at Cambridge University. 
Listen to his story.\n\n<iframe src=\"https://www.youtube.com/embed/3xlax_Iin0Y\" allowfullscreen=\"\" allow=\"accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture\" height=\"515px\" style=\"display: inline-block;\" width=\"800px\" title=\"\"></iframe>\n\n### Data breaches\n\nThere are several examples of (mainly online) data storage going wrong, leading to leaks of sensitive and personal information.\n\nThe picture below shows the biggest cases of data breaches in the past 10 years. They involve some well-known, highly regarded and trusted companies as well as some practices from the academic world.\n[Read about the story](http://www.informationisbeautiful.net/visualizations/worlds-biggest-data-breaches-hacks/)\n\n<figure id=\"figure-1\"><img src=\"../../images/data-breaches.png\" alt=\"examples about data breaches\"><figcaption><span class=\"figcaption-prefix\">Figure 1:</span> Biggest data breaches</figcaption></figure>\n\n## Prevent unauthorised access\n\nData security may be needed to protect intellectual property rights, commercial interests, or to keep personal or sensitive information safe. Data security involves security of data files, computer system security and physical data security. All three need to be considered to ensure the security of your data files and to prevent unauthorised access, changes, disclosure or even destruction. Data security arrangements need to be proportionate to the nature of the data and the risks involved. Attention to security is also needed when data are to be destroyed.  If data destruction is in order, you need to make sure that the destruction process is irreversible.\n\nLearn about different measures depending on the kind of security you need.\n\n**Security of data files**\n\nThe information in data files can be protected by:\n\n* Controlling access to restricted materials with encryption. By coding your data, your files will become unreadable to anyone who does not have the correct encryption key. 
You may code an individual file, but also (part of) a hard disk or USB stick\n* Procedural arrangements like imposing non-disclosure agreements for managers or users of confidential data\n* Not sending personal or confidential data via email or through File Transfer Protocol (FTP), but rather by transmitting it as encrypted data e.g. [FileSender](https://filesender.belnet.be)\n* Destroying data in a consistent and reliable manner when needed\n* Authorisation and authentication: for personal data you have to give very selective access rights to specified individuals.\n\n**Computer security systems**\n\nThe computer you use to consult, process and store your data, must be secured:\n\n* Use a firewall\n* Install anti-virus software\n* Install updates for your operating system and software\n* Only use secured wireless networks\n* Use passwords and do not share them with anyone. Do not use passwords on your UU computer only, but also on your laptop or home computer. If necessary, secure individual files with a password.\n* Encrypt your devices (laptop, smartphone, USB stick/disk).\n\n**Physical data security**\n\nWith a number of simple measures, you can ensure the physical security of your research data:\n\n* Lock your computer when leaving it for just a moment (Windows key + L)\n* Lock your door if you are not in your room\n* Keep an eye on your laptop\n* Transport your USB stick or external hard disk in such a way that you cannot lose it\n* Keep non-digital material which should not be seen by others, in a locked cupboard or drawer.\n\n**Data classification**\n\nTODO: what to do with classified data\n\n**Data that contain personal information**\n\nThese data should be treated with higher levels of security than data which do not. 
You will learn more about privacy-sensitive data in the e-module.\n\n## What is your experience with unauthorised access to your research data?\n\nTODO: implementation form widget\n\nWe are interested to know if you have ever experienced unauthorized access to any of your research data. When you give your reply, we will show you an overview with the responses of other researchers in this course. All responses will be processed anonymously.\n\n[(1)] No, I am sure about that\n[(2)] Not that I am aware of\n[(3)] Yes, without much consequences\n[(0)] Yes, with severe consequences\n\n### Legal agreements and contracts\n\nOften other people are required to handle your data, or you might be the person that handles other people’s data.\n\nTo arrange the security of the research data you work with, in many cases you have to make a (legal) agreement with other people involved. These agreements will make explicit permitted uses, retention time, and agreed upon security measures. Find out what legal contracts you can use by studying the figure below.  TODO: Visit the Guide 'Legal instruments and agreements' for more information\n\nFor tailored advice and templates, contact Legal Affairs via your faculty Research Support Officer (RSO)\n\nTODO: add link\n\n<figure id=\"figure-2\"><img src=\"../../images/AgreementsPicture.png\" alt=\"Legal Agreement contacts -80width\"><figcaption><span class=\"figcaption-prefix\">Figure 2:</span> Agreements types for data</figcaption></figure>\n\n### When to use which legal contract?\n\nYou have been acquainted with the different flavors of legal agreements. Is it clear to you when you need which agreement? 
Please answer the following questions by choosing the right kind of agreement.\n\nTODO: add quiz or H5P quiz\n\n### Privacy-sensitive data\n\n<figure id=\"figure-3\"><img src=\"../../images/01_privacy-sensitive-data-learning-objectives.png\" alt=\"start privacy-sensitive data\"><figcaption><span class=\"figcaption-prefix\">Figure 3:</span> Personal data - learning objectives</figcaption></figure>\n\n---\n\n**Privacy in a nutshell**\n\nPrivacy is a fundamental right. With regards to privacy, we all have two perspectives:\n\n1. How is your privacy protected?\n2. How can we, as a researcher, protect the privacy of the people involved in our research (the data subjects)?\n\nTODO: add link to document and image screenshot\n<figure id=\"figure-4\"><img src=\"../../images/LCRDM-privacy-reference-card-why-Version-02.pdf\" alt=\"privacy reference card\"><figcaption><span class=\"figcaption-prefix\">Figure 4:</span> Privacy reference card</figcaption></figure>\n\n**Six principles from the European General Data Protection Regulation 1/2**\n\nThe European General Data Protection Regulation (GDPR) outlines how we should work with privacy-sensitive data.\n\nTODO: create working infographics with images\nsee http://gdprcoalition.ie/infographics\n\n**Six principles from the European General Data Protection Regulation 2/2**\n\nAccording to the GDPR processing of personal data must be done according to 6 principles.\n\nTODO: create HP5 document\n\nThe GDPR outlines six data protection principles you must comply with when processing personal data. These principles relate to:\n\n- Lawfulness, fairness and transparency - you must process personal data lawfully, fairly and in a transparent manner in relation to the data subject.\n- Purpose limitation - you must only collect personal data for a specific, explicit and legitimate purpose. 
You must clearly state what this purpose is, and only collect data for as long as necessary to complete that purpose.\n- Data minimisation - you must ensure that personal data you process is adequate, relevant and limited to what is necessary in relation to your processing purpose.\n- Accuracy - you must take every reasonable step to update or remove data that is inaccurate or incomplete. Individuals have the right to request that you erase or rectify erroneous data that relates to them, and you must do so within a month.\n- Storage limitation - You must delete personal data when you no longer need it. The timescales in most cases aren't set. They will depend on your business’ circumstances and the reasons why you collect this data.\n- Integrity and confidentiality - You must keep personal data safe and protected against unauthorised or unlawful processing and against accidental loss, destruction or damage, using appropriate technical or organisational measures. \n\n**Privacy by design**\n\nTo comply with the six principles from the GDPR, you can implement privacy by design. 
This means that you design a data management plan with measures on both IT and procedural level.\n\n<iframe src=\"https://www.youtube.com/embed/iZRcePnhS5I\" allowfullscreen=\"\" allow=\"accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture\" height=\"515px\" style=\"display: inline-block;\" width=\"800px\" title=\"\"></iframe>\n\n**Which principle is breached?**\n\nCan you recognise the principles that are breached in the different ways personal data is processed?\n\nTODO: H5P quiz 7 cases\n\n**Storing personal data 1/2**\n\n<figure id=\"figure-5\"><img src=\"../../images/02_privacy-sensitive-data-personal-data-01.png\" alt=\"storing personal data\"><figcaption><span class=\"figcaption-prefix\">Figure 5:</span> Storing personal data</figcaption></figure>\n\n**Storing personal data 2/2**\n\nOnly if the access can unambiguously be restricted to authorised persons, can data be stored without such measures.\n\nShould you want an elaborate visualisation of what is considered identifiable data, check out the information sheet at the Future Privacy Forum.\n\n[Download the visual guide to practical data de-identification](https://fpf.org/2016/04/25/a-visual-guide-to-practical-data-de-identification/)\n\n**Can you recognize identifiable data?**\n\n\n### {% icon question %} Can you recognize identifiable data?  \n\n1. a collection of GPS data of daily routines\n2.  a list of households sizes associated with number of pets\n3.  MRI scans without identifying metadata.\n4.  audio recordings with no metadata and no names of the recorded persons\n5. transcripts of interviews without any directly identifying information\n6.  a list of gender and grades for a de-identified course\n\n<details markdown='1'>\n  <summary>Check the answers.</summary>\n\nAnswers 1, 3, and 4 are correct!\n\nGPS data holds information on where people go. In a daily routine, the track ends at a particular location which is likely the home of the subject. 
An MRI scan from the profile of the head can be identifiable. Audio recordings can be identifiable from the tone of the voice. A list of surnames in itself is not identifying nor personal information.\n</details>\n{: .question }\n\n**Access to privacy-sensitive data**\n\nIf and how you can make personal data available, depends on the level of sensitivity of your data. The more sensitive, the more restrictions and safeguards need to be put in place to make sure the data does not fall into the hands of unauthorised persons both during and after research.\n\nTo determine where the privacy risks lie for your data you will have to do a Data Protection Impact Assessment (DPIA).\n\nFor more information:\n\nTODO: link to: https://www.uu.nl/en/research/research-data-management/guides/handling-personal-data\n\nTowards the data subjects, you need to be transparent regarding the possible reuse, or retaining of the data for verification requirements, and get their prior consent.\n\n**Cases on how to make personal data accessible**\n\nCase 1: YOUth cohort study\n\nYOUTH COHORT STUDY\nYOUth (Youth Of Utrecht) is a large-scale, longitudinal cohort following children in their development from pregnancy until early adulthood.\n\nA total of 6,000 babies and children from Utrecht and its surrounding areas will be included in two different age groups and followed at regular intervals.\n\nThe YOUth data enables researchers to look for answers to all sorts of scientific questions on child development. A few examples of YOUth data: human bodily material, hours of videos, MRI images, questionnaires, ultrasounds and IQ scores. YOUth encourages and facilitates data sharing. It is one of the leading human cohorts in FAIR and open data in the Netherlands.\n\nMore information at: https://www.uu.nl/en/research/youth-cohort-study\n\nCase 2: TODO: other example from Wings?\n\n**An introduction to informed consent**\n\nIn the module 'Legal agreements and contracts' you learned about informed consent. 
Informed consent is very important when working with data which is in any way related to people.\n\nTODO: add graphics on informed consent\n\nOne thing to arrange in your informed consent is the possibility for future use, for verification or reuse. In your informed consent, it is important to be clear on future use of data.\n\n**Informed consent for data sharing**\n\nOne thing to arrange and to be crystal clear about in your informed consent is the possibility for future use of your data, for verification or reuse.\n\n### {% icon question %} Question  \n\nCheck the sentences that do permit data sharing if used as a single statement.\n\n1. Any personal information that reasonably could identify you will be removed or changed before files are shared with other researchers or results are made public.\n2. Other genuine researchers (may) have access to this data only if they agree to preserve the confidentiality of the information as requested in this form.\n3. Any data that could identify you will be accessible only to the researchers responsible for performing this study.\n4. All personally identifying information collected about you will be destroyed after the study.\n\n<details markdown='1'>\n  <summary>Check the answers.</summary>\n\nAnswers 1 and 2 are both correct!\n\nSharing of research data that relates to people can often be achieved using a combination of obtaining consent, anonymizing data and regulating data access. If the statement towards the data only mentions the current study, sharing is not explicitly possible. 
You should add some sentence to make it clear to participants that the data could be used for further research, deidentified where possible, or identifiable with enough safeguards and security measures, if it is not.\n\n</details>\n{: .question }\n\n## Write your data management plan for your data security\n\nGo to DMPonline and open your draft data management plan created in the Introduction.\n\nYou have now completed the module on data security. You should be able to complete the following questions in the section ‘Data security’:\n\n* Will you use or collect any confidential or privacy-sensitive data?\n* What measures will you take to ensure the security of any confidential or privacy-sensitive data?\n* What measures will you take to comply with security requirements and mitigate risks? To whom will access be granted/restricted?\n","# Introduction to documentation\n\n<!-- This is a comment. -->\n\nBy now you understand how to describe your data collection in terms of, for example, type, size, and format. You have identified this for your own research data.\n\nNow we will look into the documentation and metadata which will accompany your data. 
Documentation and metadata are essential to understand what a dataset means and to make it reusable in the future.\n\n<figure id=\"figure-1\"><img src=\"../../images/01_Metadata_Learning_Objective.png\" alt=\"Introduction \"><figcaption><span class=\"figcaption-prefix\">Figure 1:</span> Why document your data: learning objectives</figcaption></figure>\n\n---\n\nTips for data documentation - John MacInnes, professor of Sociology of the University of Edinburgh, explains why it is necessary to document each step of your research and how this will benefit you in the long term.\n\n<iframe src=\"https://www.youtube.com/embed/EIZsxT-fIiQ\" allowfullscreen=\"\" allow=\"accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture\" height=\"515px\" style=\"display: inline-block;\" width=\"800px\" title=\"\"></iframe>\n\n---\n\n**Examples of data documentation**\n\nSince there is a wide variety of types of data and types of research, there are many different ways of documenting data. A few examples of data documentation are:\n\n* Laboratory notebooks and experimental procedures\n* Questionnaires, codebooks, data dictionaries\n* Software syntax and output files\n* Information about equipment settings & instrument calibrations\n* Database schemes\n* Methodology reports\n* Provenance information about sources of derived or digitised data\n\n### {% icon question %} Question  \n\nWhat data documentation will you use and why?\n\n<details markdown='1'>\n  <summary>Feedback on your reflections</summary>\n\nData documentation has as goal to be used by people to understand the dataset. Such as specific conditions in which it was collected, what each column means and which methods were used to collect the data. 
When creating documentation, you need to ask yourself, can others (or I, myself) understand my dataset if I give them this information.\n\n</details>\n{: .question }\n\nThere are many different ways to set up and organise your documentation.\n\n**Project level**\n\nProject level documentation documents what the study sets out to do; how it contributes to new knowledge in the field, what research questions/hypotheses are, what methodologies are used, what samples are used, what instruments and measures are used, etc. A complete academic thesis normally contains this information in detail, but a published article may not. If a dataset is shared, a detailed technical report needs to be included for the user to understand how the data were collected and processed. You should also provide a sample bibliographic citation to indicate how you would like secondary users of your data to cite it in any publication.\n\n**File or database level**\n\nFile or database level documentation documents how all the files (or tables in a database) that make up the dataset relate to each other, what format they are in, whether they supersede or are superseded by previous files, etc. A readme.txt file is the classic way of accounting for all the files and folders in a project.\n\n**Variable or item level**\n\nVariable or item level documentation documents how an object of analysis came about. 
For example, it does not just document a variable name at the top of a spreadsheet file, but also the full label explaining the meaning of that variable in terms of how it was operationalised.\n\nJohn MacInnes, professor of Sociology of the University of Edinburgh, speaks about how data documentation can help to find a way in often voluminous data collections of different copies, routings, syntaxes, samplings, etc.\n\n**On the necessity of data documentation in secondary data analysis**\n\n<iframe src=\"https://www.youtube.com/embed/Ebaiwg08CW8\" allowfullscreen=\"\" allow=\"accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture\" height=\"515px\" style=\"display: inline-block;\" width=\"800px\" title=\"\"></iframe>\n\n### {% icon question %} Question  \n\nLooking back at your previous research project: Did you ever have problems reusing other people's data because of lack of documentation?\n\n- Never tried\n- Successfully reused\n- Had to ask clarification\n- Had to abandon the reuse attempt\n\n<details markdown='1'>\n  <summary>Feedback on your reflections</summary>\n\nData documentation always provides advantages for yourself and for others such as better understandability, sharability and reusability in the future. 
\n\n</details>\n\n{: .question }\n\n<figure id=\"figure-2\"><img src=\"../../images/02_Metadata_Lab-Notebook.png\" alt=\"Lab notebooks\"><figcaption><span class=\"figcaption-prefix\">Figure 2:</span> Laboratory Notebooks for documentation</figcaption></figure>\n\n---\n\nThorough and effective management of laboratory data and the routine documentation of all lab procedures is a highly important responsibility for all researchers.\n\nIf you want to learn more about the electronic lab notebook system at VIB, please see [these tutorials](https://material.bits.vib.be/topics/eln/) \n\n# An introduction to metadata\n\nWatch this web lecture to learn about the different types of metadata and how metadata can help make your research data better findable. You are pointed to useful sources for metadata standards.\n\n<iframe src=\"https://www.youtube.com/embed/h0oZ3swbTJ0\" allowfullscreen=\"\" allow=\"accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture\" height=\"515px\" style=\"display: inline-block;\" width=\"800px\" title=\"\"></iframe>\n\n**identify different types of metadata**\n\nTODO: HP5 quiz or matrix quiz\n\n**Metadata for different disciplines**\n\nDifferent disciplines like biology, earth sciences, physical sciences and social sciences and humanities have their own standards. 
By choosing a well-supported standard, you will maximise the chance that your data can be (re)used and understood by other researchers.\n\n<iframe src=\"https://www.youtube.com/embed/AvL7hEk8RJQ\" allowfullscreen=\"\" allow=\"accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture\" height=\"515px\" style=\"display: inline-block;\" width=\"800px\" title=\"\"></iframe>\n\n**Metadata for different disciplines**\n\nUseful links to metadata standards:\n\n* [Biology](http://www.dcc.ac.uk/resources/subject-areas/biology)\n* [General Sciences](http://www.dcc.ac.uk/resources/subject-areas/general-research-data)\n\nA community-maintained [directory of metadata schemas](http://rd-alliance.github.io/metadata-directory/) which has been set up under the auspices of the Research Data Alliance.\n\nA list of metadata standards and other standards developed by [FairSharing](https://fairsharing.org/).\n\n**Controlled vocabulary**\n\n![Controlled vocabulary](../../images/03_Metadata-controlled-vocabulary.png)\n\n**Improve a record description**\n\n### {% icon question %} Question  \n\nTake a look at the record descriptions in the table below and answer the question below and in the following pages.\n\n| Soil Sample       | Condition     | Length| Classx |\n| ----------------- |:-------------:| -----:|:-------|\n| A1                | low           | $458  | III    |\n| A2                | low           | $391  | II     |\n| A3                | medium        | $422  | IV     |\n\nx according to the classification from last experiment\n\nIs the value in the Soil Sample column clear?\n\n<details markdown='1'>\n  <summary>Click your answers!</summary>\n\nYes, it is sufficient to say this is a sample. 
The identifier for the sample needs to be unique, the content of the sample comes from the other metadata fields and their values.\n\n</details>\n{: .question }\n\n### {% icon question %} Question  \n\nTake a look at the record descriptions in the table below and answer the question below and in the following pages.\n\n| Soil Sample       | Condition     | Length| Classx |\n| ----------------- |:-------------:| -----:|:-------|\n| A1                | low           | $458  | III    |\n| A2                | low           | $391  | II     |\n| A3                | medium        | $422  | IV     |\n\nx according to the classification from last experiment\n\nIs the value in the Condition column clear?\n\n<details markdown='1'>\n  <summary>Click your answers!</summary>\n\n  No! It is not clear what low or medium as condition means.\n\n</details>\n{: .question }\n\n### {% icon question %} Question  \n\nTake a look at the record descriptions in the table below and answer the question below and in the following pages.\n\n| Soil Sample       | Condition     | Length| Classx |\n| ----------------- |:-------------:| -----:|:-------|\n| A1                | low           | $458  | III    |\n| A2                | low           | $391  | II     |\n| A3                | medium        | $422  | IV     |\n\nx according to the classification from last experiment\n\nIs the value in the Length column clear?\n\n<details markdown='1'>\n  <summary>Click your answers!</summary>\n\nNo, it is not clear what is meant by length. Also a unit for the values is missing. 
Is it meters, centimeters, or seconds?\n\n</details>\n{: .question }\n\n### {% icon question %} Question  \n\nTake a look at the record descriptions in the table below and answer the question below and in the following pages.\n\n| Soil Sample       | Condition     | Length| Classx |\n| ----------------- |:-------------:| -----:|:-------|\n| A1                | low           | $458  | III    |\n| A2                | low           | $391  | II     |\n| A3                | medium        | $422  | IV     |\n\nx according to the classification from last experiment\n\nIs the value in the Class column clear?\n\n<details markdown='1'>\n  <summary>Click your answers!</summary>\n\nNo! There is a reference that the classes are explained somewhere. But no link to the document is given.\n\n</details>\n{: .question }\n\n## Data standards explained\n\nYour dataset can be standardised in various aspects. Standardisation, in general, makes data comparable and interpretable. In other words, your data becomes interoperable by applying standards. Datasets can be combined, compared or are simply easier to reuse. You have to plan standardisation, as it is for many aspects hard or impossible to apply afterwards.\n\nStandardise as much as possible between you and your collaborators or research group. If there are standards established and used in your field of research you are advised to use these.\n\nHere is a list of things you can standardise in your research.\n\n* Standardise how, what and when you measure things by standardising your protocol, or methods and materials. For instance, is there a standard set of questions for ‘quality of life’? Is there a standard procedure to house mice for your purpose? What aspects do you measure? At what parameter values (age, concentration, etc.)? When do you measure (every two hours, every gram of weight gain, etc.)?\n\n* Standardise your file formats so you can easily exchange results without technical difficulties. 
Check for standard taxonomies or coding systems within your research discipline.\n\n* Standardise the units in which you note down your results. For instance, do you use mm, cm, m? It is extra work to transform units between experiments.\n\n* Standardise the metadata you use to describe your records or study. What fields will you fill in by default, and according to what standard do you define the fields’ names? Will you design a metadata spreadsheet where you specify all things that you will note down?\n\n* Standardise the vocabulary you use. If everyone has the same terminology, it can avoid confusion or misinterpretation. Check for standard taxonomies or coding systems within your research discipline.\n\n### Check your knowledge on standards\n\nFollow the links below for examples of standards. What type of standardisation do the links refer to?\n\n* [Demographic market research](http://www.amplituderesearch.com/market-research-questions.shtml)\n* Find via Google: “general morphology score (GMS)”\n* [Marine Geoscience Data](http://www.marine-geo.org/submit/guidelines.php)\n* [International Union of crystallography](http://www.iucr.org/resources/cif/spec/ancillary/abbreviations)\n* [The Cultural Objects Name Authority](http://www.getty.edu/research/tools/vocabularies/cona/index.html)\n* [SI Units](https://www.nist.gov/pml/weights-and-measures/metric-si/si-units)\n* [UK data service](https://www.ukdataservice.ac.uk/manage-data/format/recommended-formats)\n\nTODO: add H5P exercise\n\n## Folder structure and file naming\n\n<figure id=\"figure-3\"><img src=\"../../images/01_Folder-structure-Learning-Objective.png\" alt=\"Introduction \"><figcaption><span class=\"figcaption-prefix\">Figure 3:</span> Folder structure - learning objectives</figcaption></figure>\n\nCC BY: [https://mantra.edina.ac.uk/](https://mantra.edina.ac.uk/)\n\n---\n\n<figure id=\"figure-4\"><img src=\"../../images/02_Folder-structrue-introduction-file-management.png\" alt=\"Introduction to good file 
management\"><figcaption><span class=\"figcaption-prefix\">Figure 4:</span> Introduction to good file management</figcaption></figure>\n\nTrying to find a data file that you need which has been stored or named incorrectly or inaccurately can be both frustrating and a waste of valuable time. In this short video Jeff Haywood, professor at the University of Edinburgh, explains his experiences with good and bad file management.\n\n<iframe src=\"https://www.youtube.com/embed/i2jcOJOFUZg\" allowfullscreen=\"\" allow=\"accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture\" height=\"515px\" style=\"display: inline-block;\" width=\"800px\" title=\"\"></iframe>\n\n**Project level**\n\nProject level documentation documents what the study sets out to do; how it contributes to new knowledge in the field, what research questions/hypotheses are, what methodologies are used, what samples are used, what instruments and measures are used, etc. A complete academic thesis normally contains this information in detail, but a published article may not. If a dataset is shared, a detailed technical report needs to be included for the user to understand how the data were collected and processed. You should also provide a sample bibliographic citation to indicate how you would like secondary users of your data to cite it in any publication.\n\n**File or database level**\n\nFile or database level documentation documents how all the files (or tables in a database) that make up the dataset relate to each other, what format they are in, whether they supersede or are superseded by previous files, etc. A readme.txt file is the classic way of accounting for all the files and folders in a project.\n\n**Variable or item level**\n\nVariable or item level documentation documents how an object of analysis came about. 
For example, it does not just document a variable name at the top of a spreadsheet file, but also the full label explaining the meaning of that variable in terms of how it was operationalised.\n\n### {% icon question %} **Choose the best chronological file name**  \n\nWhich of the file names below is the most appropriate?\n\n- 2019-03-24_Attachment\n- 24 March 2006 Attachment\n- 240306attach\n\n<details markdown='1'>\n  <summary>Click your answers!</summary>\n\n2019-03-24_Attachment is correct! Using a date in the format Year-Month-Day will maintain the chronological order of your files.\n</details>\n{: .question }\n\n### {% icon question %} **Choose the best descriptive file name**  \n\nWhich of the file names below is the most appropriate?\n\n- labtox_recent_110810_old_version.sps\n- 2010-08-11_bioasssay_tox_V1.sps\n- FFTX_3776438656.sps\n\n<details markdown='1'>\n  <summary>Click your answers!</summary>\n\n2010-08-11_bioasssay_tox_V1.sps is correct! Keep the file names short and relevant while using sufficient characters to capture information. 
Do not name files recent or final or definitive_final, a date or version number will suffice.\n\n</details>\n{: .question }\n\n<figure id=\"figure-5\"><img src=\"../../images/03_Folder-structure-batch-renaming.png\" alt=\"Batch renaming\"><figcaption><span class=\"figcaption-prefix\">Figure 5:</span> Batch renaming</figcaption></figure>\n\n---\n\n<figure id=\"figure-6\"><img src=\"../../images/04_Folder-structure-version-control.png\" alt=\"Lab notebooks\"><figcaption><span class=\"figcaption-prefix\">Figure 6:</span> Suggestions for version control</figcaption></figure>\n\n**How would you treat your data**\n\n### {% icon question %} **Choose the best descriptive file name**  \n\nWhy should you discard or delete obsolete versions of data?\n\n- The most current version is the only relevant version.\n- You have several versions of files in a state between versions\n- You are exceeding the storage space available to you.\n\n<details markdown='1'>\n  <summary>Click your answers!</summary>\n\nCorrect answer: You have several versions of files in a state between versions! Too many similar or related files may be confusing to yourself and to anyone else wanting to access or use your data. You may think that you know which data file is which but that may not always be the case as time passes and the number of different versions increases. It is easier to maintain a manageable number of versions with a clear naming structure. As long as the original raw or definitive copy is retained and processing is well documented, the intermediate working files can and should be discarded.\n\n</details>\n{: .question }\n\n**Fill the blanks**\n\nTODO: add H5P\n\n### Write your data management plan for your data documentation\n\nGo to DMPonline and open your draft data management plan created in the Introduction.\n\nYou have now completed the module Data documentation. 
You should be able to complete the following questions in the section Data documentation:\n\n* How will you structure your data?\n* How will the data be described and documented?\n* What standards will you use?\n\n","## Useful information & training resources on Research Data Management \n\n[UGent RDM webpages in Dutch](https://www.ugent.be/nl/onderzoek/datamanagement)\n\n[UGent RDM webpages in English](https://www.ugent.be/en/research/research-staff/organisation/datamanagement)\n\n[Australian National Data Service (esp. “23 (research data) things”)](http://www.ands.org.au/partners-and-communities/23-research-data-things)\n\n[Coursera Mooc “Research Data Management and Sharing”](https://www.coursera.org/learn/data-management)\n\n[Data Management Training Clearinghouse (registry of RDM learning resources)](http://dmtclearinghouse.esipfed.org/)\n\n[DataOne (esp. education modules)](https://www.dataone.org/education-modules)\n\n[Digital Curation Centre (esp. How-to Guides & Checklists)](http://www.dcc.ac.uk/resources/how-guides)\n\n[Essentials for Data Support](http://datasupport.researchdata.nl/en)\n\n[EUDAT (esp. training materials)](https://eudat.eu/training)\n\n[FOSTER training portal](https://www.fosteropenscience.eu)\n\n[MANTRA – Research Data Management Training](http://datalib.edina.ac.uk/mantra/)\n\n[OpenAIRE webinars](https://www.openaire.eu/webinars/)\n\n[RDM open training materials on Zenodo](https://zenodo.org/communities/dcc-rdm-training-materials/?page=1&size=20)\n\n[UK Data Service (esp. 
“Prepare & Manage Data” pages)](https://www.ukdataservice.ac.uk/manage-data)\n\n[UK Data Service webinars](https://www.ukdataservice.ac.uk/news-and-events/webinars)\n\n[FAIRDOM Knowledge Hub](https://fair-dom.org/knowledgehub/)\n\n[Data4LifeSciences Handbook for Adequate Natural Data Stewardship](http://data4lifesciences.nl/hands/handbook-for-adequate-natural-data-stewardship/) \n\n","# Reading and writing files\n{:.no_toc}\n\n### Reading files\nEntering data in R can be done by typing the values when you create a variable. In most cases, however, you will have a file with data that was created by an instrument in your lab. How to import such a file into R? \n\nThere is a manual available in the R documentation called **R Data Import/Export**. It's accessible using help.start() and covers in detail the functionality R has to import and export data. Reading this is highly recommended. This manual covers importing data from spreadsheets, text files, and networks.\n\n### Reading text files\nMost instruments put out data in text format: tab-delimited text (.txt) or comma-separated value files (.csv). Both  can be easily opened in R. \n\nThe most convenient method to import data into R is to use the read functions, like read.table(). These functions can read data in a text file. In Notepad you can save such a file as a regular text file (extension .txt). Many spreadsheet programs can save data in this format. Reading means opening the file and storing its content into a data frame.\n```\nread.table(file,header=FALSE,sep=\"\",dec=\".\",skip=0,comment.char=\"#\")\n```\n\nThis function has a long list of arguments, the most important ones are:\n- *file*: path on your computer to the file e.g. D:/trainingen/Hormone.csv \n\tIf it is stored in the working directory, you can simply use its name. You can also use *file=file.choose()* to browse to the file and select it. 
File can be replaced by a url to load a file with data from the internet.\n- *header*: does the first line of the file contain column names?\n- *dec*: symbol used as decimal separator\n- *sep* symbol used as column separator, default is a whitespace or tab\n- *skip*: number of lines to skip in the file before starting to read data\n- *comment.char*: symbol to define lines that must be ignored during reading\n\nSee the documentation for an overview of all the arguments. The output of every read function is a data frame.\n\nThere are functions to read specific file formats like .csv or tab-delimited .txt files. In the documentation of read.table() you see that these functions are called read.csv() and read.delim().  Both functions call read.table(), but with a bunch of arguments already set.  Specifically they set up *sep* to be a tab or a comma, and they set *header=TRUE*.  \n\n```\nread.delim(file,header=TRUE,sep=\"\\t\")\n```\nOn the documentation page, you see that these functions each have two variants that have different default settings for the arguments they take:\n```\nread.csv(   file,header=TRUE,sep= \",\",dec=\".\", ...)\nread.csv2(  file,header=TRUE,sep= \";\",dec=\",\", ...)\nread.delim( file,header=TRUE,sep=\"\\t\",dec=\".\", ...)\nread.delim2(file,header=TRUE,sep=\"\\t\",dec=\",\", ...)\n```\nOriginally the CSV format was designed to hold data values separated by commas. In .csv files that are made on American computers this is the case. However, in Europe the comma was already used as a decimal separator. This is why .csv files that are made on a European computer use the semicolon as a separator. \n\nFor instance, the file below contains a header row and three columns, separated by semicolons. 
It uses the comma as decimal separator.\n```\nPatient;Drug;Hormone\n1;A;58,6\n2;A;57,1\n3;B;40,6\n```\nObviously, the file is a European CSV file, to open it use read.csv2()\n\n### Reading Excel files\nTo import Excel files via a command the easiest way is to let Excel save the file in .csv or tab delimited text format and use the read functions. \n\nAn easy way to import Excel files is to use the RStudio interface although I prefer to use commands. To use the interface go to the **Environment** tab and click the **Import Dataset** button. \n\nRStudio can import 3 categories of files: text files, Excel files and files generated by other statistical software. To read .xls or .xlsx files select **From Excel**. \n\nA dialog opens with options on the import. You can import data from your computer (**Browse**) or from the internet (provide a url and click **Update**). Click **Browse**, locate the Excel file and click **Open**.\n\nThe **Data Preview** section shows what the data will look like in R.\n\nThe **Import Options** section allows you to specify the import parameters. \n- *Name*: name of the data frame that will hold the imported data. The default is the name of the file that you are opening.\n- *Skip*: number of rows at the top of the file to skip during import. Some data formats contain a number of header rows with general info like parameter settings, sample names etc. These rows are followed by the actual data. Skip allows you to skip over the header rows and import the actual data. \n- If the first row of the file contains column names, select *First Row as Names*\n- *Open data viewer* shows the data in the script editor upon import\n\nClick **Import**.\n\nBehind the scenes RStudio uses the **readxl** package that comes with the tidyverse package. You can also use the functions of this package directly in commands. 
\n\nCompared to other packages for reading Excel files (gdata, xlsx, xlsReadWrite) readxl has no external dependencies, so it's easy to install and use on all operating systems. It supports the  .xls format and the .xlsx format. The easiest way to install it from CRAN is to install the whole tidyverse package but you have to load readxl explicitly, since it is not a core tidyverse package.\n\nOnce imported into RStudio the data is stored in a data frame and you can use it as input of commands. The data frame appears in the list of **Data in the Environment tab**.\n\n<figure id=\"figure-1\"><img src=\"../../images/Rfile_imported.png\" alt=\"file_imported\"><figcaption><span class=\"figcaption-prefix\">Figure 1:</span> Inspect Variables and Data Frames in the Environment tab</figcaption></figure>\n\nIf you want to view the data frame you can **click its name in the Environment** tab and it will appear in a separate tab in the script editor.\n\n<figure id=\"figure-2\"><img src=\"../../images/Rview_file.png\" alt=\"view_file\"><figcaption><span class=\"figcaption-prefix\">Figure 2:</span> View file content</figcaption></figure>\n\n> ### {% icon hands_on %} Hands-on: Demo\n>\n> From the demo script run the **Reading files** section\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Exercise 17a\n>\n> 1. Import the file [GeneEx.csv](http://data.bits.vib.be/pub/trainingen/RIntro/GeneEx.csv) into a data frame called GeneEx\n> 2. Rename the two last columns Ct1 and Ct2\n> 3. 
Create a new column containing the average Ct: (Ct1+Ct2)/2\n>    > <details markdown=\"1\">\n>    > <summary> {% icon solution %} Solution\n>    > </summary>\n>    >  ```\n>    >  GeneEx <- read.csv2(\"Rdata/GeneEx.csv\")\n>    >  colnames(GeneEx)[c(3,4)] <- c(\"Ct1\",\"Ct2\")\n>    >  GeneEx$Average_Ct <- (GeneEx$Ct1 + GeneEx$Ct2)/2\n>    >  ```\n>    > </details>\n>\n>    > ### {% icon question %} Question\n>    >\n>    >  Which of these 2 commands will work ?\n>    > ```\n>    >  GeneEx <- read.csv2(\"Rdata/GeneEx\")\n>    >  GeneEx <- read.csv2(\"http://data.bits.vib.be/pub/trainingen/RIntro/GeneEx.csv\")\n>    >  ```\n>    {: .question}\n>    > ### {% icon question %} Question\n>    >\n>    > Which of these 2 commands will work ?\n>    >  ```\n>    >  names(GeneEx[c(3,4)]) <- c(\"Ct11\",\"Ct21\")\n>    >  names(GeneEx)[3:4] <- c(\"Ct11\",\"Ct21\")\n>    >  ```\n>    {: .question}\n>    > ### {% icon question %} Question\n>    >\n>    > What's the difference in result between these 2 commands ?\n>    > ```\n>    > GeneEx$Average_Ct2 <- (GeneEx$Ct1+GeneEx[4])/2\n>    > GeneEx[5] <- (GeneEx[3]+GeneEx[4])/2\n>    > ```\n>    {: .question}\n>    > ### {% icon question %} Question\n>    >\n>    > Can you use sum() instead of + ?\n>    > ```\n>    > sum(GeneEx$Ct1,GeneEx$Ct2)\n>    > (GeneEx$Ct1+GeneEx$Ct2)\n>    > ```\n>    {: .question}\n>    > ### {% icon question %} Question\n>    >\n>    > Can you use mean() instead of +/2 ?\n>    > ```\n>    > mean(GeneEx$Ct1,GeneEx$Ct2)\n>    > mean(GeneEx$Ct1)\n>    > ```\n>    {: .question}\n{: .hands_on}\n\n### Reading other files\nAlso of note is an R package called **foreign**. This package contains functionality for importing data into R that is formatted by most other statistical software packages, including SAS, SPSS, Stata and others. 
\n\n### Writing files\nConversely, to write a data frame to a file you can use the generic function:\n```\nwrite.table(x,file=\"name.txt\",quote=TRUE,row.names=TRUE,col.names=TRUE)\n```\nThis function has a long list of arguments, the most important ones are:\n- *x*: data frame to be written to a file\n- *file*: name or full path of the file e.g. D:/trainingen/Hormone.csv\n- *quote*: if TRUE, strings, row and column names will be surrounded by double quotes. If FALSE, nothing is quoted.\n- *sep*: column separator\n- *row.names*: boolean indicating whether the row names of x are to be written or a character vector of row names to be written\n- *col.names*: boolean indicating whether the column names of x are to be written or a character vector of column names to be written\n- *append=FALSE*: if TRUE x is **added** to the file defined by *file*\n- *eol = \"\\n\"*: end-of-line character, default \"\\n\" represents an enter\n- *na=\"NA\"*: string to use for missing values in the data\n- *dec=\".\"*: decimal separator\n\nSee the help file for a full overview of all arguments. \n\nTo specifically write .csv files use write.csv() or write.csv2(). See the help file for a description of the difference between them. \n\nExcel can read .csv files but if you really want to write .xls or .xlsx files use the openxlsx package.  \n\n\n> ### {% icon hands_on %} Hands-on: Exercise 17b\n>\n> 1. Read the file [RNASeqDE.txt](http://data.bits.vib.be/pub/trainingen/RIntro/RNASeqDE.txt) into a data frame called DE. It contains the differentially expressed genes from an RNA-Seq experiment.  \n> 2. Split the table into a table of upregulated genes (log2foldchange > 0) and a table of downregulated genes and store them in data frames called up and down.\n> 3. How many up- and downregulated genes are there?\n> 4. What is the gene with the highest log2 fold change?\n> 5. What is the data of the gene with the lowest adjusted p-value (= padj)?\n> 6. 
Write the Ensembl IDs (= row names) of the upregulated genes to a file called up.txt. You will use this file for functional enrichment analysis using online tools like ToppGene,EnrichR? These tools want a file with only Ensembl IDs as input (one per line, no double quotes, no column headers, no row names).\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >  ```\n>    >  DE <- read.table(\"Rdata/RNASeqDE.txt\",header=TRUE)\n>    >  up <- DE[DE$log2FoldChange > 0,]\n>    >  down <- DE[DE$log2FoldChange < 0,]\n>    >  nrow(up) \n>    >  nrow(down)\n>    >  rownames(up[which.max(up$log2FoldChange),])\n>    >  DE[which.min(DE$padj),]\n>    >  write.table(rownames(up),file=\"up.txt\",quote=FALSE,col.names=FALSE,row.names=FALSE)\n>    >  ```\n>    > </details>\n>\n>    > ### {% icon question %} Question\n>    >\n>    > Which of the following 2 commands will not work properly ?\n>    > ```\n>    >  DE <- read.table(\"Rdata/RNASeqDE.txt\")\n>    >  file <- file.choose()\n>    >  DE <- read.table(file,header=TRUE)\n>    >  ```\n>    {: .question}\n>    > ### {% icon question %} Question\n>    >\n>    > Will the following command work ?\n>    > ```\n>    >  up <- subset(DE,log2FoldChange > 0)\n>    >  \n>    >  ```\n>    {: .question}\n>    > ### {% icon question %} Question\n>    >\n>    > What's the difference between these 2 commands ?\n>    >  ```\n>    >  which.max(up$log2FoldChange)\n>    >  max(up$log2FoldChange)\n>    >  ```\n>    {: .question}\n>    > ### {% icon question %} Question\n>    >\n>    > Will this command write Ensembl IDs and log fold changes ?\n>    > ```\n>    >  toprint <- as.data.frame(up$log2FoldChange)\n>    >  write.table(toprint,file=\"up.txt\",quote=FALSE,col.names=FALSE)\n>    >  ```\n>    {: .question}\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 17c\n>\n> Which type of files are imported by read.delim ? 
\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    > Check the documentation and look at the default for *sep* \n>    > </details>\n>\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 17d\n>\n> 1. Read the file [ALLphenoData.tsv](http://data.bits.vib.be/pub/trainingen/RIntro/ALLphenoData.tsv) into a variable called pdata using one of the read functions\n> 2. What type of data structure is pdata ?\n> 3. What are the names of the columns of pdata ?\n> 4. How many rows and columns are in pdata ?\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >  ```\n>    >  pdata <- read.delim(\"Rdata/ALLphenoData.tsv\")\n>    >  class(pdata)\n>    >  colnames(pdata)\n>    >  dim(pdata)\n>    >  ``` \n>    > </details>\n>\n{: .hands_on}\n","<!-- This is a comment. -->\n\n## Introduction to data availability for reuse\n\nThanks to information and communication technology and globalisation new opportunities arise to exchange results of scientific research - publications and research data - and even of scientific methods and practices. This new way of practising science is called ‘open science’.\n\nOpen data is a part of this movement towards open science. It is the ambition of universities, governments, funders and publishers to make research data optimally suited for reuse.\n\nThere are different reasons why you may not be able to share your research data. Thinking about these issues and challenges when developing your data management plan will help you reflect on such reasons in an early stage.\n\n**How frustrating a data request can be**\n\nNot being prepared to share your data can lead to problems in using the data. In this short video, you see what shouldn't happen when a researcher makes a data sharing request! Topics include storage, documentation, and file formats. 
A made up, yet not unrealistic story.\n\n\n<iframe src=\"https://www.youtube.com/embed/66oNv_DJuPc\" allowfullscreen=\"\" allow=\"accelerometer; autoplay; encrypted-media; gyroscope; picture-in-picture\" height=\"515px\" style=\"display: inline-block;\" width=\"800px\" title=\"\"></iframe>\n\n## Introduction to data repositories\n\nIn order to preserve, manage, and provide access to your research data, you can deposit your data in a data repository. Data repositories allow permanent access to datasets in a trustworthy environment and enable search, discovery, and reuse of the data they host.\n\nClick on the topics below to find out more about data repositories.\n\nTODO: add repositories from Elixir\n\n**A wide variety**\n\nThere is a wide variety of data repositories. Most have the option to publish your dataset using a persistent identifier and some provide the service of long-term preservation. Some repositories host data from various disciplines and others are domain- or discipline specific.\n\n**Choosing a data repository**\n\nWhen choosing a repository for your data be sure to check if the repository meets your criteria or the criteria set by your funder or journal editors.\n\nCriteria to select a certain repository can be:\n\n* Is the repository certified with a [CoreTrustSeal](https://www.coretrustseal.org/) or Data Seal of Approval?\nRepositories with a Data Seal of Approval are recognised in the community as a trustworthy source of data.\n* Is long term archiving guaranteed or not?\nSome repositories will guarantee the legibility of the data, even if the hardware and software become obsolete.\n* What are the costs per dataset or gigabyte?\nRepositories differ in their cost model, some allow free deposits up to a certain amount of storage\n* What is the physical storage location of data?\nThe location of your data determines under which data protection law it falls. 
Some repositories store data in the US and others in the EU.\n* What is the default license?\nSome repositories allow for open or restricted access, or you can specify which license for use you want for your data.\n\nYou can use this [repository selection tool](https://www.uu.nl/en/research/research-data-management/tools-services/tools-for-storing-and-managing-data/decision-aid-data-repositories) to help you select a suitable repository.\n\n**Registry of research data repositories**\n\nYou can browse or search for a data repository in re3data.org. This is a global registry of research data repositories covering different academic disciplines. You can search or browse by subject, content type or country. You can filter the search and browse results on criteria for choosing a data repository as described above.\n\n[https://www.re3data.org/](https://www.re3data.org/)\n\n**Some well-known and more generic repositories**\n\n* [Zenodo](https://zenodo.org/) – a repository that enables researchers, scientists, EU projects and institutions to share and showcase multidisciplinary research results (data and publications) that are not part of the existing institutional or subject-based repositories of the research communities;\n* [Dryad](http://www.datadryad.org/) – a curated general-purpose repository that makes the data underlying scientific publications discoverable, freely reusable and citable. Dryad has integrated data submission for a growing list of journals;\n* [Open Science Framework (OSF)](https://osf.io/) - a scholarly commons to connect the entire research cycle. 
It is part network of research materials, part version control system, and part collaboration software;\n* [Figshare](https://figshare.com/) – a repository that allows researchers to publish all of their research outputs in an easily citable, sharable and discoverable manner.\n\n## Explore data repositories\n\nYou have just learned about the existence of a global registry of research data repositories that covers repositories from different academic disciplines.\n\nRe3data.org makes it possible to search for a repository that meets your criteria.\n\nGo to [www.re3data.org/search](http://www.re3data.org/search) and find a repository that meets all three of the following criteria:\n\n* Certificate → CoreTrustSeal\n* Data licenses → CC0 (Creative Commons 0)\n* Persistent identifier (PID systems) → DOI (Digital Object Identifier)\n\nMake use of the filters offered on the left side of the screen, as visualized here:\n\nTODO: quiz with ELIXIR resources\n\n### Give clarity with (Creative Commons) licenses\n\nIn order to publish your data and make it reusable, you require a license. A license creates clarity and certainty for potential users of your data. A license is not an option for all data; some of it may be too confidential or privacy-sensitive to be published.\n\n**Creative Commons licenses**\n\nLicenses such as the [Creative Commons](https://creativecommons.org/share-your-work/licensing-types-examples/) (CC) licenses replace 'all rights reserved' copyright with 'some rights reserved'. There are seven standard CC licenses. CC-BY is the most commonly used license, in which attribution is mandatory when using data. You can also choose restrictions like non-commercial, no derivatives, or share alike. 
Creative Commons offers a [guide](https://creativecommons.org/choose/?lang=en) to help you determine your preferred license.\n\n<figure id=\"figure-1\"><img src=\"../../images/CC.png\" alt=\"Creative Commons\"><figcaption><span class=\"figcaption-prefix\">Figure 1:</span> Creative Commons</figcaption></figure>\n\n**Assigning a license to your data**\n\nAssigning licenses to data can also have disadvantages. Licenses are static and do not change with the quick developments in the field of research data. Therefore, some data repositories work with a CC0 license whereby no rights are reserved. Instructions regarding use are completed with codes of conduct, which may be adapted more easily.\n\nA short movie explaining the different Creative Commons elements is shown below. Remember that sharing without a license can still lead to conflicts.\n\nTODO: add video on CC licenses?\n\n**Question**\n\nWe are very interested to know what license you would choose if you were to share the underlying research data of your most recent publication.  \n\nAn explanation for each license can be found by clicking on the links below.\n\n1. CC BY: [Attribution](https://creativecommons.org/share-your-work/licensing-types-examples/licensing-examples/#by)\n2. CC BY-SA: [Attribution ShareAlike](https://creativecommons.org/share-your-work/licensing-types-examples/licensing-examples/#sa)\n3. CC BY-ND: [Attribution-NoDerivs](https://creativecommons.org/share-your-work/licensing-types-examples/licensing-examples/#nd)\n4. CC BY-NC: [Attribution-NonCommercial](https://creativecommons.org/share-your-work/licensing-types-examples/licensing-examples/#nc)\n5. CC BY-NC-SA: [Attribution-NonCommercial-ShareAlike](https://creativecommons.org/share-your-work/licensing-types-examples/licensing-examples/#by-nc-sa)\n6. CC BY-NC-ND: [Attribution-NonCommercial-NoDerivs](https://creativecommons.org/share-your-work/licensing-types-examples/licensing-examples/#by-nc-nd)\n7. 
CC0: [Public Domain](https://creativecommons.org/share-your-work/public-domain/)\n\n## Publishing in a data journal\n\nData journals are publications whose primary purpose is to publish datasets. They enable you as an author to focus on the data itself, rather than producing an extensive analysis of the data which occurs in the traditional journal model. Fundamentally, data journals seek to:\n\n* Promote scientific accreditation and reuse;\n* Improve transparency of scientific methods and results;\n* Support good data management practices;\n* Provide an accessible and permanent route to the dataset.\n\n**The benefits of publishing in a data journal**\n\nPublishing in a data journal may be of interest to researchers and data producers for whom data is a primary research output. In some cases, the publication cycle may be quicker than that of traditional journals, and where there is a requirement to deposit data in an \"approved repository\", long-term curation and access to the data is assured.\n\nPublishing a data paper may be regarded as best practice in data management as it:\n\n* Includes an element of peer review of the dataset;\n* Maximises opportunities for reuse of the dataset;\n* Provides academic accreditation for data scientists as well as for front-line researchers.\n(source: [ANDS Guide](http://www.ands.org.au/working-with-data/publishing-and-reusing-data/data-journals))\n\n**General and disciplinary data journals**\n\nThere are data journals for various disciplines and also more general data journals exist. A widespread standard PID is the DOI. DOI stands for ‘Digital Object Identifier’. A DOI is an alphanumeric string assigned to an object which allows for an object to be identified over time. Often a DOI will be presented as a link which looks like: https://doi.org/10.1109/5.771073. There are other identifiers available which some repositories may use instead. 
If you are depositing in a reputable repository then you should be given some type of persistent identifier which you can use to cite and link to your data.\n\nExamples of generic data journals:\n\n* [Scientific Data](http://www.nature.com/sdata/about)  \n* [Data in Brief](http://www.journals.elsevier.com/data-in-brief)   \n* [Data Science Journal](http://www.codata.org/publications/data-science-journal)\n\nExamples of disciplinary data journals:\n\nTODO: check for life science additions\n\nOpen archaeology data;\nEarth System Science Data;\nResearch Data Journal for the Humanities and Social Sciences.\n\n## How to cite a dataset\n\nCitations to your data can add to your academic impact.\n\nA citation should include enough information so that the exact version of the data being cited can be located. Including a Persistent Identifier (PID) in the citation ensures that even if the location of the data changes, the PID will always link to the data that were used.\n\nYou can indicate in your (Creative Commons) license or user agreement that you want your data cited when reused.\n\nData citations work just like book or journal article citations and can include the following information:\n\n* Author;\n* Year;\n* Dataset title;\n* Repository;\n* Version;\n* Persistent IDentifier (PID), often works as a functional link/URL.\n\n**Examples**\n\nA widespread standard PID is the DOI. DOI stands for ‘Digital Object Identifier’. A DOI is an alphanumeric string assigned to an object which allows for an object to be identified over time. Often a DOI will be presented as a link which looks like: https://doi.org/10.1109/5.771073. There are other identifiers available which some repositories may use instead. If you are depositing in a reputable repository then you should be given some type of persistent identifier which you can use to cite and link to your data.\n\nIrino, T; Tada, R (2009): Chemical and mineral compositions of sediments from ODP Site 127‐797. 
Geological Institute, University of Tokyo. http://dx.doi.org/10.1594/PANGAEA.726855\n\n\n**Tips**\n\nTip1: Get a PID at the data repository of your choice.\nTip2: Is your PID a DOI and do you want to cite it in the format of a specific journal? Use the [DOI formatter](https://citation.crosscite.org/) from CrossCite.\n\n\nTODO: add short quiz\n\n### FAIR data\n\nFAIR stands for ‘Findable, Accessible, Interoperable, and Reusable’. The FAIR data principles act as an international guideline for the result of high-quality data management.\n\nWith the increase in volume, complexity and creation speed of data, humans are more and more relying on computational support for dealing with data. The principles were defined with the focus on machine-actionability, i.e. the capacity of computational systems to find, access, interoperate and reuse data with none or minimal human intervention.\n\n* F – Findable\n\nBy using correct metadata to describe the data, it will be findable. By using a persistent identifier the data can be found by computer systems automatically.\n\n* A – Accessible\n\nThe data should be accessible for the long term. Even when underlying data is not accessible, the describing metadata should remain available.\n\n* I – Interoperable\n\nThe data can be used and combined with other datasets. To achieve this, the data should be stored in generic file types, not in software specific file types.\n\n* R – Reusable\n\nThe options for reuse should be stated clearly in a license. Without a license there is no certainty about the options for reuse and creator rights are implicit.\n\n**How to achieve FAIR data**\n\nIn general, having a good data management plan will lead to FAIR data. In the case of privacy-sensitive data, it is possible to meet the criteria, but not to share the data openly. 
In this case you can make sure that a well-described dataset can be found online, while preventing the underlying data to be downloaded and used without permission.\n\nIf you anonymise your data, presuming the data is of limited sensitivity and you are very sure the data cannot lead back to the persons involved, you can share your data openly.\n\nThe FAIR Guiding Principles were put together and published in Scientific Data (Mark D. Wilkinson et al., “The FAIR Guiding Principles for Scientific Data Management and Stewardship,” Scientific Data 3 (March 15, 2016): 160018.).\n\nTODO: add question H5P quiz?\n\n### Open science\n\n“Open Science is the practice of science in such a way that others can collaborate and contribute, where research data, lab notes and other research processes are freely available, under terms that enable reuse, redistribution and reproduction of the research and its underlying data and methods.”\n\n(Source:[ FOSTER](https://www.fosteropenscience.eu/foster-taxonomy/open-science-definition)).\n\nYou have learned that good data management contributes to the findability, accessibility, interoperability and reusability of your research data. This does not necessarily mean that you should make your data openly available. But to open up data, you do need good data management from the earliest possible stage of your research project.\n\nTODO: add links to ORION course or other relevant elements\nFlemish open science plan?\n\n### Write your data management plan for your data reuse\n\nGo to DMPonline and open your draft data management plan created in the Introduction.\n\nYou have now completed the module on data sharing and availability for reuse. 
You should be able to complete the following questions in the section ‘Data availability for reuse’:\n\n* What secondary use of your data is intended or foreseeable?\n* Where will you make your data available?\n* What access and usage conditions will apply?\n","# What is R ?\n{:.no_toc}\n\nR is many things: a project, a language... \nAs a **project**, R is part of the [GNU free software project](http://www.gnu.org). The development of R is done under the philosophy that software should be free of charge. This is good for the user, although there are some disadvantages: R comes with ABSOLUTELY NO WARRANTY. This statement comes up on the screen every time you start R. There is no company regulating R as a product. The R project is largely an academic endeavor, and most of the contributors are statisticians, hence the sometimes incomprehensible documentation. \nAs a **computer language** it was created to allow manipulation of data, statistical analysis and visualization. It is not easy to learn the language if you haven't done any programming before but it is worth taking the time as it can be a very useful tool.  An enormous variety of statistical analyses are available and R allows you to produce graphs exactly as you want them with publication quality. 
\n\n### Good things about R\n- It's free\n- It works on Windows, Mac and Linux\n- It can deal with very large datasets (compared to Excel)\n- A lot of freedom: graphs can be produced to your own taste\n- Supports all statistical analyses: from basic to very complex\n\n### Bad things about R\n- It can struggle with extremely large datasets\n- Difficult if you don't have any programming experience \n- Open source: many people contribute thus consistency can be low\n- Open source: documentation can be poor or written by/for experts\n- Can contain bugs and errors: packages that are widely used are probably correct, niche packages can contain errors, there is no central team assessing the quality of the code\n\n# Installing R\nR is available on the [CRAN website](https://cran.r-project.org/) (Comprehensive R Archive Network). \nIt can be installed on Linux, Mac and Windows. On the top of the CRAN page is a section with **Precompiled Binary Distribution**: R versions you can download as an .exe file (for Windows users) and are easy to install. What you download is the basic R installation: it contains the base package and other packages considered essential enough to include in the main installation. Exact content may vary with different versions of R.\nAs R is constantly being updated and new versions are constantly released, it is recommended to regularly install the newest version of R. \n\n# Installing RStudio\nAlthough you can work directly in the R editor, most people find it easier to use [RStudio](https://www.rstudio.com/)  on top of R. RStudio is free and available for Windows, Mac and Linux. You need to have R installed to run RStudio. 
\n\n# RStudio user interface\nWatch this [video tutorial](https://www.youtube.com/watch?v=5YmcEYTSN7k) on the different components of the RStudio user interface and this [video tutorial](https://www.youtube.com/watch?v=o0Y478jOjGk) on how to use the RStudio user interface.\n\n### The script editor\nA script is a text file that contains all the commands you want to run. You can write and run scripts and you can also save them so next time you need to do a similar analysis you can change and re-run the script with minimal effort. An R project can contain multiple scripts. \nThe script editor highlights syntax in scripts making it easy to find and prevent errors. It has many features that will help you write scripts e.g. autocompletion, find/replace, commenting. \n\n### Autocompletion\nIt supports the automatic completion of code, e.g. if you have an object named relfreq in your workspace, type rel in the script editor and it will show a list of possibilities to complete the name.\n\n<figure id=\"figure-1\"><img src=\"../../images/Rautocompletion.png\" alt=\"autocompletion\"><figcaption><span class=\"figcaption-prefix\">Figure 1:</span> Example for autocompletion</figcaption></figure>\n\n### Find and replace\nFind and replace can be opened using Ctrl+F.\n\n### Adding comments to scripts\nIn scripts you must include comments to help you remember or tell collaborators what you did. Comments are lines that start with a # symbol. This symbol tells R to ignore  this line. 
Comments are displayed in green.\nYou can comment and uncomment large selections of code using: **Comment/Uncomment Lines**\n\n<figure id=\"figure-2\"><img src=\"../../images/Rcomment_uncomment.png\" alt=\"comment_uncomment\"><figcaption><span class=\"figcaption-prefix\">Figure 2:</span> Menu Comment/Uncomment Lines</figcaption></figure>\n\n### Adding section headings to scripts\nAdd section headings to your scripts using the following format: #Heading Name####\n\n<figure id=\"figure-3\"><img src=\"../../images/Rsection_headings.png\" alt=\"section_headings\"><figcaption><span class=\"figcaption-prefix\">Figure 3:</span> Define section headings</figcaption></figure>\n\nAt the bottom of the script editor you can quickly navigate to sections in your script. Especially in long scripts this is very useful.\n\n### Creating a new script\nClick **File** in the top menu and select **New File > R Script**.\n\n<figure id=\"figure-4\"><img src=\"../../images/Rnew_script.png\" alt=\"new_script\"><figcaption><span class=\"figcaption-prefix\">Figure 4:</span> File Menu / New File</figcaption></figure>\n\nBesides a simple R script, there are many other file types you can create: \n- [R markdown](http://rmarkdown.rstudio.com/) file: incorporate R-code and its results in a report \n- R Notebook: R Markdown file with chunks of code that can be executed interactively, with output visible beneath the code\n- R Sweave file: incorporate R-code and its results in a Latex report\n\n### Opening an existing script\nClick **File** in the top menu and select **Open File**.\n\nScripts are opened as a tab in the script editor. You can open several scripts at the same time in RStudio. \n\n### Running a script\nTo run a script you select the code that you want to execute in the script editor and click the **Run** button at the top right of the script editor. 
\n\n![run_script](../../images/Rrun_script.png)\n\nThe code will be executed in the console.\n\n### Saving a script\n\nIf there are unsaved changes in a script, the name of the script will be red and followed by an asterisk. To save the script click the **Save** button: ![save_script](../../images/Rsave_script.png)\n\nR scripts should have the extension .R \nOnce it is saved the asterisk disappears and the name becomes black.\n\n### The console\nThe  > symbol in the console shows that R is ready to execute code \ne.g. type 10+3 and press return\n```\n> 10 + 3\n[1] 13\n>\n```\nThe result is printed in the console. \n\nIt is recommended to write commands in a script rather than typing them directly into the console. Creating a script makes it easier to reproduce, repeat and describe the analysis. If you select commands in the script editor and press the **Run** button, you will see the commands appearing in the console as they are executed. \n\nIf the > symbol does not reappear upon execution of a command it means that R has crashed or is still calculating. To terminate a command press Esc.\n\nThe console also has many [features that make life easier](https://support.rstudio.com/hc/en-us/articles/200404846-Working-in-the-Console) like autocompletion, retrieving previous commands.\n\n### Environment\nA list of all variables (numbers, vectors, plots, models...) that have been imported or generated. The variables that R creates and manipulates are called *objects*. \nTo remove all variables that have been generated in the RStudio session:\n```\n> rm(list=ls())\n```\nls() lists the objects in the current workspace and rm() removes them.\n\n### History\nAn overview of the last 500 commands that were run in the console: see [how to use the history](https://support.rstudio.com/hc/en-us/articles/200526217-Command-History).\n\n### Connections\nAn interface to easily [connect to databases](http://db.rstudio.com/) in R. 
\n\n### Files\nThe list of files and folders in the working directory. RStudio has a default working directory, typically your home folder.\n\n### Changing the working directory\n Often you want to work in the folder that contains the data. In that case you can change the working directory. \n Check which folder R is using as a working directory:\n```\n> getwd()\n```\nChange the working directory:\n```\n> setwd(\"D:/trainingen/zelfgegeven/R/\")\n```\n\n> ### {% icon comment %} Comment\n>\n> You need to use / or \\\\ in paths. Either will work but \\ will not since R treats a single backslash in a string as the start of an escape sequence. \n{: .comment}\n\nChanging your working directory will make relative file references in your code invalid so you type this in the console **at the start of the analysis**.\n\nAlternatively you can change the working directory in the **Files** tab, expand **More** and select **Set As Working Directory**.\n\n> ### {% icon hands_on %} Hands-on: Demo\n>\n> 1. Download the demo script for this lesson and open it in RStudio\n> [`Demo_1.R`](http://data.bits.vib.be/pub/trainingen/RIntro/Demo_1.R)\n> 2. From the demo script run the **Set working directory** section\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Exercise 1\n>\n> Set the working directory to the folder that contains the demo script that you have downloaded and check if it was changed. \n{: .hands_on}\n\nTo list the files in the working directory:\n```\n> list.files() \n```\n\n### Plots\nPlots that are generated by the code you run will appear here.\nTo save a plot click the **Export** button: ![export_plot](../../images/Rexport_plot.png)\n\n### Packages\nR is popular because of the enormous diversity of packages. R is essentially a modular environment and you install and load the modules (packages) you need. 
Packages are available at the [CRAN](https://cran.r-project.org/web/packages/available_packages_by_name.html) and [Bioconductor](http://www.bioconductor.org/packages/release/BiocViews.html) websites. \nInstalling a package means that a copy of the package is downloaded and unzipped on your computer. If you want to know in what directory R stores the packages, type:\n\n```\n> .libPaths()\n[1] \"D:/R-3.6.0/library\"\n>\n```\nto see the default path where R stores packages. If you want to install packages in a different folder use the *lib* argument of the install.packages() function:\n\n```\n> install.packages(\"car\",lib=\"C:/Users/Janick/R\")\n```\nYou only need to install a package once, as it is saved on your computer.\n\n### Installing R packages\nWatch this [video tutorial](https://www.youtube.com/watch?v=u1r5XTqrCTQ) on how to install CRAN packages. \nWhen you have made changes to the right side of the RStudio user interface (packages, files tab...), R is sometimes slow to show these changes. In that case hit the refresh button: ![refresh_button](../../images/Rrefresh_button.png)\n\nSome packages are not available on the CRAN site. Download in compressed format (as a .zip or .tar.gz file) from the source site. To install: select **Install from Package Archive File (.zip; .tar.gz)** in the **Install Packages** window and R will put it in the appropriate directory. \n\n<figure id=\"figure-5\"><img src=\"../../images/Rinstall_zip.png\" alt=\"install_zip\"><figcaption><span class=\"figcaption-prefix\">Figure 5:</span> Installing packages downloaded from their source site</figcaption></figure>\n\n### Installing Bioconductor packages\nBioconductor is a set of R packages that provides tools for the analysis of high-throughput data, e.g. 
NGS data.\nMake sure you have the BiocManager package installed:\n```\n> if (!requireNamespace(\"BiocManager\")) \ninstall.packages(\"BiocManager\") \n```\nThe if statement is checking if you already have the BiocManager package installed, if not then install.packages() will install it. BiocManager is a package to install and update Bioconductor packages. Once BiocManager is installed, you can install the Bioconductor core packages:\n```\n> BiocManager::install()\n```\nTo install additional Bioconductor packages e.g. **GenomicFeatures** you type the following command:\n```\n> BiocManager::install(\"GenomicFeatures\")\n```\nOverview of all available Bioconductor [packages](https://www.bioconductor.org/packages/release/BiocViews.html#___Software) and [workflows](https://www.bioconductor.org/packages/release/BiocViews.html#___Workflow).\n\n### Installing packages from GitHub\nGit is a free and open source version control system. Version control helps software developers manage changes to code by keeping track of every change in a special database. If a mistake is made, the developer can turn back the clock and compare earlier versions of the code to fix the mistake. \nThere is an install_github() function in the devtools package to install R packages hosted on GitHub:\n```\n> install.packages(\"devtools\") \n> library(devtools)\n> devtools::install_github(\"statOmics/MSqRob@MSqRob0.7.6\")\n```\n\n### Loading packages\n Each time you want to use a package you have to load it (activate its functions). 
Loading a package is done by selecting it in the list of installed packages or by typing the following command:\n```\n> library(\"name_of_package\")\n```\nIf R responds:\n```\nError in library(car) : there is no package called 'car'\n```\nor similar, it means that the car package needs to be installed first.\n\n> ### {% icon hands_on %} Hands-on: Demo\n>\n> Run commands of the **Installation** section of the demo script\n{: .hands_on}\n\n### Help\nYou can find a lot of documentation online: e.g. the [getting help section](https://www.r-project.org/help.html) of the R website. R documentation is not easily accessible nor well-structured  so it can be a challenge to consult the help files of R packages online. By far the most user-friendly interface for searching the R documentation is the [Rdocumentation website](https://www.rdocumentation.org/).\nAdditional useful links:\n- [Documentation of RStudio](https://support.rstudio.com/hc/en-us/categories/200035113-Documentation) \n- [Quick R by DataCamp](https://www.statmethods.net/about/sitemap.html): loads of basic and advanced tutorials\n- [R-bloggers](https://www.r-bloggers.com/): R-news and tutorials contributed by bloggers\n- [Rseek](https://rseek.org/): Google specifically for R.\n- [Google's R style guide](https://google.github.io/styleguide/Rguide.xml): Programming rules for R designed in collaboration with the entire R user community at Google to make R code easier to read, share, and verify.\n\nAccess the R documentation in RStudio using commands: help() or ?\n\n> ### {% icon hands_on %} Hands-on: Demo\n>\n> From the demo script run the **Get help** section\n{: .hands_on}\n\n### Viewer\nViews HTML files that are located on your computer.\n\n[All RStudio keyboard shortcuts](https://support.rstudio.com/hc/en-us/articles/200711853-Keyboard-Shortcuts)\n\n# Expressions in R\nR can handle any kind of data: numerical, character, logical... 
\n\n### Character data\nCharacter data like \"green\", \"cytoplasm\" must be typed in between **single or double quotes**:\n```\n> x <- \"Hello\"\n```\nTo use quotes in the text escape the quotes:\n```\n> x <- \"say \\\"Hello\\\"\"\n```\nNames of packages, files, paths on your computer, urls are all text data and need to be typed in between quotes. Names of variables do not. \n\n### Booleans\nBoolean values are **TRUE** and **FALSE** without quotes because they are Booleans not text. \n\n> ### {% icon comment %} Comment\n>\n> R is case sensitive: true and false are not recognized as Booleans. They have to be written in capitals.\n{: .comment}\n\n### Missing values\nMissing values are represented by **NA** (Not Available) without quotes. \nImpossible values (e.g., dividing by zero) are represented by the symbol NaN (Not A Number).\n\n### Arithmetic operators\n\n<figure id=\"figure-6\"><img src=\"../../images/Rarithmetic_operators.png\" alt=\"arithmetic_operators\"><figcaption><span class=\"figcaption-prefix\">Figure 6:</span> Overview of arithmetic operators</figcaption></figure>\n\nArithmetic operators follow the standard **order of priority**, with exponentiation the highest and addition and subtraction the lowest priority, but you can control the order with **parentheses**. Do not use brackets as these are for other purposes in R. \n\n### Logical operators\nLogical operators can be used to selectively execute code based on certain conditions. They allow to create logical expressions (comparisons) that return TRUE or FALSE. \n\n<figure id=\"figure-7\"><img src=\"../../images/Rlogic_operators.png\" alt=\"logic_operators\"><figcaption><span class=\"figcaption-prefix\">Figure 7:</span> Overview of logical operators</figcaption></figure>\n\nLogical expressions may be combined using logical operators. The NOT operator (!) can be used to assess whether something is NOT the case. \n\n```\n> x = 1\n> y = 2   \n> z = x > y      \t\tis x larger than y? 
\n> z              \t\t\tFALSE \n> u = TRUE\n> v = FALSE \n> u & v          \t\t\tu AND v: FALSE \n> u | v          \t\t\tu OR v: TRUE \n> !u             \t\t\tNOT u: FALSE\n```\n> ### {% icon hands_on %} Hands-on: Exercise 2a\n>\n>    > ### {% icon question %} Question \n>    > What's the difference between x=2 and x==2 ? \n>    >\n>    > > <details markdown=\"1\">\n>    > > <summary>{% icon solution %} Solution\n>    > > </summary>\n>    > >  The = operator attributes a value to a variable (see next section), x becomes 2. \n>    > >  The == is a logical operator, testing whether the logical expression x equals 2 is TRUE or FALSE.\n>    > > </details>\n>    >\n>    {: .question }\n>\n{: .hands_on }\n\n> ### {% icon hands_on %} Hands-on: Exercise 2b\n>\n> Check if the words UseR and user are equal. \n{: .hands_on}\n\n> ### {% icon comment %} R is case sensitive\n>\n> As exercise 2b showed R is indeed case sensitive.\n{: .comment}\n\n# Assigning variables\nA variable allows you to save a value or an object (a plot, a table, a list of values) in R. \nA value or object is assigned to a variable by the assignment operator **<-**\nIt consists of the two characters < (less than) and - (minus): \n```\n> v <- 4\tnow the value of variable v is 4\n````\nIn most contexts the = operator can be used as an alternative:\n```\n> v <- 4 \n> v = 4 \ngive the same result: a variable called v with value 4\n```\nAfter R has performed the assignment you will not see any output, as the value 4 has been saved to variable v. You can access and use this variable at any time and print its value in the console by running its name:\n```\n> v\n[1] 4\n```\nYou can now use v in expressions instead of 4\n```\n> v * v\n[1] 16\n```\nYou can re-assign a new value to a variable at any time: \n```\n> v <- \"a cool variable\"\n> v\n[1] \"a cool variable\"\n```\n\nR is not very fussy as far as syntax goes. Variable names can be anything, though they cannot begin with a number or symbol. 
Informative names often involve using more than one word. Providing there are **no spaces** between these words you can join them using dots, underscores and capital letters though the Google R style guide recommends that names are joined with a dot. \n\n### Using operators to create variables\nYou can combine variables into a new one using operators (like + or /).\n\n### Using functions to create variables\nA function is a piece of code that performs a specific task. \nFunctions are called by another line of code that sends a request to the function to do something or return a variable. The call may pass *arguments* (inputs) to the function. In other words a function allows you to combine variables (arguments) into a new variable (returned variable).\nThere are lots of built in functions in R and you can also write your own. Even the base package supplies a large number of pre-written functions to use. Other packages are filled with additional functions for related tasks.\nCalling a function in R has a certain syntax:\n**output <- function(list of arguments)** \nFor example: \n```\n> p <-  ggplot(mtcars,aes(wt,mpg))\n```\nIn this example **ggplot()** is the **function**. The brackets () are always needed. Before a function can start the actions and calculations  it encodes, it needs prior information: **input** data and parameter settings. These are called the **arguments** of the function. In this example the arguments are:\n- **mtcars**: a table containing the input data\n- **aes(wt,mpg)**: defines the two columns of the table you want to plot: weight (wt) along the X-axis and miles/gallon (mpg) along the Y-axis.\n\nTo see the arguments of a function you can use **?** or **help()**:\n```\n> ? ggplot \n> help(ggplot)\n```\nThis opens the documentation of the function in the **Help** tab including an overview of the arguments of the function. 
At the bottom of the documentation page you find examples on how to use the function.\nThe function generates a plot so the plot **p** is the **output** of the function.  \n\n> ### {% icon hands_on %} Hands-on: Demo\n>\n> From the demo script run the **Assigning variables** section\n{: .hands_on}\n\n\n> ### {% icon hands_on %} Hands-on: Exercise 3a\n>\n> 1. Create a variable called patients with value 42\n> 2. Print the value of patients divided by 2\n> 3. Create a variable called patients_gr2 with value 24\n> 4. Print the total number of patients\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  patients <- 42\n>    >  patients/2\n>    >  patients_gr2 <- 24\n>    >  total_patients <- patients + patients_gr2\n>    >  total_patients\n>    > ```\n>    > </details>\n>\n>    > ### {% icon question %} Question\n>    >\n>    >  What will happen when you run this code ?\n>    > ```\n>    >  \"patients\" <- 42\n>    >  \"patients\"/2\n>    >  ```\n>    {: .question}\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Exercise 3b\n>\n> Check the arguments of the mean() function. \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  ?mean\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\nThe mean() function has many arguments and each argument has a default value. To use the default values simply do not specify these arguments in the function call. 
You only have to specify the arguments for which you want to use a value other than the default.\nTo show the **examples** section instead of the full documentation page:\n```\n> example(min) \n```\n\n> ### {% icon hands_on %} Hands-on: Exercise 3c\n>\n> Calculate and print the sum of patients and patients_gr2 using the sum() function.\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} solution: answer\n>    > </summary>\n>    >  ```\n>    >  sum(patients,patients_gr2)\n>    >  ```\n>    > </details>\n>\n>    > ### {% icon question %} Question\n>    >\n>    >  Replace the sum() function with the mean() function. What happens ?\n>    >    > <details markdown=\"1\">\n>    >    > <summary>{% icon solution %} solution: answer\n>    >    > </summary>\n>    >    >  Look at the help of the sum() function. What's the first argument ? \n>    >    >  Compare with the first argument of the mean() function\n>    >    > </details>\n>    >\n>    {: .question}\n>    > ### {% icon question %} Question\n>    >\n>    >  Will the code below work ?\n>    >  ```\n>    >  sum (patients,patients_gr2)\n>    >  ```\n>    {: .question}\n>    > ### {% icon question %} Question\n>    >\n>    >  Will the code below work ?\n>    >  ```\n>    >  sum ( patients , patients_gr2 )\n>    >  ```\n>    {: .question}\n{: .hands_on}\n\nSometimes functions from different packages have the same name. In that case use **package::function** to specify the package you want to use, e.g. 
`ggplot2::ggplot()` where `ggplot2` is the name of the package and `ggplot()` is the name of the function.\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 3d\n>\n> Create a variable `patients_gr3` with value \"twenty\" and print the total number of patients\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  patients_gr3 <- \"twenty\"\n>    >  patients + patients_gr3\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 3e\n>\n> 1. Create variable `x` with value `5`\n> 2. Create variable `y` with value `2`\n> 3. Create variable `z` as the sum of `x` and `y` and print the value of `z`\n> 4. Print `x - y`\n> 5. Print the product of `x` and `y` and add `2` to it\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  x <- 5\n>    >  y <- 2\n>    >  z <- x+y\n>    >  z\n>    >  x-y\n>    >  x*y+2\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 3f\n>\n> What is the difference between:\n> 1. `correctLogic <- TRUE`\n> 2. `incorrectLogic <- \"TRUE\"`\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 3g\n>\n> Is there a difference between:\n> 1. `name <- \"Janick\"`\n> 2. `name <- 'Janick'`\n> 3. `name <- Janick`\n{: .hands_on}\n","# Data structures in R\n{:.no_toc}\n\nThe power of R lies not in its ability to work with simple numbers but in its ability to work with large datasets. R has a wide variety of data structures including scalars, vectors, matrices, data frames, and lists.\n\n### Vectors\nThe simplest data structure is the *vector*, a single row consisting of data values of the same type, e.g. all numbers, characters, Booleans... \n\n#### Creating a vector\nThe function **c()** (short for \"combine values\" in a vector) is used to create vectors. 
The only arguments that need to be passed to c() are the  values that you want to combine into a vector. \nYou can create a **numeric** (a), **character** (b) or **logical** (c) vector:\n```\na <- c(1,2,5.3,6,-2,4)\nb <- c(\"janick\",\"jasper\",\"niels\")\nc <- c(TRUE,TRUE,TRUE,FALSE,TRUE,FALSE)\n```\nYou can also create a vector by **joining existing vectors with the c () function:**\n```\nx1 <- c(1,2,3)\nx2 <- c(3,4)\nc(x1,x2)\n# [1] 1 2 3 3 4\n```\n> ### {% icon hands_on %} Hands-on: Demo\n>\n> From the demo script run the **Data Creation: vectors** section\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Exercise 4a\n>\n> You count every day how many plants of the initial set of 40 plants developed lesions as a result of a mold infection. \n> \n> 1. Create a vector called Plants_with_lesions containing the results of your counts: 1,3,4,2,6\n> 2. Create a vector days containing the days of the week in the following format: Mon, Tues, Wednes, Thurs, Fri.\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  > Plants_with_lesions <- c(1,3,4,2,6)\n>    >  > days <-  c(\"Mon\",\"Tues\",\"Wednes\",\"Thurs\",\"Fri\")\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 4b\n>\n> Create a vector newVector with the following elements: 2,5,5,3,3,6,2 and print its content.\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  newVector <- c(2,5,5,3,3,6,2)\n>    >  newVector\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\nIf you need a sequence of consecutive integers you can create it with the **start:end** notation, e.g. 
a vector with values from 5 through 9\n```\n5:9\t\n# [1] 5 6 7 8 9\n```\nYou can also define a decreasing sequence of integers:\n```\n9:5\t\n# [1] 9 8 7 6 5\n```\nYou can create the same vector with the seq() function:\n```\nseq(5,9)  \n# [1] 5 6 7 8 9\n```\n\nBut seq (short for sequence) can do a lot more: it allows to take increments other than 1. It takes four arguments:\n- *from*: the first number in the sequence\n- *to*: the last possible number in the sequence. \n- *by=increment*: increment, can be added or subtracted depending on the start and the end of the sequence. If from > to then subtract increment, if from < to then add increment.\n- *length.out*: alternative to end, number of elements in the vector.\n\nAs you can see, some arguments of a function have a name, e.g. the increment argument is called *by*. \n\nThe **rep()** function **repeats** a value a specified number of times.\n```\nrep(\"bla\", 3)\n# [1] \"bla\" \"bla\" \"bla\"\n```\nYou can combine these functions with the c() function to make more complicated vectors:\n```\nc(rep(1,3), rep(2,3), rep(3,3))\n# [1] 1 1 1 2 2 2 3 3 3\n```\n\nTo generate a **random** set of **numbers** drawn from a normal distribution with a given mean and spread use the **rnorm(n, mean = 0, sd = 1)** function where:\n- *n*: how many random numbers do you want ?\n- *mean*: mean of the normal distribution\n- *sd*: standard deviation of the normal distribution\n```\nrnorm(1000, 3, 0.25)\n```\ngenerates 1000 numbers from a normal distribution with mean 3 and sd=0.25\n\nThe normal distribution implies that numbers close to the mean have a higher probability of occurring than numbers far from the mean.\n\nIf you want a set of random numbers from a uniform distribution (every number in the specified range has the same probability of being drawn) you can use the **runif(n, min=0, max=1)** function where:\n- *n*: how many random numbers do you want ?\n- *min*: lowest number of the range of numbers to choose from\n- *max*: 
highest number of the range of numbers to choose from\n\nThe most freedom is given by the **sample(x, size, replace = FALSE)** function: it takes a random sample of a specified size from the elements of x either with or without replacement:\n- *x*: a vector of elements from which to choose\n- *size*: how many random numbers do you want ?\n- *replace*: place sampled numbers back in set or not ?\n```\nsample(c(0,1), 100, replace=TRUE)\n```\t\ngenerates a set of 100 random zeros or ones.\n\nSuppose you want to simulate 10 rolls of a die. Because the outcome of a single roll is a number between 1 and 6, your code looks like this:\n```\nsample(1:6, 10, replace=TRUE)\n# [1] 2 2 5 3 5 3 5 6 3 5\n```\nYou tell sample() to return 10 values, each in the range 1:6. Because every roll of the die is independent, you sample with replacement. This means that you put the element you've drawn back into the list of values to choose from.\n\n> ### {% icon hands_on %} Hands-on: Exercise 4c\n>\n> For a study checking the effect of a drug on a disease, we want to store patient info. \n> \n> 1. Create a vector named ID containing numerical values 1,2,3,4\n> 2. Create a vector named treatment containing values A, placebo, B, and a missing value.\n> 3.  Use the rep() function to create a vector called smoking containing booleans true, true, true, and false. Check the documentation and the examples of usage of rep(). 
\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  ID <- 1:4\n>    >  treatment <- c(\"A\",\"placebo\",\"B\",NA)\n>    >  smoking <- c(rep(TRUE,3),FALSE)\n>    >  ```\n>    > </details>\n>\n>    > ### {% icon question %} Question\n>    >\n>    >  What will happen when you run this code ?\n>    >  ```\n>    >  smoking <- c(rep(true,3),false)\n>    >  ```\n>    {: .question}\n>    > ### {% icon question %} Question\n>    >\n>    >  What will happen when you run this code ?\n>    >  ```\n>    >  smoking <- c(rep(\"true\",3),\"false\")\n>    >  ```\n>    {: .question}\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 4d\n>\n> Create vector threes consisting of 3,3,3,3,3,3,3 and print the content of threes\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  threes<-rep(3,7)\n>    >  threes\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 4e\n>\n> Print ha ha ha ha\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  rep(\"ha\",4) \n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n> ### {% icon comment %} Comment\n>\n> Vectors cannot hold values of different types! R automatically converts all values to the same type so that the vector can hold them. If one of the values is a string all values will be converted to strings or in case of a mix of integers and booleans all values will be converted to integers. \n{: .comment}\n\n> ### {% icon comment %} Comment\n>\n> Words used as values have to be written between quotes, words used as variable names do not! 
If R encounters a word without quotes it will try to find a variable with that name.\n{: .comment}\n\n#### Referring to elements of a vector\nEvery element in a vector is assigned an index (= its position in the vector) in the order in which elements were entered. This index starts with one, not zero. \n\nYou can extract elements from vectors in two ways:\n1. You directly identify specific elements using their indices\n2. You create a logical operation to select certain elements.\n\nTo refer to elements of a vector use indices or a logical operation inside square brackets []\ne.g. to retrieve the 2nd element of vector a use:\n```\na[2]\n```\nto retrieve the 2nd, 3rd and 4th element of vector a use:\n```\na[2:4]\n```\nto retrieve the 2nd and 4th element of vector a use:\n```\na[c(2,4)]\n```\nYou also see [] when you look at output in the console. The number in between the square brackets is the index of the first value on the line. \n```\nv <- c(rep(5,10),rep(10,5))\n#[1] 5 5 5 5 5 5 5 5 5 5 10 10\n#[13] 10 10 10 \n```\nThere are 12 values on the first line, so on the second line of data, the first value (10) is actually on the 13th position in the vector v. So [13] refers to the index of the first element on the line.\n\nRetrieving elements using a logical operation is done as follows:\n```\nx\n#[1] 1 3 11 1 7\nx[x < 4]\n#[1] 1 3 1\n```\nRetrieving data with logical operators is based on the following fact: every logical statement produces the outcome TRUE or FALSE.\n```\nx < 4\n#[1]  TRUE  TRUE  FALSE  TRUE  FALSE\n```\n\nLogical operators applied to vectors will result in a vector of the same length consisting of TRUE or FALSE values depending on whether the statement is true for the particular element. If you use the outcomes of a logical operation to retrieve elements of a vector, only the elements where the outcome is TRUE will be selected. 
\n\n> ### {% icon hands_on %} Hands-on: Demo\n>\n> From the demo script run the **Data extraction: vectors** section\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Exercise 5a\n>\n> Create a vector named x containing the numbers 20 to 2. Retrieve elements that are larger than 5 and smaller than 15.\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  x <- 20:2\n>    >  x[x > 5 & x < 15]\n>    >  ```\n>    > </details>\n>\n>    > ### {% icon question %} Question\n>    >\n>    >  What will happen when you run this code ?\n>    > ```\n>    >  x[15 > x > 5]\n>    >  ```\n>    {: .question}\n>    > ### {% icon question %} Question\n>    >\n>    >  What will happen when you run this code ?\n>    > ```\n>    >  x(x > 5 & x < 15)\n>    >  ```\n>    {: .question}\n>    > ### {% icon question %} Question\n>    >\n>    >  What will happen when you run this code ?\n>    > ```\n>    >  x[x > 5] & x[x < 15]\n>    >  ```\n>    {: .question}\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Exercise 5b\n>\n> 1. Retrieve the 4th and 5th elements from the days vector.\n> 2. 
Retrieve elements from Plants_with_lesions that are larger than 2.\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  days[c(4,5)]\n>    >  Plants_with_lesions[Plants_with_lesions > 2]\n>    >  ```\n>    > </details>\n>\n>    > ### {% icon question %} Question\n>    >\n>    >  What will happen when you run this code ?\n>    > ```\n>    >  days[4,5]\n>    >  \n>    >  ```\n>    {: .question}\n>    > ### {% icon question %} Question\n>    >\n>    >  What will happen when you run this code ?\n>    > ```\n>    >  days[4:5]\n>    >  \n>    >  ```\n>    {: .question}\n>    > ### {% icon question %} Question\n>    >\n>    >  What will happen when you run this code ?\n>    > ```\n>    >  days(4:5)\n>    >   \n>    >  ```\n>    {: .question}\n{: .hands_on}\n\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 5c\n>\n> Create vector y with elements 9,2,4 and retrieve the second element of y.\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  y <-c (9,2,4)\n>    >  y[2] \n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 5d\n>\n> 1. Create vector z with elements 1, 2, 3, 4, 12, 31, 2, 51, 23, 1, 23, 2341, 23, 512, 32, 312, 123, 21, 3\n> 2. Retrieve the 3rd, 4th, 5th, 6th and 7th element\n> 3. Retrieve the 2nd and 4th element\n> 4. 
Retrieve elements from z that are larger than 100\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  z <- c(1,2,3,4,12,31,2,51,23,1,23,2341,23,512,32,312,123,21,3)\n>    >  z[3:7] \n>    >  z[c(2,4)]\n>    >  z[z > 100] \n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n\n> ### {% icon hands_on %} Hands-on: Demo\n>\n> From the demo script run the **Logical and arithmetic operations on variables** section\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 5h\n>\n> Retrieve elements from newVector (exercise 4b) that are larger than the corresponding elements of vector threes (exercise 4d).\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  newVector[newVector > threes]\n>    >   \n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n#### Removing, changing or adding elements in a vector\nTo remove an element from a vector use a negative index: '-' indicates 'NOT' followed by the index of the element you want to remove, e.g. to remove the second element of vector z use:\n```\nz <- z[-2]\n```\n\nChange or add elements by assigning a new value to that element. 
\n\n> ### {% icon hands_on %} Hands-on: Demo\n>\n> From the demo script run the **Data removal vectors** section\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Exercise 6a\n>\n> From vector x (exercise 5a) remove the first 8 elements and store the result in x2.\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  x2 <- x[-(1:8)]\n>    >  x2\n>    >  ```\n>    > </details>\n>\n>    > ### {% icon question %} Question\n>    >\n>    >  What will happen when you run this code ?\n>    > ```\n>    >  x2 <- x[-1:8]\n>    >  \n>    >  ```\n>    {: .question}\n{: .hands_on}\n\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 6b\n>\n> Retrieve the same elements from z as in exercise 5d2 but first replace the 3rd element by 7.\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  z[3] <- 7\n>    >  z[3:7] \n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n### Factors\nYou can tell R that a variable is categorical (= text labels representing categories although sometimes numbers are also used) by making it a factor. \n\nThe difference between a categorical variable and a continuous variable is that a categorical variable represents a limited number of categories. A continuous variable is the result of a measurement and can correspond to an infinite number of values. \n\nIn most cases categorical data is used to **describe** other data, it is not used in calculations e.g. which group does a measurement belong to. Storing data as factors ensures that the graphing and statistical functions in R will treat such data correctly.\n\nThere are two types of categorical data:\n1. unranked categorical data do not have an implied order\n2. ranked categorical data do have a natural ordering\n\nR will treat factors by default as unranked but you can create ordered (ranked) factors. 
\n\nTo create a factor, first create a vector and then convert it to a factor using the factor() function:\n```\nv <- c(1,4,4,4,3,5,4,4,5,3,2,5,4,3,1,3,1,5,3,4)\nv\n#[1] 1 4 4 4 3 5 4 4 5 3 2 5 4 3 1 3 1 5 3 4\nf <- factor(v,ordered=TRUE)\nf\n#[1] 1 4 4 4 3 5 4 4 5 3 2 5 4 3 1 3 1 5 3 4\n#Levels: 1 < 2 < 3 < 4 < 5 \n```\n\n> ### {% icon comment %} Comment\n>\n> The factor() function creates \"Levels\": these are the labels of the categories.\n{: .comment}\n\nThe only **required argument** of the factor() function is a **vector** of values which will be factorized. Both numeric and character vectors can be made into factors but you will use factor() typically for numerical data that represents categories. \n\nWhen you create a vector containing text values in R you have to factorize it but if you store the vector as a column in a data frame, text data is automatically converted to a factor. \n\nWhen you import data into R using read.() functions, the data is automatically stored in a data frame so text will be automatically converted into a factor. \n\nSo in reality (since you mostly import data into R) you use factor() mainly to factorize **numbers** that represent categories.\n\nBy default, factor() transforms a vector into an unordered factor, as does the automated factorization of the read.() functions. Unordered means that the categories are processed in alphabetical order: High will be plotted before Low since H comes first in the alphabet. \n\nIf the categories are ranked, you have to create an ordered factor, you have to add two additional arguments: \n- Set *ordered* to TRUE to indicate that the factor is ordered\n- *levels*: a vector of category labels (as strings) in the correct order\n\n> ### {% icon hands_on %} Hands-on: Demo\n>\n> From the demo script run the **Data creation: factors** section\n{: .hands_on}\n\n> ### {% icon hands_on %} Hands-on: Extra exercise 7a\n>\n> 1. Create a vector gender with the following elements: Male, Female, male. \n> 2. 
Convert gender into a factor with levels: Male and Female\n> 3. Print the content of the factor. What happens?\n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```\n>    >  gender <- c(\"Male\",\"Female\",\"male\")\n>    >  gender <- factor(gender,levels=c(\"Male\",\"Female\"))\n>    >  gender\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n","## 11.1 Introduction\nSo now that we know how to make functions, how can you re-use them? Imagine that you've started writing code and functions in one file and the project has grown to such an extent that it would be easier to maintain it in different files each containing a specific part of the project. Or you want to re-use some of the functions in other projects as well. \n\nIn Python you can import functions and chunks of code from files. Such a file containing the functions is called a *module*. Generally we say that we import a *definition* from a *module*. A module can have one or multiple functions in it. \nThe file name is the module name with the suffix `.py` appended. \n\nUsing the code from this module is possible by using **import**. In this way you can import your own functions, but also draw on a very extensive library of functions provided by Python (built-in modules). We will first look at the syntax for imports and how to import your own functions, then explore the most commonly used Python libraries.\n\n## 11.2 How imports work\nThe easiest way to import a module looks like this:\n\n```python\nimport module1\n```\n\nImagine that in the module `module1`, there is a function called `getMeanValue()`. 
This way of importing does not make the name of the function available; it only remembers the module name `module1` which you can then use to access the functions within the module:\n\n```python\nimport module1\nmodule1.getMeanValue([1,2,3])\n```\n\n## 11.3 How to create your own module\nThe easiest example is importing a module from within the same working directory. Let's create a Python module called `module1.py` with the code of the function `getMeanValue()` that we have written earlier (and you can find here below). \n\n> ### {% icon hands_on %} Create your own module\n>\n> To create your own module from Jupyter Notebook, follow these steps:\n> 1. In order to create a module in Jupyter Lab, first create a new notebook \n> 2. Rename the notebook (e.g. 'module1.ipynb') and copy paste the code in the notebook \n> 3. Click 'File', 'Download as' and 'Python' \n> 4. Jupyter will now download it to some local folder; copy it to your current working directory (in our case in the same directory as we're in right now). \n>\n{: .hands_on}\n\nUnfortunately, Jupyter Notebook doesn't have a streamlined & straightforward way of creating Python modules and Python scripts. When you export the notebook, it will always export the whole Notebook and not just a part of it, which makes it very messy if you have a very large notebook. \n\nImport the following code in the `module1.py` file. \n\n\n```python\n# When you download this as a Python script, Jupyter will automatically insert the environment shebang here. \n\ndef getMeanValue(valueList):\n    \"\"\"\n    Calculate the mean (average) value from a list of values.\n    Input: list of integers/floats\n    Output: mean value\n    \"\"\"\n    valueTotal = 0.0\n \n    for value in valueList:\n        valueTotal += value\n    numberValues = len(valueList)\n    \n    return (valueTotal/numberValues)\n```\n\n## 11.4 Import syntax \nWe can now use the module we just created by importing it. 
In this case where we import the whole 'module1' file, we can call the function as a method, similar to the methods for lists and strings that we saw earlier:\n\n\n```python\nimport module1\n\nprint(module1.getMeanValue([4,6,77,3,67,54,6,5]))\n```\n\nIf we were to write code for a huge project, long names can get exhaustive. Programmers will intrinsically make shortcut names for functions they use a lot. Renaming a module is therefore a common thing to do (e.g. NumPy as np, pandas as pd, etc.):\n\n\n```python\nimport module1 as m1\n\nprint(m1.getMeanValue([4,6,77,3,67,54,6,5]))\n```\n\nWhen importing a file, Python only searches the current directory, the directory that the entry-point script is running from, and sys.path which includes locations such as the package installation directory (it's actually a little more complex than this, but this covers most cases).\n\nHowever, you can specify the Python path yourself as well. If you're using the materials from [Github](https://github.com/vibbits/gentle-hands-on-python), note that within our folders there is a directory named `modules` and within this folder, there is a module named `module2` (recognizable due to its .py extension). In that module there are two functions: 'getMeanValue' and 'compareMeanValueOfLists'. \n\n\n```python\nfrom modules import module2\n\nprint(module2.getMeanValue([4,6,77,3,67,54,6,5]))\n```\n\n\n```python\nfrom modules import module2 as m2\n\nprint(m2.getMeanValue([4,6,77,3,67,54,6,5]))\n```\n\nAnother way of writing this is with an absolute path to the module. You can explicitly import an attribute from a module.\n\n\n```python\nfrom modules.module2 import compareMeanValueOfLists\n\nprint(compareMeanValueOfLists([1,2,3,4,5,6,7], [4,6,77,3,67,54,6,5]))\n```\n\nSo here we *import* the function compareMeanValueOfLists (without brackets!) 
from the file *module2* (without .py extension!).\n\nIn order to have an overview of all the different functions within a module, use `dir()`:\n\n\n```python\ndir(module2)\n```\n\n## 11.5 Built-in Modules\n\nThere are several built-in modules in Python, which you can import whenever you like.\n\nPython has many ready-to-use functions that can save you a lot of time when writing code. The most common ones are **time**, **sys**, **os/os.path** and **re**.\n\n### 11.5.1 `time`\nWith **time** you can get information on the current time and date, ...:\n\n\n```python\nimport time\ntime.ctime()  # Print current day and time\n```\n\n\n```python\ntime.time()   # Print system clock time\n```\n\n\n```python\ntime.sleep(10)       # Sleep for 10 seconds - the program will wait here\n```\n\nSee the [Python documentation](https://docs.python.org/3/library/time.html) for a full description of time. Also see [datetime](https://docs.python.org/3/library/datetime.html), which is a module to deal with date/time manipulations.\n\n\n### 11.5.2 `sys`\ngives you system-specific parameters and functions:\n\n\n```python\nimport sys\n\n```\n\n\n```python\nsys.argv  # A list of parameters that are given when calling this script \n          # from the command line (e.g. 
''python myScript a b c'')\n```\n\n\n```python\nsys.platform # The platform the code is currently running on\n```\n\n\n```python\nsys.path     # The directories where Python will look for things to import\n```\n\n\n```python\nhelp(sys.exit)          # Show the help for sys.exit(), which exits the code immediately\n```\n\nSee the [Python documentation](https://docs.python.org/3/library/sys.html) for a full description.\n\n### 11.5.3 `os` and `os.path` \nare very useful when dealing with files and directories:\n\n\n\n```python\nimport os\n```\n\n\n```python\n# Get the current working directory (cwd)\ncurrentDir = os.getcwd()\ncurrentDir\n```\n\n\n```python\n# Get a list of the files in the current working directory    \nmyFiles = os.listdir(currentDir)\nmyFiles\n```\n\n\n```python\n# Create a directory, rename it, and remove it\nos.mkdir(\"myTempDir\")\nos.rename(\"myTempDir\",\"myNewTempDir\")\nos.removedirs(\"myNewTempDir\")\n```\n\n\n```python\n# Create a full path name to the `module2` module in the modules folder\nmyFileFullPath = os.path.join(currentDir,'modules','module2.py')\nmyFileFullPath\n```\n\n\n```python\n# Does this file exist?\nos.path.exists(myFileFullPath)\n```\n\n\n```python\n# How big is the file?\nos.path.getsize(myFileFullPath)\n```\n\n\n```python\n# Split the directory path from the file name\n(myDir,myFileName) = os.path.split(myFileFullPath)\nprint(myDir)\nprint(myFileName)\n```\n\nSee the Python documentation for [**os**](https://docs.python.org/3/library/os.html) and [**os.path**](https://docs.python.org/3/library/os.path.html) for a full description.\n\n### 11.5.4 `re`\n\nA library that is very powerful for dealing with strings is **re**. It allows you to use regular expressions to examine text - using these is a course in itself, so just consider this simple example:\n\n\n```python\nimport re\n\nmyText = \"\"\"Call me Ishmael. 
Some years ago - never mind how long precisely -\nhaving little or no money in my purse, and nothing particular to interest me on \nshore, I thought I would sail about a little and see the watery part of the \nworld.\"\"\"\n\n# Compile a regular expression, \nmyPattern = re.compile(\"(w\\w+d)\")    # Look for the first word that starts with a w,\n                                     # is followed by 1 or more characters (\\w+)\n                                     # and ends in a d\n\nmySearch = myPattern.search(myText)\n\n# mySearch will be None if nothing was found\nif mySearch:\n    print(mySearch.groups())\n```\n\nSee the full [Python documentation](https://docs.python.org/3/library/re.html) on regular expressions for more information.\n\n## 11.6 Putting everything together\n\n\n---\n\n> ### {% icon hands_on %} Exercise 11.6.1\n>\n> Make a new directory in which you write out 5 files with a 2 second delay. Each file should contain the date and time when it was originally written out.\n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```python\n>    > # 1\n>    > import time, os\n>    >  \n>    > \n>    > # Create a variable for the directory name\n>    > myDir = \"timeTest\"\n>    > \n>    > # Check whether the directory exists, if not create it\n>    > if not os.path.exists(myDir):\n>    >     os.mkdir(myDir)\n>    > \n>    > \n>    > # Loop from 1 to 5\n>    > for i in range(1,6):\n>    > \n>    >     # Get the current time\n>    >     currentTime = time.ctime()\n>    > \n>    >     # Write out the file - use i to give a different name to each\n>    >     filePath = os.path.join(myDir,\"myFile{}.txt\".format(i))\n>    > \n>    >     outFileHandle = open(filePath,'w')    \n>    >     outFileHandle.write(\"{}\\n\".format(currentTime))\n>    >     outFileHandle.close()\n>    > \n>    >     print(\"Written file {}...\".format(filePath))\n>    > \n>    >     # Sleep for 2 seconds\n>    >     
time.sleep(2)\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n---\n\n\n\n\n---\n> ### {% icon hands_on %} Exercise 11.6.2\n>\n> Write a function to read in a FASTA file with an RNA sequence and return the RNA sequence (in 3 base unit chunks).\n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```python\n>    > # 2 \n>    > import os\n>    >  \n>    > def readRnaFastaFile(fileName):\n>    >  \n>    >     if not os.path.exists(fileName):\n>    >         print(\"Error: File {} not available!\".format(fileName))\n>    >         return (None,None,None)\n>    > \n>    >     fconnect = open(fileName)\n>    >     lines = fconnect.readlines()\n>    >     fconnect.close()\n>    > \n>    >     sequenceInfo = []\n>    >     moleculeName = None\n>    >     description = None\n>    > \n>    >     # Get information from the first line - ignore the >\n>    >     firstLine = lines[0]\n>    >     firstLineCols = firstLine[1:].split()\n>    >     moleculeName = firstLineCols[0]\n>    >     description = firstLine[1:].replace(moleculeName,'').strip()\n>    > \n>    >     # Now get the full sequence out\n>    >     fullSequence = \"\"\n>    >     for line in lines[1:]:\n>    > \n>    >         line = line.strip()\n>    >         fullSequence += line\n>    > \n>    >     # Divide up the sequence depending on type (amino acid or nucleic acid)\n>    >     for seqIndex in range(0,len(fullSequence),3):\n>    >         sequenceInfo.append(fullSequence[seqIndex:seqIndex+3])\n>    > \n>    >     return (moleculeName,description,sequenceInfo)\n>    > \n>    > \n>    > print(readRnaFastaFile(\"data/rnaSeq.txt\"))\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n\n---\n\n\n\n\n---\n> ### {% icon hands_on %} Exercise 11.6.3\n>\n> Write a program where you ask the user for a one-letter amino acid sequence, and print out the three-letter amino acid codes. 
Download the dictionary from section 8.2 and save it as a module named SequenceDicts.py first.\n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```python\n>    > # 3\n>    > # Note how you can import a function (or variable) with a different name for your program!\n>    > \n>    > from modules.SequenceDicts import proteinOneToThree as oneToThreeLetterCodes\n>    > \n>    > oneLetterSeq = input('Give one letter sequence:')\n>    >  \n>    > if oneLetterSeq:\n>    >     for oneLetterCode in oneLetterSeq:\n>    >         if oneLetterCode in oneToThreeLetterCodes.keys():\n>    >             print(oneToThreeLetterCodes[oneLetterCode])\n>    >         else:\n>    >             print(\"One letter code '{}' is not a valid amino acid code!\".format(oneLetterCode))\n>    > else:\n>    >     print(\"You didn't give me any information!\")\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n---\n\n\n\n---\n> ### {% icon hands_on %} Exercise 11.6.4 \n>\n> Write a program where you translate the RNA sequence `data/rnaSeq.txt` into 3 letter amino acid codes. Use the dictionary from section 8.2 (called myDictionary) and save it as a module named SequenceDicts.py first. You can use the `readFasta.py` module from the modules folder. 
\n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```python\n>    > from modules.SequenceDicts import standardRnaToProtein, proteinOneToThree\n>    > \n>    > from modules.readFasta import readRnaFastaFile\n>    > \n>    > (molName,description,sequenceInfo) = readRnaFastaFile(\"data/rnaSeq.txt\")\n>    > proteinThreeLetterSeq = []\n>    > \n>    > for rnaCodon in sequenceInfo:\n>    > \n>    >     aaOneLetterCode = standardRnaToProtein[rnaCodon]\n>    >     aaThreeLetterCode = proteinOneToThree[aaOneLetterCode]\n>    >     proteinThreeLetterSeq.append(aaThreeLetterCode)\n>    > \n>    > print(proteinThreeLetterSeq)\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n---\n\n\n\n---\n> ### {% icon hands_on %} Exercise 11.6.5 \n>\n> Write a program that:\n> - Has a function `readSampleInformationFile()` to read the information from this sample data file into a dictionary. Also check whether the file exists.\n> - Has a function `getSampleIdsForValueRange()` that can extract sample IDs from this dictionary. Print the sample IDs for pH 6.0-7.0, temperature 280-290 and volume 200-220 using this function.\n> \n>    > <details markdown=\"1\">\n>    > <summary>{% icon solution %} Solution\n>    > </summary>\n>    >\n>    >  ```python\n>    > import os\n>    >  \n>    > def readSampleInformationFile(fileName):\n>    >  \n>    >     # Read in the sample information file in .csv (comma-delimited) format\n>    > \n>    >     # Doublecheck if file exists\n>    >     if not os.path.exists(fileName):\n>    >         print(\"File {} does not exist!\".format(fileName))\n>    >         return None\n>    >  \n>    >     # Open the file and read the information\n>    >     fileHandle = open(fileName)\n>    >     lines = fileHandle.readlines()\n>    >     fileHandle.close()\n>    > \n>    >     # Now read the information. 
The first line has the header information which\n>    >     # we are going to use to create the dictionary!\n>    > \n>    >     fileInfoDict = {}\n>    > \n>    >     headerCols = lines[0].strip().split(',')\n>    > \n>    >     # Now read in the information, use the first column as the key for the dictionary\n>    >     # Note that you could organise this differently by creating a dictionary with\n>    >     # the header names as keys, then a list of the values for each of the columns.\n>    > \n>    >     for line in lines[1:]:\n>    >  \n>    >         line = line.strip()  # Remove newline characters\n>    >         cols = line.split(',')\n>    > \n>    >         sampleId = int(cols[0])\n>    > \n>    >         fileInfoDict[sampleId] = {}\n>    > \n>    >         # Don't use the first column, is already the key!\n>    >         for i in range(1,len(headerCols)):\n>    >             valueName = headerCols[i]\n>    >  \n>    >             value = cols[i]\n>    >             if valueName in ('pH','temperature','volume'):\n>    >                 value = float(value)\n>    > \n>    >             fileInfoDict[sampleId][valueName] = value\n>    > \n>    >     # Return the dictionary with the file information\n>    >     return fileInfoDict\n>    > \n>    > def getSampleIdsForValueRange(fileInfoDict,valueName,lowValue,highValue):\n>    >  \n>    >     # Return the sample IDs that fit within the given value range for a kind of value\n>    >  \n>    >     #sampleIdList = fileInfoDict.keys()\n>    >     #sampleIdList.sort()\n>    >     sampleIdList = sorted(fileInfoDict.keys())\n>    >     sampleIdsFound = []\n>    > \n>    >     for sampleId in sampleIdList:\n>    > \n>    >         currentValue = fileInfoDict[sampleId][valueName]\n>    >  \n>    >         if lowValue <= currentValue <= highValue:\n>    >             sampleIdsFound.append(sampleId)\n>    >  \n>    >     return sampleIdsFound\n>    >  \n>    > if __name__ == '__main__':\n>    >  \n>    >     fileInfoDict 
= readSampleInformationFile(\"../data/SampleInfo.txt\")\n>    > \n>    >     print(getSampleIdsForValueRange(fileInfoDict,'pH',6.0,7.0))\n>    >     print(getSampleIdsForValueRange(fileInfoDict,'temperature',280,290))\n>    >     print(getSampleIdsForValueRange(fileInfoDict,'volume',200,220))\n>    >  ```\n>    > </details>\n>\n{: .hands_on}\n---\n\n","","<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n{% if page.xsl %}<?xml-stylesheet type=\"text/xsl\" href=\"{{ \"/sitemap.xsl\" | absolute_url }}\"?>\n{% endif %}<urlset xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd\" xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">\n{% assign collections = site.collections | where_exp:'collection','collection.output != false' %}{% for collection in collections %}{% assign docs = collection.docs | where_exp:'doc','doc.sitemap != false' %}{% for doc in docs %}<url>\n<loc>{{ doc.url | replace:'/index.html','/' | absolute_url | xml_escape }}</loc>\n{% if doc.last_modified_at or doc.date %}<lastmod>{{ doc.last_modified_at | default: doc.date | date_to_xmlschema }}</lastmod>\n{% endif %}</url>\n{% endfor %}{% endfor %}{% assign pages = site.html_pages | where_exp:'doc','doc.sitemap != false' | where_exp:'doc','doc.url != \"/404.html\"' %}{% for page in pages %}<url>\n<loc>{{ page.url | replace:'/index.html','/' | absolute_url | xml_escape }}</loc>\n{% if page.last_modified_at %}<lastmod>{{ page.last_modified_at | date_to_xmlschema }}</lastmod>\n{% endif %}</url>\n{% endfor %}{% assign static_files = page.static_files | where_exp:'page','page.sitemap != false' | where_exp:'page','page.name != \"404.html\"' %}{% for file in static_files %}<url>\n<loc>{{ file.path | replace:'/index.html','/' | absolute_url | xml_escape }}</loc>\n<lastmod>{{ file.modified_time | date_to_xmlschema }}</lastmod>\n</url>\n{% endfor %}</urlset>\n","Sitemap: {{ \"sitemap.xml\" | 
absolute_url }}\n"],"posts":[],"tags":{},"source":".","destination":"./_site","collections_dir":"","plugins_dir":"_plugins","layouts_dir":"_layouts","data_dir":"metadata","includes_dir":".","safe":false,"include":[".nojekyll"],"exclude":["shared/font-awesome/src/","Gemfile","Gemfile.lock","package.json","package-lock.json","CONTRIBUTING.md","CONTRIBUTORS.yaml","LICENSE.md","README.md","Makefile","miniconda.sh","**/README.md","**/*.yaml","**/*.yml","**/*.sh","bin/","metadata/","templates/","vendor/","node_modules/"],"keep_files":[".git",".svn"],"encoding":"utf-8","markdown_ext":"markdown,mkdown,mkdn,mkd,md","strict_front_matter":false,"show_drafts":null,"limit_posts":0,"future":true,"unpublished":false,"whitelist":[],"plugins":["jekyll-feed","jekyll-environment-variables","jekyll-github-metadata","jekyll-scholar","jekyll-sitemap","jekyll_pages_api_search"],"markdown":"kramdown","highlighter":"rouge","lsi":false,"excerpt_separator":"\n\n","incremental":false,"detach":false,"port":"4000","host":"127.0.0.1","baseurl":"","show_dir_listing":false,"permalink":"date","paginate_path":"/page:num","timezone":null,"quiet":false,"verbose":false,"defaults":[],"liquid":{"error_mode":"warn","strict_filters":false,"strict_variables":false},"rdiscount":{"extensions":[]},"redcarpet":{"extensions":[]},"kramdown":{"auto_ids":true,"toc_levels":"1..2","entity_output":"as_char","smart_quotes":"lsquo,rsquo,ldquo,rdquo","input":"GFM","hard_wrap":false,"guess_lang":true,"footnote_nr":1,"show_warnings":false,"syntax_highlighter":"rouge","syntax_highlighter_opts":{"default_lang":"plaintext","guess_lang":true},"coderay":{},"auto_ids":true,"toc_levels":"1..2","entity_output":"as_char","smart_quotes":"lsquo,rsquo,ldquo,rdquo","input":"GFM","hard_wrap":false,"guess_lang":true,"footnote_nr":1,"show_warnings":false,"syntax_highlighter":"rouge","syntax_highlighter_opts":{"default_lang":"plaintext","guess_lang":true},"coderay":{}},"title":"VIB Bioinformatics 
Core","email":"bits@vib.be","description":"A collection of tutorials generated and maintained by VIB Bioinformatics Core","url":"https://material.bits.vib.be/","repository":"vibbits/training-material","repository_branch":"master","logo":"assets/images/logo.svg","small_logo":"assets/images/bioinformatics_core_rgb_neg.png","help_url":"https://www.bits.vib.be","other_languages":"fr, ja, es, pt, ar","figurify":{"skip_empty":true,"skip_layouts":["introduction_slides","tutorial_slides","base_slides"],"skip_titles":["Example of an image with a caption"]},"jekyll_pages_api_search":{"index_fields":{"title":{"boost":10},"tags":{"boost":10},"url":{"boost":5},"body":null},"skip_index":false},"scholar":{"style":"_layouts/g3.csl","locale":"en","sort_by":"year,month","order":"ascending","group_by":"none","group_order":"ascending","bibliography_group_tag":"h2,h3,h4,h5","bibliography_list_tag":"ol","bibliography_item_tag":"li","bibliography_list_attributes":{},"bibliography_item_attributes":{},"source":"topics/","bibliography":"**/*.bib","repository":null,"repository_file_delimiter":".","bibtex_options":{"strip":false,"parse_months":true},"bibtex_filters":["smallcaps","superscript","italics","latex"],"bibtex_skip_fields":["abstract","month_numeric"],"bibtex_quotes":["{","}"],"replace_strings":true,"join_strings":true,"details_dir":"bibliography","details_layout":"bibtex.html","details_link":"Details","details_permalink":"/:details_dir/:key:extension","use_raw_bibtex_entry":true,"bibliography_class":"bibliography","bibliography_template":"bibtemplate","reference_tagname":"span","missing_reference":"(missing reference)","details_link_class":"details","query":"@*","cite_class":"citation","type_names":{"article":"Journal Articles","book":"Books","incollection":"Book Chapters","inproceedings":"Conference Articles","thesis":"Theses","mastersthesis":"Master's Theses","phdthesis":"PhD Theses","manual":"Manuals","techreport":"Technical 
Reports","misc":"Miscellaneous","unpublished":"Unpublished"},"type_aliases":{"phdthesis":"thesis","mastersthesis":"thesis"},"type_order":[],"month_names":null},"icon-tag":{"question":"fa-question-circle","solution":"fa-eye","hands_on":"fa-pencil","comment":"fa-commenting-o","tip":"fa-lightbulb-o","objectives":"fa-bullseye","requirements":"fa-check-circle","time":"fa-hourglass-end","keypoints":"fa-key","tool":"fa-wrench","workflow":"fa-share-alt","feedback":"fa-comments-o","congratulations":"fa-thumbs-up","trophy":"fa-trophy","warning":"fa-warning","details":"fa-info-circle","exchange":"fa-exchange","param-file":"fa-file-o","param-files":"fa-files-o","param-collection":"fa-folder-o","param-text":"fa-pencil","param-check":"fa-check-square-o","param-select":"fa-filter","param-repeat":"fa-plus-square-o","galaxy-eye":"fa-eye","galaxy-gear":"fa-cog","galaxy-history":"fa-archive","galaxy-library":"fa-folder","galaxy-pencil":"fa-pencil","galaxy-refresh":"fa-refresh","galaxy-barchart":"fa-bar-chart","galaxy-cross":"fa-times","galaxy-columns":"fa-columns","galaxy-tags":"fa-tags","galaxy-selector":"fa-check-square-o","galaxy-upload":"fa fa-upload","galaxy-chart-select-data":"fa fa-database","galaxy-save":"fa fa-save","galaxy-scratchbook":"fa fa-th","galaxy-dropdown":"fa-caret-down","search":"fa 
fa-search","zenodo_link":"fa-files-o","tutorial":"fa-laptop","slides":"fa-slideshare","interactive_tour":"fa-magic","topic":"fa-folder-o","instances":"fa-cog","docker_image":"fa-ship","galaxy_instance":"fa-external-link","references":"fa-bookmark","gitter":"fa-comments","help":"fa-life-ring","github":"fa-github","email":"fa-envelope-o","twitter":"fa-twitter","linkedin":"fa-linkedin","orcid":"ai-orcid","curriculum":"fa-graduation-cap","level":"fa-graduation-cap","hall-of-fame":"fa-users"},"serving":false,"github":{"api_url":"https://api.github.com","archived":false,"baseurl":"/pages/vibbits/training-material","build_revision":"f6a060d4bd96ae4baacc30e6c3abb5f4093a9ea6","clone_url":"https://github.com/vibbits/training-material.git","contributors":[{"login":"tmuylder","id":50097865,"node_id":"MDQ6VXNlcjUwMDk3ODY1","avatar_url":"https://avatars.githubusercontent.com/u/50097865?v=4","gravatar_id":"","url":"https://api.github.com/users/tmuylder","html_url":"https://github.com/tmuylder","followers_url":"https://api.github.com/users/tmuylder/followers","following_url":"https://api.github.com/users/tmuylder/following{/other_user}","gists_url":"https://api.github.com/users/tmuylder/gists{/gist_id}","starred_url":"https://api.github.com/users/tmuylder/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/tmuylder/subscriptions","organizations_url":"https://api.github.com/users/tmuylder/orgs","repos_url":"https://api.github.com/users/tmuylder/repos","events_url":"https://api.github.com/users/tmuylder/events{/privacy}","received_events_url":"https://api.github.com/users/tmuylder/received_events","type":"User","site_admin":false,"contributions":143},{"login":"abotzki","id":38151284,"node_id":"MDQ6VXNlcjM4MTUxMjg0","avatar_url":"https://avatars.githubusercontent.com/u/38151284?v=4","gravatar_id":"","url":"https://api.github.com/users/abotzki","html_url":"https://github.com/abotzki","followers_url":"https://api.github.com/users/abotzki/followers","following_url":
"https://api.github.com/users/abotzki/following{/other_user}","gists_url":"https://api.github.com/users/abotzki/gists{/gist_id}","starred_url":"https://api.github.com/users/abotzki/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/abotzki/subscriptions","organizations_url":"https://api.github.com/users/abotzki/orgs","repos_url":"https://api.github.com/users/abotzki/repos","events_url":"https://api.github.com/users/abotzki/events{/privacy}","received_events_url":"https://api.github.com/users/abotzki/received_events","type":"User","site_admin":false,"contributions":91},{"login":"alex-botzki","id":30123186,"node_id":"MDQ6VXNlcjMwMTIzMTg2","avatar_url":"https://avatars.githubusercontent.com/u/30123186?v=4","gravatar_id":"","url":"https://api.github.com/users/alex-botzki","html_url":"https://github.com/alex-botzki","followers_url":"https://api.github.com/users/alex-botzki/followers","following_url":"https://api.github.com/users/alex-botzki/following{/other_user}","gists_url":"https://api.github.com/users/alex-botzki/gists{/gist_id}","starred_url":"https://api.github.com/users/alex-botzki/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/alex-botzki/subscriptions","organizations_url":"https://api.github.com/users/alex-botzki/orgs","repos_url":"https://api.github.com/users/alex-botzki/repos","events_url":"https://api.github.com/users/alex-botzki/events{/privacy}","received_events_url":"https://api.github.com/users/alex-botzki/received_events","type":"User","site_admin":false,"contributions":75},{"login":"chdeb","id":14911769,"node_id":"MDQ6VXNlcjE0OTExNzY5","avatar_url":"https://avatars.githubusercontent.com/u/14911769?v=4","gravatar_id":"","url":"https://api.github.com/users/chdeb","html_url":"https://github.com/chdeb","followers_url":"https://api.github.com/users/chdeb/followers","following_url":"https://api.github.com/users/chdeb/following{/other_user}","gists_url":"https://api.github.com/users/chdeb/gists{/gist_id}","star
red_url":"https://api.github.com/users/chdeb/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/chdeb/subscriptions","organizations_url":"https://api.github.com/users/chdeb/orgs","repos_url":"https://api.github.com/users/chdeb/repos","events_url":"https://api.github.com/users/chdeb/events{/privacy}","received_events_url":"https://api.github.com/users/chdeb/received_events","type":"User","site_admin":false,"contributions":34},{"login":"MaybeJustJames","id":26444312,"node_id":"MDQ6VXNlcjI2NDQ0MzEy","avatar_url":"https://avatars.githubusercontent.com/u/26444312?v=4","gravatar_id":"","url":"https://api.github.com/users/MaybeJustJames","html_url":"https://github.com/MaybeJustJames","followers_url":"https://api.github.com/users/MaybeJustJames/followers","following_url":"https://api.github.com/users/MaybeJustJames/following{/other_user}","gists_url":"https://api.github.com/users/MaybeJustJames/gists{/gist_id}","starred_url":"https://api.github.com/users/MaybeJustJames/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/MaybeJustJames/subscriptions","organizations_url":"https://api.github.com/users/MaybeJustJames/orgs","repos_url":"https://api.github.com/users/MaybeJustJames/repos","events_url":"https://api.github.com/users/MaybeJustJames/events{/privacy}","received_events_url":"https://api.github.com/users/MaybeJustJames/received_events","type":"User","site_admin":false,"contributions":25},{"login":"janick-bits","id":51365002,"node_id":"MDQ6VXNlcjUxMzY1MDAy","avatar_url":"https://avatars.githubusercontent.com/u/51365002?v=4","gravatar_id":"","url":"https://api.github.com/users/janick-bits","html_url":"https://github.com/janick-bits","followers_url":"https://api.github.com/users/janick-bits/followers","following_url":"https://api.github.com/users/janick-bits/following{/other_user}","gists_url":"https://api.github.com/users/janick-bits/gists{/gist_id}","starred_url":"https://api.github.com/users/janick-bits/starred{/owner}{/repo}"
,"subscriptions_url":"https://api.github.com/users/janick-bits/subscriptions","organizations_url":"https://api.github.com/users/janick-bits/orgs","repos_url":"https://api.github.com/users/janick-bits/repos","events_url":"https://api.github.com/users/janick-bits/events{/privacy}","received_events_url":"https://api.github.com/users/janick-bits/received_events","type":"User","site_admin":false,"contributions":10}],"disabled":false,"environment":"production","help_url":"https://help.github.com","hostname":"github.com","is_project_page":true,"is_user_page":false,"issues_url":"https://github.com/vibbits/training-material/issues","language":"JavaScript","latest_release":false,"license":{"key":"other","name":"Other","spdx_id":"NOASSERTION","url":null,"node_id":"MDc6TGljZW5zZTA="},"organization_members":[{"login":"MaybeJustJames","id":26444312,"node_id":"MDQ6VXNlcjI2NDQ0MzEy","avatar_url":"https://avatars.githubusercontent.com/u/26444312?v=4","gravatar_id":"","url":"https://api.github.com/users/MaybeJustJames","html_url":"https://github.com/MaybeJustJames","followers_url":"https://api.github.com/users/MaybeJustJames/followers","following_url":"https://api.github.com/users/MaybeJustJames/following{/other_user}","gists_url":"https://api.github.com/users/MaybeJustJames/gists{/gist_id}","starred_url":"https://api.github.com/users/MaybeJustJames/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/MaybeJustJames/subscriptions","organizations_url":"https://api.github.com/users/MaybeJustJames/orgs","repos_url":"https://api.github.com/users/MaybeJustJames/repos","events_url":"https://api.github.com/users/MaybeJustJames/events{/privacy}","received_events_url":"https://api.github.com/users/MaybeJustJames/received_events","type":"User","site_admin":false},{"login":"tmuylder","id":50097865,"node_id":"MDQ6VXNlcjUwMDk3ODY1","avatar_url":"https://avatars.githubusercontent.com/u/50097865?v=4","gravatar_id":"","url":"https://api.github.com/users/tmuylder","html_url":"htt
ps://github.com/tmuylder","followers_url":"https://api.github.com/users/tmuylder/followers","following_url":"https://api.github.com/users/tmuylder/following{/other_user}","gists_url":"https://api.github.com/users/tmuylder/gists{/gist_id}","starred_url":"https://api.github.com/users/tmuylder/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/tmuylder/subscriptions","organizations_url":"https://api.github.com/users/tmuylder/orgs","repos_url":"https://api.github.com/users/tmuylder/repos","events_url":"https://api.github.com/users/tmuylder/events{/privacy}","received_events_url":"https://api.github.com/users/tmuylder/received_events","type":"User","site_admin":false}],"owner":{"avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","bio":null,"blog":"www.bits.vib.be","collaborators":null,"company":null,"created_at":"2016-10-18 09:13:54 UTC","description":null,"email":null,"followers":0,"following":0,"has_organization_projects":true,"has_repository_projects":true,"hireable":null,"html_url":"https://github.com/vibbits","id":22908438,"is_verified":false,"location":"Belgium","login":"vibbits","name":"VIB Bioinformatics Core","node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","public_gists":0,"public_repos":58,"type":"Organization","updated_at":"2021-04-12 00:00:26 UTC"},"owner_gravatar_url":"https://github.com/vibbits.png","owner_name":"vibbits","owner_url":"https://github.com/vibbits","pages_env":"production","pages_hostname":"github.io","private":false,"project_tagline":"VIB BITS training 
material","project_title":"training-material","public_repositories":[{"id":270063361,"node_id":"MDEwOlJlcG9zaXRvcnkyNzAwNjMzNjE=","name":"alpine-php-wordpress","full_name":"vibbits/alpine-php-wordpress","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/alpine-php-wordpress","description":"Lightwight Docker image for the (latest) PHP-FPM and Nginx for 
WordPress","fork":true,"url":"https://api.github.com/repos/vibbits/alpine-php-wordpress","forks_url":"https://api.github.com/repos/vibbits/alpine-php-wordpress/forks","keys_url":"https://api.github.com/repos/vibbits/alpine-php-wordpress/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/alpine-php-wordpress/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/alpine-php-wordpress/teams","hooks_url":"https://api.github.com/repos/vibbits/alpine-php-wordpress/hooks","issue_events_url":"https://api.github.com/repos/vibbits/alpine-php-wordpress/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/alpine-php-wordpress/events","assignees_url":"https://api.github.com/repos/vibbits/alpine-php-wordpress/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/alpine-php-wordpress/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/alpine-php-wordpress/tags","blobs_url":"https://api.github.com/repos/vibbits/alpine-php-wordpress/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/alpine-php-wordpress/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/alpine-php-wordpress/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/alpine-php-wordpress/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/alpine-php-wordpress/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/alpine-php-wordpress/languages","stargazers_url":"https://api.github.com/repos/vibbits/alpine-php-wordpress/stargazers","contributors_url":"https://api.github.com/repos/vibbits/alpine-php-wordpress/contributors","subscribers_url":"https://api.github.com/repos/vibbits/alpine-php-wordpress/subscribers","subscription_url":"https://api.github.com/repos/vibbits/alpine-php-wordpress/subscription","commits_url":"https://api.github.com/repos/vibbits/alpine-php-wordpress/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/a
lpine-php-wordpress/git/commits{/sha}","comments_url":"https://api.github.com/repos/vibbits/alpine-php-wordpress/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/alpine-php-wordpress/issues/comments{/number}","contents_url":"https://api.github.com/repos/vibbits/alpine-php-wordpress/contents/{+path}","compare_url":"https://api.github.com/repos/vibbits/alpine-php-wordpress/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/alpine-php-wordpress/merges","archive_url":"https://api.github.com/repos/vibbits/alpine-php-wordpress/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/alpine-php-wordpress/downloads","issues_url":"https://api.github.com/repos/vibbits/alpine-php-wordpress/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/alpine-php-wordpress/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/alpine-php-wordpress/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/alpine-php-wordpress/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/alpine-php-wordpress/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/alpine-php-wordpress/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/alpine-php-wordpress/deployments","created_at":"2020-06-06 18:09:14 UTC","updated_at":"2020-06-12 19:26:52 UTC","pushed_at":"2020-06-12 19:26:49 
UTC","git_url":"git://github.com/vibbits/alpine-php-wordpress.git","ssh_url":"git@github.com:vibbits/alpine-php-wordpress.git","clone_url":"https://github.com/vibbits/alpine-php-wordpress.git","svn_url":"https://github.com/vibbits/alpine-php-wordpress","homepage":"http://www.wordpressdocker.com","size":68,"stargazers_count":0,"watchers_count":0,"language":"Dockerfile","has_issues":false,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":0,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":0,"license":null,"topics":[],"forks":0,"open_issues":0,"watchers":0,"default_branch":"master"},{"id":160538640,"node_id":"MDEwOlJlcG9zaXRvcnkxNjA1Mzg2NDA=","name":"bigdatasurvey","full_name":"vibbits/bigdatasurvey","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/bigdatasurvey","description":"A survey of open-source big data platforms, centering around the Hadoop 
ecosystem.","fork":true,"url":"https://api.github.com/repos/vibbits/bigdatasurvey","forks_url":"https://api.github.com/repos/vibbits/bigdatasurvey/forks","keys_url":"https://api.github.com/repos/vibbits/bigdatasurvey/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/bigdatasurvey/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/bigdatasurvey/teams","hooks_url":"https://api.github.com/repos/vibbits/bigdatasurvey/hooks","issue_events_url":"https://api.github.com/repos/vibbits/bigdatasurvey/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/bigdatasurvey/events","assignees_url":"https://api.github.com/repos/vibbits/bigdatasurvey/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/bigdatasurvey/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/bigdatasurvey/tags","blobs_url":"https://api.github.com/repos/vibbits/bigdatasurvey/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/bigdatasurvey/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/bigdatasurvey/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/bigdatasurvey/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/bigdatasurvey/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/bigdatasurvey/languages","stargazers_url":"https://api.github.com/repos/vibbits/bigdatasurvey/stargazers","contributors_url":"https://api.github.com/repos/vibbits/bigdatasurvey/contributors","subscribers_url":"https://api.github.com/repos/vibbits/bigdatasurvey/subscribers","subscription_url":"https://api.github.com/repos/vibbits/bigdatasurvey/subscription","commits_url":"https://api.github.com/repos/vibbits/bigdatasurvey/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/bigdatasurvey/git/commits{/sha}","comments_url":"https://api.github.com/repos/vibbits/bigdatasurvey/comments{/number}","issue_comment_url":"https://api.gi
thub.com/repos/vibbits/bigdatasurvey/issues/comments{/number}","contents_url":"https://api.github.com/repos/vibbits/bigdatasurvey/contents/{+path}","compare_url":"https://api.github.com/repos/vibbits/bigdatasurvey/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/bigdatasurvey/merges","archive_url":"https://api.github.com/repos/vibbits/bigdatasurvey/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/bigdatasurvey/downloads","issues_url":"https://api.github.com/repos/vibbits/bigdatasurvey/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/bigdatasurvey/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/bigdatasurvey/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/bigdatasurvey/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/bigdatasurvey/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/bigdatasurvey/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/bigdatasurvey/deployments","created_at":"2018-12-05 15:27:29 UTC","updated_at":"2018-12-05 15:31:44 UTC","pushed_at":"2015-10-28 20:33:50 
UTC","git_url":"git://github.com/vibbits/bigdatasurvey.git","ssh_url":"git@github.com:vibbits/bigdatasurvey.git","clone_url":"https://github.com/vibbits/bigdatasurvey.git","svn_url":"https://github.com/vibbits/bigdatasurvey","homepage":null,"size":524,"stargazers_count":0,"watchers_count":0,"language":"JavaScript","has_issues":false,"has_projects":false,"has_downloads":true,"has_wiki":false,"has_pages":false,"forks_count":0,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":0,"license":null,"topics":[],"forks":0,"open_issues":0,"watchers":0,"default_branch":"gh-pages"},{"id":136517018,"node_id":"MDEwOlJlcG9zaXRvcnkxMzY1MTcwMTg=","name":"bioconda-recipes","full_name":"vibbits/bioconda-recipes","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/bioconda-recipes","description":"Conda recipes for the bioconda 
channel.","fork":true,"url":"https://api.github.com/repos/vibbits/bioconda-recipes","forks_url":"https://api.github.com/repos/vibbits/bioconda-recipes/forks","keys_url":"https://api.github.com/repos/vibbits/bioconda-recipes/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/bioconda-recipes/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/bioconda-recipes/teams","hooks_url":"https://api.github.com/repos/vibbits/bioconda-recipes/hooks","issue_events_url":"https://api.github.com/repos/vibbits/bioconda-recipes/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/bioconda-recipes/events","assignees_url":"https://api.github.com/repos/vibbits/bioconda-recipes/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/bioconda-recipes/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/bioconda-recipes/tags","blobs_url":"https://api.github.com/repos/vibbits/bioconda-recipes/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/bioconda-recipes/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/bioconda-recipes/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/bioconda-recipes/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/bioconda-recipes/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/bioconda-recipes/languages","stargazers_url":"https://api.github.com/repos/vibbits/bioconda-recipes/stargazers","contributors_url":"https://api.github.com/repos/vibbits/bioconda-recipes/contributors","subscribers_url":"https://api.github.com/repos/vibbits/bioconda-recipes/subscribers","subscription_url":"https://api.github.com/repos/vibbits/bioconda-recipes/subscription","commits_url":"https://api.github.com/repos/vibbits/bioconda-recipes/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/bioconda-recipes/git/commits{/sha}","comments_url":"https://api.github.com/repos/vibbits/b
ioconda-recipes/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/bioconda-recipes/issues/comments{/number}","contents_url":"https://api.github.com/repos/vibbits/bioconda-recipes/contents/{+path}","compare_url":"https://api.github.com/repos/vibbits/bioconda-recipes/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/bioconda-recipes/merges","archive_url":"https://api.github.com/repos/vibbits/bioconda-recipes/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/bioconda-recipes/downloads","issues_url":"https://api.github.com/repos/vibbits/bioconda-recipes/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/bioconda-recipes/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/bioconda-recipes/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/bioconda-recipes/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/bioconda-recipes/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/bioconda-recipes/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/bioconda-recipes/deployments","created_at":"2018-06-07 18:36:07 UTC","updated_at":"2018-10-11 09:37:21 UTC","pushed_at":"2019-01-21 17:01:17 
UTC","git_url":"git://github.com/vibbits/bioconda-recipes.git","ssh_url":"git@github.com:vibbits/bioconda-recipes.git","clone_url":"https://github.com/vibbits/bioconda-recipes.git","svn_url":"https://github.com/vibbits/bioconda-recipes","homepage":"https://bioconda.github.io","size":167593,"stargazers_count":0,"watchers_count":0,"language":"Shell","has_issues":false,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":0,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":0,"license":null,"topics":[],"forks":0,"open_issues":0,"watchers":0,"default_branch":"master"},{"id":170295072,"node_id":"MDEwOlJlcG9zaXRvcnkxNzAyOTUwNzI=","name":"CAMEL","full_name":"vibbits/CAMEL","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/CAMEL","description":" The CAMEL platform, an online compendium of Adaptive Microbial Experiments in the 
Lab.","fork":false,"url":"https://api.github.com/repos/vibbits/CAMEL","forks_url":"https://api.github.com/repos/vibbits/CAMEL/forks","keys_url":"https://api.github.com/repos/vibbits/CAMEL/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/CAMEL/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/CAMEL/teams","hooks_url":"https://api.github.com/repos/vibbits/CAMEL/hooks","issue_events_url":"https://api.github.com/repos/vibbits/CAMEL/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/CAMEL/events","assignees_url":"https://api.github.com/repos/vibbits/CAMEL/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/CAMEL/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/CAMEL/tags","blobs_url":"https://api.github.com/repos/vibbits/CAMEL/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/CAMEL/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/CAMEL/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/CAMEL/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/CAMEL/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/CAMEL/languages","stargazers_url":"https://api.github.com/repos/vibbits/CAMEL/stargazers","contributors_url":"https://api.github.com/repos/vibbits/CAMEL/contributors","subscribers_url":"https://api.github.com/repos/vibbits/CAMEL/subscribers","subscription_url":"https://api.github.com/repos/vibbits/CAMEL/subscription","commits_url":"https://api.github.com/repos/vibbits/CAMEL/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/CAMEL/git/commits{/sha}","comments_url":"https://api.github.com/repos/vibbits/CAMEL/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/CAMEL/issues/comments{/number}","contents_url":"https://api.github.com/repos/vibbits/CAMEL/contents/{+path}","compare_url":"https://api.github.com/repos/vibbits/CAMEL/compare
/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/CAMEL/merges","archive_url":"https://api.github.com/repos/vibbits/CAMEL/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/CAMEL/downloads","issues_url":"https://api.github.com/repos/vibbits/CAMEL/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/CAMEL/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/CAMEL/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/CAMEL/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/CAMEL/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/CAMEL/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/CAMEL/deployments","created_at":"2019-02-12 10:03:23 UTC","updated_at":"2021-02-25 09:17:05 UTC","pushed_at":"2021-02-25 09:17:03 UTC","git_url":"git://github.com/vibbits/CAMEL.git","ssh_url":"git@github.com:vibbits/CAMEL.git","clone_url":"https://github.com/vibbits/CAMEL.git","svn_url":"https://github.com/vibbits/CAMEL","homepage":"https://cameldatabase.com","size":2463,"stargazers_count":1,"watchers_count":1,"language":"HTML","has_issues":true,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":2,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":6,"license":{"key":"agpl-3.0","name":"GNU Affero General Public License 
v3.0","spdx_id":"AGPL-3.0","url":"https://api.github.com/licenses/agpl-3.0","node_id":"MDc6TGljZW5zZTE="},"topics":[],"forks":2,"open_issues":6,"watchers":1,"default_branch":"master"},{"id":352638906,"node_id":"MDEwOlJlcG9zaXRvcnkzNTI2Mzg5MDY=","name":"capita-selecta-2021","full_name":"vibbits/capita-selecta-2021","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/capita-selecta-2021","description":"Presentation for Capita Selecta in Bioinformatics (Ghent University course) - 29 March 
2021","fork":false,"url":"https://api.github.com/repos/vibbits/capita-selecta-2021","forks_url":"https://api.github.com/repos/vibbits/capita-selecta-2021/forks","keys_url":"https://api.github.com/repos/vibbits/capita-selecta-2021/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/capita-selecta-2021/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/capita-selecta-2021/teams","hooks_url":"https://api.github.com/repos/vibbits/capita-selecta-2021/hooks","issue_events_url":"https://api.github.com/repos/vibbits/capita-selecta-2021/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/capita-selecta-2021/events","assignees_url":"https://api.github.com/repos/vibbits/capita-selecta-2021/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/capita-selecta-2021/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/capita-selecta-2021/tags","blobs_url":"https://api.github.com/repos/vibbits/capita-selecta-2021/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/capita-selecta-2021/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/capita-selecta-2021/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/capita-selecta-2021/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/capita-selecta-2021/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/capita-selecta-2021/languages","stargazers_url":"https://api.github.com/repos/vibbits/capita-selecta-2021/stargazers","contributors_url":"https://api.github.com/repos/vibbits/capita-selecta-2021/contributors","subscribers_url":"https://api.github.com/repos/vibbits/capita-selecta-2021/subscribers","subscription_url":"https://api.github.com/repos/vibbits/capita-selecta-2021/subscription","commits_url":"https://api.github.com/repos/vibbits/capita-selecta-2021/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/capita-selecta-2021/git/com
mits{/sha}","comments_url":"https://api.github.com/repos/vibbits/capita-selecta-2021/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/capita-selecta-2021/issues/comments{/number}","contents_url":"https://api.github.com/repos/vibbits/capita-selecta-2021/contents/{+path}","compare_url":"https://api.github.com/repos/vibbits/capita-selecta-2021/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/capita-selecta-2021/merges","archive_url":"https://api.github.com/repos/vibbits/capita-selecta-2021/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/capita-selecta-2021/downloads","issues_url":"https://api.github.com/repos/vibbits/capita-selecta-2021/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/capita-selecta-2021/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/capita-selecta-2021/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/capita-selecta-2021/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/capita-selecta-2021/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/capita-selecta-2021/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/capita-selecta-2021/deployments","created_at":"2021-03-29 12:41:18 UTC","updated_at":"2021-03-29 13:24:10 UTC","pushed_at":"2021-03-29 13:24:07 UTC","git_url":"git://github.com/vibbits/capita-selecta-2021.git","ssh_url":"git@github.com:vibbits/capita-selecta-2021.git","clone_url":"https://github.com/vibbits/capita-selecta-2021.git","svn_url":"https://github.com/vibbits/capita-selecta-2021","homepage":null,"size":10424,"stargazers_count":0,"watchers_count":0,"language":"HTML","has_issues":true,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":0,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":0,"license":{"key":"gpl-3.0","name":"GNU General Public License 
v3.0","spdx_id":"GPL-3.0","url":"https://api.github.com/licenses/gpl-3.0","node_id":"MDc6TGljZW5zZTk="},"topics":[],"forks":0,"open_issues":0,"watchers":0,"default_branch":"main"},{"id":364883030,"node_id":"MDEwOlJlcG9zaXRvcnkzNjQ4ODMwMzA=","name":"chipseq-nextflow","full_name":"vibbits/chipseq-nextflow","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/chipseq-nextflow","description":null,"fork":false,"url":"https://api.github.com/repos/vibbits/chipseq-nextflow","forks_url":"https://api.github.com/repos/vibbits/chipseq-nextflow/forks","keys_url":"https://api.github.com/repos/vibbits/chipseq-nextflow/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/chipseq-nextflow/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/chipseq-nextflow/teams","hooks_url":"https://api.github.com/repos/vibbits/chipseq-nextflow/hooks","issue_events_url":"https://api.github.com/repos/vibbits/chipseq-nextflow/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/chipseq-nextflow/events","assignees_url":"http
s://api.github.com/repos/vibbits/chipseq-nextflow/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/chipseq-nextflow/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/chipseq-nextflow/tags","blobs_url":"https://api.github.com/repos/vibbits/chipseq-nextflow/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/chipseq-nextflow/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/chipseq-nextflow/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/chipseq-nextflow/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/chipseq-nextflow/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/chipseq-nextflow/languages","stargazers_url":"https://api.github.com/repos/vibbits/chipseq-nextflow/stargazers","contributors_url":"https://api.github.com/repos/vibbits/chipseq-nextflow/contributors","subscribers_url":"https://api.github.com/repos/vibbits/chipseq-nextflow/subscribers","subscription_url":"https://api.github.com/repos/vibbits/chipseq-nextflow/subscription","commits_url":"https://api.github.com/repos/vibbits/chipseq-nextflow/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/chipseq-nextflow/git/commits{/sha}","comments_url":"https://api.github.com/repos/vibbits/chipseq-nextflow/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/chipseq-nextflow/issues/comments{/number}","contents_url":"https://api.github.com/repos/vibbits/chipseq-nextflow/contents/{+path}","compare_url":"https://api.github.com/repos/vibbits/chipseq-nextflow/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/chipseq-nextflow/merges","archive_url":"https://api.github.com/repos/vibbits/chipseq-nextflow/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/chipseq-nextflow/downloads","issues_url":"https://api.github.com/repos/vibbits/chipseq-nextflow/issues{/number}","pulls_url":"https://api.github.c
om/repos/vibbits/chipseq-nextflow/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/chipseq-nextflow/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/chipseq-nextflow/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/chipseq-nextflow/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/chipseq-nextflow/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/chipseq-nextflow/deployments","created_at":"2021-05-06 11:15:00 UTC","updated_at":"2021-05-21 11:11:40 UTC","pushed_at":"2021-05-21 11:10:43 UTC","git_url":"git://github.com/vibbits/chipseq-nextflow.git","ssh_url":"git@github.com:vibbits/chipseq-nextflow.git","clone_url":"https://github.com/vibbits/chipseq-nextflow.git","svn_url":"https://github.com/vibbits/chipseq-nextflow","homepage":null,"size":33,"stargazers_count":0,"watchers_count":0,"language":"Nextflow","has_issues":true,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":0,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":0,"license":{"key":"gpl-3.0","name":"GNU General Public License 
v3.0","spdx_id":"GPL-3.0","url":"https://api.github.com/licenses/gpl-3.0","node_id":"MDc6TGljZW5zZTk="},"topics":[],"forks":0,"open_issues":0,"watchers":0,"default_branch":"main"},{"id":299208736,"node_id":"MDEwOlJlcG9zaXRvcnkyOTkyMDg3MzY=","name":"containers-workflow-hackathon","full_name":"vibbits/containers-workflow-hackathon","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/containers-workflow-hackathon","description":"This is a draft repository for organizing and collecting materials for the containers & workflows course. 
","fork":false,"url":"https://api.github.com/repos/vibbits/containers-workflow-hackathon","forks_url":"https://api.github.com/repos/vibbits/containers-workflow-hackathon/forks","keys_url":"https://api.github.com/repos/vibbits/containers-workflow-hackathon/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/containers-workflow-hackathon/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/containers-workflow-hackathon/teams","hooks_url":"https://api.github.com/repos/vibbits/containers-workflow-hackathon/hooks","issue_events_url":"https://api.github.com/repos/vibbits/containers-workflow-hackathon/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/containers-workflow-hackathon/events","assignees_url":"https://api.github.com/repos/vibbits/containers-workflow-hackathon/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/containers-workflow-hackathon/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/containers-workflow-hackathon/tags","blobs_url":"https://api.github.com/repos/vibbits/containers-workflow-hackathon/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/containers-workflow-hackathon/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/containers-workflow-hackathon/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/containers-workflow-hackathon/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/containers-workflow-hackathon/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/containers-workflow-hackathon/languages","stargazers_url":"https://api.github.com/repos/vibbits/containers-workflow-hackathon/stargazers","contributors_url":"https://api.github.com/repos/vibbits/containers-workflow-hackathon/contributors","subscribers_url":"https://api.github.com/repos/vibbits/containers-workflow-hackathon/subscribers","subscription_url":"https://api.github.com/repos/vibbits/containe
rs-workflow-hackathon/subscription","commits_url":"https://api.github.com/repos/vibbits/containers-workflow-hackathon/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/containers-workflow-hackathon/git/commits{/sha}","comments_url":"https://api.github.com/repos/vibbits/containers-workflow-hackathon/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/containers-workflow-hackathon/issues/comments{/number}","contents_url":"https://api.github.com/repos/vibbits/containers-workflow-hackathon/contents/{+path}","compare_url":"https://api.github.com/repos/vibbits/containers-workflow-hackathon/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/containers-workflow-hackathon/merges","archive_url":"https://api.github.com/repos/vibbits/containers-workflow-hackathon/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/containers-workflow-hackathon/downloads","issues_url":"https://api.github.com/repos/vibbits/containers-workflow-hackathon/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/containers-workflow-hackathon/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/containers-workflow-hackathon/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/containers-workflow-hackathon/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/containers-workflow-hackathon/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/containers-workflow-hackathon/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/containers-workflow-hackathon/deployments","created_at":"2020-09-28 06:21:14 UTC","updated_at":"2021-04-16 12:43:24 UTC","pushed_at":"2021-04-16 12:43:22 
UTC","git_url":"git://github.com/vibbits/containers-workflow-hackathon.git","ssh_url":"git@github.com:vibbits/containers-workflow-hackathon.git","clone_url":"https://github.com/vibbits/containers-workflow-hackathon.git","svn_url":"https://github.com/vibbits/containers-workflow-hackathon","homepage":null,"size":131,"stargazers_count":5,"watchers_count":5,"language":null,"has_issues":true,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":1,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":1,"license":{"key":"cc-by-4.0","name":"Creative Commons Attribution 4.0 International","spdx_id":"CC-BY-4.0","url":"https://api.github.com/licenses/cc-by-4.0","node_id":"MDc6TGljZW5zZTI1"},"topics":[],"forks":1,"open_issues":1,"watchers":5,"default_branch":"master"},{"id":364534901,"node_id":"MDEwOlJlcG9zaXRvcnkzNjQ1MzQ5MDE=","name":"containers-workshop","full_name":"vibbits/containers-workshop","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/containers-workshop","description":"Materials for the workshop on 
containers","fork":false,"url":"https://api.github.com/repos/vibbits/containers-workshop","forks_url":"https://api.github.com/repos/vibbits/containers-workshop/forks","keys_url":"https://api.github.com/repos/vibbits/containers-workshop/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/containers-workshop/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/containers-workshop/teams","hooks_url":"https://api.github.com/repos/vibbits/containers-workshop/hooks","issue_events_url":"https://api.github.com/repos/vibbits/containers-workshop/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/containers-workshop/events","assignees_url":"https://api.github.com/repos/vibbits/containers-workshop/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/containers-workshop/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/containers-workshop/tags","blobs_url":"https://api.github.com/repos/vibbits/containers-workshop/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/containers-workshop/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/containers-workshop/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/containers-workshop/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/containers-workshop/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/containers-workshop/languages","stargazers_url":"https://api.github.com/repos/vibbits/containers-workshop/stargazers","contributors_url":"https://api.github.com/repos/vibbits/containers-workshop/contributors","subscribers_url":"https://api.github.com/repos/vibbits/containers-workshop/subscribers","subscription_url":"https://api.github.com/repos/vibbits/containers-workshop/subscription","commits_url":"https://api.github.com/repos/vibbits/containers-workshop/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/containers-workshop/g
it/commits{/sha}","comments_url":"https://api.github.com/repos/vibbits/containers-workshop/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/containers-workshop/issues/comments{/number}","contents_url":"https://api.github.com/repos/vibbits/containers-workshop/contents/{+path}","compare_url":"https://api.github.com/repos/vibbits/containers-workshop/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/containers-workshop/merges","archive_url":"https://api.github.com/repos/vibbits/containers-workshop/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/containers-workshop/downloads","issues_url":"https://api.github.com/repos/vibbits/containers-workshop/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/containers-workshop/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/containers-workshop/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/containers-workshop/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/containers-workshop/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/containers-workshop/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/containers-workshop/deployments","created_at":"2021-05-05 10:09:23 UTC","updated_at":"2021-05-20 13:04:20 UTC","pushed_at":"2021-05-20 13:04:18 UTC","git_url":"git://github.com/vibbits/containers-workshop.git","ssh_url":"git@github.com:vibbits/containers-workshop.git","clone_url":"https://github.com/vibbits/containers-workshop.git","svn_url":"https://github.com/vibbits/containers-workshop","homepage":null,"size":35,"stargazers_count":0,"watchers_count":0,"language":"Dockerfile","has_issues":true,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":2,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":0,"license":{"key":"cc0-1.0","name":"Creative Commons 
Zero v1.0 Universal","spdx_id":"CC0-1.0","url":"https://api.github.com/licenses/cc0-1.0","node_id":"MDc6TGljZW5zZTY="},"topics":[],"forks":2,"open_issues":0,"watchers":0,"default_branch":"main"},{"id":173921762,"node_id":"MDEwOlJlcG9zaXRvcnkxNzM5MjE3NjI=","name":"cytokit","full_name":"vibbits/cytokit","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/cytokit","description":"Microscopy Image Cytometry 
Toolkit","fork":true,"url":"https://api.github.com/repos/vibbits/cytokit","forks_url":"https://api.github.com/repos/vibbits/cytokit/forks","keys_url":"https://api.github.com/repos/vibbits/cytokit/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/cytokit/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/cytokit/teams","hooks_url":"https://api.github.com/repos/vibbits/cytokit/hooks","issue_events_url":"https://api.github.com/repos/vibbits/cytokit/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/cytokit/events","assignees_url":"https://api.github.com/repos/vibbits/cytokit/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/cytokit/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/cytokit/tags","blobs_url":"https://api.github.com/repos/vibbits/cytokit/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/cytokit/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/cytokit/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/cytokit/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/cytokit/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/cytokit/languages","stargazers_url":"https://api.github.com/repos/vibbits/cytokit/stargazers","contributors_url":"https://api.github.com/repos/vibbits/cytokit/contributors","subscribers_url":"https://api.github.com/repos/vibbits/cytokit/subscribers","subscription_url":"https://api.github.com/repos/vibbits/cytokit/subscription","commits_url":"https://api.github.com/repos/vibbits/cytokit/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/cytokit/git/commits{/sha}","comments_url":"https://api.github.com/repos/vibbits/cytokit/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/cytokit/issues/comments{/number}","contents_url":"https://api.github.com/repos/vibbits/cytokit/contents/{+path}","compare_ur
l":"https://api.github.com/repos/vibbits/cytokit/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/cytokit/merges","archive_url":"https://api.github.com/repos/vibbits/cytokit/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/cytokit/downloads","issues_url":"https://api.github.com/repos/vibbits/cytokit/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/cytokit/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/cytokit/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/cytokit/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/cytokit/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/cytokit/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/cytokit/deployments","created_at":"2019-03-05 10:04:08 UTC","updated_at":"2019-03-05 15:37:40 UTC","pushed_at":"2019-03-05 15:37:38 UTC","git_url":"git://github.com/vibbits/cytokit.git","ssh_url":"git@github.com:vibbits/cytokit.git","clone_url":"https://github.com/vibbits/cytokit.git","svn_url":"https://github.com/vibbits/cytokit","homepage":"","size":174062,"stargazers_count":0,"watchers_count":0,"language":"Jupyter Notebook","has_issues":false,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":0,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":0,"license":{"key":"apache-2.0","name":"Apache License 
2.0","spdx_id":"Apache-2.0","url":"https://api.github.com/licenses/apache-2.0","node_id":"MDc6TGljZW5zZTI="},"topics":[],"forks":0,"open_issues":0,"watchers":0,"default_branch":"master"},{"id":135151492,"node_id":"MDEwOlJlcG9zaXRvcnkxMzUxNTE0OTI=","name":"DataIntegrationFlows","full_name":"vibbits/DataIntegrationFlows","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/DataIntegrationFlows","description":"This is a docker image to set up the atmosphere for running several data integration 
workflows","fork":false,"url":"https://api.github.com/repos/vibbits/DataIntegrationFlows","forks_url":"https://api.github.com/repos/vibbits/DataIntegrationFlows/forks","keys_url":"https://api.github.com/repos/vibbits/DataIntegrationFlows/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/DataIntegrationFlows/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/DataIntegrationFlows/teams","hooks_url":"https://api.github.com/repos/vibbits/DataIntegrationFlows/hooks","issue_events_url":"https://api.github.com/repos/vibbits/DataIntegrationFlows/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/DataIntegrationFlows/events","assignees_url":"https://api.github.com/repos/vibbits/DataIntegrationFlows/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/DataIntegrationFlows/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/DataIntegrationFlows/tags","blobs_url":"https://api.github.com/repos/vibbits/DataIntegrationFlows/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/DataIntegrationFlows/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/DataIntegrationFlows/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/DataIntegrationFlows/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/DataIntegrationFlows/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/DataIntegrationFlows/languages","stargazers_url":"https://api.github.com/repos/vibbits/DataIntegrationFlows/stargazers","contributors_url":"https://api.github.com/repos/vibbits/DataIntegrationFlows/contributors","subscribers_url":"https://api.github.com/repos/vibbits/DataIntegrationFlows/subscribers","subscription_url":"https://api.github.com/repos/vibbits/DataIntegrationFlows/subscription","commits_url":"https://api.github.com/repos/vibbits/DataIntegrationFlows/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/
DataIntegrationFlows/git/commits{/sha}","comments_url":"https://api.github.com/repos/vibbits/DataIntegrationFlows/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/DataIntegrationFlows/issues/comments{/number}","contents_url":"https://api.github.com/repos/vibbits/DataIntegrationFlows/contents/{+path}","compare_url":"https://api.github.com/repos/vibbits/DataIntegrationFlows/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/DataIntegrationFlows/merges","archive_url":"https://api.github.com/repos/vibbits/DataIntegrationFlows/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/DataIntegrationFlows/downloads","issues_url":"https://api.github.com/repos/vibbits/DataIntegrationFlows/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/DataIntegrationFlows/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/DataIntegrationFlows/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/DataIntegrationFlows/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/DataIntegrationFlows/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/DataIntegrationFlows/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/DataIntegrationFlows/deployments","created_at":"2018-05-28 11:24:02 UTC","updated_at":"2018-10-11 09:37:21 UTC","pushed_at":"2018-05-28 12:05:05 
UTC","git_url":"git://github.com/vibbits/DataIntegrationFlows.git","ssh_url":"git@github.com:vibbits/DataIntegrationFlows.git","clone_url":"https://github.com/vibbits/DataIntegrationFlows.git","svn_url":"https://github.com/vibbits/DataIntegrationFlows","homepage":null,"size":2,"stargazers_count":0,"watchers_count":0,"language":null,"has_issues":true,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":0,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":0,"license":null,"topics":[],"forks":0,"open_issues":0,"watchers":0,"default_branch":"master"},{"id":153103893,"node_id":"MDEwOlJlcG9zaXRvcnkxNTMxMDM4OTM=","name":"EMDenoising","full_name":"vibbits/EMDenoising","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/EMDenoising","description":null,"fork":false,"url":"https://api.github.com/repos/vibbits/EMDenoising","forks_url":"https://api.github.com/repos/vibbits/EMDenoising/forks","keys_url":"https://api.github.com/repos/vibbits/EMDenoising/keys{/key_id}","collaborators_url":"https://api.github.com/repo
s/vibbits/EMDenoising/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/EMDenoising/teams","hooks_url":"https://api.github.com/repos/vibbits/EMDenoising/hooks","issue_events_url":"https://api.github.com/repos/vibbits/EMDenoising/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/EMDenoising/events","assignees_url":"https://api.github.com/repos/vibbits/EMDenoising/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/EMDenoising/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/EMDenoising/tags","blobs_url":"https://api.github.com/repos/vibbits/EMDenoising/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/EMDenoising/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/EMDenoising/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/EMDenoising/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/EMDenoising/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/EMDenoising/languages","stargazers_url":"https://api.github.com/repos/vibbits/EMDenoising/stargazers","contributors_url":"https://api.github.com/repos/vibbits/EMDenoising/contributors","subscribers_url":"https://api.github.com/repos/vibbits/EMDenoising/subscribers","subscription_url":"https://api.github.com/repos/vibbits/EMDenoising/subscription","commits_url":"https://api.github.com/repos/vibbits/EMDenoising/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/EMDenoising/git/commits{/sha}","comments_url":"https://api.github.com/repos/vibbits/EMDenoising/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/EMDenoising/issues/comments{/number}","contents_url":"https://api.github.com/repos/vibbits/EMDenoising/contents/{+path}","compare_url":"https://api.github.com/repos/vibbits/EMDenoising/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/EMDenoising/merges","archive_url":"htt
ps://api.github.com/repos/vibbits/EMDenoising/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/EMDenoising/downloads","issues_url":"https://api.github.com/repos/vibbits/EMDenoising/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/EMDenoising/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/EMDenoising/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/EMDenoising/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/EMDenoising/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/EMDenoising/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/EMDenoising/deployments","created_at":"2018-10-15 11:49:08 UTC","updated_at":"2021-03-10 05:06:05 UTC","pushed_at":"2021-02-04 15:00:02 UTC","git_url":"git://github.com/vibbits/EMDenoising.git","ssh_url":"git@github.com:vibbits/EMDenoising.git","clone_url":"https://github.com/vibbits/EMDenoising.git","svn_url":"https://github.com/vibbits/EMDenoising","homepage":null,"size":4344,"stargazers_count":4,"watchers_count":4,"language":"Java","has_issues":true,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":2,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":1,"license":{"key":"gpl-3.0","name":"GNU General Public License 
v3.0","spdx_id":"GPL-3.0","url":"https://api.github.com/licenses/gpl-3.0","node_id":"MDc6TGljZW5zZTk="},"topics":[],"forks":2,"open_issues":1,"watchers":4,"default_branch":"master"},{"id":153100745,"node_id":"MDEwOlJlcG9zaXRvcnkxNTMxMDA3NDU=","name":"EMRegistration","full_name":"vibbits/EMRegistration","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/EMRegistration","description":null,"fork":false,"url":"https://api.github.com/repos/vibbits/EMRegistration","forks_url":"https://api.github.com/repos/vibbits/EMRegistration/forks","keys_url":"https://api.github.com/repos/vibbits/EMRegistration/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/EMRegistration/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/EMRegistration/teams","hooks_url":"https://api.github.com/repos/vibbits/EMRegistration/hooks","issue_events_url":"https://api.github.com/repos/vibbits/EMRegistration/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/EMRegistration/events","assignees_url":"https://api.github.com/r
epos/vibbits/EMRegistration/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/EMRegistration/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/EMRegistration/tags","blobs_url":"https://api.github.com/repos/vibbits/EMRegistration/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/EMRegistration/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/EMRegistration/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/EMRegistration/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/EMRegistration/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/EMRegistration/languages","stargazers_url":"https://api.github.com/repos/vibbits/EMRegistration/stargazers","contributors_url":"https://api.github.com/repos/vibbits/EMRegistration/contributors","subscribers_url":"https://api.github.com/repos/vibbits/EMRegistration/subscribers","subscription_url":"https://api.github.com/repos/vibbits/EMRegistration/subscription","commits_url":"https://api.github.com/repos/vibbits/EMRegistration/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/EMRegistration/git/commits{/sha}","comments_url":"https://api.github.com/repos/vibbits/EMRegistration/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/EMRegistration/issues/comments{/number}","contents_url":"https://api.github.com/repos/vibbits/EMRegistration/contents/{+path}","compare_url":"https://api.github.com/repos/vibbits/EMRegistration/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/EMRegistration/merges","archive_url":"https://api.github.com/repos/vibbits/EMRegistration/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/EMRegistration/downloads","issues_url":"https://api.github.com/repos/vibbits/EMRegistration/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/EMRegistration/pulls{/number}","milestones_url":"
https://api.github.com/repos/vibbits/EMRegistration/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/EMRegistration/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/EMRegistration/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/EMRegistration/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/EMRegistration/deployments","created_at":"2018-10-15 11:21:38 UTC","updated_at":"2020-07-16 14:35:41 UTC","pushed_at":"2020-07-10 11:21:29 UTC","git_url":"git://github.com/vibbits/EMRegistration.git","ssh_url":"git@github.com:vibbits/EMRegistration.git","clone_url":"https://github.com/vibbits/EMRegistration.git","svn_url":"https://github.com/vibbits/EMRegistration","homepage":null,"size":118,"stargazers_count":0,"watchers_count":0,"language":"Java","has_issues":true,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":0,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":1,"license":null,"topics":[],"forks":0,"open_issues":1,"watchers":0,"default_branch":"master"},{"id":243021384,"node_id":"MDEwOlJlcG9zaXRvcnkyNDMwMjEzODQ=","name":"example-bioinformatics-project","full_name":"vibbits/example-bioinformatics-project","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"ht
tps://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/example-bioinformatics-project","description":"Compare and contrast available tools for bioinformatics projects","fork":false,"url":"https://api.github.com/repos/vibbits/example-bioinformatics-project","forks_url":"https://api.github.com/repos/vibbits/example-bioinformatics-project/forks","keys_url":"https://api.github.com/repos/vibbits/example-bioinformatics-project/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/example-bioinformatics-project/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/example-bioinformatics-project/teams","hooks_url":"https://api.github.com/repos/vibbits/example-bioinformatics-project/hooks","issue_events_url":"https://api.github.com/repos/vibbits/example-bioinformatics-project/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/example-bioinformatics-project/events","assignees_url":"https://api.github.com/repos/vibbits/example-bioinformatics-project/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/example-bioinformatics-project/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/example-bioinformatics-project/tags","blobs_url":"https://api.github.com/repos/vibbits/example-bioinformatics-project/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/example-bioinformatics-project/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/example-bioinformatics-project/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/example-bioinformatics-project/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/example-bioinformatics-project/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/example
-bioinformatics-project/languages","stargazers_url":"https://api.github.com/repos/vibbits/example-bioinformatics-project/stargazers","contributors_url":"https://api.github.com/repos/vibbits/example-bioinformatics-project/contributors","subscribers_url":"https://api.github.com/repos/vibbits/example-bioinformatics-project/subscribers","subscription_url":"https://api.github.com/repos/vibbits/example-bioinformatics-project/subscription","commits_url":"https://api.github.com/repos/vibbits/example-bioinformatics-project/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/example-bioinformatics-project/git/commits{/sha}","comments_url":"https://api.github.com/repos/vibbits/example-bioinformatics-project/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/example-bioinformatics-project/issues/comments{/number}","contents_url":"https://api.github.com/repos/vibbits/example-bioinformatics-project/contents/{+path}","compare_url":"https://api.github.com/repos/vibbits/example-bioinformatics-project/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/example-bioinformatics-project/merges","archive_url":"https://api.github.com/repos/vibbits/example-bioinformatics-project/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/example-bioinformatics-project/downloads","issues_url":"https://api.github.com/repos/vibbits/example-bioinformatics-project/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/example-bioinformatics-project/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/example-bioinformatics-project/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/example-bioinformatics-project/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/example-bioinformatics-project/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/example-bioinformatics-project/releases{/id}","deployme
nts_url":"https://api.github.com/repos/vibbits/example-bioinformatics-project/deployments","created_at":"2020-02-25 14:34:49 UTC","updated_at":"2020-05-04 07:31:07 UTC","pushed_at":"2020-05-04 07:31:05 UTC","git_url":"git://github.com/vibbits/example-bioinformatics-project.git","ssh_url":"git@github.com:vibbits/example-bioinformatics-project.git","clone_url":"https://github.com/vibbits/example-bioinformatics-project.git","svn_url":"https://github.com/vibbits/example-bioinformatics-project","homepage":null,"size":2166,"stargazers_count":0,"watchers_count":0,"language":"HTML","has_issues":true,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":0,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":0,"license":{"key":"gpl-3.0","name":"GNU General Public License v3.0","spdx_id":"GPL-3.0","url":"https://api.github.com/licenses/gpl-3.0","node_id":"MDc6TGljZW5zZTk="},"topics":[],"forks":0,"open_issues":0,"watchers":0,"default_branch":"master"},{"id":263285898,"node_id":"MDEwOlJlcG9zaXRvcnkyNjMyODU4OTg=","name":"fork-repository","full_name":"vibbits/fork-repository","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/u
sers/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/fork-repository","description":"This repository is made for teaching purposes. ","fork":false,"url":"https://api.github.com/repos/vibbits/fork-repository","forks_url":"https://api.github.com/repos/vibbits/fork-repository/forks","keys_url":"https://api.github.com/repos/vibbits/fork-repository/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/fork-repository/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/fork-repository/teams","hooks_url":"https://api.github.com/repos/vibbits/fork-repository/hooks","issue_events_url":"https://api.github.com/repos/vibbits/fork-repository/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/fork-repository/events","assignees_url":"https://api.github.com/repos/vibbits/fork-repository/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/fork-repository/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/fork-repository/tags","blobs_url":"https://api.github.com/repos/vibbits/fork-repository/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/fork-repository/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/fork-repository/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/fork-repository/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/fork-repository/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/fork-repository/languages","stargazers_url":"https://api.github.com/repos/vibbits/fork-repository/stargazers","contributors_url":"https://api.github.com/repos/vibbits/fork-repository/contributors","subscribers_url":"https://api.github.com/repos/vibbits/fork-repository/subscribers","subscription_url":"https://api.github.com/repos/vibbits/fork-repository/subscription","commits_url":"https://api.github.com/repos/vibbits/fork-repository/
commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/fork-repository/git/commits{/sha}","comments_url":"https://api.github.com/repos/vibbits/fork-repository/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/fork-repository/issues/comments{/number}","contents_url":"https://api.github.com/repos/vibbits/fork-repository/contents/{+path}","compare_url":"https://api.github.com/repos/vibbits/fork-repository/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/fork-repository/merges","archive_url":"https://api.github.com/repos/vibbits/fork-repository/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/fork-repository/downloads","issues_url":"https://api.github.com/repos/vibbits/fork-repository/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/fork-repository/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/fork-repository/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/fork-repository/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/fork-repository/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/fork-repository/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/fork-repository/deployments","created_at":"2020-05-12 09:01:58 UTC","updated_at":"2021-05-11 13:24:26 UTC","pushed_at":"2021-05-11 13:27:11 
UTC","git_url":"git://github.com/vibbits/fork-repository.git","ssh_url":"git@github.com:vibbits/fork-repository.git","clone_url":"https://github.com/vibbits/fork-repository.git","svn_url":"https://github.com/vibbits/fork-repository","homepage":null,"size":54,"stargazers_count":1,"watchers_count":1,"language":null,"has_issues":true,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":true,"forks_count":30,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":7,"license":null,"topics":[],"forks":30,"open_issues":7,"watchers":1,"default_branch":"master"},{"id":295326720,"node_id":"MDEwOlJlcG9zaXRvcnkyOTUzMjY3MjA=","name":"galaxy-variantcalling","full_name":"vibbits/galaxy-variantcalling","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/galaxy-variantcalling","description":"Course materials - Galaxy variant calling - ELIXIR 
16092020","fork":false,"url":"https://api.github.com/repos/vibbits/galaxy-variantcalling","forks_url":"https://api.github.com/repos/vibbits/galaxy-variantcalling/forks","keys_url":"https://api.github.com/repos/vibbits/galaxy-variantcalling/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/galaxy-variantcalling/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/galaxy-variantcalling/teams","hooks_url":"https://api.github.com/repos/vibbits/galaxy-variantcalling/hooks","issue_events_url":"https://api.github.com/repos/vibbits/galaxy-variantcalling/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/galaxy-variantcalling/events","assignees_url":"https://api.github.com/repos/vibbits/galaxy-variantcalling/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/galaxy-variantcalling/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/galaxy-variantcalling/tags","blobs_url":"https://api.github.com/repos/vibbits/galaxy-variantcalling/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/galaxy-variantcalling/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/galaxy-variantcalling/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/galaxy-variantcalling/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/galaxy-variantcalling/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/galaxy-variantcalling/languages","stargazers_url":"https://api.github.com/repos/vibbits/galaxy-variantcalling/stargazers","contributors_url":"https://api.github.com/repos/vibbits/galaxy-variantcalling/contributors","subscribers_url":"https://api.github.com/repos/vibbits/galaxy-variantcalling/subscribers","subscription_url":"https://api.github.com/repos/vibbits/galaxy-variantcalling/subscription","commits_url":"https://api.github.com/repos/vibbits/galaxy-variantcalling/commits{/sha}","git_commits_url":"https://api.gith
ub.com/repos/vibbits/galaxy-variantcalling/git/commits{/sha}","comments_url":"https://api.github.com/repos/vibbits/galaxy-variantcalling/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/galaxy-variantcalling/issues/comments{/number}","contents_url":"https://api.github.com/repos/vibbits/galaxy-variantcalling/contents/{+path}","compare_url":"https://api.github.com/repos/vibbits/galaxy-variantcalling/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/galaxy-variantcalling/merges","archive_url":"https://api.github.com/repos/vibbits/galaxy-variantcalling/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/galaxy-variantcalling/downloads","issues_url":"https://api.github.com/repos/vibbits/galaxy-variantcalling/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/galaxy-variantcalling/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/galaxy-variantcalling/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/galaxy-variantcalling/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/galaxy-variantcalling/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/galaxy-variantcalling/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/galaxy-variantcalling/deployments","created_at":"2020-09-14 06:39:54 UTC","updated_at":"2020-09-16 06:20:02 UTC","pushed_at":"2020-09-16 06:20:00 
UTC","git_url":"git://github.com/vibbits/galaxy-variantcalling.git","ssh_url":"git@github.com:vibbits/galaxy-variantcalling.git","clone_url":"https://github.com/vibbits/galaxy-variantcalling.git","svn_url":"https://github.com/vibbits/galaxy-variantcalling","homepage":null,"size":7339,"stargazers_count":0,"watchers_count":0,"language":null,"has_issues":true,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":0,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":0,"license":{"key":"other","name":"Other","spdx_id":"NOASSERTION","url":null,"node_id":"MDc6TGljZW5zZTA="},"topics":[],"forks":0,"open_issues":0,"watchers":0,"default_branch":"master"},{"id":247007332,"node_id":"MDEwOlJlcG9zaXRvcnkyNDcwMDczMzI=","name":"gentle-hands-on-python","full_name":"vibbits/gentle-hands-on-python","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/gentle-hands-on-python","description":"Course materials for the 'Gentle hands-on introduction to Python programming'. 
","fork":false,"url":"https://api.github.com/repos/vibbits/gentle-hands-on-python","forks_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python/forks","keys_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python/teams","hooks_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python/hooks","issue_events_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python/events","assignees_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python/tags","blobs_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python/languages","stargazers_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python/stargazers","contributors_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python/contributors","subscribers_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python/subscribers","subscription_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python/subscription","commits_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python/commits{/sha}","git_commits_url":"ht
tps://api.github.com/repos/vibbits/gentle-hands-on-python/git/commits{/sha}","comments_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python/issues/comments{/number}","contents_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python/contents/{+path}","compare_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python/merges","archive_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python/downloads","issues_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python/deployments","created_at":"2020-03-13 06:57:57 UTC","updated_at":"2021-05-25 13:02:58 UTC","pushed_at":"2021-05-25 13:02:56 UTC","git_url":"git://github.com/vibbits/gentle-hands-on-python.git","ssh_url":"git@github.com:vibbits/gentle-hands-on-python.git","clone_url":"https://github.com/vibbits/gentle-hands-on-python.git","svn_url":"https://github.com/vibbits/gentle-hands-on-python","homepage":null,"size":2956,"stargazers_count":0,"watchers_count":0,"language":"Jupyter 
Notebook","has_issues":true,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":1,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":9,"license":{"key":"other","name":"Other","spdx_id":"NOASSERTION","url":null,"node_id":"MDc6TGljZW5zZTA="},"topics":[],"forks":1,"open_issues":9,"watchers":0,"default_branch":"master"},{"id":232378367,"node_id":"MDEwOlJlcG9zaXRvcnkyMzIzNzgzNjc=","name":"gentle-hands-on-python-09012020","full_name":"vibbits/gentle-hands-on-python-09012020","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/gentle-hands-on-python-09012020","description":"Copy of Gentle-Hands-On-Python repo, however without solutions in jupyter 
notebook","fork":false,"url":"https://api.github.com/repos/vibbits/gentle-hands-on-python-09012020","forks_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python-09012020/forks","keys_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python-09012020/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python-09012020/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python-09012020/teams","hooks_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python-09012020/hooks","issue_events_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python-09012020/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python-09012020/events","assignees_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python-09012020/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python-09012020/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python-09012020/tags","blobs_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python-09012020/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python-09012020/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python-09012020/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python-09012020/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python-09012020/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python-09012020/languages","stargazers_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python-09012020/stargazers","contributors_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python-09012020/contributors","subscribers_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python-09012020/subscribers","subscription_url
":"https://api.github.com/repos/vibbits/gentle-hands-on-python-09012020/subscription","commits_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python-09012020/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python-09012020/git/commits{/sha}","comments_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python-09012020/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python-09012020/issues/comments{/number}","contents_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python-09012020/contents/{+path}","compare_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python-09012020/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python-09012020/merges","archive_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python-09012020/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python-09012020/downloads","issues_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python-09012020/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python-09012020/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python-09012020/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python-09012020/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python-09012020/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python-09012020/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/gentle-hands-on-python-09012020/deployments","created_at":"2020-01-07 17:26:55 UTC","updated_at":"2020-01-16 14:51:32 UTC","pushed_at":"2020-01-16 14:51:31 
UTC","git_url":"git://github.com/vibbits/gentle-hands-on-python-09012020.git","ssh_url":"git@github.com:vibbits/gentle-hands-on-python-09012020.git","clone_url":"https://github.com/vibbits/gentle-hands-on-python-09012020.git","svn_url":"https://github.com/vibbits/gentle-hands-on-python-09012020","homepage":null,"size":559,"stargazers_count":0,"watchers_count":0,"language":"Jupyter Notebook","has_issues":true,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":0,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":0,"license":null,"topics":[],"forks":0,"open_issues":0,"watchers":0,"default_branch":"master"},{"id":223176463,"node_id":"MDEwOlJlcG9zaXRvcnkyMjMxNzY0NjM=","name":"herodotus","full_name":"vibbits/herodotus","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/herodotus","description":"Your friendly neighbourhood Slack historian 
(bot).","fork":false,"url":"https://api.github.com/repos/vibbits/herodotus","forks_url":"https://api.github.com/repos/vibbits/herodotus/forks","keys_url":"https://api.github.com/repos/vibbits/herodotus/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/herodotus/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/herodotus/teams","hooks_url":"https://api.github.com/repos/vibbits/herodotus/hooks","issue_events_url":"https://api.github.com/repos/vibbits/herodotus/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/herodotus/events","assignees_url":"https://api.github.com/repos/vibbits/herodotus/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/herodotus/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/herodotus/tags","blobs_url":"https://api.github.com/repos/vibbits/herodotus/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/herodotus/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/herodotus/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/herodotus/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/herodotus/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/herodotus/languages","stargazers_url":"https://api.github.com/repos/vibbits/herodotus/stargazers","contributors_url":"https://api.github.com/repos/vibbits/herodotus/contributors","subscribers_url":"https://api.github.com/repos/vibbits/herodotus/subscribers","subscription_url":"https://api.github.com/repos/vibbits/herodotus/subscription","commits_url":"https://api.github.com/repos/vibbits/herodotus/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/herodotus/git/commits{/sha}","comments_url":"https://api.github.com/repos/vibbits/herodotus/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/herodotus/issues/comments{/number}","contents_url":"https://api.github.com/r
epos/vibbits/herodotus/contents/{+path}","compare_url":"https://api.github.com/repos/vibbits/herodotus/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/herodotus/merges","archive_url":"https://api.github.com/repos/vibbits/herodotus/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/herodotus/downloads","issues_url":"https://api.github.com/repos/vibbits/herodotus/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/herodotus/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/herodotus/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/herodotus/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/herodotus/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/herodotus/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/herodotus/deployments","created_at":"2019-11-21 13:08:27 UTC","updated_at":"2020-12-15 13:11:24 UTC","pushed_at":"2019-11-21 14:02:18 UTC","git_url":"git://github.com/vibbits/herodotus.git","ssh_url":"git@github.com:vibbits/herodotus.git","clone_url":"https://github.com/vibbits/herodotus.git","svn_url":"https://github.com/vibbits/herodotus","homepage":null,"size":24,"stargazers_count":1,"watchers_count":1,"language":"Clojure","has_issues":true,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":0,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":1,"license":{"key":"epl-2.0","name":"Eclipse Public License 
2.0","spdx_id":"EPL-2.0","url":"https://api.github.com/licenses/epl-2.0","node_id":"MDc6TGljZW5zZTMy"},"topics":[],"forks":0,"open_issues":1,"watchers":1,"default_branch":"master"},{"id":232307716,"node_id":"MDEwOlJlcG9zaXRvcnkyMzIzMDc3MTY=","name":"ilastik","full_name":"vibbits/ilastik","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/ilastik","description":"ilastik-shell, applets, and workflows to string them 
together.","fork":true,"url":"https://api.github.com/repos/vibbits/ilastik","forks_url":"https://api.github.com/repos/vibbits/ilastik/forks","keys_url":"https://api.github.com/repos/vibbits/ilastik/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/ilastik/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/ilastik/teams","hooks_url":"https://api.github.com/repos/vibbits/ilastik/hooks","issue_events_url":"https://api.github.com/repos/vibbits/ilastik/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/ilastik/events","assignees_url":"https://api.github.com/repos/vibbits/ilastik/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/ilastik/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/ilastik/tags","blobs_url":"https://api.github.com/repos/vibbits/ilastik/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/ilastik/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/ilastik/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/ilastik/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/ilastik/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/ilastik/languages","stargazers_url":"https://api.github.com/repos/vibbits/ilastik/stargazers","contributors_url":"https://api.github.com/repos/vibbits/ilastik/contributors","subscribers_url":"https://api.github.com/repos/vibbits/ilastik/subscribers","subscription_url":"https://api.github.com/repos/vibbits/ilastik/subscription","commits_url":"https://api.github.com/repos/vibbits/ilastik/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/ilastik/git/commits{/sha}","comments_url":"https://api.github.com/repos/vibbits/ilastik/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/ilastik/issues/comments{/number}","contents_url":"https://api.github.com/repos/vibbits/ilastik/contents/{+path}","compare_
url":"https://api.github.com/repos/vibbits/ilastik/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/ilastik/merges","archive_url":"https://api.github.com/repos/vibbits/ilastik/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/ilastik/downloads","issues_url":"https://api.github.com/repos/vibbits/ilastik/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/ilastik/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/ilastik/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/ilastik/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/ilastik/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/ilastik/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/ilastik/deployments","created_at":"2020-01-07 11:11:11 UTC","updated_at":"2020-02-05 14:51:44 UTC","pushed_at":"2020-02-05 14:51:41 UTC","git_url":"git://github.com/vibbits/ilastik.git","ssh_url":"git@github.com:vibbits/ilastik.git","clone_url":"https://github.com/vibbits/ilastik.git","svn_url":"https://github.com/vibbits/ilastik","homepage":null,"size":105559,"stargazers_count":0,"watchers_count":0,"language":"Python","has_issues":false,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":0,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":0,"license":{"key":"other","name":"Other","spdx_id":"NOASSERTION","url":null,"node_id":"MDc6TGljZW5zZTA="},"topics":[],"forks":0,"open_issues":0,"watchers":0,"default_branch":"master"},{"id":280193746,"node_id":"MDEwOlJlcG9zaXRvcnkyODAxOTM3NDY=","name":"ilastik-meta","full_name":"vibbits/ilastik-meta","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"h
ttps://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/ilastik-meta","description":null,"fork":false,"url":"https://api.github.com/repos/vibbits/ilastik-meta","forks_url":"https://api.github.com/repos/vibbits/ilastik-meta/forks","keys_url":"https://api.github.com/repos/vibbits/ilastik-meta/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/ilastik-meta/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/ilastik-meta/teams","hooks_url":"https://api.github.com/repos/vibbits/ilastik-meta/hooks","issue_events_url":"https://api.github.com/repos/vibbits/ilastik-meta/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/ilastik-meta/events","assignees_url":"https://api.github.com/repos/vibbits/ilastik-meta/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/ilastik-meta/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/ilastik-meta/tags","blobs_url":"https://api.github.com/repos/vibbits/ilastik-meta/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/ilastik-meta/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/ilastik-meta/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/ilastik-meta/git/trees{/sha}","statuses_url":"https://api.github.com/repos/v
ibbits/ilastik-meta/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/ilastik-meta/languages","stargazers_url":"https://api.github.com/repos/vibbits/ilastik-meta/stargazers","contributors_url":"https://api.github.com/repos/vibbits/ilastik-meta/contributors","subscribers_url":"https://api.github.com/repos/vibbits/ilastik-meta/subscribers","subscription_url":"https://api.github.com/repos/vibbits/ilastik-meta/subscription","commits_url":"https://api.github.com/repos/vibbits/ilastik-meta/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/ilastik-meta/git/commits{/sha}","comments_url":"https://api.github.com/repos/vibbits/ilastik-meta/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/ilastik-meta/issues/comments{/number}","contents_url":"https://api.github.com/repos/vibbits/ilastik-meta/contents/{+path}","compare_url":"https://api.github.com/repos/vibbits/ilastik-meta/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/ilastik-meta/merges","archive_url":"https://api.github.com/repos/vibbits/ilastik-meta/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/ilastik-meta/downloads","issues_url":"https://api.github.com/repos/vibbits/ilastik-meta/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/ilastik-meta/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/ilastik-meta/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/ilastik-meta/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/ilastik-meta/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/ilastik-meta/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/ilastik-meta/deployments","created_at":"2020-07-16 15:45:53 UTC","updated_at":"2020-07-16 15:52:52 UTC","pushed_at":"2020-07-16 15:50:36 
UTC","git_url":"git://github.com/vibbits/ilastik-meta.git","ssh_url":"git@github.com:vibbits/ilastik-meta.git","clone_url":"https://github.com/vibbits/ilastik-meta.git","svn_url":"https://github.com/vibbits/ilastik-meta","homepage":null,"size":83,"stargazers_count":0,"watchers_count":0,"language":null,"has_issues":true,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":0,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":0,"license":null,"topics":[],"forks":0,"open_issues":0,"watchers":0,"default_branch":"vib-deep-learning-forks"},{"id":169587916,"node_id":"MDEwOlJlcG9zaXRvcnkxNjk1ODc5MTY=","name":"incubator-echarts","full_name":"vibbits/incubator-echarts","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/incubator-echarts","description":"A powerful, interactive charting and visualization library for 
browser","fork":true,"url":"https://api.github.com/repos/vibbits/incubator-echarts","forks_url":"https://api.github.com/repos/vibbits/incubator-echarts/forks","keys_url":"https://api.github.com/repos/vibbits/incubator-echarts/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/incubator-echarts/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/incubator-echarts/teams","hooks_url":"https://api.github.com/repos/vibbits/incubator-echarts/hooks","issue_events_url":"https://api.github.com/repos/vibbits/incubator-echarts/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/incubator-echarts/events","assignees_url":"https://api.github.com/repos/vibbits/incubator-echarts/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/incubator-echarts/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/incubator-echarts/tags","blobs_url":"https://api.github.com/repos/vibbits/incubator-echarts/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/incubator-echarts/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/incubator-echarts/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/incubator-echarts/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/incubator-echarts/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/incubator-echarts/languages","stargazers_url":"https://api.github.com/repos/vibbits/incubator-echarts/stargazers","contributors_url":"https://api.github.com/repos/vibbits/incubator-echarts/contributors","subscribers_url":"https://api.github.com/repos/vibbits/incubator-echarts/subscribers","subscription_url":"https://api.github.com/repos/vibbits/incubator-echarts/subscription","commits_url":"https://api.github.com/repos/vibbits/incubator-echarts/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/incubator-echarts/git/commits{/sha}","comments_url":"https://api.gith
ub.com/repos/vibbits/incubator-echarts/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/incubator-echarts/issues/comments{/number}","contents_url":"https://api.github.com/repos/vibbits/incubator-echarts/contents/{+path}","compare_url":"https://api.github.com/repos/vibbits/incubator-echarts/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/incubator-echarts/merges","archive_url":"https://api.github.com/repos/vibbits/incubator-echarts/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/incubator-echarts/downloads","issues_url":"https://api.github.com/repos/vibbits/incubator-echarts/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/incubator-echarts/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/incubator-echarts/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/incubator-echarts/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/incubator-echarts/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/incubator-echarts/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/incubator-echarts/deployments","created_at":"2019-02-07 14:50:26 UTC","updated_at":"2019-02-07 14:50:32 UTC","pushed_at":"2019-02-07 13:03:45 UTC","git_url":"git://github.com/vibbits/incubator-echarts.git","ssh_url":"git@github.com:vibbits/incubator-echarts.git","clone_url":"https://github.com/vibbits/incubator-echarts.git","svn_url":"https://github.com/vibbits/incubator-echarts","homepage":"http://echarts.apache.org/","size":151893,"stargazers_count":0,"watchers_count":0,"language":"JavaScript","has_issues":false,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":0,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":0,"license":{"key":"apache-2.0","name":"Apache License 
2.0","spdx_id":"Apache-2.0","url":"https://api.github.com/licenses/apache-2.0","node_id":"MDc6TGljZW5zZTI="},"topics":[],"forks":0,"open_issues":0,"watchers":0,"default_branch":"master"},{"id":258452760,"node_id":"MDEwOlJlcG9zaXRvcnkyNTg0NTI3NjA=","name":"introduction-github","full_name":"vibbits/introduction-github","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/introduction-github","description":"This is a test-repository for the GitHub 
tutorial.","fork":false,"url":"https://api.github.com/repos/vibbits/introduction-github","forks_url":"https://api.github.com/repos/vibbits/introduction-github/forks","keys_url":"https://api.github.com/repos/vibbits/introduction-github/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/introduction-github/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/introduction-github/teams","hooks_url":"https://api.github.com/repos/vibbits/introduction-github/hooks","issue_events_url":"https://api.github.com/repos/vibbits/introduction-github/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/introduction-github/events","assignees_url":"https://api.github.com/repos/vibbits/introduction-github/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/introduction-github/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/introduction-github/tags","blobs_url":"https://api.github.com/repos/vibbits/introduction-github/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/introduction-github/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/introduction-github/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/introduction-github/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/introduction-github/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/introduction-github/languages","stargazers_url":"https://api.github.com/repos/vibbits/introduction-github/stargazers","contributors_url":"https://api.github.com/repos/vibbits/introduction-github/contributors","subscribers_url":"https://api.github.com/repos/vibbits/introduction-github/subscribers","subscription_url":"https://api.github.com/repos/vibbits/introduction-github/subscription","commits_url":"https://api.github.com/repos/vibbits/introduction-github/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/introduction-github/gi
t/commits{/sha}","comments_url":"https://api.github.com/repos/vibbits/introduction-github/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/introduction-github/issues/comments{/number}","contents_url":"https://api.github.com/repos/vibbits/introduction-github/contents/{+path}","compare_url":"https://api.github.com/repos/vibbits/introduction-github/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/introduction-github/merges","archive_url":"https://api.github.com/repos/vibbits/introduction-github/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/introduction-github/downloads","issues_url":"https://api.github.com/repos/vibbits/introduction-github/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/introduction-github/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/introduction-github/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/introduction-github/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/introduction-github/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/introduction-github/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/introduction-github/deployments","created_at":"2020-04-24 08:29:21 UTC","updated_at":"2020-06-19 15:03:58 UTC","pushed_at":"2021-01-05 07:37:18 
UTC","git_url":"git://github.com/vibbits/introduction-github.git","ssh_url":"git@github.com:vibbits/introduction-github.git","clone_url":"https://github.com/vibbits/introduction-github.git","svn_url":"https://github.com/vibbits/introduction-github","homepage":null,"size":7,"stargazers_count":0,"watchers_count":0,"language":"R","has_issues":true,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":1,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":0,"license":null,"topics":[],"forks":1,"open_issues":0,"watchers":0,"default_branch":"master"},{"id":153101025,"node_id":"MDEwOlJlcG9zaXRvcnkxNTMxMDEwMjU=","name":"JavaQuasarBridge","full_name":"vibbits/JavaQuasarBridge","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/JavaQuasarBridge","description":null,"fork":false,"url":"https://api.github.com/repos/vibbits/JavaQuasarBridge","forks_url":"https://api.github.com/repos/vibbits/JavaQuasarBridge/forks","keys_url":"https://api.github.com/repos/vibbits/JavaQuasarBridge/keys{/key_id}","collaborators_url":"ht
tps://api.github.com/repos/vibbits/JavaQuasarBridge/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/JavaQuasarBridge/teams","hooks_url":"https://api.github.com/repos/vibbits/JavaQuasarBridge/hooks","issue_events_url":"https://api.github.com/repos/vibbits/JavaQuasarBridge/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/JavaQuasarBridge/events","assignees_url":"https://api.github.com/repos/vibbits/JavaQuasarBridge/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/JavaQuasarBridge/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/JavaQuasarBridge/tags","blobs_url":"https://api.github.com/repos/vibbits/JavaQuasarBridge/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/JavaQuasarBridge/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/JavaQuasarBridge/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/JavaQuasarBridge/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/JavaQuasarBridge/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/JavaQuasarBridge/languages","stargazers_url":"https://api.github.com/repos/vibbits/JavaQuasarBridge/stargazers","contributors_url":"https://api.github.com/repos/vibbits/JavaQuasarBridge/contributors","subscribers_url":"https://api.github.com/repos/vibbits/JavaQuasarBridge/subscribers","subscription_url":"https://api.github.com/repos/vibbits/JavaQuasarBridge/subscription","commits_url":"https://api.github.com/repos/vibbits/JavaQuasarBridge/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/JavaQuasarBridge/git/commits{/sha}","comments_url":"https://api.github.com/repos/vibbits/JavaQuasarBridge/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/JavaQuasarBridge/issues/comments{/number}","contents_url":"https://api.github.com/repos/vibbits/JavaQuasarBridge/contents/{+path}","compare_url":"https://api.github.com/
repos/vibbits/JavaQuasarBridge/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/JavaQuasarBridge/merges","archive_url":"https://api.github.com/repos/vibbits/JavaQuasarBridge/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/JavaQuasarBridge/downloads","issues_url":"https://api.github.com/repos/vibbits/JavaQuasarBridge/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/JavaQuasarBridge/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/JavaQuasarBridge/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/JavaQuasarBridge/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/JavaQuasarBridge/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/JavaQuasarBridge/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/JavaQuasarBridge/deployments","created_at":"2018-10-15 11:24:09 UTC","updated_at":"2019-10-02 15:58:35 UTC","pushed_at":"2019-10-02 15:52:05 UTC","git_url":"git://github.com/vibbits/JavaQuasarBridge.git","ssh_url":"git@github.com:vibbits/JavaQuasarBridge.git","clone_url":"https://github.com/vibbits/JavaQuasarBridge.git","svn_url":"https://github.com/vibbits/JavaQuasarBridge","homepage":null,"size":132,"stargazers_count":0,"watchers_count":0,"language":"C++","has_issues":true,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":1,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":0,"license":{"key":"gpl-3.0","name":"GNU General Public License 
v3.0","spdx_id":"GPL-3.0","url":"https://api.github.com/licenses/gpl-3.0","node_id":"MDc6TGljZW5zZTk="},"topics":[],"forks":1,"open_issues":0,"watchers":0,"default_branch":"master"},{"id":241322809,"node_id":"MDEwOlJlcG9zaXRvcnkyNDEzMjI4MDk=","name":"jekyll-pdf","full_name":"vibbits/jekyll-pdf","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/jekyll-pdf","description":"Create PDFs from Jekyll pages & 
documents.","fork":true,"url":"https://api.github.com/repos/vibbits/jekyll-pdf","forks_url":"https://api.github.com/repos/vibbits/jekyll-pdf/forks","keys_url":"https://api.github.com/repos/vibbits/jekyll-pdf/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/jekyll-pdf/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/jekyll-pdf/teams","hooks_url":"https://api.github.com/repos/vibbits/jekyll-pdf/hooks","issue_events_url":"https://api.github.com/repos/vibbits/jekyll-pdf/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/jekyll-pdf/events","assignees_url":"https://api.github.com/repos/vibbits/jekyll-pdf/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/jekyll-pdf/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/jekyll-pdf/tags","blobs_url":"https://api.github.com/repos/vibbits/jekyll-pdf/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/jekyll-pdf/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/jekyll-pdf/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/jekyll-pdf/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/jekyll-pdf/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/jekyll-pdf/languages","stargazers_url":"https://api.github.com/repos/vibbits/jekyll-pdf/stargazers","contributors_url":"https://api.github.com/repos/vibbits/jekyll-pdf/contributors","subscribers_url":"https://api.github.com/repos/vibbits/jekyll-pdf/subscribers","subscription_url":"https://api.github.com/repos/vibbits/jekyll-pdf/subscription","commits_url":"https://api.github.com/repos/vibbits/jekyll-pdf/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/jekyll-pdf/git/commits{/sha}","comments_url":"https://api.github.com/repos/vibbits/jekyll-pdf/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/jekyll-pdf/issues/comments{/number}","contents_ur
l":"https://api.github.com/repos/vibbits/jekyll-pdf/contents/{+path}","compare_url":"https://api.github.com/repos/vibbits/jekyll-pdf/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/jekyll-pdf/merges","archive_url":"https://api.github.com/repos/vibbits/jekyll-pdf/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/jekyll-pdf/downloads","issues_url":"https://api.github.com/repos/vibbits/jekyll-pdf/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/jekyll-pdf/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/jekyll-pdf/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/jekyll-pdf/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/jekyll-pdf/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/jekyll-pdf/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/jekyll-pdf/deployments","created_at":"2020-02-18 09:36:27 UTC","updated_at":"2020-02-18 09:40:02 UTC","pushed_at":"2020-02-18 15:55:41 UTC","git_url":"git://github.com/vibbits/jekyll-pdf.git","ssh_url":"git@github.com:vibbits/jekyll-pdf.git","clone_url":"https://github.com/vibbits/jekyll-pdf.git","svn_url":"https://github.com/vibbits/jekyll-pdf","homepage":"http://abemedia.co.uk/jekyll-pdf/","size":47,"stargazers_count":0,"watchers_count":0,"language":"Ruby","has_issues":false,"has_projects":true,"has_downloads":true,"has_wiki":false,"has_pages":false,"forks_count":1,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":0,"license":{"key":"gpl-3.0","name":"GNU General Public License 
v3.0","spdx_id":"GPL-3.0","url":"https://api.github.com/licenses/gpl-3.0","node_id":"MDc6TGljZW5zZTk="},"topics":[],"forks":1,"open_issues":0,"watchers":0,"default_branch":"master"},{"id":232307757,"node_id":"MDEwOlJlcG9zaXRvcnkyMzIzMDc3NTc=","name":"lazyflow","full_name":"vibbits/lazyflow","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/lazyflow","description":"lazy parallel ondemand  zero copy numpy array data flows with caching and dirty 
propagation","fork":true,"url":"https://api.github.com/repos/vibbits/lazyflow","forks_url":"https://api.github.com/repos/vibbits/lazyflow/forks","keys_url":"https://api.github.com/repos/vibbits/lazyflow/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/lazyflow/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/lazyflow/teams","hooks_url":"https://api.github.com/repos/vibbits/lazyflow/hooks","issue_events_url":"https://api.github.com/repos/vibbits/lazyflow/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/lazyflow/events","assignees_url":"https://api.github.com/repos/vibbits/lazyflow/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/lazyflow/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/lazyflow/tags","blobs_url":"https://api.github.com/repos/vibbits/lazyflow/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/lazyflow/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/lazyflow/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/lazyflow/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/lazyflow/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/lazyflow/languages","stargazers_url":"https://api.github.com/repos/vibbits/lazyflow/stargazers","contributors_url":"https://api.github.com/repos/vibbits/lazyflow/contributors","subscribers_url":"https://api.github.com/repos/vibbits/lazyflow/subscribers","subscription_url":"https://api.github.com/repos/vibbits/lazyflow/subscription","commits_url":"https://api.github.com/repos/vibbits/lazyflow/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/lazyflow/git/commits{/sha}","comments_url":"https://api.github.com/repos/vibbits/lazyflow/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/lazyflow/issues/comments{/number}","contents_url":"https://api.github.com/repos/vibbits/lazyflow
/contents/{+path}","compare_url":"https://api.github.com/repos/vibbits/lazyflow/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/lazyflow/merges","archive_url":"https://api.github.com/repos/vibbits/lazyflow/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/lazyflow/downloads","issues_url":"https://api.github.com/repos/vibbits/lazyflow/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/lazyflow/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/lazyflow/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/lazyflow/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/lazyflow/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/lazyflow/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/lazyflow/deployments","created_at":"2020-01-07 11:11:28 UTC","updated_at":"2020-02-03 14:04:14 UTC","pushed_at":"2020-02-03 14:04:10 
UTC","git_url":"git://github.com/vibbits/lazyflow.git","ssh_url":"git@github.com:vibbits/lazyflow.git","clone_url":"https://github.com/vibbits/lazyflow.git","svn_url":"https://github.com/vibbits/lazyflow","homepage":"","size":17494,"stargazers_count":0,"watchers_count":0,"language":"Python","has_issues":false,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":0,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":0,"license":{"key":"other","name":"Other","spdx_id":"NOASSERTION","url":null,"node_id":"MDc6TGljZW5zZTA="},"topics":[],"forks":0,"open_issues":0,"watchers":0,"default_branch":"master"},{"id":232080557,"node_id":"MDEwOlJlcG9zaXRvcnkyMzIwODA1NTc=","name":"LiaScript","full_name":"vibbits/LiaScript","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/LiaScript","description":"Interpreter interactive educational content, written in an extended Markdown format...   
","fork":true,"url":"https://api.github.com/repos/vibbits/LiaScript","forks_url":"https://api.github.com/repos/vibbits/LiaScript/forks","keys_url":"https://api.github.com/repos/vibbits/LiaScript/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/LiaScript/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/LiaScript/teams","hooks_url":"https://api.github.com/repos/vibbits/LiaScript/hooks","issue_events_url":"https://api.github.com/repos/vibbits/LiaScript/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/LiaScript/events","assignees_url":"https://api.github.com/repos/vibbits/LiaScript/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/LiaScript/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/LiaScript/tags","blobs_url":"https://api.github.com/repos/vibbits/LiaScript/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/LiaScript/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/LiaScript/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/LiaScript/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/LiaScript/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/LiaScript/languages","stargazers_url":"https://api.github.com/repos/vibbits/LiaScript/stargazers","contributors_url":"https://api.github.com/repos/vibbits/LiaScript/contributors","subscribers_url":"https://api.github.com/repos/vibbits/LiaScript/subscribers","subscription_url":"https://api.github.com/repos/vibbits/LiaScript/subscription","commits_url":"https://api.github.com/repos/vibbits/LiaScript/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/LiaScript/git/commits{/sha}","comments_url":"https://api.github.com/repos/vibbits/LiaScript/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/LiaScript/issues/comments{/number}","contents_url":"https://api.github.com/repos/vi
bbits/LiaScript/contents/{+path}","compare_url":"https://api.github.com/repos/vibbits/LiaScript/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/LiaScript/merges","archive_url":"https://api.github.com/repos/vibbits/LiaScript/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/LiaScript/downloads","issues_url":"https://api.github.com/repos/vibbits/LiaScript/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/LiaScript/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/LiaScript/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/LiaScript/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/LiaScript/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/LiaScript/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/LiaScript/deployments","created_at":"2020-01-06 10:47:20 UTC","updated_at":"2020-01-20 18:41:25 UTC","pushed_at":"2020-01-20 18:41:23 
UTC","git_url":"git://github.com/vibbits/LiaScript.git","ssh_url":"git@github.com:vibbits/LiaScript.git","clone_url":"https://github.com/vibbits/LiaScript.git","svn_url":"https://github.com/vibbits/LiaScript","homepage":"https://LiaScript.github.io","size":12701,"stargazers_count":0,"watchers_count":0,"language":"Elm","has_issues":false,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":0,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":0,"license":null,"topics":[],"forks":0,"open_issues":0,"watchers":0,"default_branch":"master"},{"id":230752313,"node_id":"MDEwOlJlcG9zaXRvcnkyMzA3NTIzMTM=","name":"material-liascript","full_name":"vibbits/material-liascript","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/material-liascript","description":null,"fork":false,"url":"https://api.github.com/repos/vibbits/material-liascript","forks_url":"https://api.github.com/repos/vibbits/material-liascript/forks","keys_url":"https://api.github.com/repos/vibbits/material-liascript/keys{/key_id}","collaborators_url"
:"https://api.github.com/repos/vibbits/material-liascript/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/material-liascript/teams","hooks_url":"https://api.github.com/repos/vibbits/material-liascript/hooks","issue_events_url":"https://api.github.com/repos/vibbits/material-liascript/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/material-liascript/events","assignees_url":"https://api.github.com/repos/vibbits/material-liascript/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/material-liascript/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/material-liascript/tags","blobs_url":"https://api.github.com/repos/vibbits/material-liascript/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/material-liascript/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/material-liascript/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/material-liascript/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/material-liascript/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/material-liascript/languages","stargazers_url":"https://api.github.com/repos/vibbits/material-liascript/stargazers","contributors_url":"https://api.github.com/repos/vibbits/material-liascript/contributors","subscribers_url":"https://api.github.com/repos/vibbits/material-liascript/subscribers","subscription_url":"https://api.github.com/repos/vibbits/material-liascript/subscription","commits_url":"https://api.github.com/repos/vibbits/material-liascript/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/material-liascript/git/commits{/sha}","comments_url":"https://api.github.com/repos/vibbits/material-liascript/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/material-liascript/issues/comments{/number}","contents_url":"https://api.github.com/repos/vibbits/material-liascript/conten
ts/{+path}","compare_url":"https://api.github.com/repos/vibbits/material-liascript/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/material-liascript/merges","archive_url":"https://api.github.com/repos/vibbits/material-liascript/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/material-liascript/downloads","issues_url":"https://api.github.com/repos/vibbits/material-liascript/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/material-liascript/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/material-liascript/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/material-liascript/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/material-liascript/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/material-liascript/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/material-liascript/deployments","created_at":"2019-12-29 13:13:06 UTC","updated_at":"2020-12-02 20:54:40 UTC","pushed_at":"2020-02-24 08:00:47 
UTC","git_url":"git://github.com/vibbits/material-liascript.git","ssh_url":"git@github.com:vibbits/material-liascript.git","clone_url":"https://github.com/vibbits/material-liascript.git","svn_url":"https://github.com/vibbits/material-liascript","homepage":null,"size":5468,"stargazers_count":1,"watchers_count":1,"language":null,"has_issues":true,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":0,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":1,"license":null,"topics":[],"forks":0,"open_issues":1,"watchers":1,"default_branch":"master"},{"id":118458037,"node_id":"MDEwOlJlcG9zaXRvcnkxMTg0NTgwMzc=","name":"modules-4-GenePattern","full_name":"vibbits/modules-4-GenePattern","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/modules-4-GenePattern","description":"modules for integrating under GenePattern some extra tools for NGS 
analysis","fork":false,"url":"https://api.github.com/repos/vibbits/modules-4-GenePattern","forks_url":"https://api.github.com/repos/vibbits/modules-4-GenePattern/forks","keys_url":"https://api.github.com/repos/vibbits/modules-4-GenePattern/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/modules-4-GenePattern/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/modules-4-GenePattern/teams","hooks_url":"https://api.github.com/repos/vibbits/modules-4-GenePattern/hooks","issue_events_url":"https://api.github.com/repos/vibbits/modules-4-GenePattern/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/modules-4-GenePattern/events","assignees_url":"https://api.github.com/repos/vibbits/modules-4-GenePattern/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/modules-4-GenePattern/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/modules-4-GenePattern/tags","blobs_url":"https://api.github.com/repos/vibbits/modules-4-GenePattern/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/modules-4-GenePattern/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/modules-4-GenePattern/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/modules-4-GenePattern/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/modules-4-GenePattern/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/modules-4-GenePattern/languages","stargazers_url":"https://api.github.com/repos/vibbits/modules-4-GenePattern/stargazers","contributors_url":"https://api.github.com/repos/vibbits/modules-4-GenePattern/contributors","subscribers_url":"https://api.github.com/repos/vibbits/modules-4-GenePattern/subscribers","subscription_url":"https://api.github.com/repos/vibbits/modules-4-GenePattern/subscription","commits_url":"https://api.github.com/repos/vibbits/modules-4-GenePattern/commits{/sha}","git_commits_url":"https://api.gith
ub.com/repos/vibbits/modules-4-GenePattern/git/commits{/sha}","comments_url":"https://api.github.com/repos/vibbits/modules-4-GenePattern/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/modules-4-GenePattern/issues/comments{/number}","contents_url":"https://api.github.com/repos/vibbits/modules-4-GenePattern/contents/{+path}","compare_url":"https://api.github.com/repos/vibbits/modules-4-GenePattern/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/modules-4-GenePattern/merges","archive_url":"https://api.github.com/repos/vibbits/modules-4-GenePattern/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/modules-4-GenePattern/downloads","issues_url":"https://api.github.com/repos/vibbits/modules-4-GenePattern/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/modules-4-GenePattern/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/modules-4-GenePattern/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/modules-4-GenePattern/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/modules-4-GenePattern/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/modules-4-GenePattern/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/modules-4-GenePattern/deployments","created_at":"2018-01-22 13:04:16 UTC","updated_at":"2020-04-17 07:03:12 UTC","pushed_at":"2020-04-17 07:03:10 
UTC","git_url":"git://github.com/vibbits/modules-4-GenePattern.git","ssh_url":"git@github.com:vibbits/modules-4-GenePattern.git","clone_url":"https://github.com/vibbits/modules-4-GenePattern.git","svn_url":"https://github.com/vibbits/modules-4-GenePattern","homepage":null,"size":2619,"stargazers_count":0,"watchers_count":0,"language":null,"has_issues":true,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":0,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":0,"license":null,"topics":[],"forks":0,"open_issues":0,"watchers":0,"default_branch":"master"},{"id":136658710,"node_id":"MDEwOlJlcG9zaXRvcnkxMzY2NTg3MTA=","name":"MOFA","full_name":"vibbits/MOFA","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/MOFA","description":"Multi-Omics Factor 
Analysis","fork":true,"url":"https://api.github.com/repos/vibbits/MOFA","forks_url":"https://api.github.com/repos/vibbits/MOFA/forks","keys_url":"https://api.github.com/repos/vibbits/MOFA/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/MOFA/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/MOFA/teams","hooks_url":"https://api.github.com/repos/vibbits/MOFA/hooks","issue_events_url":"https://api.github.com/repos/vibbits/MOFA/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/MOFA/events","assignees_url":"https://api.github.com/repos/vibbits/MOFA/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/MOFA/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/MOFA/tags","blobs_url":"https://api.github.com/repos/vibbits/MOFA/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/MOFA/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/MOFA/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/MOFA/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/MOFA/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/MOFA/languages","stargazers_url":"https://api.github.com/repos/vibbits/MOFA/stargazers","contributors_url":"https://api.github.com/repos/vibbits/MOFA/contributors","subscribers_url":"https://api.github.com/repos/vibbits/MOFA/subscribers","subscription_url":"https://api.github.com/repos/vibbits/MOFA/subscription","commits_url":"https://api.github.com/repos/vibbits/MOFA/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/MOFA/git/commits{/sha}","comments_url":"https://api.github.com/repos/vibbits/MOFA/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/MOFA/issues/comments{/number}","contents_url":"https://api.github.com/repos/vibbits/MOFA/contents/{+path}","compare_url":"https://api.github.com/repos/vibbits/MOFA/compare/{base}...{head}","merge
s_url":"https://api.github.com/repos/vibbits/MOFA/merges","archive_url":"https://api.github.com/repos/vibbits/MOFA/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/MOFA/downloads","issues_url":"https://api.github.com/repos/vibbits/MOFA/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/MOFA/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/MOFA/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/MOFA/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/MOFA/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/MOFA/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/MOFA/deployments","created_at":"2018-06-08 19:31:44 UTC","updated_at":"2018-10-11 09:37:21 UTC","pushed_at":"2018-06-23 20:50:58 UTC","git_url":"git://github.com/vibbits/MOFA.git","ssh_url":"git@github.com:vibbits/MOFA.git","clone_url":"https://github.com/vibbits/MOFA.git","svn_url":"https://github.com/vibbits/MOFA","homepage":"","size":110522,"stargazers_count":0,"watchers_count":0,"language":"HTML","has_issues":false,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":0,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":0,"license":null,"topics":[],"forks":0,"open_issues":0,"watchers":0,"default_branch":"master"},{"id":300293711,"node_id":"MDEwOlJlcG9zaXRvcnkzMDAyOTM3MTE=","name":"nextflow-jnj","full_name":"vibbits/nextflow-jnj","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbit
s/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/nextflow-jnj","description":"Course materials for the workflow pipelines (Nextflow) course. ","fork":false,"url":"https://api.github.com/repos/vibbits/nextflow-jnj","forks_url":"https://api.github.com/repos/vibbits/nextflow-jnj/forks","keys_url":"https://api.github.com/repos/vibbits/nextflow-jnj/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/nextflow-jnj/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/nextflow-jnj/teams","hooks_url":"https://api.github.com/repos/vibbits/nextflow-jnj/hooks","issue_events_url":"https://api.github.com/repos/vibbits/nextflow-jnj/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/nextflow-jnj/events","assignees_url":"https://api.github.com/repos/vibbits/nextflow-jnj/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/nextflow-jnj/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/nextflow-jnj/tags","blobs_url":"https://api.github.com/repos/vibbits/nextflow-jnj/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/nextflow-jnj/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/nextflow-jnj/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/nextflow-jnj/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/nextflow-jnj/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/nextflow-jnj/languages","stargazers_url":"https://api.github.com/re
pos/vibbits/nextflow-jnj/stargazers","contributors_url":"https://api.github.com/repos/vibbits/nextflow-jnj/contributors","subscribers_url":"https://api.github.com/repos/vibbits/nextflow-jnj/subscribers","subscription_url":"https://api.github.com/repos/vibbits/nextflow-jnj/subscription","commits_url":"https://api.github.com/repos/vibbits/nextflow-jnj/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/nextflow-jnj/git/commits{/sha}","comments_url":"https://api.github.com/repos/vibbits/nextflow-jnj/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/nextflow-jnj/issues/comments{/number}","contents_url":"https://api.github.com/repos/vibbits/nextflow-jnj/contents/{+path}","compare_url":"https://api.github.com/repos/vibbits/nextflow-jnj/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/nextflow-jnj/merges","archive_url":"https://api.github.com/repos/vibbits/nextflow-jnj/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/nextflow-jnj/downloads","issues_url":"https://api.github.com/repos/vibbits/nextflow-jnj/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/nextflow-jnj/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/nextflow-jnj/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/nextflow-jnj/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/nextflow-jnj/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/nextflow-jnj/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/nextflow-jnj/deployments","created_at":"2020-10-01 13:42:36 UTC","updated_at":"2020-12-14 11:19:54 UTC","pushed_at":"2020-12-14 11:19:52 
UTC","git_url":"git://github.com/vibbits/nextflow-jnj.git","ssh_url":"git@github.com:vibbits/nextflow-jnj.git","clone_url":"https://github.com/vibbits/nextflow-jnj.git","svn_url":"https://github.com/vibbits/nextflow-jnj","homepage":null,"size":93748,"stargazers_count":2,"watchers_count":2,"language":"Nextflow","has_issues":true,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":2,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":0,"license":{"key":"gpl-3.0","name":"GNU General Public License v3.0","spdx_id":"GPL-3.0","url":"https://api.github.com/licenses/gpl-3.0","node_id":"MDc6TGljZW5zZTk="},"topics":[],"forks":2,"open_issues":0,"watchers":2,"default_branch":"master"},{"id":357533819,"node_id":"MDEwOlJlcG9zaXRvcnkzNTc1MzM4MTk=","name":"nextflow-workshop","full_name":"vibbits/nextflow-workshop","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/nextflow-workshop","description":"Nextflow workshop materials 27 May 
2021","fork":false,"url":"https://api.github.com/repos/vibbits/nextflow-workshop","forks_url":"https://api.github.com/repos/vibbits/nextflow-workshop/forks","keys_url":"https://api.github.com/repos/vibbits/nextflow-workshop/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/nextflow-workshop/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/nextflow-workshop/teams","hooks_url":"https://api.github.com/repos/vibbits/nextflow-workshop/hooks","issue_events_url":"https://api.github.com/repos/vibbits/nextflow-workshop/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/nextflow-workshop/events","assignees_url":"https://api.github.com/repos/vibbits/nextflow-workshop/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/nextflow-workshop/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/nextflow-workshop/tags","blobs_url":"https://api.github.com/repos/vibbits/nextflow-workshop/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/nextflow-workshop/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/nextflow-workshop/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/nextflow-workshop/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/nextflow-workshop/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/nextflow-workshop/languages","stargazers_url":"https://api.github.com/repos/vibbits/nextflow-workshop/stargazers","contributors_url":"https://api.github.com/repos/vibbits/nextflow-workshop/contributors","subscribers_url":"https://api.github.com/repos/vibbits/nextflow-workshop/subscribers","subscription_url":"https://api.github.com/repos/vibbits/nextflow-workshop/subscription","commits_url":"https://api.github.com/repos/vibbits/nextflow-workshop/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/nextflow-workshop/git/commits{/sha}","comments_url":"https://api.github
.com/repos/vibbits/nextflow-workshop/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/nextflow-workshop/issues/comments{/number}","contents_url":"https://api.github.com/repos/vibbits/nextflow-workshop/contents/{+path}","compare_url":"https://api.github.com/repos/vibbits/nextflow-workshop/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/nextflow-workshop/merges","archive_url":"https://api.github.com/repos/vibbits/nextflow-workshop/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/nextflow-workshop/downloads","issues_url":"https://api.github.com/repos/vibbits/nextflow-workshop/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/nextflow-workshop/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/nextflow-workshop/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/nextflow-workshop/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/nextflow-workshop/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/nextflow-workshop/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/nextflow-workshop/deployments","created_at":"2021-04-13 11:50:15 UTC","updated_at":"2021-05-26 12:21:51 UTC","pushed_at":"2021-05-26 12:21:48 UTC","git_url":"git://github.com/vibbits/nextflow-workshop.git","ssh_url":"git@github.com:vibbits/nextflow-workshop.git","clone_url":"https://github.com/vibbits/nextflow-workshop.git","svn_url":"https://github.com/vibbits/nextflow-workshop","homepage":null,"size":3028,"stargazers_count":0,"watchers_count":0,"language":"Nextflow","has_issues":true,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":0,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":0,"license":{"key":"gpl-3.0","name":"GNU General Public License 
v3.0","spdx_id":"GPL-3.0","url":"https://api.github.com/licenses/gpl-3.0","node_id":"MDc6TGljZW5zZTk="},"topics":[],"forks":0,"open_issues":0,"watchers":0,"default_branch":"main"},{"id":157213851,"node_id":"MDEwOlJlcG9zaXRvcnkxNTcyMTM4NTE=","name":"NextFlow_pipelines","full_name":"vibbits/NextFlow_pipelines","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/NextFlow_pipelines","description":"Tricks and tips for designing a NextFlow 
pipeline","fork":false,"url":"https://api.github.com/repos/vibbits/NextFlow_pipelines","forks_url":"https://api.github.com/repos/vibbits/NextFlow_pipelines/forks","keys_url":"https://api.github.com/repos/vibbits/NextFlow_pipelines/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/NextFlow_pipelines/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/NextFlow_pipelines/teams","hooks_url":"https://api.github.com/repos/vibbits/NextFlow_pipelines/hooks","issue_events_url":"https://api.github.com/repos/vibbits/NextFlow_pipelines/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/NextFlow_pipelines/events","assignees_url":"https://api.github.com/repos/vibbits/NextFlow_pipelines/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/NextFlow_pipelines/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/NextFlow_pipelines/tags","blobs_url":"https://api.github.com/repos/vibbits/NextFlow_pipelines/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/NextFlow_pipelines/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/NextFlow_pipelines/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/NextFlow_pipelines/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/NextFlow_pipelines/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/NextFlow_pipelines/languages","stargazers_url":"https://api.github.com/repos/vibbits/NextFlow_pipelines/stargazers","contributors_url":"https://api.github.com/repos/vibbits/NextFlow_pipelines/contributors","subscribers_url":"https://api.github.com/repos/vibbits/NextFlow_pipelines/subscribers","subscription_url":"https://api.github.com/repos/vibbits/NextFlow_pipelines/subscription","commits_url":"https://api.github.com/repos/vibbits/NextFlow_pipelines/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/NextFlow_pipelines/git/commits{/sha}","commen
ts_url":"https://api.github.com/repos/vibbits/NextFlow_pipelines/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/NextFlow_pipelines/issues/comments{/number}","contents_url":"https://api.github.com/repos/vibbits/NextFlow_pipelines/contents/{+path}","compare_url":"https://api.github.com/repos/vibbits/NextFlow_pipelines/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/NextFlow_pipelines/merges","archive_url":"https://api.github.com/repos/vibbits/NextFlow_pipelines/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/NextFlow_pipelines/downloads","issues_url":"https://api.github.com/repos/vibbits/NextFlow_pipelines/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/NextFlow_pipelines/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/NextFlow_pipelines/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/NextFlow_pipelines/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/NextFlow_pipelines/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/NextFlow_pipelines/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/NextFlow_pipelines/deployments","created_at":"2018-11-12 12:57:24 UTC","updated_at":"2019-02-25 13:20:35 UTC","pushed_at":"2019-02-25 13:20:34 
UTC","git_url":"git://github.com/vibbits/NextFlow_pipelines.git","ssh_url":"git@github.com:vibbits/NextFlow_pipelines.git","clone_url":"https://github.com/vibbits/NextFlow_pipelines.git","svn_url":"https://github.com/vibbits/NextFlow_pipelines","homepage":null,"size":1578,"stargazers_count":0,"watchers_count":0,"language":"Nextflow","has_issues":true,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":0,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":0,"license":null,"topics":[],"forks":0,"open_issues":0,"watchers":0,"default_branch":"master"},{"id":272147337,"node_id":"MDEwOlJlcG9zaXRvcnkyNzIxNDczMzc=","name":"nginx-proxy","full_name":"vibbits/nginx-proxy","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/nginx-proxy","description":"Automated nginx proxy for Docker containers using 
docker-gen","fork":true,"url":"https://api.github.com/repos/vibbits/nginx-proxy","forks_url":"https://api.github.com/repos/vibbits/nginx-proxy/forks","keys_url":"https://api.github.com/repos/vibbits/nginx-proxy/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/nginx-proxy/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/nginx-proxy/teams","hooks_url":"https://api.github.com/repos/vibbits/nginx-proxy/hooks","issue_events_url":"https://api.github.com/repos/vibbits/nginx-proxy/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/nginx-proxy/events","assignees_url":"https://api.github.com/repos/vibbits/nginx-proxy/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/nginx-proxy/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/nginx-proxy/tags","blobs_url":"https://api.github.com/repos/vibbits/nginx-proxy/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/nginx-proxy/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/nginx-proxy/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/nginx-proxy/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/nginx-proxy/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/nginx-proxy/languages","stargazers_url":"https://api.github.com/repos/vibbits/nginx-proxy/stargazers","contributors_url":"https://api.github.com/repos/vibbits/nginx-proxy/contributors","subscribers_url":"https://api.github.com/repos/vibbits/nginx-proxy/subscribers","subscription_url":"https://api.github.com/repos/vibbits/nginx-proxy/subscription","commits_url":"https://api.github.com/repos/vibbits/nginx-proxy/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/nginx-proxy/git/commits{/sha}","comments_url":"https://api.github.com/repos/vibbits/nginx-proxy/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/nginx-proxy/issues/commen
ts{/number}","contents_url":"https://api.github.com/repos/vibbits/nginx-proxy/contents/{+path}","compare_url":"https://api.github.com/repos/vibbits/nginx-proxy/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/nginx-proxy/merges","archive_url":"https://api.github.com/repos/vibbits/nginx-proxy/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/nginx-proxy/downloads","issues_url":"https://api.github.com/repos/vibbits/nginx-proxy/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/nginx-proxy/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/nginx-proxy/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/nginx-proxy/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/nginx-proxy/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/nginx-proxy/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/nginx-proxy/deployments","created_at":"2020-06-14 06:38:38 UTC","updated_at":"2020-06-14 06:59:32 UTC","pushed_at":"2020-06-14 06:59:29 UTC","git_url":"git://github.com/vibbits/nginx-proxy.git","ssh_url":"git@github.com:vibbits/nginx-proxy.git","clone_url":"https://github.com/vibbits/nginx-proxy.git","svn_url":"https://github.com/vibbits/nginx-proxy","homepage":null,"size":531,"stargazers_count":0,"watchers_count":0,"language":"Python","has_issues":false,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":0,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":0,"license":{"key":"mit","name":"MIT 
License","spdx_id":"MIT","url":"https://api.github.com/licenses/mit","node_id":"MDc6TGljZW5zZTEz"},"topics":[],"forks":0,"open_issues":0,"watchers":0,"default_branch":"master"},{"id":207801732,"node_id":"MDEwOlJlcG9zaXRvcnkyMDc4MDE3MzI=","name":"OMeta-Public","full_name":"vibbits/OMeta-Public","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/OMeta-Public","description":"OMeta-Public","fork":true,"url":"https://api.github.com/repos/vibbits/OMeta-Public","forks_url":"https://api.github.com/repos/vibbits/OMeta-Public/forks","keys_url":"https://api.github.com/repos/vibbits/OMeta-Public/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/OMeta-Public/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/OMeta-Public/teams","hooks_url":"https://api.github.com/repos/vibbits/OMeta-Public/hooks","issue_events_url":"https://api.github.com/repos/vibbits/OMeta-Public/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/OMeta-Public/events","assignees_url":"https://api.github.com/repos/vibbits/OMeta
-Public/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/OMeta-Public/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/OMeta-Public/tags","blobs_url":"https://api.github.com/repos/vibbits/OMeta-Public/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/OMeta-Public/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/OMeta-Public/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/OMeta-Public/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/OMeta-Public/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/OMeta-Public/languages","stargazers_url":"https://api.github.com/repos/vibbits/OMeta-Public/stargazers","contributors_url":"https://api.github.com/repos/vibbits/OMeta-Public/contributors","subscribers_url":"https://api.github.com/repos/vibbits/OMeta-Public/subscribers","subscription_url":"https://api.github.com/repos/vibbits/OMeta-Public/subscription","commits_url":"https://api.github.com/repos/vibbits/OMeta-Public/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/OMeta-Public/git/commits{/sha}","comments_url":"https://api.github.com/repos/vibbits/OMeta-Public/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/OMeta-Public/issues/comments{/number}","contents_url":"https://api.github.com/repos/vibbits/OMeta-Public/contents/{+path}","compare_url":"https://api.github.com/repos/vibbits/OMeta-Public/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/OMeta-Public/merges","archive_url":"https://api.github.com/repos/vibbits/OMeta-Public/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/OMeta-Public/downloads","issues_url":"https://api.github.com/repos/vibbits/OMeta-Public/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/OMeta-Public/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/OMeta-Public/milestones{/numb
er}","notifications_url":"https://api.github.com/repos/vibbits/OMeta-Public/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/OMeta-Public/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/OMeta-Public/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/OMeta-Public/deployments","created_at":"2019-09-11 12:00:36 UTC","updated_at":"2019-09-11 12:00:37 UTC","pushed_at":"2019-08-29 10:37:03 UTC","git_url":"git://github.com/vibbits/OMeta-Public.git","ssh_url":"git@github.com:vibbits/OMeta-Public.git","clone_url":"https://github.com/vibbits/OMeta-Public.git","svn_url":"https://github.com/vibbits/OMeta-Public","homepage":null,"size":331121,"stargazers_count":0,"watchers_count":0,"language":null,"has_issues":false,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":0,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":0,"license":null,"topics":[],"forks":0,"open_issues":0,"watchers":0,"default_branch":"master"},{"id":292024590,"node_id":"MDEwOlJlcG9zaXRvcnkyOTIwMjQ1OTA=","name":"openid-connect-flask-template","full_name":"vibbits/openid-connect-flask-template","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users
/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/openid-connect-flask-template","description":"A template Flask application with OpenID Connect integration with VIB Services ","fork":false,"url":"https://api.github.com/repos/vibbits/openid-connect-flask-template","forks_url":"https://api.github.com/repos/vibbits/openid-connect-flask-template/forks","keys_url":"https://api.github.com/repos/vibbits/openid-connect-flask-template/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/openid-connect-flask-template/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/openid-connect-flask-template/teams","hooks_url":"https://api.github.com/repos/vibbits/openid-connect-flask-template/hooks","issue_events_url":"https://api.github.com/repos/vibbits/openid-connect-flask-template/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/openid-connect-flask-template/events","assignees_url":"https://api.github.com/repos/vibbits/openid-connect-flask-template/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/openid-connect-flask-template/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/openid-connect-flask-template/tags","blobs_url":"https://api.github.com/repos/vibbits/openid-connect-flask-template/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/openid-connect-flask-template/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/openid-connect-flask-template/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/openid-connect-flask-template/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/openid-connect-flask-template/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/openid-connect-flask-template/languages","stargazers_url":"https://api.github.com/repos/vibbi
ts/openid-connect-flask-template/stargazers","contributors_url":"https://api.github.com/repos/vibbits/openid-connect-flask-template/contributors","subscribers_url":"https://api.github.com/repos/vibbits/openid-connect-flask-template/subscribers","subscription_url":"https://api.github.com/repos/vibbits/openid-connect-flask-template/subscription","commits_url":"https://api.github.com/repos/vibbits/openid-connect-flask-template/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/openid-connect-flask-template/git/commits{/sha}","comments_url":"https://api.github.com/repos/vibbits/openid-connect-flask-template/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/openid-connect-flask-template/issues/comments{/number}","contents_url":"https://api.github.com/repos/vibbits/openid-connect-flask-template/contents/{+path}","compare_url":"https://api.github.com/repos/vibbits/openid-connect-flask-template/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/openid-connect-flask-template/merges","archive_url":"https://api.github.com/repos/vibbits/openid-connect-flask-template/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/openid-connect-flask-template/downloads","issues_url":"https://api.github.com/repos/vibbits/openid-connect-flask-template/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/openid-connect-flask-template/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/openid-connect-flask-template/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/openid-connect-flask-template/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/openid-connect-flask-template/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/openid-connect-flask-template/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/openid-connect-flask-template/deployments","created_at":"20
20-09-01 14:44:22 UTC","updated_at":"2020-09-02 07:26:42 UTC","pushed_at":"2020-09-02 07:26:40 UTC","git_url":"git://github.com/vibbits/openid-connect-flask-template.git","ssh_url":"git@github.com:vibbits/openid-connect-flask-template.git","clone_url":"https://github.com/vibbits/openid-connect-flask-template.git","svn_url":"https://github.com/vibbits/openid-connect-flask-template","homepage":null,"size":41,"stargazers_count":0,"watchers_count":0,"language":"Python","has_issues":true,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":0,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":0,"license":{"key":"gpl-3.0","name":"GNU General Public License v3.0","spdx_id":"GPL-3.0","url":"https://api.github.com/licenses/gpl-3.0","node_id":"MDc6TGljZW5zZTk="},"topics":[],"forks":0,"open_issues":0,"watchers":0,"default_branch":"master"},{"id":186219236,"node_id":"MDEwOlJlcG9zaXRvcnkxODYyMTkyMzY=","name":"OpenRefineTrainingData","full_name":"vibbits/OpenRefineTrainingData","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://githu
b.com/vibbits/OpenRefineTrainingData","description":null,"fork":false,"url":"https://api.github.com/repos/vibbits/OpenRefineTrainingData","forks_url":"https://api.github.com/repos/vibbits/OpenRefineTrainingData/forks","keys_url":"https://api.github.com/repos/vibbits/OpenRefineTrainingData/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/OpenRefineTrainingData/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/OpenRefineTrainingData/teams","hooks_url":"https://api.github.com/repos/vibbits/OpenRefineTrainingData/hooks","issue_events_url":"https://api.github.com/repos/vibbits/OpenRefineTrainingData/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/OpenRefineTrainingData/events","assignees_url":"https://api.github.com/repos/vibbits/OpenRefineTrainingData/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/OpenRefineTrainingData/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/OpenRefineTrainingData/tags","blobs_url":"https://api.github.com/repos/vibbits/OpenRefineTrainingData/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/OpenRefineTrainingData/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/OpenRefineTrainingData/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/OpenRefineTrainingData/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/OpenRefineTrainingData/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/OpenRefineTrainingData/languages","stargazers_url":"https://api.github.com/repos/vibbits/OpenRefineTrainingData/stargazers","contributors_url":"https://api.github.com/repos/vibbits/OpenRefineTrainingData/contributors","subscribers_url":"https://api.github.com/repos/vibbits/OpenRefineTrainingData/subscribers","subscription_url":"https://api.github.com/repos/vibbits/OpenRefineTrainingData/subscription","commits_url":"https://api.github.com/repos/vibbits/Open
RefineTrainingData/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/OpenRefineTrainingData/git/commits{/sha}","comments_url":"https://api.github.com/repos/vibbits/OpenRefineTrainingData/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/OpenRefineTrainingData/issues/comments{/number}","contents_url":"https://api.github.com/repos/vibbits/OpenRefineTrainingData/contents/{+path}","compare_url":"https://api.github.com/repos/vibbits/OpenRefineTrainingData/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/OpenRefineTrainingData/merges","archive_url":"https://api.github.com/repos/vibbits/OpenRefineTrainingData/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/OpenRefineTrainingData/downloads","issues_url":"https://api.github.com/repos/vibbits/OpenRefineTrainingData/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/OpenRefineTrainingData/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/OpenRefineTrainingData/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/OpenRefineTrainingData/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/OpenRefineTrainingData/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/OpenRefineTrainingData/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/OpenRefineTrainingData/deployments","created_at":"2019-05-12 06:19:18 UTC","updated_at":"2019-05-18 14:15:38 UTC","pushed_at":"2019-05-18 14:15:36 
UTC","git_url":"git://github.com/vibbits/OpenRefineTrainingData.git","ssh_url":"git@github.com:vibbits/OpenRefineTrainingData.git","clone_url":"https://github.com/vibbits/OpenRefineTrainingData.git","svn_url":"https://github.com/vibbits/OpenRefineTrainingData","homepage":null,"size":7919,"stargazers_count":0,"watchers_count":0,"language":null,"has_issues":true,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":0,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":0,"license":{"key":"mit","name":"MIT License","spdx_id":"MIT","url":"https://api.github.com/licenses/mit","node_id":"MDc6TGljZW5zZTEz"},"topics":[],"forks":0,"open_issues":0,"watchers":0,"default_branch":"master"},{"id":71230827,"node_id":"MDEwOlJlcG9zaXRvcnk3MTIzMDgyNw==","name":"phyd3","full_name":"vibbits/phyd3","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/phyd3","description":"Phylogenetic tree viewer based on 
D3.js","fork":false,"url":"https://api.github.com/repos/vibbits/phyd3","forks_url":"https://api.github.com/repos/vibbits/phyd3/forks","keys_url":"https://api.github.com/repos/vibbits/phyd3/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/phyd3/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/phyd3/teams","hooks_url":"https://api.github.com/repos/vibbits/phyd3/hooks","issue_events_url":"https://api.github.com/repos/vibbits/phyd3/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/phyd3/events","assignees_url":"https://api.github.com/repos/vibbits/phyd3/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/phyd3/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/phyd3/tags","blobs_url":"https://api.github.com/repos/vibbits/phyd3/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/phyd3/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/phyd3/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/phyd3/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/phyd3/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/phyd3/languages","stargazers_url":"https://api.github.com/repos/vibbits/phyd3/stargazers","contributors_url":"https://api.github.com/repos/vibbits/phyd3/contributors","subscribers_url":"https://api.github.com/repos/vibbits/phyd3/subscribers","subscription_url":"https://api.github.com/repos/vibbits/phyd3/subscription","commits_url":"https://api.github.com/repos/vibbits/phyd3/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/phyd3/git/commits{/sha}","comments_url":"https://api.github.com/repos/vibbits/phyd3/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/phyd3/issues/comments{/number}","contents_url":"https://api.github.com/repos/vibbits/phyd3/contents/{+path}","compare_url":"https://api.github.com/repos/vibbits/phyd3/compar
e/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/phyd3/merges","archive_url":"https://api.github.com/repos/vibbits/phyd3/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/phyd3/downloads","issues_url":"https://api.github.com/repos/vibbits/phyd3/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/phyd3/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/phyd3/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/phyd3/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/phyd3/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/phyd3/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/phyd3/deployments","created_at":"2016-10-18 09:19:54 UTC","updated_at":"2021-05-19 18:28:37 UTC","pushed_at":"2020-03-04 09:38:31 UTC","git_url":"git://github.com/vibbits/phyd3.git","ssh_url":"git@github.com:vibbits/phyd3.git","clone_url":"https://github.com/vibbits/phyd3.git","svn_url":"https://github.com/vibbits/phyd3","homepage":null,"size":3012,"stargazers_count":39,"watchers_count":39,"language":"JavaScript","has_issues":true,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":12,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":4,"license":{"key":"gpl-3.0","name":"GNU General Public License 
v3.0","spdx_id":"GPL-3.0","url":"https://api.github.com/licenses/gpl-3.0","node_id":"MDc6TGljZW5zZTk="},"topics":[],"forks":12,"open_issues":4,"watchers":39,"default_branch":"master"},{"id":138773305,"node_id":"MDEwOlJlcG9zaXRvcnkxMzg3NzMzMDU=","name":"presentation","full_name":"vibbits/presentation","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/presentation","description":"20180907 presentation community 
meeting","fork":false,"url":"https://api.github.com/repos/vibbits/presentation","forks_url":"https://api.github.com/repos/vibbits/presentation/forks","keys_url":"https://api.github.com/repos/vibbits/presentation/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/presentation/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/presentation/teams","hooks_url":"https://api.github.com/repos/vibbits/presentation/hooks","issue_events_url":"https://api.github.com/repos/vibbits/presentation/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/presentation/events","assignees_url":"https://api.github.com/repos/vibbits/presentation/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/presentation/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/presentation/tags","blobs_url":"https://api.github.com/repos/vibbits/presentation/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/presentation/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/presentation/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/presentation/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/presentation/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/presentation/languages","stargazers_url":"https://api.github.com/repos/vibbits/presentation/stargazers","contributors_url":"https://api.github.com/repos/vibbits/presentation/contributors","subscribers_url":"https://api.github.com/repos/vibbits/presentation/subscribers","subscription_url":"https://api.github.com/repos/vibbits/presentation/subscription","commits_url":"https://api.github.com/repos/vibbits/presentation/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/presentation/git/commits{/sha}","comments_url":"https://api.github.com/repos/vibbits/presentation/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/pre
sentation/issues/comments{/number}","contents_url":"https://api.github.com/repos/vibbits/presentation/contents/{+path}","compare_url":"https://api.github.com/repos/vibbits/presentation/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/presentation/merges","archive_url":"https://api.github.com/repos/vibbits/presentation/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/presentation/downloads","issues_url":"https://api.github.com/repos/vibbits/presentation/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/presentation/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/presentation/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/presentation/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/presentation/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/presentation/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/presentation/deployments","created_at":"2018-06-26 17:49:03 UTC","updated_at":"2020-02-24 07:48:42 UTC","pushed_at":"2020-02-24 07:48:40 
UTC","git_url":"git://github.com/vibbits/presentation.git","ssh_url":"git@github.com:vibbits/presentation.git","clone_url":"https://github.com/vibbits/presentation.git","svn_url":"https://github.com/vibbits/presentation","homepage":"","size":1718,"stargazers_count":0,"watchers_count":0,"language":null,"has_issues":true,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":2,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":1,"license":null,"topics":[],"forks":2,"open_issues":1,"watchers":0,"default_branch":"master"},{"id":296544736,"node_id":"MDEwOlJlcG9zaXRvcnkyOTY1NDQ3MzY=","name":"python-data-analysis","full_name":"vibbits/python-data-analysis","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/python-data-analysis","description":"Materials for the course 'Python for downstream data analysis'. 
","fork":false,"url":"https://api.github.com/repos/vibbits/python-data-analysis","forks_url":"https://api.github.com/repos/vibbits/python-data-analysis/forks","keys_url":"https://api.github.com/repos/vibbits/python-data-analysis/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/python-data-analysis/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/python-data-analysis/teams","hooks_url":"https://api.github.com/repos/vibbits/python-data-analysis/hooks","issue_events_url":"https://api.github.com/repos/vibbits/python-data-analysis/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/python-data-analysis/events","assignees_url":"https://api.github.com/repos/vibbits/python-data-analysis/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/python-data-analysis/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/python-data-analysis/tags","blobs_url":"https://api.github.com/repos/vibbits/python-data-analysis/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/python-data-analysis/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/python-data-analysis/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/python-data-analysis/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/python-data-analysis/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/python-data-analysis/languages","stargazers_url":"https://api.github.com/repos/vibbits/python-data-analysis/stargazers","contributors_url":"https://api.github.com/repos/vibbits/python-data-analysis/contributors","subscribers_url":"https://api.github.com/repos/vibbits/python-data-analysis/subscribers","subscription_url":"https://api.github.com/repos/vibbits/python-data-analysis/subscription","commits_url":"https://api.github.com/repos/vibbits/python-data-analysis/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/python-da
ta-analysis/git/commits{/sha}","comments_url":"https://api.github.com/repos/vibbits/python-data-analysis/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/python-data-analysis/issues/comments{/number}","contents_url":"https://api.github.com/repos/vibbits/python-data-analysis/contents/{+path}","compare_url":"https://api.github.com/repos/vibbits/python-data-analysis/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/python-data-analysis/merges","archive_url":"https://api.github.com/repos/vibbits/python-data-analysis/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/python-data-analysis/downloads","issues_url":"https://api.github.com/repos/vibbits/python-data-analysis/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/python-data-analysis/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/python-data-analysis/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/python-data-analysis/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/python-data-analysis/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/python-data-analysis/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/python-data-analysis/deployments","created_at":"2020-09-18 07:19:29 UTC","updated_at":"2021-02-24 15:35:54 UTC","pushed_at":"2021-02-24 15:35:51 UTC","git_url":"git://github.com/vibbits/python-data-analysis.git","ssh_url":"git@github.com:vibbits/python-data-analysis.git","clone_url":"https://github.com/vibbits/python-data-analysis.git","svn_url":"https://github.com/vibbits/python-data-analysis","homepage":null,"size":36329,"stargazers_count":0,"watchers_count":0,"language":"Jupyter 
Notebook","has_issues":true,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":1,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":2,"license":{"key":"other","name":"Other","spdx_id":"NOASSERTION","url":null,"node_id":"MDc6TGljZW5zZTA="},"topics":[],"forks":1,"open_issues":2,"watchers":0,"default_branch":"master"},{"id":186428088,"node_id":"MDEwOlJlcG9zaXRvcnkxODY0MjgwODg=","name":"qupath","full_name":"vibbits/qupath","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/qupath","description":"QuPath - Open Source Digital 
Pathology","fork":true,"url":"https://api.github.com/repos/vibbits/qupath","forks_url":"https://api.github.com/repos/vibbits/qupath/forks","keys_url":"https://api.github.com/repos/vibbits/qupath/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/qupath/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/qupath/teams","hooks_url":"https://api.github.com/repos/vibbits/qupath/hooks","issue_events_url":"https://api.github.com/repos/vibbits/qupath/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/qupath/events","assignees_url":"https://api.github.com/repos/vibbits/qupath/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/qupath/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/qupath/tags","blobs_url":"https://api.github.com/repos/vibbits/qupath/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/qupath/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/qupath/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/qupath/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/qupath/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/qupath/languages","stargazers_url":"https://api.github.com/repos/vibbits/qupath/stargazers","contributors_url":"https://api.github.com/repos/vibbits/qupath/contributors","subscribers_url":"https://api.github.com/repos/vibbits/qupath/subscribers","subscription_url":"https://api.github.com/repos/vibbits/qupath/subscription","commits_url":"https://api.github.com/repos/vibbits/qupath/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/qupath/git/commits{/sha}","comments_url":"https://api.github.com/repos/vibbits/qupath/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/qupath/issues/comments{/number}","contents_url":"https://api.github.com/repos/vibbits/qupath/contents/{+path}","compare_url":"https://api.github.c
om/repos/vibbits/qupath/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/qupath/merges","archive_url":"https://api.github.com/repos/vibbits/qupath/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/qupath/downloads","issues_url":"https://api.github.com/repos/vibbits/qupath/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/qupath/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/qupath/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/qupath/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/qupath/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/qupath/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/qupath/deployments","created_at":"2019-05-13 13:45:18 UTC","updated_at":"2019-05-15 09:48:35 UTC","pushed_at":"2019-05-15 09:48:33 UTC","git_url":"git://github.com/vibbits/qupath.git","ssh_url":"git@github.com:vibbits/qupath.git","clone_url":"https://github.com/vibbits/qupath.git","svn_url":"https://github.com/vibbits/qupath","homepage":"https://qupath.github.io","size":83357,"stargazers_count":0,"watchers_count":0,"language":"Java","has_issues":false,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":0,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":0,"license":{"key":"gpl-3.0","name":"GNU General Public License 
v3.0","spdx_id":"GPL-3.0","url":"https://api.github.com/licenses/gpl-3.0","node_id":"MDc6TGljZW5zZTk="},"topics":[],"forks":0,"open_issues":0,"watchers":0,"default_branch":"master"},{"id":291686769,"node_id":"MDEwOlJlcG9zaXRvcnkyOTE2ODY3Njk=","name":"RDDpred","full_name":"vibbits/RDDpred","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/RDDpred","description":"This repository contains the RDDpred software in Docker container format. 
","fork":false,"url":"https://api.github.com/repos/vibbits/RDDpred","forks_url":"https://api.github.com/repos/vibbits/RDDpred/forks","keys_url":"https://api.github.com/repos/vibbits/RDDpred/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/RDDpred/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/RDDpred/teams","hooks_url":"https://api.github.com/repos/vibbits/RDDpred/hooks","issue_events_url":"https://api.github.com/repos/vibbits/RDDpred/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/RDDpred/events","assignees_url":"https://api.github.com/repos/vibbits/RDDpred/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/RDDpred/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/RDDpred/tags","blobs_url":"https://api.github.com/repos/vibbits/RDDpred/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/RDDpred/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/RDDpred/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/RDDpred/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/RDDpred/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/RDDpred/languages","stargazers_url":"https://api.github.com/repos/vibbits/RDDpred/stargazers","contributors_url":"https://api.github.com/repos/vibbits/RDDpred/contributors","subscribers_url":"https://api.github.com/repos/vibbits/RDDpred/subscribers","subscription_url":"https://api.github.com/repos/vibbits/RDDpred/subscription","commits_url":"https://api.github.com/repos/vibbits/RDDpred/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/RDDpred/git/commits{/sha}","comments_url":"https://api.github.com/repos/vibbits/RDDpred/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/RDDpred/issues/comments{/number}","contents_url":"https://api.github.com/repos/vibbits/RDDpred/contents/{+path}","compare_url":"ht
tps://api.github.com/repos/vibbits/RDDpred/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/RDDpred/merges","archive_url":"https://api.github.com/repos/vibbits/RDDpred/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/RDDpred/downloads","issues_url":"https://api.github.com/repos/vibbits/RDDpred/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/RDDpred/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/RDDpred/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/RDDpred/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/RDDpred/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/RDDpred/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/RDDpred/deployments","created_at":"2020-08-31 10:38:10 UTC","updated_at":"2021-03-09 08:47:38 UTC","pushed_at":"2021-03-09 08:47:33 UTC","git_url":"git://github.com/vibbits/RDDpred.git","ssh_url":"git@github.com:vibbits/RDDpred.git","clone_url":"https://github.com/vibbits/RDDpred.git","svn_url":"https://github.com/vibbits/RDDpred","homepage":null,"size":47325,"stargazers_count":0,"watchers_count":0,"language":"Python","has_issues":true,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":0,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":0,"license":null,"topics":[],"forks":0,"open_issues":0,"watchers":0,"default_branch":"master"},{"id":310610411,"node_id":"MDEwOlJlcG9zaXRvcnkzMTA2MTA0MTE=","name":"RDM-LS","full_name":"vibbits/RDM-LS","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"htt
ps://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/RDM-LS","description":"Session for Research Data Management in Life Sciences workshop on 10 Nov 2020. Creating and working with a reproducible data analysis environment.","fork":false,"url":"https://api.github.com/repos/vibbits/RDM-LS","forks_url":"https://api.github.com/repos/vibbits/RDM-LS/forks","keys_url":"https://api.github.com/repos/vibbits/RDM-LS/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/RDM-LS/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/RDM-LS/teams","hooks_url":"https://api.github.com/repos/vibbits/RDM-LS/hooks","issue_events_url":"https://api.github.com/repos/vibbits/RDM-LS/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/RDM-LS/events","assignees_url":"https://api.github.com/repos/vibbits/RDM-LS/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/RDM-LS/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/RDM-LS/tags","blobs_url":"https://api.github.com/repos/vibbits/RDM-LS/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/RDM-LS/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/RDM-LS/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/RDM-LS/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/RDM-LS/statuses/{sha}","languages_url":"https://api.githu
b.com/repos/vibbits/RDM-LS/languages","stargazers_url":"https://api.github.com/repos/vibbits/RDM-LS/stargazers","contributors_url":"https://api.github.com/repos/vibbits/RDM-LS/contributors","subscribers_url":"https://api.github.com/repos/vibbits/RDM-LS/subscribers","subscription_url":"https://api.github.com/repos/vibbits/RDM-LS/subscription","commits_url":"https://api.github.com/repos/vibbits/RDM-LS/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/RDM-LS/git/commits{/sha}","comments_url":"https://api.github.com/repos/vibbits/RDM-LS/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/RDM-LS/issues/comments{/number}","contents_url":"https://api.github.com/repos/vibbits/RDM-LS/contents/{+path}","compare_url":"https://api.github.com/repos/vibbits/RDM-LS/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/RDM-LS/merges","archive_url":"https://api.github.com/repos/vibbits/RDM-LS/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/RDM-LS/downloads","issues_url":"https://api.github.com/repos/vibbits/RDM-LS/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/RDM-LS/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/RDM-LS/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/RDM-LS/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/RDM-LS/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/RDM-LS/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/RDM-LS/deployments","created_at":"2020-11-06 13:52:07 UTC","updated_at":"2020-11-09 18:28:52 UTC","pushed_at":"2020-11-09 18:28:50 
UTC","git_url":"git://github.com/vibbits/RDM-LS.git","ssh_url":"git@github.com:vibbits/RDM-LS.git","clone_url":"https://github.com/vibbits/RDM-LS.git","svn_url":"https://github.com/vibbits/RDM-LS","homepage":null,"size":205,"stargazers_count":0,"watchers_count":0,"language":"R","has_issues":true,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":28,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":0,"license":{"key":"other","name":"Other","spdx_id":"NOASSERTION","url":null,"node_id":"MDc6TGljZW5zZTA="},"topics":[],"forks":28,"open_issues":0,"watchers":0,"default_branch":"master"},{"id":308578586,"node_id":"MDEwOlJlcG9zaXRvcnkzMDg1Nzg1ODY=","name":"RDM-LS-solution","full_name":"vibbits/RDM-LS-solution","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/RDM-LS-solution","description":"Session for Research Data Management in Life Sciences workshop on 10 Nov 2020. Creating and working with a reproducible data analysis environment. 
","fork":false,"url":"https://api.github.com/repos/vibbits/RDM-LS-solution","forks_url":"https://api.github.com/repos/vibbits/RDM-LS-solution/forks","keys_url":"https://api.github.com/repos/vibbits/RDM-LS-solution/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/RDM-LS-solution/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/RDM-LS-solution/teams","hooks_url":"https://api.github.com/repos/vibbits/RDM-LS-solution/hooks","issue_events_url":"https://api.github.com/repos/vibbits/RDM-LS-solution/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/RDM-LS-solution/events","assignees_url":"https://api.github.com/repos/vibbits/RDM-LS-solution/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/RDM-LS-solution/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/RDM-LS-solution/tags","blobs_url":"https://api.github.com/repos/vibbits/RDM-LS-solution/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/RDM-LS-solution/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/RDM-LS-solution/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/RDM-LS-solution/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/RDM-LS-solution/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/RDM-LS-solution/languages","stargazers_url":"https://api.github.com/repos/vibbits/RDM-LS-solution/stargazers","contributors_url":"https://api.github.com/repos/vibbits/RDM-LS-solution/contributors","subscribers_url":"https://api.github.com/repos/vibbits/RDM-LS-solution/subscribers","subscription_url":"https://api.github.com/repos/vibbits/RDM-LS-solution/subscription","commits_url":"https://api.github.com/repos/vibbits/RDM-LS-solution/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/RDM-LS-solution/git/commits{/sha}","comments_url":"https://api.github.com/repos/vibbits/RDM-LS-solution/comments{/numbe
r}","issue_comment_url":"https://api.github.com/repos/vibbits/RDM-LS-solution/issues/comments{/number}","contents_url":"https://api.github.com/repos/vibbits/RDM-LS-solution/contents/{+path}","compare_url":"https://api.github.com/repos/vibbits/RDM-LS-solution/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/RDM-LS-solution/merges","archive_url":"https://api.github.com/repos/vibbits/RDM-LS-solution/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/RDM-LS-solution/downloads","issues_url":"https://api.github.com/repos/vibbits/RDM-LS-solution/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/RDM-LS-solution/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/RDM-LS-solution/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/RDM-LS-solution/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/RDM-LS-solution/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/RDM-LS-solution/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/RDM-LS-solution/deployments","created_at":"2020-10-30 09:11:05 UTC","updated_at":"2020-11-10 12:33:23 UTC","pushed_at":"2020-11-10 08:13:19 
UTC","git_url":"git://github.com/vibbits/RDM-LS-solution.git","ssh_url":"git@github.com:vibbits/RDM-LS-solution.git","clone_url":"https://github.com/vibbits/RDM-LS-solution.git","svn_url":"https://github.com/vibbits/RDM-LS-solution","homepage":null,"size":625,"stargazers_count":0,"watchers_count":0,"language":"HTML","has_issues":true,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":3,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":0,"license":{"key":"other","name":"Other","spdx_id":"NOASSERTION","url":null,"node_id":"MDc6TGljZW5zZTA="},"topics":[],"forks":3,"open_issues":0,"watchers":0,"default_branch":"main"},{"id":246810271,"node_id":"MDEwOlJlcG9zaXRvcnkyNDY4MTAyNzE=","name":"ReproHack-2020","full_name":"vibbits/ReproHack-2020","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/ReproHack-2020","description":"More information & papers for the Reprocubility Hackaton 2020. 
","fork":false,"url":"https://api.github.com/repos/vibbits/ReproHack-2020","forks_url":"https://api.github.com/repos/vibbits/ReproHack-2020/forks","keys_url":"https://api.github.com/repos/vibbits/ReproHack-2020/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/ReproHack-2020/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/ReproHack-2020/teams","hooks_url":"https://api.github.com/repos/vibbits/ReproHack-2020/hooks","issue_events_url":"https://api.github.com/repos/vibbits/ReproHack-2020/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/ReproHack-2020/events","assignees_url":"https://api.github.com/repos/vibbits/ReproHack-2020/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/ReproHack-2020/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/ReproHack-2020/tags","blobs_url":"https://api.github.com/repos/vibbits/ReproHack-2020/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/ReproHack-2020/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/ReproHack-2020/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/ReproHack-2020/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/ReproHack-2020/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/ReproHack-2020/languages","stargazers_url":"https://api.github.com/repos/vibbits/ReproHack-2020/stargazers","contributors_url":"https://api.github.com/repos/vibbits/ReproHack-2020/contributors","subscribers_url":"https://api.github.com/repos/vibbits/ReproHack-2020/subscribers","subscription_url":"https://api.github.com/repos/vibbits/ReproHack-2020/subscription","commits_url":"https://api.github.com/repos/vibbits/ReproHack-2020/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/ReproHack-2020/git/commits{/sha}","comments_url":"https://api.github.com/repos/vibbits/ReproHack-2020/comments{/number}","issue_comment_url":
"https://api.github.com/repos/vibbits/ReproHack-2020/issues/comments{/number}","contents_url":"https://api.github.com/repos/vibbits/ReproHack-2020/contents/{+path}","compare_url":"https://api.github.com/repos/vibbits/ReproHack-2020/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/ReproHack-2020/merges","archive_url":"https://api.github.com/repos/vibbits/ReproHack-2020/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/ReproHack-2020/downloads","issues_url":"https://api.github.com/repos/vibbits/ReproHack-2020/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/ReproHack-2020/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/ReproHack-2020/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/ReproHack-2020/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/ReproHack-2020/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/ReproHack-2020/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/ReproHack-2020/deployments","created_at":"2020-03-12 10:50:23 UTC","updated_at":"2020-10-20 06:30:20 UTC","pushed_at":"2020-10-20 06:30:18 UTC","git_url":"git://github.com/vibbits/ReproHack-2020.git","ssh_url":"git@github.com:vibbits/ReproHack-2020.git","clone_url":"https://github.com/vibbits/ReproHack-2020.git","svn_url":"https://github.com/vibbits/ReproHack-2020","homepage":null,"size":645,"stargazers_count":1,"watchers_count":1,"language":null,"has_issues":true,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":0,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":0,"license":{"key":"mit","name":"MIT 
License","spdx_id":"MIT","url":"https://api.github.com/licenses/mit","node_id":"MDc6TGljZW5zZTEz"},"topics":[],"forks":0,"open_issues":0,"watchers":1,"default_branch":"master"},{"id":133400916,"node_id":"MDEwOlJlcG9zaXRvcnkxMzM0MDA5MTY=","name":"rocker_conda_data_integration","full_name":"vibbits/rocker_conda_data_integration","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/rocker_conda_data_integration","description":"Docker container based on rocker/tidyverse with miniconda3 to run various integration tools for 
Omics","fork":false,"url":"https://api.github.com/repos/vibbits/rocker_conda_data_integration","forks_url":"https://api.github.com/repos/vibbits/rocker_conda_data_integration/forks","keys_url":"https://api.github.com/repos/vibbits/rocker_conda_data_integration/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/rocker_conda_data_integration/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/rocker_conda_data_integration/teams","hooks_url":"https://api.github.com/repos/vibbits/rocker_conda_data_integration/hooks","issue_events_url":"https://api.github.com/repos/vibbits/rocker_conda_data_integration/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/rocker_conda_data_integration/events","assignees_url":"https://api.github.com/repos/vibbits/rocker_conda_data_integration/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/rocker_conda_data_integration/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/rocker_conda_data_integration/tags","blobs_url":"https://api.github.com/repos/vibbits/rocker_conda_data_integration/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/rocker_conda_data_integration/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/rocker_conda_data_integration/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/rocker_conda_data_integration/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/rocker_conda_data_integration/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/rocker_conda_data_integration/languages","stargazers_url":"https://api.github.com/repos/vibbits/rocker_conda_data_integration/stargazers","contributors_url":"https://api.github.com/repos/vibbits/rocker_conda_data_integration/contributors","subscribers_url":"https://api.github.com/repos/vibbits/rocker_conda_data_integration/subscribers","subscription_url":"https://api.github.com/repos/vibbits/roc
ker_conda_data_integration/subscription","commits_url":"https://api.github.com/repos/vibbits/rocker_conda_data_integration/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/rocker_conda_data_integration/git/commits{/sha}","comments_url":"https://api.github.com/repos/vibbits/rocker_conda_data_integration/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/rocker_conda_data_integration/issues/comments{/number}","contents_url":"https://api.github.com/repos/vibbits/rocker_conda_data_integration/contents/{+path}","compare_url":"https://api.github.com/repos/vibbits/rocker_conda_data_integration/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/rocker_conda_data_integration/merges","archive_url":"https://api.github.com/repos/vibbits/rocker_conda_data_integration/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/rocker_conda_data_integration/downloads","issues_url":"https://api.github.com/repos/vibbits/rocker_conda_data_integration/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/rocker_conda_data_integration/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/rocker_conda_data_integration/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/rocker_conda_data_integration/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/rocker_conda_data_integration/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/rocker_conda_data_integration/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/rocker_conda_data_integration/deployments","created_at":"2018-05-14 17:53:42 UTC","updated_at":"2020-08-22 02:56:01 UTC","pushed_at":"2019-01-13 08:51:00 
UTC","git_url":"git://github.com/vibbits/rocker_conda_data_integration.git","ssh_url":"git@github.com:vibbits/rocker_conda_data_integration.git","clone_url":"https://github.com/vibbits/rocker_conda_data_integration.git","svn_url":"https://github.com/vibbits/rocker_conda_data_integration","homepage":"","size":50406,"stargazers_count":5,"watchers_count":5,"language":"R","has_issues":true,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":2,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":0,"license":{"key":"gpl-2.0","name":"GNU General Public License v2.0","spdx_id":"GPL-2.0","url":"https://api.github.com/licenses/gpl-2.0","node_id":"MDc6TGljZW5zZTg="},"topics":[],"forks":2,"open_issues":0,"watchers":5,"default_branch":"master"},{"id":131875427,"node_id":"MDEwOlJlcG9zaXRvcnkxMzE4NzU0Mjc=","name":"rocker_conda_mofa","full_name":"vibbits/rocker_conda_mofa","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/rocker_conda_mofa","description":"Docker container on rocker/tidyverse with miniconda3 to run 
MOFA tool https://github.com/bioFAM/MOFA","fork":false,"url":"https://api.github.com/repos/vibbits/rocker_conda_mofa","forks_url":"https://api.github.com/repos/vibbits/rocker_conda_mofa/forks","keys_url":"https://api.github.com/repos/vibbits/rocker_conda_mofa/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/rocker_conda_mofa/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/rocker_conda_mofa/teams","hooks_url":"https://api.github.com/repos/vibbits/rocker_conda_mofa/hooks","issue_events_url":"https://api.github.com/repos/vibbits/rocker_conda_mofa/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/rocker_conda_mofa/events","assignees_url":"https://api.github.com/repos/vibbits/rocker_conda_mofa/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/rocker_conda_mofa/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/rocker_conda_mofa/tags","blobs_url":"https://api.github.com/repos/vibbits/rocker_conda_mofa/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/rocker_conda_mofa/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/rocker_conda_mofa/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/rocker_conda_mofa/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/rocker_conda_mofa/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/rocker_conda_mofa/languages","stargazers_url":"https://api.github.com/repos/vibbits/rocker_conda_mofa/stargazers","contributors_url":"https://api.github.com/repos/vibbits/rocker_conda_mofa/contributors","subscribers_url":"https://api.github.com/repos/vibbits/rocker_conda_mofa/subscribers","subscription_url":"https://api.github.com/repos/vibbits/rocker_conda_mofa/subscription","commits_url":"https://api.github.com/repos/vibbits/rocker_conda_mofa/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/rocker_conda_mofa/git/commits{/sha}
","comments_url":"https://api.github.com/repos/vibbits/rocker_conda_mofa/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/rocker_conda_mofa/issues/comments{/number}","contents_url":"https://api.github.com/repos/vibbits/rocker_conda_mofa/contents/{+path}","compare_url":"https://api.github.com/repos/vibbits/rocker_conda_mofa/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/rocker_conda_mofa/merges","archive_url":"https://api.github.com/repos/vibbits/rocker_conda_mofa/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/rocker_conda_mofa/downloads","issues_url":"https://api.github.com/repos/vibbits/rocker_conda_mofa/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/rocker_conda_mofa/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/rocker_conda_mofa/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/rocker_conda_mofa/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/rocker_conda_mofa/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/rocker_conda_mofa/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/rocker_conda_mofa/deployments","created_at":"2018-05-02 16:08:04 UTC","updated_at":"2018-10-11 09:37:21 UTC","pushed_at":"2018-05-02 18:07:02 UTC","git_url":"git://github.com/vibbits/rocker_conda_mofa.git","ssh_url":"git@github.com:vibbits/rocker_conda_mofa.git","clone_url":"https://github.com/vibbits/rocker_conda_mofa.git","svn_url":"https://github.com/vibbits/rocker_conda_mofa","homepage":null,"size":17,"stargazers_count":0,"watchers_count":0,"language":null,"has_issues":true,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":0,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":0,"license":{"key":"gpl-2.0","name":"GNU General Public License 
v2.0","spdx_id":"GPL-2.0","url":"https://api.github.com/licenses/gpl-2.0","node_id":"MDc6TGljZW5zZTg="},"topics":[],"forks":0,"open_issues":0,"watchers":0,"default_branch":"master"},{"id":323568864,"node_id":"MDEwOlJlcG9zaXRvcnkzMjM1Njg4NjQ=","name":"rust-node-ci","full_name":"vibbits/rust-node-ci","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/rust-node-ci","description":"A docker image optimized for building Rust and Node projects in 
CI.","fork":false,"url":"https://api.github.com/repos/vibbits/rust-node-ci","forks_url":"https://api.github.com/repos/vibbits/rust-node-ci/forks","keys_url":"https://api.github.com/repos/vibbits/rust-node-ci/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/rust-node-ci/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/rust-node-ci/teams","hooks_url":"https://api.github.com/repos/vibbits/rust-node-ci/hooks","issue_events_url":"https://api.github.com/repos/vibbits/rust-node-ci/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/rust-node-ci/events","assignees_url":"https://api.github.com/repos/vibbits/rust-node-ci/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/rust-node-ci/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/rust-node-ci/tags","blobs_url":"https://api.github.com/repos/vibbits/rust-node-ci/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/rust-node-ci/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/rust-node-ci/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/rust-node-ci/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/rust-node-ci/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/rust-node-ci/languages","stargazers_url":"https://api.github.com/repos/vibbits/rust-node-ci/stargazers","contributors_url":"https://api.github.com/repos/vibbits/rust-node-ci/contributors","subscribers_url":"https://api.github.com/repos/vibbits/rust-node-ci/subscribers","subscription_url":"https://api.github.com/repos/vibbits/rust-node-ci/subscription","commits_url":"https://api.github.com/repos/vibbits/rust-node-ci/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/rust-node-ci/git/commits{/sha}","comments_url":"https://api.github.com/repos/vibbits/rust-node-ci/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/rust-no
de-ci/issues/comments{/number}","contents_url":"https://api.github.com/repos/vibbits/rust-node-ci/contents/{+path}","compare_url":"https://api.github.com/repos/vibbits/rust-node-ci/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/rust-node-ci/merges","archive_url":"https://api.github.com/repos/vibbits/rust-node-ci/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/rust-node-ci/downloads","issues_url":"https://api.github.com/repos/vibbits/rust-node-ci/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/rust-node-ci/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/rust-node-ci/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/rust-node-ci/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/rust-node-ci/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/rust-node-ci/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/rust-node-ci/deployments","created_at":"2020-12-22 08:40:20 UTC","updated_at":"2020-12-23 09:36:51 UTC","pushed_at":"2020-12-23 09:26:46 UTC","git_url":"git://github.com/vibbits/rust-node-ci.git","ssh_url":"git@github.com:vibbits/rust-node-ci.git","clone_url":"https://github.com/vibbits/rust-node-ci.git","svn_url":"https://github.com/vibbits/rust-node-ci","homepage":"https://hub.docker.com/r/vibbioinfocore/rust-node-ci","size":6,"stargazers_count":1,"watchers_count":1,"language":"Dockerfile","has_issues":true,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":0,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":0,"license":{"key":"mit","name":"MIT 
License","spdx_id":"MIT","url":"https://api.github.com/licenses/mit","node_id":"MDc6TGljZW5zZTEz"},"topics":["docker","ci","rust-lang","elm-lang"],"forks":0,"open_issues":0,"watchers":1,"default_branch":"main"},{"id":113286207,"node_id":"MDEwOlJlcG9zaXRvcnkxMTMyODYyMDc=","name":"scop3d","full_name":"vibbits/scop3d","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/scop3d","description":"Sequence conservation of protein on 3D 
structure.","fork":true,"url":"https://api.github.com/repos/vibbits/scop3d","forks_url":"https://api.github.com/repos/vibbits/scop3d/forks","keys_url":"https://api.github.com/repos/vibbits/scop3d/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/scop3d/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/scop3d/teams","hooks_url":"https://api.github.com/repos/vibbits/scop3d/hooks","issue_events_url":"https://api.github.com/repos/vibbits/scop3d/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/scop3d/events","assignees_url":"https://api.github.com/repos/vibbits/scop3d/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/scop3d/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/scop3d/tags","blobs_url":"https://api.github.com/repos/vibbits/scop3d/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/scop3d/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/scop3d/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/scop3d/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/scop3d/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/scop3d/languages","stargazers_url":"https://api.github.com/repos/vibbits/scop3d/stargazers","contributors_url":"https://api.github.com/repos/vibbits/scop3d/contributors","subscribers_url":"https://api.github.com/repos/vibbits/scop3d/subscribers","subscription_url":"https://api.github.com/repos/vibbits/scop3d/subscription","commits_url":"https://api.github.com/repos/vibbits/scop3d/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/scop3d/git/commits{/sha}","comments_url":"https://api.github.com/repos/vibbits/scop3d/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/scop3d/issues/comments{/number}","contents_url":"https://api.github.com/repos/vibbits/scop3d/contents/{+path}","compare_url":"https://api.github.
com/repos/vibbits/scop3d/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/scop3d/merges","archive_url":"https://api.github.com/repos/vibbits/scop3d/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/scop3d/downloads","issues_url":"https://api.github.com/repos/vibbits/scop3d/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/scop3d/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/scop3d/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/scop3d/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/scop3d/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/scop3d/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/scop3d/deployments","created_at":"2017-12-06 07:56:32 UTC","updated_at":"2019-06-04 20:25:31 UTC","pushed_at":"2019-06-04 20:25:30 UTC","git_url":"git://github.com/vibbits/scop3d.git","ssh_url":"git@github.com:vibbits/scop3d.git","clone_url":"https://github.com/vibbits/scop3d.git","svn_url":"https://github.com/vibbits/scop3d","homepage":"","size":26190,"stargazers_count":0,"watchers_count":0,"language":"Python","has_issues":false,"has_projects":true,"has_downloads":false,"has_wiki":true,"has_pages":false,"forks_count":0,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":0,"license":{"key":"gpl-3.0","name":"GNU General Public License 
v3.0","spdx_id":"GPL-3.0","url":"https://api.github.com/licenses/gpl-3.0","node_id":"MDc6TGljZW5zZTk="},"topics":[],"forks":0,"open_issues":0,"watchers":0,"default_branch":"master"},{"id":104596713,"node_id":"MDEwOlJlcG9zaXRvcnkxMDQ1OTY3MTM=","name":"scRNA-Seq-TCC-prep","full_name":"vibbits/scRNA-Seq-TCC-prep","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/scRNA-Seq-TCC-prep","description":"Preprocessing of single-cell RNA-Seq data for input to 
kallisto","fork":true,"url":"https://api.github.com/repos/vibbits/scRNA-Seq-TCC-prep","forks_url":"https://api.github.com/repos/vibbits/scRNA-Seq-TCC-prep/forks","keys_url":"https://api.github.com/repos/vibbits/scRNA-Seq-TCC-prep/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/scRNA-Seq-TCC-prep/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/scRNA-Seq-TCC-prep/teams","hooks_url":"https://api.github.com/repos/vibbits/scRNA-Seq-TCC-prep/hooks","issue_events_url":"https://api.github.com/repos/vibbits/scRNA-Seq-TCC-prep/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/scRNA-Seq-TCC-prep/events","assignees_url":"https://api.github.com/repos/vibbits/scRNA-Seq-TCC-prep/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/scRNA-Seq-TCC-prep/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/scRNA-Seq-TCC-prep/tags","blobs_url":"https://api.github.com/repos/vibbits/scRNA-Seq-TCC-prep/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/scRNA-Seq-TCC-prep/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/scRNA-Seq-TCC-prep/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/scRNA-Seq-TCC-prep/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/scRNA-Seq-TCC-prep/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/scRNA-Seq-TCC-prep/languages","stargazers_url":"https://api.github.com/repos/vibbits/scRNA-Seq-TCC-prep/stargazers","contributors_url":"https://api.github.com/repos/vibbits/scRNA-Seq-TCC-prep/contributors","subscribers_url":"https://api.github.com/repos/vibbits/scRNA-Seq-TCC-prep/subscribers","subscription_url":"https://api.github.com/repos/vibbits/scRNA-Seq-TCC-prep/subscription","commits_url":"https://api.github.com/repos/vibbits/scRNA-Seq-TCC-prep/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/scRNA-Seq-TCC-prep/git/commits{/sha}","comment
s_url":"https://api.github.com/repos/vibbits/scRNA-Seq-TCC-prep/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/scRNA-Seq-TCC-prep/issues/comments{/number}","contents_url":"https://api.github.com/repos/vibbits/scRNA-Seq-TCC-prep/contents/{+path}","compare_url":"https://api.github.com/repos/vibbits/scRNA-Seq-TCC-prep/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/scRNA-Seq-TCC-prep/merges","archive_url":"https://api.github.com/repos/vibbits/scRNA-Seq-TCC-prep/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/scRNA-Seq-TCC-prep/downloads","issues_url":"https://api.github.com/repos/vibbits/scRNA-Seq-TCC-prep/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/scRNA-Seq-TCC-prep/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/scRNA-Seq-TCC-prep/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/scRNA-Seq-TCC-prep/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/scRNA-Seq-TCC-prep/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/scRNA-Seq-TCC-prep/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/scRNA-Seq-TCC-prep/deployments","created_at":"2017-09-23 20:22:54 UTC","updated_at":"2018-10-11 09:37:22 UTC","pushed_at":"2016-12-01 02:04:26 UTC","git_url":"git://github.com/vibbits/scRNA-Seq-TCC-prep.git","ssh_url":"git@github.com:vibbits/scRNA-Seq-TCC-prep.git","clone_url":"https://github.com/vibbits/scRNA-Seq-TCC-prep.git","svn_url":"https://github.com/vibbits/scRNA-Seq-TCC-prep","homepage":null,"size":8740,"stargazers_count":0,"watchers_count":0,"language":"Jupyter Notebook","has_issues":false,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":0,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":0,"license":{"key":"gpl-3.0","name":"GNU General Public License 
v3.0","spdx_id":"GPL-3.0","url":"https://api.github.com/licenses/gpl-3.0","node_id":"MDc6TGljZW5zZTk="},"topics":[],"forks":0,"open_issues":0,"watchers":0,"default_branch":"master"},{"id":137523231,"node_id":"MDEwOlJlcG9zaXRvcnkxMzc1MjMyMzE=","name":"scRNA-tools","full_name":"vibbits/scRNA-tools","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/scRNA-tools","description":"Table of software for the analysis of single-cell RNA-seq 
data.","fork":true,"url":"https://api.github.com/repos/vibbits/scRNA-tools","forks_url":"https://api.github.com/repos/vibbits/scRNA-tools/forks","keys_url":"https://api.github.com/repos/vibbits/scRNA-tools/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/scRNA-tools/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/scRNA-tools/teams","hooks_url":"https://api.github.com/repos/vibbits/scRNA-tools/hooks","issue_events_url":"https://api.github.com/repos/vibbits/scRNA-tools/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/scRNA-tools/events","assignees_url":"https://api.github.com/repos/vibbits/scRNA-tools/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/scRNA-tools/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/scRNA-tools/tags","blobs_url":"https://api.github.com/repos/vibbits/scRNA-tools/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/scRNA-tools/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/scRNA-tools/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/scRNA-tools/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/scRNA-tools/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/scRNA-tools/languages","stargazers_url":"https://api.github.com/repos/vibbits/scRNA-tools/stargazers","contributors_url":"https://api.github.com/repos/vibbits/scRNA-tools/contributors","subscribers_url":"https://api.github.com/repos/vibbits/scRNA-tools/subscribers","subscription_url":"https://api.github.com/repos/vibbits/scRNA-tools/subscription","commits_url":"https://api.github.com/repos/vibbits/scRNA-tools/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/scRNA-tools/git/commits{/sha}","comments_url":"https://api.github.com/repos/vibbits/scRNA-tools/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/scRNA-tools/issues/comments{/n
umber}","contents_url":"https://api.github.com/repos/vibbits/scRNA-tools/contents/{+path}","compare_url":"https://api.github.com/repos/vibbits/scRNA-tools/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/scRNA-tools/merges","archive_url":"https://api.github.com/repos/vibbits/scRNA-tools/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/scRNA-tools/downloads","issues_url":"https://api.github.com/repos/vibbits/scRNA-tools/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/scRNA-tools/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/scRNA-tools/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/scRNA-tools/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/scRNA-tools/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/scRNA-tools/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/scRNA-tools/deployments","created_at":"2018-06-15 19:10:39 UTC","updated_at":"2020-04-24 11:43:45 UTC","pushed_at":"2018-06-15 19:31:56 UTC","git_url":"git://github.com/vibbits/scRNA-tools.git","ssh_url":"git@github.com:vibbits/scRNA-tools.git","clone_url":"https://github.com/vibbits/scRNA-tools.git","svn_url":"https://github.com/vibbits/scRNA-tools","homepage":null,"size":3835,"stargazers_count":0,"watchers_count":0,"language":"R","has_issues":false,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":0,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":0,"license":{"key":"mit","name":"MIT 
License","spdx_id":"MIT","url":"https://api.github.com/licenses/mit","node_id":"MDc6TGljZW5zZTEz"},"topics":[],"forks":0,"open_issues":0,"watchers":0,"default_branch":"master"},{"id":100585992,"node_id":"MDEwOlJlcG9zaXRvcnkxMDA1ODU5OTI=","name":"sc_read_kallisto_wrapper","full_name":"vibbits/sc_read_kallisto_wrapper","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/sc_read_kallisto_wrapper","description":"wrapper for single cell transcriptomics pipeline with 
kallisto","fork":false,"url":"https://api.github.com/repos/vibbits/sc_read_kallisto_wrapper","forks_url":"https://api.github.com/repos/vibbits/sc_read_kallisto_wrapper/forks","keys_url":"https://api.github.com/repos/vibbits/sc_read_kallisto_wrapper/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/sc_read_kallisto_wrapper/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/sc_read_kallisto_wrapper/teams","hooks_url":"https://api.github.com/repos/vibbits/sc_read_kallisto_wrapper/hooks","issue_events_url":"https://api.github.com/repos/vibbits/sc_read_kallisto_wrapper/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/sc_read_kallisto_wrapper/events","assignees_url":"https://api.github.com/repos/vibbits/sc_read_kallisto_wrapper/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/sc_read_kallisto_wrapper/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/sc_read_kallisto_wrapper/tags","blobs_url":"https://api.github.com/repos/vibbits/sc_read_kallisto_wrapper/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/sc_read_kallisto_wrapper/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/sc_read_kallisto_wrapper/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/sc_read_kallisto_wrapper/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/sc_read_kallisto_wrapper/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/sc_read_kallisto_wrapper/languages","stargazers_url":"https://api.github.com/repos/vibbits/sc_read_kallisto_wrapper/stargazers","contributors_url":"https://api.github.com/repos/vibbits/sc_read_kallisto_wrapper/contributors","subscribers_url":"https://api.github.com/repos/vibbits/sc_read_kallisto_wrapper/subscribers","subscription_url":"https://api.github.com/repos/vibbits/sc_read_kallisto_wrapper/subscription","commits_url":"https://api.github.com/repos/vibbits/sc_read_k
allisto_wrapper/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/sc_read_kallisto_wrapper/git/commits{/sha}","comments_url":"https://api.github.com/repos/vibbits/sc_read_kallisto_wrapper/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/sc_read_kallisto_wrapper/issues/comments{/number}","contents_url":"https://api.github.com/repos/vibbits/sc_read_kallisto_wrapper/contents/{+path}","compare_url":"https://api.github.com/repos/vibbits/sc_read_kallisto_wrapper/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/sc_read_kallisto_wrapper/merges","archive_url":"https://api.github.com/repos/vibbits/sc_read_kallisto_wrapper/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/sc_read_kallisto_wrapper/downloads","issues_url":"https://api.github.com/repos/vibbits/sc_read_kallisto_wrapper/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/sc_read_kallisto_wrapper/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/sc_read_kallisto_wrapper/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/sc_read_kallisto_wrapper/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/sc_read_kallisto_wrapper/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/sc_read_kallisto_wrapper/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/sc_read_kallisto_wrapper/deployments","created_at":"2017-08-17 09:24:32 UTC","updated_at":"2020-10-02 14:53:06 UTC","pushed_at":"2018-02-09 09:22:40 
UTC","git_url":"git://github.com/vibbits/sc_read_kallisto_wrapper.git","ssh_url":"git@github.com:vibbits/sc_read_kallisto_wrapper.git","clone_url":"https://github.com/vibbits/sc_read_kallisto_wrapper.git","svn_url":"https://github.com/vibbits/sc_read_kallisto_wrapper","homepage":null,"size":84,"stargazers_count":7,"watchers_count":7,"language":"HTML","has_issues":true,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":2,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":0,"license":{"key":"gpl-3.0","name":"GNU General Public License v3.0","spdx_id":"GPL-3.0","url":"https://api.github.com/licenses/gpl-3.0","node_id":"MDc6TGljZW5zZTk="},"topics":[],"forks":2,"open_issues":0,"watchers":7,"default_branch":"master"},{"id":153587742,"node_id":"MDEwOlJlcG9zaXRvcnkxNTM1ODc3NDI=","name":"tomo","full_name":"vibbits/tomo","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/tomo","description":"Experimental tool for serial section tomography on 
SECOM","fork":false,"url":"https://api.github.com/repos/vibbits/tomo","forks_url":"https://api.github.com/repos/vibbits/tomo/forks","keys_url":"https://api.github.com/repos/vibbits/tomo/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/tomo/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/tomo/teams","hooks_url":"https://api.github.com/repos/vibbits/tomo/hooks","issue_events_url":"https://api.github.com/repos/vibbits/tomo/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/tomo/events","assignees_url":"https://api.github.com/repos/vibbits/tomo/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/tomo/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/tomo/tags","blobs_url":"https://api.github.com/repos/vibbits/tomo/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/tomo/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/tomo/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/tomo/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/tomo/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/tomo/languages","stargazers_url":"https://api.github.com/repos/vibbits/tomo/stargazers","contributors_url":"https://api.github.com/repos/vibbits/tomo/contributors","subscribers_url":"https://api.github.com/repos/vibbits/tomo/subscribers","subscription_url":"https://api.github.com/repos/vibbits/tomo/subscription","commits_url":"https://api.github.com/repos/vibbits/tomo/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/tomo/git/commits{/sha}","comments_url":"https://api.github.com/repos/vibbits/tomo/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/tomo/issues/comments{/number}","contents_url":"https://api.github.com/repos/vibbits/tomo/contents/{+path}","compare_url":"https://api.github.com/repos/vibbits/tomo/compare/{base}...{head}","merges_
url":"https://api.github.com/repos/vibbits/tomo/merges","archive_url":"https://api.github.com/repos/vibbits/tomo/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/tomo/downloads","issues_url":"https://api.github.com/repos/vibbits/tomo/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/tomo/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/tomo/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/tomo/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/tomo/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/tomo/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/tomo/deployments","created_at":"2018-10-18 08:13:16 UTC","updated_at":"2021-05-06 16:23:58 UTC","pushed_at":"2021-05-06 16:07:59 UTC","git_url":"git://github.com/vibbits/tomo.git","ssh_url":"git@github.com:vibbits/tomo.git","clone_url":"https://github.com/vibbits/tomo.git","svn_url":"https://github.com/vibbits/tomo","homepage":"","size":8429,"stargazers_count":0,"watchers_count":0,"language":"Python","has_issues":true,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":0,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":0,"license":{"key":"gpl-3.0","name":"GNU General Public License 
v3.0","spdx_id":"GPL-3.0","url":"https://api.github.com/licenses/gpl-3.0","node_id":"MDc6TGljZW5zZTk="},"topics":[],"forks":0,"open_issues":0,"watchers":0,"default_branch":"master"},{"id":189082762,"node_id":"MDEwOlJlcG9zaXRvcnkxODkwODI3NjI=","name":"training-material","full_name":"vibbits/training-material","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/training-material","description":"VIB BITS training 
material","fork":false,"url":"https://api.github.com/repos/vibbits/training-material","forks_url":"https://api.github.com/repos/vibbits/training-material/forks","keys_url":"https://api.github.com/repos/vibbits/training-material/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/training-material/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/training-material/teams","hooks_url":"https://api.github.com/repos/vibbits/training-material/hooks","issue_events_url":"https://api.github.com/repos/vibbits/training-material/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/training-material/events","assignees_url":"https://api.github.com/repos/vibbits/training-material/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/training-material/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/training-material/tags","blobs_url":"https://api.github.com/repos/vibbits/training-material/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/training-material/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/training-material/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/training-material/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/training-material/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/training-material/languages","stargazers_url":"https://api.github.com/repos/vibbits/training-material/stargazers","contributors_url":"https://api.github.com/repos/vibbits/training-material/contributors","subscribers_url":"https://api.github.com/repos/vibbits/training-material/subscribers","subscription_url":"https://api.github.com/repos/vibbits/training-material/subscription","commits_url":"https://api.github.com/repos/vibbits/training-material/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/training-material/git/commits{/sha}","comments_url":"https://api.gi
thub.com/repos/vibbits/training-material/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/training-material/issues/comments{/number}","contents_url":"https://api.github.com/repos/vibbits/training-material/contents/{+path}","compare_url":"https://api.github.com/repos/vibbits/training-material/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/training-material/merges","archive_url":"https://api.github.com/repos/vibbits/training-material/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/training-material/downloads","issues_url":"https://api.github.com/repos/vibbits/training-material/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/training-material/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/training-material/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/training-material/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/training-material/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/training-material/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/training-material/deployments","created_at":"2019-05-28 18:33:29 UTC","updated_at":"2021-05-31 09:43:17 UTC","pushed_at":"2021-05-31 09:44:47 
UTC","git_url":"git://github.com/vibbits/training-material.git","ssh_url":"git@github.com:vibbits/training-material.git","clone_url":"https://github.com/vibbits/training-material.git","svn_url":"https://github.com/vibbits/training-material","homepage":"https://material.bits.vib.be/","size":74310,"stargazers_count":3,"watchers_count":3,"language":"JavaScript","has_issues":true,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":true,"forks_count":2,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":20,"license":{"key":"other","name":"Other","spdx_id":"NOASSERTION","url":null,"node_id":"MDc6TGljZW5zZTA="},"topics":[],"forks":2,"open_issues":20,"watchers":3,"default_branch":"master"},{"id":232307799,"node_id":"MDEwOlJlcG9zaXRvcnkyMzIzMDc3OTk=","name":"volumina","full_name":"vibbits/volumina","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/volumina","description":"Volume Slicing and 
Editing","fork":true,"url":"https://api.github.com/repos/vibbits/volumina","forks_url":"https://api.github.com/repos/vibbits/volumina/forks","keys_url":"https://api.github.com/repos/vibbits/volumina/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/volumina/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/volumina/teams","hooks_url":"https://api.github.com/repos/vibbits/volumina/hooks","issue_events_url":"https://api.github.com/repos/vibbits/volumina/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/volumina/events","assignees_url":"https://api.github.com/repos/vibbits/volumina/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/volumina/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/volumina/tags","blobs_url":"https://api.github.com/repos/vibbits/volumina/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/volumina/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/volumina/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/volumina/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/volumina/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/volumina/languages","stargazers_url":"https://api.github.com/repos/vibbits/volumina/stargazers","contributors_url":"https://api.github.com/repos/vibbits/volumina/contributors","subscribers_url":"https://api.github.com/repos/vibbits/volumina/subscribers","subscription_url":"https://api.github.com/repos/vibbits/volumina/subscription","commits_url":"https://api.github.com/repos/vibbits/volumina/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/volumina/git/commits{/sha}","comments_url":"https://api.github.com/repos/vibbits/volumina/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/volumina/issues/comments{/number}","contents_url":"https://api.github.com/repos/vibbits/volumina/con
tents/{+path}","compare_url":"https://api.github.com/repos/vibbits/volumina/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/volumina/merges","archive_url":"https://api.github.com/repos/vibbits/volumina/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/volumina/downloads","issues_url":"https://api.github.com/repos/vibbits/volumina/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/volumina/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/volumina/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/volumina/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/volumina/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/volumina/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/volumina/deployments","created_at":"2020-01-07 11:11:45 UTC","updated_at":"2020-01-07 11:11:47 UTC","pushed_at":"2019-11-22 15:53:44 
UTC","git_url":"git://github.com/vibbits/volumina.git","ssh_url":"git@github.com:vibbits/volumina.git","clone_url":"https://github.com/vibbits/volumina.git","svn_url":"https://github.com/vibbits/volumina","homepage":"http://ilastik.org","size":5774,"stargazers_count":0,"watchers_count":0,"language":null,"has_issues":false,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":0,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":0,"license":{"key":"other","name":"Other","spdx_id":"NOASSERTION","url":null,"node_id":"MDc6TGljZW5zZTA="},"topics":[],"forks":0,"open_issues":0,"watchers":0,"default_branch":"master"},{"id":153169091,"node_id":"MDEwOlJlcG9zaXRvcnkxNTMxNjkwOTE=","name":"WaaS-Finnish-Cloud","full_name":"vibbits/WaaS-Finnish-Cloud","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/WaaS-Finnish-Cloud","description":null,"fork":false,"url":"https://api.github.com/repos/vibbits/WaaS-Finnish-Cloud","forks_url":"https://api.github.com/repos/vibbits/WaaS-Finnish-Cloud/forks","keys_url":"https://api.git
hub.com/repos/vibbits/WaaS-Finnish-Cloud/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/WaaS-Finnish-Cloud/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/WaaS-Finnish-Cloud/teams","hooks_url":"https://api.github.com/repos/vibbits/WaaS-Finnish-Cloud/hooks","issue_events_url":"https://api.github.com/repos/vibbits/WaaS-Finnish-Cloud/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/WaaS-Finnish-Cloud/events","assignees_url":"https://api.github.com/repos/vibbits/WaaS-Finnish-Cloud/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/WaaS-Finnish-Cloud/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/WaaS-Finnish-Cloud/tags","blobs_url":"https://api.github.com/repos/vibbits/WaaS-Finnish-Cloud/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/WaaS-Finnish-Cloud/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/WaaS-Finnish-Cloud/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/WaaS-Finnish-Cloud/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/WaaS-Finnish-Cloud/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/WaaS-Finnish-Cloud/languages","stargazers_url":"https://api.github.com/repos/vibbits/WaaS-Finnish-Cloud/stargazers","contributors_url":"https://api.github.com/repos/vibbits/WaaS-Finnish-Cloud/contributors","subscribers_url":"https://api.github.com/repos/vibbits/WaaS-Finnish-Cloud/subscribers","subscription_url":"https://api.github.com/repos/vibbits/WaaS-Finnish-Cloud/subscription","commits_url":"https://api.github.com/repos/vibbits/WaaS-Finnish-Cloud/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/WaaS-Finnish-Cloud/git/commits{/sha}","comments_url":"https://api.github.com/repos/vibbits/WaaS-Finnish-Cloud/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/WaaS-Finnish-Cloud/issues/comments{/number}","co
ntents_url":"https://api.github.com/repos/vibbits/WaaS-Finnish-Cloud/contents/{+path}","compare_url":"https://api.github.com/repos/vibbits/WaaS-Finnish-Cloud/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/WaaS-Finnish-Cloud/merges","archive_url":"https://api.github.com/repos/vibbits/WaaS-Finnish-Cloud/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/WaaS-Finnish-Cloud/downloads","issues_url":"https://api.github.com/repos/vibbits/WaaS-Finnish-Cloud/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/WaaS-Finnish-Cloud/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/WaaS-Finnish-Cloud/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/WaaS-Finnish-Cloud/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/WaaS-Finnish-Cloud/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/WaaS-Finnish-Cloud/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/WaaS-Finnish-Cloud/deployments","created_at":"2018-10-15 19:20:17 UTC","updated_at":"2019-03-27 05:45:56 UTC","pushed_at":"2019-03-27 05:44:56 
UTC","git_url":"git://github.com/vibbits/WaaS-Finnish-Cloud.git","ssh_url":"git@github.com:vibbits/WaaS-Finnish-Cloud.git","clone_url":"https://github.com/vibbits/WaaS-Finnish-Cloud.git","svn_url":"https://github.com/vibbits/WaaS-Finnish-Cloud","homepage":null,"size":3,"stargazers_count":0,"watchers_count":0,"language":null,"has_issues":true,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":0,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":0,"license":null,"topics":[],"forks":0,"open_issues":0,"watchers":0,"default_branch":"master"},{"id":290495484,"node_id":"MDEwOlJlcG9zaXRvcnkyOTA0OTU0ODQ=","name":"wordpress-export-to-markdown","full_name":"vibbits/wordpress-export-to-markdown","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/wordpress-export-to-markdown","description":"Converts a WordPress export XML file into Markdown 
files.","fork":true,"url":"https://api.github.com/repos/vibbits/wordpress-export-to-markdown","forks_url":"https://api.github.com/repos/vibbits/wordpress-export-to-markdown/forks","keys_url":"https://api.github.com/repos/vibbits/wordpress-export-to-markdown/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/wordpress-export-to-markdown/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/wordpress-export-to-markdown/teams","hooks_url":"https://api.github.com/repos/vibbits/wordpress-export-to-markdown/hooks","issue_events_url":"https://api.github.com/repos/vibbits/wordpress-export-to-markdown/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/wordpress-export-to-markdown/events","assignees_url":"https://api.github.com/repos/vibbits/wordpress-export-to-markdown/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/wordpress-export-to-markdown/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/wordpress-export-to-markdown/tags","blobs_url":"https://api.github.com/repos/vibbits/wordpress-export-to-markdown/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/wordpress-export-to-markdown/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/wordpress-export-to-markdown/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/wordpress-export-to-markdown/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/wordpress-export-to-markdown/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/wordpress-export-to-markdown/languages","stargazers_url":"https://api.github.com/repos/vibbits/wordpress-export-to-markdown/stargazers","contributors_url":"https://api.github.com/repos/vibbits/wordpress-export-to-markdown/contributors","subscribers_url":"https://api.github.com/repos/vibbits/wordpress-export-to-markdown/subscribers","subscription_url":"https://api.github.com/repos/vibbits/wordpress-export-to-mar
kdown/subscription","commits_url":"https://api.github.com/repos/vibbits/wordpress-export-to-markdown/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/wordpress-export-to-markdown/git/commits{/sha}","comments_url":"https://api.github.com/repos/vibbits/wordpress-export-to-markdown/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/wordpress-export-to-markdown/issues/comments{/number}","contents_url":"https://api.github.com/repos/vibbits/wordpress-export-to-markdown/contents/{+path}","compare_url":"https://api.github.com/repos/vibbits/wordpress-export-to-markdown/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/wordpress-export-to-markdown/merges","archive_url":"https://api.github.com/repos/vibbits/wordpress-export-to-markdown/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/wordpress-export-to-markdown/downloads","issues_url":"https://api.github.com/repos/vibbits/wordpress-export-to-markdown/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/wordpress-export-to-markdown/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/wordpress-export-to-markdown/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/wordpress-export-to-markdown/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/wordpress-export-to-markdown/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/wordpress-export-to-markdown/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/wordpress-export-to-markdown/deployments","created_at":"2020-08-26 12:52:44 UTC","updated_at":"2020-08-26 12:52:46 UTC","pushed_at":"2020-07-19 00:36:13 
UTC","git_url":"git://github.com/vibbits/wordpress-export-to-markdown.git","ssh_url":"git@github.com:vibbits/wordpress-export-to-markdown.git","clone_url":"https://github.com/vibbits/wordpress-export-to-markdown.git","svn_url":"https://github.com/vibbits/wordpress-export-to-markdown","homepage":"","size":148,"stargazers_count":0,"watchers_count":0,"language":null,"has_issues":false,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":0,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":0,"license":{"key":"mit","name":"MIT License","spdx_id":"MIT","url":"https://api.github.com/licenses/mit","node_id":"MDc6TGljZW5zZTEz"},"topics":[],"forks":0,"open_issues":0,"watchers":0,"default_branch":"master"},{"id":306830216,"node_id":"MDEwOlJlcG9zaXRvcnkzMDY4MzAyMTY=","name":"workshop-janssen","full_name":"vibbits/workshop-janssen","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/workshop-janssen","description":null,"fork":false,"url":"https://api.github.com/repos/vibbits/workshop-janssen","forks_url":"https://
api.github.com/repos/vibbits/workshop-janssen/forks","keys_url":"https://api.github.com/repos/vibbits/workshop-janssen/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/workshop-janssen/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/workshop-janssen/teams","hooks_url":"https://api.github.com/repos/vibbits/workshop-janssen/hooks","issue_events_url":"https://api.github.com/repos/vibbits/workshop-janssen/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/workshop-janssen/events","assignees_url":"https://api.github.com/repos/vibbits/workshop-janssen/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/workshop-janssen/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/workshop-janssen/tags","blobs_url":"https://api.github.com/repos/vibbits/workshop-janssen/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/workshop-janssen/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/workshop-janssen/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/workshop-janssen/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/workshop-janssen/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/workshop-janssen/languages","stargazers_url":"https://api.github.com/repos/vibbits/workshop-janssen/stargazers","contributors_url":"https://api.github.com/repos/vibbits/workshop-janssen/contributors","subscribers_url":"https://api.github.com/repos/vibbits/workshop-janssen/subscribers","subscription_url":"https://api.github.com/repos/vibbits/workshop-janssen/subscription","commits_url":"https://api.github.com/repos/vibbits/workshop-janssen/commits{/sha}","git_commits_url":"https://api.github.com/repos/vibbits/workshop-janssen/git/commits{/sha}","comments_url":"https://api.github.com/repos/vibbits/workshop-janssen/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/workshop-jan
ssen/issues/comments{/number}","contents_url":"https://api.github.com/repos/vibbits/workshop-janssen/contents/{+path}","compare_url":"https://api.github.com/repos/vibbits/workshop-janssen/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/workshop-janssen/merges","archive_url":"https://api.github.com/repos/vibbits/workshop-janssen/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/workshop-janssen/downloads","issues_url":"https://api.github.com/repos/vibbits/workshop-janssen/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/workshop-janssen/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/workshop-janssen/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/workshop-janssen/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/workshop-janssen/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/workshop-janssen/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/workshop-janssen/deployments","created_at":"2020-10-24 07:31:23 UTC","updated_at":"2020-11-23 08:34:21 UTC","pushed_at":"2020-11-23 08:34:19 UTC","git_url":"git://github.com/vibbits/workshop-janssen.git","ssh_url":"git@github.com:vibbits/workshop-janssen.git","clone_url":"https://github.com/vibbits/workshop-janssen.git","svn_url":"https://github.com/vibbits/workshop-janssen","homepage":null,"size":18,"stargazers_count":0,"watchers_count":0,"language":"Dockerfile","has_issues":true,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":1,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":1,"license":{"key":"cc0-1.0","name":"Creative Commons Zero v1.0 
Universal","spdx_id":"CC0-1.0","url":"https://api.github.com/licenses/cc0-1.0","node_id":"MDc6TGljZW5zZTY="},"topics":[],"forks":1,"open_issues":1,"watchers":0,"default_branch":"main"},{"id":242333149,"node_id":"MDEwOlJlcG9zaXRvcnkyNDIzMzMxNDk=","name":"yasara-mmligner-plugin","full_name":"vibbits/yasara-mmligner-plugin","private":false,"owner":{"login":"vibbits","id":22908438,"node_id":"MDEyOk9yZ2FuaXphdGlvbjIyOTA4NDM4","avatar_url":"https://avatars.githubusercontent.com/u/22908438?v=4","gravatar_id":"","url":"https://api.github.com/users/vibbits","html_url":"https://github.com/vibbits","followers_url":"https://api.github.com/users/vibbits/followers","following_url":"https://api.github.com/users/vibbits/following{/other_user}","gists_url":"https://api.github.com/users/vibbits/gists{/gist_id}","starred_url":"https://api.github.com/users/vibbits/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/vibbits/subscriptions","organizations_url":"https://api.github.com/users/vibbits/orgs","repos_url":"https://api.github.com/users/vibbits/repos","events_url":"https://api.github.com/users/vibbits/events{/privacy}","received_events_url":"https://api.github.com/users/vibbits/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/vibbits/yasara-mmligner-plugin","description":"Use the MMLigner structural alignment program from within 
Yasara!","fork":false,"url":"https://api.github.com/repos/vibbits/yasara-mmligner-plugin","forks_url":"https://api.github.com/repos/vibbits/yasara-mmligner-plugin/forks","keys_url":"https://api.github.com/repos/vibbits/yasara-mmligner-plugin/keys{/key_id}","collaborators_url":"https://api.github.com/repos/vibbits/yasara-mmligner-plugin/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/vibbits/yasara-mmligner-plugin/teams","hooks_url":"https://api.github.com/repos/vibbits/yasara-mmligner-plugin/hooks","issue_events_url":"https://api.github.com/repos/vibbits/yasara-mmligner-plugin/issues/events{/number}","events_url":"https://api.github.com/repos/vibbits/yasara-mmligner-plugin/events","assignees_url":"https://api.github.com/repos/vibbits/yasara-mmligner-plugin/assignees{/user}","branches_url":"https://api.github.com/repos/vibbits/yasara-mmligner-plugin/branches{/branch}","tags_url":"https://api.github.com/repos/vibbits/yasara-mmligner-plugin/tags","blobs_url":"https://api.github.com/repos/vibbits/yasara-mmligner-plugin/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/vibbits/yasara-mmligner-plugin/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/vibbits/yasara-mmligner-plugin/git/refs{/sha}","trees_url":"https://api.github.com/repos/vibbits/yasara-mmligner-plugin/git/trees{/sha}","statuses_url":"https://api.github.com/repos/vibbits/yasara-mmligner-plugin/statuses/{sha}","languages_url":"https://api.github.com/repos/vibbits/yasara-mmligner-plugin/languages","stargazers_url":"https://api.github.com/repos/vibbits/yasara-mmligner-plugin/stargazers","contributors_url":"https://api.github.com/repos/vibbits/yasara-mmligner-plugin/contributors","subscribers_url":"https://api.github.com/repos/vibbits/yasara-mmligner-plugin/subscribers","subscription_url":"https://api.github.com/repos/vibbits/yasara-mmligner-plugin/subscription","commits_url":"https://api.github.com/repos/vibbits/yasara-mmligner-plugin/commits{/sha}","git_commits_u
rl":"https://api.github.com/repos/vibbits/yasara-mmligner-plugin/git/commits{/sha}","comments_url":"https://api.github.com/repos/vibbits/yasara-mmligner-plugin/comments{/number}","issue_comment_url":"https://api.github.com/repos/vibbits/yasara-mmligner-plugin/issues/comments{/number}","contents_url":"https://api.github.com/repos/vibbits/yasara-mmligner-plugin/contents/{+path}","compare_url":"https://api.github.com/repos/vibbits/yasara-mmligner-plugin/compare/{base}...{head}","merges_url":"https://api.github.com/repos/vibbits/yasara-mmligner-plugin/merges","archive_url":"https://api.github.com/repos/vibbits/yasara-mmligner-plugin/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/vibbits/yasara-mmligner-plugin/downloads","issues_url":"https://api.github.com/repos/vibbits/yasara-mmligner-plugin/issues{/number}","pulls_url":"https://api.github.com/repos/vibbits/yasara-mmligner-plugin/pulls{/number}","milestones_url":"https://api.github.com/repos/vibbits/yasara-mmligner-plugin/milestones{/number}","notifications_url":"https://api.github.com/repos/vibbits/yasara-mmligner-plugin/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/vibbits/yasara-mmligner-plugin/labels{/name}","releases_url":"https://api.github.com/repos/vibbits/yasara-mmligner-plugin/releases{/id}","deployments_url":"https://api.github.com/repos/vibbits/yasara-mmligner-plugin/deployments","created_at":"2020-02-22 11:53:28 UTC","updated_at":"2020-02-27 15:04:15 UTC","pushed_at":"2020-02-27 15:00:58 
UTC","git_url":"git://github.com/vibbits/yasara-mmligner-plugin.git","ssh_url":"git@github.com:vibbits/yasara-mmligner-plugin.git","clone_url":"https://github.com/vibbits/yasara-mmligner-plugin.git","svn_url":"https://github.com/vibbits/yasara-mmligner-plugin","homepage":"","size":39,"stargazers_count":1,"watchers_count":1,"language":"Python","has_issues":true,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":0,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":3,"license":{"key":"gpl-3.0","name":"GNU General Public License v3.0","spdx_id":"GPL-3.0","url":"https://api.github.com/licenses/gpl-3.0","node_id":"MDc6TGljZW5zZTk="},"topics":["visualization","python","plugin","molecular-biology","molecular-modeling"],"forks":0,"open_issues":3,"watchers":1,"default_branch":"master"}],"releases":[],"releases_url":"https://github.com/vibbits/training-material/releases","repository_name":"training-material","repository_nwo":"vibbits/training-material","repository_url":"https://github.com/vibbits/training-material","show_downloads":true,"source":{"branch":"gh-pages","path":"/"},"tar_url":"https://github.com/vibbits/training-material/tarball/gh-pages","url":"https://github.com/pages/vibbits/training-material","versions":{},"wiki_url":"https://github.com/vibbits/training-material/wiki","zip_url":"https://github.com/vibbits/training-material/zipball/gh-pages"},"env":{"DRAFTS":"false","HOSTNAME":"770d5c96ead9","LANGUAGE":"en_US","JEKYLL_VERSION":"3.8.6","RUBY_DOWNLOAD_SHA256":"11a83f85c03d3f0fc9b8a9b6cad1b2674f26c5aaa43ba858d4b0fcc2b54171e1","JEKYLL_BIN":"/usr/jekyll/bin","JEKYLL_DOCKER_NAME":"builder","JEKYLL_ENV":"production","RUBY_VERSION":"2.6.3","PWD":"/srv/jekyll","BUNDLE_APP_CONFIG":"/usr/local/bundle","RUBY_MAJOR":"2.6","TZ":"America/Chicago","HOME":"/home/jekyll","JEKYLL_GID":"1000","LANG":"en_US.UTF-8","BUNDLE_SILENCE_ROOT_WARNING":"1","BUNDLE_HOME":"/usr/local/bundle","JEKYLL_DOCKER_COMMIT":"fb155c6719556d9ecf3e
bc8b7e09dafc97f5bafd","GEM_HOME":"/usr/local/bundle","BUNDLE_BIN":"/usr/local/bundle/bin","SHLVL":"1","FORCE_POLLING":"false","JEKYLL_DOCKER_TAG":"3.8.6","JEKYLL_UID":"1000","BUNDLE_PATH":"/usr/local/bundle","LC_ALL":"en_US.UTF-8","JEKYLL_VAR_DIR":"/var/jekyll","PATH":"/usr/local/bundle/bin:/usr/jekyll/bin:/usr/local/bundle/gems/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin","JEKYLL_DATA_DIR":"/srv/jekyll","GEM_BIN":"/usr/gem/bin","VERBOSE":"false","BUNDLER_ORIG_BUNDLE_BIN_PATH":"BUNDLER_ENVIRONMENT_PRESERVER_INTENTIONALLY_NIL","BUNDLER_ORIG_BUNDLE_GEMFILE":"BUNDLER_ENVIRONMENT_PRESERVER_INTENTIONALLY_NIL","BUNDLER_ORIG_BUNDLER_ORIG_MANPATH":"BUNDLER_ENVIRONMENT_PRESERVER_INTENTIONALLY_NIL","BUNDLER_ORIG_BUNDLER_VERSION":"BUNDLER_ENVIRONMENT_PRESERVER_INTENTIONALLY_NIL","BUNDLER_ORIG_GEM_HOME":"/usr/gem","BUNDLER_ORIG_GEM_PATH":"BUNDLER_ENVIRONMENT_PRESERVER_INTENTIONALLY_NIL","BUNDLER_ORIG_MANPATH":"BUNDLER_ENVIRONMENT_PRESERVER_INTENTIONALLY_NIL","BUNDLER_ORIG_PATH":"/usr/jekyll/bin:/usr/local/bundle/bin:/usr/local/bundle/gems/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin","BUNDLER_ORIG_RB_USER_INSTALL":"BUNDLER_ENVIRONMENT_PRESERVER_INTENTIONALLY_NIL","BUNDLER_ORIG_RUBYLIB":"BUNDLER_ENVIRONMENT_PRESERVER_INTENTIONALLY_NIL","BUNDLER_ORIG_RUBYOPT":"BUNDLER_ENVIRONMENT_PRESERVER_INTENTIONALLY_NIL","BUNDLE_BIN_PATH":"/usr/local/lib/ruby/gems/2.6.0/gems/bundler-2.0.2/exe/bundle","BUNDLE_GEMFILE":"/srv/jekyll/Gemfile","BUNDLER_VERSION":"2.0.2","RUBYOPT":"-rbundler/setup","RUBYLIB":"/usr/local/lib/ruby/gems/2.6.0/gems/bundler-2.0.2/lib","GEM_PATH":"","MANPATH":"/usr/local/bundle/gems/kramdown-1.17.0/man","JEKYLL_NO_BUNDLER_REQUIRE":"true"}}
              page: {"layout":"introduction_slides","logo":"GTN","title":"ChIP-Seq Analysis","type":"introduction","contributors":["morganeTC","janick-bits"],"topic_name":"chip-seq","tutorial_name":"introduction","content":"### ChiP-Seq Analysis ###\n\n[slides](http://data.bits.vib.be/pub/trainingen/NGSChIPSEQ/booklet/thomas-chollier_2020.pdf)\n","dir":"/topics/chip-seq/slides/","name":"introduction.html","path":"topics/chip-seq/slides/introduction.html","url":"/topics/chip-seq/slides/introduction.html"}
              topic: {"name":"chip-seq","type":"basics","category":"omics","title":"ChIP-Seq analysis","summary":" This training gives an introduction to ChIP-seq data analysis, covering the processing steps starting from the reads to the peaks. Among all possible downstream analyses, the practical aspect will focus on motif analyses. A particular emphasis will be put on deciding which downstream analyses to perform depending on the biological question. This training does not cover all methods available today. It does not aim at bringing users to a professional NGS analyst level but provides enough information to allow biologists to understand what DNA sequencing practically is and to communicate with NGS experts for more in-depth needs.\n- Have an understanding of the nature of ChIP-Seq data - Perform a complete analysis workflow including QC, read mapping, visualization in a genome browser and peak-calling - Use the GenePattern platform for each step of the workflow and feel the complexity of the task - Have an overview of possible downstream analyses - Perform a motif analysis with online web programs\nFor this training, we will use a dataset produced by Myers et al [1] involved in the regulation of gene expression under anaerobic conditions in bacteria. We will focus on one factor: FNR. The advantage of this dataset is its small size, allowing real time execution of all steps of the dataset. ","requirements":null,"maintainers":["abotzki","janick-bits","morganeTC"],"references":[{"authors":"Bailey et al. ","title":"Practical Guidelines for the Comprehensive Analysis of ChIP-seq Data. 
PLoS Comput Biol 9, e1003326 (2013)","link":"http://data.bits.vib.be/pub/trainingen/NGSChIPSEQ/articles/2013_Bailey_PLoS%20Comput%20Biol.pdf","summary":""},{"authors":"Thomas-Chollier et al.","title":"A complete workflow for the analysis of full-size ChIP-seq (and similar) data sets using peak-motifs Nature Protocols 7, 1551–1568 (2012)","link":"http://data.bits.vib.be/pub/trainingen/NGSChIPSEQ/articles/2012_Thomas-Chollier_Nature%20Protocols.pdf","summary":""},{"authors":"Kevin S Myers et al.","title":"Genome-scale analysis of escherichia coli FNR reveals complex features of transcription factor binding. PLoS Genet.: 2013, 9(6);e1003565","link":"http://dx.doi.org/10.1371/journal.pgen.1003565","summary":""}]}
              material: {"layout":"introduction_slides","logo":"GTN","title":"ChIP-Seq Analysis","type":"introduction","contributors":["morganeTC","janick-bits"],"topic_name":"chip-seq","tutorial_name":"introduction","content":"### ChiP-Seq Analysis ###\n\n[slides](http://data.bits.vib.be/pub/trainingen/NGSChIPSEQ/booklet/thomas-chollier_2020.pdf)\n","dir":"/topics/chip-seq/slides/","name":"introduction.html","path":"topics/chip-seq/slides/introduction.html","url":"/topics/chip-seq/slides/introduction.html"}
name: inverse
layout: true
class: center, middle, inverse
</span></div>
</span></div>
---
# ChIP-Seq Analysis
---
### ChIP-Seq Analysis ###
[slides](http://data.bits.vib.be/pub/trainingen/NGSChIPSEQ/booklet/thomas-chollier_2020.pdf)
---
## Related tutorials
---
## Thank you!
This material is the result of collaborative work. Thanks to the colleagues of the VIB Bioinformatics Core and to all the contributors!
<a href="/hall-of-fame#morganeTC" class="contributor-badge">Morgane Thomas-Chollier</a>
, <a href="/hall-of-fame#janick-bits" class="contributor-badge">Janick Mathys</a>
</div>