1 Load libraries

library(tidyverse)
library(data.table)
data.table 1.12.0  Latest news: r-datatable.com

Attaching package: ‘data.table’

The following objects are masked from ‘package:dplyr’:

    between, first, last

The following object is masked from ‘package:purrr’:

    transpose
library(janitor)

1.1 Read data

# Read the prokaryote genome table and convert its column names to snake_case.
# `quote = ""` disables quote handling: at least one field in the file holds a
# stray double quote in the middle of the text, which otherwise makes fread()
# emit an "improper quoting" warning and heal the line by guesswork.
# NOTE(review): with quoting disabled, any field that is fully wrapped in
# double quotes keeps those quote characters -- confirm none are expected.
genomes_data <- data.table::fread(
  "genome_data/prokaryotes.txt",
  quote = ""
) %>%
  janitor::clean_names()
Found and resolved improper quoting out-of-sample. First healed line 41875: <<Rhodococcus erythropolis DN1  1381122 PRJNA214035 214035  Terrabacteria group Actinobacteria  6.5484  62.4    -   AUZK01  78  6163    5630    2013/08/23  2017/04/01  Contig  "National Center for Biotechnology" RSE SAMN02470607    GCA_000454425.1 -   ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/454/425/GCA_000454425.1_RodocDN1 24136850    DN1>>. If the fields are not quoted (e.g. field separator does not appear within any field), try quote="" to avoid this warning.
# Show the column names and the type fread() assigned to each column.
str(genomes_data)
Classes ‘data.table’ and 'data.frame':  183794 obs. of  23 variables:
 $ number_organism_name : chr  "Enterococcus faecium DO" "Bacillus mycoides" "Yersinia frederiksenii Y225" "Alteromonas macleodii ATCC 27126" ...
 $ tax_id               : int  333849 1405 1454377 529120 522373 40041 702459 465515 28152 537972 ...
 $ bio_project_accession: chr  "PRJNA30627" "PRJNA238211" "PRJNA236482" "PRJNA29793" ...
 $ bio_project_id       : int  30627 238211 236482 29793 30351 30767 42863 20655 240099 30075 ...
 $ group                : chr  "Terrabacteria group" "Terrabacteria group" "Proteobacteria" "Proteobacteria" ...
 $ sub_group            : chr  "Firmicutes" "Firmicutes" "Gammaproteobacteria" "Gammaproteobacteria" ...
 $ size_mb              : num  3.05 5.64 4.55 4.65 4.85 ...
 $ gc_percent           : chr  "37.9158" "35.3849" "47.282" "44.7" ...
 $ replicons            : chr  "chromosome:NC_017960.1/CP003583.1; plasmid 1:NC_017961.1/CP003584.1; plasmid 2:NC_017962.1/CP003585.1; plasmid "| __truncated__ "chromosome:NZ_CP009692.1/CP009692.1; plasmid pBMX_1:NZ_CP009691.1/CP009691.1; plasmid pBMX_2:NZ_CP009690.1/CP00"| __truncated__ "chromosome:NZ_CP009364.1/CP009364.1; plasmid unnsmrf:NZ_CP009363.1/CP009363.1" "chromosome:NC_018632.1/CP003841.1" ...
 $ wgs                  : chr  "-" "-" "-" "-" ...
 $ scaffolds            : chr  "4" "4" "2" "1" ...
 $ genes                : chr  "3209" "5903" "4239" "3966" ...
 $ proteins             : chr  "3114" "5490" "3988" "3799" ...
 $ release_date         : chr  "2012/05/25" "2015/02/05" "2015/02/09" "2012/09/21" ...
 $ modify_date          : chr  "2016/08/03" "2017/04/03" "2017/04/03" "2017/05/18" ...
 $ status               : chr  "Complete Genome" "Complete Genome" "Complete Genome" "Complete Genome" ...
 $ center               : chr  "Baylor College of Medicine" "Los Alamos National Laboratory" "Los Alamos National Laboratory" "Evolucinary Genomics Group, Universidad Miguel Hernandez" ...
 $ bio_sample_accession : chr  "SAMN00002237" "SAMN03012838" "SAMN03010446" "SAMN02603229" ...
 $ assembly_accession   : chr  "GCA_000174395.2" "GCA_000832605.1" "GCA_000834215.1" "GCA_000172635.2" ...
 $ reference            : chr  "REFR" "REFR" "-" "REFR" ...
 $ ftp_path             : chr  "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/174/395/GCA_000174395.2_ASM17439v2" "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/832/605/GCA_000832605.1_ASM83260v1" "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/834/215/GCA_000834215.1_ASM83421v1" "ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/000/172/635/GCA_000172635.2_ASM17263v2" ...
 $ pubmed_id            : chr  "22769602" "25931591" "25931590" "18670397,23209244" ...
 $ strain               : chr  "DO" "ATCC 6462" "Y225" "ATCC 27126" ...
 - attr(*, ".internal.selfref")=<externalptr> 

1.2 Fix the names and select columns we need

# Keep only the columns used in the analysis and make the gene/protein counts
# numeric. Column names were already cleaned when the file was read, so
# clean_names() is not repeated here.
genomes_data_df <- genomes_data %>%
  select(
    number_organism_name,
    group,
    sub_group,
    strain,
    size_mb,
    gc_percent,
    scaffolds,
    genes,
    proteins,
    status,
    release_date,
    modify_date,
    center
  ) %>%
  mutate(
    # Both columns are read as character. Presumably "-" marks a missing value
    # (as it does in other columns of this table) -- TODO confirm. Turning it
    # into NA first means as.numeric() only warns about unexpected values.
    genes = as.numeric(na_if(genes, "-")),
    proteins = as.numeric(na_if(proteins, "-"))
  )
NAs introduced by coercionNAs introduced by coercion

2 Find number of genomes released by year

# Bar chart of the number of genome records per release year.
genomes_data_df %>%
  # release_date looks like "YYYY/MM/DD"; only the year part is used below.
  separate(col = release_date, into = c("year", "month", "day")) %>%
  group_by(year) %>%
  summarise(genomes_cnt = n()) %>%
  ggplot(mapping = aes(x = year, y = genomes_cnt)) +
  # geom_col() draws bars whose height is the y value itself.
  geom_col() +
  theme_minimal() +
  # Tilt the year labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

NA

3 Find number of complete genomes by year

# List the distinct assembly status values present in the data.
unique(genomes_data_df$status)
[1] "Complete Genome" "Scaffold"        "Chromosome"      "Contig"          "Complete"       

Complete Genome > Chromosome > Scaffold > Contig

# Bar chart of the number of complete genome records per release year.
genomes_data_df %>%
  separate(col = release_date, into = c("year", "month", "day")) %>%
  # Matches every status containing "Complete"
  # (both "Complete Genome" and "Complete").
  filter(str_detect(status, "Complete")) %>%
  group_by(year) %>%
  summarise(genomes_cnt = n()) %>%
  ggplot(mapping = aes(x = year, y = genomes_cnt)) +
  geom_col() +
  theme_minimal() +
  # Tilt the year labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

3.1 Find number of genomes released by Baylor College of Medicine

# Number of genome records per submitting center, restricted to centers whose
# name mentions "Baylor" or "BCM", largest first.
# The release date is not needed for this count, so it is no longer split, and
# the rows are filtered before aggregating so only matching centers are
# grouped. The resulting table is the same.
genomes_data_df %>%
  filter(grepl("Baylor|BCM", center)) %>%
  group_by(center) %>%
  summarise(genomes_cnt = n()) %>%
  arrange(desc(genomes_cnt))

4 Find the top 10 submitters of complete genomes

# Horizontal bar chart of the ten centers with the most complete genome
# records.
# The original pattern was "Complete| Chromosome"; the second alternative
# starts with a space and therefore never matched a status value, so it is
# dropped here without changing the result.
# NOTE(review): if chromosome-level assemblies were meant to be included, use
# "Complete|Chromosome" instead -- that would change the counts.
genomes_data_df %>%
  filter(grepl("Complete", status)) %>%
  group_by(center) %>%
  summarise(genomes_cnt = n()) %>%
  arrange(desc(genomes_cnt)) %>%
  slice(1:10) %>%
  ggplot(aes(x = reorder(center, -genomes_cnt), y = genomes_cnt)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  # Flip the axes so the long center names are written horizontally.
  coord_flip()

5 Find the bacterium with the most submitted genomes

# Number of genome records per organism name, most frequent first.
# The release date plays no part in this count, so the unused date split has
# been removed; the resulting table is unchanged.
genomes_data_df %>%
  group_by(number_organism_name) %>%
  summarise(genomes_cnt = n()) %>%
  arrange(desc(genomes_cnt))

6 Find the organism with the most genomes submitted by the CDC

# Number of genome records per organism name for the selected centers,
# most frequent first.
# NOTE(review): the pattern also matches centers containing "Food And Drug",
# not only the CDC -- confirm this is intended.
genomes_data_df %>%
  filter(
    str_detect(
      center,
      "CDC|Centers for Disease Control and Prevention|Food And Drug"
    )
  ) %>%
  group_by(number_organism_name) %>%
  summarise(genomes_cnt = n()) %>%
  arrange(desc(genomes_cnt))

7 What is the GC content of the bacteria sequenced by the CDC as compared to overall GC content?

GC content is the percentage of G and C bases among the four bases (A, T, G and C) that make up DNA.

# Mean GC percent of "Complete Genome" records submitted by the selected
# centers.
# NOTE(review): the pattern also matches centers containing "Food And Drug",
# not only the CDC -- confirm this is intended.
genomes_data_df %>%
  filter(
    grepl("CDC|Centers for Disease Control and Prevention|Food And Drug", center)
  ) %>%
  filter(status == "Complete Genome") %>%
  # gc_percent is read as character. Presumably "-" marks a missing value --
  # TODO confirm. It is turned into NA first so as.numeric() only warns about
  # genuinely unexpected values.
  mutate(gc_percent = as.numeric(na_if(gc_percent, "-"))) %>%
  summarize(mean_gc = mean(gc_percent, na.rm = TRUE))
NAs introduced by coercion

8 Calculate the mean GC percent by sub_group

# Mean GC percent of "Complete Genome" records per sub_group, highest first.
genomes_data_df %>%
  filter(status == "Complete Genome") %>%
  # gc_percent is read as character. Presumably "-" marks a missing value --
  # TODO confirm. It is turned into NA first so as.numeric() only warns about
  # genuinely unexpected values.
  mutate(gc_percent = as.numeric(na_if(gc_percent, "-"))) %>%
  group_by(sub_group) %>%
  summarize(mean_gc = mean(gc_percent, na.rm = TRUE)) %>%
  arrange(desc(mean_gc))
NAs introduced by coercion

9 Calculate the average genome size by group

# Average genome size (Mb) of complete genome records per group, largest
# first. size_mb is already numeric after loading, so it is not converted
# again.
genomes_data_df %>%
  filter(grepl("Complete", status)) %>%
  group_by(group) %>%
  summarize(avg_size_mb = mean(size_mb, na.rm = TRUE)) %>%
  arrange(desc(avg_size_mb))

10 Calculate the average genome size by group and sub_group

# Average genome size (Mb) of complete genome records per (group, sub_group)
# pair, largest first. size_mb is already numeric after loading, so it is not
# converted again.
genomes_data_df %>%
  filter(grepl("Complete", status)) %>%
  group_by(group, sub_group) %>%
  summarize(avg_size_mb = mean(size_mb, na.rm = TRUE)) %>%
  # summarize() leaves the result grouped by `group`; drop that so the table
  # is returned as a plain, globally sorted result.
  ungroup() %>%
  arrange(desc(avg_size_mb))

11 What is the relation between genome size and genes?

# Scatter plot of gene count against genome size for complete genome records
# smaller than 5 Mb, with a fitted straight line and no confidence band.
genomes_data_df %>%
  filter(
    grepl("Complete", status),
    size_mb < 5
  ) %>%
  ggplot(mapping = aes(x = size_mb, y = genes)) +
  geom_point() +
  theme_bw() +
  geom_smooth(method = "lm", se = FALSE)

12 What is the relation between genome size and GC-content?

# Scatter plot of GC percent against genome size for complete genome records
# larger than 5 Mb, with a fitted straight line and its confidence band.
genomes_data_df %>%
  filter(grepl("Complete", status)) %>%
  filter(size_mb > 5) %>%
  # gc_percent is read as character. Presumably "-" marks a missing value --
  # TODO confirm. It is turned into NA first so as.numeric() only warns about
  # genuinely unexpected values.
  mutate(gc_percent = as.numeric(na_if(gc_percent, "-"))) %>%
  ggplot(aes(x = size_mb, y = gc_percent)) +
  geom_point() +
  theme_bw() +
  geom_smooth(method = "lm", se = TRUE)
NAs introduced by coercion

LS0tCnRpdGxlOiAiUiBOb3RlYm9vayAtIEFuYWx5emUgZ2Vub21lIGRhdGEiCm91dHB1dDogCiAgaHRtbF9kb2N1bWVudDoKICAgIGRmX3ByaW50OiBwYWdlZAogICAgdG9jOiB5ZXMKICBodG1sX25vdGVib29rOgogICAgaGlnaGxpZ2h0OiBoYWRkb2NrCiAgICBtYXRoamF4OiBudWxsCiAgICBudW1iZXJfc2VjdGlvbnM6IHllcwogICAgdGhlbWU6IHJlYWRhYmxlCiAgICB0b2M6IHllcwogICAgdG9jX2Zsb2F0OiB5ZXMKZWRpdG9yX29wdGlvbnM6IAogIGNodW5rX291dHB1dF90eXBlOiBpbmxpbmUKLS0tCgoKCiMgTG9hZCBsaWJyYXJ5CmBgYHtyfQpsaWJyYXJ5KHRpZHl2ZXJzZSkKbGlicmFyeShkYXRhLnRhYmxlKQpsaWJyYXJ5KGphbml0b3IpCmBgYAoKIyMgUmVhZCBkYXRhCmBgYHtyfQpnZW5vbWVzX2RhdGEgPC0gZGF0YS50YWJsZTo6ZnJlYWQoImdlbm9tZV9kYXRhL3Byb2thcnlvdGVzLnR4dCIpICU+JQogIGphbml0b3I6OmNsZWFuX25hbWVzKCkKYGBgCgoKCmBgYHtyfQpzdHIoZ2Vub21lc19kYXRhKQpgYGAKCgoKCgojIyBGaXggdGhlIG5hbWVzIGFuZCBzZWxlY3QgY29sdW1ucyB3ZSBuZWVkCmBgYHtyfQpnZW5vbWVzX2RhdGFfZGYgPC0gZ2Vub21lc19kYXRhICU+JQogIGphbml0b3I6OmNsZWFuX25hbWVzKCkgJT4lCiAgc2VsZWN0KG51bWJlcl9vcmdhbmlzbV9uYW1lLAogICAgICAgICBncm91cCwKICAgICAgICAgc3ViX2dyb3VwLAogICAgICAgICBzdHJhaW4sCiAgICAgICAgIHNpemVfbWIsCiAgICAgICAgIGdjX3BlcmNlbnQsCiAgICAgICAgIHNjYWZmb2xkcywKICAgICAgICAgZ2VuZXMsCiAgICAgICAgIHByb3RlaW5zLAogICAgICAgICBzdGF0dXMsCiAgICAgICAgIHJlbGVhc2VfZGF0ZSwKICAgICAgICAgbW9kaWZ5X2RhdGUsCiAgICAgICAgIGNlbnRlcgogICAgICAgICApICU+JQogIG11dGF0ZShnZW5lcz1hcy5udW1lcmljKGdlbmVzKSwgcHJvdGVpbnMgPSBhcy5udW1lcmljKHByb3RlaW5zKSkKYGBgCgoKIyBGaW5kIG51bWJlciBvZiBnZW5vbWVzIHJlbGVhc2VkIGJ5IHllYXIKYGBge3J9CgpnZW5vbWVzX2RhdGFfZGYgJT4lCiAgICBzZXBhcmF0ZShyZWxlYXNlX2RhdGUsIGMoInllYXIiLCJtb250aCIsImRheSIpKSAlPiUKICAgIGdyb3VwX2J5KHllYXIpICU+JQogICAgc3VtbWFyaXNlKGdlbm9tZXNfY250ID0gbigpKSAlPiUKICAgIGdncGxvdCguLGFlcyh4ID0geWVhciwgeSA9IGdlbm9tZXNfY250KSkgKwogICAgZ2VvbV9iYXIoc3RhdCA9ICJpZGVudGl0eSIpICsKICAgIHRoZW1lX21pbmltYWwoKSsKICAgIHRoZW1lKGF4aXMudGV4dC54ID0gZWxlbWVudF90ZXh0KGFuZ2xlID0gNDUsIGhqdXN0ID0gMSkpIAogIApgYGAKCgojIEZpbmQgbnVtYmVyIG9mIGNvbXBsZXRlIGdlbm9tZXMgYnkgeWVhcgpgYGB7cn0KdW5pcXVlKGdlbm9tZXNfZGF0YV9kZiRzdGF0dXMpCmBgYAoKQ29tcGxldGUgR2Vub21lICQ+JCBDaHJvbW9zb21lICQ+JCBTY2FmZm9sZCAkPiQgQ29udGlnIAoKYGBge3J9Cmdl
bm9tZXNfZGF0YV9kZiAlPiUKICAgIHNlcGFyYXRlKHJlbGVhc2VfZGF0ZSwgYygieWVhciIsIm1vbnRoIiwiZGF5IikpICU+JQogICAgZmlsdGVyKGdyZXBsKCJDb21wbGV0ZSIsIHN0YXR1cykpICU+JQogICAgZ3JvdXBfYnkoeWVhcikgJT4lCiAgICBzdW1tYXJpc2UoZ2Vub21lc19jbnQgPSBuKCkpICU+JQogICAgZ2dwbG90KC4sYWVzKHggPSB5ZWFyLCB5ID0gZ2Vub21lc19jbnQpKSArCiAgICBnZW9tX2JhcihzdGF0ID0gImlkZW50aXR5IikgKwogICAgdGhlbWVfbWluaW1hbCgpKwogICAgdGhlbWUoYXhpcy50ZXh0LnggPSBlbGVtZW50X3RleHQoYW5nbGUgPSA0NSwgaGp1c3QgPSAxKSkgCmBgYAoKCgoKCgoKCgojI0ZpbmQgbnVtYmVyIG9mIGdlbm9tZXMgcmVsZWFzZWQgYnkgQmF5bG9yIENvbGxlZ2Ugb2YgTWVkaWNpbmUKYGBge3J9Cmdlbm9tZXNfZGF0YV9kZiAlPiUKICAgIHNlcGFyYXRlKHJlbGVhc2VfZGF0ZSwgYygieWVhciIsIm1vbnRoIiwiZGF5IikpICU+JQogICAgZ3JvdXBfYnkoY2VudGVyKSAlPiUKICAgIHN1bW1hcmlzZShnZW5vbWVzX2NudCA9IG4oKSkgJT4lCiAgICBhcnJhbmdlKGRlc2MoZ2Vub21lc19jbnQpKSAlPiUKICAgIGZpbHRlcihncmVwbCgiQmF5bG9yfEJDTSIsY2VudGVyKSkKYGBgCgoKCiMgRmluZCB0b3AgMTAgc3VibWl0dGVyIG9mIGNvbXBsZXRlIGdlbm9tZXMuIApgYGB7cn0KZ2Vub21lc19kYXRhX2RmICU+JQogICAgc2VwYXJhdGUocmVsZWFzZV9kYXRlLCBjKCJ5ZWFyIiwibW9udGgiLCJkYXkiKSkgJT4lCiAgICBmaWx0ZXIoZ3JlcGwoIkNvbXBsZXRlfCBDaHJvbW9zb21lIiwgc3RhdHVzKSkgJT4lCiAgICBncm91cF9ieShjZW50ZXIpICU+JQogICAgc3VtbWFyaXNlKGdlbm9tZXNfY250ID0gbigpKSAlPiUKICAgIGFycmFuZ2UoZGVzYyhnZW5vbWVzX2NudCkpICU+JQogICAgc2xpY2UoMToxMCkgJT4lCiAgICBnZ3Bsb3QoLixhZXMoeCA9IHJlb3JkZXIoY2VudGVyLCAtZ2Vub21lc19jbnQpLCB5ID0gZ2Vub21lc19jbnQpKSArCiAgICBnZW9tX2JhcihzdGF0ID0gImlkZW50aXR5IikgKwogICAgdGhlbWUoYXhpcy50ZXh0LnggPSBlbGVtZW50X3RleHQoYW5nbGUgPSA5MCwgaGp1c3QgPSAxKSkgKwogICAgY29vcmRfZmxpcCgpCmBgYAoKCiMgRmluZCB0aGUgbW9zdCBzdWJtaXR0ZWQgZ2Vub21lIGJhY3Rlcml1bQpgYGB7cn0KZ2Vub21lc19kYXRhX2RmICU+JQogICAgc2VwYXJhdGUocmVsZWFzZV9kYXRlLCBjKCJ5ZWFyIiwibW9udGgiLCJkYXkiKSkgJT4lCiAgICBncm91cF9ieShudW1iZXJfb3JnYW5pc21fbmFtZSkgJT4lCiAgICBzdW1tYXJpc2UoZ2Vub21lc19jbnQgPSBuKCkpICU+JQogICAgYXJyYW5nZShkZXNjKGdlbm9tZXNfY250KSkKYGBgCgoKI0ZpbmQgdGhlIG1vc3Qgc3VibWl0dGVkIGdlbm9tZSBieSBDREMKYGBge3J9Cmdlbm9tZXNfZGF0YV9kZiAlPiUKICBmaWx0ZXIoZ3JlcGwoIkNEQ3xDZW50ZXJzIGZvciBEaXNlYXNlIENvbnRyb2wgYW5kIFByZXZlbnRp
b258Rm9vZCBBbmQgRHJ1ZyIsY2VudGVyKSkgJT4lCiAgIGdyb3VwX2J5KG51bWJlcl9vcmdhbmlzbV9uYW1lKSAlPiUKICAgIHN1bW1hcmlzZShnZW5vbWVzX2NudCA9IG4oKSkgJT4lCiAgICBhcnJhbmdlKGRlc2MoZ2Vub21lc19jbnQpKQpgYGAKCgojV2hhdCBpcyB0aGUgR0MgY29udGVudCBvZiB0aGUgYmFjdGVyaWEgc2VxdWVuY2VkIGJ5IHRoZSBDREMgYXMgY29tcGFyZWQgdG8gb3ZlcmFsbCBHQyBjb250ZW50PwoKCkdDIGNvbnRlbnQgaW5kaWNhdGVzIHRoZSBwZXJjZW50IG9mIEcsQyBiYXNlcyBvdXQgb2YgdG90YWwgZm91ciBiYXNlIC0gQVRHQyB0aGF0IG1ha2VzIEROQQoKCmBgYHtyfQpnZW5vbWVzX2RhdGFfZGYgJT4lCiAgZmlsdGVyKGdyZXBsKCJDREN8Q2VudGVycyBmb3IgRGlzZWFzZSBDb250cm9sIGFuZCBQcmV2ZW50aW9ufEZvb2QgQW5kIERydWciLGNlbnRlcikpICU+JQogIGZpbHRlcihzdGF0dXMgPT0gIkNvbXBsZXRlIEdlbm9tZSIpICU+JQogIG11dGF0ZShnY19wZXJjZW50ID0gYXMubnVtZXJpYyhnY19wZXJjZW50KSkgJT4lCiAgc3VtbWFyaXplKG1lYW5fZ2MgPSBtZWFuKGdjX3BlcmNlbnQsIG5hLnJtID0gVCkpCmBgYAoKI0NhbGN1bGF0ZSB0aGUgR0MgcGVyY2VudCBieSB0aGUgZ3JvdXAKYGBge3J9Cmdlbm9tZXNfZGF0YV9kZiAlPiUKICBmaWx0ZXIoc3RhdHVzID09ICJDb21wbGV0ZSBHZW5vbWUiKSAlPiUKICBtdXRhdGUoZ2NfcGVyY2VudCA9IGFzLm51bWVyaWMoZ2NfcGVyY2VudCkpICU+JQogIGdyb3VwX2J5KHN1Yl9ncm91cCkgJT4lCiAgc3VtbWFyaXplKG1lYW5fZ2MgPSBtZWFuKGdjX3BlcmNlbnQsIG5hLnJtID0gVCkpICU+JQogIGFycmFuZ2UoZGVzYyhtZWFuX2djKSkKYGBgCgoKCiNDYWxjdWxhdGUgdGhlIHNpemUgYnkgdGhlIGdyb3VwIApgYGB7cn0KZ2Vub21lc19kYXRhX2RmICU+JQogIGZpbHRlcihncmVwbCgiQ29tcGxldGUiLCBzdGF0dXMpKSAlPiUKICBtdXRhdGUoc2l6ZV9tYiA9IGFzLm51bWVyaWMoc2l6ZV9tYikpICU+JQogIGdyb3VwX2J5KGdyb3VwKSAlPiUKICBzdW1tYXJpemUoYXZnX3NpemVfbWIgPSBtZWFuKHNpemVfbWIsIG5hLnJtID0gVCkpICU+JQogIGFycmFuZ2UoZGVzYyhhdmdfc2l6ZV9tYikpCmBgYAoKI0NhbGN1bGF0ZSB0aGUgc2l6ZSBieSB0aGUgZ3JvdXAgYW5kIHN1Yl9ncm91cApgYGB7cn0KZ2Vub21lc19kYXRhX2RmICU+JQogIGZpbHRlcihncmVwbCgiQ29tcGxldGUiLCBzdGF0dXMpKSAlPiUKICBtdXRhdGUoc2l6ZV9tYiA9IGFzLm51bWVyaWMoc2l6ZV9tYikpICU+JQogIGdyb3VwX2J5KGdyb3VwLHN1Yl9ncm91cCkgJT4lCiAgc3VtbWFyaXplKGF2Z19zaXplX21iID0gbWVhbihzaXplX21iLCBuYS5ybSA9IFQpKSAlPiUKICBhcnJhbmdlKGRlc2MoYXZnX3NpemVfbWIpKQpgYGAKCgoKI1doYXQgaXMgdGhlIHJlbGF0aW9uIGJldHdlZW4gZ2Vub21lIHNpemUgYW5kIGdlbmVzPwpgYGB7cn0KZ2Vub21lc19kYXRhX2RmICU+JSAKZmlsdGVyKGdy
ZXBsKCJDb21wbGV0ZSIsIHN0YXR1cykpICU+JQpmaWx0ZXIoc2l6ZV9tYiA8IDUpICU+JQpnZ3Bsb3QoLixhZXMoeCA9IHNpemVfbWIsIHkgPSBnZW5lcykpICsKZ2VvbV9wb2ludCgpICsKdGhlbWVfYncoKSArCmdlb21fc21vb3RoKG1ldGhvZCA9ICJsbSIsIHNlID0gRkFMU0UpCmBgYAoKCgojV2hhdCBpcyB0aGUgcmVsYXRpb24gYmV0d2VlbiBnZW5vbWUgc2l6ZSBhbmQgR0MtY29udGVudD8KCmBgYHtyfQpnZW5vbWVzX2RhdGFfZGYgJT4lIApmaWx0ZXIoZ3JlcGwoIkNvbXBsZXRlIiwgc3RhdHVzKSkgJT4lCmZpbHRlcihzaXplX21iID4gNSkgJT4lCm11dGF0ZShnY19wZXJjZW50ID0gYXMubnVtZXJpYyhnY19wZXJjZW50KSkgJT4lCmdncGxvdCguLGFlcyh4ID0gc2l6ZV9tYiwgeSA9IGdjX3BlcmNlbnQpKSArCmdlb21fcG9pbnQoKSArCnRoZW1lX2J3KCkgKwpnZW9tX3Ntb290aChtZXRob2QgPSAibG0iLCBzZSA9IFRSVUUpCmBgYAoKCgoKCgoK