Updating the Repo

2016-09-03 09:53:54 -05:00
parent ec45e6b2eb
commit 739f3c3e2b
21 changed files with 969 additions and 5 deletions
--- a/go/nucleotide-count/README.md
+++ b/go/nucleotide-count/README.md
@@ -0,0 +1,45 @@
+# Nucleotide Count
+
+Given a DNA string, compute how many times each nucleotide occurs in the string.
+
+DNA is represented by an alphabet of the following symbols: 'A', 'C',
+'G', and 'T'.
+
+Each symbol represents a nucleotide, which is a fancy name for the
+particular molecules that happen to make up a large part of DNA.
+
+Shortest intro to biochemistry EVAR:
+
+- twigs are to birds nests as
+- nucleotides are to DNA and RNA as
+- amino acids are to proteins as
+- sugar is to starch as
+- oh crap lipids
+
+I'm not going to talk about lipids because they're crazy complex.
+
+So back to nucleotides.
+
+DNA contains four types of them: adenine (`A`), cytosine (`C`), guanine
+(`G`), and thymine (`T`).
+
+RNA contains a slightly different set of nucleotides, but we don't care
+about that for now.
+
+To run the tests simply run the command `go test` in the exercise directory.
+
+If the test suite contains benchmarks, you can run these with the `-bench`
+flag:
+
+    go test -bench .
+
+For more detailed info about the Go track see the [help
+page](http://exercism.io/languages/go).
+
+## Source
+
+The Calculating DNA Nucleotides problem at Rosalind [http://rosalind.info/problems/dna/](http://rosalind.info/problems/dna/)
+
+## Submitting Incomplete Problems
+It's possible to submit an incomplete solution so you can see how others have completed the exercise.
+
--- a/go/nucleotide-count/dna.go
+++ b/go/nucleotide-count/dna.go
@@ -0,0 +1,38 @@
+package dna
+
+import (
+	"errors"
+	"strings"
+)
+
+// Histogram maps a nucleotide byte to the number of times it was counted.
+type Histogram map[byte]int
+
+// DNAProc holds a DNA strand and carries the Count and Counts methods over it.
+type DNAProc struct {
+	strand string // the sequence passed to DNA; read by Count
+}
+
+// DNA wraps the strand st in a newly allocated DNAProc and returns a
+// pointer to it. The strand is stored as given.
+func DNA(st string) *DNAProc {
+	proc := new(DNAProc)
+	proc.strand = st
+	return proc
+}
+
+// Count reports how many times nucleotide n appears in the strand.
+// Only 'A', 'C', 'G' and 'T' are accepted; any other byte yields a zero
+// count together with a non-nil error naming the rejected symbol.
+func (d *DNAProc) Count(n byte) (int, error) {
+	symbol := string(n)
+	switch n {
+	case 'A', 'C', 'G', 'T':
+		return strings.Count(d.strand, symbol), nil
+	default:
+		return 0, errors.New("Invalid Nucleotide " + symbol)
+	}
+}
+
+// Counts tallies each of the four nucleotides 'A', 'C', 'G' and 'T' in the
+// strand and returns the totals as a Histogram keyed by nucleotide. All
+// four keys are always present, even when a count is zero.
+func (d *DNAProc) Counts() Histogram {
+	tally := make(Histogram)
+	for _, n := range []byte("ACGT") {
+		// These four symbols pass Count's validity check, so the error is always nil.
+		tally[n], _ = d.Count(n)
+	}
+	return tally
+}
--- a/go/nucleotide-count/nucleotide_count_test.go
+++ b/go/nucleotide-count/nucleotide_count_test.go
@@ -0,0 +1,108 @@
+package dna
+
+import "testing"
+
+// Equal reports whether h and o have the same number of entries and
+// sameMappings finds no differing count. The mappings are only compared
+// when the lengths already match.
+func (h Histogram) Equal(o Histogram) bool {
+	if !h.sameLength(o) {
+		return false
+	}
+	return h.sameMappings(o)
+}
+
+// sameLength reports whether h and o contain the same number of entries.
+func (h Histogram) sameLength(o Histogram) bool {
+	mine, theirs := len(h), len(o)
+	return mine == theirs
+}
+
+// sameMappings reports whether every key of h is present in o with the
+// same count. Keys that exist only in o are not examined here; Equal pairs
+// this check with a length comparison to rule those out.
+func (h Histogram) sameMappings(o Histogram) bool {
+	for k, want := range h {
+		// Use the comma-ok form so a key missing from o is not mistaken
+		// for a stored zero count.
+		if got, ok := o[k]; !ok || got != want {
+			return false
+		}
+	}
+	return true
+}
+
+var tallyTests = []struct { // cases iterated by TestNucleotideCounts
+	strand     string // DNA sequence handed to DNA
+	nucleotide byte   // symbol passed to Count
+	expected   int    // count the test requires Count to return
+}{
+	{"", 'A', 0},              // empty strand
+	{"ACT", 'G', 0},           // nucleotide absent from the strand
+	{"CCCCC", 'C', 5},         // strand made of one repeated nucleotide
+	{"GGGGGTAACCCGG", 'T', 1}, // mixed strand
+}
+
+// TestNucleotideCounts runs Count for every entry in tallyTests and fails
+// on the first unexpected error or wrong count.
+func TestNucleotideCounts(t *testing.T) {
+	for _, tt := range tallyTests {
+		dna := DNA(tt.strand)
+		count, err := dna.Count(tt.nucleotide)
+		// Every nucleotide in tallyTests is valid, so an error here is a
+		// failure in its own right; without this check a rejected symbol
+		// would still satisfy the cases that expect a count of 0.
+		if err != nil {
+			t.Fatalf("Count(%q) on strand %q returned unexpected error: %v", tt.nucleotide, tt.strand, err)
+		}
+		if count != tt.expected {
+			t.Fatalf("Got \"%v\", expected \"%v\"", count, tt.expected)
+		}
+	}
+}
+
+func TestHasErrorForInvalidNucleotides(t *testing.T) {
+	dna := DNA("GATTACA")
+	count, err := dna.Count('X')
+	if count != 0 {
+		t.Fatalf("Got \"%v\", expected \"%v\"", count, 0)
+	}
+	if err == nil {
+		t.Fatalf("X is an invalid nucleotide, but no error was raised")
+	}
+}
+
+// TestCountingDoesntChangeCount calls Count('T') twice on the same strand.
+// The first result is deliberately thrown away; the second must still be 2.
+// In most implementations this is trivially true, but occasionally it matters.
+func TestCountingDoesntChangeCount(t *testing.T) {
+	strand := DNA("CGATTGGG")
+	_, _ = strand.Count('T')
+	if second, _ := strand.Count('T'); second != 2 {
+		t.Fatalf("Got \"%v\", expected \"%v\"", second, 2)
+	}
+}
+
+type histogramTest struct { // element type of the histogramTests table
+	strand   string    // DNA sequence handed to DNA
+	expected Histogram // histogram that Counts must produce for strand
+}
+
+// histogramTests pairs strands with the full nucleotide histogram expected
+// for each: an empty strand, a strand of a single repeated nucleotide, and
+// a longer mixed strand.
+var histogramTests = []histogramTest{
+	{strand: "", expected: Histogram{'A': 0, 'C': 0, 'G': 0, 'T': 0}},
+	{strand: "GGGGGGGG", expected: Histogram{'A': 0, 'C': 0, 'G': 8, 'T': 0}},
+	{
+		strand:   "AGCTTTTCATTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATTAAAAAAAGAGTGTCTGATAGCAGC",
+		expected: Histogram{'A': 20, 'C': 12, 'G': 17, 'T': 21},
+	},
+}
+
+// TestSequenceHistograms computes Counts for every strand in
+// histogramTests and fails on the first histogram that is not Equal to the
+// expected one.
+func TestSequenceHistograms(t *testing.T) {
+	for _, tc := range histogramTests {
+		got := DNA(tc.strand).Counts()
+		if got.Equal(tc.expected) {
+			continue
+		}
+		t.Fatalf("DNA{ \"%v\" }: Got \"%v\", expected \"%v\"", tc.strand, got, tc.expected)
+	}
+}
+
+// BenchmarkSequenceHistograms measures Counts over all strands in
+// histogramTests. Each of the b.N iterations computes one histogram per
+// strand, so the reported time per op covers the whole table.
+func BenchmarkSequenceHistograms(b *testing.B) {
+	// Build every DNAProc up front so construction stays out of the timed
+	// region without toggling the timer on each iteration.
+	strands := make([]*DNAProc, 0, len(histogramTests))
+	for _, tt := range histogramTests {
+		strands = append(strands, DNA(tt.strand))
+	}
+
+	b.ResetTimer()
+	for i := 0; i < b.N; i++ {
+		for _, dna := range strands {
+			dna.Counts()
+		}
+	}
+}