# DNA Sequence Pattern Finder

import matplotlib.pyplot as plt


# ---------------------------------------

# Basic DNA Validation

# ---------------------------------------

def validate_dna(sequence):
    """Return True if *sequence* is a non-empty string of DNA bases.

    The check is case-insensitive: the sequence is upper-cased before its
    characters are compared with the alphabet A, T, C, G.

    Args:
        sequence: Candidate DNA string.

    Returns:
        True when the sequence contains at least one character and every
        character is one of A, T, C or G (in any case); False otherwise.
    """
    # A membership test over an empty string is vacuously true, which would
    # report "no bases at all" as valid DNA. Reject it explicitly.
    if not sequence:
        return False

    # Every distinct character must belong to the DNA alphabet.
    return set(sequence.upper()) <= set("ATCG")



# ---------------------------------------

# GC Content Calculation

# ---------------------------------------

def gc_content(sequence):
    """Return the percentage of G and C bases in *sequence*.

    Counting is case-insensitive, so "gc" and "GC" give the same result.

    Args:
        sequence: DNA string to analyse.

    Returns:
        A float between 0 and 100: the number of G and C characters divided
        by the total length, times 100. An empty sequence yields 0.0.
    """
    # Guard against division by zero: an empty sequence has no GC bases.
    if not sequence:
        return 0.0

    normalized = sequence.upper()
    gc_count = normalized.count("G") + normalized.count("C")

    return (gc_count / len(sequence)) * 100



# ---------------------------------------

# Motif Finder

# ---------------------------------------

def find_motif(sequence, motif):
    """Return every 0-based start index at which *motif* occurs in *sequence*.

    The search is case-insensitive and reports overlapping occurrences
    (for example "ATA" is found at 0 and 2 in "ATATA").

    Args:
        sequence: DNA string to search in.
        motif: Pattern to look for.

    Returns:
        A list of start indices in ascending order. The list is empty when
        the motif does not occur, is longer than the sequence, or is empty.
    """
    sequence = sequence.upper()
    motif = motif.upper()

    # An empty pattern trivially "matches" at every index, which is never a
    # meaningful motif hit; report no occurrences instead.
    if not motif:
        return []

    positions = []
    start = sequence.find(motif)
    while start != -1:
        positions.append(start)
        # Resume one character after the previous hit (not after the whole
        # match) so that overlapping occurrences are still reported.
        start = sequence.find(motif, start + 1)

    return positions



# ---------------------------------------

# Mutation Comparison

# ---------------------------------------

def compare_sequences(seq1, seq2):
    """List the positions at which two sequences differ.

    Only the overlapping prefix is compared: when the sequences have
    different lengths, the extra tail of the longer one is ignored. The
    comparison is case-sensitive.

    Args:
        seq1: Reference sequence.
        seq2: Sequence compared against the reference.

    Returns:
        A list of ``(index, base_in_seq1, base_in_seq2)`` tuples, one per
        mismatching 0-based position, in ascending index order.
    """
    # zip() stops at the shorter input, which limits the scan to the
    # common length of the two sequences.
    return [
        (index, left, right)
        for index, (left, right) in enumerate(zip(seq1, seq2))
        if left != right
    ]



# ---------------------------------------

# GC Content Visualization (Sliding Window)

# ---------------------------------------

def gc_sliding_window(sequence, window_size=20):
    """Compute the GC percentage of every full window along *sequence*.

    The window advances one position at a time, and each window is passed
    to ``gc_content``.

    Args:
        sequence: DNA string to scan.
        window_size: Number of bases per window; must be positive.

    Returns:
        A list with one GC percentage per window start, i.e.
        ``len(sequence) - window_size + 1`` values. The list is empty when
        the sequence is shorter than the window.

    Raises:
        ValueError: If *window_size* is zero or negative.
    """
    # A non-positive size would produce empty or wrapped-around slices and
    # fail later with an unrelated error; reject it up front.
    if window_size <= 0:
        raise ValueError("window_size must be a positive integer")

    last_start = len(sequence) - window_size

    return [
        gc_content(sequence[start:start + window_size])
        for start in range(last_start + 1)
    ]



def plot_gc_distribution(sequence, window_size=20):
    """Plot the sliding-window GC percentage of *sequence* with matplotlib.

    Args:
        sequence: DNA string to analyse.
        window_size: Number of bases per window, forwarded to
            ``gc_sliding_window``. Defaults to 20, the value that was
            previously fixed inside this function.

    Returns:
        None. The figure is displayed through ``plt.show()``.
    """
    gc_values = gc_sliding_window(sequence, window_size)

    plt.figure(figsize=(10, 4))
    # The x axis is the window index, i.e. the start position of each window.
    plt.plot(gc_values)
    plt.title("GC Content Distribution (Sliding Window)")
    plt.xlabel("Position")
    plt.ylabel("GC %")
    plt.show()



# ---------------------------------------

# MAIN

# ---------------------------------------

if __name__ == "__main__":
    # Interactive driver: read a sequence, report its GC content, search for
    # a motif, optionally compare against a second sequence, and optionally
    # plot the sliding-window GC distribution.
    dna = input("Enter DNA sequence: ").strip().upper()

    # Empty input is rejected here as well as invalid characters: with no
    # bases there is nothing to analyse.
    if not dna or not validate_dna(dna):
        print("❌ Invalid DNA sequence (Only A, T, C, G allowed)")
        # SystemExit is used instead of exit(): exit() is a convenience
        # helper installed by the site module and is not guaranteed to be
        # available. A non-zero status signals the failure to the caller.
        raise SystemExit(1)

    print("\n🧬 DNA Analysis Results\n")

    # GC Content
    gc_percent = gc_content(dna)
    print(f"GC Content: {gc_percent:.2f}%")

    # Motif Search
    motif = input("\nEnter motif to search (e.g., ATG): ").strip().upper()

    if not motif:
        # An empty pattern is not a meaningful search; skip it instead of
        # reporting a match.
        print("No motif entered. Skipping motif search.")
    else:
        positions = find_motif(dna, motif)

        if positions:
            print(f"Motif '{motif}' found at positions: {positions}")
        else:
            print(f"Motif '{motif}' not found.")

    # Mutation Comparison
    compare = input("\nCompare with another sequence? (y/n): ").strip().lower()

    if compare == "y":
        dna2 = input("Enter second DNA sequence: ").strip().upper()

        # The second sequence gets the same validation as the first, so
        # arbitrary characters are not reported as "mutations".
        if not dna2 or not validate_dna(dna2):
            print("❌ Invalid DNA sequence (Only A, T, C, G allowed)")
        else:
            if len(dna) != len(dna2):
                print("⚠ Sequences have different lengths. Comparing minimum length.")

            mutations = compare_sequences(dna, dna2)

            if mutations:
                print("\nMutations found:")
                for pos, base1, base2 in mutations:
                    print(f"Position {pos}: {base1} → {base2}")
            else:
                print("No mutations detected.")

    # GC Distribution Plot
    plot = input("\nPlot GC content distribution? (y/n): ").strip().lower()

    if plot == "y":
        plot_gc_distribution(dna)


# No comments: