import matplotlib.pyplot as plt
# ---------------------------------------
# Basic DNA Validation
# ---------------------------------------
def validate_dna(sequence):
    """Return True when *sequence* contains only the bases A, T, C and G.

    The check is case-insensitive. An empty sequence has no offending
    characters and therefore also yields True.
    """
    allowed_bases = frozenset("ATCG")
    observed_bases = set(sequence.upper())
    # Valid exactly when nothing outside the DNA alphabet was observed.
    return observed_bases.issubset(allowed_bases)
# ---------------------------------------
# GC Content Calculation
# ---------------------------------------
def gc_content(sequence):
    """Return the percentage of G and C bases in *sequence*.

    Counting is case-insensitive, so ``"gc"`` and ``"GC"`` both give 100.0.

    Args:
        sequence: Nucleotide string to analyse.

    Returns:
        GC percentage as a float in the range 0-100. An empty sequence
        yields 0.0 rather than raising ZeroDivisionError.
    """
    if not sequence:
        # No bases at all: report 0% instead of dividing by zero.
        return 0.0
    normalized = sequence.upper()
    gc_count = normalized.count("G") + normalized.count("C")
    return (gc_count / len(sequence)) * 100
# ---------------------------------------
# Motif Finder
# ---------------------------------------
def find_motif(sequence, motif):
    """Return the 0-based start index of every occurrence of *motif*.

    Matching is case-insensitive and overlapping occurrences are all
    reported (``find_motif("AAAA", "AA")`` gives ``[0, 1, 2]``).

    Args:
        sequence: Nucleotide string to search in.
        motif: Sub-sequence to look for.

    Returns:
        List of start positions in ascending order. The list is empty when
        the motif does not occur or when *motif* is empty.
    """
    sequence = sequence.upper()
    motif = motif.upper()
    if not motif:
        # An empty motif would trivially "match" at every index; treat it
        # as no search at all.
        return []
    positions = []
    start = sequence.find(motif)
    while start != -1:
        positions.append(start)
        # Advance by one (not by len(motif)) so overlapping hits are kept.
        start = sequence.find(motif, start + 1)
    return positions
# ---------------------------------------
# Mutation Comparison
# ---------------------------------------
def compare_sequences(seq1, seq2):
    """List the positions at which two sequences differ.

    Only the overlapping prefix (the length of the shorter sequence) is
    compared, and the comparison is case-sensitive.

    Returns:
        A list of ``(index, base_in_seq1, base_in_seq2)`` tuples, one per
        mismatching position, in ascending index order.
    """
    # zip() stops at the shorter input, which bounds the comparison to the
    # common length.
    return [
        (index, left, right)
        for index, (left, right) in enumerate(zip(seq1, seq2))
        if left != right
    ]
# ---------------------------------------
# GC Content Visualization (Sliding Window)
# ---------------------------------------
def gc_sliding_window(sequence, window_size=20):
gc_values = []
for i in range(len(sequence) - window_size + 1):
window = sequence[i:i+window_size]
gc_values.append(gc_content(window))
return gc_values
def plot_gc_distribution(sequence, window_size=20):
    """Plot sliding-window GC content along *sequence* and show the figure.

    Args:
        sequence: Nucleotide string to analyse.
        window_size: Number of bases per window, passed through to
            ``gc_sliding_window``. Defaults to 20.

    The x axis is the index of each window in the list returned by
    ``gc_sliding_window``; the y axis is that window's GC percentage.
    Calls ``plt.show()`` to display the figure.
    """
    gc_values = gc_sliding_window(sequence, window_size)
    plt.figure(figsize=(10, 4))
    plt.plot(gc_values)
    plt.title("GC Content Distribution (Sliding Window)")
    plt.xlabel("Position")
    plt.ylabel("GC %")
    plt.show()
# ---------------------------------------
# MAIN
# ---------------------------------------
def main():
    """Run the interactive DNA analysis session.

    Prompts on stdin for a DNA sequence, then reports its GC content,
    optionally searches for a motif, optionally compares it against a
    second sequence, and optionally plots the GC distribution.

    Returns:
        Process exit status: 0 on success, 1 when the primary sequence is
        empty or contains characters other than A, T, C, G.
    """
    dna = input("Enter DNA sequence: ").strip().upper()
    if not dna:
        # Nothing to analyse: an empty sequence has no GC content, motifs
        # or windows to report.
        print("❌ No DNA sequence entered.")
        return 1
    if not validate_dna(dna):
        print("❌ Invalid DNA sequence (Only A, T, C, G allowed)")
        return 1

    print("\n🧬 DNA Analysis Results\n")

    # GC Content
    gc = gc_content(dna)
    print(f"GC Content: {gc:.2f}%")

    # Motif Search
    motif = input("\nEnter motif to search (e.g., ATG): ").strip().upper()
    if not motif:
        print("No motif entered; skipping motif search.")
    else:
        positions = find_motif(dna, motif)
        if positions:
            print(f"Motif '{motif}' found at positions: {positions}")
        else:
            print(f"Motif '{motif}' not found.")

    # Mutation Comparison
    compare = input("\nCompare with another sequence? (y/n): ").strip().lower()
    if compare == "y":
        dna2 = input("Enter second DNA sequence: ").strip().upper()
        if not dna2 or not validate_dna(dna2):
            # Comparing against garbage would report meaningless mutations.
            print("❌ Invalid second sequence; skipping comparison.")
        else:
            if len(dna) != len(dna2):
                print("⚠ Sequences have different lengths. Comparing minimum length.")
            mutations = compare_sequences(dna, dna2)
            if mutations:
                print("\nMutations found:")
                for pos, base1, base2 in mutations:
                    print(f"Position {pos}: {base1} → {base2}")
            else:
                print("No mutations detected.")

    # GC Distribution Plot
    plot = input("\nPlot GC content distribution? (y/n): ").strip().lower()
    if plot == "y":
        plot_gc_distribution(dna)
    return 0


if __name__ == "__main__":
    # SystemExit carries main()'s status to the shell without relying on
    # the interactive-only exit() helper.
    raise SystemExit(main())