我真的在这个关于链表和节点删除的练习中苦苦挣扎。
基本上,当一个序列中的每个碱基在另一个序列中的相同位置具有相应的互补时,就称 DNA 序列是互补的。碱基 A 与 T 键合,碱基 C 与 G 键合。例如,序列 ACGT 和 TGCA 是互补的。
我必须实现一个需要两个链表的函数:第一个是原始 DNA,第二个是编辑序列。该函数必须应用前面描述的编辑方法并返回新列表。
例如,我们将“ACGTAGACGTTCT A”作为原始 DNA 序列,将“GC A”作为键合序列。G 与 C 匹配,C 与 G 匹配,A 与 T 匹配(反之亦然)。因此,“CG T”是“GC A”的反射。然后我们必须按照这个确切的顺序从原始 DNA 序列中删除“C”、“G”和 T”,结果证明这是下面所述的预期结果。
另外,有两件事: 1. 我必须忽略由于任何节点删除而生成的任何后续匹配项。2. 除了 stdlib.h 和 malloc() 或 calloc() 等函数,我不能使用任何头文件。我只被允许使用free()。
输入:
A C G T A G A C G T T C T A
G C A
预期结果:
A A G A T C T A
实际结果:
A A G A C G T T C T A
代码暂定:
typedef struct LinkedNode LinkedNode;
struct LinkedNode {
char data;
LinkedNode *next;
};
int bases_dna(LinkedNode *atual, LinkedNode *seq_edicao){
int i = 0, k = 0, seq_count = 0;
LinkedNode *seq_new = seq_edicao;
while(seq_new!=NULL){
seq_new = seq_new->next;
seq_count++;
}
seq_new = seq_edicao;
for(; i<seq_count; i++){
if ((atual->data == 'C' && seq_new->data == 'G') ||
(atual->data == 'A' && seq_new->data == 'T') ||
(atual->data == 'G' && seq_new->data == 'C') ||
(atual->data == 'T' && seq_new->data == 'A')){
k++;
atual = atual->next;
seq_new = seq_new->next;
}
}
if(k==seq_count) return k;
else return 0;
}
LinkedNode *editar_dna(LinkedNode *dna_original, LinkedNode *seq_edicao){
if (dna_original == NULL) return NULL;
int result = bases_dna(dna_original, seq_edicao);
if (result != 0){
LinkedNode *temp = dna_original;
for(int i = 0; i < result; i++){
temp = temp->next;
}
return temp;
}
dna_original->next = editar_dna(dna_original->next, seq_edicao);
return dna_original;
}
代码说明:
名为editar_dna的函数完成所有递归工作并调用名为bases_dna的辅助函数,该函数负责验证原始 DNA 中给定的编辑序列,因此返回该序列具有的元素数量,以便让第一个函数知道如何许多元素将不得不“跳过”。
我面临的问题是,我的递归执行在到达editar_dna函数中的第一个返回时立即停止。我正在考虑在递归之后立即实现它的条件结构,所以它不会停止得太快,并且可能会一直到列表的末尾。但是,如果我能以某种方式轻松地验证从链表末尾到其开头的字符,那将是解决此练习的好方法,但相反的情况要简单得多(这正是我的辅助函数假设会发生的情况) .
任何想法将不胜感激。
谢谢!
我应该根据您当前的代码编写代码以便更好地沟通,但我认为不使用递归会更简单。请你试试:
#include <stdio.h>
#include <stdlib.h>
typedef struct LinkedNode {
char data;
struct LinkedNode *next;
} LinkedNode;
/*
* append the node with data at the end of the list
*/
void append(LinkedNode *head, char data)
{
LinkedNode *newNode = malloc(sizeof(LinkedNode));
newNode->data = data;
newNode->next = NULL;
LinkedNode *temp = head;
while (temp->next != NULL) {
temp = temp->next;
}
temp->next = newNode;
}
/*
* dump the data of the list
*/
void dump(LinkedNode *head)
{
for (head = head->next; head != NULL; head = head->next) {
printf("%c ", head->data);
}
printf("\n");
}
/*
* generate another list of complimentary sequence
*/
void gencomp(LinkedNode *dest, LinkedNode *src)
{
char s_data, d_data;
for (src = src->next; src != NULL; src = src->next) {
s_data = src->data;
switch(s_data) {
case 'A':
d_data = 'T';
break;
case 'T':
d_data = 'A';
break;
case 'C':
d_data = 'G';
break;
case 'G':
d_data = 'C';
break;
default:
fprintf(stderr, "unknown base: %c\n", s_data);
exit(1);
}
append(dest, d_data);
}
}
/*
* return the pointer to the last element if the subsequences match
*/
LinkedNode *match(LinkedNode *head, LinkedNode *comp)
{
for (; head != NULL; head = head->next, comp = comp->next) {
if (head->data != comp->data) {
return NULL;
} else if (comp->next == NULL) {
return head;
}
}
return NULL;
}
/*
* search "head" for "comp". If matched, the matched portion is skipped
* (not freed)
*/
void edit(LinkedNode *head, LinkedNode *comp)
{
LinkedNode *matched;
for (; head != NULL; head = head->next) {
if ((matched = match(head->next, comp->next))) {
head->next = matched->next; // skip matched sequence
}
}
}
int main()
{
char dna_orig[] = "ACGTAGACGTTCTA";
char dna_bond[] = "GCA";
int i;
// generate list of original sequence
LinkedNode *head = malloc(sizeof(LinkedNode));
head->next = NULL;
for (i = 0; dna_orig[i] != 0; i++) {
append(head, dna_orig[i]);
}
// generate list of bonding sequence
LinkedNode *bond = malloc(sizeof(LinkedNode));
bond->next = NULL;
for (i = 0; dna_bond[i] != 0; i++) {
append(bond, dna_bond[i]);
}
// generate list of complementary sequence of bond
LinkedNode *comp = malloc(sizeof(LinkedNode));
comp->next = NULL;
gencomp(comp, bond);
// edit the original sequence and see the result
edit(head, comp);
dump(head);
return 0;
}
输出:
A A G A T C T A
[编辑]
释放跳过的节点时,我们不能按照从头到尾的正向顺序释放它们,因为一旦第一个节点被释放,到下一个节点的链接就会丢失。然后我们可以利用递归以相反的顺序释放它们。这是释放跳过的节点的函数:
void freenodes(LinkedNode *head, LinkedNode *tail)
{
if (head->next == tail) { // the last node to remove
free(head);
return;
}
freenodes(head->next, tail); // free nodes recursively
free(head);
}
函数的第一个参数一个接一个地前进,直到它到达匹配序列的最后一个节点。然后函数返回到调用者,返回匹配序列的第一个节点,同时释放节点。这是更新的完整代码,包括修改edit
函数以返回已编辑列表的头部。
#include <stdio.h>
#include <stdlib.h>
typedef struct LinkedNode {
char data;
struct LinkedNode *next;
} LinkedNode;
/*
* append the node with data at the end of the list
*/
void append(LinkedNode *head, char data)
{
LinkedNode *newNode = malloc(sizeof(LinkedNode));
newNode->data = data;
newNode->next = NULL;
LinkedNode *temp = head;
while (temp->next != NULL) {
temp = temp->next;
}
temp->next = newNode;
}
/*
* dump the data of the list
*/
void dump(LinkedNode *head)
{
for (head = head->next; head != NULL; head = head->next) {
printf("%c ", head->data);
}
printf("\n");
}
/*
* compare characters complementarily
* return 1 for matching pairs as A<=>T or C<=>G
*/
int compcomp(char a, char b)
{
if ((a == 'A' && b == 'T') || (a == 'T' && b == 'A')
|| (a == 'C' && b == 'G') || (a == 'G' && b == 'C'))
return 1;
else
return 0;
}
/*
* return the pointer to the last node if the subsequences match complementarily
*/
LinkedNode *match(LinkedNode *head, LinkedNode *bond)
{
for (; head != NULL; head = head->next, bond = bond->next) {
if (! compcomp(head->data, bond->data)) {
return NULL;
} else if (bond->next == NULL) {
return head;
}
}
return NULL;
}
/*
* free skipped nodes
*/
void freenodes(LinkedNode *head, LinkedNode *tail)
{
if (head->next == tail) { // the last node to remove
// printf("> %c\n", head->data); // for debugging
free(head);
return;
}
freenodes(head->next, tail); // free nodes recursively
// printf("> %c\n", head->data); // for debugging
free(head);
}
/*
* search "head" for the sequence of complementary of "bond". If matched,
* the matched portion is skipped and skipped nodes are freed
*/
LinkedNode *edit(LinkedNode *head, LinkedNode *bond)
{
LinkedNode *matched; // last node of matched sequence
LinkedNode *matchednext;
LinkedNode *tmp; // copy of head to traverse the list
for (tmp = head; tmp != NULL; tmp = tmp->next) {
if ((matched = match(tmp->next, bond->next))) {
matchednext = matched->next; // backup matched->next
freenodes(tmp->next, matched->next);
tmp->next = matchednext; // skip matched sequence
}
}
return head;
}
int main()
{
char dna_orig[] = "ACGTAGACGTTCTA";
char dna_bond[] = "GCA";
int i;
// generate list of original sequence
LinkedNode *head = malloc(sizeof(LinkedNode));
head->next = NULL;
for (i = 0; dna_orig[i] != 0; i++) {
append(head, dna_orig[i]);
}
// generate list of bonding sequence
LinkedNode *bond = malloc(sizeof(LinkedNode));
bond->next = NULL;
for (i = 0; dna_bond[i] != 0; i++) {
append(bond, dna_bond[i]);
}
// edit the original sequence and see the result
dump(edit(head, bond));
return 0;
}
顺便说一句,您可以#include <stdio.h>
通过删除printf()
和fprintf()
仅用于报告目的的函数来省略。
[EDIT2]
你的代码和我的主要区别在于链表的数据结构。我的head
列表中有空数据,是第一个节点的入口,而您dna_original
直接从包含数据的节点开始。与bond
和相同sequencia
。两者都有自己的优势,但我们不能混合使用。然后请修改您的editar_dna
为:
LinkedNode *editar_dna(LinkedNode *dna_original, LinkedNode *seq_edicao){
LinkedNode *matched, *matchednext, *tmp;
for (tmp = dna_original; tmp != NULL; tmp = tmp->next) {
if ((matched = match(tmp->next, seq_edicao))) {
matchednext = matched->next;
liberar_nos(tmp->next, matched->next);
tmp->next = matchednext;
}
}
return dna_original;
}
更改seq_edicao->next
为功能seq_edicao
。match()
然后它会输出A A A
。
您的数据结构的潜在问题是我们无法删除起始子序列,例如:"CGTACGTA"
. 技术上可以修复,但可能需要额外的大量修改。
现在你有三个选择:
由你决定。:)
[EDIT3]
这是将选项 3(不修改 main)应用于您的代码(仅供参考)的版本:
#include <stdio.h>
#include <stdlib.h>
typedef struct LinkedNode LinkedNode;
struct LinkedNode {
char data;
LinkedNode *next;
};
void imprimir1(LinkedNode *inicio){
if(inicio == NULL){
return;
}
printf("%c", inicio->data);
if (inicio->next!=NULL) printf(" ");
else printf("\n");
imprimir1(inicio->next);
return;
}
void liberar_lista(LinkedNode *inicio){
if(inicio == NULL) return;
liberar_lista(inicio->next);
free(inicio);
}
LinkedNode *inserir_final_r(LinkedNode *inicio, char valor) {
if (inicio == NULL) {
LinkedNode *novo = malloc(sizeof(LinkedNode));
if (novo == NULL) return inicio;
novo->data = valor;
novo->next = NULL;
return novo;
}
inicio->next = inserir_final_r(inicio->next, valor);
return inicio;
}
void liberar_nos(LinkedNode *head, LinkedNode *tail) {
if (head == tail) {
return;
}
liberar_nos(head->next, tail);
free(head);
}
int compare(char a, char b) {
if ((a == 'A' && b == 'T') || (a == 'T' && b == 'A')
|| (a == 'C' && b == 'G') || (a == 'G' && b == 'C'))
return 1;
else
return 0;
}
LinkedNode *match(LinkedNode *head, LinkedNode *bond) {
for (; head != NULL; head = head->next, bond = bond->next) {
if (! compare(head->data, bond->data)) {
return NULL;
} else if (bond->next == NULL) {
return head;
}
}
return NULL;
}
LinkedNode *editar_dna(LinkedNode *dna_original, LinkedNode *seq_edicao){
LinkedNode *matched, *matchednext, *tmp;
// remove leading matched subsequence(s) as a preproces
while ((matched = match(dna_original, seq_edicao))) {
matchednext = matched->next;
liberar_nos(dna_original->next, matched->next); // note we cannot free the 1st node
dna_original->data = matchednext->data;
dna_original->next = matchednext->next;
}
for (tmp = dna_original; tmp != NULL; tmp = tmp->next) {
if ((matched = match(tmp->next, seq_edicao))) {
matchednext = matched->next;
liberar_nos(tmp->next, matched->next);
tmp->next = matchednext;
}
}
return dna_original;
}
int main(){
//Creating empty nodes
LinkedNode *dna_original = NULL;
LinkedNode *sequencia = NULL;
//Populating test data into the original sequence
// dna_original = inserir_final_r(dna_original, 'A'); // dropped just for the test
dna_original = inserir_final_r(dna_original, 'C');
dna_original = inserir_final_r(dna_original, 'G');
dna_original = inserir_final_r(dna_original, 'T');
dna_original = inserir_final_r(dna_original, 'C');
dna_original = inserir_final_r(dna_original, 'G');
dna_original = inserir_final_r(dna_original, 'T');
dna_original = inserir_final_r(dna_original, 'A');
dna_original = inserir_final_r(dna_original, 'C');
dna_original = inserir_final_r(dna_original, 'G');
dna_original = inserir_final_r(dna_original, 'T');
dna_original = inserir_final_r(dna_original, 'A');
//Populating test data into the second sequence
sequencia = inserir_final_r(sequencia, 'G');
sequencia = inserir_final_r(sequencia, 'C');
sequencia = inserir_final_r(sequencia, 'A');
//Printing the original sequence before change
imprimir1(dna_original);
//Changing the sequence
editar_dna(dna_original, sequencia);
//Printing the original sequence after change
imprimir1(dna_original);
//Freeing allocated memory
liberar_lista(dna_original);
liberar_lista(sequencia);
return 0;
}
请注意 main() 中列表的初始化是为了调试和演示目的而修改的。
本文收集自互联网,转载请注明来源。
如有侵权,请联系 [email protected] 删除。
我来说两句