我想把文本中的所有单词拆分出来,同时也需要把标点符号单独分离出来。
将它们拆分并保存到字符串数组中的最佳方法是什么?
这是一个例子:
输入:
预期输出
实际输出
我的代码:
/* Split line i on spaces and line breaks and append every token pointer to
 * palavras_finais. Only ' ', '\n' and '\r' are delimiters here, so a
 * punctuation mark attached to a word stays inside that word's token. */
palavra_linha[i] = strtok (linhas[i], " \n\r");
while (palavra_linha[i] != NULL) {
/* Store the pointer returned by strtok; no copy of the text is made here. */
palavras_finais[j] = palavra_linha[i];
j++;
/* Passing NULL makes strtok continue in the string given to the first call. */
palavra_linha[i] = strtok (NULL, " \n\r");
}
我知道必须使用类似下面的判断,但它不起作用,因为该条件为假:
/* NOTE(review): as written this does not compile, because the parentheses of
 * the condition are unbalanced. It also compares a length minus one against
 * '.', not a character of the token. Presumably the last character,
 * palavra_linha[i][strlen(palavra_linha[i]) - 1], was meant -- confirm. */
if (strlen(palavra_linha[i])-1) == '.') {
palavras_finais[j] = palavra_linha[i];
}
答案 0 :(得分:1)
现在它正常工作并给我输出:
[Hello]
[,]
[Sara]
[!]
[How]
[are]
[You]
[?]
使用完毕后不要忘记释放数组;也可以在程序开始时用一个临时指针(例如 tmp)保存原始字符串。
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h>
#include <unistd.h>
/*
 * Advance the index *i past the token that starts at str, including the
 * spaces around it.
 *
 * str  points at the current position inside the text (not modified).
 * i    index of that position; incremented once for every byte skipped.
 *
 * The token rule is the same one count_char uses: after leading spaces the
 * first byte is always consumed, and consumption continues until the byte
 * under the cursor is a space, the terminator, or a punctuation character.
 * A punctuation mark that is directly followed by letters therefore starts a
 * token that also contains those letters ("'Reilly").
 */
static void skip_copied_bytes(const char *str, int *i)
{
    /* Byte currently under the cursor; stays 0 until one byte was consumed,
     * which guarantees that the first byte of a token is always taken.
     * Kept as unsigned char because ispunct is undefined for negative
     * values other than EOF. */
    unsigned char next = 0;

    /* Leading spaces. */
    while (*str == ' ') {
        str++;
        (*i)++;
    }

    /* The token itself. */
    while (*str != ' ' && *str != '\0' && !ispunct(next)) {
        str++;
        (*i)++;
        next = (unsigned char)*str;
    }

    /* Trailing spaces, so the caller lands on the next token or on '\0'. */
    while (*str == ' ') {
        str++;
        (*i)++;
    }
}
/*
 * Return the length in bytes of the token that starts at str, not counting
 * leading spaces.
 *
 * After leading spaces the first byte is always counted; counting continues
 * until the byte under the cursor is a space, the terminator, or a
 * punctuation character. A lone punctuation mark is a token of length 1,
 * while a punctuation mark directly followed by letters is counted together
 * with them ("'Reilly" has length 7). Returns 0 when only spaces or nothing
 * remain.
 */
static int count_char(const char *str)
{
    int count = 0;
    /* Byte currently under the cursor; 0 until the first byte was counted.
     * unsigned char keeps the ispunct argument in its defined range. */
    unsigned char next = 0;

    while (*str == ' ') {
        str++;
    }

    while (*str != ' ' && *str != '\0' && !ispunct(next)) {
        count++;
        str++;
        next = (unsigned char)*str;
    }

    return count;
}
/*
 * Count the pieces the text s will be split into: one for every run of
 * bytes between occurrences of the delimiter c, plus one for every
 * punctuation character found inside such a run.
 *
 * main uses the result to size its array of token pointers. For an empty
 * string, or one made only of delimiters, the result is 0.
 */
static int count_word(char const *s, char c)
{
    const char *p = s;
    int count = 0;

    while (*p != '\0') {
        /* Skip the delimiters in front of the next word. */
        while (*p == c) {
            p++;
        }

        if (*p != '\0') {
            count++;
        }

        /* Walk the word; every punctuation mark adds one more piece.
         * The cast keeps the ispunct argument in its defined range. */
        while (*p != c && *p != '\0') {
            if (ispunct((unsigned char)*p)) {
                count++;
            }
            p++;
        }
    }

    return count;
}
/*
 * Split a sample sentence into words and punctuation marks, store each piece
 * as its own heap-allocated string in a NULL-terminated array, and print the
 * pieces one per line as "[piece] ".
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE when an allocation fails.
 */
int main(void)
{
    char str[] = "Hello, Sara! How are You?";
    char **array;
    int capacity;
    int len;
    int i = 0;
    int j = 0;
    int k;
    int status = EXIT_FAILURE;

    /* One slot per piece plus one for the terminating NULL pointer. */
    capacity = count_word(str, ' ');
    array = malloc(sizeof *array * ((size_t)capacity + 1));
    if (array == NULL)
        return EXIT_FAILURE;

    while (str[i])
    {
        /* count_char ignores leading spaces, so step over them here as well;
         * otherwise the copy below would start on the spaces. */
        while (str[i] == ' ')
            i++;
        if (str[i] == '\0')
            break;

        len = count_char(&str[i]);

        /* +1 for the terminator written below. */
        array[j] = malloc((size_t)len + 1);
        if (array[j] == NULL)
            goto cleanup;
        memcpy(array[j], &str[i], (size_t)len);
        array[j][len] = '\0';
        j++;

        skip_copied_bytes(&str[i], &i);
    }
    array[j] = NULL;

    for (k = 0; array[k]; k++) {
        printf("[%s] \n", array[k]);
    }
    status = EXIT_SUCCESS;

cleanup:
    /* Only the first j slots hold allocated strings. */
    for (k = 0; k < j; k++)
        free(array[k]);
    free(array);
    return status;
}
答案 1 :(得分:1)
一个思路是:保留原始文本,并创建它的一个副本。在创建副本时,逐个遍历文本中的字符,检查当前字符是否为标点符号。如果是,就在副本中用空格把该标点符号与相邻字符隔开,然后从标点符号的下一个字符继续处理,直到遇到文本末尾的空字符。
最后,您可以使用 strtok() 来切分文本副本,方法与您原来的做法相同。以下是实现上述思路的示例。
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
#include <string.h>
/*
 * Read a sentence from standard input and print every word and every
 * punctuation character on its own line.
 *
 * The user first enters a buffer size, then the sentence. At most size - 1
 * characters of the sentence are kept. The sentence is copied into a second
 * buffer with a space on both sides of every punctuation character, and that
 * copy is then split with strtok on spaces and line breaks.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE on invalid input or a failed
 * allocation.
 */
int main(void) {
    char *stnc_org = NULL;  /* the sentence as typed */
    char *stnc_new = NULL;  /* copy with punctuation padded by spaces */
    char *out;
    char *str;
    size_t len;
    size_t punct_num = 0;
    size_t i;
    int size = 0;
    int status = EXIT_FAILURE;

    printf("What is the expected size of the sentence: ");
    if (scanf("%d", &size) != 1 || size <= 0) {
        fprintf(stderr, "Invalid sentence size\n");
        return EXIT_FAILURE;
    }

    stnc_org = malloc((size_t)size);
    if (stnc_org == NULL) {
        fprintf(stderr, "Out of memory\n");
        return EXIT_FAILURE;
    }

    printf("Input: \n");
    /* The " " directive discards the newline left by the size prompt and any
     * leading blanks; fgets then bounds the read to the buffer size. */
    if (scanf(" ") == EOF || fgets(stnc_org, size, stdin) == NULL) {
        fprintf(stderr, "No input\n");
        goto cleanup;
    }
    stnc_org[strcspn(stnc_org, "\n")] = '\0';

    /* Count the punctuation characters to size the padded copy. */
    len = strlen(stnc_org);
    for (i = 0; i < len; i++) {
        if (ispunct((unsigned char)stnc_org[i])) {
            punct_num++;
        }
    }

    /* Each punctuation character gains at most two spaces; +1 for '\0'. */
    stnc_new = malloc(len + 2 * punct_num + 1);
    if (stnc_new == NULL) {
        fprintf(stderr, "Out of memory\n");
        goto cleanup;
    }

    /* Copy the text, isolating every punctuation character with spaces.
     * Redundant spaces are harmless: strtok treats a run of delimiters as
     * a single separator. */
    out = stnc_new;
    for (i = 0; i < len; i++) {
        if (ispunct((unsigned char)stnc_org[i])) {
            *out++ = ' ';
            *out++ = stnc_org[i];
            *out++ = ' ';
        } else {
            *out++ = stnc_org[i];
        }
    }
    *out = '\0';

    printf("\nOutput:\n");
    str = strtok(stnc_new, " \n\r");
    while (str != NULL) {
        printf("%s\n", str);
        str = strtok(NULL, " \n\r");
    }
    status = EXIT_SUCCESS;

cleanup:
    free(stnc_new);
    free(stnc_org);
    return status;
}
示例输出如下:
Running "/home/ubuntu/workspace/replace.c"
What is the expected size of the sentence: 300
Input:
"Isn't it true that Bill O'Reilly didn't win (he came in 3rd!)? 'Tain't necessarily so!"
Output:
"
Isn
'
t
it
true
that
Bill
O
'
Reilly
didn
'
t
win
(
he
came
in
3rd
!
)
?
'
Tain
'
t
necessarily
so
!
"
Process exited with code: 0