计算java中句子中每个单词的频率

时间:2014-02-14 05:33:50

标签: java string extract words

我正在写一个非常基本的 Java 程序,用来计算句子中每个单词出现的频率。到目前为止,我已经完成了以下部分:

import java.io.*;

class Linked {

    /**
     * Prompts for a sentence on standard input and cuts it into words at every
     * single space character, storing the pieces in an array.
     *
     * @param args unused
     * @throws IOException if reading from standard input fails
     */
    public static void main(String args[]) throws IOException {
        BufferedReader reader = new BufferedReader(new InputStreamReader(System.in));
        System.out.println("Enter the sentence");

        // A trailing space makes sure the last word is also followed by a delimiter.
        String sentence = reader.readLine() + " ";

        // One array slot per space, i.e. one per space-terminated piece.
        String[] pieces = new String[lengthx(sentence)];
        int filled = 0;
        int wordStart = 0;

        int spaceAt = sentence.indexOf(' ');
        while (spaceAt >= 0) {
            pieces[filled] = sentence.substring(wordStart, spaceAt);
            filled++;
            wordStart = spaceAt + 1;
            spaceAt = sentence.indexOf(' ', wordStart);
        }
    }

    /**
     * Counts the space characters in the given string.
     *
     * @param a the string to scan
     * @return the number of {@code ' '} characters in {@code a}
     */
    static int lengthx(String a) {
        int spaces = 0;
        for (char ch : a.toCharArray()) {
            if (ch == ' ') {
                spaces++;
            }
        }
        return spaces;
    }
}

我已经提取出了每个单词,并把它们存储在一个数组中。现在的问题是:如何统计每个单词重复出现的次数,以及如何显示结果,使重复的单词不会被多次显示?你能帮我解决这个问题吗?

19 个答案:

答案 0 :(得分:22)

使用 Map,以单词作为键、出现次数作为值,就像这样:

    // Maps each word to the number of times it has been seen so far.
    Map<String, Integer> map = new HashMap<>();
    for (String w : words) {
        // A word that is not in the map yet counts as seen zero times.
        map.put(w, map.getOrDefault(w, 0) + 1);
    }

如果不允许使用 java.util,那么可以先用某种排序算法对 arr 进行排序,然后执行以下操作:

    // Precondition: arr is sorted, so equal words sit next to each other.
    // words[k] holds the k-th distinct word and counts[k] its number of occurrences;
    // slots beyond the last distinct word stay null / 0.
    String[] words = new String[arr.length];
    int[] counts = new int[arr.length];
    if (arr.length > 0) {
        // Seed the first run from the input array (not from the still-empty words array).
        words[0] = arr[0];
        counts[0] = 1;
    }
    for (int i = 1, j = 0; i < arr.length; i++) {
        if (words[j].equals(arr[i])) {
            // Same word as the current run: extend the run.
            counts[j]++;
        } else {
            // A new word starts a new run in the next slot.
            j++;
            words[j] = arr[i];
            counts[j] = 1;
        }
    }

从 Java 8 开始,还可以使用 ConcurrentHashMap 实现一个有趣的解决方案:

    ConcurrentMap<String, Integer> m = new ConcurrentHashMap<>();
    // Stores 1 when "x" is absent, otherwise replaces the stored count with count + 1.
    m.merge("x", 1, Integer::sum);

答案 1 :(得分:11)

在Java 8中,您可以用两行简单的方式编写它!此外,您还可以利用并行计算。

这是最美妙的方式:

// Lower-case the text and cut it at every run of non-word characters.
String[] tokens = text.toLowerCase().split("\\W+");
Stream<String> stream = Stream.of(tokens).parallel();

// Group equal tokens together and count the members of each group.
Map<String, Long> wordFreq = stream.collect(
     Collectors.groupingBy(token -> token, Collectors.counting()));

答案 2 :(得分:3)

试试这个

public class Main
{

    /**
     * Prints a fixed sample sentence, then one "Count of [word] is : n" line for
     * every distinct word, in order of first appearance.
     *
     * @param args unused
     */
    public static void main(String[] args)
    {
        String text = "the quick brown fox jumps fox fox over the lazy dog brown";
        String[] keys = text.split(" ");
        System.out.println(text);

        for (String key : getUniqueKeys(keys))
        {
            int count = 0;
            for (String s : keys)
            {
                if (key.equals(s))
                {
                    count++;
                }
            }
            System.out.println("Count of ["+key+"] is : "+count);
        }
    }

    /**
     * Returns the distinct elements of {@code keys} in order of first appearance.
     * The result has exactly one slot per distinct key (no null padding), and an
     * empty input yields an empty array.
     *
     * @param keys the words to de-duplicate
     * @return a new array holding each distinct word once
     */
    private static String[] getUniqueKeys(String[] keys)
    {
        String[] buffer = new String[keys.length];
        int uniqueCount = 0;

        for (String candidate : keys)
        {
            // Only the slots filled so far can hold a match.
            boolean seen = false;
            for (int j = 0; j < uniqueCount && !seen; j++)
            {
                seen = candidate.equals(buffer[j]);
            }

            if (!seen)
            {
                buffer[uniqueCount++] = candidate;
            }
        }

        // Trim the buffer so callers never see unused null slots.
        String[] uniqueKeys = new String[uniqueCount];
        System.arraycopy(buffer, 0, uniqueKeys, 0, uniqueCount);
        return uniqueKeys;
    }
}

输出:

the quick brown fox jumps fox fox over the lazy dog brown
Count of [the] is : 2
Count of [quick] is : 1
Count of [brown] is : 2
Count of [fox] is : 3
Count of [jumps] is : 1
Count of [over] is : 1
Count of [lazy] is : 1
Count of [dog] is : 1

答案 3 :(得分:3)

import java.util.*;

public class WordCounter {

    /**
     * Splits a fixed sample sentence on single spaces, tallies how often each
     * word occurs and prints the resulting map.
     *
     * @param args unused
     */
    public static void main(String[] args) {

        String s = "this is a this is this a this yes this is a this what it may be i do not care about this";
        Map<String, Integer> words = new HashMap<>();
        for (String token : s.split(" ")) {
            // First sighting stores 1; later sightings add 1 to the stored count.
            words.merge(token, 1, Integer::sum);
        }
        System.out.println(words);
    }
}

输出:{a=3, be=1, may=1, yes=1, this=7, about=1, i=1, is=3, it=1, do=1, not=1, what=1, care=1}

答案 4 :(得分:2)

在Java 10中,您可以使用以下内容:

import java.util.Arrays;
import java.util.stream.Collectors;

public class StringFrequencyMap {
    /**
     * Groups a fixed array of words by exact (case-sensitive) value and prints
     * how many times each one occurs.
     *
     * @param args unused
     */
    public static void main(String... args){
        String[] wordArray = {"One", "One", "Two","Three", "Two", "two"};
        var wordStream = Arrays.stream(wordArray);
        // Each word is its own grouping key; counting() yields the size of each group.
        var freq = wordStream.collect(
                Collectors.groupingBy(word -> word, Collectors.counting()));
        System.out.println(freq);
    }
}

输出:

{One=2, two=1, Two=2, Three=1}

答案 5 :(得分:1)

你可以试试这个

/**
 * Prints "Count of WORD is:N" for every distinct word of {@code s}, in order
 * of first appearance. Words are separated by one or more spaces; leading and
 * trailing whitespace is ignored. Nothing is printed for a blank input.
 *
 * @param s the sentence to analyse; must not be null
 */
public static void frequency(String s) {
    String trimmed = s.trim();
    if (trimmed.isEmpty()) {
        // Splitting an empty string would yield one empty "word"; there is nothing to count.
        return;
    }
    String[] words = trimmed.split(" +");
    // counted[k] is true once words[k] has been tallied as a repeat of an earlier word.
    boolean[] counted = new boolean[words.length];
    for (int i = 0; i < words.length; i++) {
        if (counted[i]) {
            continue;
        }
        int occurrences = 1;
        for (int j = i + 1; j < words.length; j++) {
            if (words[i].equals(words[j])) {
                occurrences++;
                counted[j] = true;
            }
        }
        System.out.println("Count of "+words[i]+" is:"+occurrences);
    }
}

答案 6 :(得分:1)

package naresh.java;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;

public class StringWordDuplicates {

    /**
     * Prints "word:count" for every word that occurs more than once in
     * {@code inputString}. Words are the pieces between single spaces.
     *
     * @param inputString the sentence to analyse
     */
    static void duplicate(String inputString){

        HashMap<String, Integer> wordCount = new HashMap<String,Integer>();

        // Tally every space-separated word.
        for(String token : inputString.split(" ")){
            wordCount.merge(token, 1, Integer::sum);
        }

        // Report only the words seen at least twice.
        wordCount.forEach((word, occurrences) -> {
            if(occurrences>1){
                System.out.println(word+":"+occurrences);
            }
        });
    }

    public static void main(String args[]){
        duplicate("I am Java Programmer and IT Server Programmer with Java as Best Java lover");
    }
}

答案 7 :(得分:0)

为此问题创建了一个简单易懂的解决方案,涵盖了所有测试用例-

import java.util.HashMap;
import java.util.Map;

/*
 * Problem Statement - Count the frequency of each word in a given string, ignoring case,
 * special characters and extra whitespace.
 * Input 1 - "To be or Not to be"
 * Output 1 - to(2 times), be(2 times), or(1 time), not(1 time)
 *
 * Input 2 - "Star 123 ### 123 star"
 * Output 2 - star(2 times), 123(2 times)
 */

public class FrequencyofWords {

    public static void main(String[] args) {
        String s1="To be or not **** to be! is all i ask for";
        fnFrequencyofWords(s1);
    }

    /**
     * Normalises {@code s1}, prints the normalised text, then prints a map from
     * each word to the number of times it occurs.
     *
     * Normalisation removes every character that is not a letter, digit or
     * whitespace, collapses each run of whitespace to a single space, strips
     * leading and trailing whitespace and lower-cases the result.
     *
     * @param s1 the text to analyse; must not be null
     */
    static void fnFrequencyofWords(String s1) {
        // Remove special characters, then normalise whitespace so that no empty "word" is produced.
        s1=s1.replaceAll("[^A-Za-z0-9\\s]","");
        s1=s1.replaceAll("\\s+"," ").trim();
        s1=s1.toLowerCase();

        // Splitting an empty string would give one empty token, so treat it as "no words".
        String[] s2 = s1.isEmpty() ? new String[0] : s1.split(" ");
        System.out.println(s1);

        // Word -> number of occurrences.
        Map<String, Integer> map = new HashMap<String, Integer>();
        for (String word : s2) {
            map.merge(word, 1, Integer::sum);
        }
        System.out.println(map);
    }
}

答案 8 :(得分:0)

请试试下面的代码,可能对您有帮助:

    /** Prints the repeated-word counts of a fixed sample sentence. */
    public static void main(String[] args) {
        final String sentence = "I am indian , I am proud to be indian proud.";
        System.out.println(findFrquenciesInString(sentence));
    }

    /**
     * Counts the words of {@code str1} that occur more than once.
     * Words are the pieces between single spaces and are compared
     * case-sensitively; words that occur only once are not included.
     *
     * @param str1 the sentence to analyse; must not be null
     * @return a map from each repeated word to its number of occurrences
     */
    private static Map<String,Integer> findFrquenciesInString(String str1) {
        String[] strArr=str1.split(" ");
        // counted[k] is true once strArr[k] has been tallied as a repeat of an earlier word.
        // A separate flag array avoids overwriting words with a sentinel value, which
        // would miscount inputs that contain the sentinel text itself.
        boolean[] counted=new boolean[strArr.length];
        Map<String,Integer> map=new HashMap<>();
        for(int i=0;i<strArr.length;i++) {
            if(counted[i]) {
                continue;
            }
            int count=1;
            for(int j=i+1;j<strArr.length;j++) {
                if(strArr[i].equals(strArr[j])) {
                    counted[j]=true;
                    count++;
                }
            }
            if(count>1) {
                map.put(strArr[i], count);
            }
        }
        return map;
    }

答案 9 :(得分:0)

    // s: the space-separated words of st.
    // sf[0..j-1]: the distinct words in order of first appearance.
    // count[k]: number of occurrences of sf[k].
    String s[]=st.split(" ");
    String sf[]=new String[s.length];
    int count[]=new int[s.length];
    // Start with no distinct words, so an empty split result (st made only of spaces) is safe.
    int j=0;
    for(int i=0;i<s.length;i++)
    {
        // Look backwards through the distinct words found so far.
        int t=j-1;
        while(t>=0 && !s[i].equals(sf[t]))
        {
            t--;
        }
        if(t>=0)
        {
            // Already known: bump its counter.
            count[t]++;
        }
        else
        {
            // First sighting: register it as a new distinct word.
            sf[j]=s[i];
            count[j]=1;
            j++;
        }
    }

答案 10 :(得分:0)

在 Java 8 中统计列表元素的出现频率

List<Integer> list = new ArrayList<>();
Collections.addAll(list, 3, 6, 3, 8, 4, 9, 3, 6, 9, 4, 8, 3, 7, 2);

// Group equal numbers together and count the members of each group.
Map<Integer, Long> frequencyMap = list.stream()
        .collect(Collectors.groupingBy(value -> value, Collectors.counting()));

System.out.println(frequencyMap);

注意:如果要统计字符串中单词的频率,请先分割字符串并将其转换为列表,然后用同样的流操作进行计数,此时结果类型为 Map<String, Long>。

Check below link

答案 11 :(得分:0)

只需使用 Java 8 Stream 的 Collectors.groupingBy 收集器:

    import java.util.function.Function;
    import java.util.stream.Collectors;  

    // Sample data; several names appear more than once.
    static String[] COUNTRY_NAMES = {
        "China", "Australia", "India", "USA", "USSR", "UK", "China",
        "France", "Poland", "Austria", "India", "USA", "Egypt", "China"
    };

    // Each name is its own grouping key; counting() yields the size of each group.
    Map<String, Long> result = Stream.of(COUNTRY_NAMES)
            .collect(Collectors.groupingBy(country -> country, Collectors.counting()));

答案 12 :(得分:0)

import java.io.*;

class Linked {

    /**
     * Reads a sentence from standard input and stores each space-terminated
     * piece of it in an array.
     *
     * @param args unused
     * @throws IOException if reading from standard input fails
     */
    public static void main(String args[]) throws IOException {
        BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
        System.out.println("Enter the sentence");
        String st = in.readLine();
        // Append a delimiter so the final word is terminated like all the others.
        st = st + " ";

        String[] tokens = new String[lengthx(st)];
        int next = 0;   // index of the next free slot in tokens
        int start = 0;  // index where the current word begins

        char[] chars = st.toCharArray();
        for (int pos = 0; pos < chars.length; pos++) {
            if (chars[pos] != ' ') {
                continue;
            }
            tokens[next] = st.substring(start, pos);
            next++;
            start = pos + 1;
        }
    }

    /**
     * Returns how many space characters {@code a} contains.
     *
     * @param a the string to inspect
     * @return the number of {@code ' '} characters
     */
    static int lengthx(String a) {
        // Removing every space shortens the string by exactly the number of spaces.
        return a.length() - a.replace(" ", "").length();
    }
}

答案 13 :(得分:0)

以下程序查找频率,对其进行相应排序并打印出来。

以下是按频率分组的输出:

PDO

这是我的程序:

0-10:
       The   2
       Is    4
11-20:
       Have 13
       Done 15

答案 14 :(得分:0)

确定文件中单词的频率。

File f = new File(fileName);
// Map is an interface and cannot be instantiated; use a concrete HashMap.
Map<String, Integer> counts = new java.util.HashMap<>();
// try-with-resources closes the Scanner, and with it the file, even if reading fails.
try (Scanner s = new Scanner(f)) {
    while (s.hasNext()) {
        // First sighting stores 1; later sightings add 1 to the stored count.
        counts.merge(s.next(), 1, Integer::sum);
    }
}

答案 15 :(得分:0)

public class wordFrequency {
    private static Scanner scn;

    /**
     * Prints "word = count" for every distinct word of {@code sent}.
     * The sentence is lower-cased, every character other than a-z and space is
     * removed, and the remainder is split on spaces. Empty pieces (caused by
     * leading or consecutive spaces) are not counted as words.
     *
     * @param sent the sentence to analyse; must not be null
     */
    public static void countwords(String sent) {
        String cleaned = sent.toLowerCase().replaceAll("[^a-z ]", "");
        Map<String, Integer> a = new HashMap<String, Integer>();
        for (String word : cleaned.split(" ")) {
            if (word.isEmpty()) {
                // Produced by leading or repeated spaces; not a real word.
                continue;
            }
            // Single pass: first sighting stores 1, later sightings add 1.
            a.merge(word, 1, Integer::sum);
        }
        for (Map.Entry<String, Integer> entry : a.entrySet()) {
            System.out.println(entry.getKey() + " = " + entry.getValue());
        }
    }

    /** Reads one line from standard input and prints its word counts. */
    public static void main(String[] args) {
        scn = new Scanner(System.in);
        System.out.println("Enter sentence:");
        String inp = scn.nextLine();
        countwords(inp);
    }

}

答案 16 :(得分:0)

class find
{
    /**
     * Prints how many times the word {@code w} occurs in the sentence {@code nm}.
     * Words are separated by single spaces and compared case-sensitively. The
     * last word is counted whether or not the sentence ends with a space.
     *
     * @param nm the sentence to search; must not be null
     * @param w  the word to look for
     */
    public static void main(String nm,String w)
    {
        int c=0;
        // Collects the characters of the word currently being read.
        StringBuilder b=new StringBuilder();

        for(int i=0;i<nm.length();i++)
        {
            char d=nm.charAt(i);
            if(d!=' ')
            {
                b.append(d);
            }
            else
            {
                // A space ends the current word: compare it, then start a new one.
                if(b.toString().equals(w))
                {
                    c++;
                }
                b.setLength(0);
            }
        }
        // The final word has no trailing space to terminate it, so compare it here.
        if(b.length()>0 && b.toString().equals(w))
        {
            c++;
        }
        System.out.println(c);
    }
}

答案 17 :(得分:-1)

public class TestSplit {

    /**
     * Splits a fixed sample sentence on spaces, prints the number of
     * occurrences of each distinct word once (in order of first appearance),
     * and finally prints the full word list.
     *
     * @param args unused
     */
    public static void main(String[] args) {
            String input="Find the repeated word which is repeated in this string";
            List<String> output= Arrays.asList(input.split(" "));

            // Iterate over the distinct words only, so a repeated word is reported a single time.
            for(String str: new java.util.LinkedHashSet<String>(output)) {
                    int occurrences = Collections.frequency(output, str);
                    System.out.println("Occurence of " + str+ " is "+occurrences);
            }

            System.out.println(output);
    }

}

答案 18 :(得分:-1)

public class WordFrequencyProblem {

    /**
     * Splits a fixed sample sentence on whitespace and dots and prints, for
     * each distinct non-empty word in order of first appearance, the word
     * followed by "Present " and its number of occurrences.
     *
     * @param args unused
     */
    public static void main(String args[]){
        String s="the quick brown fox jumps fox fox over the lazy dog brown";
        String[] splitWord = s.split("\\s|\\.");
        for(int i=0;i<splitWord.length;i++){
            String word = splitWord[i];
            // Skip empty pieces (from adjacent delimiters) and words already reported.
            if(word.isEmpty() || appearsEarlier(splitWord, i)){
                continue;
            }
            // This is the first occurrence; count it plus every later one.
            int count = 1;
            for(int j=i+1;j<splitWord.length;j++){
                if(word.equals(splitWord[j])){
                    count++;
                }
            }
            System.out.println(word +"Present "+ count);
        }
    }

    /**
     * Tells whether {@code words[index]} also occurs at a smaller index.
     * Whole words are compared, so "he" is not mistaken for part of "the".
     */
    private static boolean appearsEarlier(String[] words, int index){
        for(int k=0;k<index;k++){
            if(words[k].equals(words[index])){
                return true;
            }
        }
        return false;
    }

}