Count the number of words in a string

Improve Article

Save Article

Like Article

  • Read
  • Discuss(120+)
  • Improve Article

    Save Article

    Like Article

    Given a string, count the number of words in it. The words are separated by the following characters: space (‘ ’), new line (‘\n’), tab (‘\t’), or a combination of these.

    Method 1: The idea is to maintain two states: IN and OUT. The state OUT indicates that a separator is seen. State IN indicates that a word character is seen. We increment word count when previous state is OUT and next character is a word character. 

    C++

    #include <bits/stdc++.h>

    using namespace std;

    // Scanner states: OUT = the last character was a separator (or nothing has
    // been read yet), IN = the last character belonged to a word.
    #define OUT 0
    #define IN 1

    /**
     * Counts the words in a NUL-terminated string.
     *
     * A word is a maximal run of characters other than space, newline ('\n')
     * and tab ('\t'). The count goes up once per OUT -> IN transition, i.e. on
     * the first character of every word.
     *
     * @param str NUL-terminated input; it is only read, never modified.
     * @return number of words found (0 for an empty or all-separator string).
     */
    unsigned countWords(const char *str)
    {
        int state = OUT;
        unsigned wc = 0;

        for (; *str; ++str)
        {
            // The separators are the escape sequences '\n' and '\t', not the
            // letters 'n' and 't'.
            if (*str == ' ' || *str == '\n' || *str == '\t')
                state = OUT;
            else if (state == OUT)
            {
                state = IN;
                ++wc;
            }
        }

        return wc;
    }

    // Demo driver: prints the word count of a sample that mixes spaces, a
    // newline and a tab as separators (five words).
    int main(void)
    {
        char str[] = "One two     three\n four\tfive ";
        cout << "No of words : " << countWords(str);
        return 0;
    }

    C

    #include <stdio.h>

    /* Scanner states: OUT = the last character was a separator (or nothing has
       been read yet), IN = the last character belonged to a word. */
    #define OUT    0
    #define IN    1

    /*
     * Returns the number of words in the NUL-terminated string str.
     *
     * A word is a maximal run of characters other than space, newline ('\n')
     * and tab ('\t'). The counter is bumped on every OUT -> IN transition.
     * The string is only read, never modified.
     */
    unsigned countWords(const char *str)
    {
        int state = OUT;
        unsigned wc = 0; /* word count */

        for (; *str; ++str)
        {
            /* Separators are the escape sequences, not the letters n and t. */
            if (*str == ' ' || *str == '\n' || *str == '\t')
                state = OUT;
            else if (state == OUT)
            {
                state = IN;
                ++wc;
            }
        }

        return wc;
    }

    /* Demo driver: the sample mixes spaces, a newline and a tab (five words). */
    int main(void)
    {
        char str[] = "One two         three\n    four\tfive  ";
        printf("No of words : %u", countWords(str));
        return 0;
    }

    Java

    /**
     * Counts words with a two-state scanner (Method 1).
     */
    public class GFG {

        /** Scanner state: the previous character was a separator, or nothing has been read yet. */
        static final int OUT = 0;

        /** Scanner state: the previous character belonged to a word. */
        static final int IN = 1;

        /**
         * Returns the number of words in {@code str}.
         *
         * A word is a maximal run of characters other than space, newline
         * ('\n') and tab ('\t'); the counter goes up on each OUT -> IN
         * transition.
         *
         * @param str the text to scan (must not be null)
         * @return the number of words
         */
        static int countWords(String str)
        {
            int state = OUT;
            int wc = 0; // word count

            for (int i = 0; i < str.length(); ++i)
            {
                char ch = str.charAt(i);

                // Separators are the escape sequences, not the letters n and t.
                if (ch == ' ' || ch == '\n' || ch == '\t')
                    state = OUT;
                else if (state == OUT)
                {
                    state = IN;
                    ++wc;
                }
            }
            return wc;
        }

        // Driver: the sample mixes spaces, a newline and a tab (five words).
        public static void main(String args[])
        {
            String str = "One two       three\n four\tfive  ";
            System.out.println("No of words : " + countWords(str));
        }
    }

    Python3

    # Scanner states: OUT = the last character was a separator (or nothing has
    # been read yet), IN = the last character belonged to a word.
    OUT = 0
    IN = 1


    def countWords(string):
        """Return the number of words in ``string``.

        A word is a maximal run of characters other than space, newline
        ('\\n') and tab ('\\t'). The counter goes up on each OUT -> IN
        transition, i.e. on the first character of every word.
        """
        state = OUT
        wc = 0

        for ch in string:
            # Separators are the escape sequences, not the letters n and t.
            if ch == ' ' or ch == '\n' or ch == '\t':
                state = OUT
            elif state == OUT:
                state = IN
                wc += 1

        return wc


    # Driver: the sample mixes spaces, a newline and a tab (five words).
    string = "One two         three\n four\tfive "
    print("No. of words : " + str(countWords(string)))

    C#

    using System;

    // Counts words with a two-state scanner (Method 1).
    class GFG {

        // Scanner state: the previous character was a separator, or nothing has been read yet.
        const int OUT = 0;

        // Scanner state: the previous character belonged to a word.
        const int IN = 1;

        // Returns the number of words in str. A word is a maximal run of
        // characters other than space, newline ('\n') and tab ('\t'); the
        // counter goes up on each OUT -> IN transition.
        static int countWords(String str)
        {
            int state = OUT;
            int wc = 0; // word count

            for (int i = 0; i < str.Length; ++i)
            {
                char ch = str[i];

                // Separators are the escape sequences, not the letters n and t.
                if (ch == ' ' || ch == '\n' || ch == '\t')
                    state = OUT;
                else if (state == OUT)
                {
                    state = IN;
                    ++wc;
                }
            }
            return wc;
        }

        // Driver: the sample mixes spaces, a newline and a tab (five words).
        public static void Main()
        {
            String str = "One two     three\n four\tfive ";
            Console.WriteLine("No of words : "
                                  + countWords(str));
        }
    }

    PHP

    <?php

    // Scanner states: $OUT = the last character was a separator (or nothing
    // has been read yet), $IN = the last character belonged to a word.
    $OUT = 0;
    $IN = 1;

    /**
     * Returns the number of words in $str.
     *
     * A word is a maximal run of characters other than space, newline and tab;
     * the counter goes up on each $OUT -> $IN transition.
     *
     * @param string $str text to scan
     * @return int number of words
     */
    function countWords($str)
    {
        global $OUT, $IN;

        $state = $OUT;
        $wc = 0; // word count
        $len = strlen($str); // length is loop-invariant

        for ($i = 0; $i < $len; ++$i)
        {
            $ch = $str[$i];

            // Double-quoted "\n" and "\t" are the newline and tab characters,
            // not the letters n and t.
            if ($ch == " " || $ch == "\n" || $ch == "\t")
                $state = $OUT;
            else if ($state == $OUT)
            {
                $state = $IN;
                ++$wc;
            }
        }

        return $wc;
    }

    // Driver: the sample mixes spaces, a newline and a tab (five words).
    $str = "One two         three\n four\tfive ";
    echo "No of words : " . countWords($str);

    ?>

    Javascript

    <script>

        var OUT = 0;

        var IN = 1;

        function countWords( str)

        {

            var state = OUT;

            var wc = 0;

            var i = 0;

            while (i < str.length)

            {

                if (str[i] == ' ' || str[i] == 'n'||

                                      str[i] == 't')

                    state = OUT;

                else if (state == OUT)

                {

                    state = IN;

                    ++wc;

                }

                ++i;

            }

            return wc;

        }

            var str = "One two     threen fourtfive ";

            document.write("No of words : "

                                  + countWords(str));

    </script>

    Time complexity: O(n)
    Auxiliary Space: O(1)

    This article is compiled by Aarti_Rathi and Narendra Kangralkar. Please write comments if you find anything incorrect, or you want to share more information about the topic discussed above.

    Method 2: using String.split() method

    1. Get the string to count the total number of words.
    2. Check if the string is empty or null then return 0.
    3. Use split() method of String class to split the string on whitespaces.
    4. The split() method breaks the given string around matches of the given regular expression and returns an array of string.
    5. The length of the array is the number of words in the given string.
    6. Now, print the result.

    Below is the implementation of the above approach:

    C++

    #include <bits/stdc++.h>

    using namespace std;

    /**
     * Counts the whitespace-separated words in `str`.
     *
     * A word is a maximal run of non-whitespace characters. The string is
     * scanned once and a word is counted on its first character, so leading,
     * trailing and repeated separators never produce phantom words, and no
     * list of tokens has to be stored.
     *
     * @param str text to scan; may be empty.
     * @return number of words (0 for an empty or all-whitespace string).
     */
    int countWords(const std::string& str)
    {
        int count = 0;
        bool inWord = false;

        for (const char ch : str) {
            // Cast first: passing a negative char value to std::isspace is
            // undefined behaviour.
            if (std::isspace(static_cast<unsigned char>(ch))) {
                inWord = false;
            }
            else if (!inWord) {
                inWord = true;
                ++count;
            }
        }

        return count;
    }

    // Demo driver: the sample mixes spaces, a newline and a tab (five words).
    int main()
    {
      string str = "One two       three\n four\tfive ";
      cout << "No of words : " << countWords(str);
      return 0;
    }

    Java

    import java.io.*;

    /**
     * Counts words by splitting the text on runs of whitespace (Method 2).
     */
    class GFG
    {
        /**
         * Returns the number of whitespace-separated words in {@code str}.
         *
         * @param str the text to inspect; may be null
         * @return the word count, 0 for null, empty or all-whitespace input
         */
        public static int countWords(String str)
        {
            if (str == null)
                return 0;

            // Trim first: split() keeps a leading empty string when the input
            // starts with a separator, which would inflate the count by one.
            String trimmed = str.trim();
            if (trimmed.isEmpty())
                return 0;

            // The regex \s+ has to be written "\\s+" in Java source.
            return trimmed.split("\\s+").length;
        }

        // Driver: the sample mixes spaces, a newline and a tab (five words).
        public static void main(String args[])
        {
            String str =
              "One two       three\n four\tfive ";
            System.out.println("No of words : " +
               countWords(str));
        }
    }

    Python3

    def countWords(s):
        """Return the number of whitespace-separated words in ``s``.

        ``str.split()`` with no argument splits on runs of whitespace and
        drops empty strings, so an empty or all-whitespace input yields an
        empty list and therefore 0.
        """
        return len(s.split())


    if __name__ == "__main__":
        # The sample mixes spaces, a newline and a tab (five words).
        s = "One two       three\n four\tfive "
        print("No of words : ", countWords(s))

    C#

    using System;

    // Counts words by splitting the text on white space (Method 2).
    public class GFG
    {
        // Returns the number of white-space-separated words in str
        // (0 for null, empty or all-white-space input).
        public static int countWords(String str)
        {
            if (string.IsNullOrEmpty(str))
            {
                return 0;
            }

            // A null separator array makes Split break on white-space
            // characters, and RemoveEmptyEntries drops the empty strings that
            // consecutive separators would otherwise produce, so the array
            // length is exactly the word count.
            String[] words = str.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);
            return words.Length;
        }

        // Driver: the sample mixes spaces, a newline and a tab (five words).
        public static void Main(String[] args)
        {
            var str = "One two       three\n four\tfive ";
            Console.WriteLine("No of words : " + GFG.countWords(str).ToString());
        }
    }

    Javascript

    /**
     * Returns the number of whitespace-separated words in str.
     *
     * The string is trimmed first so that leading or trailing separators do
     * not yield empty pieces, then split on runs of whitespace.
     */
    function countWords(str)
    {
      const trimmed = str.trim();
      if (trimmed.length === 0) {
        return 0;
      }
      return trimmed.split(/\s+/).length;
    }

      // Driver: the sample mixes spaces, a newline and a tab (five words).
      var str = "One two       three\n four\tfive ";
      console.log("No of words : " +countWords(str));

    Time Complexity: O(N)
    Auxiliary Space: O(N), because the array of tokens produced by the split is stored

    Method 3:  using StringTokenizer.countTokens() method

    1. Get the string to count the total number of words.
    2. Check if the string is empty or null then return 0.
    3. Create a StringTokenizer with the given string passed as a parameter.
    4. Count the total number of words in the given string using the countTokens() method.
    5. Now, print the result.

    Below is the implementation of the above approach:

    C++

    #include <bits/stdc++.h>

    using namespace std;

    // Counts the '/'-delimited pieces of `s` by pulling them one at a time out
    // of a string stream. An empty input gives 0. Empty pieces between two
    // delimiters are counted; a delimiter at the very end does not start a
    // further piece.
    int countWords(string s)
    {
        int pieces = 0;

        if (!s.empty()) {
            istringstream input(s);
            for (string piece; getline(input, piece, '/');)
                pieces++;
        }

        return pieces;
    }

    // Demo driver: prints the number of '/'-delimited pieces in the sample.
    int main()
    {
        const string sample = "One/ two /      three/n four/tfive ";
        const int total = countWords(sample);
        cout << "No of words: " << total << endl;
        return 0;
    }

    Java

    import java.util.StringTokenizer;

    /**
     * Counts words with {@link java.util.StringTokenizer} (Method 3).
     */
    class GFG
    {
        /**
         * Returns the number of tokens in {@code str}, using the tokenizer's
         * default delimiter set (space, tab, newline, carriage return and
         * form feed).
         *
         * @param str the text to inspect; may be null
         * @return the token count, 0 for null or empty input
         */
        public static int countWords(String str)
        {
            if (str == null || str.isEmpty())
                return 0;

            return new StringTokenizer(str).countTokens();
        }

        // Driver: the sample mixes spaces, a newline and a tab (five words).
        public static void main(String args[])
        {
            String str =
              "One two       three\n four\tfive ";
            System.out.println("No of words: " +
              countWords(str));
        }
    }

    Python3

    def count_words(s):
        """Return how many '/'-separated pieces of ``s`` contain non-blank text.

        A falsy ``s`` gives 0. Pieces that are empty or consist only of
        whitespace are not counted.
        """
        if not s:
            return 0
        return sum(1 for piece in s.split("/") if piece.strip())


    # Driver code
    s = "One/ two /      three/n four/tfive "
    print("No of words:", count_words(s))

    C#

    using System;

    // Counts words by tokenising the text on white space (Method 3).
    class GFG
    {
      // Returns the number of white-space-separated tokens in str
      // (0 for null, empty or all-white-space input).
      public static int
        countWords(String str)
      {
        if (string.IsNullOrEmpty(str))
          return 0;

        // A plain Split(' ') also returns the empty strings between
        // consecutive spaces and ignores tabs and newlines. A null separator
        // array splits on white-space characters, and RemoveEmptyEntries
        // discards the empty pieces.
        string[] tokens = str.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);
        return tokens.Length;
      }

      // Driver: the sample mixes spaces, a newline and a tab (five words).
      public static void Main()
      {
        string str =
          "One two     three\n four\tfive ";
        Console.Write("No of words: " +
                      countWords(str));
      }
    }

    Javascript

    /**
     * Returns the number of '/'-delimited pieces in s: 0 for an empty string,
     * otherwise the length of the array produced by splitting on "/" (empty
     * pieces included).
     */
    function countWords(s)
    {
      return s.length === 0 ? 0 : s.split("/").length;
    }

    // Driver code
    const str = "One/ two /      three/n four/tfive ";
    console.log(`No of words: ${countWords(str)}`);

    Time Complexity: O(N)
    Auxiliary Space : O(1)

    Method 4: using Character.isLetter() method

    1. Get the string to count the total number of words.
    2. Check if the string is empty or null then return 0.
    3. Converting the given string into a character array.
    4. Check if the character is a letter and index of the character array doesn’t equal to the end of the line that means, it is a word and set isWord by true.
    5. Check if the character is not a letter that means there is a space, then we increment the wordCount by one and set the isWord by false.
    6. Check for the last word of the sentence and increment the wordCount by one.
    7. Now, print the result.

    Below is the implementation of the above approach:

    C++

    #include <bits/stdc++.h>

    using namespace std;

    /**
     * Counts the words in the first `n` characters of `str`, where a word is a
     * maximal run of alphabetic characters (as classified by std::isalpha).
     *
     * A word is counted on its first letter, so no special case is needed for
     * a word that ends exactly at the end of the buffer. Digits and
     * punctuation are separators: "fri3nd" is two words.
     *
     * @param str character buffer; only read, never modified.
     * @param n   number of characters to examine (without any terminator).
     * @return number of letter runs; 0 when n <= 0.
     */
    int countWords(const char* str, int n)
    {
        int wordCount = 0;
        bool inWord = false;

        for (int i = 0; i < n; i++) {
            // Cast first: passing a negative char value to std::isalpha is
            // undefined behaviour.
            const bool letter = std::isalpha(static_cast<unsigned char>(str[i])) != 0;

            if (letter && !inWord) {
                wordCount++;
            }
            inWord = letter;
        }

        return wordCount;
    }

    // Demo driver: the sample mixes spaces, a newline and a tab (five words).
    int main()
    {
        char str[] = "One two     three\n four\tfive ";

        // sizeof also counts the terminating '\0', which is not part of the text.
        int n = (sizeof(str) / sizeof(char)) - 1;

        cout << "No of words : " << countWords(str, n);
        return 0;
    }

    Java

    import java.io.*;

    /**
     * Counts words as runs of letters, using Character.isLetter (Method 4).
     */
    class GFG
    {
        /**
         * Returns the number of maximal runs of letters in {@code str}.
         * Digits, punctuation and whitespace all act as separators.
         *
         * A word is counted on its first letter, so a word that ends at the
         * last character needs no special handling.
         *
         * @param str the text to inspect; may be null
         * @return the word count, 0 for null or empty input
         */
        public static int countWords(String str)
        {
            if (str == null || str.isEmpty())
                return 0;

            int wordCount = 0;
            boolean inWord = false;

            for (int i = 0; i < str.length(); i++) {
                boolean letter = Character.isLetter(str.charAt(i));
                if (letter && !inWord)
                    wordCount++;
                inWord = letter;
            }
            return wordCount;
        }

        // Driver: the sample mixes spaces, a newline and a tab (five words).
        public static void main(String args[])
        {
            String str =
              "One two       three\n four\tfive ";
            System.out.println("No of words : " +
              countWords(str));
        }
    }

    Python3

    def countWords(Str):
        """Return the number of maximal runs of letters in ``Str``.

        Characters for which ``str.isalpha()`` is false (digits, punctuation,
        whitespace) act as separators. ``None`` and the empty string give 0.
        A word is counted on its first letter, so a word ending at the last
        character needs no special handling.
        """
        if Str is None or len(Str) == 0:
            return 0

        wordCount = 0
        inWord = False

        for ch in Str:
            letter = ch.isalpha()
            if letter and not inWord:
                wordCount += 1
            inWord = letter

        return wordCount


    # Driver: the sample mixes spaces, a newline and a tab (five words).
    Str = "One two       three\n four\tfive "
    print("No of words :", countWords(Str))

    C#

    using System;

    // Counts words as runs of letters, using Char.IsLetter (Method 4).
    public class GFG
    {
      // Returns the number of maximal runs of letters in str. Digits,
      // punctuation and white space act as separators. Null gives 0.
      // A word is counted on its first letter, so a word that ends at the
      // last character needs no special handling.
      static int countWords(String str)
      {
        if (str == null)
        {
          return 0;
        }

        int wordCount = 0;
        bool inWord = false;

        for (int i = 0; i < str.Length; i++)
        {
          bool letter = Char.IsLetter(str[i]);
          if (letter && !inWord)
          {
            wordCount++;
          }
          inWord = letter;
        }
        return wordCount;
      }

      // Driver: the sample mixes spaces, a newline and a tab (five words).
      static public void Main ()
      {
        string str = "One two       three\n four\tfive ";
        Console.WriteLine("No of words : " + countWords(str));
      }
    }

    Javascript

    <script>

    /**
     * Returns the number of maximal runs of letters in str (0 for null,
     * undefined or an empty string). A character is a letter when isLetter()
     * says so; everything else separates words.
     */
    function countWords(str)
    {
        if (str == null || str.length == 0)
            return 0;

        let total = 0;
        let insideWord = false;

        for (const ch of str.split("")) {
            const letter = isLetter(ch);
            // Count a word on its first letter only.
            if (letter && !insideWord)
                total++;
            insideWord = letter;
        }
        return total;
    }

    // A character counts as a letter when its lower- and upper-case forms differ.
    function isLetter(c) {
      const lower = c.toLowerCase();
      const upper = c.toUpperCase();
      return lower != upper;
    }

    // Driver: the sample mixes spaces, a newline and a tab (five words).
    let str="One two       three\n four\tfive ";
    document.write("No of words : " +
              countWords(str));

    </script>

    Time Complexity: O(N)
    Auxiliary Space: O(1) 

    Like Article

    Save Article

    I use the str_count function from the stringr library with the escape sequence \w that represents:

    any ‘word’ character (letter, digit or underscore in the current
    locale: in UTF-8 mode only ASCII letters and digits are considered)

    Example:

    > str_count("How many words are in this sentence", '\\w+')
    [1] 7
    

    Of all other 9 answers that I was able to test, only two (by Vincent Zoonekynd, and by petermeissner) worked for all inputs presented here so far, but they also require stringr.

    But only this solution works with all inputs presented so far, plus inputs such as "foo+bar+baz~spam+eggs" or "Combien de mots sont dans cette phrase ?".

    Benchmark:

    library(stringr)
    
    # Test inputs, paired position-by-position with the expected counts in
    # `answers`.
    questions <-
      c(
        "", "x", "x y", "x y!", "x y! z",
        "foo+bar+baz~spam+eggs",
        "one,   two three 4,,,, 5 6",
        "How many words are in this sentence",
        "How  many words    are in this   sentence",
        "Combien de mots sont dans cette phrase ?",
        "
        Day after day, day after day,
        We stuck, nor breath nor motion;
        "
      )

    # Expected number of words for each element of `questions`.
    answers <- c(0, 1, 2, 2, 3, 5, 6, 7, 7, 7, 12)

    # Number of questions for which f returns the expected count.
    score <- function(f) sum(unlist(lapply(questions, f)) == answers)

    # Candidate word counters. A backslash must be doubled inside an R string
    # literal: "\\W+" is the regex \W+, whereas "\W" is rejected by the parser
    # as an unrecognized escape.
    funs <-
      c(
        function(s) sapply(gregexpr("\\W+", s), length) + 1,
        function(s) sapply(gregexpr("[[:alpha:]]+", s), function(x) sum(x > 0)),
        function(s) vapply(strsplit(s, "\\W+"), length, integer(1)),
        function(s) length(strsplit(gsub(' {2,}', ' ', s), ' ')[[1]]),
        function(s) length(str_match_all(s, "\\S+")[[1]]),
        function(s) str_count(s, "\\S+"),
        function(s) sapply(gregexpr("\\W+", s), function(x) sum(x > 0)) + 1,
        function(s) length(unlist(strsplit(s," "))),
        function(s) sapply(strsplit(s, " "), length),
        function(s) str_count(s, '\\w+')
      )

    # One score per candidate, in the order listed above.
    unlist(lapply(funs, score))
    

    Output (11 is the maximum possible score):

    6 10 10  8  9  9  7  6  6 11
    

    (PHP 4 >= 4.3.0, PHP 5, PHP 7, PHP 8)

    str_word_count
    Return information about words used in a string

    Description

    str_word_count(string $string, int $format = 0, ?string $characters = null): array|int

    For the purpose of this function, ‘word’ is defined as a locale dependent
    string containing alphabetic characters, which also may contain, but not start
    with «‘» and «-» characters.
    Note that multibyte locales are not supported.

    Parameters

    string

    The string

    format

    Specify the return value of this function. The current supported values
    are:


    • 0 — returns the number of words found

    • 1 — returns an array containing all the words found inside the
      string

    • 2 — returns an associative array, where the key is the numeric
      position of the word inside the string and
      the value is the actual word itself
    characters

    A list of additional characters which will be considered as ‘word’

    Return Values

    Returns an array or an integer, depending on the
    format chosen.

    Changelog

    Version Description
    8.0.0 characters is nullable now.

    Examples

    Example #1 A str_word_count() example


    <?php
    // The line break and the runs of spaces are part of the sample: with them
    // the words start at byte offsets 0, 6, 10, 14, 29, 46 and 51, which is
    // what format 2 reports.
    $str = "Hello fri3nd, you're\n"
         . "        looking          good today!";

    print_r(str_word_count($str, 1));            // list of words
    print_r(str_word_count($str, 2));            // offset => word
    print_r(str_word_count($str, 1, 'àáãç3'));   // also treat these characters as word characters

    echo str_word_count($str);                   // number of words
    ?>

    The above example will output:

    Array
    (
        [0] => Hello
        [1] => fri
        [2] => nd
        [3] => you're
        [4] => looking
        [5] => good
        [6] => today
    )
    
    Array
    (
        [0] => Hello
        [6] => fri
        [10] => nd
        [14] => you're
        [29] => looking
        [46] => good
        [51] => today
    )
    
    Array
    (
        [0] => Hello
        [1] => fri3nd
        [2] => you're
        [3] => looking
        [4] => good
        [5] => today
    )
    
    7
    

    See Also

    • explode() — Split a string by a string
    • preg_split() — Split string by a regular expression
    • count_chars() — Return information about characters used in a string
    • substr_count() — Count the number of substring occurrences

    cito at wikatu dot com

    11 years ago


    <?php
    /***
    * This simple utf-8 word count function (it only counts)
    * is a bit faster than the one with preg_match_all
    * about 10x slower than the built-in str_word_count
    *
    * If you need the hyphen or other code points as word-characters
    * just put them into the [brackets] like [^\p{L}\p{N}'-]
    * If the pattern contains utf-8, utf8_encode() the pattern,
    * as it is expected to be valid utf-8 (using the u modifier).
    **/

    // Jonny 5's simple word splitter
    function str_word_count_utf8($str) {
      // Split on every run of characters that are not letters, digits or
      // apostrophes. PREG_SPLIT_NO_EMPTY drops the empty pieces that appear
      // when the text is empty or starts/ends with a separator, so "" counts
      // as 0 words and "word." as 1.
      return count(preg_split('~[^\p{L}\p{N}\']+~u', $str, -1, PREG_SPLIT_NO_EMPTY));
    }
    ?>

    splogamurugan at gmail dot com

    14 years ago


    We can also specify a range of values for charlist.

    <?php

    // Sample text spread over several lines; it ends with "look1234ing", a
    // word that has digits in the middle.
    $str
    = "Hello fri3nd, you're

           looking          good today!

           look1234ing"
    ;

    // '0..3' is a charlist range: the digits 0 through 3 are treated as word
    // characters, the digit 4 is not.
    print_r(str_word_count($str, 1, '0..3'));

    ?>



    will give the result as

    Array ( [0] => Hello [1] => fri3nd [2] => you're [3] => looking [4] => good [5] => today [6] => look123 [7] => ing )


    Adeel Khan

    15 years ago


    <?php
    /**
    * Returns the number of words in a string.
    * As far as I have tested, it is very accurate.
    * The string can have HTML in it,
    * but you should do something like this first:
    *
    *    $search = array(
    *      '@<script[^>]*?>.*?</script>@si',
    *      '@<style[^>]*?>.*?</style>@siU',
    *      '@<![\s\S]*?--[ \t\n\r]*>@'
    *    );
    *    $html = preg_replace($search, '', $html);
    *
    * @param string $html text, optionally containing HTML tags
    * @return int number of words
    */
    function word_count($html) {
        # strip all html tags
        $wc = strip_tags($html);

        # remove 'words' that don't consist of alphanumerical characters or punctuation
        # (single-quoted so that only \' and \\ need escaping for PHP itself)
        $pattern = '#[^\w\d\'"().|!?;,\\\\/\-:&@]+#';
        $wc = trim(preg_replace($pattern, " ", $wc));

        # remove one-letter 'words' that consist only of punctuation
        $wc = trim(preg_replace('#\s*[(\'|".!?;,\\\\/\-:&@)]\s*#', " ", $wc));

        # remove superfluous whitespace
        $wc = preg_replace('/\s\s+/', " ", $wc);

        # split string into an array of words
        $wc = explode(" ", $wc);

        # remove empty elements; compare with '' explicitly because a bare
        # array_filter() would also throw away the word "0"
        $wc = array_filter($wc, function ($word) { return $word !== ''; });

        # return the number of words
        return count($wc);
    }

    ?>

    uri at speedy dot net

    10 years ago


    Here is a count words function which supports UTF-8 and Hebrew. I tried other functions but they don't work. Notice that in Hebrew, '"' and ''' can be used in words, so they are not separators. This function is not perfect, I would prefer a function we are using in JavaScript which considers all characters except [a-zA-Zא-ת0-9_'"] as separators, but I don't know how to do it in PHP.

    I removed some of the separators which don't work well with Hebrew ("x20", "xA0", "x0A", "x0D", "x09", "x0B", "x2E"). I also removed the underline.

    This is a fix to my previous post on this page - I found out that my function returned an incorrect result for an empty string. I corrected it and I'm also attaching another function - my_strlen.

    <?php
    /**
     * Return the number of words in a string.
     *
     * Every character listed in $t is turned into a space, runs of whitespace
     * are collapsed, and the remaining space-separated pieces are counted.
     *
     * @param string $string UTF-8 text
     * @return int number of words, 0 for an empty or separator-only string
     */
    function count_words($string) {
        // NOTE(review): as written this replaces an apostrophe with itself (a
        // no-op). The search string was presumably an HTML entity such as
        // "&#39;" that got decoded when the page was rendered - confirm
        // against the original post before changing it.
        $string = str_replace("'", "'", $string);

        // separators ("\t" is the tab character, '\\' a single backslash)
        $t = array(' ', "\t", '=', '+', '-', '*', '/', '\\', ',', '.', ';', ':', '[', ']', '{', '}', '(', ')', '<', '>', '&', '%', '$', '@', '#', '^', '!', '?', '~');
        $string = str_replace($t, " ", $string);

        $string = trim(preg_replace('/\s+/', " ", $string));

        $num = 0;
        // explode() on an empty string would still return one element.
        if (my_strlen($string) > 0) {
            $word_array = explode(" ", $string);
            $num = count($word_array);
        }
        return $num;
    }

    // Return mb_strlen with encoding UTF-8.
    function my_strlen($s) {
        return mb_strlen($s, "UTF-8");
    }
    ?>


    charliefrancis at gmail dot com

    13 years ago


    Hi this is the first time I have posted on the php manual, I hope some of you will like this little function I wrote.

    It returns a string with a certain character limit, but still retaining whole words.
    It breaks out of the foreach loop once it has found a string short enough to display, and the character list can be edited.

    <?php
    /**
     * Shortens $text to whole words that fit within $limit characters.
     *
     * Text that is not longer than $limit is returned untouched. Otherwise
     * the leading words whose end offset stays below $limit are kept, joined
     * with single spaces and followed by '&hellip;'.
     *
     * @param string $text  text to shorten
     * @param int    $limit character limit
     * @param string $chars extra characters str_word_count() treats as part of a word
     * @return string
     */
    function word_limiter( $text, $limit = 30, $chars = '0123456789' ) {
        if ( strlen( $text ) <= $limit ) {
            return $text;
        }

        $kept = array();
        // Format 2 yields offset => word in reading order; end offsets only
        // grow, so the first word that overshoots ends the selection.
        foreach ( str_word_count( $text, 2, $chars ) as $offset => $word ) {
            if ( $offset + strlen( $word ) >= $limit ) {
                break;
            }
            $kept[] = $word;
        }

        return implode( " ", $kept ) . '&hellip;';
    }

    $str = "Hello this is a list of words that is too long";
    echo '1: ' . word_limiter( $str );
    $str = "Hello this is a list of words";
    echo '2: ' . word_limiter( $str );
    ?>

    1: Hello this is a list of words&hellip;
    2: Hello this is a list of words


    amosbatto at yahoo dot com

    2 years ago


    //To get an accurate word count in English, some diacritical marks have
    // to be added for words like née, Chloë, naïve, coöpt, façade, piñata, etc. 
    $count = str_word_count($str, 0, 'éëïöçñÉËÏÖÇÑ');

    //To get the word count for any European language using a Roman alphabet:
    $count = str_word_count($str, 0, 'äëïöüÄËÏÖÜáǽćéíĺńóŕśúźÁǼĆÉÍĹŃÓŔŚÚŹ'.
       'àèìòùÀÈÌÒÙãẽĩõñũÃẼĨÕÑŨâêîôûÂÊÎÔÛăĕğĭŏœ̆ŭĂĔĞĬŎŒ̆Ŭ'.
       'āēīōūĀĒĪŌŪőűŐŰąęįųĄĘĮŲåůÅŮæÆøØýÝÿŸþÞẞßđĐıIœŒ'.
       'čďěľňřšťžČĎĚĽŇŘŠŤŽƒƑðÐłŁçģķļșțÇĢĶĻȘȚħĦċėġżĊĖĠŻʒƷǯǮŋŊŧŦ');


    manrash at gmail dot com

    14 years ago


    For spanish speakers a valid character map may be:

    <?php

    // Accented letters passed as the third argument of str_word_count(), i.e.
    // extra characters to be considered part of a word.
    // NOTE(review): the manual text says multibyte locales are not supported;
    // confirm this behaves as intended when the source file is UTF-8.
    $characterMap
    = 'áéíóúüñ';
    $count = str_word_count($text, 0, $characterMap);

    ?>


    MadCoder

    17 years ago


    Here's a function that will trim a $string down to a certian number of words, and add a...   on the end of it.

    (explansion of muz1's 1st 100 words code)

    ----------------------------------------------

    <?php

    /**
     * Trims $text down to its first $count words.
     *
     * Words are separated by whitespace; runs of whitespace collapse to a
     * single space in the result. "..." is appended only when words were
     * actually cut off.
     *
     * @param string $text  text to shorten
     * @param int    $count maximum number of words to keep
     * @return string
     */
    function trim_text($text, $count){
        $words = preg_split('/\s+/', trim($text), -1, PREG_SPLIT_NO_EMPTY);

        // Nothing to cut: return the words without an ellipsis.
        if (count($words) <= $count) {
            return implode(" ", $words);
        }

        return implode(" ", array_slice($words, 0, $count)) . "...";
    }

    ?>



    Usage

    ------------------------------------------------

    <?php

    // Sample input: four words.
    $string
    = "one two three four";

    // Print the result of trim_text() called with a word count of 3.
    echo
    trim_text($string, 3);

    ?>



    returns:

    one two three...


    Anonymous

    18 years ago


    This function seems to view numbers as whitespace. I.e. a word consisting of numbers only won't be counted.

    brettNOSPAM at olwm dot NO_SPAM dot com

    20 years ago


    This example may not be pretty, but It proves accurate:

    <?php

    //count words

    // Work on the text without markup.
    $words_to_count = strip_tags($body);

    // Anything that is not a word character, a digit or one of the listed
    // punctuation marks becomes a single space. The pattern is single-quoted
    // so that only \' and \\ need escaping for PHP itself.
    $pattern = '/[^\w\d\'"().|!?;,\\\\\/\-:&@]+/';

    $words_to_count = preg_replace($pattern, " ", $words_to_count);

    $words_to_count = trim($words_to_count);

    // explode() on an empty string still returns one (empty) element, so an
    // empty text has to be reported as 0 words explicitly.
    $total_words = ($words_to_count === '') ? 0 : count(explode(" ", $words_to_count));

    ?>



    Hope I didn't miss any punctuation. ;-)


    brettz9 — see yahoo

    13 years ago


    Words also cannot end in a hyphen unless allowed by the charlist...

    php dot net at salagir dot com

    5 years ago


    This function doesn't handle  accents, even in a locale with accent.
    <?php
    echo str_word_count("Is working"); // =2
    // The locale has to be set by a statement of its own; it must not sit
    // inside the comment of the previous line.
    setlocale(LC_ALL, 'fr_FR.utf8');
    echo str_word_count("Not wôrking"); // expects 2, got 3.
    ?>

    Cito solution treats punctuation as words and thus isn't a good workaround.
    <?php
    // Splits on runs of characters that are not letters, digits or
    // apostrophes and counts the pieces. Empty pieces are kept, so text that
    // ends with punctuation yields one extra (empty) "word".
    function str_word_count_utf8($str) {
          return count(preg_split('~[^\p{L}\p{N}\']+~u', $str));
    }
    echo str_word_count_utf8("Is wôrking"); //=2
    echo str_word_count_utf8("Not wôrking."); //=3
    ?>

    My solution:
    <?php
    /**
     * Counts the words in a UTF-8 string.
     *
     * The text is split on runs of non-word characters (\W+) with the u
     * modifier; PREG_SPLIT_NO_EMPTY discards the empty pieces produced by
     * leading or trailing separators.
     *
     * @param string $str UTF-8 text
     * @return int number of words
     */
    function str_word_count_utf8($str) {
        $a = preg_split('/\W+/u', $str, -1, PREG_SPLIT_NO_EMPTY);
        return count($a);
    }
    echo str_word_count_utf8("Is wôrking"); // = 2
    echo str_word_count_utf8("Is wôrking! :)"); // = 2
    ?>

    joshua dot blake at gmail dot com

    16 years ago


    I needed a function which would extract the first hundred words out of a given input while retaining all markup such as line breaks, double spaces and the like. Most of the regexp based functions posted above were accurate in that they counted out a hundred words, but recombined the paragraph by imploding an array down to a string. This did away with any such hopes of line breaks, and thus I devised a crude but very accurate function which does all that I ask it to:

    <?php

    /**
     * Returns the leading part of $input that holds its first $numWords
     * words, with the original spacing and line breaks left intact.
     *
     * Input with no more than $numWords words is returned unchanged.
     * Otherwise the text is cut right before the word that follows the last
     * kept one.
     */
    function Truncate($input, $numWords)
    {
      if (str_word_count($input, 0) <= $numWords)
      {
        return $input;
      }

      // Format 2 is keyed by each word's byte offset, so the keys taken in
      // order are the start positions of word 0, word 1, ...
      $offsets = array_keys(str_word_count($input, 2));

      return substr($input, 0, $offsets[$numWords]);
    }

    ?>



    The idea behind it? Go through the keys of the arrays returned by str_word_count and associate the number of each word with its character position in the phrase. Then use substr to return everything up until the nth character. I have tested this function on rather large entries and it seems to be efficient enough that it does not bog down at all.

    Cheers!

    Josh


    jazz090

    14 years ago


    Personally, I dont like using this function becuase the characters it omits are sometime nessesery for instance MS Word counts ">" or "<" alone as single word where this function doesnt. I like using this however, it counts EVERYTHING:

    <?php

    /**
     * Counts every run of non-whitespace characters as a word, so stand-alone
     * symbols such as ">" or "<" are counted too.
     *
     * @param string $string text to inspect
     * @return int number of non-whitespace runs
     */
    function num_words($string){
        // \S+ (with the backslash) matches runs of non-whitespace; without it
        // the pattern would look for the capital letter S.
        preg_match_all('/\S+/', $string, $matches);

        return count($matches[0]);
    }

    ?>


    Samer Ata

    11 years ago


    This is my own version of to get SEO meta description from wordpress post content. it is also generic usage function to get the first n words from a string.

    <?php
    /**
     * Builds a plain-text excerpt consisting of the first $n words of $text.
     *
     * HTML tags are stripped and every run of whitespace is collapsed to a
     * single space before the words are counted.
     *
     * @param string $text Source text (may contain HTML).
     * @param int    $n    Maximum number of words to return.
     * @return string At most $n words joined by single spaces.
     */
    function my_meta_description($text,$n=10)
    {
        $text = strip_tags($text);  // not necessary for non-HTML input
        // $text = strip_shortcodes($text); // uncomment only inside a WordPress system

        // \s+ matches any whitespace run (spaces, tabs, newlines).
        $text = trim(preg_replace('/\s+/', ' ', $text));
        $word_array = explode(' ', $text);

        // array_slice() returns the whole list when it holds fewer than $n
        // words; max() keeps a negative $n from slicing from the end.
        return implode(' ', array_slice($word_array, 0, max(0, (int) $n)));
    }
    ?>


    josh at joshblake.net

    16 years ago


    I was interested in a function which returned the first few words out of a larger string.

    In reality, I wanted a preview of the first hundred words of a blog entry which was well over that.

    I found all of the other functions which explode and implode strings to arrays lost key markups such as line breaks etc.

    So, this is what I came up with:

    <?php

    /**
     * Returns the beginning of $input up to (not including) word number
     * $numWords, preserving line breaks and other markup between the words.
     *
     * @param string $input    Text to shorten.
     * @param int    $numWords Number of words to keep.
     * @return string The truncated text, or $input unchanged when it has no
     *                more than $numWords words.
     */
    function WordTruncate($input, $numWords) {
        // Format 2 keys each word by its byte offset. Taking the keys in order
        // gives the start of every word, including repeated ones; flipping the
        // array instead would keep only the LAST offset of a repeated word.
        $offsets = array_keys(str_word_count($input, 2));

        if (count($offsets) <= $numWords) {
            return $input;
        }

        return substr($input, 0, $offsets[$numWords]);
    }

    ?>



    While I haven't counted per se, it's accurate enough for my needs. It will also return the entire string if it's less than the specified number of words.

    The idea behind it? Use str_word_count to identify the nth word, then use str_word_count to identify the position of that word within the string, then use substr to extract up to that position.

    Josh.


    dmVuY2lAc3RyYWhvdG5pLmNvbQ== (base64)

    12 years ago


    to count words after converting a msword document to plain text with antiword, you can use this function:

    <?php
    /**
     * Counts the words in plain text produced by antiword.
     *
     * Pipe characters and runs of two or more dashes (both used by antiword to
     * draw tables) are removed first; the remaining text is then split on
     * whitespace.
     *
     * @param string $text Plain-text document.
     * @return int Number of words; 0 for empty or whitespace-only text.
     */
    function count_words($text) {
        $text = str_replace(array('|'), '', $text); // remove these chars (you can specify more)
        $text = preg_replace('/-{2,}/', '', $text); // remove 2 or more dashes in a row

        // Collapse whitespace AFTER the removals above, otherwise deleting a
        // dash run between two spaces leaves a double space and an extra word.
        $text = trim(preg_replace('/\s+/', ' ', $text));

        if ('' === $text) {
            return 0;
        }

        // Words are now separated by exactly one space. substr_count() scans
        // the string in place, so no array copy of the text is created.
        return substr_count($text, ' ') + 1;
    }
    ?>

    it strips the pipe "|" chars, which antiword uses to format tables in its plain text output, removes more than one dashes in a row (also used in tables), then counts the words.

    counting words using explode() and then count() is not a good idea for huge texts, because it uses much memory to store the text once more as an array. this is why i'm using while() { .. } to walk the string


    philip at cornado dot com

    20 years ago


    Some ask why not just split on ' '. Well, it's because simply exploding on a ' ' isn't fully accurate.  Words can be separated by tabs, newlines, double spaces, etc.  This is why people tend to separate on all whitespace with regular expressions.

    rcATinterfacesDOTfr

    20 years ago


    Here is another way to count words :
    // \W+ splits on every run of non-word characters; empty pieces are dropped.
    $word_count = count(preg_split('/\W+/', $text, -1, PREG_SPLIT_NO_EMPTY));

    aix at lux dot ee

    18 years ago


    One function.
    <?php
    if (!function_exists('word_count')) {
        /**
         * Splits $str on runs of space characters.
         *
         * @param string     $str Text to split.
         * @param int|string $n   Pass 1 to get the list of words; any other
         *                        value (default "0") returns the word count.
         * @return array|int List of words when $n == 1, otherwise their number.
         */
        function word_count($str,$n = "0"){
            // One regex pass replaces the repeated str_replace("  ", " ") loop,
            // and PREG_SPLIT_NO_EMPTY keeps leading/trailing spaces and empty
            // input from being reported as words.
            $b = preg_split('/ +/', $str, -1, PREG_SPLIT_NO_EMPTY);

            if ($n == 1) {
                return $b;
            }

            return count($b);
        }
    }

    // Demo: fetch the word list and the word count for the same sentence.
    $str = "Tere Tartu linn";

    $c = word_count($str, 1); // second argument 1 => array of words
    print_r($c);

    $d = word_count($str);    // default => int, number of words in the text
    echo $d;
    ?>

    Anonymous

    16 years ago


    Here is a PHP word-counting function together with a JavaScript version which will print the same result.

    <?php
         
    /**
     * PHP word counting function.
     *
     * Treats every run of characters other than A-Z, a-z and 0-9 as a
     * separator and counts the alphanumeric runs in between.
     *
     * @param string $theString Text to examine.
     * @return int Number of alphanumeric words.
     */
    function word_count($theString)
    {
        // The trailing space guarantees the last word is followed by a
        // separator, so explode() yields exactly one extra empty element.
        $fullStr = $theString." ";
        if (strlen($fullStr) < 2) {
            return 0;
        }

        // preg_replace() is used because ereg_replace() was removed in PHP 7.
        $left_trimmedStr = preg_replace('/^[^A-Za-z0-9]+/', '', $fullStr);
        $cleanedStr = preg_replace('/[^A-Za-z0-9]+/', ' ', $left_trimmedStr);

        return count(explode(" ", $cleanedStr)) - 1;
    }
    ?>

    <?php
         
    /**
     * Counts the words in a phrase.
     *
     * Every run of characters other than A-Z, a-z and 0-9 is treated as a
     * separator; the alphanumeric runs in between are the words.
     *
     * @param {string} theString Text to examine.
     * @returns {number} Number of alphanumeric words.
     */
    function wordCount(theString)
    {
        // The trailing space guarantees the last word is followed by a
        // separator, so split() yields exactly one extra empty element.
        var fullStr = theString + " ";
        if (fullStr.length < 2) {
            return 0;
        }

        var left_trimmedStr = fullStr.replace(/^[^A-Za-z0-9]+/, "");
        // Declared with var only: the former "= rExp =" chain leaked an
        // implicit global variable.
        var non_alphanumerics_rExp = /[^A-Za-z0-9]+/g;
        var cleanedStr = left_trimmedStr.replace(non_alphanumerics_rExp, " ");

        return cleanedStr.split(" ").length - 1;
    }
    ?>


    Artimis

    19 years ago


    Never use this function to count/separate alphanumeric words, it will just split them up words to words, numbers to numbers.  You could refer to another function "preg_split" when splitting alphanumeric words.  It works with Chinese characters as well.

    matthewkastor at live dot com

    12 years ago


    This needs improvement, but works well as is.

    <?php
    /**
    * Generates an alphabetical index of unique words, and a count of their occurrences, in a file.
    *
    * This works on html pages or plain text files.
    * This function uses file_get_contents, so it
    * is possible to use a url instead of a local filename.
    *
    * Change the search pattern at
    * <code> $junk = preg_match('/[^a-zA-Z]/', $word); </code>
    * if you want to keep words with numbers or other characters. The pattern
    * I've set searches for anything that is not an upper or lowercase letter,
    * you may want something else.
    *
    * The array returned will look something like this:
    * <code>
    * Array
    * (
    *     [0] => Array
    *        (
    *            [word] => a
    *            [count] => 21
    *        )
    *
    *     [1] => Array
    *        (
    *            [word] => ability
    *            [count] => 1
    *        )
    * )
    * </code>
    *
    * @param string $file The file ( or url ) you want to create an index from.
    * @return array List of ['word' => ..., 'count' => ...] entries; empty when
    *               the file cannot be read or contains no letter-only words.
    */
    function index_page($file) {
        $work = file_get_contents($file);
        if ($work === false) {
            // Unreadable file or URL: there is nothing to index.
            return array();
        }

        // Put a space between adjacent tags so the text of neighbouring
        // elements does not fuse into one word once the tags are stripped.
        $work = preg_replace('/[>][<]/', '> <', $work);
        $work = strtolower(strip_tags($work));

        // Turn line breaks into spaces, then squeeze every whitespace run down
        // to a single space. The backslashes matter: '/r/' and '/n/' would
        // replace the letters r and n themselves.
        $work = preg_replace(array('/\r/', '/\n/', '/\s\s+/'), ' ', $work);
        $work = explode(' ', trim($work));

        // Sorting places identical words next to each other, which is what
        // the run-length counting below relies on.
        natcasesort($work);

        $index = array();
        $i = 0;
        foreach ($work as $word) {
            $word = trim($word);
            $junk = preg_match('/[^a-zA-Z]/', $word);
            if ($word === '' || $junk == 1) {
                continue; // empty token, or it contains a non-letter
            }

            if (!isset($index[$i]['word'])) { // if not set this is a new index
                $index[$i]['word'] = $word;
                $index[$i]['count'] = 1;
            } elseif ($index[$i]['word'] == $word) {  // count repeats
                $index[$i]['count'] += 1;
            } else {
                // else this is a different word, increment $i and create an entry
                $i++;
                $index[$i]['word'] = $word;
                $index[$i]['count'] = 1;
            }
        }

        return $index;
    }
    ?>

    example usage:

    <?php
    $file = 'http://www.php.net/';
    // or use a local file, see file_get_contents() for valid filenames and restrictions.
    // The call below must sit on its own line; when fused into the comment
    // above, $index is never assigned.
    $index = index_page($file);
    echo '<pre>'.print_r($index,true).'</pre>';
    ?>


    Kirils Solovjovs

    19 years ago


    None of this worked for me. I think countwords() is very encoding-dependent. This is the code for win1257. For other layouts you just need to redefine the ranges of letters...

    <?php
    /**
     * Counts the words in a single-byte encoded string.
     *
     * A word is a maximal run of bytes whose value falls in one of the ranges
     * listed below (ASCII digits and letters plus several high-byte ranges);
     * every other byte acts as a separator.
     *
     * @param string $text Text to examine.
     * @return int Number of words found.
     */
    function countwords($text){
        // Inclusive byte ranges that count as a number or a letter.
        $wordRanges = array(
            array(48, 57),    // 0-9
            array(97, 122),   // a-z
            array(65, 90),    // A-Z
            array(170, 170),
            array(192, 214),
            array(216, 246),
            array(248, 254),
        );

        $insideWord = false;
        $total = 0;
        $length = strlen($text);

        for ($pos = 0; $pos < $length; $pos++) {
            $code = ord($text[$pos]);

            $isWordByte = false;
            foreach ($wordRanges as $range) {
                if ($code >= $range[0] && $code <= $range[1]) {
                    $isWordByte = true;
                    break;
                }
            }

            // A new word begins at each separator -> word-byte transition.
            if ($isWordByte && !$insideWord) {
                $total++;
            }
            $insideWord = $isWordByte;
        }

        return $total;
    }
    ?>


    andrea at 3site dot it

    19 years ago


    If the string doesn't contain a space (" "), the explode method doesn't do anything, so I wrote this and it seems to work better ... I don't know about its time and resource usage.

    <?php

    /**
     * Counts the case-insensitive occurrences of $match inside $string.
     * Overlapping occurrences are counted ("aa" occurs twice in "aaa").
     *
     * @param string $match  Text to look for.
     * @param string $string Text to search in.
     * @return int Number of occurrences; 0 when $match is empty.
     */
    function str_incounter($match,$string) {
        // Lower-case both sides once instead of on every loop iteration.
        $needle = strtolower($match);
        $haystack = strtolower($string);

        // An empty needle would otherwise "match" at every position.
        if ($needle === '') {
            return 0;
        }

        $count_match = 0;
        $offset = 0;
        while (($pos = strpos($haystack, $needle, $offset)) !== false) {
            $count_match++;
            // Advance one byte only, so overlapping matches are still found.
            $offset = $pos + 1;
        }

        return $count_match;
    }

    ?>



    example

    <?php

    // Three copies of the needle, joined by punctuation instead of spaces.
    $string = "something:something!!something";

    // will return 3
    $count_some = str_incounter(
        "something",
        $string
    );

    ?>


    lwright at psu dot edu

    16 years ago


    If you are looking to count the frequency of words, try:

    <?php

    // Map every distinct word of $string to the number of times it occurs.
    $wordfrequency = array_count_values(
        str_word_count($string, 1)   // format 1 => list of the words
    );
    ?>


    broncha at rajesharma dot com

    7 years ago


    Turns out the charlist is set by default for the web. For example, the string

    Copyright &copy; ABC Ltd.

    is 3 words in the cli and 4 words if executing in web context.


    eanimator at yahoo dot com

    14 years ago


    My quick and rough wordLimiter function.

    <?php

    /**
     * Keeps the first $limit space-separated words of $text.
     *
     * Every kept word is followed by one space; "..." is appended when words
     * were dropped.
     *
     * @param string $text  Text to shorten.
     * @param int    $limit Maximum number of words to keep.
     * @return string The shortened text.
     */
    function WordLimiter($text,$limit=20){
        $explode = explode(' ',$text);

        // array_slice() stops at the end of the list, so short texts no longer
        // read undefined indexes; max() rejects a negative limit, which
        // array_slice() would treat as "count from the end".
        $kept = array_slice($explode, 0, max(0, (int) $limit));

        $string = '';
        foreach ($kept as $word) {
            $string .= $word." ";
        }

        $dots = (count($explode) > $limit) ? '...' : '';

        return $string.$dots;
    }

    ?>


    lballard dot cat at gmail dot com

    12 years ago


    word limiter:

    <?php

    // Number of leading words to keep.
    $length = 3;

    $str = "my hella long string";

    // Split into words, keep the first $length, and glue them back together.
    $shortened = implode(
        ' ',
        array_slice(str_word_count($str, 1), 0, $length)
    );

    ?>


    dev dot vegera at gmail dot com

    2 years ago


    preg_match_all based function to mimic str_word_count behavior:

    <?php
    /**
     * UTF-8 aware counterpart of str_word_count() built on preg_match_all().
     *
     * A word starts with a Unicode letter or digit and continues with letters,
     * digits, apostrophes and any character listed in $charlist.
     *
     * @param string $str      UTF-8 text to examine.
     * @param int    $format   0 = return the number of words,
     *                         1 = return the list of words,
     *                         2 = return words keyed by their byte offset in $str.
     * @param string $charlist Additional characters allowed inside a word.
     * @return int|array See $format.
     * @throws InvalidArgumentException When $format is not 0, 1 or 2.
     */
    function mb_str_word_count($str, $format = 2, $charlist = '') {
      if ($format < 0 || $format > 2) {
        throw new InvalidArgumentException('Argument #2 ($format) must be a valid format value');
      }

      // Quote $charlist so characters such as "]" or "-" cannot break out of,
      // or change the meaning of, the character class.
      $extra = preg_quote($charlist, '#');
      // \p{L} = any Unicode letter, \p{N} = any Unicode number.
      $pattern = '#[\p{L}\p{N}][\p{L}\p{N}\'' . $extra . ']*#u';
      $flags = ($format === 2) ? PREG_OFFSET_CAPTURE : PREG_PATTERN_ORDER;

      $count = preg_match_all($pattern, $str, $matches, $flags);
      if ($format === 0) {
        return $count;
      }

      $matches = $matches[0] ?? [];
      if ($format === 2) {
        // With PREG_OFFSET_CAPTURE each match is a [text, byte offset] pair.
        $result = [];
        foreach ($matches as $match) {
          $result[$match[1]] = $match[0];
        }
        return $result;
      }

      return $matches;
    }
    ?>


    aidan at php dot net

    18 years ago


    This functionality is now implemented in the PEAR package PHP_Compat.

    More information about using this function without upgrading your version of PHP can be found on the below link:

    http://pear.php.net/package/PHP_Compat


    jak74 at interia dot pl

    6 years ago


    // split the phrase by any number of commas or whitespace characters,
    // which include " ", \r, \t, \n and \f

    $keywords = preg_split("/[\s,]+/", "hypertext language, programming");
    print_r($keywords);


    C program to count the total number of words in a string – In this article, we will detail in on the several means to count the total number of words in a string in C programming.

    Suitable examples and sample programs have also been added so that you can understand the whole thing very clearly. The compiler has also been added with which you can execute it yourself.

    The means used in this piece are as follows:

    • Using Standard Method
    • Using Function
    • Using Recursion
    • Using Pointers and While Loop

    A string is nothing but an array of characters. The end of a string is marked by a terminating null character ('\0'), whose value is 0.

    C Program Count Number Of Words In A String

    As given in the image above, firstly, you need to enter a string.

    The string specified here is as follows:

    “always first never give up”

    As you can see, there are 5 words in the given string.

    It can be found out by basic reading itself.

    Hence, doing the same in C programming is as follows:

    Using Standard Method

    1. Read the entered string and initialize to s using gets(s).

    2) We are finding the words count based on white spaces present in the given string. The ASCII value of white space is 32.

    3) for loop iterates through the string with the structure for(i=0;s[i];i++),

    If  ASCII value of any character of a string is equal to ASCII value of white space i.e 32, then increase the word count.

    4) After all iterations of for loop increase the word count, if i>0.

    5) Print the number of words present in the string.

    1

    2

    3

    4

    5

    6

    7

    8

    9

    10

    11

    12

    13

    14

    15

    16

    17

    18

    19

    20

    21

    22

    23

    24

    25

    26

    #include <stdio.h>

    #include <string.h>

    /*
     * Reads one line from standard input and prints the number of words in it.
     *
     * Words are assumed to be separated by exactly one space, so the result is
     * the number of space characters plus one for any non-empty line.
     */
    int main()
    {
        char s[1000];
        int i,words=0;

        printf("Enter  the string : ");

        /* fgets() limits the read to the buffer size; gets() cannot and was
           removed from the language in C11. */
        if(fgets(s,sizeof s,stdin)==NULL)
            s[0]='\0';
        s[strcspn(s,"\n")]='\0';   /* drop the newline that fgets() keeps */

        for(i=0;s[i];i++)
        {
            if(s[i]==' ')
                words++;
        }

        /* A non-empty line has one more word than it has separators. */
        if(i>0)
            words++;

        printf("no of words in string = %d\n",words);

        return 0;
    }

    Output:

    Enter  the string: welcome to Cbeginners

    no of words in string = 3

    Using Function

    1. The main() function calls the stringwordcount(char *s) function, passing the string as an argument to the function.

    2) The function stringwordcount() function compares each character’s ASCII value with white space ASCII value 32.If any character is equal to white space, then it increases the word count.

    3) After all iterations of for loop, if i>0 then increase the word count by 1.

    4) The function returns the word count to main() function. The main() function prints the number of words present in the given string.

    1

    2

    3

    4

    5

    6

    7

    8

    9

    10

    11

    12

    13

    14

    15

    16

    17

    18

    19

    20

    21

    22

    23

    24

    25

    26

    27

    28

    29

    30

    31

    32

    33

    #include <stdio.h>

    #include <string.h>

    /*
     * Returns the number of words in s, computed as the number of space
     * characters plus one; an empty string yields 0.
     */
    int stringwordcount(char *s)
    {
        const char *cursor = s;
        int spaces = 0;

        while (*cursor != '\0')
        {
            if (*cursor == ' ')
                spaces++;
            cursor++;
        }

        /* cursor moved only if the string holds at least one character. */
        return (cursor != s) ? spaces + 1 : spaces;
    }

    /*
     * Reads one line from standard input and prints the word count reported
     * by stringwordcount().
     */
    int main()
    {
        char s[1000];
        int wordscount;

        printf("Enter  the string: ");

        /* fgets() limits the read to the buffer size; gets() cannot and was
           removed from the language in C11. */
        if(fgets(s,sizeof s,stdin)==NULL)
            s[0]='\0';
        s[strcspn(s,"\n")]='\0';   /* drop the newline that fgets() keeps */

        wordscount=stringwordcount(s);

        printf("no of words in string = %d\n",wordscount);

        return 0;
    }

    Output:

    Enter  the string: always first never give up

    no of words in string = 5

    Using Recursion

    1. The main() calls the function stringwordcount(char *s).

    2) The function counts the number of words as

    a) If the s[i] is null, then it increases the word count by 1 if i>0 and returns the word count value to the main() function.

    b) If s[i] is not null, then compare ASCII value of s[i] with 32 which is the ASCII value of white space. If s[i] is equal to white space then increase the word count and call the function stringwordcount().

    The function calls itself recursively until s[i] becomes to null.

    1

    2

    3

    4

    5

    6

    7

    8

    9

    10

    11

    12

    13

    14

    15

    16

    17

    18

    19

    20

    21

    22

    23

    24

    25

    26

    27

    28

    29

    30

    31

    32

    33

    34

    35

    36

    37

    38

    #include <stdio.h>

    #include <string.h>

    /* Recursively counts the space characters from s to the end of the string. */
    static int countspaces(const char *s)
    {
        if(!*s)
            return 0;

        return (*s==' ') + countspaces(s+1);
    }

    /*
     * Returns the number of words in s, computed recursively as the number of
     * space characters plus one; an empty string yields 0.
     *
     * No static state is kept, so the function can be called any number of
     * times, and every path returns a value.
     */
    int stringwordcount(char *s)
    {
        if(!*s)
            return 0;

        return countspaces(s)+1;
    }

    /*
     * Reads one line from standard input and prints the word count reported
     * by stringwordcount().
     */
    int main()
    {
        char s[1000];
        int wordscount;

        printf("Enter  the string: ");

        /* fgets() limits the read to the buffer size; gets() cannot and was
           removed from the language in C11. */
        if(fgets(s,sizeof s,stdin)==NULL)
            s[0]='\0';
        s[strcspn(s,"\n")]='\0';   /* drop the newline that fgets() keeps */

        wordscount=stringwordcount(s);

        printf("no of words in string = %d\n",wordscount);

        return 0;
    }

    Output:

    Enter  the string: Without music, life would be a mistake

    no of words in string = 7

    Using Pointers And While Loop
    1. The pointer variable p points the string s.

    2) The while loop iterates until the character at the pointer variable become null.

    a) If the ASCII value of the character at the pointer variable p is equal to white space ASCII value. Then increase the word count.

    3) Increase the word count by 1 if the length of the string is greater than zero.

    4) Print the number of words present in the string.

    1

    2

    3

    4

    5

    6

    7

    8

    9

    10

    11

    12

    13

    14

    15

    16

    17

    18

    19

    20

    21

    22

    23

    24

    25

    26

    27

    28

    29

    30

    #include <stdio.h>

    #include <string.h>

    /*
     * Reads one line from standard input and prints the number of words in it,
     * walking the buffer with a pointer.
     *
     * Words are assumed to be separated by exactly one space, so the result is
     * the number of space characters plus one for any non-empty line.
     */
    int main()
    {
        char s[1000],*p;
        int words=0;

        printf("Enter  the string : ");

        /* fgets() limits the read to the buffer size; gets() cannot and was
           removed from the language in C11. */
        if(fgets(s,sizeof s,stdin)==NULL)
            s[0]='\0';
        s[strcspn(s,"\n")]='\0';   /* drop the newline that fgets() keeps */

        p=s;
        while(*p)
        {
            if(*p++==' ')
                words++;
        }

        /* p advanced only if the line holds at least one character. */
        if(p!=s)
            words++;

        printf("no of words in string = %d\n",words);

        return 0;
    }

    Output:

    Enter the string: I have not failed. I‘ve just found 10,000 ways that won’t work

    no of words in string = 12

    Понравилась статья? Поделить с друзьями:
  • Count number of numbers in excel cell
  • Count number of lines in word file
  • Count not sum in excel
  • Count no of sheets in excel
  • Count no of rows in excel worksheet