嵌套项目符号列表的文本到HTML

时间:2013-01-26 19:47:40

标签: java html text-to-html

我有如下内容。示例输入:

* First item
* Second item
    * Subitem 1
        * sub-subitem!
    * Subitem 3
* Third item

示例输出:

<ul>
    <li>First item</li>
    <li>Second item
        <ul>
            <li>Subitem 1
                <ul>
                    <li>sub-subitem!</li>
                </ul>
            </li>
            <li>Subitem 3</li>
        </ul>
    </li>
    <li>Third item</li>
</ul>

我创建了一个Java类,它把每一行String转换成一个char数组,然后逐个字符进行处理。我的问题是:应该在什么时候关闭标签?有什么想法吗?

这是我的代码:

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;

/**
 * Reads "NestedText.txt" line by line, scans every line character by
 * character and accumulates HTML list markup in {@link #itemName}, which is
 * printed to standard output once the whole file has been read.
 *
 * NOTE(review): this is the non-working attempt the question is about. The
 * code never appends a closing "</ul>" for nested lists and never closes the
 * "<li>" of a parent item after its children; see the notes below.
 */
public class TextToHtml {
    // Accumulates the generated markup for all lines of the file.
    StringBuilder itemName = new StringBuilder();
    // The line most recently returned by readLine().
    String sCurrentLine;
    // Set to 1 or 2 by the tab handling below; it is only ever read by the
    // "usingUlTAG<1" check and is never decreased again.
    int usingUlTAG=0;

/**
 * Performs the whole conversion: opens the file, prints "<ul>", processes
 * every line and prints the accumulated markup. An IOException is reported
 * via printStackTrace() and the reader is closed in the finally block.
 */
public TextToHtml(){
        BufferedReader br = null; 
        try {
            // NOTE(review): declared outside the line loop and never reset to
            // false, so after the first '*' has been seen every following
            // line gets "</li>\n" appended, whether or not it contained a '*'.
            boolean closeLItag=false;
            br = new BufferedReader(new FileReader("NestedText.txt"));
            System.out.println("<ul>");
            while ((sCurrentLine = br.readLine()) != null) {
                    char[] item = sCurrentLine.toCharArray();
                    for(int i=0; i<item.length;i++){
                            // Any character that is not '*', '\n' or '\t' is
                            // copied to the output unchanged.
                            if(item[i]!='*' && item[i]!='\n' && item[i]!='\t'){
                                    itemName.append(item[i]); 
                continue;
            }   
            // Every '*' opens a list item, wherever it occurs in the line.
            if(item[i]=='*'){   
                itemName.append("<li>");
                closeLItag=true;
            }
            else if(item[i]=='\t'){ 
                // NOTE(review): item[i+1] is read without a bounds check, so
                // a tab as the last character of a line throws
                // ArrayIndexOutOfBoundsException.
                if(item[i+1]=='*'){ 
                    // NOTE(review): the if below has no braces, so it guards
                    // only the first append; the second append always runs.
                    if(usingUlTAG<1)
                    itemName.append("\t<ul>\n\t\t");
                    itemName.append("\t\n\t\t");
                    usingUlTAG= 1;
                    continue;
                }
                // A tab followed by another tab opens a further "<ul>" each
                // time such a pair is encountered.
                if(item[i+1]=='\t'){    
                    itemName.append("\t\t<ul>\n\n\t\t");
                    usingUlTAG=2;
                    continue;
                }
            }
        }
        // End of the current line: close the list item (see the note on
        // closeLItag above).
        if(closeLItag){
            itemName.append("</li>\n");
        }

    }       
    // NOTE(review): the literal is "/ul>" (no leading '<'), so the printed
    // closing tag is malformed.
    System.out.println(itemName+"/ul>");
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        try {
            if (br != null)br.close();
        } catch (IOException ex) {
            ex.printStackTrace();
        }
    }
}

// Entry point: the conversion runs entirely inside the constructor.
public static void main(String[] args) {
    new TextToHtml();   
}
}

3 个答案:

答案 0 :(得分:1)

你必须预先查看下一行,判断它的列表级别是否与当前项目不同。然后根据级别的差异(如果有)添加或关闭标签。以下是实现此操作的代码:

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;

/**
 * Converts a tab-indented, asterisk-bulleted outline into nested HTML
 * {@code <ul>}/{@code <li>} markup.
 *
 * <p>Each input line is expected to look like {@code "\t\t* text"}: the number
 * of leading tab characters is the nesting level, followed by an optional
 * {@code '*'} marker and one optional space. To decide which tags have to be
 * opened or closed after an item, the converter peeks at the level of the
 * next item.
 *
 * <p>Differences from a naive character scan: only the <em>leading</em> tabs
 * and the first {@code '*'} are treated as structure (asterisks or tabs inside
 * the item text are kept verbatim), a line ending directly after the marker
 * no longer causes an {@code ArrayIndexOutOfBoundsException}, and blank lines
 * are skipped instead of producing an unmatched {@code </li>}.
 *
 * <p>Limitations: indentation must use tabs, not spaces; the first item is
 * expected to be at level 0; item text is emitted as-is (no HTML escaping).
 */
public class TextToHtml
{
    StringBuilder itemName = new StringBuilder();
    String sCurrentLine;
    String sNextLine; // A "peek" at what's next to determine if </li> is needed

    /**
     * Reads {@code NestedText.txt} from the working directory and prints the
     * generated HTML list to standard output. I/O errors are reported with
     * {@code printStackTrace()}; the reader is always closed.
     */
    public TextToHtml()
    {
        BufferedReader br = null;
        try
        {
            br = new BufferedReader(new FileReader("NestedText.txt"));
            System.out.println("<ul>");
            sNextLine = readItemLine(br);
            while ((sCurrentLine = sNextLine) != null)
            {
                sNextLine = readItemLine(br);
                appendItem(itemName, sCurrentLine, sNextLine);
            }
            System.out.println(itemName + "</ul>");
        }
        catch (IOException e)
        {
            e.printStackTrace();
        }
        finally
        {
            try
            {
                if (br != null)
                    br.close();
            }
            catch (IOException ex)
            {
                ex.printStackTrace();
            }
        }
    }

    /**
     * Converts an in-memory outline to HTML.
     *
     * @param text the outline; lines are separated by {@code "\n"} or
     *             {@code "\r\n"}, blank lines are ignored
     * @return the markup, starting with {@code "<ul>\n"} and ending with
     *         {@code "</ul>"} (no trailing newline)
     */
    static String convert(String text)
    {
        StringBuilder html = new StringBuilder("<ul>\n");
        String current = null;
        for (String line : text.split("\r?\n"))
        {
            if (isBlank(line))
                continue;
            // An item can only be emitted once its successor is known.
            if (current != null)
                appendItem(html, current, line);
            current = line;
        }
        if (current != null)
            appendItem(html, current, null);
        return html.append("</ul>").toString();
    }

    /**
     * Appends the markup for one item, including every tag that has to be
     * opened or closed before the next item starts.
     *
     * @param out      receives the markup
     * @param line     the current, non-blank input line
     * @param nextLine the following non-blank line, or {@code null} at the end
     *                 of the input (treated as level 0 so all lists get closed)
     */
    private static void appendItem(StringBuilder out, String line, String nextLine)
    {
        int itemLevel = indentLevel(line);
        int nextItemLevel = (nextLine == null) ? 0 : indentLevel(nextLine);

        appendIndent(out, itemLevel);
        out.append("\t<li>").append(itemText(line, itemLevel));

        if (itemLevel == nextItemLevel)
        {
            // Next is the same level; there are no subitems
            out.append("</li>");
        }
        else if (itemLevel < nextItemLevel)
        {
            // Next is deeper; open one <ul> per level, possibly several at once
            for (int level = itemLevel + 1; level <= nextItemLevel; level++)
            {
                out.append("\n");
                appendIndent(out, level);
                out.append("<ul>");

                // Intermediate levels need a placeholder <li> to hold the next
                // <ul>; at the final level the next item writes its own <li>.
                if (level != nextItemLevel)
                {
                    out.append("\n");
                    appendIndent(out, level);
                    out.append("\t<li>");
                }
            }
        }
        else
        {
            // Next is shallower; close this item and every list in between
            out.append("</li>");
            for (int level = itemLevel - 1; level >= nextItemLevel; level--)
            {
                out.append("\n");
                appendIndent(out, level + 1);
                out.append("</ul>\n");
                appendIndent(out, level);
                out.append("\t</li>");
            }
        }
        out.append("\n");
    }

    /** Returns the next non-blank line from the reader, or null at end of input. */
    private static String readItemLine(BufferedReader reader) throws IOException
    {
        String line = reader.readLine();
        while (line != null && isBlank(line))
            line = reader.readLine();
        return line;
    }

    /** Tells whether the line consists of whitespace only. */
    private static boolean isBlank(String line)
    {
        return line.trim().length() == 0;
    }

    /** Counts the leading tab characters, i.e. the nesting level of the line. */
    private static int indentLevel(String line)
    {
        int level = 0;
        while (level < line.length() && line.charAt(level) == '\t')
            level++;
        return level;
    }

    /** Returns the item text: the line without its tabs, '*' marker and one space. */
    private static String itemText(String line, int level)
    {
        int start = level;
        if (start < line.length() && line.charAt(start) == '*')
        {
            start++;
            // Trim a single space after the marker (bounds-checked)
            if (start < line.length() && line.charAt(start) == ' ')
                start++;
        }
        return line.substring(start);
    }

    /** Appends two tab characters per nesting level. */
    private static void appendIndent(StringBuilder out, int levels)
    {
        for (int i = 0; i < levels; i++)
            out.append("\t\t");
    }

    public static void main(String[] args)
    {
        new TextToHtml();
    }
}

请注意,这仅适用于使用制表符而不是空格缩进级别的情况。

答案 1 :(得分:0)

如果像您当前的代码所示,原始文本中的列表项行都使用硬制表符(tab)进行缩进,那么您只需一次一行地处理文本,并跟踪上一行的缩进级别(即制表符的数量)。此代码生成的HTML缩进并不美观,但它能使<ul>/<li>嵌套正确,而这才是HTML浏览器真正关心的。

import java.io.*;
import java.util.regex.*;

/**
 * Converts a tab-indented, asterisk-bulleted outline into nested HTML
 * {@code <ul>}/{@code <li>} markup (without pretty-printing).
 *
 * <p>A line of the form {@code "\t\t* text"} starts a new list item whose
 * nesting is given by the number of leading tabs. Any other line is treated
 * as a continuation of the previous item and is appended to it, separated by
 * a single space.
 */
public class Main {
  /**
   * Group 1: the optional item marker (tabs, asterisk, space); group 2: the
   * leading tabs; group 3: the remaining text. DOTALL lets {@code .*} also
   * cover line terminators that readLine() does not split on, so the pattern
   * matches every possible line.
   */
  private static final Pattern ITEM_PATTERN =
      Pattern.compile("((\\t*)\\* )?(.*)", Pattern.DOTALL);

  /** Reads {@code NestedText.txt} and prints the generated HTML to standard output. */
  public static void main(String[] args) throws Exception {
    BufferedReader br = new BufferedReader(new FileReader("NestedText.txt"));
    try {
      System.out.println(toHtml(br));
    } finally {
      br.close();
    }
  }

  /**
   * Converts an in-memory outline to HTML.
   *
   * @param text the outline, one item or continuation per line
   * @return the generated markup on a single line
   */
  static String toHtml(String text) {
    try {
      return toHtml(new BufferedReader(new StringReader(text)));
    } catch (IOException e) {
      // A StringReader performs no real I/O, so this is not expected to happen.
      throw new IllegalStateException("Reading from an in-memory string failed", e);
    }
  }

  /**
   * Converts the outline supplied by the reader to HTML. The reader is not
   * closed by this method.
   *
   * @param reader source of the outline lines
   * @return the generated markup on a single line
   * @throws IOException if reading from {@code reader} fails
   */
  static String toHtml(BufferedReader reader) throws IOException {
    StringBuilder result = new StringBuilder();
    // Indent of every <ul> that is currently open, innermost on top. Keeping
    // the actual indents (instead of a single "last indent") makes it possible
    // to close several lists at once when an item jumps back more than one
    // level. Fully qualified so that no additional import is required.
    java.util.Deque<Integer> openIndents = new java.util.ArrayDeque<Integer>();

    String line;
    while ((line = reader.readLine()) != null) {
      Matcher m = ITEM_PATTERN.matcher(line);
      m.matches(); // always true for this pattern, but needed to update matcher state

      if (m.group(1) == null) {
        // continuation of the previous list item
        result.append(" ").append(m.group(3));
        continue;
      }

      int thisIndent = m.end(2); // number of leading tabs, may be zero

      // returning to a parent list: close every list deeper than this item
      while (!openIndents.isEmpty() && openIndents.peek() > thisIndent) {
        openIndents.pop();
        result.append("</li>").append("</ul>");
      }

      if (!openIndents.isEmpty() && openIndents.peek() == thisIndent) {
        // same level as the previous item of this list
        result.append("</li>");
      } else {
        // starting a child list (or the very first list)
        result.append("<ul>");
        openIndents.push(thisIndent);
      }

      // append this line's text (not including the indent and *)
      result.append("<li>").append(m.group(3));
    }

    // run out of items, close any outstanding lists
    while (!openIndents.isEmpty()) {
      openIndents.pop();
      result.append("</li>").append("</ul>");
    }
    return result.toString();
  }
}

在这里,我将任何不以制表符和星号开头的行视为前一个<li>的延续,即

* This is a very long list
item that continues over several
  lines
* This is a second item
    * this is a child item
  that also continues
   over several lines

没问题。

答案 2 :(得分:0)

我按照Jan Dvorak的建议解决了这个问题。以下代码对我有效;为了帮助其他人,我把它贴在下面。感谢大家的帮助。

// Convert Markdown text (which supports nested "*" bullet lists) to HTML by
// delegating to a Markdown library instead of parsing the list by hand.
// NOTE(review): MarkdownProcessor is not part of the JDK and MyString is
// assumed to be a String holding the Markdown source, declared elsewhere.
MarkdownProcessor m = new MarkdownProcessor();
String html = null;
try {
    // Fixed: the original call ended in "))" — an unbalanced parenthesis that
    // does not compile.
    html = m.markdown(MyString);
} catch (IOException e) {
    // NOTE(review): assumes markdown(...) declares IOException — confirm
    // against the library version in use.
    e.printStackTrace();
}
System.out.println(html);