代码未能返回最新名称

时间:2017-10-04 14:27:24

标签: java html

我正在使用Jsoup API通过阅读网页(html页面)获取最新的文件名。问题是它没有从网页上读取最新文件。以下是示例代码:

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.File;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.*;

public class ReadFileName {

    static String returnLatestFileName(List<String> keys, List<String> values) throws ParseException {
        if (keys.size() != values.size())
            throw new IllegalArgumentException("Exception..");
        DateFormat df = new SimpleDateFormat("dd/MM/yyyy h:mm a");

        Map<String, Date> map = new HashMap<String, Date>();
        for (int i = 0; i < keys.size(); i++) {
            map.put(keys.get(i), df.parse(values.get(i)));
        }
        String keyMax = Collections.max(map.entrySet(), Map.Entry.comparingByValue()).getKey();
        return keyMax;
    }

    public String getLatestFile() throws Exception{
        Map<String, String> reportDetailsMap = new HashMap<String, String>();
        String weeklyReportName = null;
        try {

            File file = new File("testFile.html");
            Document doc = Jsoup.parse(file, "UTF-8");

            List fileName = new ArrayList();
            Elements div_class = doc.select("div.ms-vb, .itx");
            for (Element td : div_class) {
                fileName.add(td.select("a").text());
            }

            Elements nobr = doc.select("nobr");
            List fileTime = new ArrayList();
            for (Element td : nobr) {
                fileTime.add(td.text());
            }
           weeklyReportName = returnLatestFileName(fileName, fileTime);
           System.out.println("---------Latest File Name-------------- "+ weeklyReportName);

          } catch (Exception e) {

        }
        return weeklyReportName;
    }
   public static void main(String argp[]) throws Exception {
        new ReadFileName().getLatestFile();
   }
}

输出:

My_File_MonthlyEcsStatus_August2017

预期产出:

My_File_MonthlyEcsStatus_sep2017

Html代码(testFile.html):

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN">
<html dir="ltr" class="ms-isBot" lang="en-US">
<head>
    <meta name="GENERATOR" content="Microsoft SharePoint" />
    <meta http-equiv="Content-type" content="text/html; charset=utf-8" />
    <title>

    </title>
    <!-- === Favicon / Windows Tile ==================================================================== -->
    <link rel="shortcut icon" href=" " type="image/vnd.microsoft.icon" id="favicon" />
    <meta name="msapplication-TileImage" content=" " />
    <meta name="msapplication-TileColor" content="#0072C6" />
    <script type="text/javascript" src=" "></script>
    <link rel="stylesheet" type="text/css" href=" " />
    <link id="Csdion1" rel="stylesheet" type="text/css" href=" " />
    <link id="Cssdfdsn2" rel="stylesheet" type="text/css" href=" 0" />

    <script type="text/javascript">RegisterSod("strings.js", "\u002f_layouts\u002f15\u002f1033\u002fstrings.js?rev=cG2ZohQxWuyz1\u00252BF2exRTjA\u00253D\u00253D");RegisterSodDep("strings.js", "initstrings.js");

    <link type="text/xml" rel="alternate" href="/_asd.xls" />
            <!-- Additional header placeholder  =========================== -->
            <link rel="alternate" type="application/rss+xml" title="Documents" href="/_layouts/15/listfeed.aspx?List=573d80cd%2D44f6%2D47b4%2D942f%2Da12a5a1841cb" />
            <span id="analytics">
            <script language="JavaScript" type="text/javascript">

            <noscript>
            <div class="noindex">
            You may be trying to access this site from a secured browser on the server. Please enable scripts and reload this page.
    </div>
    </noscript>
    <!-- ===== SP IDs / Prefetch SP images / SP Form  =========================================================================== -->
    <div id="imgPrefetch" style="display:none">
            <img src="/_layouts/15/images/spcommon.png" />
            </div>
            <form method="post" action="./AllItems.aspx?RootFolder=%2fShared+Documents%2f08.Test+Report%2fMY20+Test+Reports%2fSanity%2fRaw+Data&amp;FolderCTID=0x0120003C2FB175ACD9FE42B875BA259F53A6E3&amp;View=%7bF8BC514C-49A5-47A2-8A6D-52DF70D61AE7%7d" id="aspnetForm">
            <input type="hidden" name="_wpcmWpid" id="_wpcmWpid" value="" />
            <input type="hidden" name="wpcmVal" id="wpcmVal" value="" />
            <input type="hidden" name="MSOWebPartPage_PostbackSource" id="MSOWebPartPage_PostbackSource" value="" />

    </script>

    <div id="ctl00_ctl47_asdasd" class="asdaBrandMenu">
        <a href="http://www.qwer.com/" target="_blank"> </a>

        <!-- =============Suite Bar Links ======================-->
        <div id="DeltaSuiteLinks" class="ms-core-deltaSuiteLinks">
            <div id="suiteLinksBox">
                <div id="SuiteLinksHidden" style="display: none">
                </div>
                <div id="launcherIconContainer">
                </div>

                <span style="display:none">
        <menu type="ServerMenu" id="zz1_ID_PersonalActionMenu" hideicons="true">
         <ie:menuitem id="zz2_ID_MyProfile" type="option" onmenuclick="" text="My Profile" menugroupid="100"></ie:menuitem>
         <ie:menuitem id="zz3_ID_Logout" type="option" onmenuclick="" text="Sign Out" description="Logout of this site." menugroupid="100"></ie:menuitem>
        </menu></span>
                <span id="zz4_Menu_t" class="ms-menu-althov ms-welcome-root" title="Open Menu" onmouseover="MMU_PopMenuIfShowing(this);MMU_EcbTableMouseOverOut(this, true)" hoveractive="ms-menu-althov-active ms-welcome-root ms-welcome-hover" hoverinactive="ms-menu-althov ms-welcome-root" onclick=" CoreInvoke('MMU_Open',byid('zz1_ID_PersonalActionMenu'), MMU_GetMenuFromClientId('zz4_Menu'),event,true, null, 0); return false;" foa="MMU_GetMenuFromClientId('zz4_Menu')" oncontextmenu="ClkElmt(this); return false;" style="white-space:nowrap"><a class="ms-core-menu-root" id="zz4_Menu" accesskey="/" href="javascript:;" title="Open Menu" onfocus="MMU_EcbLinkOnFocusBlur(byid('zz1_ID_PersonalActionMenu'), this, true);" onkeydown="MMU_EcbLinkOnKeyDown(byid('zz1_ID_PersonalActionMenu'), MMU_GetMenuFromClientId('zz4_Menu'), event);" onclick=" CoreInvoke('MMU_Open',byid('zz1_ID_PersonalActionMenu'), MMU_GetMenuFromClientId('zz4_Menu'),event,true, null, 0); return false;" oncontextmenu="ClkElmt(this); return false;" menutokenvalues="MENUCLIENTID=zz4_Menu,TEMPLATECLIENTID=zz1_ID_PersonalActionMenu" serverclientid="zz4_Menu"></a><span style="height:4px;width:7px;position:relative;display:inline-block;overflow:hidden;" class="s4-clust ms-viewselector-arrow ms-menu-stdarw ms-core-menu-arrow"><img src="/_catalogs/theme/Themed/EB5E82F/spcommon-B35BB0A9.themedpng?ctag=3" alt="Open Menu" style="position:absolute;left:-95px !important;top:-259px !important;" /></span><span style="height:4px;width:7px;position:relative;display:inline-block;overflow:hidden;" class="s4-clust ms-core-menu-arrow ms-viewselector-arrow ms-menu-hovarw"><img src="/_catalogs/theme/Themed/EB5E82F/spcommon-B35BB0A9.themedpng?ctag=3" alt="Open Menu" style="position:absolute;left:-86px !important;top:-259px !important;" /></span></span>
            </div>
            <!-- ======== Start: Site Actions menu ============= -->
            <div id="suiteBarButtons">
       <span class="ms-siteactions-root" id="siteactiontd"> <span style="display:none">
         <menu type="ServerMenu" id="zz5_FeatureMenuTemplate1" hideicons="true">
          <ie:menuitem id="zz" type="option" onmenuclick="" description="" menugroupid="100"></ie:menuitem>
          <ie:menuitem id="zz7" type="option" iconsrc="" onmenuclick="STSNavigate2(event,'/_layouts/15/viewlsts.aspx');" text="Site contents" description="View all libraries and lists in this site." menugroupid="200"></ie:menuitem>
         </menu></span><span id="zz8_SiteActionsMenu_t" class="ms-siteactions-normal" title="Settings" onmouseover="MMU_PopMenuIfShowing(this);MMU_EcbTableMouseOverOut(this, true)" hoveractive="ms-siteactions-normal ms-siteactions-hover" hoverinactive="ms-siteactions-normal">
         <a class="ms-core-menu-root" id="zz8_SiteActionsMenu" accesskey="/" href="javascript:;" title="Settings" onkeydown="MMU_EcbLinkOnKeyDown(byid('zz5_FeatureMenuTemplate1'), MMU_GetMenuFromClientId('zz8_SiteActionsMenu'));" menutokenvalues="MENUCLIENTID=zz8_SiteActionsMenu,TEMPLATECLIENTID=zz5_FeatureMenuTemplate1" serverclientid="zz8_SiteActionsMenu"><span class="ms-siteactions-imgspan"><img class="ms-core-menu-buttonIcon" src="/_catalogs/theme/Themed/EB5E82F/Settings-white-94FE89A9.themedpng?ctag=3" alt="Settings" title="Settings" /></span><span class="ms-accessible">Use SHIFT+ENTER to open the menu (new window).</span></a></span> </span>
            </div>
            <!-- ================== End: Site Actions Menu ============================================ -->
            <!-- ================== IT Help Link ============================================ -->

            <div class="ms-core-listMenu-verticalBox">
            </div>
        </div>
    </div>
    </div>
    <!-- ===== Main Content ========================================================================================== -->



    <tr class="ms-itmhover" iid="161,113,0">
        <td class="ms-vb-itmcbx ms-vb-firstCell"><input type="checkbox" class="s4-itm-cbx" /></td>
        <td class="ms-vb-icon"><img border="0" alt="My_File_MonthlyEcsStatus_July2017" title="My_File_MonthlyEcsStatus_July2017" src="/_layouts/15/images/icpdf.png?rev=40" /></td>
        <td height="100%" onmouseover="OnChildItem(this)" class="ms-vb-title">
            <div class="ms-vb itx" onmouseover="OnItem(this)" ctxname="ctx161" id="113" field="LinkFilename" perm="0xb008431061" eventtype="">
                <a onfocus="OnLink(this)" href="/MyDocs/My_File_MonthlyEcsStatus_July2017" onmousedown="return VerifyHref(this,event,'1','','')" onclick="return DispEx(this,event,'TRUE','FALSE','FALSE','','1','','','','','66','0','0','0xb008431061','','')">My_File_MonthlyEcsStatus_July2017</a>
            </div>
            <div class="s4-ctx" onmouseover="OnChildItem(this.parentNode); return false;">
                <span>&nbsp;</span>
                <a onfocus="OnChildItem(this.parentNode.parentNode); return false;" onclick="PopMenuFromChevron(event); return false;" href="javascript:;" title="Open Menu"></a>
                <span>&nbsp;</span>
            </div></td>
        <td class="ms-vb2">
            <nobr>
                7/12/2017 3:20 PM
            </nobr></td>
        <td class="ms-vb-user"><span class="ms-noWrap"><span class="ms-imnSpan"><a href="#" onclick="IMNImageOnClick(event);return false;" class="ms-imnlink ms-spimn-presenceLink"><span class="ms-spimn-presenceWrapper ms-imnImg ms-spimn-imgSize-10x10"><img name="imnmark" class="ms-spimn-img ms-spimn-presence-disconnected-10x10x32" title="" showofflinepawn="1" src="/_layouts/15/images/spimn.png?rev=40" alt="No presence information" /></span></a></span><span class="ms-noWrap ms-imnSpan"><a href="#" onclick="IMNImageOnClick(event);return false;" class="ms-imnlink" tabindex="-1"><img name="imnmark" class="ms-hide" title="" showofflinepawn="1" src="/_layouts/15/images/blank.gif?rev=40" alt=""/></a><a class="ms-subtleLink" onclick="GoToLinkOrDialogNewWindow(this);return false;" href="">Jen Kulchycki</a></span></span></td>
        <td class="ms-vb-user">
            <div class="ms-peopleux-vanillauser">
            </div></td>
    </tr>
    <tr class="ms-alternating ms-itmhover" iid="161,119,0">
        <td class="ms-vb-itmcbx ms-vb-firstCell"><input type="checkbox" class="s4-itm-cbx" /></td>
        <td class="ms-vb-icon"><img border="0" alt="My_File_MonthlyEcsStatus_August2017" title="My_File_MonthlyEcsStatus_August2017" src="/_layouts/15/images/icpdf.png?rev=40" /></td>
        <td height="100%" onmouseover="OnChildItem(this)" class="ms-vb-title">
            <div class="ms-vb itx" onmouseover="OnItem(this)" ctxname="ctx161" id="119" field="LinkFilename" perm="0xb008431061" eventtype="">
                <a onfocus="OnLink(this)" href="/MyDocs/My_File_MonthlyEcsStatus_August2017.xls" onmousedown="return VerifyHref(this,event,'1','','')" onclick="return DispEx(this,event,'TRUE','FALSE','FALSE','','1','','','','','66','0','0','0xb008431061','','')">My_File_MonthlyEcsStatus_August2017</a>
            </div>
            <div class="s4-ctx" onmouseover="OnChildItem(this.parentNode); return false;">
                <span>&nbsp;</span>
                <a onfocus="OnChildItem(this.parentNode.parentNode); return false;" onclick="PopMenuFromChevron(event); return false;" href="javascript:;" title="Open Menu"></a>
                <span>&nbsp;</span>
            </div></td>
        <td class="ms-vb2">
            <nobr>
                08/21/2017 11:49 PM
            </nobr></td>
        <td class="ms-vb-user"><span class="ms-noWrap"><span class="ms-imnSpan"><a href="#" onclick="IMNImageOnClick(event);return false;" class="ms-imnlink ms-spimn-presenceLink"><span class="ms-spimn-presenceWrapper ms-imnImg ms-spimn-imgSize-10x10"><img name="imnmark" class="ms-spimn-img ms-spimn-presence-disconnected-10x10x32" title=""/></span></a></span><span class="ms-noWrap ms-imnSpan"><a href="#" onclick="IMNImageOnClick(event);return false;" class="ms-imnlink" tabindex="-1"><img name="imnmark" class="ms-hide" title="" showofflinepawn="1" src="/_layouts/15/images/blank.gif?rev=40" alt="" /></a><a class="ms-subtleLink" onclick="GoToLinkOrDialogNewWindow(this);return false;" href="">Jen Kulchycki</a></span></span></td>
        <td class="ms-vb-user">
            <div class="ms-peopleux-vanillauser">
            </div></td>
    </tr>
    <tr class="ms-alternating ms-itmhover" iid="161,119,0">
        <td class="ms-vb-itmcbx ms-vb-firstCell"><input type="checkbox" class="s4-itm-cbx" /></td>
        <td class="ms-vb-icon"><img border="0" alt="My_File_MonthlyEcsStatus_sep2017" title="My_File_MonthlyEcsStatus_sep2017" src="/_layouts/15/images/icpdf.png?rev=40" /></td>
        <td height="100%" onmouseover="OnChildItem(this)" class="ms-vb-title">
            <div class="ms-vb itx" onmouseover="OnItem(this)" ctxname="ctx161" id="119" field="LinkFilename" perm="0xb008431061" eventtype="">
                <a onfocus="OnLink(this)" href="/MyDocs/My_File_MonthlyEcsStatus_sep2017.xls" onmousedown="return VerifyHref(this,event,'1','','')" onclick="return DispEx(this,event,'TRUE','FALSE','FALSE','','1','','','','','66','0','0','0xb008431061','','')">My_File_MonthlyEcsStatus_sep2017</a>
            </div>
            <div class="s4-ctx" onmouseover="OnChildItem(this.parentNode); return false;">
                <span>&nbsp;</span>
                <a onfocus="OnChildItem(this.parentNode.parentNode); return false;" onclick="PopMenuFromChevron(event); return false;" href="javascript:;" title="Open Menu"></a>
                <span>&nbsp;</span>
            </div></td>
        <td class="ms-vb2">
            <nobr>
                09/20/2017 10:49 PM
            </nobr></td>
        <td class="ms-vb-user"><span class="ms-noWrap"><span class="ms-imnSpan"><a href="#" onclick="IMNImageOnClick(event);return false;" class="ms-imnlink ms-spimn-presenceLink"><span class="ms-spimn-presenceWrapper ms-imnImg ms-spimn-imgSize-10x10"><img name="imnmark" class="ms-spimn-img ms-spimn-presence-disconnected-10x10x32" title="" showofflinepawn="1"/></span></a></span><span class="ms-noWrap ms-imnSpan"><a href="#" onclick="IMNImageOnClick(event);return false;" class="ms-imnlink" tabindex="-1"><img name="imnmark" class="ms-hide" title="" showofflinepawn="1" src="/_layouts/15/images/blank.gif?rev=40" alt="" /></a><a class="ms-subtleLink" onclick="GoToLinkOrDialogNewWindow(this);return false;" href="">tHJoee</a></span></span></td>
        <td class="ms-vb-user">
            <div class="ms-peopleux-vanillauser">
            </div></td>
    </tr>
    </tbody>
    </table></td>
    </tr>

从上面的html文件中,我想读取列出的文件名并返回最近上传的文件名。需要做哪些更改才能获得最近上传的文件名。我无法弄清楚问题,任何输入?

1 个答案:

答案 0 :(得分:0)

更改此行:

DateFormat df = new SimpleDateFormat("dd/MM/yyyy h:mm a");

通过这个:

DateFormat df = new SimpleDateFormat("MM/dd/yyyy h:mm a");

您必须指定从html文件中读取日期的模式。