我正在使用Hibernate来存储使用jsoup解析html的数据。这是我的实体:
Sentence.hbm.xml
<class name="Sentence">
<id name="id">
<column name="SENTENCE_ID"/>
<generator class="native" />
</id>
<property name="content" type="text"/>
<many-to-one name="processedurl" class="src.model.ProcessedUrl">
<column name="PROCESSED_URL_ID" not-null="true" />
</many-to-one>
</class>
ProcessedUrl.hbm.xml
<class name="ProcessedUrl">
<id name="id">
<column name="url_id" />
<generator class="native"/>
</id>
<property name="url" type="text"/>
<property name="date" type="java.util.Date" />
<set name="sentences" cascade="all">
<key column="PROCESSED_URL_ID"/>
<one-to-many class="src.model.Sentence" />
</set>
</class>
POJO句子:
public class Sentence {
private long id;
private ProcessedUrl processedurl;
private String content;
public Sentence()
{
}
public Sentence(String content)
{
this.setContent(content);
//this.setUrl(url);
}
public Sentence(String content, ProcessedUrl processed_url) {
this.setContent(content);
this.setProcessedurl(processed_url);
}
public long getId() {
return id;
}
public void setId(long id) {
this.id = id;
}
public ProcessedUrl getProcessedurl() {
return processedurl;
}
public void setProcessedurl(ProcessedUrl processed_url) {
this.processedurl = processed_url;
}
public String getContent() {
return content;
}
public void setContent(String content) {
this.content = content;
}
}
POJO ProcessedUrl:
public class ProcessedUrl {
private long id;
private String url;
private Date date;
private Set<Sentence> sentences;
public ProcessedUrl() {
}
public ProcessedUrl(String url, Date date) {
this.setUrl(url);
this.setDate(date);
}
public ProcessedUrl(String url, Date date, Set<Sentence> sentences) {
this.setUrl(url);
this.setDate(date);
this.setSentences(sentences);
}
public long getId() {
return id;
}
public void setId(long id) {
this.id = id;
}
public String getUrl() {
return url;
}
public void setUrl(String url) {
this.url = url;
}
public Date getDate() {
return date;
}
public void setDate(Date date) {
this.date = date;
}
public Set<Sentence> getSentences() {
return this.sentences;
}
public void setSentences(Set<Sentence> sentences) {
this.sentences = sentences;
}
@Override
public boolean equals(Object obj) {
if(this == obj) return true;
if(!(obj instanceof ProcessedUrl)) return false;
ProcessedUrl that = (ProcessedUrl) obj;
EqualsBuilder eb = new EqualsBuilder();
eb.append(url, that.url);
return eb.isEquals();
}
@Override
public int hashCode() {
HashCodeBuilder hcb = new HashCodeBuilder();
hcb.append(url);
return hcb.toHashCode();
}
}
索引方法:
public void indexWebPage(String url) throws IOException
{
Document doc = Jsoup.connect(url).get();
Elements elements = doc.body().select("*");
HashSet<Sentence> sentencesCollection = new HashSet<Sentence>();
ProcessedUrl processedUrl = new ProcessedUrl(url, new Date(), sentencesCollection);
for (Element element : elements)
{
if (element.ownText().trim().length() > 1)
{
for (String sentenceContent : element.ownText().split("\\. "))
{
Sentence sentence = new Sentence(sentenceContent, processedUrl);
sentencesCollection.add(sentence);
}
}
}
Session session = HibernateUtils.getSession();
Transaction transaction = session.beginTransaction();
session.persist(processedUrl);
transaction.commit();
session.close();
}
这个方法正在解析一些http://...
以后我从中创建一个Sentence,将它添加到HashSet中并在解析完所有内容之后我将带有这个填充的HasSet的ProcessedUrl对象添加到DB中,因为在xml文件中是级联设置就会填充Sentence表。它有效。但是,当我再次解析相同的链接时,它会复制两个表。我使用url作为buiseness键来覆盖equals()/ hashcode(),我认为它会看到下一个解析的url之间的区别并且不会添加它(因此也不会添加句子)。但显然我并不清楚它是如何运作的。
一些提示&amp;澄清?也许我的方式完全是愚蠢的?