我正在尝试读取一个JSON文件并对其执行一些操作。该JSON文件存放在Amazon S3
中,大小为500MB,今后的文件将达到TB级别。以下是我的代码。
/**
 * Downloads the JSON document {@code inputFile} from the S3 bucket {@code inputBucket},
 * computes a hash index for every element and records the results.
 *
 * <p>The document is parsed as a JSON array of objects, each of which must carry a
 * {@code "title"} and a {@code "body"} string. For every element the method adds a
 * {@code "hash_index"} entry (see {@link #buildHashIndex(String, String)}) and a
 * {@code "primary_key"} entry (the element's position in the array), then appends the
 * object to {@code jsonObjectHolder} and the hash to {@code jsonHashHolder}.
 *
 * <p>NOTE: the complete array is materialised by {@code new JSONArray(...)} and every
 * element is additionally retained in {@code jsonObjectHolder}, so heap usage grows with
 * the size of the input file.
 *
 * <p>Any {@link Exception} raised while listing, downloading or processing is caught and
 * its stack trace printed; it is not propagated to the caller.
 *
 * @throws IOException declared in the signature; failures inside the method are currently
 *                     caught and printed rather than rethrown
 */
public void createHash() throws IOException
{
    System.out.println("Hash Creation Started");
    strBuffer = new StringBuffer("");
    try
    {
        //List all the Buckets
        for (Bucket bucket : s3.listBuckets())
        {
            System.out.println("- " + bucket.getName());
        }

        //Downloading the Object
        System.out.println("Downloading Object");
        S3Object s3Object = s3.getObject(new GetObjectRequest(inputBucket, inputFile));
        try
        {
            System.out.println("Content-Type: " + s3Object.getObjectMetadata().getContentType());

            //Read the JSON File. Decode explicitly as UTF-8 instead of relying on the
            //platform default charset.
            JSONTokener jTokener = new JSONTokener(new BufferedReader(
                    new InputStreamReader(s3Object.getObjectContent(), "UTF-8")));
            jsonArray = new JSONArray(jTokener);
        }
        finally
        {
            //The content stream holds the underlying HTTP connection; release it as soon
            //as parsing is over (or has failed) instead of leaking it.
            s3Object.getObjectContent().close();
        }

        System.out.println("Json array length: " + jsonArray.length());
        for (int i = 0; i < jsonArray.length(); i++)
        {
            JSONObject jsonObject1 = jsonArray.getJSONObject(i);
            String hash = buildHashIndex(jsonObject1.getString("title"), jsonObject1.getString("body"));

            jsonObject1.put("hash_index", hash); //Add the Hash to the JSON Object
            jsonObject1.put("primary_key", i); //Create the primary key
            jsonObjectHolder.add(jsonObject1); //Add the JSON Object to the JSON collection
            jsonHashHolder.add(hash);
            System.out.println("JSON Number: " + i);
        }
        System.out.println("Hash Creation Completed");
    }
    catch (Exception e)
    {
        e.printStackTrace();
    }
}

/**
 * Builds the hash index for one article: the concatenated {@code wordMap} codes of the
 * article's distinct lower-cased words, taken in sorted order.
 *
 * @param title the article title
 * @param body  the article body
 * @return the concatenated codes with surrounding whitespace trimmed; empty when none of
 *         the words is a key of {@code wordMap}
 */
private String buildHashIndex(String title, String body)
{
    //Add Title and Body Together
    String text = title + " " + body;
    //Remove full stops (only those followed by whitespace or end of text) and commas
    text = text.replaceAll("\\.(?=\\s|$)", " ");
    text = text.replace(',', ' ');
    text = text.toLowerCase();

    //Create a word list without duplicated words, keeping first-occurrence order
    StringBuilder uniqueWords = new StringBuilder();
    HashSet<String> seen = new HashSet<String>();
    for (String word : text.split(" "))
    {
        if (seen.add(word))
        {
            uniqueWords.append(word);
            uniqueWords.append(" ");
        }
    }

    //Re-Arranging everything into Alphabetic Order
    String[] sortedWords = uniqueWords.toString().split(" ");
    Arrays.sort(sortedWords);

    //Navigate through the words and create the Hash; words missing from wordMap are skipped
    StringBuilder hashIndex = new StringBuilder();
    for (String word : sortedWords)
    {
        if (wordMap.containsKey(word))
        {
            hashIndex.append((String) wordMap.get(word));
        }
    }
    return hashIndex.toString().trim();
}
但是,执行代码时出现以下错误。
Exception in thread "main" java.lang.OutOfMemoryError: Java heap space
at java.util.Arrays.copyOf(Arrays.java:2894)
at java.lang.AbstractStringBuilder.expandCapacity(AbstractStringBuilder.java:117)
at java.lang.AbstractStringBuilder.append(AbstractStringBuilder.java:589)
at java.lang.StringBuffer.append(StringBuffer.java:337)
at com.amazonaws.util.json.JSONTokener.nextString(JSONTokener.java:284)
at com.amazonaws.util.json.JSONTokener.nextValue(JSONTokener.java:348)
at com.amazonaws.util.json.JSONObject.<init>(JSONObject.java:222)
at com.amazonaws.util.json.JSONTokener.nextValue(JSONTokener.java:351)
at com.amazonaws.util.json.JSONArray.<init>(JSONArray.java:125)
at HashCreator.createHash(HashCreator.java:215)
at HashCreator.<init>(HashCreator.java:61)
at Main.main(Main.java:9)
[root@ip-172-31-45-123 JarFiles]#
错误指向第215行,也就是这行代码:jsonArray = new JSONArray(jTokener);
这里发生了什么?我该如何解决这个问题?
修改
这是我的JSON代码:
[
{
"hash_index": "00102x05h06l0aj0dw",
"body": "Who's signing up for Obamacare?",
"_type": "ArticleItem",
"title": "Who's signing up for Obamacare? - Jan. 13, 2014",
"source": "money.cnn.com",
"primary_key": 0,
"last_crawl_date": "2014-01-14",
"url": "http://money.cnn.com/2014/01/13/news/economy/obamacare-enrollment/index.html"
},
{
"hash_index": "00102x05h06l0aj0dw0iz0kn0l@0t#0",
"body": "Who's signing up for Obamacare?",
"_type": "ArticleItem",
"title": "Who's signing up for Obamacare? - Jan. 13, 2014",
"source": "money.cnn.com",
"primary_key": 1,
"last_crawl_date": "2014-01-14",
"url": "http://money.cnn.com/2014/01/13/news/economy/obamacare-enrollment/index.html"
}
]
无论如何,请不要给我“增加堆大小”这类方案。那不是真正的解决办法,只是权宜之计。
答案 0 :(得分:0)
只需增加堆大小:
java -Xmx2g myprogram
答案 1 :(得分:0)
堆是存储对象实例的地方。请结合程序中各变量的作用域,检查是否有实例一直留在作用域内而无法释放。调用createHash()的方法也是如此,它可能在自己的作用域内持有对象。
如果堆被耗尽,这可能说明某处创建了大量对象,并且这些对象可能被保存在列表或其他数据结构中;这些结构持有对它们的引用,使垃圾收集器无法回收这部分内存。
增加堆大小会有所帮助,但它可能只会延迟问题。