# Build the on/off signal column "action" from the "reversal" column:
#    1 -> signal switched on at a row where reversal == 1
#   -1 -> signal switched off `hold_rows` rows after the matching "on" row
#    0 -> no action
#
# The scan is positional (0 .. len-1) instead of label-based, for two reasons:
#   * Writing to `.loc[label]` with a label that does not exist appends a new
#     row, which made the frame grow when the index was not a plain 0-based
#     range.
#   * Reassigning the loop variable of a `for` loop does not skip iterations,
#     so the intended jump past the holding window needs an explicit `while`.
hold_rows = 126  # number of rows between the "on" row and its "off" row

reversal_flags = df_zinc['reversal'].tolist()
n_rows = len(reversal_flags)
actions = [0] * n_rows

pos = 0
while pos < n_rows:
    if reversal_flags[pos] != 1:
        pos += 1
        continue

    actions[pos] = 1
    off_pos = pos + hold_rows
    # Only emit the "off" signal when it still falls inside the frame, so the
    # action column never becomes longer than the reversal column.
    if off_pos < n_rows:
        actions[off_pos] = -1
    # Resume scanning on the row after the "off" row; reversals inside the
    # holding window are deliberately ignored.
    pos = off_pos + 1

# Single assignment keeps the column exactly as long as the frame.
df_zinc['action'] = actions
>>> l = [('a',1),('b',2)]
>>> spark.createDataFrame(l)
>>> l = [(1),(2)]
>>> spark.createDataFrame(l)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/home/tianlh/spark/python/pyspark/sql/session.py", line 526, in createDataFrame
rdd, schema = self._createFromLocal(map(prepare, data), schema)
File "/home/tianlh/spark/python/pyspark/sql/session.py", line 390, in _createFromLocal
struct = self._inferSchemaFromList(data)
File "/home/tianlh/spark/python/pyspark/sql/session.py", line 322, in _inferSchemaFromList
schema = reduce(_merge_type, map(_infer_schema, data))
File "/home/tianlh/spark/python/pyspark/sql/types.py", line 992, in _infer_schema
raise TypeError("Can not infer schema for type: %s" % type(row))
TypeError: Can not infer schema for type: <type 'int'>
>>> l = [('a',1),('b',2)]
| a| 1|
| b| 2|
我的问题是: 为什么会这样呢? 创建两个列和创建一个列有什么区别? 为什么只创建单个列不起作用?