Apache Spark,NameError:名称“flatMap”未定义

时间:2019-08-14 16:59:25

标签: apache-spark

当我尝试

tokens = cleaned_book(flatMap(normalize_tokenize))
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
NameError: name 'flatMap' is not defined

其中

cleaned_book.count()
65744

def normalize_tokenize(line):
...     return re.sub('\s+', ' ', line).strip().lower().split(' ')

另一方面

sc.parallelize([3,4,5]).flatMap(lambda x: range(1,x)).collect()

在同一个 PySpark shell 中可以正常工作

[1, 2, 1, 2, 3, 1, 2, 3, 4]

为什么会有NameError?

1 个答案:

答案 0 :(得分:1)

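好的,这是一个带分词器(tokenizer)的Scala示例,我觉得您的写法有误:flatMap 是 RDD 的方法而不是独立的函数,因此应写成 cleaned_book.flatMap(normalize_tokenize),而不是 cleaned_book(flatMap(normalize_tokenize))。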

 /// Increments the "upvotes" counter for the current comment inside a
 /// Firebase transaction and stores the state of the two vote checkboxes
 /// in `UserDefaults`.
 ///
 /// The counter lives at `postRef/<comment.postID>/<comment.snap>/upvotes`.
 /// A missing (or non-integer) counter value is treated as 0 before the
 /// increment. If the transaction is not committed, the checkbox states are
 /// written to `UserDefaults` again and the error, if any, is logged.
 func voteUp() {
     // Build the UserDefaults keys once, up front, instead of re-interpolating
     // them inside each closure.
     let upvoteKey = "\(comment.snap) upvotes"
     let downvoteKey = "\(comment.snap) downvotes"
     let counterRef = self.postRef.child(comment.postID).child(comment.snap).child("upvotes")

     counterRef.runTransactionBlock({ currentData in
         // Checking for nil data is essential with transactional writes:
         // fall back to 0 instead of force-unwrapping.
         let current = (currentData.value as? Int) ?? 0
         currentData.value = current + 1

         // NOTE(review): the Firebase docs say a transaction block can be
         // re-run on stale data, so these writes may happen more than once.
         // They are idempotent, but confirm this is the intended place.
         UserDefaults.standard.set(self.checkbox.isChecked, forKey: upvoteKey)
         UserDefaults.standard.set(self.downVote.isChecked, forKey: downvoteKey)
         return TransactionResult.success(withValue: currentData)
     }, andCompletionBlock: { error, committed, _ in
         // Nothing further to do once the new counter value was committed.
         guard !committed else { return }

         if let error = error {
             print("voteUp transaction failed: \(error)")
         }
         // The transaction did not go through: store the checkbox states as
         // they are now. (Calling TransactionResult.abort() here would have
         // no effect, because the completion block returns nothing.)
         UserDefaults.standard.set(self.checkbox.isChecked, forKey: upvoteKey)
         UserDefaults.standard.set(self.downVote.isChecked, forKey: downvoteKey)
     })
 }

  /// Decrements the "upvotes" counter for the current comment inside a
  /// Firebase transaction and stores the state of the two vote checkboxes
  /// in `UserDefaults`.
  ///
  /// The counter lives at `postRef/<comment.postID>/<comment.snap>/upvotes`.
  /// A missing (or non-integer) counter value is treated as 0 before the
  /// decrement. The checkbox states are written again when the transaction
  /// completes, whether or not it was committed; an error, if any, is logged.
  func voteDown() {
      // Build the UserDefaults keys once, up front, instead of re-interpolating
      // them inside each closure.
      let upvoteKey = "\(comment.snap) upvotes"
      let downvoteKey = "\(comment.snap) downvotes"
      let counterRef = self.postRef.child(comment.postID).child(comment.snap).child("upvotes")

      counterRef.runTransactionBlock({ currentData in
          // Checking for nil data is essential with transactional writes:
          // fall back to 0 instead of force-unwrapping.
          let current = (currentData.value as? Int) ?? 0

          // One write per key is enough; the original repeated the same two
          // writes several times with identical values.
          // NOTE(review): the Firebase docs say a transaction block can be
          // re-run on stale data, so these writes may happen more than once.
          UserDefaults.standard.set(self.checkbox.isChecked, forKey: upvoteKey)
          UserDefaults.standard.set(self.downVote.isChecked, forKey: downvoteKey)

          currentData.value = current - 1
          return TransactionResult.success(withValue: currentData)
      }, andCompletionBlock: { error, _, _ in
          if let error = error {
              print("voteDown transaction failed: \(error)")
          }
          // Both the committed and the non-committed path stored the checkbox
          // states, so do it unconditionally. (Calling
          // TransactionResult.abort() here would have no effect, because the
          // completion block returns nothing.)
          UserDefaults.standard.set(self.checkbox.isChecked, forKey: upvoteKey)
          UserDefaults.standard.set(self.downVote.isChecked, forKey: downvoteKey)
      })
  }

这样可以正常工作:flatMap 需要以方法的形式调用(即 .flatMap),并且要按上面的顺序书写。我个人觉得内联写法更容易,不过我注意到评论里也提到了 .flatMap。