我有一个带有一些“表格”数据的长文本文件,即:
12/10/2018 aaaa bbb xxxxxxxxxxxxxxxxxxxxxxxxxxxxx 002424004234
xxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxx
xxxxxxxxxxxxxxxxxxxxxxxxxxxxx
12/11/2018 cccc dddd yyyyyyyyyyyyyyyyyyyyyy 0542121212122
yyyyyyyyyyyyyyyyyyyyyy
12/12/2018 eeee ffffff zzzzzzzzzzzzzzzzzzzzzzz 0639872651252
12/13/2018 ggggggg hhhhhh vvvvv vvvvvvvvvvvvvvvvv 1968745213648
vvvvvvvvvvvvvvvvvvvvvvv
12/14/2018 ....
这是扫描得到的结果,其中某些列的内容就像电子表格的“单元格”一样跨越多行。如何使用命令行工具将其转换为CSV文件,例如:
12/10/2018,aaaaaaaa,bbbbb,xxxxxx.......xxxx,002424004234
12/11/2018,ccccccc,dddddd,yyyyyy.......yyyy,0542121212122
等等?
谢谢
编辑: 扫描文件后得到一个文本文件。该文本以“表格形式”显示数据,其中第三列为“多行”文本。我想将其转换为简单的CSV文件,即把多行“单元格”的所有文本都放到该记录的第一行中。上面的 xxxxx ... xxxx 代表第三列的多行文字。
EDIT2: 数据示例
Date AMOUNT OP DESCRIPTION CODE
12/10/2018 $123,45 id01 payment for hotel in Las Vegas 005214875462
room
room service
dinner
golf club
12/11/2018 $400,00 id04 cash from ATM 0528158852687
located in L.A.
12/12/2018 $1000,00 id99 ACME tornado pill 854674852658
我想将其转换为:
12/10/2018;$123,45;id01;payment for hotel in Las Vegas room room service dinner golf club;005214875462
12/11/2018;$400,00;id04;cash from ATM located in L.A.;0528158852687
12/12/2018;$1000,00;id99;ACME tornado pill;854674852658
答案 0 :(得分:2)
您需要使用多个空格作为字段分隔符(FS),并在输入中修剪尾随空白。 检查以下代码(另存为ip.awk)
// Text field whose contents are sent as the query in sendMessage; startRecording
// also writes the live speech transcription into it.
@IBOutlet weak var messageField: UITextField!
// Label that speechAndText(text:) fills with the bot's reply.
@IBOutlet weak var chipResponse: UILabel!
// Set to 1 in sendMessage when the bot replies with the "childhood experiences" question.
var a_count = 0
// NOTE(review): not referenced anywhere in the visible code — confirm before removing.
var b_count = 0
// NOTE(review): not referenced anywhere in the visible code — confirm before removing.
var docRef : DocumentReference!
// uid of the current user, captured once when this property is initialized; nil when
// there is no current user. sendMessage prefixes it to every query.
let uid = Auth.auth().currentUser?.uid
// NOTE(review): not referenced anywhere in the visible code — confirm before removing.
var db: Firestore!
// Most recent "speech" text received from the bot (assigned in sendMessage).
var a = ""
// Recognizer created for the "en-US" locale. It is used as an optional elsewhere in
// this class (force-unwrapped in viewDidLoad and startRecording).
private let speechRecognizer = SFSpeechRecognizer(locale: Locale.init(identifier: "en-US"))
// Request that receives microphone buffers while recording; created in startRecording
// and ended in sendMessage.
private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
// Recognition task started in startRecording; cleared when recognition finishes or fails.
private var recognitionTask: SFSpeechRecognitionTask?
// Audio engine whose input node supplies the microphone buffers.
private let audioEngine = AVAudioEngine()
// Button whose title toggles between "Start Recording" and "Stop Recording".
@IBOutlet weak var microphonebutton: UIButton!
/// Toggles speech capture and, when stopping, sends the recognized text to the bot.
///
/// While the audio engine is running this stops it, ends the recognition request,
/// resets the audio session and submits `uid + text` as a text query. The "speech"
/// value of the first reply message is stored in `a`, compared against two known
/// replies, and then spoken and displayed through `speechAndText(text:)`.
/// When the engine is not running a new recording is started instead.
///
/// - Parameter sender: The control that triggered the action (unused).
@IBAction func sendMessage(_ sender: Any) {
    guard audioEngine.isRunning else {
        startRecording()
        microphonebutton.setTitle("Stop Recording", for: .normal)
        return
    }

    audioEngine.stop()
    recognitionRequest?.endAudio()
    let audioSession = AVAudioSession.sharedInstance()
    do {
        try audioSession.setCategory(AVAudioSessionCategoryPlayAndRecord)
        try audioSession.setMode(AVAudioSessionModeDefault)
    } catch {
        print("audioSession properties weren't set because of an error.")
    }
    microphonebutton.isEnabled = false
    microphonebutton.setTitle("Start Recording", for: .normal)
    messageField.isUserInteractionEnabled = true

    let request = ApiAI.shared().textRequest()

    // Nothing to send when no text was recognized.
    guard let text = messageField.text, text != "" else {
        return
    }
    // The query is prefixed with the user's id; without a signed-in user there is
    // nothing valid to send, so bail out instead of crashing on a force unwrap.
    guard let uid = uid else {
        print("sendMessage: no signed-in user, query not sent.")
        return
    }

    if a_count == 1 {
        print("count", a_count)
    }
    let new = uid + text
    request?.query = new
    print("new", new)
    print("text", text)

    request?.setMappedCompletionBlockSuccess({ (_, rawResponse) in
        // Validate the reply step by step; any missing piece ends handling
        // quietly instead of trapping on a forced cast or an empty array.
        guard let response = rawResponse as? AIResponse else {
            print("sendMessage: unexpected response type.")
            return
        }
        guard let messages = response.result.fulfillment.messages,
              let firstMessage = messages.first,
              let speech = (firstMessage as NSDictionary).value(forKey: "speech") as? String else {
            return
        }

        print(speech)
        self.a = speech
        print("a", self.a)
        print("else count", self.a_count)

        if self.a == "Level 1 completed. Unlocked the next level. Visit the View Your Story section to see your answers." {
            print("ramya")
            self.messageField.isHidden = true
        }
        if self.a == "Are there any childhood experiences that stand out that you’d like to share" {
            print("In loop")
            self.a_count = 1
            print("a count", self.a_count)
        }
        self.speechAndText(text: speech)
    }, failure: { (_, error) in
        if let error = error {
            print(error)
        }
    })
    ApiAI.shared().enqueue(request)
    messageField.text = ""
}
/// Synthesizer used to read replies aloud.
let speechSynthesizer = AVSpeechSynthesizer()

/// Speaks `text` aloud and shows it in `chipResponse`.
///
/// - Parameter text: The reply to speak and display.
func speechAndText(text: String) {
    speechSynthesizer.speak(AVSpeechUtterance(string: text))

    let showReply: () -> Void = {
        self.chipResponse.text = text
    }
    UIView.animate(withDuration: 1.0,
                   delay: 0.0,
                   options: .curveEaseInOut,
                   animations: showReply,
                   completion: nil)
}
/// Prepares the speech UI: text entry and the microphone button start disabled,
/// this controller becomes the recognizer's delegate, and speech-recognition
/// authorization is requested. The button is enabled only when authorization is
/// granted and a recognizer actually exists.
override func viewDidLoad() {
    super.viewDidLoad()
    messageField.isUserInteractionEnabled = false
    // Stays disabled until the authorization callback below says otherwise.
    microphonebutton.isEnabled = false
    // The recognizer is optional; optional chaining avoids a crash when it is nil.
    speechRecognizer?.delegate = self
    let hasRecognizer = speechRecognizer != nil

    SFSpeechRecognizer.requestAuthorization { (authStatus) in
        var isButtonEnabled = false
        switch authStatus {
        case .authorized:
            // Recording needs the recognizer, so never enable the button without one.
            isButtonEnabled = hasRecognizer
        case .denied:
            isButtonEnabled = false
            print("User denied access to speech recognition")
        case .restricted:
            isButtonEnabled = false
            print("Speech recognition restricted on this device")
        case .notDetermined:
            isButtonEnabled = false
            print("Speech recognition not yet authorized")
        }
        // UI state is updated on the main queue.
        OperationQueue.main.addOperation() {
            self.microphonebutton.isEnabled = isButtonEnabled
        }
    }
}
/// Starts a live speech-recognition session.
///
/// Cancels any previous recognition task, configures and activates the shared
/// audio session, creates a recognition request that reports partial results,
/// and installs a tap on the audio engine's input node that feeds buffers into
/// that request. Transcriptions are written into `messageField` as they arrive;
/// when recognition ends or fails the engine is stopped, the tap removed and the
/// microphone button re-enabled.
func startRecording() {
    // Without a recognizer nothing can be transcribed; return before touching any
    // state instead of crashing on a force unwrap further down.
    guard let speechRecognizer = speechRecognizer else {
        print("Speech recognizer is not available.")
        return
    }

    messageField.text = ""

    recognitionTask?.cancel()
    recognitionTask = nil

    let audioSession = AVAudioSession.sharedInstance()
    do {
        try AVAudioSession.sharedInstance().setCategory(AVAudioSessionCategoryPlayAndRecord, mode: AVAudioSessionModeDefault, options: .defaultToSpeaker)
        try AVAudioSession.sharedInstance().setActive(true)
        try audioSession.setMode(AVAudioSessionModeMeasurement)
        try audioSession.setActive(true, with: .notifyOthersOnDeactivation)
    } catch {
        print("audioSession properties weren't set because of an error.")
    }

    let request = SFSpeechAudioBufferRecognitionRequest()
    request.shouldReportPartialResults = true
    recognitionRequest = request

    let inputNode = audioEngine.inputNode

    recognitionTask = speechRecognizer.recognitionTask(with: request, resultHandler: { (result, error) in
        var isFinal = false
        if let result = result {
            self.messageField.text = result.bestTranscription.formattedString
            isFinal = result.isFinal
        }
        // Tear the session down on error or once the final result has arrived.
        if error != nil || isFinal {
            self.audioEngine.stop()
            inputNode.removeTap(onBus: 0)
            self.recognitionRequest = nil
            self.recognitionTask = nil
            self.microphonebutton.isEnabled = true
        }
    })

    let recordingFormat = inputNode.outputFormat(forBus: 0)
    inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { (buffer, when) in
        self.recognitionRequest?.append(buffer)
    }

    audioEngine.prepare()
    do {
        try audioEngine.start()
    } catch {
        // NOTE(review): the prompt below is still shown when the engine fails to start.
        print("audioEngine couldn't start because of an error.")
    }
    messageField.text = "Say something, I'm listening!"
}
}
您可以像这样运行代码 1.txt是输入文件
# ip.awk - collapse the multi-line description "cell" of a scanned table into a
# single semicolon-separated record per row.
#
# Expected input: a header line, then rows of five columns separated by runs of
# two or more whitespace characters, optionally followed by indented
# continuation lines that carry more description text.
BEGIN {
    # Two or more whitespace characters separate columns, so single spaces
    # inside a description do not split the field.
    FS = "[[:space:]][[:space:]]+";
    op[0] = "";
    line = 0;
}
{
    # Skip the header line and blank lines. `next` is the awk statement that
    # moves on to the next input line (`skip` is not an awk statement).
    if (NR <= 1 || NF == 0)
        next;
    if (NF == 5) {
        # A complete row: start a new output record.
        line = line + 1;
        op[line,"1"] = $1;
        op[line,"2"] = $2;
        op[line,"3"] = $3;
        op[line,"4"] = $4;
        op[line,"5"] = $5;
    }
    else {
        # Continuation line: the leading indentation makes $1 empty, so the
        # text is in $2; append it to the current record's description.
        op[line,"4"] = op[line,"4"] " " $2;
    }
}
END {
    OFS = ";"
    for (i = 1; i <= line; i++)
        print op[i,"1"], op[i,"2"], op[i,"3"], op[i,"4"], op[i,"5"];
}
OP是
cat 1.txt | sed 's/[ \t]*$//g' | awk -f ip.awk
答案 1 :(得分:2)
使用 Perl 单行命令(one-liner):
> cat tomc_in.dat
Date AMOUNT OP DESCRIPTION CODE
12/10/2018 $123,45 id01 payment for hotel in Las Vegas 005214875462
room
room service
dinner
golf club
12/11/2018 $400,00 id04 cash from ATM 0528158852687
located in L.A.
12/12/2018 $1000,00 id99 ACME tornado pill 854674852658
> perl -F"/\s+/" -lane ' print $all if $idp and /^\d+/ and $idp ne $F[2] ;if($.>1) { if(/^\d+/) { $p="$F[0];$F[1];$F[2]";$c=$F[-1];$de=join(" ",@F[3..$#F-1]);$idp=$F[2]} else {s/^\s*|\s*$//g;$de.=" ".$_}; $all="$p;$de;$c"; } END { print $all } ' tomc_in.dat
12/10/2018;$123,45;id01;payment for hotel in Las Vegas room room service dinner golf club;005214875462
12/11/2018;$400,00;id04;cash from ATM located in L.A.;0528158852687
12/12/2018;$1000,00;id99;ACME tornado pill;854674852658
>
答案 2 :(得分:1)
// Illustrative webpack configuration fragment: registers html-webpack-plugin
// with a template and an output filename. The `...` lines stand for options
// the author left out, so the fragment is not runnable as written.
var HtmlWebpackPlugin = require('html-webpack-plugin');
module.exports = {
...
plugins: [
// Both paths below look like placeholders to be replaced with real project paths.
new HtmlWebpackPlugin({
template: 'path/to/empty/template',
filename: 'path/to/templated/output'
})
],
...
};
以上内容将在描述中保留空白,以防万一。
答案 3 :(得分:0)
输入:
$ cat input.dat
Date AMOUNT OP DESCRIPTION CODE
12/10/2018 $123,45 id01 payment for hotel in Las Vegas 005214875462
room
room service
dinner
golf club
12/11/2018 $400,00 id04 cash from ATM 0528158852687
located in L.A.
12/12/2018 $1000,00 id99 ACME tornado pill 854674852658
命令:
$ awk 'BEGIN{OFS=";"}/^[0-9]{2}\/[0-9]{2}\/[0-9]{4}/{if(NR>2){print date,amount,desc, op}date=$1; amount=$2; op=$3; code=$NF; for(i=4;i<=NF-1;i++){if(i==4){desc=$i}else{desc=desc" "$i}};next}{for(i=1;i<=NF;i++){desc=desc" "$i}}END{print date,amount,desc, op}' input.dat
输出:
Date;AMOUNT;OP;DESCRIPTION;CODE
12/10/2018;$123,45;payment for hotel in Las Vegas room room service dinner golf club;id01
12/11/2018;$400,00;cash from ATM located in L.A.;id04
12/12/2018;$1000,00;ACME tornado pill;id99
说明:
- `BEGIN{OFS=";"; print "Date;AMOUNT;OP;DESCRIPTION;CODE"}`:将输出字段分隔符设置为 `;`,并打印标题行。
- `/^[0-9]{2}\/[0-9]{2}\/[0-9]{4}/`:匹配以日期开头的行。
- `{if(NR>2){print date,amount,desc, op}`:如果当前不是第一个数据行,则打印用于数据转换的各变量的内容(即上一条记录)。
- `date=$1; amount=$2; op=$3; code=$NF; for(i=4;i<=NF-1;i++){if(i==4){desc=$i}else{desc=desc" "$i}};next`:将输入字段保存到变量中,把第 4 个字段到代码(最后一个字段)之前的所有字段拼接成 desc 变量,然后跳到下一行。
- `{for(i=1;i<=NF;i++){desc=desc" "$i}}`:对于其余的行(续行),将其所有内容追加到说明(desc)中。
- `END{print date,amount,desc, op}`:打印最后一条记录。
# gawk profile, created Wed Dec 12 17:34:39 2018
# BEGIN rule(s)
# Collapses each dated row and its continuation lines into one
# semicolon-separated record: Date;AMOUNT;OP;DESCRIPTION;CODE.
BEGIN {
	OFS = ";"
	print "Date;AMOUNT;OP;DESCRIPTION;CODE"
}

# Appends one word to the description, separated by a single space.
function append_desc(word)
{
	desc = (desc == "" ? word : desc " " word)
}

# Prints the record collected so far, in the same column order as the header.
function emit_record()
{
	print date, amount, op, desc, code
}

# Rule(s)

# A line starting with a date (NN/NN/NNNN) begins a new record.
/^[0-9]{2}\/[0-9]{2}\/[0-9]{4}/ {
	# Flush the previous record, if there is one.
	if (date != "") {
		emit_record()
	}
	date = $1
	amount = $2
	op = $3
	code = $NF
	# Reset so a row without description words does not inherit the previous one.
	desc = ""
	# Everything between the third field and the last one is description text.
	for (i = 4; i <= NF - 1; i++) {
		append_desc($i)
	}
	next
}

# Any other line is a continuation of the current record's description.
{
	for (i = 1; i <= NF; i++) {
		append_desc($i)
	}
}

# END rule(s)

END {
	# Print the last record; nothing is printed when no dated row was seen.
	if (date != "") {
		emit_record()
	}
}
答案 4 :(得分:0)
我假定您的输入文件以制表符分隔,
并且您希望在来自不同行的描述之间加上逗号
(因为我不喜欢看到“... room room service ...”这样连在一起的结果)。
您的样本数据
cat input;
Date AMOUNT OP DESCRIPTION CODE
12/10/2018 $123,45 id01 payment for hotel in Las Vegas 005214875462
room
room service
dinner
golf club
12/11/2018 $400,00 id04 cash from ATM 0528158852687
located in L.A.
12/12/2018 $1000,00 id99 ACME tornado pill 854674852658
我的脚本
cat collapse_column.awk
#! /usr/bin/awk -f
# collapse_column.awk
# Tab-separated input, semicolon-separated output. The bare getline in BEGIN
# reads the first input line (the header) so the rules below never see it.
BEGIN {
    FS = "\t"
    OFS = ";"
    getline
}

# Prints the record currently held in the variables.
function flush_record()
{
    print date, amount, op, description, code
}

# A line starting with a digit is a complete row: output the previous row
# (if any), then remember this row's fields.
/^[0-9]/ {
    if (date) {
        flush_record()
    }
    date = $1
    amount = $2
    op = $3
    description = $4
    code = $5
}

# A line not starting with a digit continues the previous row: each run of a
# space followed by whitespace becomes a comma, and the line is appended to
# the description.
/^[^0-9]/ {
    gsub(/ [[:space:]]+/, ",")
    description = description $0
}

END {
    flush_record()
}
结果
$ ./collapse_column.awk input
12/10/2018;$123,45;id01;payment for hotel in Las Vegas,room,room service,dinner,golf club;005214875462
12/11/2018;$400,00;id04;cash from ATM,located in L.A.;0528158852687
12/12/2018;$1000,00;id99;ACME tornado pill;854674852658
该 awk 脚本会识别以数字开头的完整行,并将其各字段赋值给变量。不以数字开头的行被视为前一行的延续,其修剪后的值会被累积起来。遇到新的完整行时,将输出先前的完整行以及所有累积的内容。