我有一个像这样的数据表:
id number
1 5562,4024,...,1213
2 4244,4214,...,244
3 424,4213
4 1213,441
...
我只想对数字的每一列的最后一部分进行子集化,
id number
1 1213
2 244
3 4213
4 441
...
那我应该怎么做呢?
答案 0 :(得分:1)
一个选项是将字符串末尾(from flask import current_app,
from multiprocessing.pool import ThreadPool
from Server.database import db
import boto3
import io
import uuid
def upload_audio_file_to_s3(file):
app = current_app._get_current_object()
with app.app_context():
s3 = boto3.client(service_name='s3',
aws_access_key_id=app.config.get('BOTO3_ACCESS_KEY'),
aws_secret_access_key=app.config.get('BOTO3_SECRET_KEY'))
extension = file.filename.rsplit('.', 1)[1].lower()
file.filename = f"{uuid.uuid4().hex}.{extension}"
s3.upload_fileobj(file,
app.config.get('S3_BUCKET'),
f"{app.config.get('UPLOADED_AUDIO_FOLDER')}/{file.filename}",
ExtraArgs={"ACL": 'public-read', "ContentType": file.content_type})
return file.filename
def generate_polly(voice_id, text):
app = current_app._get_current_object()
with app.app_context():
polly_client = boto3.Session(
aws_access_key_id=app.config.get('BOTO3_ACCESS_KEY'),
aws_secret_access_key=app.config.get('BOTO3_SECRET_KEY'),
region_name=app.config.get('AWS_REGION')).client('polly')
response = polly_client.synthesize_speech(VoiceId=voice_id,
OutputFormat='mp3', Text=text)
return response['AudioStream'].read()
def generate_polly_from_term(vocab_term, gender='m'):
app = current_app._get_current_object()
with app.app_context():
audio = generate_polly('Celine', vocab_term.term)
file = io.BytesIO(audio)
file.filename = 'temp.mp3'
file.content_type = 'mp3'
return vocab_term.id, upload_audio_file_to_s3(file)
def generate_async_audio_service(terms):
pool = ThreadPool(processes=12)
results = pool.map(generate_polly_from_term, terms)
# do something w/ results
)的数字捕获为一个组,紧跟$
,然后用捕获组的后向引用(,
)替换< / p>
\\1
或将字符(df$number <- as.numeric(sub(".*,(\\d+)$", "\\1", df$number))
)匹配到.*
,然后将其替换为空白(,
)
""
df$number <- as.numeric(sub(".*,", "", df$number))