如何使用 pandas 在每一列中搜索字符串值?假设我有 32 列,
# ActiveRecord model for a venue (club).
#
# Declares geocoding of the venue address, the associations and validations
# of a venue, a Paperclip-style `cell_image` attachment, and helpers that
# build the HTML description, pricing fractions and merchant id.
class Venue < ActiveRecord::Base
  # NOTE(review): macro presumably provided by the nilify_blanks gem -- confirm.
  nilify_blanks

  # Virtual attribute reader: misc_info joined into a single string.
  #
  # @return [String, nil] the entries joined with "\n, ", or nil when
  #   misc_info is nil
  def misc_info_raw
    misc_info.join("\n, ") unless misc_info.nil?
  end

  # Virtual attribute writer: stores the given values as misc_info.
  #
  # @param values [Object] the new misc_info value, assigned as-is
  def misc_info_raw=(values)
    self.misc_info = values
  end

  # Geocoding
  geocoded_by :full_address do |venue, results|
    # Only the first geocoding result is used; nothing is changed when the
    # lookup returned no results.
    if (geo = results.first)
      venue.latitude = geo.latitude
      venue.longitude = geo.longitude
      venue.district = geo.neighborhood if geo.neighborhood
    end
  end
  after_validation :geocode, :if => :address_changed?

  # Club type labels, indexed by the integer club_type column.
  # Index 0 is deliberately nil.
  CLUB_TYPES = [nil, 'xxx', 'xxxx', 'xxxx',
                'Xx', 'xx xxxx', 'xxx xxx'].freeze

  belongs_to :subdivision
  has_many :events
  has_many :stake_holders
  has_one :thumbnail_image
  has_many :inventories, :dependent => :destroy
  has_many :promotions
  has_many :claimed_promotions
  has_many :reviews
  has_many :images, :dependent => :destroy
  has_many :sections, :dependent => :destroy

  accepts_nested_attributes_for :subdivision,
                                :images, :inventories,
                                :thumbnail_image, :sections, :allow_destroy => true

  validates :name,
            :presence => true,
            :length => { :maximum => 30 }
  validates :misc_info, :about,
            :length => { :maximum => 255 }
  validates :club_type,
            :about,
            :percent_discount,
            :bt_cut,
            :district,
            :email, :presence => true

  # cell_image is used instead of thumbnail_image for versions < 2.0
  has_attached_file :cell_image,
                    :url => "/thumbnails/:id/:style/:basename.:extension",
                    :path => "/thumbnails/:id/:style/:basename.:extension"
  validates_attachment_content_type :cell_image, :content_type => /\Aimage\/.*\Z/

  # Required for geocoding: the address string handed to the geocoder.
  def full_address
    address
  end

  # @return [Object, nil] image_url of the first image, or nil when the venue
  #   has no images
  def first_image_url
    image = images.first
    image && image.image_url
  end

  # @return [Object, nil] URL of the cell_image attachment
  def thumb_image_url
    cell_image.url if cell_image
  end

  # @return [Object, nil] URL of the thumbnail_image's image, or nil when the
  #   venue has no thumbnail_image
  def small_thumb_image_url
    thumbnail_image.image.url if thumbnail_image
  end

  # Opening markup of the description; opens two <div>s and a <ul>.
  DESC_HTML_BEG = "<div style=\"font-family:helvetica;color:white;\" >\r\n<p>\r\nDetails\r\n</p>\r\n<hr>\r\n<div>\r\n<ul>\r\n".freeze
  # Closing markup; closes the <ul> and both <div>s opened by DESC_HTML_BEG.
  DESC_HTML_END = "\r\n</ul>\r\n</div>\r\n</div>".freeze

  # Builds the HTML description of the venue.
  #
  # The special-event item is only included when the request is for today
  # (days_in_future == 0) and tonights_event is set. The BT offer item, when
  # bt_offer is set, is placed before everything else.
  #
  # @param days_in_future [Integer] offset in days of the requested date
  # @return [String] the description wrapped in DESC_HTML_BEG / DESC_HTML_END
  def description(days_in_future)
    base = "#{description_1}#{description4}#{description2}#{description3}"
    if days_in_future == 0 && !tonights_event.nil?
      base = "#{special_event_html}#{base}"
    end
    base = "#{bt_offer_html}#{base}" unless bt_offer.nil?
    "#{DESC_HTML_BEG}#{base}#{DESC_HTML_END}"
  end

  # @return [String] list item announcing tonights_event
  def special_event_html
    return_li_html("SPECIAL EVENT: #{tonights_event}")
  end

  # @return [String] list item announcing the free item of bt_offer
  def bt_offer_html
    master = bt_offer.inventory.master_inventory
    return_li_html("BT EXCLUSIVE OFFER: Free #{master.brand} #{master.name} with every order!")
  end

  # Wraps a string in a styled <li> element.
  #
  # @param s [Object] content of the list item (interpolated as-is, not escaped)
  # @return [String]
  def return_li_html(s)
    "<li style=\"padding-bottom:1ex\">#{s}</li>"
  end

  # @return [Float] fraction kept by the merchant: (100 - bt_cut) / 100
  def merchant_cut
    ((100 - bt_cut) * 0.01).to_f
  end

  # @return [Float] percent_discount expressed as a fraction
  def discount
    (percent_discount * 0.01).to_f
  end

  # Returns the club type label for the integer club_type column.
  #
  # @return [String, nil] the label, or nil when club_type is nil, 0 or out
  #   of range
  def club_type_string
    # Array#[] raises TypeError for a nil index, so guard unset club_type.
    CLUB_TYPES[club_type] unless club_type.nil?
  end

  # @return [Array<String, nil>] all club type labels (CLUB_TYPES)
  def self.club_types
    CLUB_TYPES
  end

  # Builds the merchant id "<sanitized name>_<locale_code>_<postal_code>" from
  # the venue name and its subdivision.
  #
  # NOTE(review): inside a regexp literal `\\s` matches a literal backslash
  # (and the letter s), not whitespace. As written, every character except
  # ASCII letters, digits and backslashes is stripped from the name. The
  # pattern is kept unchanged so previously generated ids stay stable --
  # confirm whether backslashes should be stripped as well.
  #
  # @return [String]
  def get_bt_merchant_id
    sanitized_name = name.gsub(/[^0-9a-z\\s]/i, '').downcase
    "#{sanitized_name}_#{subdivision.locale_code}_#{subdivision.postal_code}"
  end
end
下面这行代码只能检查该值是否出现在“A”列中;如何搜索每一列,并找出包含该值的行?数据集:
df[df['A'].str.contains("hello")]
如果我搜索“hello”或“Hello”输出应该是:
A B C
1 hi hie
2 bye Hello
答案 0 :(得分:4)
我认为你可以使用:
df = pd.DataFrame({'A':['hello fgf','s','f'],'B':['d','ff hello','f'],'C':[4,7,8]})
print (df)
A B C
0 hello fgf d 4
1 s ff hello 7
2 f f 8
mask = df.applymap(lambda x: 'hello' in str(x))
print (mask)
A B C
0 True False False
1 False True False
2 False False False
然后,如果需要过滤,可以在 boolean indexing 中加上 any,检查每行是否至少有一个 True:
df1 = df[mask.any(axis=1)]
print (df1)
A B C
0 hello fgf d 4
1 s ff hello 7
编辑:
tested = 'hello'
mask = df.applymap(lambda x: tested.lower() in str(x).lower())
print (mask)
A B C
0 False False False
1 False False True
答案 1 :(得分:1)
您还可以将所有列连接成一个字符串,并在连接字符串中搜索您的子字符串:
In [21]: df[df.astype(str).add('|').sum(1).str.contains('hello')]
Out[21]:
A B C
0 hello fgf d 4
1 s ff hello 7
说明:
In [22]: df.astype(str).add('|').sum(1)
Out[22]:
0 hello fgf|d|4|
1 s|ff hello|7|
2 f|f|8|
dtype: object