从数据帧中提取字符串与列表进行比较

时间:2017-01-31 10:12:03

标签: python pandas

我正在尝试从 pandas 数据帧(DF)的一列中提取字符串,需要匹配的源字符串存放在一个列表中。我尝试使用 df.str.extract(list1),但得到了一个 unhashable type(不可哈希类型)错误。我想我将列表与 DF 进行比较的方式不正确。

  

Col 1   Col 2
1       The date
2       Three has come
3       Mail Sent
4       Done Deal
  

Col 1   Col 2           Col 3 
1       The date        NaN
2       Three has come  Three has
3       Mail Sent        Mail
4       Done Deal        Done

我的清单如下

List1 = ['Three has' , 'Mail' , 'Done' , 'Game' , 'Time has come']

1 个答案:

答案 0 :(得分:4)

您可以用 |(正则表达式中的"或")join List1,然后使用 str.extract:

List1 = ['Three has' , 'Mail' , 'Done' , 'Game' , 'Time has come']

df['Col 3'] = df['Col 2'].str.extract("(" + "|".join(List1) +")", expand=False)
print (df)
   Col 1           Col 2      Col 3
0      1        The date        NaN
1      2  Three has come  Three has
2      3       Mail Sent       Mail
3      4       Done Deal       Done

List1 = ['Three has' , 'Mail' , 'Done' , 'Game' , 'Time has come']

df['Col 3'] = df['Col 2'].apply(lambda x: ''.join([L for L in List1 if L in x]))
df['Col 3'] = df['Col 3'].mask(df['Col 3'] == '')
print (df)
   Col 1           Col 2      Col 3
0      1        The date        NaN
1      2  Three has come  Three has
2      3       Mail Sent       Mail
3      4       Done Deal       Done

final UserService userService = UserServiceFactory.getUserService();
final User user = userService.getCurrentUser();

public class ConcreteSheetWriter implements SheetWriter {

    public ConcreteSheetWriter(DriveFileMaker driveFileMaker) {
        DriveFileMaker driveFileMaker1 = driveFileMaker;
        try {
            httpTransport = GoogleNetHttpTransport.newTrustedTransport();
            dataStoreFactory = AppEngineDataStoreFactory.getDefaultInstance();
            //TODO replace with appenginedatastore otherwise restart is painful
        } catch (Throwable t) {
            t.printStackTrace();
            // System.exit(1); TODO potentially fix for app engine
            logger.warning("Could not connect to sheets");
            throw new RuntimeException(t);
        }
    }

    private static Credential authorize(HttpTransport HTTP_TRANSPORT, DataStoreFactory dataStoreFactory) throws IOException {
        // Load client secrets.
        InputStream in = ConcreteSheetWriter.class.getResourceAsStream(SECRET_PATH);
        GoogleClientSecrets clientSecrets = GoogleClientSecrets.load(JSON_FACTORY, new InputStreamReader(in));

        /* THE CODE BELOW IN THIS METHOD REPRESENT STEP 6 */
        // Build flow and trigger user authorization request.
        GoogleAuthorizationCodeFlow flow =
                new GoogleAuthorizationCodeFlow.Builder(
                        HTTP_TRANSPORT, JSON_FACTORY, clientSecrets, SCOPES)
                        .setDataStoreFactory(dataStoreFactory)
                        .setAccessType("offline")
                        .build();
        /* The credentials before deploying to GAE. Problems when deploying on GAE
        Credential credential = new AuthorizationCodeInstalledApp(
                flow, new LocalServerReceiver()).authorize("user");
         */
        final UserService userService = UserServiceFactory.getUserService();
        final User user = userService.getCurrentUser();
        logger.info("User is " + user);
        final String userId = user.getUserId();
        final Credential credential = flow.loadCredential(userId);
        return credential;
    }

    @Override
    public List<List<String>> read(String changedFileId) {
        Sheets service = null;
        final String range = "Sheet1!A1:AF30";
        try {
            service = getSheetsService(authorize(httpTransport, dataStoreFactory), httpTransport);
            ValueRange spreadsheets = service.spreadsheets().values().get(changedFileId, range).execute();
            return convert(spreadsheets.getValues());
        } catch (IOException e) {
            throw new CouldNotCommunicateWithGoogleSheetsException(e);
        }
    }
}

中的所有值

public class PlusSampleServlet extends AbstractAppEngineAuthorizationCodeServlet {
    private final static Logger logger = Logger.getLogger(PlusSampleServlet.class.getName());
    private static final long serialVersionUID = 1L;
    private final DriveUtilityService driveUtilityService;

    public PlusSampleServlet() {
        //omitted
    }

    private static void addLoginLogoutButtons(HttpServletRequest req, HttpServletResponse resp, StringBuilder resultFromWatch, UserService userService, String thisUrl, PrintWriter respWriter) throws IOException {
        //omitted
    }

    private static Optional<Channel> watchFile(Drive service, String fileId, String channelId, String channelType, String channelAddress) throws IOException {
        final Channel returnValue;
        final Channel channel = new Channel();
        channel.setId(channelId);
        channel.setType(channelType);
        channel.setAddress(channelAddress);
        final Drive.Files tmp = service.files();
        returnValue = tmp.watch(fileId, channel).execute();
        return Optional.fromNullable(returnValue);
    }

    @Override
    public void doGet(HttpServletRequest req, HttpServletResponse resp) throws IOException, ServletException {
        AuthorizationCodeFlow authFlow = initializeFlow();
        final String userId = getUserId(req);
        Credential credential = authFlow.loadCredential(userId);
        logger.info("Executing listener activation for user " + userId);
        StringBuilder resultFromWatch = new StringBuilder();
        Drive drive = new Drive.Builder(Utils.HTTP_TRANSPORT, Utils.JSON_FACTORY, credential).setApplicationName("t").build();

        try {
            Optional<Channel> channel = watchFile(drive, driveUtilityService.getFileId(), driveUtilityService.getChannelId(), "web_hook", driveUtilityService.getPushUrl());
            String channelStringTmp;
            if (channel.isPresent()) {
                channelStringTmp = channel.get().toString();
            } else {
                channelStringTmp = "null...";
            }
            resultFromWatch.append(channelStringTmp);
        } catch (Exception e) {
            resultFromWatch.append(e.getMessage());
        }

        final UserService userService = UserServiceFactory.getUserService();
        final String thisUrl = req.getRequestURI();
        // Send the results as the response
        PrintWriter respWriter = resp.getWriter();
        resp.setStatus(200);
        resp.setContentType("text/html");
        addLoginLogoutButtons(req, resp, resultFromWatch, userService, thisUrl, respWriter);
        logger.warning("user is " + userId + " sample has done its job and channel " + resultFromWatch.toString());
    }

    @Override
    protected AuthorizationCodeFlow initializeFlow() throws ServletException, IOException {
        return Utils.initializeFlow();
    }

    @Override
    protected String getRedirectUri(HttpServletRequest req) throws ServletException, IOException {
        return Utils.getRedirectUri(req);
    }
}

/**
 * Static helpers shared by the sample servlets: servlet paths, the HTTP transport and JSON
 * factory, the lazily loaded OAuth client secrets, and construction of the authorization flow
 * and redirect URI.
 */
class Utils {
    static final String MAIN_SERVLET_PATH = "/plussampleservlet";
    static final String AUTH_CALLBACK_SERVLET_PATH = "/oauth2callback";
    static final UrlFetchTransport HTTP_TRANSPORT = new UrlFetchTransport();
    static final JacksonFactory JSON_FACTORY = JacksonFactory.getDefaultInstance();
    private final static Logger logger = Logger.getLogger(Utils.class.getName());
    /** Classpath location of the OAuth client secrets JSON read by {@link #getClientSecrets()}. */
    private static final String CLIENT_SECRETS_RESOURCE = "/plus_secret.json";
    /**
     * Global instance of the {@link DataStoreFactory}. The best practice is to make it a single
     * globally shared instance across your application.
     */
    private static final AppEngineDataStoreFactory DATA_STORE_FACTORY =
            AppEngineDataStoreFactory.getDefaultInstance();
    private static final Set<String> SCOPES = getScopes();
    /** Cached client secrets; stays {@code null} until a load has passed validation. */
    private static GoogleClientSecrets clientSecrets = null;


    /**
     * Builds the set of OAuth scopes requested by the flow: read-only Drive and read-only Sheets.
     *
     * @return a new mutable set holding the two scope strings
     */
    private static Set<String> getScopes() {
        List<String> scopeList = Arrays.asList(DriveScopes.DRIVE_READONLY, SheetsScopes.SPREADSHEETS_READONLY);
        Set<String> scopes = Sets.newHashSet();
        scopes.addAll(scopeList);
        return scopes;
    }

    /**
     * Returns the client secrets, loading them from {@code CLIENT_SECRETS_RESOURCE} on first use.
     *
     * <p>The secrets are validated before they are cached, so a file that still contains the
     * "Enter ..." placeholders fails on every call instead of being served from the cache after
     * the first failure.
     *
     * @return the validated client secrets
     * @throws IOException if the resource is missing from the classpath or cannot be read
     * @throws IllegalArgumentException if the client id or secret still starts with "Enter "
     */
    private static GoogleClientSecrets getClientSecrets() throws IOException {
        if (clientSecrets == null) {
            final java.io.InputStream stream = Utils.class.getResourceAsStream(CLIENT_SECRETS_RESOURCE);
            if (stream == null) {
                // getResourceAsStream signals "not found" with null; fail with a readable message
                // instead of the NullPointerException InputStreamReader would raise.
                throw new IOException("Resource " + CLIENT_SECRETS_RESOURCE + " not found on the classpath");
            }
            final GoogleClientSecrets loaded;
            // JSON is UTF-8 by specification, so do not depend on the platform default charset.
            try (InputStreamReader reader =
                         new InputStreamReader(stream, java.nio.charset.StandardCharsets.UTF_8)) {
                loaded = GoogleClientSecrets.load(JSON_FACTORY, reader);
            }
            // NOTE(review): this message names client_secrets.json while the code reads
            // CLIENT_SECRETS_RESOURCE ("/plus_secret.json") - confirm which file name is intended.
            Preconditions.checkArgument(!loaded.getDetails().getClientId().startsWith("Enter ")
                            && !loaded.getDetails().getClientSecret().startsWith("Enter "),
                    "Download client_secrets.json file from https://code.google.com/apis/console/?api=plus "
                            + "into plus-appengine-sample/src/main/resources/client_secrets.json");
            // Publish to the cache only after validation succeeded.
            clientSecrets = loaded;
        }
        logger.info("Something asked for the secret");
        return clientSecrets;
    }

    /**
     * Creates a new authorization code flow using the shared transport, JSON factory, client
     * secrets, scopes and data store factory, with access type {@code "offline"}.
     *
     * @return a newly built flow
     * @throws IOException if the client secrets cannot be loaded or the flow cannot be built
     */
    static GoogleAuthorizationCodeFlow initializeFlow() throws IOException {
        logger.info("flow is initialized soon");
        return new GoogleAuthorizationCodeFlow.Builder(
                HTTP_TRANSPORT, JSON_FACTORY, getClientSecrets(), SCOPES).setDataStoreFactory(
                DATA_STORE_FACTORY).setAccessType("offline").build();
    }

    /**
     * Derives the OAuth redirect URI from the incoming request by replacing the request URL's
     * path with {@link #AUTH_CALLBACK_SERVLET_PATH}.
     *
     * @param req the current request, whose URL supplies the base of the redirect URI
     * @return the rebuilt URL as a string
     */
    static String getRedirectUri(HttpServletRequest req) {
        GenericUrl requestUrl = new GenericUrl(req.getRequestURL().toString());
        requestUrl.setRawPath(AUTH_CALLBACK_SERVLET_PATH);
        logger.info("retrieved redirecturl");
        return requestUrl.build();
    }
}

另一种解决方案:

/**
 * Authorization-code callback servlet for the sample. It overrides the success and error hooks
 * of the App Engine callback base class and delegates flow and redirect-URI construction to
 * {@link Utils}.
 */
public class PlusSampleAuthCallbackServlet
        extends AbstractAppEngineAuthorizationCodeCallbackServlet {
    private final static Logger logger = Logger.getLogger(PlusSampleAuthCallbackServlet.class.getName());

    private static final long serialVersionUID = 1L;

    /**
     * Success hook: redirects the browser to the main servlet path.
     *
     * @param req the callback request
     * @param resp the response used to issue the redirect
     * @param credential the credential passed in by the base class (unused here)
     */
    @Override
    protected void onSuccess(HttpServletRequest req, HttpServletResponse resp, Credential credential)
            throws ServletException, IOException {
        resp.sendRedirect(Utils.MAIN_SERVLET_PATH);
        logger.info("ON success");
    }

    /**
     * Error hook: answers with a small HTML message addressed to the current user's nickname.
     *
     * @param req the callback request
     * @param resp the response the message is written to
     * @param errorResponse the error details passed in by the base class (unused here)
     */
    @Override
    protected void onError(
            HttpServletRequest req, HttpServletResponse resp, AuthorizationCodeResponseUrl errorResponse)
            throws ServletException, IOException {
        // NOTE(review): getCurrentUser() presumably returns null when nobody is signed in, which
        // would throw here - confirm this path is always behind a login constraint.
        // NOTE(review): the nickname is written into the HTML unescaped - confirm that is safe.
        String nickname = UserServiceFactory.getUserService().getCurrentUser().getNickname();
        // Status and content type go first: once the writer is obtained the content type can no
        // longer influence the encoding, and headers set after the response commits are ignored.
        resp.setStatus(HttpServletResponse.SC_OK);
        resp.setContentType("text/html");
        resp.getWriter().print("<h3>Hey " + nickname + ", why don't you want to play with me?</h3>");
        logger.info("ON error");
    }

    /**
     * Supplies the authorization flow by delegating to {@link Utils#initializeFlow()}.
     *
     * @return the flow built by {@code Utils}
     */
    @Override
    protected AuthorizationCodeFlow initializeFlow() throws ServletException, IOException {
        logger.info("initializing flow");
        return Utils.initializeFlow();
    }

    /**
     * Supplies the redirect URI by delegating to {@code Utils.getRedirectUri(req)}.
     *
     * @param req the current request
     * @return the redirect URI string built by {@code Utils}
     */
    @Override
    protected String getRedirectUri(HttpServletRequest req) throws ServletException, IOException {
        logger.info("get redirect");
        return Utils.getRedirectUri(req);
    }

}