Webpack构建在爬虫中无法正常运行

时间:2019-06-03 23:19:33

标签: javascript webpack babeljs

我的构建在浏览器中运行正常(甚至包括旧版本 v40),但在爬虫(例如 Googlebot)中无法正常运行。

我尝试过

  • 关闭生产模式构建,以便查看未压缩代码中的错误,但爬虫随后因 js 文件过大而拒绝加载
  • 在尽可能多的浏览器(IE 除外)中运行该网站,结果在所有浏览器中都能正常工作。
  • 禁用针对爬虫的预渲染(该功能使用无头 Chrome 浏览器渲染应用的 html)

这是googlebot看到的错误:

enter image description here

您可以在https://wavedistrict.com上对此进行测试

Webpack配置:

const { resolve } = require("path")
const ForkTsCheckerWebpackPlugin = require("fork-ts-checker-webpack-plugin")
const CleanWebpackPlugin = require("clean-webpack-plugin")
const CopyWebpackPlugin = require("copy-webpack-plugin")
const HtmlWebpackPlugin = require("html-webpack-plugin")
const HtmlWebpackInlineSourcePlugin = require("html-webpack-inline-source-plugin")
const MiniCssExtractPlugin = require("mini-css-extract-plugin")
const OptimizeCssAssetsWebpackPlugin = require("optimize-css-assets-webpack-plugin")
const WebpackPwaManifest = require("webpack-pwa-manifest")
const webpackMerge = require("webpack-merge")
const Visualizer = require("webpack-visualizer-plugin")

/** True when the build runs with NODE_ENV set to "production". */
const isProduction = process.env.NODE_ENV === "production"

/**
 * Absolute locations used throughout the configuration.
 * Everything below is derived from `projectRoot`; change that one value
 * when moving the configuration files.
 */
const projectRoot = resolve(__dirname)

// Top-level folders that sit directly under the project root.
const [sourceFolder, buildFolder, publicFolder] = ["src", "build", "public"].map(
  (folder) => resolve(projectRoot, folder)
)

// Tooling configuration files that sit directly under the project root.
const [tsconfigPath, tslintPath] = ["tsconfig.json", "tslint.json"].map(
  (file) => resolve(projectRoot, file)
)

// Locations nested one level further down.
const tsFolder = resolve(sourceFolder, "ts")
const htmlTemplateFile = resolve(publicFolder, "index.html")

/**
 * Loader entry for ts-loader.
 *
 * NOTE(review): none of the rules visible in this file (`workerRule`,
 * `babelRule`, `sassRule`) reference `tsLoader`; TypeScript files appear to
 * be handled by babel-loader instead. Confirm before relying on or removing it.
 */
const tsLoader = {
  loader: "ts-loader",
  options: {
    // Compiler settings passed inline alongside the tsconfig named in `configFile`.
    compilerOptions: {
      module: "esnext",
      target: "es5",
      allowSyntheticDefaultImports: true,
    },
    // ts-loader option: transpile only, without type checking.
    transpileOnly: true,
    configFile: tsconfigPath,
    // ts-loader option: also accept TypeScript sources found under node_modules.
    allowTsInNodeModules: true,
  },
}

// Shared babel-loader entry; no inline options are given here.
const babelLoader = { loader: "babel-loader" }

// Every .js, .ts and .tsx module is handed to babel-loader.
const babelRule = {
  test: /\.(js|ts|tsx)$/,
  use: [babelLoader],
}

// Files whose name ends in ".worker.ts" are matched by worker-loader.
const workerRule = {
  test: /\.worker\.ts$/,
  use: { loader: "worker-loader" },
}

// style-loader entry used when the build is not a production build.
const devStyleLoader = {
  loader: "style-loader",
  options: { singleton: true },
}

// sass-loader entry: `data` carries the shared core stylesheet import and
// `includePaths` lets Sass imports resolve relative to the source folder.
const sassLoader = {
  loader: "sass-loader",
  options: {
    data: "@import './ts/modules/core/styles/_.scss';",
    includePaths: [sourceFolder],
  },
}

/**
 * Rule for .scss files.
 * Production builds use MiniCssExtractPlugin's loader; every other build
 * uses style-loader instead.
 */
const sassRule = {
  test: /\.scss$/,
  use: [
    isProduction ? MiniCssExtractPlugin.loader : devStyleLoader,
    { loader: "css-loader" },
    sassLoader,
  ],
}

/** @type {import('webpack').Configuration} */
const baseConfig = {
  context: projectRoot,

  entry: [
    "babel-polyfill",
    "url-search-params-polyfill",
    resolve(tsFolder, "init"),
  ],

  output: {
    filename: "js/[name].js",
    path: buildFolder,
    publicPath: "/",
  },

  module: {
    rules: [workerRule, babelRule, sassRule],
  },

  resolve: {
    modules: ["node_modules"],
    extensions: [".js", ".ts", ".tsx", ".scss"],
    alias: {
      modules: resolve(tsFolder, "modules"),
      common: resolve(tsFolder, "common"),
    },
    mainFields: ["jsnext:main", "module", "main"],
  },

  plugins: [
    new CopyWebpackPlugin([
      {
        from: publicFolder,
        ignore: [htmlTemplateFile],
      },
    ]),
    new CleanWebpackPlugin(buildFolder, { root: projectRoot, verbose: false }),
    /**new ForkTsCheckerWebpackPlugin({
      tslint: tslintPath,
      tsconfig: tsconfigPath,
    }),**/
  ],

  stats: {
    children: false,
    entrypoints: false,
    modules: false,
  },
}

// Add the bundle visualizer only when the process was started with `--stats`
// (and the base configuration actually carries a plugin list).
if (process.argv.includes("--stats") && baseConfig.plugins) {
  baseConfig.plugins.push(new Visualizer())
}

/**
 * Development configuration: `baseConfig` merged with development-only
 * options (HTML generation from the template, inline source maps and
 * dev-server settings).
 */
const devConfig = webpackMerge(baseConfig, {
  mode: "development",
  plugins: [
    // Generates the page from the HTML template file.
    new HtmlWebpackPlugin({
      template: htmlTemplateFile,
      chunksSortMode: "dependency",
    }),
  ],
  devtool: "inline-source-map",
  devServer: {
    // Hot module replacement is switched off.
    hot: false,
    // Dev-server history API fallback is enabled.
    historyApiFallback: true,
  },
})

/**
 * Builds one icon entry for the PWA manifest.
 * A fresh object (and a fresh `sizes` array) is returned on every call so
 * the entries never share state.
 *
 * @param {string} file - Icon path relative to the public folder.
 * @returns {{src: string, sizes: number[], destination: string}}
 */
const pwaIcon = (file) => ({
  src: resolve(publicFolder, file),
  sizes: [48, 72, 96, 128, 144, 192, 256, 512],
  destination: "icons",
})

/** Minification switches passed to HtmlWebpackPlugin for the production page. */
const htmlMinifyOptions = {
  removeComments: true,
  collapseWhitespace: true,
  removeRedundantAttributes: true,
  useShortDoctype: true,
  removeEmptyAttributes: true,
  removeStyleLinkTypeAttributes: true,
  keepClosingSlash: true,
  minifyJS: true,
  minifyCSS: true,
  minifyURLs: true,
}

/** Options layered on top of `baseConfig` for production builds. */
const prodOverrides = {
  mode: "production",

  optimization: { minimize: true, nodeEnv: "production" },

  plugins: [
    // Web app manifest plus icons generated from the SVG and PNG logos.
    new WebpackPwaManifest({
      name: "WaveDistrict",
      short_name: "WaveDistrict",
      description: "",
      background_color: "#091F35",
      theme_color: "#00ba8c",
      orientation: "any",
      icons: [pwaIcon("img/logo.svg"), pwaIcon("img/logo.png")],
    }),

    // Stylesheets are emitted as css/<chunk name>.css and then optimized.
    new MiniCssExtractPlugin({ filename: "css/[name].css" }),
    new OptimizeCssAssetsWebpackPlugin(),

    // Page generated from the HTML template, minified, with assets injected.
    new HtmlWebpackPlugin({
      template: htmlTemplateFile,
      minify: htmlMinifyOptions,
      inject: true,
    }),

    new HtmlWebpackInlineSourcePlugin(),
  ],

  performance: { maxAssetSize: 500000 },

  devtool: "source-map",
}

/** Production configuration: `baseConfig` merged with the production overrides. */
const prodConfig = webpackMerge(baseConfig, prodOverrides)

// Export the production configuration for production builds and the
// development configuration for everything else.
if (isProduction) {
  module.exports = prodConfig
} else {
  module.exports = devConfig
}

Babel 配置(需要把 node_modules 中的 ES6 依赖转译为 ES5):

/**
 * Options passed to @babel/preset-env.
 * The only target listed is Chrome 41, chosen for Googlebot.
 */
const babelEnv = {
  targets: {
    chrome: "41", // For googlebot
  },
}

/**
 * Dependencies that must still be run through Babel even though they live
 * under node_modules. Keep track of all conflicting dependencies here.
 */
const nonES5Deps = ["qs", "querystring", "query-string", "decko"]

module.exports = function(api) {
  api.cache(true)

  return {
    exclude: [],
    include: (path) => {
      if (nonES5Deps.some((p) => path.match(p))) {
        return true
      }
      if (path.match(/node_modules/)) return false

      return true
    },
    presets: [
      ["@babel/preset-env", babelEnv],
      "@babel/preset-react",
      "@babel/preset-typescript",
    ],
    plugins: [
      "@babel/plugin-syntax-dynamic-import",
      "@babel/plugin-transform-arrow-functions",
      [
        "@babel/plugin-proposal-decorators",
        {
          legacy: true,
        },
      ],
      [
        "@babel/plugin-proposal-class-properties",
        {
          loose: true,
        },
      ],
    ],
  }
}

那么这究竟是怎么回事?当问题只出现在爬虫中时,我该如何调试?

1 个答案:

答案 0 :(得分:2)

我在朋友的帮助下找到了问题所在。看来 Googlebot(以及其他爬虫)不支持 AudioContext 对象,因此才会出现 undefined is not a function 错误。

我修改了代码,先检查 AudioContext 是否存在,并在它不存在时安全地禁用相关功能,问题就解决了。