From 9ad89997a533744695b380b315f1f70293bb30c4 Mon Sep 17 00:00:00 2001
From: Jacky Zhao <j.zhao2k19@gmail.com>
Date: Sun, 04 Jun 2023 16:35:45 +0000
Subject: [PATCH] multi-core builds

---
 quartz/build.ts                    |   75 ++++++++++++
 /dev/null                          |   77 ------------
 .gitignore                         |    1 
 quartz/plugins/transformers/gfm.ts |    2 
 quartz/bootstrap-worker.mjs        |    7 +
 quartz/worker.ts                   |   30 +++++
 quartz/bootstrap-cli.mjs           |   15 +-
 quartz/processors/parse.ts         |  100 ++++++++++++++--
 quartz.config.ts                   |   25 +++-
 9 files changed, 224 insertions(+), 108 deletions(-)

diff --git a/.gitignore b/.gitignore
index 690975f..b39ea57 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
 .DS_Store
 node_modules
 public
+.quartz-cache
diff --git a/quartz.config.ts b/quartz.config.ts
index 14ef1c1..6808d93 100644
--- a/quartz.config.ts
+++ b/quartz.config.ts
@@ -1,9 +1,20 @@
-import { buildQuartz } from "./quartz"
-import Head from "./quartz/components/Head"
-import Header from "./quartz/components/Header"
-import { ContentPage, CreatedModifiedDate, Description, FrontMatter, GitHubFlavoredMarkdown, Katex, ObsidianFlavoredMarkdown, RemoveDrafts, ResolveLinks, SyntaxHighlighting } from "./quartz/plugins"
+import { QuartzConfig } from "./quartz/cfg"
+import * as Head from "./quartz/components/Head"
+import * as Header from "./quartz/components/Header"
+import {
+  ContentPage,
+  CreatedModifiedDate,
+  Description,
+  FrontMatter,
+  GitHubFlavoredMarkdown,
+  Katex,
+  ObsidianFlavoredMarkdown,
+  RemoveDrafts,
+  ResolveLinks,
+  SyntaxHighlighting
+} from "./quartz/plugins"
 
-export default buildQuartz({
+const config: QuartzConfig = {
   configuration: {
     siteTitle: "🪴 Quartz 4.0",
     enableSPA: true,
@@ -61,4 +72,6 @@
       })
     ]
   },
-})
+}
+
+export default config
diff --git a/quartz/bootstrap.mjs b/quartz/bootstrap-cli.mjs
similarity index 89%
rename from quartz/bootstrap.mjs
rename to quartz/bootstrap-cli.mjs
index 16a0c69..be6270e 100755
--- a/quartz/bootstrap.mjs
+++ b/quartz/bootstrap-cli.mjs
@@ -5,10 +5,10 @@
 import { hideBin } from 'yargs/helpers'
 import esbuild from 'esbuild'
 import chalk from 'chalk'
-import requireFromString from 'require-from-string'
 import { sassPlugin } from 'esbuild-sass-plugin'
 
-const fp = "./quartz.config.ts"
+const cacheFile = "./.quartz-cache/transpiled-build.mjs"
+const fp = "./quartz/build.ts"
 const { version } = JSON.parse(readFileSync("./package.json").toString())
 
 export const BuildArgv = {
@@ -52,16 +52,16 @@
   .version(version)
   .usage('$0 <cmd> [args]')
   .command('build', 'Build Quartz into a bundle of static HTML files', BuildArgv, async (argv) => {
-    const out = await esbuild.build({
+    await esbuild.build({
       entryPoints: [fp],
-      write: false,
+      outfile: path.join("quartz", cacheFile),
       bundle: true,
       keepNames: true,
       platform: "node",
-      format: "cjs",
+      format: "esm",
       jsx: "automatic",
       jsxImportSource: "preact",
-      external: ["@napi-rs/simple-git", "shiki"],
+      packages: "external",
       plugins: [
         sassPlugin({
           type: 'css-text'
@@ -97,8 +97,7 @@
       process.exit(1)
     })
 
-    const mod = out.outputFiles[0].text
-    const init = requireFromString(mod, fp).default
+    const { default: init } = await import(cacheFile)
     init(argv, version)
   })
   .showHelpOnFail(false)
diff --git a/quartz/bootstrap-worker.mjs b/quartz/bootstrap-worker.mjs
new file mode 100644
index 0000000..7db24c0
--- /dev/null
+++ b/quartz/bootstrap-worker.mjs
@@ -0,0 +1,7 @@
+#!/usr/bin/env node
+import workerpool from 'workerpool'
+const cacheFile = "./.quartz-cache/transpiled-worker.mjs" // bundle written by transpileWorkerScript in processors/parse.ts
+const { parseFiles } = await import(cacheFile)
+workerpool.worker({ // register parseFiles so the pool can invoke it by name via pool.exec('parseFiles', ...)
+  parseFiles
+})
diff --git a/quartz/build.ts b/quartz/build.ts
new file mode 100644
index 0000000..73cebce
--- /dev/null
+++ b/quartz/build.ts
@@ -0,0 +1,75 @@
+import path from "path"
+import { PerfTimer } from "./perf"
+import { rimraf } from "rimraf"
+import { globby } from "globby"
+import chalk from "chalk"
+import http from "http"
+import serveHandler from "serve-handler"
+import { parseMarkdown } from "./processors/parse"
+import { filterContent } from "./processors/filter"
+import { emitContent } from "./processors/emit"
+import cfg from "../quartz.config"
+
+interface Argv {
+  directory: string // folder that is globbed for **/*.md input files
+  verbose: boolean // print plugin lists, per-step timings and per-file progress
+  output: string // folder that is cleaned, handed to the emitters and served
+  clean: boolean // remove the output folder before building
+  serve: boolean // start a local HTTP server once the build is done
+  port: number // port the local HTTP server listens on
+}
+
+/**
+ * Runs one Quartz build: optionally cleans the output folder, globs Markdown under
+ * argv.directory, parses, filters and emits it, then optionally serves the output over HTTP.
+ */
+export default async function buildQuartz(argv: Argv, version: string) {
+  console.log(chalk.bgGreen.black(`\n Quartz v${version} \n`))
+  const perf = new PerfTimer()
+  const { directory, output, verbose } = argv
+
+  if (verbose) {
+    const total = Object.values(cfg.plugins).flat().length
+    const listNames = (kind: 'transformers' | 'filters' | 'emitters') =>
+      cfg.plugins[kind].map(plugin => plugin.name).join(", ")
+    console.log(`Loaded ${total} plugins`)
+    console.log(`  Transformers: ${listNames('transformers')}`)
+    console.log(`  Filters: ${listNames('filters')}`)
+    console.log(`  Emitters: ${listNames('emitters')}`)
+  }
+
+  // wipe the previous output first when requested
+  if (argv.clean) {
+    perf.addEvent('clean')
+    await rimraf(output)
+    if (verbose) {
+      console.log(`Cleaned output directory \`${output}\` in ${perf.timeSince('clean')}`)
+    }
+  }
+
+  // discover Markdown sources, honouring the configured ignore patterns and .gitignore
+  perf.addEvent('glob')
+  const fps = await globby('**/*.md', {
+    cwd: directory,
+    ignore: cfg.configuration.ignorePatterns,
+    gitignore: true,
+  })
+  if (verbose) {
+    console.log(`Found ${fps.length} input files in ${perf.timeSince('glob')}`)
+  }
+
+  const filePaths = fps.map(fp => `${directory}${path.sep}${fp}`)
+  const parsedFiles = await parseMarkdown(cfg.plugins.transformers, directory, filePaths, verbose)
+  const filteredContent = filterContent(cfg.plugins.filters, parsedFiles, verbose)
+  await emitContent(directory, output, cfg, filteredContent, verbose)
+  console.log(chalk.green(`Done processing ${fps.length} files in ${perf.timeSince()}`))
+
+  if (!argv.serve) return
+
+  const server = http.createServer(async (req, res) =>
+    serveHandler(req, res, { public: output, directoryListing: false })
+  )
+  server.listen(argv.port)
+  console.log(`Started a Quartz server listening at http://localhost:${argv.port}`)
+  console.log('hint: exit with ctrl+c')
+}
diff --git a/quartz/index.ts b/quartz/index.ts
deleted file mode 100644
index c64f012..0000000
--- a/quartz/index.ts
+++ /dev/null
@@ -1,77 +0,0 @@
-import path from "path"
-import { QuartzConfig } from "./cfg"
-import { PerfTimer } from "./perf"
-import { rimraf } from "rimraf"
-import { globby } from "globby"
-import chalk from "chalk"
-import http from "http"
-import serveHandler from "serve-handler"
-import { createProcessor, parseMarkdown } from "./processors/parse"
-import { filterContent } from "./processors/filter"
-import { emitContent } from "./processors/emit"
-
-interface Argv {
-  directory: string
-  verbose: boolean
-  output: string
-  clean: boolean
-  serve: boolean
-  port: number
-}
-
-export function buildQuartz(cfg: QuartzConfig) {
-  return async (argv: Argv, version: string) => {
-    console.log(chalk.bgGreen.black(`\n Quartz v${version} \n`))
-    const perf = new PerfTimer()
-    const output = argv.output
-
-    if (argv.verbose) {
-      const pluginCount = Object.values(cfg.plugins).flat().length
-      const pluginNames = (key: 'transformers' | 'filters' | 'emitters') => cfg.plugins[key].map(plugin => plugin.name)
-      console.log(`Loaded ${pluginCount} plugins`)
-      console.log(`  Transformers: ${pluginNames('transformers').join(", ")}`)
-      console.log(`  Filters: ${pluginNames('filters').join(", ")}`)
-      console.log(`  Emitters: ${pluginNames('emitters').join(", ")}`)
-    }
-
-    // clean
-    if (argv.clean) {
-      perf.addEvent('clean')
-      await rimraf(output)
-      if (argv.verbose) {
-        console.log(`Cleaned output directory \`${output}\` in ${perf.timeSince('clean')}`)
-      }
-    }
-
-    // glob
-    perf.addEvent('glob')
-    const fps = await globby('**/*.md', {
-      cwd: argv.directory,
-      ignore: cfg.configuration.ignorePatterns,
-      gitignore: true,
-    })
-
-    if (argv.verbose) {
-      console.log(`Found ${fps.length} input files in ${perf.timeSince('glob')}`)
-    }
-
-    const processor = createProcessor(cfg.plugins.transformers)
-    const filePaths = fps.map(fp => `${argv.directory}${path.sep}${fp}`)
-    const parsedFiles = await parseMarkdown(processor, argv.directory, filePaths, argv.verbose)
-    const filteredContent = filterContent(cfg.plugins.filters, parsedFiles, argv.verbose)
-    await emitContent(argv.directory, output, cfg, filteredContent, argv.verbose)
-    console.log(chalk.green(`Done processing ${fps.length} files in ${perf.timeSince()}`))
-
-    if (argv.serve) {
-      const server = http.createServer(async (req, res) => {
-        return serveHandler(req, res, {
-          public: output,
-          directoryListing: false
-        })
-      })
-      server.listen(argv.port)
-      console.log(`Started a Quartz server listening at http://localhost:${argv.port}`)
-      console.log('hint: exit with ctrl+c')
-    }
-  }
-}
diff --git a/quartz/plugins/transformers/gfm.ts b/quartz/plugins/transformers/gfm.ts
index 55dbda2..1cb0fc6 100644
--- a/quartz/plugins/transformers/gfm.ts
+++ b/quartz/plugins/transformers/gfm.ts
@@ -3,7 +3,7 @@
 import smartypants from 'remark-smartypants'
 import { QuartzTransformerPlugin } from "../types"
 import rehypeSlug from "rehype-slug"
-import rehypeAutolinkHeadings from "rehype-autolink-headings/lib"
+import rehypeAutolinkHeadings from "rehype-autolink-headings"
 
 export interface Options {
   enableSmartyPants: boolean
diff --git a/quartz/processors/parse.ts b/quartz/processors/parse.ts
index 83a05d4..715a4e9 100644
--- a/quartz/processors/parse.ts
+++ b/quartz/processors/parse.ts
@@ -1,3 +1,4 @@
+import esbuild from 'esbuild'
 import remarkParse from 'remark-parse'
 import remarkRehype from 'remark-rehype'
 import { Processor, unified } from "unified"
@@ -8,6 +9,8 @@
 import { read } from 'to-vfile'
 import { slugify } from '../path'
 import path from 'path'
+import os from 'os'
+import workerpool, { Promise as WorkerPromise } from 'workerpool'
 import { QuartzTransformerPlugin } from '../plugins/types'
 
 export type QuartzProcessor = Processor<MDRoot, HTMLRoot, void>
@@ -32,24 +35,89 @@
   return processor
 }
 
-export async function parseMarkdown(processor: QuartzProcessor, baseDir: string, fps: string[], verbose: boolean): Promise<ProcessedContent[]> {
-  const perf = new PerfTimer()
-  const res: ProcessedContent[] = []
-  for (const fp of fps) {
-    const file = await read(fp)
+// Lazily yields consecutive slices of `arr`, each holding at most `n` items; `n` must be >= 1.
+function* chunks<T>(arr: T[], n: number) {
+  if (!(n >= 1)) throw new RangeError(`chunk size must be >= 1, got ${n}`) // n <= 0 would never advance the index
+  for (let i = 0; i < arr.length; i += n) yield arr.slice(i, i + n)
+}
 
-    // base data properties that plugins may use
-    file.data.slug = slugify(path.relative(baseDir, file.path))
-    file.data.filePath = fp
-
-    const ast = processor.parse(file)
-    res.push([await processor.run(ast, file), file])
-
-    if (verbose) {
-      console.log(`[process] ${fp} -> ${file.data.slug}`)
-    }
+async function transpileWorkerScript(verbose: boolean) {
+  // Bundle the worker entry with esbuild into the ESM file that bootstrap-worker.mjs imports.
+  const cacheFile = "./.quartz-cache/transpiled-worker.mjs" // same relative path bootstrap-worker.mjs loads
+  const fp = "./quartz/worker.ts" // worker entry point; NOTE(review): looks cwd-relative — confirm builds always start at the repo root
+  if (verbose) {
+    console.log("Transpiling worker script")
   }
 
-  console.log(`Parsed and transformed ${res.length} Markdown files in ${perf.timeSince()}`)
+  await esbuild.build({
+    entryPoints: [fp],
+    outfile: path.join("quartz", cacheFile), // i.e. quartz/.quartz-cache/transpiled-worker.mjs
+    bundle: true,
+    keepNames: true,
+    platform: "node",
+    format: "esm",
+    packages: "external", // leave package imports in place instead of bundling them
+    plugins: [
+      {
+        name: 'css-and-scripts-as-text',
+        setup(build) {
+          build.onLoad({ filter: /\.scss$/ }, (_) => ({ // stylesheets are replaced by an empty string
+            contents: '',
+            loader: 'text'
+          }))
+          build.onLoad({ filter: /\.inline\.(ts|js)$/ }, (_) => ({ // as are *.inline.ts / *.inline.js scripts
+            contents: '',
+            loader: 'text'
+          }))
+        }
+      }
+    ]
+  })
+}
+
+/**
+ * Parses and transforms `fps` into [tree, vfile] pairs. Inputs that fit one chunk (or hosts with one core) run inline; larger ones are split into CHUNK_SIZE-file chunks for a worker pool.
+ */
+export async function parseMarkdown(transformers: QuartzTransformerPlugin[], baseDir: string, fps: string[], verbose: boolean): Promise<ProcessedContent[]> {
+  const perf = new PerfTimer()
+  const CHUNK_SIZE = 128
+  // cap the pool at the number of chunks so no worker thread is started without work to do
+  const concurrency = fps.length < CHUNK_SIZE ? 1 : Math.min(os.availableParallelism(), Math.ceil(fps.length / CHUNK_SIZE))
+  const res: ProcessedContent[] = []
+  if (concurrency === 1) {
+    // single-thread
+    const processor = createProcessor(transformers)
+    for (const fp of fps) {
+      const file = await read(fp)
+      // base data properties that plugins may use
+      file.data.slug = slugify(path.relative(baseDir, file.path))
+      file.data.filePath = fp
+      const ast = processor.parse(file)
+      res.push([await processor.run(ast, file), file])
+      if (verbose) {
+        console.log(`[process] ${fp} -> ${file.data.slug}`)
+      }
+    }
+  } else {
+    await transpileWorkerScript(verbose)
+    const pool = workerpool.pool('./quartz/bootstrap-worker.mjs', {
+      minWorkers: 'max',
+      maxWorkers: concurrency,
+      workerType: 'thread'
+    })
+    try {
+      const childPromises: WorkerPromise<ProcessedContent[]>[] = []
+      for (const chunk of chunks(fps, CHUNK_SIZE)) {
+        childPromises.push(pool.exec('parseFiles', [baseDir, chunk, verbose]))
+      }
+      const results: ProcessedContent[][] = await WorkerPromise.all(childPromises)
+      res.push(...results.flat())
+    } finally {
+      // shut the worker threads down even when a chunk rejects
+      await pool.terminate()
+    }
+  }
+
+  console.log(`Parsed and transformed ${res.length} Markdown files with ${concurrency} cores in ${perf.timeSince()}`)
   return res
 }
diff --git a/quartz/worker.ts b/quartz/worker.ts
new file mode 100644
index 0000000..71678b8
--- /dev/null
+++ b/quartz/worker.ts
@@ -0,0 +1,30 @@
+import { read } from "to-vfile"
+import config from "../quartz.config"
+import { createProcessor } from "./processors/parse"
+import { slugify } from "./path"
+import path from "path"
+import { ProcessedContent } from "./plugins/vfile"
+
+const transformers = config.plugins.transformers
+const processor = createProcessor(transformers)
+
+/**
+ * Only called from a worker thread: reads each path in `fps` in order, runs it through
+ * the module-level processor and returns one [tree, vfile] pair per file.
+ */
+export async function parseFiles(baseDir: string, fps: string[], verbose: boolean) {
+  const parsed: ProcessedContent[] = []
+  for (const filePath of fps) {
+    const vfile = await read(filePath)
+    // base data properties that plugins may use
+    vfile.data.slug = slugify(path.relative(baseDir, vfile.path))
+    vfile.data.filePath = filePath
+
+    const tree = await processor.run(processor.parse(vfile), vfile)
+    parsed.push([tree, vfile])
+    if (verbose) {
+      console.log(`[process] ${filePath} -> ${vfile.data.slug}`)
+    }
+  }
+  return parsed
+}

--
Gitblit v1.10.0