Skip to content
This repository was archived by the owner on Apr 1, 2025. It is now read-only.

Commit a2902fd

Browse files
author
Patrick Thomson
authored
Merge pull request #624 from github/direct-codegen
AOT codegen
2 parents aca39e8 + f1e22be commit a2902fd

21 files changed

Lines changed: 51127 additions & 149 deletions

File tree

.github/workflows/bazel.yml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ jobs:
2424
path: |
2525
.bazel-cache
2626
bin
27-
/home/runner/.cache
2827
key: ${{ runner.os }}-bazel-${{ github.run_id }}
2928
restore-keys: |
3029
${{ runner.os }}-bazel-

.github/workflows/haskell.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ jobs:
1414
runs-on: ubuntu-latest
1515
strategy:
1616
matrix:
17-
ghc: ["8.8.3", "8.10.1"]
17+
ghc: ["8.10.1"]
1818
cabal: ["3.2.0.0"]
1919

2020
steps:

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ Available options:
5858

5959
## Development
6060

61-
`semantic` requires at least GHC 8.8.3 and Cabal 3.0. We strongly recommend using [`ghcup`][ghcup] to sandbox GHC versions, as GHC packages installed through your OS's package manager may not install statically-linked versions of the GHC boot libraries. `semantic` currently builds only on Unix systems; users of other operating systems may wish to use the [Docker images](https://github.com/github/semantic/packages/11609).
61+
`semantic` requires at least GHC 8.10.1 and Cabal 3.0. We strongly recommend using [`ghcup`][ghcup] to sandbox GHC versions, as GHC packages installed through your OS's package manager may not install statically-linked versions of the GHC boot libraries. `semantic` currently builds only on Unix systems; users of other operating systems may wish to use the [Docker images](https://github.com/github/semantic/packages/11609).
6262

6363
We use `cabal's` [Nix-style local builds][nix] for development. To get started quickly:
6464

WORKSPACE

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,9 +96,11 @@ stack_snapshot(
9696
"hspec-expectations",
9797
"lens",
9898
"lingo",
99+
"neat-interpolation",
99100
"network",
100101
"network-uri",
101102
"optparse-applicative",
103+
"optparse-generic",
102104
"parsers",
103105
"pathtype",
104106
"pretty-show",
@@ -138,6 +140,7 @@ stack_snapshot(
138140
"vector",
139141
"yaml",
140142
],
143+
stack_snapshot_json = "//:stackage_snapshot.json",
141144
tools = ["@happy"],
142145
vendored_packages = {
143146
"tree-sitter-{}".format(name): "@tree-sitter-{name}//:tree-sitter-{name}".format(name = name)

script/astgen

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
#!/bin/bash
2+
bazel run //semantic-ast:generate-ast -- --language=all --rootdir=$PWD

semantic-ast/BUILD.bazel

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ load(
1212
)
1313
load(
1414
"//:build/common.bzl",
15+
"EXECUTABLE_FLAGS",
1516
"GHC_FLAGS",
1617
)
1718

@@ -46,3 +47,52 @@ haskell_library(
4647
"@stackage//:unordered-containers",
4748
],
4849
)
50+
51+
all_ts_deps = ["@tree-sitter-{name}".format(name = name) for name in [
52+
"go",
53+
"java",
54+
"json",
55+
"php",
56+
"python",
57+
"ql",
58+
"ruby",
59+
"rust",
60+
"tsx",
61+
"typescript",
62+
]]
63+
64+
all_file_deps = ["@tree-sitter-{name}//:src/node-types.json".format(name = name) for name in [
65+
"go",
66+
"java",
67+
"json",
68+
"php",
69+
"python",
70+
"ql",
71+
"ruby",
72+
"rust",
73+
"tsx",
74+
"typescript",
75+
]]
76+
77+
haskell_binary(
78+
name = "generate-ast",
79+
srcs = glob(["app/**/*.hs"]),
80+
compiler_flags = GHC_FLAGS + EXECUTABLE_FLAGS + ["-XStrictData"],
81+
data = all_file_deps,
82+
deps = [
83+
":semantic-ast",
84+
"//:base",
85+
"//:filepath",
86+
"//:process",
87+
"//:template-haskell",
88+
"//:text",
89+
"//semantic-source",
90+
"@stackage//:bazel-runfiles",
91+
"@stackage//:directory",
92+
"@stackage//:generic-lens",
93+
"@stackage//:lens",
94+
"@stackage//:neat-interpolation",
95+
"@stackage//:optparse-generic",
96+
"@stackage//:tree-sitter",
97+
] + all_ts_deps,
98+
)

semantic-ast/README.md

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,17 @@
11
# semantic-ast
22

3-
This package is concerned with the CodeGen generation of strongly-typed ASTs.
3+
This package is concerned with the CodeGen generation of strongly-typed ASTs.
4+
5+
The provided `semantic-ast` executable is responsible for generating ASTs from language definitions. You can run it like so:
6+
7+
```
8+
cabal run semantic-ast -- --language=JSON
9+
```
10+
11+
You can also pass `all` to regenerate every language definition:
12+
13+
```
14+
cabal run semantic-ast -- --language=all
15+
```
416

517
[Documentation](https://github.com/github/semantic/blob/master/docs/codegen.md)

semantic-ast/app/Main.hs

Lines changed: 203 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,203 @@
1+
{-# LANGUAGE DeriveAnyClass #-}
2+
{-# LANGUAGE DeriveGeneric #-}
3+
{-# LANGUAGE DerivingStrategies #-}
4+
{-# LANGUAGE FlexibleInstances #-}
5+
{-# LANGUAGE LambdaCase #-}
6+
{-# LANGUAGE OverloadedStrings #-}
7+
{-# LANGUAGE QuasiQuotes #-}
8+
{-# LANGUAGE TemplateHaskell #-}
9+
{-# LANGUAGE TypeApplications #-}
10+
{-# LANGUAGE TypeSynonymInstances #-}
11+
{-# OPTIONS_GHC -fno-warn-orphans #-}
12+
13+
module Main (main) where
14+
15+
import AST.GenerateSyntax
16+
import qualified Bazel.Runfiles as Bazel
17+
import Control.Lens (Traversal', mapped, (%~))
18+
import Control.Monad
19+
import Data.Foldable
20+
import Data.Generics.Product.Typed (typed)
21+
import Data.Maybe
22+
import Data.Text (Text)
23+
import qualified Data.Text as T
24+
import qualified Data.Text.IO as T
25+
import Foreign
26+
import GHC.Generics (Generic)
27+
import Language.Haskell.TH hiding (JavaScript)
28+
import Language.Haskell.TH.Lens
29+
import NeatInterpolation
30+
import qualified Options.Generic as Opt
31+
import Source.Language
32+
import System.FilePath
33+
import System.Directory
34+
import System.Exit
35+
import System.IO
36+
import System.Process
37+
import Text.Printf
38+
import qualified TreeSitter.Go as Go (tree_sitter_go)
39+
import qualified TreeSitter.JSON as JSON (tree_sitter_json)
40+
import qualified TreeSitter.Java as Java (tree_sitter_java)
41+
import qualified TreeSitter.Language
42+
import qualified TreeSitter.PHP as PHP (tree_sitter_php)
43+
import qualified TreeSitter.Python as Python (tree_sitter_python)
44+
import qualified TreeSitter.QL as CodeQL (tree_sitter_ql)
45+
import qualified TreeSitter.Ruby as Ruby (tree_sitter_ruby)
46+
import qualified TreeSitter.TSX as TSX (tree_sitter_tsx)
47+
import qualified TreeSitter.TypeScript as TypeScript (tree_sitter_typescript)
48+
49+
-- As a special case, you can pass
50+
data Config = Config {language :: Text, rootdir :: FilePath}
51+
deriving stock (Show, Generic)
52+
deriving anyclass (Opt.ParseRecord)
53+
54+
-- There are a few cases where the output emitted by TH's 'pprint' doesn't
55+
-- create entirely valid Haskell syntax, because sometimes we get
56+
-- a qualified name on the LHS of a typeclass declaration, which Haskell
57+
-- doesn't like at all. I haven't figured out quite why we get this qualified
58+
-- name, but for now the easiest thing to do is some nested updates with lens.
59+
adjust :: Dec -> Dec
60+
adjust = _InstanceD . typed . mapped %~ (values %~ truncate) . (functions %~ truncate)
61+
where
62+
-- Need to handle functions with no arguments, which are parsed as ValD entities,
63+
-- as well as those with arguments, which are FunD.
64+
values, functions :: Traversal' Dec Name
65+
values = _ValD . typed . _VarP
66+
functions = _FunD . typed
67+
68+
truncate :: Name -> Name
69+
truncate = mkName . nameBase
70+
71+
pathForLanguage :: Bazel.Runfiles -> Language -> FilePath
72+
pathForLanguage rf =
73+
let loc = Bazel.rlocation rf
74+
in \case
75+
CodeQL -> loc "tree-sitter-ql/vendor/tree-sitter-ql/src/node-types.json"
76+
Go -> loc "tree-sitter-go/vendor/tree-sitter-go/src/node-types.json"
77+
PHP -> loc "tree-sitter-php/vendor/tree-sitter-php/src/node-types.json"
78+
Python -> loc "tree-sitter-python/vendor/tree-sitter-python/src/node-types.json"
79+
Ruby -> loc "tree-sitter-ruby/vendor/tree-sitter-ruby/src/node-types.json"
80+
TypeScript -> loc "tree-sitter-typescript/vendor/tree-sitter-typescript/typescript/src/node-types.json"
81+
TSX -> loc "tree-sitter-tsx/vendor/tree-sitter-typescript/tsx/src/node-types.json"
82+
JavaScript -> loc "tree-sitter-typescript/vendor/tree-sitter-typescript/typescript/src/node-types.json"
83+
JSX -> loc "tree-sitter-typescript/vendor/tree-sitter-typescript/src/tsx/node-types.json"
84+
Java -> loc "tree-sitter-java/vendor/tree-sitter-java/src/node-types.json"
85+
other -> error ("Couldn't find path for " <> show other)
86+
87+
targetForLanguage :: Language -> FilePath
88+
targetForLanguage x =
89+
let go lc = printf "semantic-%s/src/Language/%s/AST.hs" (lc :: String) (show x)
90+
in case x of
91+
CodeQL -> go "codeql"
92+
Go -> go "go"
93+
PHP -> go "php"
94+
Python -> go "python"
95+
Ruby -> go "ruby"
96+
TypeScript -> go "typescript"
97+
TSX -> go "tsx"
98+
JavaScript -> go "javascript"
99+
Java -> go "java"
100+
other -> error ("Couldn't find path for " <> show other)
101+
102+
parserForLanguage :: Language -> Ptr TreeSitter.Language.Language
103+
parserForLanguage = \case
104+
Unknown -> error "Unknown language encountered"
105+
CodeQL -> (CodeQL.tree_sitter_ql)
106+
Go -> Go.tree_sitter_go
107+
Haskell -> error "Haskell backend not implemented yet"
108+
Java -> Java.tree_sitter_java
109+
JavaScript -> TypeScript.tree_sitter_typescript
110+
JSON -> JSON.tree_sitter_json
111+
JSX -> TSX.tree_sitter_tsx
112+
Markdown -> error "Markdown backend deprecated"
113+
PHP -> PHP.tree_sitter_php
114+
Python -> Python.tree_sitter_python
115+
Ruby -> Ruby.tree_sitter_ruby
116+
TypeScript -> TypeScript.tree_sitter_typescript
117+
TSX -> TSX.tree_sitter_tsx
118+
119+
-- nodeTypesPathForLanguage :: Bazel.Runfiles -> Language -> FilePath
120+
-- nodeTypesPathForLanguage rf = \case
121+
-- CodeQL -> r
122+
123+
validLanguages :: [Language]
124+
validLanguages = [CodeQL, Go, Java, PHP, Python, Ruby, TypeScript, TSX]
125+
126+
emit :: FilePath -> Language -> IO ()
127+
emit root lang = do
128+
rf <- Bazel.create
129+
let language = languageToText lang
130+
let languageHack = case lang of
131+
CodeQL -> "QL"
132+
_ -> language
133+
let path = pathForLanguage rf lang
134+
decls <- T.pack . pprint . fmap adjust <$> astDeclarationsIO (parserForLanguage lang) path
135+
136+
let programText =
137+
[trimming|
138+
-- Language definition for $language, generated by ast-generate. Do not edit!
139+
{-# LANGUAGE CPP #-}
140+
{-# LANGUAGE DataKinds #-}
141+
{-# LANGUAGE DeriveAnyClass #-}
142+
{-# LANGUAGE DeriveGeneric #-}
143+
{-# LANGUAGE DeriveTraversable #-}
144+
{-# LANGUAGE DerivingStrategies #-}
145+
{-# LANGUAGE DuplicateRecordFields #-}
146+
{-# LANGUAGE FlexibleInstances #-}
147+
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
148+
{-# LANGUAGE KindSignatures #-}
149+
{-# LANGUAGE MultiParamTypeClasses #-}
150+
{-# LANGUAGE QuantifiedConstraints #-}
151+
{-# LANGUAGE StandaloneDeriving #-}
152+
{-# LANGUAGE TemplateHaskell #-}
153+
{-# LANGUAGE TypeApplications #-}
154+
{-# LANGUAGE TypeOperators #-}
155+
{-# LANGUAGE UndecidableInstances #-}
156+
{-# OPTIONS_GHC -fno-warn-unused-imports #-}
157+
158+
module Language.$language.AST (module Language.$language.AST, getTestCorpusDir) where
159+
160+
import qualified AST.Parse
161+
import qualified AST.Token
162+
import qualified AST.Traversable1.Class
163+
import qualified AST.Unmarshal
164+
import qualified Data.Foldable
165+
import qualified Data.List as Data.OldList
166+
import qualified Data.Maybe as GHC.Maybe
167+
import qualified Data.Text.Internal
168+
import qualified Data.Traversable
169+
import qualified GHC.Base
170+
import qualified GHC.Generics
171+
import qualified GHC.Records
172+
import qualified GHC.Show
173+
import qualified Prelude as GHC.Classes
174+
import qualified TreeSitter.Node
175+
176+
import TreeSitter.$languageHack (getTestCorpusDir)
177+
178+
debugSymbolNames :: [GHC.Base.String]
179+
debugSymbolNames = debugSymbolNames_0
180+
181+
$decls
182+
|]
183+
hasOrmolu <- findExecutable "ormolu"
184+
if isNothing hasOrmolu
185+
then do
186+
T.putStrLn programText
187+
hPutStrLn stderr "generate-ast: No `ormolu` executable found, output will look janky."
188+
else do
189+
(path, tf) <- openTempFile "/tmp" "generated.hs"
190+
T.hPutStrLn tf programText
191+
hClose tf
192+
callProcess "ormolu" ["--mode", "inplace", path]
193+
callProcess "cp" [path, root </> targetForLanguage lang]
194+
195+
main :: IO ()
196+
main = do
197+
Config language root <- Opt.getRecord "generate-ast"
198+
if language == "all"
199+
then traverse_ (emit root) validLanguages
200+
else do
201+
let lang = textToLanguage language
202+
when (lang == Unknown) (die ("Couldn't determine language for " <> T.unpack language))
203+
emit root lang

semantic-ast/semantic-ast.cabal

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,4 +80,3 @@ library
8080
, unordered-containers ^>= 0.2.10
8181

8282
hs-source-dirs: src
83-
default-language: Haskell2010

semantic-ast/src/AST/GenerateSyntax.hs

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
module AST.GenerateSyntax
1010
( syntaxDatatype
1111
, astDeclarationsForLanguage
12+
, astDeclarationsIO
1213
) where
1314

1415
import AST.Deserialize (Children (..), Datatype (..), DatatypeName (..), Field (..), Multiple (..), Named (..), Required (..), Type (..))
@@ -44,13 +45,20 @@ astDeclarationsForLanguage language filePath = do
4445
currentFilename <- loc_filename <$> location
4546
pwd <- runIO getCurrentDirectory
4647
let invocationRelativePath = takeDirectory (pwd </> currentFilename) </> filePath
48+
astDeclarationsRelative lookupTypeName language invocationRelativePath
49+
50+
astDeclarationsIO :: Ptr TS.Language -> FilePath -> IO [Dec]
51+
astDeclarationsIO lang p = runQ (astDeclarationsRelative (const (pure Nothing)) lang p)
52+
53+
astDeclarationsRelative :: (String -> Q (Maybe Name)) -> Ptr TS.Language -> FilePath -> Q [Dec]
54+
astDeclarationsRelative lookupName language invocationRelativePath = do
4755
input <- runIO (eitherDecodeFileStrict' invocationRelativePath) >>= either fail pure
4856
allSymbols <- runIO (getAllSymbols language)
4957
debugSymbolNames <- [d|
5058
debugSymbolNames :: [String]
5159
debugSymbolNames = $(listE (map (litE . stringL . debugPrefix) allSymbols))
5260
|]
53-
mappend debugSymbolNames . concat @[] <$> traverse (syntaxDatatype language allSymbols) input
61+
mappend debugSymbolNames . concat @[] <$> traverse (syntaxDatatype lookupName language allSymbols) input
5462

5563
-- Build a list of all symbols
5664
getAllSymbols :: Ptr TS.Language -> IO [(String, Named)]
@@ -69,14 +77,14 @@ annParameterName :: Name
6977
annParameterName = mkName "a"
7078

7179
-- Auto-generate Haskell datatypes for sums, products and leaf types
72-
syntaxDatatype :: Ptr TS.Language -> [(String, Named)] -> Datatype -> Q [Dec]
73-
syntaxDatatype language allSymbols datatype = skipDefined $ do
80+
syntaxDatatype :: (String -> Q (Maybe Name)) -> Ptr TS.Language -> [(String, Named)] -> Datatype -> Q [Dec]
81+
syntaxDatatype lookupType language allSymbols datatype = skipDefined $ do
7482
let traversalInstances = mappend <$> makeStandaloneDerivings (conT name) <*> makeTraversalInstances (conT name)
7583
glue a b c = a : b <> c
7684
name = mkName nameStr
7785
generatedDatatype cons = dataD (cxt []) name [plainTV annParameterName] Nothing cons [deriveStockClause, deriveAnyClassClause]
7886
deriveStockClause = derivClause (Just StockStrategy) [conT ''Generic, conT ''Generic1]
79-
deriveAnyClassClause = derivClause (Just AnyclassStrategy) [conT ''Traversable1 `appT` varT (mkName "someConstraint")]
87+
deriveAnyClassClause = derivClause (Just AnyclassStrategy) [ [t| (forall a. Traversable1 a) |] ]
8088
deriveGN = derivClause (Just NewtypeStrategy) [conT ''TS.SymbolMatching]
8189
case datatype of
8290
SumType (DatatypeName _) _ subtypes ->
@@ -101,7 +109,7 @@ syntaxDatatype language allSymbols datatype = skipDefined $ do
101109
where
102110
-- Skip generating datatypes that have already been defined (overridden) in the module where the splice is running.
103111
skipDefined m = do
104-
isLocal <- lookupTypeName nameStr >>= maybe (pure False) isLocalName
112+
isLocal <- lookupType nameStr >>= maybe (pure False) isLocalName
105113
if isLocal then pure [] else m
106114
nameStr = toNameString (datatypeNameStatus datatype) (getDatatypeName (AST.Deserialize.datatypeName datatype))
107115

0 commit comments

Comments
 (0)