|
| 1 | +{-# LANGUAGE DeriveAnyClass #-} |
| 2 | +{-# LANGUAGE DeriveGeneric #-} |
| 3 | +{-# LANGUAGE DerivingStrategies #-} |
| 4 | +{-# LANGUAGE FlexibleInstances #-} |
| 5 | +{-# LANGUAGE LambdaCase #-} |
| 6 | +{-# LANGUAGE OverloadedStrings #-} |
| 7 | +{-# LANGUAGE QuasiQuotes #-} |
| 8 | +{-# LANGUAGE TemplateHaskell #-} |
| 9 | +{-# LANGUAGE TypeApplications #-} |
| 10 | +{-# LANGUAGE TypeSynonymInstances #-} |
| 11 | +{-# OPTIONS_GHC -fno-warn-orphans #-} |
| 12 | + |
| 13 | +module Main (main) where |
| 14 | + |
| 15 | +import AST.GenerateSyntax |
| 16 | +import qualified Bazel.Runfiles as Bazel |
| 17 | +import Control.Lens (Traversal', mapped, (%~)) |
| 18 | +import Control.Monad |
| 19 | +import Data.Foldable |
| 20 | +import Data.Generics.Product.Typed (typed) |
| 21 | +import Data.Maybe |
| 22 | +import Data.Text (Text) |
| 23 | +import qualified Data.Text as T |
| 24 | +import qualified Data.Text.IO as T |
| 25 | +import Foreign |
| 26 | +import GHC.Generics (Generic) |
| 27 | +import Language.Haskell.TH hiding (JavaScript) |
| 28 | +import Language.Haskell.TH.Lens |
| 29 | +import NeatInterpolation |
| 30 | +import qualified Options.Generic as Opt |
| 31 | +import Source.Language |
| 32 | +import System.FilePath |
| 33 | +import System.Directory |
| 34 | +import System.Exit |
| 35 | +import System.IO |
| 36 | +import System.Process |
| 37 | +import Text.Printf |
| 38 | +import qualified TreeSitter.Go as Go (tree_sitter_go) |
| 39 | +import qualified TreeSitter.JSON as JSON (tree_sitter_json) |
| 40 | +import qualified TreeSitter.Java as Java (tree_sitter_java) |
| 41 | +import qualified TreeSitter.Language |
| 42 | +import qualified TreeSitter.PHP as PHP (tree_sitter_php) |
| 43 | +import qualified TreeSitter.Python as Python (tree_sitter_python) |
| 44 | +import qualified TreeSitter.QL as CodeQL (tree_sitter_ql) |
| 45 | +import qualified TreeSitter.Ruby as Ruby (tree_sitter_ruby) |
| 46 | +import qualified TreeSitter.TSX as TSX (tree_sitter_tsx) |
| 47 | +import qualified TreeSitter.TypeScript as TypeScript (tree_sitter_typescript) |
| 48 | + |
| 49 | +-- As a special case, you can pass |
| 50 | +data Config = Config {language :: Text, rootdir :: FilePath} |
| 51 | + deriving stock (Show, Generic) |
| 52 | + deriving anyclass (Opt.ParseRecord) |
| 53 | + |
| 54 | +-- There are a few cases where the output emitted by TH's 'pprint' doesn't |
| 55 | +-- create entirely valid Haskell syntax, because sometimes we get |
| 56 | +-- a qualified name on the LHS of a typeclass declaration, which Haskell |
| 57 | +-- doesn't like at all. I haven't figured out quite why we get this qualified |
| 58 | +-- name, but for now the easiest thing to do is some nested updates with lens. |
| 59 | +adjust :: Dec -> Dec |
| 60 | +adjust = _InstanceD . typed . mapped %~ (values %~ truncate) . (functions %~ truncate) |
| 61 | + where |
| 62 | + -- Need to handle functions with no arguments, which are parsed as ValD entities, |
| 63 | + -- as well as those with arguments, which are FunD. |
| 64 | + values, functions :: Traversal' Dec Name |
| 65 | + values = _ValD . typed . _VarP |
| 66 | + functions = _FunD . typed |
| 67 | + |
| 68 | + truncate :: Name -> Name |
| 69 | + truncate = mkName . nameBase |
| 70 | + |
| 71 | +pathForLanguage :: Bazel.Runfiles -> Language -> FilePath |
| 72 | +pathForLanguage rf = |
| 73 | + let loc = Bazel.rlocation rf |
| 74 | + in \case |
| 75 | + CodeQL -> loc "tree-sitter-ql/vendor/tree-sitter-ql/src/node-types.json" |
| 76 | + Go -> loc "tree-sitter-go/vendor/tree-sitter-go/src/node-types.json" |
| 77 | + PHP -> loc "tree-sitter-php/vendor/tree-sitter-php/src/node-types.json" |
| 78 | + Python -> loc "tree-sitter-python/vendor/tree-sitter-python/src/node-types.json" |
| 79 | + Ruby -> loc "tree-sitter-ruby/vendor/tree-sitter-ruby/src/node-types.json" |
| 80 | + TypeScript -> loc "tree-sitter-typescript/vendor/tree-sitter-typescript/typescript/src/node-types.json" |
| 81 | + TSX -> loc "tree-sitter-tsx/vendor/tree-sitter-typescript/tsx/src/node-types.json" |
| 82 | + JavaScript -> loc "tree-sitter-typescript/vendor/tree-sitter-typescript/typescript/src/node-types.json" |
| 83 | + JSX -> loc "tree-sitter-typescript/vendor/tree-sitter-typescript/src/tsx/node-types.json" |
| 84 | + Java -> loc "tree-sitter-java/vendor/tree-sitter-java/src/node-types.json" |
| 85 | + other -> error ("Couldn't find path for " <> show other) |
| 86 | + |
| 87 | +targetForLanguage :: Language -> FilePath |
| 88 | +targetForLanguage x = |
| 89 | + let go lc = printf "semantic-%s/src/Language/%s/AST.hs" (lc :: String) (show x) |
| 90 | + in case x of |
| 91 | + CodeQL -> go "codeql" |
| 92 | + Go -> go "go" |
| 93 | + PHP -> go "php" |
| 94 | + Python -> go "python" |
| 95 | + Ruby -> go "ruby" |
| 96 | + TypeScript -> go "typescript" |
| 97 | + TSX -> go "tsx" |
| 98 | + JavaScript -> go "javascript" |
| 99 | + Java -> go "java" |
| 100 | + other -> error ("Couldn't find path for " <> show other) |
| 101 | + |
| 102 | +parserForLanguage :: Language -> Ptr TreeSitter.Language.Language |
| 103 | +parserForLanguage = \case |
| 104 | + Unknown -> error "Unknown language encountered" |
| 105 | + CodeQL -> (CodeQL.tree_sitter_ql) |
| 106 | + Go -> Go.tree_sitter_go |
| 107 | + Haskell -> error "Haskell backend not implemented yet" |
| 108 | + Java -> Java.tree_sitter_java |
| 109 | + JavaScript -> TypeScript.tree_sitter_typescript |
| 110 | + JSON -> JSON.tree_sitter_json |
| 111 | + JSX -> TSX.tree_sitter_tsx |
| 112 | + Markdown -> error "Markdown backend deprecated" |
| 113 | + PHP -> PHP.tree_sitter_php |
| 114 | + Python -> Python.tree_sitter_python |
| 115 | + Ruby -> Ruby.tree_sitter_ruby |
| 116 | + TypeScript -> TypeScript.tree_sitter_typescript |
| 117 | + TSX -> TSX.tree_sitter_tsx |
| 118 | + |
| 119 | +-- nodeTypesPathForLanguage :: Bazel.Runfiles -> Language -> FilePath |
| 120 | +-- nodeTypesPathForLanguage rf = \case |
| 121 | +-- CodeQL -> r |
| 122 | + |
| 123 | +validLanguages :: [Language] |
| 124 | +validLanguages = [CodeQL, Go, Java, PHP, Python, Ruby, TypeScript, TSX] |
| 125 | + |
| 126 | +emit :: FilePath -> Language -> IO () |
| 127 | +emit root lang = do |
| 128 | + rf <- Bazel.create |
| 129 | + let language = languageToText lang |
| 130 | + let languageHack = case lang of |
| 131 | + CodeQL -> "QL" |
| 132 | + _ -> language |
| 133 | + let path = pathForLanguage rf lang |
| 134 | + decls <- T.pack . pprint . fmap adjust <$> astDeclarationsIO (parserForLanguage lang) path |
| 135 | + |
| 136 | + let programText = |
| 137 | + [trimming| |
| 138 | +-- Language definition for $language, generated by ast-generate. Do not edit! |
| 139 | +{-# LANGUAGE CPP #-} |
| 140 | +{-# LANGUAGE DataKinds #-} |
| 141 | +{-# LANGUAGE DeriveAnyClass #-} |
| 142 | +{-# LANGUAGE DeriveGeneric #-} |
| 143 | +{-# LANGUAGE DeriveTraversable #-} |
| 144 | +{-# LANGUAGE DerivingStrategies #-} |
| 145 | +{-# LANGUAGE DuplicateRecordFields #-} |
| 146 | +{-# LANGUAGE FlexibleInstances #-} |
| 147 | +{-# LANGUAGE GeneralizedNewtypeDeriving #-} |
| 148 | +{-# LANGUAGE KindSignatures #-} |
| 149 | +{-# LANGUAGE MultiParamTypeClasses #-} |
| 150 | +{-# LANGUAGE QuantifiedConstraints #-} |
| 151 | +{-# LANGUAGE StandaloneDeriving #-} |
| 152 | +{-# LANGUAGE TemplateHaskell #-} |
| 153 | +{-# LANGUAGE TypeApplications #-} |
| 154 | +{-# LANGUAGE TypeOperators #-} |
| 155 | +{-# LANGUAGE UndecidableInstances #-} |
| 156 | +{-# OPTIONS_GHC -fno-warn-unused-imports #-} |
| 157 | + |
| 158 | +module Language.$language.AST (module Language.$language.AST, getTestCorpusDir) where |
| 159 | + |
| 160 | +import qualified AST.Parse |
| 161 | +import qualified AST.Token |
| 162 | +import qualified AST.Traversable1.Class |
| 163 | +import qualified AST.Unmarshal |
| 164 | +import qualified Data.Foldable |
| 165 | +import qualified Data.List as Data.OldList |
| 166 | +import qualified Data.Maybe as GHC.Maybe |
| 167 | +import qualified Data.Text.Internal |
| 168 | +import qualified Data.Traversable |
| 169 | +import qualified GHC.Base |
| 170 | +import qualified GHC.Generics |
| 171 | +import qualified GHC.Records |
| 172 | +import qualified GHC.Show |
| 173 | +import qualified Prelude as GHC.Classes |
| 174 | +import qualified TreeSitter.Node |
| 175 | + |
| 176 | +import TreeSitter.$languageHack (getTestCorpusDir) |
| 177 | + |
| 178 | +debugSymbolNames :: [GHC.Base.String] |
| 179 | +debugSymbolNames = debugSymbolNames_0 |
| 180 | + |
| 181 | +$decls |
| 182 | + |] |
| 183 | + hasOrmolu <- findExecutable "ormolu" |
| 184 | + if isNothing hasOrmolu |
| 185 | + then do |
| 186 | + T.putStrLn programText |
| 187 | + hPutStrLn stderr "generate-ast: No `ormolu` executable found, output will look janky." |
| 188 | + else do |
| 189 | + (path, tf) <- openTempFile "/tmp" "generated.hs" |
| 190 | + T.hPutStrLn tf programText |
| 191 | + hClose tf |
| 192 | + callProcess "ormolu" ["--mode", "inplace", path] |
| 193 | + callProcess "cp" [path, root </> targetForLanguage lang] |
| 194 | + |
| 195 | +main :: IO () |
| 196 | +main = do |
| 197 | + Config language root <- Opt.getRecord "generate-ast" |
| 198 | + if language == "all" |
| 199 | + then traverse_ (emit root) validLanguages |
| 200 | + else do |
| 201 | + let lang = textToLanguage language |
| 202 | + when (lang == Unknown) (die ("Couldn't determine language for " <> T.unpack language)) |
| 203 | + emit root lang |
0 commit comments