您好,登錄后才能下訂單哦!
本文小編為大家詳細介紹“PHP怎么實現詞法分析與自定義語言”,內容詳細,步驟清晰,細節處理妥當,希望這篇“PHP怎么實現詞法分析與自定義語言”文章能幫助大家解決疑惑,下面跟著小編的思路慢慢深入,一起來學習新知識吧。
之前項目有一個需求,業務人員使用中文編寫一些自定義公式,然后需要我們后臺執行將結果返回到界面上,于是就基于有限狀態機寫了這個詞法分析器,比較簡單,希望能夠拋磚引玉。
一、分析需求
輸入中文公式,返回結果,比如:
現有薪資=10000; 個稅起點=3000; 當前年份=2021; 如果(當前年份=2022){ 個稅起點=5000; } 返回 (現有薪資-個稅起點) * 0.2;
二、實現需求
最初的想法是使用字符串替換的方式,將中文關鍵字替換成php的關鍵字,然后調用eval執行,這樣確實也是可以的,但是總覺得不是很美麗,并且不能實現動態解析。就想著自己實現一個簡單的詞法分析,然后結合ast將詞法轉換成php代碼執行,豈不快哉。當前版本沒有用到抽象語法樹來生成代碼,全部使用字符串拼接。
<?php

/**
 * Class Lexer
 *
 * A small hand-written lexer for a formula language that uses Chinese (or
 * English) identifiers and keywords. It turns source text into a flat list of
 * tokens (see {@see Lexer::parseInput()}) and can render that list as PHP
 * source text by plain string concatenation (see {@see Lexer::convert()}).
 *
 * Every token is an array of the form
 * `['type' => string, 'char' => string, 'pos' => int]`, where `pos` is the
 * character offset at which the token starts in the input.
 *
 * Known limitations: string literals have no escape sequences, identifiers
 * consist of letters only, and any character the lexer does not recognise is
 * emitted as a one-character "variable" token.
 *
 * @package Sett\OaLang
 */
class Lexer
{
    /** @var array<string, string> Map of source-language keyword => PHP keyword. */
    public $keywordList = [];

    /** @var string[] Single-character operators and punctuation. */
    public $operatorList = [
        "+", "-", "*", "/", "=", ">", "<", "!", "(", ")", "{", "}", ",", ";"
    ];

    /** @var string The source text being tokenised. */
    private $input;

    /** @var string The character under the cursor, or the EOF sentinel. */
    private $currChar;

    /** @var int Character (not byte) offset of the cursor. */
    private $currCharPos = 0;

    /**
     * @var string End-of-input sentinel. It is longer than one character, so
     *             it can never be confused with a character read from input.
     */
    private $eof = "eof";

    /** @var string Encoding used for every multibyte operation. */
    private $currEncode = "UTF-8";

    // Token types.
    public const VAR = "variable";
    public const STR = "string";
    public const KW = "keyword";
    public const OPR = "operator";
    public const INT = "integer";
    public const NIL = "null";

    /**
     * Lexer constructor.
     *
     * @param string $input Source text to tokenise.
     */
    public function __construct(string $input)
    {
        $this->input = $input;
        $this->currChar = mb_substr($this->input, $this->currCharPos, 1, $this->currEncode);
    }

    /**
     * Sets the keyword table.
     *
     * @param array $keywordList Map of source keyword => PHP keyword,
     *                           e.g. ['如果' => 'if'].
     */
    public function setKeywordList($keywordList)
    {
        $this->keywordList = $keywordList;
    }

    /**
     * Tokenises the remaining input.
     *
     * After every keyword token an extra NIL token holding a single space is
     * appended, so that convert() separates the keyword from what follows.
     *
     * @return array List of tokens; the EOF token is not included.
     * @throws Exception When the input is empty or a string literal is not closed.
     */
    public function parseInput()
    {
        if ($this->input === "") {
            throw new \Exception("code can not be empty");
        }

        $tokens = [];
        while (true) {
            $token = $this->nextToken();
            if ($token["type"] === "eof") {
                break;
            }
            $tokens[] = $token;
            if ($token["type"] === static::KW) {
                $tokens[] = $this->makeToken(static::NIL, " ");
            }
        }

        return $tokens;
    }

    /**
     * Reads the next token and leaves the cursor on the first character
     * after it.
     *
     * @return array The token; its type is 'eof' once the input is exhausted.
     * @throws Exception When a string literal is not closed.
     */
    public function nextToken()
    {
        $this->skipBlankChar();
        if ($this->currChar === "") {
            // Only reachable for an empty input, before the first nextChar().
            $this->currChar = $this->eof;
        }
        $start = $this->currCharPos;

        if ($this->currChar === $this->eof) {
            return $this->makeToken("eof", $this->eof, $start);
        }

        // Identifier or keyword: a run of Chinese and/or English letters.
        if ($this->isVar()) {
            $word = $this->matchVar();
            // matchVar() already stopped on the first character after the
            // word, so the token must be built WITHOUT advancing again.
            $type = $this->isKeyword($word) ? static::KW : static::VAR;

            return $this->makeToken($type, $word, $start);
        }

        if ($this->isOperator()) {
            return $this->currToken(static::OPR, $this->currChar);
        }

        if ($this->isNumber()) {
            return $this->makeToken(static::INT, $this->matchNumber(), $start);
        }

        if ($this->isStr()) {
            return $this->makeToken(static::STR, $this->matchCompleteStr(), $start);
        }

        // Anything unrecognised is passed through as a one-character variable.
        return $this->currToken(static::VAR, $this->currChar);
    }

    /**
     * Consumes a run of identifier characters.
     *
     * @param string $input Optional prefix that was already consumed.
     * @return string The prefix followed by the consumed characters.
     */
    private function matchVar(string $input = ""): string
    {
        $word = $input;
        while ($this->isVar()) {
            $word .= $this->currChar;
            $this->nextChar();
        }

        return $word;
    }

    /**
     * Consumes an unsigned number: digits, optionally followed by a decimal
     * point and more digits.
     *
     * @return string The literal exactly as written in the input.
     */
    private function matchNumber(): string
    {
        $number = $this->matchDigits();
        // Take the '.' only when a digit follows; otherwise it is not part
        // of this number.
        if ($this->currChar === "." && $this->isDigit($this->peekChar())) {
            $number .= ".";
            $this->nextChar();
            $number .= $this->matchDigits();
        }

        return $number;
    }

    /**
     * Consumes a run of ASCII digits.
     *
     * @return string The digits consumed (possibly empty).
     */
    private function matchDigits(): string
    {
        $digits = "";
        while ($this->isNumber()) {
            $digits .= $this->currChar;
            $this->nextChar();
        }

        return $digits;
    }

    /**
     * @return bool Whether the current character can be part of an identifier.
     */
    private function isVar(): bool
    {
        return $this->isCnLetter() || $this->isEnLetter();
    }

    /**
     * Skips line feeds, carriage returns, spaces and tabs.
     */
    private function skipBlankChar(): void
    {
        while (in_array($this->currChar, ["\n", "\r", " ", "\t"], true)) {
            $this->nextChar();
        }
    }

    /**
     * Builds a token for the single character under the cursor, then
     * advances past it.
     *
     * @param string $type Token type.
     * @param string $word Token text.
     * @return array
     */
    private function currToken(string $type, string $word): array
    {
        $token = $this->makeToken($type, $word);
        $this->nextChar();

        return $token;
    }

    /**
     * Builds a token array.
     *
     * @param string   $type Token type.
     * @param string   $char Token text.
     * @param int|null $pos  Start offset; defaults to the cursor position.
     * @return array
     */
    private function makeToken(string $type, string $char, ?int $pos = null): array
    {
        return ["type" => $type, "char" => $char, "pos" => $pos ?? $this->currCharPos];
    }

    /**
     * @return bool Whether the current character is an ASCII letter (a-z, A-Z).
     */
    private function isEnLetter(): bool
    {
        // The guard also keeps the "eof" sentinel from being read as 'e'.
        if ($this->currChar === "" || $this->currChar === $this->eof) {
            return false;
        }
        $ord = mb_ord($this->currChar, $this->currEncode);

        return ($ord >= ord("a") && $ord <= ord("z"))
            || ($ord >= ord("A") && $ord <= ord("Z"));
    }

    /**
     * @return bool Whether the current character lies in U+4E00..U+9FA5.
     */
    private function isCnLetter(): bool
    {
        return preg_match('/^[\x{4e00}-\x{9fa5}]+$/u', $this->currChar) === 1;
    }

    /**
     * @return bool Whether the current character is an ASCII digit.
     */
    private function isNumber(): bool
    {
        return $this->isDigit($this->currChar);
    }

    /**
     * @param string $char Candidate character.
     * @return bool Whether $char is exactly one ASCII digit.
     */
    private function isDigit(string $char): bool
    {
        if (strlen($char) !== 1) {
            return false;
        }
        $code = ord($char);

        return $code >= 48 && $code <= 57;
    }

    /**
     * @return bool Whether the current character opens a string literal.
     */
    private function isStr(): bool
    {
        return $this->currChar === "\"";
    }

    /**
     * Consumes a double-quoted string literal, including both quotes.
     *
     * @return string The text between the quotes (may be empty).
     * @throws Exception When the input ends before the closing quote.
     */
    private function matchCompleteStr(): string
    {
        $start = $this->currCharPos;
        $this->nextChar(); // step over the opening quote

        $content = "";
        while ($this->currChar !== "\"") {
            if ($this->currChar === $this->eof) {
                // Without this check the loop would never terminate.
                throw new \Exception(
                    sprintf("unterminated string starting at position %d", $start)
                );
            }
            $content .= $this->currChar;
            $this->nextChar();
        }
        $this->nextChar(); // step over the closing quote

        return $content;
    }

    /**
     * @return bool Whether the current character is listed in $operatorList.
     */
    private function isOperator(): bool
    {
        return in_array($this->currChar, $this->operatorList, true);
    }

    /**
     * Advances the cursor by one character; past the end of the input the
     * current character becomes the EOF sentinel.
     */
    private function nextChar(): void
    {
        $this->currCharPos += 1;
        $char = mb_substr($this->input, $this->currCharPos, 1, $this->currEncode);
        $this->currChar = $char === "" ? $this->eof : $char;
    }

    /**
     * @return string The character after the cursor, or "" at end of input.
     */
    private function peekChar(): string
    {
        return mb_substr($this->input, $this->currCharPos + 1, 1, $this->currEncode);
    }

    /**
     * @param string $input Candidate word.
     * @return bool Whether $input has a non-empty entry in the keyword table.
     */
    private function isKeyword(string $input): bool
    {
        return ($this->keywordList[$input] ?? "") != "";
    }

    /**
     * Renders a token list as PHP source text.
     *
     * - keyword : replaced by its mapping (kept as-is when no mapping exists)
     * - variable: prefixed with '$'
     * - string  : emitted as a single-quoted PHP literal, so '$', '\' and
     *             quotes inside user text can neither be interpolated nor
     *             terminate the literal early
     * - '+'     : see convertPlus()
     * - others  : emitted unchanged
     *
     * @param array $tokens Tokens as produced by parseInput().
     * @return string PHP source text.
     */
    public function convert(array $tokens)
    {
        $tokens = array_values($tokens);
        $code = "";
        foreach ($tokens as $index => $token) {
            $char = $token["char"];
            switch ($token["type"]) {
                case static::KW:
                    $code .= $this->keywordList[$char] ?? $char;
                    break;
                case static::VAR:
                    $code .= '$' . $char;
                    break;
                case static::OPR:
                    $code .= $char === "+" ? $this->convertPlus($tokens, $index) : $char;
                    break;
                case static::STR:
                    $code .= var_export((string) $char, true);
                    break;
                case static::INT:
                default:
                    $code .= $char;
            }
        }

        return $code;
    }

    /**
     * Chooses the PHP operator for a source-language '+'.
     *
     * The source language uses '+' for both addition and concatenation and
     * the lexer has no type information, so this is a heuristic:
     * - no numeric literal on either side: '.' (concatenation);
     * - a numeric literal on one side and a string literal on the other:
     *   ' . ', padded so the dot cannot fuse with the number ("1.");
     * - a numeric literal otherwise: '+', since a bare '.' next to a number
     *   would yield a float literal such as 1.2 or a parse error.
     *
     * @param array $tokens Re-indexed token list.
     * @param int   $index  Index of the '+' token.
     * @return string
     */
    private function convertPlus(array $tokens, int $index): string
    {
        $prev = $this->neighbourType($tokens, $index, -1);
        $next = $this->neighbourType($tokens, $index, 1);

        $hasNumber = $prev === static::INT || $next === static::INT;
        if (!$hasNumber) {
            return ".";
        }
        $hasString = $prev === static::STR || $next === static::STR;

        return $hasString ? " . " : "+";
    }

    /**
     * Finds the type of the nearest token in a direction, ignoring the NIL
     * spacer tokens that parseInput() inserts after keywords.
     *
     * @param array $tokens Re-indexed token list.
     * @param int   $index  Starting index (exclusive).
     * @param int   $step   -1 to look backwards, 1 to look forwards.
     * @return string|null The type, or null when there is no such token.
     */
    private function neighbourType(array $tokens, int $index, int $step): ?string
    {
        for ($i = $index + $step; isset($tokens[$i]); $i += $step) {
            if ($tokens[$i]["type"] !== static::NIL) {
                return $tokens[$i]["type"];
            }
        }

        return null;
    }
}
三、如何使用
require __DIR__ . "/vendor/autoload.php";

// A sample program written in the custom language.
// Every statement, including the one inside the else-branch, must end with
// ';' because the generated PHP is a straight transliteration of it.
$code = <<<EOF
姓名="腕豪";
問候="你好啊";
地址=(1+2) * 3;
如果(地址 > 3){
    地址=1;
}否則{
    地址="艾歐尼亞";
}
說話 = ("我"+"愛")+"你";
返回 姓名+年齡;
EOF;

$lexer = new Lexer($code);

// Define your own keywords: source-language word => PHP keyword.
$kwMap = [
    "如果" => "if",
    "否則" => "else",
    "返回" => "return",
    "否則如果" => "elseif"
];
$lexer->setKeywordList($kwMap);

// Tokenise the program.
$tokens = $lexer->parseInput();

// Render the tokens as PHP source. This is plain string concatenation; you
// could instead build an AST with php-parser and print that.
var_dump($lexer->convert($tokens));
生成詞
[{ "type": "variable", "char": "姓名", "pos": 2}, { "type": "operator", "char": "=", "pos": 2}, { "type": "string", "char": "腕豪", "pos": 7}, { "type": "operator", "char": ";", "pos": 8}, { "type": "variable", "char": "問候", "pos": 13}, { "type": "operator", "char": "=", "pos": 13}, { "type": "string", "char": "你好啊", "pos": 17}, { "type": "operator", "char": ";", "pos": 18}, { "type": "variable", "char": "地址", "pos": 23}, { "type": "operator", "char": "=", "pos": 23}, { "type": "operator", "char": "(", "pos": 24}, { "type": "integer", "char": "1", "pos": 25}, { "type": "operator", "char": "+", "pos": 26}, { "type": "integer", "char": "2", "pos": 27}, { "type": "operator", "char": ")", "pos": 28}, { "type": "operator", "char": "*", "pos": 30}, { "type": "integer", "char": "3", "pos": 32}, { "type": "operator", "char": ";", "pos": 33}, { "type": "keyword", "char": "如果", "pos": 37}, { "type": "null", "char": " ", "pos": 38}, { "type": "operator", "char": "(", "pos": 38}, { "type": "variable", "char": "地址", "pos": 41}, { "type": "operator", "char": ">", "pos": 42}, { "type": "integer", "char": "3", "pos": 44}, { "type": "operator", "char": ")", "pos": 45}, { "type": "operator", "char": "{", "pos": 46}, { "type": "variable", "char": "地址", "pos": 55}, { "type": "operator", "char": "=", "pos": 55}, { "type": "integer", "char": "1", "pos": 56}, { "type": "operator", "char": ";", "pos": 57}, { "type": "operator", "char": "}", "pos": 60}, { "type": "keyword", "char": "否則", "pos": 62}, { "type": "null", "char": " ", "pos": 63}, { "type": "operator", "char": "{", "pos": 63}, { "type": "variable", "char": "地址", "pos": 72}, { "type": "operator", "char": "=", "pos": 72}, { "type": "string", "char": "艾歐尼亞", "pos": 78}, { "type": "operator", "char": ";", "pos": 79}, { "type": "operator", "char": "}", "pos": 82}, { "type": "variable", "char": "說話", "pos": 87}, { "type": "operator", "char": "=", "pos": 88}, { "type": "operator", "char": "(", "pos": 90}, { "type": "string", "char":
"我", "pos": 93}, { "type": "operator", "char": "+", "pos": 94}, { "type": "string", "char": "愛", "pos": 97}, { "type": "operator", "char": ")", "pos": 98}, { "type": "operator", "char": "+", "pos": 99}, { "type": "string", "char": "你", "pos": 102}, { "type": "operator", "char": ";", "pos": 103}, { "type": "keyword", "char": "返回", "pos": 107}, { "type": "null", "char": " ", "pos": 108}, { "type": "variable", "char": "姓名", "pos": 111}, { "type": "operator", "char": "+", "pos": 111}, { "type": "variable", "char": "年齡", "pos": 114}, { "type": "operator", "char": ";", "pos": 114}]
輸出:
$姓名="腕豪";$問候="你好啊";$地址=(1.2)*3;if ($地址>3){$地址=1;}else {$地址="艾歐尼亞";}$說話=("我"."愛")."你";return $姓名.$年齡;
能執行嗎?當然能。還存在一些小bug,不想改了。
四、使用場景
什么,居然有人說沒什么用?oa系統(tǒng)總有用到的時候。
讀到這里,這篇“PHP怎么實現詞法分析與自定義語言”文章已經介紹完畢,想要掌握這篇文章的知識點還需要大家自己動手實踐使用過才能領會,如果想了解更多相關內容的文章,歡迎關注億速云行業資訊頻道。
免責聲明:本站發布的內容(圖片、視頻和文字)以原創、轉載和分享為主,文章觀點不代表本網站立場,如果涉及侵權請聯系站長郵箱:is@yisu.com進行舉報,并提供相關證據,一經查實,將立刻刪除涉嫌侵權內容。