From 7b01f9799f99851955f23c18c12e6651c1941022 Mon Sep 17 00:00:00 2001
From: jugglinmike <mike@mikepennisi.com>
Date: Fri, 9 Feb 2018 11:27:33 -0500
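Subject: [PATCH] [generation] Tolerate unicode in source files (#1411)

Read test case and template files with `codecs.open`, using the encoding
that `Expander.expand_case` already receives, instead of the built-in
`open` with no explicit encoding. The encoding is passed through to the
`Case` and `Template` constructors. The template expansion code asserts
that the encoding is 'utf-8' before encoding the generated source. The
"normal" fixture, its template, and the expected output now contain a
non-ASCII character to exercise this.
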
---
 tools/generation/lib/case.py                       |  5 +++--
 tools/generation/lib/expander.py                   | 14 ++++++++------
 tools/generation/lib/template.py                   |  5 +++--
 .../test/expected/normal/nested/path2-normal.js    |  2 +-
 .../test/expected/normal/path1-normal.js           |  6 ++++--
 tools/generation/test/fixtures/normal.case         |  2 +-
 .../test/fixtures/normal/normal.template           |  2 ++
 7 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/tools/generation/lib/case.py b/tools/generation/lib/case.py
index f43d4f664b..5ed1c63de0 100644
--- a/tools/generation/lib/case.py
+++ b/tools/generation/lib/case.py
@@ -1,6 +1,7 @@
 # Copyright (C) 2016 the V8 project authors. All rights reserved.
 # This code is governed by the BSD license found in the LICENSE file.
 
+import codecs
 import re
 
 from util.find_comments import find_comments
@@ -9,10 +10,10 @@ from util.parse_yaml import parse_yaml
 regionStartPattern = re.compile(r'-\s+(\S+)')
 
 class Case:
-    def __init__(self, file_name):
+    def __init__(self, file_name, encoding):
         self.attribs = dict(meta=None, regions=dict())
 
-        with open(file_name) as handle:
+        with codecs.open(file_name, 'r', encoding) as handle:
             self.attribs = self._parse(handle.read())
 
     def _parse(self, source):
diff --git a/tools/generation/lib/expander.py b/tools/generation/lib/expander.py
index 5703a698b6..c0b7a57136 100644
--- a/tools/generation/lib/expander.py
+++ b/tools/generation/lib/expander.py
@@ -14,18 +14,20 @@ class Expander:
         self.templates = dict()
         self.case_dir = case_dir
 
-    def _load_templates(self, template_class):
+    def _load_templates(self, template_class, encoding):
         directory = os.path.join(self.case_dir, template_class)
         file_names = map(
             lambda x: os.path.join(directory, x),
             filter(self.is_template_file, os.listdir(directory))
         )
 
-        self.templates[template_class] = [Template(x) for x in file_names]
+        self.templates[template_class] = [
+            Template(x, encoding) for x in file_names
+        ]
 
-    def _get_templates(self, template_class):
+    def _get_templates(self, template_class, encoding):
         if not template_class in self.templates:
-            self._load_templates(template_class)
+            self._load_templates(template_class, encoding)
 
         return self.templates[template_class]
 
@@ -49,10 +51,10 @@ class Expander:
                 yield test
 
     def expand_case(self, file_name, encoding):
-        case = Case(file_name)
+        case = Case(file_name, encoding)
 
         template_class = case.attribs['meta']['template']
         templates = self.templates.get(template_class)
 
-        for template in self._get_templates(template_class):
+        for template in self._get_templates(template_class, encoding):
             yield template.expand(file_name, os.path.basename(file_name[:-5]), case.attribs, encoding)
diff --git a/tools/generation/lib/template.py b/tools/generation/lib/template.py
index 14cc3caebe..bc66b7e8cb 100644
--- a/tools/generation/lib/template.py
+++ b/tools/generation/lib/template.py
@@ -50,10 +50,10 @@ def indent(text, prefix = '    ', js_value = False):
     return '\n'.join(indented)
 
 class Template:
-    def __init__(self, filename):
+    def __init__(self, filename, encoding):
         self.filename = filename
 
-        with open(filename) as template_file:
+        with codecs.open(filename, 'r', encoding) as template_file:
             self.source = template_file.read()
 
         self.attribs = dict()
@@ -203,5 +203,6 @@ class Template:
         frontmatter = self._frontmatter(case_filename, case_values)
         body = self.expand_regions(self.source, case_values)
 
+        assert encoding == 'utf-8'
         return Test(self.attribs['meta']['path'] + case_name + '.js',
             source=codecs.encode(frontmatter + '\n' + body, encoding))
diff --git a/tools/generation/test/expected/normal/nested/path2-normal.js b/tools/generation/test/expected/normal/nested/path2-normal.js
index fc85583e75..36bfd2509e 100644
--- a/tools/generation/test/expected/normal/nested/path2-normal.js
+++ b/tools/generation/test/expected/normal/nested/path2-normal.js
@@ -12,7 +12,7 @@ info: |
     case info
 ---*/
 
-before-Third valueSecond value-after
+before-Third value (Special characters like `≠` should be tolerated.)Second value-after
 
 /* Improperly-terminated comments should not break the tokenizer *
 
diff --git a/tools/generation/test/expected/normal/path1-normal.js b/tools/generation/test/expected/normal/path1-normal.js
index e80120c38b..4d1481fb59 100644
--- a/tools/generation/test/expected/normal/path1-normal.js
+++ b/tools/generation/test/expected/normal/path1-normal.js
@@ -12,11 +12,13 @@ info: |
     case info
 ---*/
 
-before-First value-between-Third value-after
+before-First value-between-Third value (Special characters like `≠` should be tolerated.)-after
 
 before*Second value*between*First value*after
 
-before/* " */Third valueafter
+before/* " */Third value (Special characters like `≠` should be tolerated.)after
+
+// Special characters like `≠` should be tolerated.
 
 The following should not be expanded:
 
diff --git a/tools/generation/test/fixtures/normal.case b/tools/generation/test/fixtures/normal.case
index c97a6e7b7f..2b9a71936d 100644
--- a/tools/generation/test/fixtures/normal.case
+++ b/tools/generation/test/fixtures/normal.case
@@ -22,7 +22,7 @@ First value
 //- second
 Second value
 //- third
-Third value
+Third value (Special characters like `≠` should be tolerated.)
 //- fourth
 Quote characters: " ' `
 //- teardown
diff --git a/tools/generation/test/fixtures/normal/normal.template b/tools/generation/test/fixtures/normal/normal.template
index 32bc9cff19..29ff4d5915 100644
--- a/tools/generation/test/fixtures/normal/normal.template
+++ b/tools/generation/test/fixtures/normal/normal.template
@@ -14,6 +14,8 @@ before*/*{ second }*/*between*/*{ first }*/*after
 
 before/* " *//*{ third }*/after
 
+// Special characters like `≠` should be tolerated.
+
 The following should not be expanded:
 
 /* */*{ first }*/
-- 
GitLab