/**
 * Checks that {@code substitute(list(...))}, called from a function invoked as {@code f(a,b)},
 * yields the call {@code list(a, b)} built from the unevaluated argument symbols.
 */
@Test
public void substituteDotDot() {
  eval(" f<- function(...) substitute(list(...)) ");

  // Typed as SEXP so the matcher's type parameter lines up with the evaluated result.
  SEXP expectedCall =
      new FunctionCall(
          Symbol.get("list"), PairList.Node.fromArray(Symbol.get("a"), Symbol.get("b")));

  assertThat(eval("f(a,b)"), equalTo(expectedCall));
}
/**
 * Adds {@code value} under the tag named {@code name}.
 *
 * @param name the tag's print name; a {@code null} or empty name results in a
 *     {@code Null.INSTANCE} tag
 * @param value the value to add
 * @return whatever the tag-based {@code add(tag, value)} overload returns
 */
public Builder add(String name, SEXP value) {
  // Keep the static type SEXP so the same add(tag, value) overload is selected in both cases.
  SEXP tag = Strings.isNullOrEmpty(name) ? Null.INSTANCE : Symbol.get(name);
  return add(tag, value);
}
/**
 * Maps a nonterminal label to its canonical form.
 *
 * <p>When <code>outputLexLabels</code> is off, or the label does not contain both the
 * non-Treebank left and right bracket characters, the result is simply
 * <code>defaultGetCanonical(label)</code>. Otherwise the text from the first left bracket
 * through the first right bracket is cut out, the remainder is canonicalized with
 * <code>defaultGetCanonical</code>, and the bracketed text is appended again to the canonical
 * print name.
 *
 * @param label the nonterminal label to canonicalize
 * @return the canonical <code>Symbol</code> for <code>label</code>, with any bracketed portion
 *     carried over unchanged
 */
public final Symbol getCanonical(Symbol label) {
  if (!outputLexLabels) {
    return defaultGetCanonical(label);
  }

  char openBracket = nonTreebankLeftBracket();
  char closeBracket = nonTreebankRightBracket();
  String text = label.toString();
  int openIdx = text.indexOf(openBracket);
  int closeIdx = text.indexOf(closeBracket);
  if (openIdx == -1 || closeIdx == -1) {
    return defaultGetCanonical(label);
  }

  // Canonicalize the label with the bracketed portion removed ...
  String withoutBrackets = text.substring(0, openIdx) + text.substring(closeIdx + 1);
  String canonical = defaultGetCanonical(Symbol.get(withoutBrackets)).toString();

  // ... then re-attach the bracketed portion verbatim.
  return Symbol.get(canonical + text.substring(openIdx, closeIdx + 1));
}
/**
 * Checks that {@code substitute(list(...))} inside a function that declares no {@code ...}
 * formal leaves the {@code ...} symbol in place, yielding the call {@code list(...)}.
 */
@Test
public void substituteWithMissingEllipses() {
  eval(" f<- function(a=1) substitute(list(...)) ");

  // Typed as SEXP so the matcher's type parameter lines up with the evaluated result.
  SEXP expectedCall =
      new FunctionCall(Symbol.get("list"), PairList.Node.fromArray(Symbols.ELLIPSES));

  assertThat(eval("f()"), equalTo(expectedCall));
}
/** * Executes the default the standard R initialization sequence: * * <ol> * <li>Load the base package (/org/renjin/library/base/R/base) * <li>Execute the system profile (/org/renjin/library/base/R/Rprofile) * <li>Evaluate .OptRequireMethods() * <li>Evaluate .First.Sys() * </ol> */ public void init() throws IOException { BaseFrame baseFrame = (BaseFrame) session.getBaseEnvironment().getFrame(); baseFrame.load(this); evaluate(FunctionCall.newCall(Symbol.get(".onLoad")), session.getBaseNamespaceEnv()); // evalBaseResource("/org/renjin/library/base/R/Rprofile"); // // // FunctionCall.newCall(new Symbol(".OptRequireMethods")).evaluate(this, environment); // evaluate( FunctionCall.newCall(Symbol.get(".First.sys")), environment); }
/** * Implements an interface to the Jakarta ORO regular expression engine. * * <p>The Scheme API is composed of the following functions: * * <ul> * <li><b>regexp/pattern</b> <it>string</it> [<it>type</it> <it>options</it>] -> <it>pattern</it> * <br> * <p>Compiles a string representing a regular expression into a pattern object. <it>type</it> * is a symbol describing the type of regular expression; currently supported types are * <it>glob</it>, <it>perl5</it>, or <it>awk</it>. If no regexp type is specified, the default * one is <it>perl5</it>. * <p>If the <it>options</it> argument is present, it is a list composed of any of the * following symbols: * <ul> * <li>case-insensitive</it> * <li>multiline</it> * </ul> * <li><b>regexp/match</b> <it>string</it> <it>pattern</it> -> #f | list of <it>matches</it><br> * <p>Try to match <it>pattern</it> on the input string. If there's no match <tt>#f</tt> is * returned, otherwise a list of <it>matches</it> is returned. A <it>match</it> is a cons cell * containing the beginning and end positions of a match group. 
* </ul> * * @author <a href="mailto:[email protected]">Ovidiu Predescu</a> * @since December 14, 2001 * @see org.apache.oro.text.regex.Pattern * @see org.apache.oro.text.regex.PatternCompiler * @see org.apache.oro.text.regex.PatternMatcher */ public class Regexp extends IndexedProcedure { public Regexp(int id) { super(id); } public static final int RPATTERN = 1, RMATCH = 2, RMATCH_POSITIONS = 3, RREPLACE = 4, RREPLACE_ALL = 5, RSPLIT = 6, RSPLIT_DELIM = 7; public static final Symbol REGEX_PERL5 = Symbol.get("perl5"), REGEX_GLOB = Symbol.get("glob"), REGEX_AWK = Symbol.get("awk"), CASE_INSENSITIVE = Symbol.get("case-insensitive"), EXTENDED = Symbol.get("extended"), SINGLELINE = Symbol.get("singleline"), MULTILINE = Symbol.get("multiline"); public static final int optionsFromScheme(Value value, Value type) { if (value instanceof Symbol) { if (value == CASE_INSENSITIVE) { if (type == REGEX_PERL5) return Perl5Compiler.CASE_INSENSITIVE_MASK; else if (type == REGEX_GLOB) return GlobCompiler.CASE_INSENSITIVE_MASK; else if (type == REGEX_AWK) return AwkCompiler.CASE_INSENSITIVE_MASK; else throw new RuntimeException("Unknown compiler " + type); } else if (value == EXTENDED) { if (type == REGEX_PERL5) return Perl5Compiler.EXTENDED_MASK; else throw new RuntimeException("The extended mask is supported only by Perl5 regexps"); } else if (value == SINGLELINE) { if (type == REGEX_PERL5) return Perl5Compiler.SINGLELINE_MASK; else throw new RuntimeException("The singleline mask is supported only by Perl5 regexps"); } else if (value == MULTILINE) { if (type == REGEX_PERL5) return Perl5Compiler.MULTILINE_MASK; else if (type == REGEX_GLOB) throw new RuntimeException("Glob compiler doesn't support this option: " + value); else if (type == REGEX_AWK) return AwkCompiler.MULTILINE_MASK; } else throw new RuntimeException("Unsupported regexp option " + value); } else if (value instanceof Pair) { int options = 0; Pair pv = (Pair) value; while (pv != EMPTYLIST) { options |= 
optionsFromScheme(pv.car(), type); pv = (Pair) pv.cdr(); } return options; } else throw new RuntimeException("Invalid format for options " + value); // Not reached, but keeps the Java compiler happy return 0; } public static class Index extends IndexedLibraryAdapter { public String getLibraryName() { return "Jakarta ORO regexp"; } public float getLibraryVersion() { return 1.0f; } public Value construct(Object context, int id) { return new Regexp(id); } public Index() { define("regexp", RPATTERN); define("regexp-match", RMATCH); define("regexp-match-positions", RMATCH_POSITIONS); define("regexp-replace", RREPLACE); define("regexp-replace*", RREPLACE_ALL); define("regexp-split", RSPLIT); define("regexp-split/delimiter", RSPLIT_DELIM); } } protected static RPattern patternFor(Value v) { RPattern pat; if (v instanceof RPattern) pat = (RPattern) v; else pat = new RPattern(string(v)); return pat; } public Value doApply(Interpreter r) throws ContinuationException { switch (r.vlr.length) { // One argument functions case 1: if (id == RPATTERN) return new RPattern(string(r.vlr[0]), REGEX_PERL5, 0); else break; // Two argument functions case 2: switch (id) { case RPATTERN: return new RPattern(string(r.vlr[0]), r.vlr[1], 0); case RMATCH: return patternFor(r.vlr[0]).match(r.vlr[1]); case RMATCH_POSITIONS: return patternFor(r.vlr[0]).matchPositions(r.vlr[1]); case RSPLIT: return RPattern.splitNoDelimiters(r.vlr[0], r.vlr[1]); case RSPLIT_DELIM: return patternFor(r.vlr[0]).splitWithDelimiters(r.vlr[1]); default: break; } // Three argument functions case 3: switch (id) { case RPATTERN: return new RPattern(string(r.vlr[0]), r.vlr[1], optionsFromScheme(r.vlr[2], r.vlr[1])); case RREPLACE: return patternFor(r.vlr[0]).replaceFirst(r.vlr[1], r.vlr[2]); case RREPLACE_ALL: return patternFor(r.vlr[0]).replaceAll(r.vlr[1], r.vlr[2]); default: break; } } throw new RuntimeException("Invalid number of arguments to function " + r.acc); } public static class RPattern extends Value { public 
Pattern pattern; Symbol type; int options; public RPattern() {} public RPattern(String pat) { setup(pat, REGEX_PERL5, 0); } public RPattern(String pat, Value type, int options) { setup(pat, type, options); } public void setup(String pat, Value type, int options) { try { if (type == REGEX_PERL5) pattern = (new Perl5Compiler()).compile(pat, options); else if (type == REGEX_GLOB) pattern = (new GlobCompiler()).compile(pat, options); else if (type == REGEX_AWK) pattern = (new AwkCompiler()).compile(pat, options); else throw new RuntimeException("unkown regular expression type: " + type); } catch (MalformedPatternException ex) { throw new RuntimeException("Malformed pattern: " + pat + "\n" + ex); } this.type = (Symbol) type; this.options = options; } protected PatternMatcher getMatcher() { if (type == REGEX_PERL5 || type == REGEX_GLOB) return new Perl5Matcher(); else if (type == REGEX_AWK) return new AwkMatcher(); else throw new RuntimeException("Unknown regular expression type: " + type); } public Value match(Value str) { PatternMatcher matcher = getMatcher(); // Do the matching PatternMatcherInput jStr = new PatternMatcherInput(string(str)); Pair result = null; Pair prev = null; boolean found = false; while (matcher.contains(jStr, pattern)) { found = true; MatchResult matchResult = matcher.getMatch(); for (int i = 0, length = matchResult.groups(); i < length; i++) { Pair m = new Pair(new SchemeString(matchResult.group(i)), EMPTYLIST); if (result == null) result = prev = m; else { prev.setCdr(m); prev = m; } } } if (!found) return FALSE; else return result; } public Value matchPositions(Value str) { PatternMatcher matcher = getMatcher(); // Do the matching PatternMatcherInput jStr = new PatternMatcherInput(string(str)); Pair result = null; Pair prev = null; boolean found = false; while (matcher.contains(jStr, pattern)) { found = true; MatchResult matchResult = matcher.getMatch(); for (int i = 0, length = matchResult.groups(); i < length; i++) { Pair m = new Pair( 
Quantity.valueOf(matchResult.beginOffset(i)), Quantity.valueOf(matchResult.endOffset(i))); Pair elem = new Pair(m, EMPTYLIST); if (result == null) result = prev = elem; else { prev.setCdr(elem); prev = elem; } } } if (!found) return FALSE; else return result; } public Value replaceFirst(Value input, Value substitution) { String result = Util.substitute( getMatcher(), pattern, new StringSubstitution(string(substitution)), string(input)); return new SchemeString(result); } public Value replaceAll(Value input, Value substitution) { String result = Util.substitute( getMatcher(), pattern, new StringSubstitution(string(substitution)), string(input), Util.SUBSTITUTE_ALL); return new SchemeString(result); } public static Value splitNoDelimiters(Value pat, Value input) { String pattern; if (pat instanceof RPattern) pattern = ((RPattern) pat).pattern.getPattern(); else pattern = string(pat); ArrayList list = new ArrayList(); (new Perl5Util()).split(list, pattern, string(input)); SchemeVector result = new SchemeVector(list.size()); for (int i = 0, length = list.size(); i < length; i++) result.vals[i] = new SchemeString((String) list.get(i)); return result; } public Value splitWithDelimiters(Value input) { try { ArrayList list = new ArrayList(); Util.split(list, getMatcher(), pattern, string(input)); SchemeVector result = new SchemeVector(list.size()); for (int i = 0, length = list.size(); i < length; i++) result.vals[i] = new SchemeString((String) list.get(i)); return result; } catch (Exception ex) { System.out.println("error: " + ex); ex.printStackTrace(); throw new RuntimeException(ex.toString()); } } public void display(ValueWriter w) throws IOException { w.append("#<regexp ") .append(pattern.getClass().getName()) .append(" '") .append(pattern.getPattern().toString()) .append("'>"); } public boolean valueEqual(Value ov) { return (ov instanceof RPattern) && pattern.equals(((RPattern) ov).pattern); } public void serialize(sisc.ser.Serializer s) throws IOException { 
s.writeExpression(type); s.writeUTF(pattern.getPattern()); s.writeInt(pattern.getOptions()); } public void deserialize(sisc.ser.Deserializer s) throws IOException { type = (Symbol) s.readExpression(); String pat = s.readUTF(); options = s.readInt(); setup(pat, type, options); } } }
public class ObjectDOSBuiltInFunctionsTest { ObjectDOS vm; ObjectDOS object; ObjectDOS value; Symbol local = Symbol.get("local"); private OpCodeInterpreter interpreter; private Environment environment; private Activation context; @Before public void setUp() { interpreter = new OpCodeInterpreter(); environment = interpreter.getEnvironment(); vm = VMObjectDOS.getVMObject(environment); object = environment.createNewObject(); value = environment.createNewObject(); context = interpreter.newActivation(); context.setVictim(environment.createNewObject()); } @Test public void shouldSetSlotInCurrentFunction() { FunctionDOS function = createFunctionThatSetsAndReturnsValueToLocal(); ObjectDOS result = function.execute(interpreter, object, new ArrayList<ObjectDOS>()); assertThat(((ValueObject) result).getValue(), is(1234)); } @Test public void shouldSetSlotInCurrentContextualFunction() { FunctionWithContext function = environment.createFunctionWithContext( createFunctionThatSetsAndReturnsValueToLocal(), context); ObjectDOS result = function.execute(interpreter, object, new ArrayList<ObjectDOS>()); assertThat(((ValueObject) result).getValue(), is(1234)); assertThat(context.getSlot(local), is(environment.getUndefined())); } @Test public void shouldSetSlotInFunctionsContext() { context.setSlot(local, environment.getUndefined()); FunctionWithContext function = createFunctionWithContextThatSetsLocalToValue(context); function.execute(interpreter, object, new ArrayList<ObjectDOS>()); assertThat(((ValueObject) context.getSlot(local)).getValue(), is(1234)); } @Test public void shouldSetSlotInObject() { object.setSlot(local, environment.getUndefined()); FunctionWithContext function = createFunctionWithContextThatSetsLocalToValue(context); function.execute(interpreter, object, new ArrayList<ObjectDOS>()); assertThat(context.getSlot(local), is(environment.getUndefined())); assertThat(((ValueObject) object.getSlot(local)).getValue(), is(1234)); } private FunctionWithContext 
createFunctionWithContextThatSetsLocalToValue( Activation localContext) { FunctionWithContext function = environment.createFunctionWithContext( new Symbol[] {}, new OpCode[] { new OpCode.CreateValueObject(1234), new OpCode.PushSymbol(local), new OpCode.Push(Symbol.RESULT), new OpCode.FunctionCall(Symbol.SET_SLOT_$_TO_$) }, localContext); return function; } private FunctionDOS createFunctionThatSetsAndReturnsValueToLocal() { FunctionDOS function = environment.createFunction( new Symbol[] {}, new OpCode[] { new OpCode.CreateValueObject(1234), new OpCode.PushSymbol(local), new OpCode.Push(Symbol.RESULT), new OpCode.FunctionCall(Symbol.SET_SLOT_$_TO_$), new OpCode.PushSymbol(local), new OpCode.FunctionCall(Symbol.GET_SLOT_$) }); return function; } // @Test // public void shouldSetSlotInParentContextIfDefinedThere() { // ObjectDOS parentContext = interpreter.newContext(); // parentContext.setSlot(local, interpreter.getEnvironment().createNewObject()); // // context.setContext(parentContext); // // arguments.add(new SymbolWrapper(local)); // arguments.add(object); // context.getFunction(Symbol.SET_SLOT_$_TO_$).execute(context, arguments); // // assertThat(parentContext.getSlot(local), is(object)); // assertThat(context.getSlot(local), is(object)); // goes up to parent... // } }
/**
 * Convenience overload of {@code singleton} that takes the tag as a string and converts it
 * to a {@code Symbol} via {@code Symbol.get} before delegating to the symbol-based overload.
 */
public static Node singleton(String tag, SEXP value) {
  Symbol tagSymbol = Symbol.get(tag);
  return singleton(tagSymbol, value);
}
public class SubstituteFunction extends SpecialFunction { private static final Symbol EXPR_ARGUMENT = Symbol.get("expr"); private static final Symbol ENV_ARGUMENT = Symbol.get("env"); private final PairList formals; public SubstituteFunction() { super("substitute"); this.formals = new PairList.Builder() .add(EXPR_ARGUMENT, Symbol.MISSING_ARG) .add(ENV_ARGUMENT, Symbol.MISSING_ARG) .build(); } @Override public SEXP apply(Context context, Environment rho, FunctionCall call, PairList args) { PairList matchedArguments = ClosureDispatcher.matchArguments(formals, args); SEXP exprArgument = matchedArguments.findByTag(EXPR_ARGUMENT); SEXP envArgument = matchedArguments.findByTag(ENV_ARGUMENT); // Substitute handles ... in an idiosyncratic way: // Only the first argument is used, and there is no attempt to // match subsequent arguments against the 'env' argument. SEXP expr; if (exprArgument == Symbols.ELLIPSES) { SEXP ellipses = rho.getVariable(Symbols.ELLIPSES); if (ellipses == Null.INSTANCE) { expr = Null.INSTANCE; } else { PromisePairList.Node promisePairList = (PromisePairList.Node) ellipses; Promise promisedArg = (Promise) promisePairList.getValue(); expr = promisedArg.getExpression(); } } else { expr = exprArgument; } return substitute(expr, buildContext(context, rho, envArgument)); } private static SubstituteContext buildContext(Context context, Environment rho, SEXP argument) { if (argument == Symbol.MISSING_ARG) { return buildContext(context, rho); } SEXP env = context.evaluate(argument, rho); return buildContext(context, env); } private static SubstituteContext buildContext(Context context, SEXP evaluatedEnv) { if (evaluatedEnv instanceof Environment) { if (context.getGlobalEnvironment() == evaluatedEnv) { return new GlobalEnvironmentContext(); } else { return new EnvironmentContext((Environment) evaluatedEnv); } } else if (evaluatedEnv instanceof ListVector) { return new ListContext((ListVector) evaluatedEnv); } else if (evaluatedEnv instanceof PairList) { return 
new PairListContext((PairList) evaluatedEnv); } else { throw new EvalException( "Cannot substitute using environment of type %s: expected list, pairlist, or environment", evaluatedEnv.getTypeName()); } } public static SEXP substitute(Context context, SEXP exp, SEXP environment) { return substitute(exp, buildContext(context, environment)); } private static SEXP substitute(SEXP exp, SubstituteContext context) { SubstitutingVisitor visitor = new SubstitutingVisitor(context); exp.accept(visitor); return visitor.getResult(); } public static class SubstitutingVisitor extends SexpVisitor<SEXP> { private final SubstituteContext context; private SEXP result; public SubstitutingVisitor(SubstituteContext context) { this.context = context; } @Override public void visit(FunctionCall call) { result = new FunctionCall( substitute(call.getFunction()), substituteArgumentList(call.getArguments()), call.getAttributes()); } private PairList substituteArgumentList(PairList arguments) { PairList.Builder builder = PairList.Node.newBuilder(); for (PairList.Node node : arguments.nodes()) { if (node.getValue().equals(Symbols.ELLIPSES)) { SEXP extraArguments = context.getVariable(Symbols.ELLIPSES); if (extraArguments != Symbol.UNBOUND_VALUE) { builder.addAll(unpackPromiseList((PromisePairList) extraArguments)); } else { builder.add(Symbols.ELLIPSES); } } else { builder.add(node.getRawTag(), substitute(node.getValue())); } } return builder.build(); } @Override public void visit(PairList.Node pairList) { PairList.Builder builder = PairList.Node.newBuilder(); for (PairList.Node node : pairList.nodes()) { builder.add(node.getRawTag(), substitute(node.getValue())); } result = builder.build(); } @Override public void visit(ListVector list) { ListVector.Builder builder = ListVector.newBuilder(); for (SEXP exp : list) { builder.add(substitute(exp)); } builder.copyAttributesFrom(list); result = builder.build(); } @Override public void visit(ExpressionVector vector) { List<SEXP> list = 
Lists.newArrayList(); for (SEXP exp : vector) { list.add(substitute(exp)); } result = new ExpressionVector(list, vector.getAttributes()); } @Override public void visit(Symbol symbol) { if (context.hasVariable(symbol)) { result = unpromise(context.getVariable(symbol)); } else { result = symbol; } } private PairList unpackPromiseList(PromisePairList dotExp) { PairList.Builder unpacked = new PairList.Node.Builder(); for (PairList.Node node : dotExp.nodes()) { unpacked.add(node.getRawTag(), unpromise(node.getValue())); } return unpacked.build(); } private SEXP unpromise(SEXP value) { while (value instanceof Promise) { value = ((Promise) value).getExpression(); } return value; } @Override public void visit(PromisePairList dotExp) { super.visit(dotExp); } @Override protected void unhandled(SEXP exp) { result = exp; } @Override public SEXP getResult() { return result; } private SEXP substitute(SEXP exp) { return SubstituteFunction.substitute(exp, context); } } private interface SubstituteContext { SEXP getVariable(Symbol name); boolean hasVariable(Symbol name); } private static class EnvironmentContext implements SubstituteContext { private final Environment rho; public EnvironmentContext(Environment rho) { super(); this.rho = rho; } @Override public SEXP getVariable(Symbol name) { return rho.getVariable(name); } @Override public boolean hasVariable(Symbol name) { return rho.hasVariable(name); } } private static class GlobalEnvironmentContext implements SubstituteContext { @Override public SEXP getVariable(Symbol name) { return Symbol.UNBOUND_VALUE; } @Override public boolean hasVariable(Symbol name) { return false; } } private static class ListContext implements SubstituteContext { private ListVector list; public ListContext(ListVector list) { this.list = list; } @Override public SEXP getVariable(Symbol name) { int index = list.getIndexByName(name.getPrintName()); if (index == -1) { return Symbol.UNBOUND_VALUE; } else { return list.getElementAsSEXP(index); } } @Override 
public boolean hasVariable(Symbol name) { return list.getIndexByName(name.getPrintName()) != -1; } } private static class PairListContext implements SubstituteContext { private PairList list; public PairListContext(PairList list) { this.list = list; } @Override public SEXP getVariable(Symbol name) { for (PairList.Node node : list.nodes()) { if (node.getTag() == name) { return node.getValue(); } } return Symbol.UNBOUND_VALUE; } @Override public boolean hasVariable(Symbol name) { return getVariable(name) != Symbol.UNBOUND_VALUE; } } }